1use std::fmt::Write as _;
2use std::fs;
3use std::io::{self, Read, Write};
4use std::path::{Path, PathBuf};
5use std::process::{Command, Stdio};
6use std::sync::Arc;
7use std::sync::mpsc;
8use std::thread;
9use std::time::{Duration, Instant, SystemTime};
10
11use fathomdb_schema::{SchemaError, SchemaManager};
12use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
13use serde::{Deserialize, Serialize};
14use sha2::{Digest, Sha256};
15
16use crate::{
17 EngineError, ProjectionRepairReport, ProjectionService, executable_trust,
18 ids::new_id,
19 operational::{
20 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
21 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
22 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
23 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
24 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
25 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
26 OperationalRetentionActionKind, OperationalRetentionPlanItem,
27 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
28 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
29 OperationalTraceReport, extract_secondary_index_entries_for_current,
30 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
31 parse_operational_validation_contract, validate_operational_payload_against_contract,
32 },
33 projection::ProjectionTarget,
34 sqlite,
35};
36
/// Outcome of [`AdminService::check_integrity`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// `true` when the first row returned by `PRAGMA integrity_check` is `"ok"`.
    pub physical_ok: bool,
    /// `true` when `pragma_foreign_key_check` reports no rows.
    pub foreign_keys_ok: bool,
    /// Chunks of active nodes that have no matching `fts_nodes` row.
    pub missing_fts_rows: usize,
    /// Count produced by `count_missing_property_fts_rows`.
    pub missing_property_fts_rows: usize,
    /// Logical ids that have more than one node row with `superseded_at IS NULL`.
    pub duplicate_active_logical_ids: usize,
    /// `operational_mutations` plus `operational_current` rows whose
    /// `collection_name` has no `operational_collections` row.
    pub operational_missing_collections: usize,
    /// `operational_current` rows whose `last_mutation_id` matches no mutation.
    pub operational_missing_last_mutations: usize,
    /// One human-readable message per non-zero counter above.
    pub warnings: Vec<String>,
}
49
/// A property-FTS schema registration for one node kind.
///
/// NOTE(review): the code that fills this record is not in this part of the
/// file; the field notes below are inferred from names and types only.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertySchemaRecord {
    /// Node kind the schema applies to.
    pub kind: String,
    /// Property paths as plain strings (presumably mirrors `entries` without
    /// the mode — confirm against the loader).
    pub property_paths: Vec<String>,
    /// Property paths together with their extraction mode.
    pub entries: Vec<FtsPropertyPathSpec>,
    /// Paths excluded from extraction (presumably relevant to recursive
    /// entries — confirm).
    pub exclude_paths: Vec<String>,
    /// Separator string (presumably joins extracted values — confirm).
    pub separator: String,
    /// Format version of the stored schema record.
    pub format_version: i64,
}
73
/// Extraction mode attached to an [`FtsPropertyPathSpec`].
///
/// Serialized in `snake_case`; `Scalar` is the default variant.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize)]
#[serde(rename_all = "snake_case")]
pub enum FtsPropertyPathMode {
    /// Default mode (presumably: treat the path as a single value — confirm
    /// against the extractor).
    #[default]
    Scalar,
    /// Presumably: descend into the value found at the path — confirm against
    /// the extractor.
    Recursive,
}
86
/// A property path paired with the mode used to extract it.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct FtsPropertyPathSpec {
    /// The property path expression.
    pub path: String,
    /// How the path is extracted.
    pub mode: FtsPropertyPathMode,
}
95
96impl FtsPropertyPathSpec {
97 #[must_use]
98 pub fn scalar(path: impl Into<String>) -> Self {
99 Self {
100 path: path.into(),
101 mode: FtsPropertyPathMode::Scalar,
102 }
103 }
104
105 #[must_use]
106 pub fn recursive(path: impl Into<String>) -> Self {
107 Self {
108 path: path.into(),
109 mode: FtsPropertyPathMode::Recursive,
110 }
111 }
112}
113
/// Options for a safe export.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// Defaults to `true`. Presumably requests a WAL checkpoint before the
    /// export is taken — confirm against the export implementation.
    pub force_checkpoint: bool,
}
122
123impl Default for SafeExportOptions {
124 fn default() -> Self {
125 Self {
126 force_checkpoint: true,
127 }
128 }
129}
130
/// Version of the export protocol. Presumably written to
/// `SafeExportManifest::protocol_version` — confirm at the export site.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
133
/// Manifest describing a safe export.
///
/// NOTE(review): the code that fills this manifest is not in this part of the
/// file; the field notes are inferred from names and types.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Export time (presumably a Unix timestamp — confirm units).
    pub exported_at: u64,
    /// SHA-256 digest (presumably hex, of the exported file — confirm).
    pub sha256: String,
    /// Schema version of the exported database.
    pub schema_version: u32,
    /// Export protocol version.
    pub protocol_version: u32,
    /// Page count of the exported database.
    pub page_count: u64,
}
148
/// Rows associated with one `source_ref`.
///
/// NOTE(review): the trace implementation is not in this part of the file;
/// the field notes are inferred from names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The `source_ref` that was traced.
    pub source_ref: String,
    /// Number of node rows found.
    pub node_rows: usize,
    /// Number of edge rows found.
    pub edge_rows: usize,
    /// Number of action rows found.
    pub action_rows: usize,
    /// Number of operational mutation rows found.
    pub operational_mutation_rows: usize,
    /// Logical ids of the nodes found.
    pub node_logical_ids: Vec<String>,
    /// Ids of the actions found.
    pub action_ids: Vec<String>,
    /// Ids of the operational mutations found.
    pub operational_mutation_ids: Vec<String>,
}
161
/// An edge reported in [`LogicalRestoreReport::skipped_edges`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical id of the skipped edge.
    pub edge_logical_id: String,
    /// The endpoint that was missing (presumably a node logical id — confirm
    /// against the restore implementation).
    pub missing_endpoint: String,
}
168
/// Summary of a logical restore of one `logical_id`.
///
/// NOTE(review): the restore implementation is not in this part of the file;
/// the field notes are inferred from names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// The logical id the restore was requested for.
    pub logical_id: String,
    /// Whether the restore changed nothing.
    pub was_noop: bool,
    /// Node rows restored.
    pub restored_node_rows: usize,
    /// Edge rows restored.
    pub restored_edge_rows: usize,
    /// Chunk rows restored.
    pub restored_chunk_rows: usize,
    /// FTS rows restored.
    pub restored_fts_rows: usize,
    /// Property FTS rows restored.
    pub restored_property_fts_rows: usize,
    /// Vector rows restored.
    pub restored_vec_rows: usize,
    /// Edges that were skipped, each with the endpoint that was missing.
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form notes about the operation.
    pub notes: Vec<String>,
}
183
/// Summary of a logical purge of one `logical_id`.
///
/// NOTE(review): the purge implementation is not in this part of the file;
/// the field notes are inferred from names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// The logical id the purge was requested for.
    pub logical_id: String,
    /// Whether the purge changed nothing.
    pub was_noop: bool,
    /// Node rows deleted.
    pub deleted_node_rows: usize,
    /// Edge rows deleted.
    pub deleted_edge_rows: usize,
    /// Chunk rows deleted.
    pub deleted_chunk_rows: usize,
    /// FTS rows deleted.
    pub deleted_fts_rows: usize,
    /// Vector rows deleted.
    pub deleted_vec_rows: usize,
    /// Free-form notes about the operation.
    pub notes: Vec<String>,
}
196
/// Options for a provenance purge.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// Presumably: report what would be removed without deleting — confirm
    /// against the purge implementation.
    pub dry_run: bool,
    /// Event types to keep. Deserializes to an empty list when omitted.
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
204
/// Summary of a provenance purge.
///
/// NOTE(review): the purge implementation is not in this part of the file;
/// the field notes are inferred from names.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of events deleted.
    pub events_deleted: u64,
    /// Number of events preserved.
    pub events_preserved: u64,
    /// Oldest remaining event, if any (presumably a timestamp — confirm).
    pub oldest_remaining: Option<i64>,
}
212
/// Administrative entry point for one database file.
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file that each operation opens a connection to.
    database_path: PathBuf,
    /// Bootstraps the schema on every connection opened by `connect`.
    schema_manager: Arc<SchemaManager>,
    /// Projection service built from the same path and schema manager.
    projections: ProjectionService,
}
220
/// Outcome of [`AdminService::check_semantics`]: one counter per check plus a
/// warning message for every non-zero counter.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no node row of any version.
    pub orphaned_chunks: usize,
    /// Active nodes whose `source_ref` is NULL.
    pub null_source_ref_nodes: usize,
    /// Steps whose `run_id` matches no run.
    pub broken_step_fk: usize,
    /// Actions whose `step_id` matches no step.
    pub broken_action_fk: usize,
    /// `fts_nodes` rows whose `chunk_id` matches no chunk.
    pub stale_fts_rows: usize,
    /// `fts_nodes` rows whose `node_logical_id` has no active node.
    pub fts_rows_for_superseded_nodes: usize,
    /// `fts_node_properties` rows whose `node_logical_id` has no active node.
    pub stale_property_fts_rows: usize,
    /// `fts_node_properties` rows whose `kind` has no `fts_property_schemas` row.
    pub orphaned_property_fts_rows: usize,
    /// `fts_node_properties` rows whose `kind` differs from the active node's kind.
    pub mismatched_kind_property_fts_rows: usize,
    /// Logical ids that have more than one `fts_node_properties` row.
    pub duplicate_property_fts_rows: usize,
    /// Count produced by `count_drifted_property_fts_rows`; the matching
    /// warning describes these as rows with stale `text_content`.
    pub drifted_property_fts_rows: usize,
    /// Active edges whose source or target has no active node.
    pub dangling_edges: usize,
    /// Logical ids that have node rows but none with `superseded_at IS NULL`.
    pub orphaned_supersession_chains: usize,
    /// `vec_nodes_active` rows whose `chunk_id` matches no chunk. Always 0
    /// without the `sqlite-vec` feature, and 0 when the query fails with a
    /// message naming `vec_nodes_active` or `no such module: vec0`.
    pub stale_vec_rows: usize,
    /// `vec_nodes_active` rows whose chunk's `node_logical_id` matches no node
    /// row of any version. Same zero fallbacks as `stale_vec_rows`.
    pub vec_rows_for_superseded_nodes: usize,
    /// In `latest_state` collections: `put` mutations with no newer mutation
    /// for the same key and no `operational_current` row for that key.
    pub missing_operational_current_rows: usize,
    /// In `latest_state` collections: `operational_current` rows whose last
    /// mutation is missing, belongs to another collection or key, is not a
    /// `put`, carries a different payload, or has been followed by a newer
    /// mutation for the same key.
    pub stale_operational_current_rows: usize,
    /// Mutations whose `created_at` is later than their collection's `disabled_at`.
    pub disabled_collection_mutations: usize,
    /// `node_access_metadata` rows whose `logical_id` matches no node row.
    pub orphaned_last_access_metadata_rows: usize,
    /// One human-readable message per non-zero counter above.
    pub warnings: Vec<String>,
}
264
/// Configuration for a vector regeneration run (serialized in `snake_case`).
///
/// NOTE(review): the consumer of this config is not in this part of the file;
/// the field notes are inferred from names and types.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorRegenerationConfig {
    /// Vector profile name.
    pub profile: String,
    /// Name of the vector table.
    pub table_name: String,
    /// Identity of the embedding model.
    pub model_identity: String,
    /// Version of the embedding model.
    pub model_version: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Normalization policy label.
    pub normalization_policy: String,
    /// Chunking policy label.
    pub chunking_policy: String,
    /// Preprocessing policy label.
    pub preprocessing_policy: String,
    /// Generator command as an argument vector (presumably executable first —
    /// confirm against the spawn site).
    pub generator_command: Vec<String>,
}
279
/// Summary of a vector regeneration run.
///
/// NOTE(review): the producer of this report is not in this part of the file;
/// the field notes are inferred from names.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Vector profile name.
    pub profile: String,
    /// Name of the vector table.
    pub table_name: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Number of chunks considered.
    pub total_chunks: usize,
    /// Number of vector rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the vector contract was persisted.
    pub contract_persisted: bool,
    /// Free-form notes about the run.
    pub notes: Vec<String>,
}
291
/// Limits and trust settings for the external vector generator (serialized in
/// `snake_case`).
///
/// NOTE(review): enforcement happens outside this part of the file; the field
/// notes are inferred from names and from the `Default` impl.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorGeneratorPolicy {
    /// Timeout in milliseconds.
    pub timeout_ms: u64,
    /// Maximum bytes accepted on the generator's stdout.
    pub max_stdout_bytes: usize,
    /// Maximum bytes accepted on the generator's stderr.
    pub max_stderr_bytes: usize,
    /// Maximum bytes of input.
    pub max_input_bytes: usize,
    /// Maximum number of chunks.
    pub max_chunks: usize,
    /// Deserializes to `true` when omitted.
    #[serde(default = "default_require_absolute_executable")]
    pub require_absolute_executable: bool,
    /// Deserializes to `true` when omitted.
    #[serde(default = "default_reject_world_writable_executable")]
    pub reject_world_writable_executable: bool,
    /// Deserializes to an empty list when omitted.
    #[serde(default)]
    pub allowed_executable_roots: Vec<String>,
    /// Deserializes to an empty list when omitted.
    #[serde(default)]
    pub preserve_env_vars: Vec<String>,
}
310
311impl Default for VectorGeneratorPolicy {
312 fn default() -> Self {
313 Self {
314 timeout_ms: 300_000,
315 max_stdout_bytes: 64 * 1024 * 1024,
316 max_stderr_bytes: 1024 * 1024,
317 max_input_bytes: 64 * 1024 * 1024,
318 max_chunks: 1_000_000,
319 require_absolute_executable: true,
320 reject_world_writable_executable: true,
321 allowed_executable_roots: vec![],
322 preserve_env_vars: vec![],
323 }
324 }
325}
326
/// Serde default for `VectorGeneratorPolicy::require_absolute_executable`:
/// the check is on when the field is omitted.
const fn default_require_absolute_executable() -> bool {
    true
}

/// Serde default for `VectorGeneratorPolicy::reject_world_writable_executable`:
/// the check is on when the field is omitted.
const fn default_reject_world_writable_executable() -> bool {
    true
}
334
// Limits and defaults used by the admin service. Their consumers are outside
// this part of the file, so the notes below are inferred from the names —
// verify at the use sites.

// Format version of the persisted vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
// Maximum lengths for vector regeneration config fields.
const MAX_PROFILE_LEN: usize = 128;
const MAX_MODEL_IDENTITY_LEN: usize = 256;
const MAX_MODEL_VERSION_LEN: usize = 128;
const MAX_POLICY_LEN: usize = 128;
// Generator command limits: per argument (4 KiB) and in total (16 KiB).
const MAX_GENERATOR_COMMAND_ARG_LEN: usize = 4096;
const MAX_GENERATOR_COMMAND_TOTAL_LEN: usize = 16 * 1024;
// Upper bound on serialized contract JSON (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
// Upper bound on audit metadata size.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
// Default and maximum row limits for operational reads.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
346
/// Cloneable handle to a shared [`AdminService`].
#[derive(Clone, Debug)]
pub struct AdminHandle {
    /// The shared service; cloning the handle clones this `Arc`.
    inner: Arc<AdminService>,
}
352
353impl AdminHandle {
354 #[must_use]
356 pub fn new(service: AdminService) -> Self {
357 Self {
358 inner: Arc::new(service),
359 }
360 }
361
362 #[must_use]
364 pub fn service(&self) -> Arc<AdminService> {
365 Arc::clone(&self.inner)
366 }
367}
368
369impl AdminService {
370 #[must_use]
372 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
373 let database_path = path.as_ref().to_path_buf();
374 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
375 Self {
376 database_path,
377 schema_manager,
378 projections,
379 }
380 }
381
382 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
383 #[cfg(feature = "sqlite-vec")]
384 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
385 #[cfg(not(feature = "sqlite-vec"))]
386 let conn = sqlite::open_connection(&self.database_path)?;
387 self.schema_manager.bootstrap(&conn)?;
388 Ok(conn)
389 }
390
391 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
394 let conn = self.connect()?;
395
396 let physical_result: String =
397 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
398 let foreign_key_count: i64 =
399 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
400 row.get(0)
401 })?;
402 let missing_fts_rows: i64 = conn.query_row(
403 r"
404 SELECT count(*)
405 FROM chunks c
406 JOIN nodes n
407 ON n.logical_id = c.node_logical_id
408 AND n.superseded_at IS NULL
409 WHERE NOT EXISTS (
410 SELECT 1
411 FROM fts_nodes f
412 WHERE f.chunk_id = c.id
413 )
414 ",
415 [],
416 |row| row.get(0),
417 )?;
418 let duplicate_active: i64 = conn.query_row(
419 r"
420 SELECT count(*)
421 FROM (
422 SELECT logical_id
423 FROM nodes
424 WHERE superseded_at IS NULL
425 GROUP BY logical_id
426 HAVING count(*) > 1
427 )
428 ",
429 [],
430 |row| row.get(0),
431 )?;
432 let operational_missing_collections: i64 = conn.query_row(
433 r"
434 SELECT (
435 SELECT count(*)
436 FROM operational_mutations m
437 LEFT JOIN operational_collections c ON c.name = m.collection_name
438 WHERE c.name IS NULL
439 ) + (
440 SELECT count(*)
441 FROM operational_current oc
442 LEFT JOIN operational_collections c ON c.name = oc.collection_name
443 WHERE c.name IS NULL
444 )
445 ",
446 [],
447 |row| row.get(0),
448 )?;
449 let operational_missing_last_mutations: i64 = conn.query_row(
450 r"
451 SELECT count(*)
452 FROM operational_current oc
453 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
454 WHERE m.id IS NULL
455 ",
456 [],
457 |row| row.get(0),
458 )?;
459
460 let missing_property_fts_rows = count_missing_property_fts_rows(&conn)?;
464
465 let mut warnings = Vec::new();
466 if missing_fts_rows > 0 {
467 warnings.push("missing FTS projections detected".to_owned());
468 }
469 if missing_property_fts_rows > 0 {
470 warnings.push("missing property FTS projections detected".to_owned());
471 }
472 if duplicate_active > 0 {
473 warnings.push("duplicate active logical_ids detected".to_owned());
474 }
475 if operational_missing_collections > 0 {
476 warnings.push("operational rows reference missing collections".to_owned());
477 }
478 if operational_missing_last_mutations > 0 {
479 warnings.push("operational current rows reference missing last mutations".to_owned());
480 }
481
482 Ok(IntegrityReport {
487 physical_ok: physical_result == "ok",
488 foreign_keys_ok: foreign_key_count == 0,
489 missing_fts_rows: i64_to_usize(missing_fts_rows),
490 missing_property_fts_rows: i64_to_usize(missing_property_fts_rows),
491 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
492 operational_missing_collections: i64_to_usize(operational_missing_collections),
493 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
494 warnings,
495 })
496 }
497
498 #[allow(clippy::too_many_lines)]
501 pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
502 let conn = self.connect()?;
503
504 let orphaned_chunks: i64 = conn.query_row(
505 r"
506 SELECT count(*)
507 FROM chunks c
508 WHERE NOT EXISTS (
509 SELECT 1 FROM nodes n
510 WHERE n.logical_id = c.node_logical_id
511 )
512 ",
513 [],
514 |row| row.get(0),
515 )?;
516
517 let null_source_ref_nodes: i64 = conn.query_row(
518 "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
519 [],
520 |row| row.get(0),
521 )?;
522
523 let broken_step_fk: i64 = conn.query_row(
524 r"
525 SELECT count(*) FROM steps s
526 WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
527 ",
528 [],
529 |row| row.get(0),
530 )?;
531
532 let broken_action_fk: i64 = conn.query_row(
533 r"
534 SELECT count(*) FROM actions a
535 WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
536 ",
537 [],
538 |row| row.get(0),
539 )?;
540
541 let stale_fts_rows: i64 = conn.query_row(
542 r"
543 SELECT count(*) FROM fts_nodes f
544 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
545 ",
546 [],
547 |row| row.get(0),
548 )?;
549
550 let fts_rows_for_superseded_nodes: i64 = conn.query_row(
551 r"
552 SELECT count(*) FROM fts_nodes f
553 WHERE NOT EXISTS (
554 SELECT 1 FROM nodes n
555 WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
556 )
557 ",
558 [],
559 |row| row.get(0),
560 )?;
561
562 let stale_property_fts_rows: i64 = conn.query_row(
563 r"
564 SELECT count(*) FROM fts_node_properties fp
565 WHERE NOT EXISTS (
566 SELECT 1 FROM nodes n
567 WHERE n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
568 )
569 ",
570 [],
571 |row| row.get(0),
572 )?;
573
574 let orphaned_property_fts_rows: i64 = conn.query_row(
575 r"
576 SELECT count(*) FROM fts_node_properties fp
577 WHERE NOT EXISTS (
578 SELECT 1 FROM fts_property_schemas s WHERE s.kind = fp.kind
579 )
580 ",
581 [],
582 |row| row.get(0),
583 )?;
584
585 let mismatched_kind_property_fts_rows: i64 = conn.query_row(
586 r"
587 SELECT count(*) FROM fts_node_properties fp
588 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL
589 WHERE n.kind != fp.kind
590 ",
591 [],
592 |row| row.get(0),
593 )?;
594
595 let duplicate_property_fts_rows: i64 = conn.query_row(
596 r"
597 SELECT count(*) FROM (
598 SELECT node_logical_id FROM fts_node_properties
599 GROUP BY node_logical_id
600 HAVING count(*) > 1
601 )
602 ",
603 [],
604 |row| row.get(0),
605 )?;
606
607 let drifted_property_fts_rows = count_drifted_property_fts_rows(&conn)?;
608
609 let dangling_edges: i64 = conn.query_row(
610 r"
611 SELECT count(*) FROM edges e
612 WHERE e.superseded_at IS NULL AND (
613 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
614 OR
615 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
616 )
617 ",
618 [],
619 |row| row.get(0),
620 )?;
621
622 let orphaned_supersession_chains: i64 = conn.query_row(
623 r"
624 SELECT count(*) FROM (
625 SELECT logical_id FROM nodes
626 GROUP BY logical_id
627 HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
628 )
629 ",
630 [],
631 |row| row.get(0),
632 )?;
633
634 #[cfg(feature = "sqlite-vec")]
636 let stale_vec_rows: i64 = match conn.query_row(
637 r"
638 SELECT count(*) FROM vec_nodes_active v
639 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
640 ",
641 [],
642 |row| row.get(0),
643 ) {
644 Ok(n) => n,
645 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
646 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
647 {
648 0
649 }
650 Err(e) => return Err(EngineError::Sqlite(e)),
651 };
652 #[cfg(not(feature = "sqlite-vec"))]
653 let stale_vec_rows: i64 = 0;
654
655 #[cfg(feature = "sqlite-vec")]
656 let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
657 r"
658 SELECT count(*) FROM vec_nodes_active v
659 JOIN chunks c ON c.id = v.chunk_id
660 WHERE NOT EXISTS (
661 SELECT 1 FROM nodes n
662 WHERE n.logical_id = c.node_logical_id
663 )
664 ",
665 [],
666 |row| row.get(0),
667 ) {
668 Ok(n) => n,
669 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
670 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
671 {
672 0
673 }
674 Err(e) => return Err(EngineError::Sqlite(e)),
675 };
676 #[cfg(not(feature = "sqlite-vec"))]
677 let vec_rows_for_superseded_nodes: i64 = 0;
678 let missing_operational_current_rows: i64 = conn.query_row(
679 r"
680 SELECT count(*)
681 FROM operational_mutations m
682 JOIN operational_collections c
683 ON c.name = m.collection_name
684 AND c.kind = 'latest_state'
685 WHERE m.op_kind = 'put'
686 AND NOT EXISTS (
687 SELECT 1
688 FROM operational_mutations newer
689 WHERE newer.collection_name = m.collection_name
690 AND newer.record_key = m.record_key
691 AND newer.mutation_order > m.mutation_order
692 )
693 AND NOT EXISTS (
694 SELECT 1
695 FROM operational_current oc
696 WHERE oc.collection_name = m.collection_name
697 AND oc.record_key = m.record_key
698 )
699 ",
700 [],
701 |row| row.get(0),
702 )?;
703 let stale_operational_current_rows: i64 = conn.query_row(
704 r"
705 SELECT count(*)
706 FROM operational_current oc
707 JOIN operational_collections c
708 ON c.name = oc.collection_name
709 AND c.kind = 'latest_state'
710 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
711 WHERE m.id IS NULL
712 OR m.collection_name != oc.collection_name
713 OR m.record_key != oc.record_key
714 OR m.op_kind != 'put'
715 OR m.payload_json != oc.payload_json
716 OR EXISTS (
717 SELECT 1
718 FROM operational_mutations newer
719 WHERE newer.collection_name = oc.collection_name
720 AND newer.record_key = oc.record_key
721 AND newer.mutation_order > m.mutation_order
722 )
723 ",
724 [],
725 |row| row.get(0),
726 )?;
727 let disabled_collection_mutations: i64 = conn.query_row(
728 r"
729 SELECT count(*)
730 FROM operational_mutations m
731 JOIN operational_collections c ON c.name = m.collection_name
732 WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
733 ",
734 [],
735 |row| row.get(0),
736 )?;
737 let orphaned_last_access_metadata_rows: i64 = conn.query_row(
738 r"
739 SELECT count(*)
740 FROM node_access_metadata am
741 WHERE NOT EXISTS (
742 SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
743 )
744 ",
745 [],
746 |row| row.get(0),
747 )?;
748
749 let mut warnings = Vec::new();
750 if orphaned_chunks > 0 {
751 warnings.push(format!(
752 "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
753 ));
754 }
755 if null_source_ref_nodes > 0 {
756 warnings.push(format!(
757 "{null_source_ref_nodes} active node(s) with null source_ref"
758 ));
759 }
760 if broken_step_fk > 0 {
761 warnings.push(format!(
762 "{broken_step_fk} step(s) referencing non-existent run"
763 ));
764 }
765 if broken_action_fk > 0 {
766 warnings.push(format!(
767 "{broken_action_fk} action(s) referencing non-existent step"
768 ));
769 }
770 if stale_fts_rows > 0 {
771 warnings.push(format!(
772 "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
773 ));
774 }
775 if fts_rows_for_superseded_nodes > 0 {
776 warnings.push(format!(
777 "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
778 ));
779 }
780 if stale_property_fts_rows > 0 {
781 warnings.push(format!(
782 "{stale_property_fts_rows} stale property FTS row(s) for superseded/missing node(s)"
783 ));
784 }
785 if orphaned_property_fts_rows > 0 {
786 warnings.push(format!(
787 "{orphaned_property_fts_rows} orphaned property FTS row(s) for unregistered kind(s)"
788 ));
789 }
790 if mismatched_kind_property_fts_rows > 0 {
791 warnings.push(format!(
792 "{mismatched_kind_property_fts_rows} property FTS row(s) whose kind does not match the active node"
793 ));
794 }
795 if duplicate_property_fts_rows > 0 {
796 warnings.push(format!(
797 "{duplicate_property_fts_rows} active logical ID(s) with duplicate property FTS rows"
798 ));
799 }
800 if drifted_property_fts_rows > 0 {
801 warnings.push(format!(
802 "{drifted_property_fts_rows} property FTS row(s) with stale text_content"
803 ));
804 }
805 if dangling_edges > 0 {
806 warnings.push(format!(
807 "{dangling_edges} active edge(s) with missing endpoint node"
808 ));
809 }
810 if orphaned_supersession_chains > 0 {
811 warnings.push(format!(
812 "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
813 ));
814 }
815 if stale_vec_rows > 0 {
816 warnings.push(format!(
817 "{stale_vec_rows} stale vec row(s) referencing missing chunk"
818 ));
819 }
820 if vec_rows_for_superseded_nodes > 0 {
821 warnings.push(format!(
822 "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
823 ));
824 }
825 if missing_operational_current_rows > 0 {
826 warnings.push(format!(
827 "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
828 ));
829 }
830 if stale_operational_current_rows > 0 {
831 warnings.push(format!(
832 "{stale_operational_current_rows} stale operational_current row(s)"
833 ));
834 }
835 if disabled_collection_mutations > 0 {
836 warnings.push(format!(
837 "{disabled_collection_mutations} mutation(s) were written after collection disable"
838 ));
839 }
840 if orphaned_last_access_metadata_rows > 0 {
841 warnings.push(format!(
842 "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
843 ));
844 }
845
846 Ok(SemanticReport {
847 orphaned_chunks: i64_to_usize(orphaned_chunks),
848 null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
849 broken_step_fk: i64_to_usize(broken_step_fk),
850 broken_action_fk: i64_to_usize(broken_action_fk),
851 stale_fts_rows: i64_to_usize(stale_fts_rows),
852 fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
853 stale_property_fts_rows: i64_to_usize(stale_property_fts_rows),
854 orphaned_property_fts_rows: i64_to_usize(orphaned_property_fts_rows),
855 mismatched_kind_property_fts_rows: i64_to_usize(mismatched_kind_property_fts_rows),
856 duplicate_property_fts_rows: i64_to_usize(duplicate_property_fts_rows),
857 drifted_property_fts_rows: i64_to_usize(drifted_property_fts_rows),
858 dangling_edges: i64_to_usize(dangling_edges),
859 orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
860 stale_vec_rows: i64_to_usize(stale_vec_rows),
861 vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
862 missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
863 stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
864 disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
865 orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
866 warnings,
867 })
868 }
869
870 pub fn register_operational_collection(
873 &self,
874 request: &OperationalRegisterRequest,
875 ) -> Result<OperationalCollectionRecord, EngineError> {
876 if request.name.trim().is_empty() {
877 return Err(EngineError::InvalidWrite(
878 "operational collection name must not be empty".to_owned(),
879 ));
880 }
881 if request.schema_json.is_empty() {
882 return Err(EngineError::InvalidWrite(
883 "operational collection schema_json must not be empty".to_owned(),
884 ));
885 }
886 if request.retention_json.is_empty() {
887 return Err(EngineError::InvalidWrite(
888 "operational collection retention_json must not be empty".to_owned(),
889 ));
890 }
891 if request.filter_fields_json.is_empty() {
892 return Err(EngineError::InvalidWrite(
893 "operational collection filter_fields_json must not be empty".to_owned(),
894 ));
895 }
896 parse_operational_validation_contract(&request.validation_json)
897 .map_err(EngineError::InvalidWrite)?;
898 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
899 .map_err(EngineError::InvalidWrite)?;
900 if request.format_version <= 0 {
901 return Err(EngineError::InvalidWrite(
902 "operational collection format_version must be positive".to_owned(),
903 ));
904 }
905 parse_operational_filter_fields(&request.filter_fields_json)
906 .map_err(EngineError::InvalidWrite)?;
907
908 let mut conn = self.connect()?;
909 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
910 tx.execute(
911 "INSERT INTO operational_collections \
912 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
913 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
914 rusqlite::params![
915 request.name.as_str(),
916 request.kind.as_str(),
917 request.schema_json.as_str(),
918 request.retention_json.as_str(),
919 request.filter_fields_json.as_str(),
920 request.validation_json.as_str(),
921 request.secondary_indexes_json.as_str(),
922 request.format_version,
923 ],
924 )?;
925 persist_simple_provenance_event(
926 &tx,
927 "operational_collection_registered",
928 request.name.as_str(),
929 Some(serde_json::json!({
930 "kind": request.kind.as_str(),
931 "format_version": request.format_version,
932 })),
933 )?;
934 tx.commit()?;
935
936 self.describe_operational_collection(&request.name)?
937 .ok_or_else(|| {
938 EngineError::Bridge("registered collection missing after commit".to_owned())
939 })
940 }
941
942 pub fn describe_operational_collection(
945 &self,
946 name: &str,
947 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
948 let conn = self.connect()?;
949 load_operational_collection_record(&conn, name)
950 }
951
952 pub fn update_operational_collection_filters(
956 &self,
957 name: &str,
958 filter_fields_json: &str,
959 ) -> Result<OperationalCollectionRecord, EngineError> {
960 if filter_fields_json.is_empty() {
961 return Err(EngineError::InvalidWrite(
962 "operational collection filter_fields_json must not be empty".to_owned(),
963 ));
964 }
965 let declared_fields = parse_operational_filter_fields(filter_fields_json)
966 .map_err(EngineError::InvalidWrite)?;
967
968 let mut conn = self.connect()?;
969 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
970 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
971 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
972 })?;
973 tx.execute(
974 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
975 rusqlite::params![name, filter_fields_json],
976 )?;
977 tx.execute(
978 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
979 [name],
980 )?;
981
982 let mut mutation_stmt = tx.prepare(
983 "SELECT id, payload_json FROM operational_mutations \
984 WHERE collection_name = ?1 ORDER BY mutation_order",
985 )?;
986 let mutations = mutation_stmt
987 .query_map([name], |row| {
988 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
989 })?
990 .collect::<Result<Vec<_>, _>>()?;
991 drop(mutation_stmt);
992
993 let mut insert_filter_value = tx.prepare_cached(
994 "INSERT INTO operational_filter_values \
995 (mutation_id, collection_name, field_name, string_value, integer_value) \
996 VALUES (?1, ?2, ?3, ?4, ?5)",
997 )?;
998 let mut inserted_values = 0usize;
999 for (mutation_id, payload_json) in &mutations {
1000 for filter_value in
1001 extract_operational_filter_values(&declared_fields, payload_json.as_str())
1002 {
1003 insert_filter_value.execute(rusqlite::params![
1004 mutation_id,
1005 name,
1006 filter_value.field_name,
1007 filter_value.string_value,
1008 filter_value.integer_value,
1009 ])?;
1010 inserted_values += 1;
1011 }
1012 }
1013 drop(insert_filter_value);
1014
1015 persist_simple_provenance_event(
1016 &tx,
1017 "operational_collection_filter_fields_updated",
1018 name,
1019 Some(serde_json::json!({
1020 "field_count": declared_fields.len(),
1021 "mutations_backfilled": mutations.len(),
1022 "inserted_filter_values": inserted_values,
1023 })),
1024 )?;
1025 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1026 EngineError::Bridge("operational collection missing after filter update".to_owned())
1027 })?;
1028 tx.commit()?;
1029 Ok(updated)
1030 }
1031
1032 pub fn update_operational_collection_validation(
1035 &self,
1036 name: &str,
1037 validation_json: &str,
1038 ) -> Result<OperationalCollectionRecord, EngineError> {
1039 parse_operational_validation_contract(validation_json)
1040 .map_err(EngineError::InvalidWrite)?;
1041
1042 let mut conn = self.connect()?;
1043 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1044 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1045 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1046 })?;
1047 tx.execute(
1048 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
1049 rusqlite::params![name, validation_json],
1050 )?;
1051 persist_simple_provenance_event(
1052 &tx,
1053 "operational_collection_validation_updated",
1054 name,
1055 Some(serde_json::json!({
1056 "has_validation": !validation_json.is_empty(),
1057 })),
1058 )?;
1059 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1060 EngineError::Bridge("operational collection missing after validation update".to_owned())
1061 })?;
1062 tx.commit()?;
1063 Ok(updated)
1064 }
1065
1066 pub fn update_operational_collection_secondary_indexes(
1070 &self,
1071 name: &str,
1072 secondary_indexes_json: &str,
1073 ) -> Result<OperationalCollectionRecord, EngineError> {
1074 let mut conn = self.connect()?;
1075 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1076 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1077 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1078 })?;
1079 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
1080 .map_err(EngineError::InvalidWrite)?;
1081 tx.execute(
1082 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
1083 rusqlite::params![name, secondary_indexes_json],
1084 )?;
1085 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1086 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1087 persist_simple_provenance_event(
1088 &tx,
1089 "operational_collection_secondary_indexes_updated",
1090 name,
1091 Some(serde_json::json!({
1092 "index_count": indexes.len(),
1093 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1094 "current_entries_rebuilt": current_entries_rebuilt,
1095 })),
1096 )?;
1097 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1098 EngineError::Bridge(
1099 "operational collection missing after secondary index update".to_owned(),
1100 )
1101 })?;
1102 tx.commit()?;
1103 Ok(updated)
1104 }
1105
1106 pub fn rebuild_operational_secondary_indexes(
1109 &self,
1110 name: &str,
1111 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
1112 let mut conn = self.connect()?;
1113 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1114 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1115 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1116 })?;
1117 let indexes =
1118 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1119 .map_err(EngineError::InvalidWrite)?;
1120 let (mutation_entries_rebuilt, current_entries_rebuilt) =
1121 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
1122 persist_simple_provenance_event(
1123 &tx,
1124 "operational_secondary_indexes_rebuilt",
1125 name,
1126 Some(serde_json::json!({
1127 "index_count": indexes.len(),
1128 "mutation_entries_rebuilt": mutation_entries_rebuilt,
1129 "current_entries_rebuilt": current_entries_rebuilt,
1130 })),
1131 )?;
1132 tx.commit()?;
1133 Ok(OperationalSecondaryIndexRebuildReport {
1134 collection_name: name.to_owned(),
1135 mutation_entries_rebuilt,
1136 current_entries_rebuilt,
1137 })
1138 }
1139
1140 pub fn validate_operational_collection_history(
1143 &self,
1144 name: &str,
1145 ) -> Result<OperationalHistoryValidationReport, EngineError> {
1146 let conn = self.connect()?;
1147 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
1148 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1149 })?;
1150 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
1151 .map_err(EngineError::InvalidWrite)?
1152 else {
1153 return Err(EngineError::InvalidWrite(format!(
1154 "operational collection '{name}' has no validation_json configured"
1155 )));
1156 };
1157
1158 let mut stmt = conn.prepare(
1159 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
1160 WHERE collection_name = ?1 ORDER BY mutation_order",
1161 )?;
1162 let rows = stmt
1163 .query_map([name], |row| {
1164 Ok((
1165 row.get::<_, String>(0)?,
1166 row.get::<_, String>(1)?,
1167 row.get::<_, String>(2)?,
1168 row.get::<_, String>(3)?,
1169 ))
1170 })?
1171 .collect::<Result<Vec<_>, _>>()?;
1172 drop(stmt);
1173
1174 let mut checked_rows = 0usize;
1175 let mut issues = Vec::new();
1176 for (mutation_id, record_key, op_kind, payload_json) in rows {
1177 if op_kind == "delete" {
1178 continue;
1179 }
1180 checked_rows += 1;
1181 if let Err(message) =
1182 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1183 {
1184 issues.push(OperationalHistoryValidationIssue {
1185 mutation_id,
1186 record_key,
1187 op_kind,
1188 message,
1189 });
1190 }
1191 }
1192
1193 Ok(OperationalHistoryValidationReport {
1194 collection_name: name.to_owned(),
1195 checked_rows,
1196 invalid_row_count: issues.len(),
1197 issues,
1198 })
1199 }
1200
1201 pub fn disable_operational_collection(
1204 &self,
1205 name: &str,
1206 ) -> Result<OperationalCollectionRecord, EngineError> {
1207 let mut conn = self.connect()?;
1208 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1209 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1210 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1211 })?;
1212 let changed = if record.disabled_at.is_none() {
1213 tx.execute(
1214 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1215 [name],
1216 )?;
1217 true
1218 } else {
1219 false
1220 };
1221 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1222 EngineError::Bridge("operational collection missing after disable".to_owned())
1223 })?;
1224 persist_simple_provenance_event(
1225 &tx,
1226 "operational_collection_disabled",
1227 name,
1228 Some(serde_json::json!({
1229 "disabled_at": record.disabled_at,
1230 "changed": changed,
1231 })),
1232 )?;
1233 tx.commit()?;
1234 Ok(record)
1235 }
1236
1237 pub fn compact_operational_collection(
1240 &self,
1241 name: &str,
1242 dry_run: bool,
1243 ) -> Result<OperationalCompactionReport, EngineError> {
1244 let mut conn = self.connect()?;
1245 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1246 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1247 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1248 })?;
1249 validate_append_only_operational_collection(&collection, "compact")?;
1250 let (mutation_ids, before_timestamp) =
1251 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1252 if dry_run {
1253 drop(tx);
1254 return Ok(OperationalCompactionReport {
1255 collection_name: name.to_owned(),
1256 deleted_mutations: mutation_ids.len(),
1257 dry_run: true,
1258 before_timestamp,
1259 });
1260 }
1261 let mut delete_stmt =
1262 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1263 for mutation_id in &mutation_ids {
1264 delete_stmt.execute([mutation_id.as_str()])?;
1265 }
1266 drop(delete_stmt);
1267 persist_simple_provenance_event(
1268 &tx,
1269 "operational_collection_compacted",
1270 name,
1271 Some(serde_json::json!({
1272 "deleted_mutations": mutation_ids.len(),
1273 "before_timestamp": before_timestamp,
1274 })),
1275 )?;
1276 tx.commit()?;
1277 Ok(OperationalCompactionReport {
1278 collection_name: name.to_owned(),
1279 deleted_mutations: mutation_ids.len(),
1280 dry_run: false,
1281 before_timestamp,
1282 })
1283 }
1284
1285 pub fn purge_operational_collection(
1288 &self,
1289 name: &str,
1290 before_timestamp: i64,
1291 ) -> Result<OperationalPurgeReport, EngineError> {
1292 let mut conn = self.connect()?;
1293 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1294 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1295 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1296 })?;
1297 validate_append_only_operational_collection(&collection, "purge")?;
1298 let deleted_mutations = tx.execute(
1299 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1300 rusqlite::params![name, before_timestamp],
1301 )?;
1302 persist_simple_provenance_event(
1303 &tx,
1304 "operational_collection_purged",
1305 name,
1306 Some(serde_json::json!({
1307 "deleted_mutations": deleted_mutations,
1308 "before_timestamp": before_timestamp,
1309 })),
1310 )?;
1311 tx.commit()?;
1312 Ok(OperationalPurgeReport {
1313 collection_name: name.to_owned(),
1314 deleted_mutations,
1315 before_timestamp,
1316 })
1317 }
1318
1319 pub fn plan_operational_retention(
1322 &self,
1323 now_timestamp: i64,
1324 collection_names: Option<&[String]>,
1325 max_collections: Option<usize>,
1326 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1327 let conn = self.connect()?;
1328 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1329 let mut items = Vec::with_capacity(records.len());
1330 for record in records {
1331 items.push(plan_operational_retention_item(
1332 &conn,
1333 &record,
1334 now_timestamp,
1335 )?);
1336 }
1337 Ok(OperationalRetentionPlanReport {
1338 planned_at: now_timestamp,
1339 collections_examined: items.len(),
1340 items,
1341 })
1342 }
1343
1344 pub fn run_operational_retention(
1347 &self,
1348 now_timestamp: i64,
1349 collection_names: Option<&[String]>,
1350 max_collections: Option<usize>,
1351 dry_run: bool,
1352 ) -> Result<OperationalRetentionRunReport, EngineError> {
1353 let mut conn = self.connect()?;
1354 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1355 let mut items = Vec::with_capacity(records.len());
1356 let mut collections_acted_on = 0usize;
1357
1358 for record in records {
1359 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1360 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1361 if item.deleted_mutations > 0 {
1362 collections_acted_on += 1;
1363 }
1364 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1365 drop(tx);
1366 } else {
1367 tx.commit()?;
1368 }
1369 items.push(item);
1370 }
1371
1372 Ok(OperationalRetentionRunReport {
1373 executed_at: now_timestamp,
1374 collections_examined: items.len(),
1375 collections_acted_on,
1376 dry_run,
1377 items,
1378 })
1379 }
1380
1381 pub fn trace_operational_collection(
1384 &self,
1385 collection_name: &str,
1386 record_key: Option<&str>,
1387 ) -> Result<OperationalTraceReport, EngineError> {
1388 let conn = self.connect()?;
1389 ensure_operational_collection_registered(&conn, collection_name)?;
1390 let mutations = if let Some(record_key) = record_key {
1391 let mut stmt = conn.prepare(
1392 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1393 FROM operational_mutations \
1394 WHERE collection_name = ?1 AND record_key = ?2 \
1395 ORDER BY mutation_order",
1396 )?;
1397 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1398 .collect::<Result<Vec<_>, _>>()?
1399 } else {
1400 let mut stmt = conn.prepare(
1401 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1402 FROM operational_mutations \
1403 WHERE collection_name = ?1 \
1404 ORDER BY mutation_order",
1405 )?;
1406 stmt.query_map([collection_name], map_operational_mutation_row)?
1407 .collect::<Result<Vec<_>, _>>()?
1408 };
1409 let current_rows = if let Some(record_key) = record_key {
1410 let mut stmt = conn.prepare(
1411 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1412 FROM operational_current \
1413 WHERE collection_name = ?1 AND record_key = ?2 \
1414 ORDER BY updated_at, record_key",
1415 )?;
1416 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1417 .collect::<Result<Vec<_>, _>>()?
1418 } else {
1419 let mut stmt = conn.prepare(
1420 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1421 FROM operational_current \
1422 WHERE collection_name = ?1 \
1423 ORDER BY updated_at, record_key",
1424 )?;
1425 stmt.query_map([collection_name], map_operational_current_row)?
1426 .collect::<Result<Vec<_>, _>>()?
1427 };
1428
1429 Ok(OperationalTraceReport {
1430 collection_name: collection_name.to_owned(),
1431 record_key: record_key.map(str::to_owned),
1432 mutation_count: mutations.len(),
1433 current_count: current_rows.len(),
1434 mutations,
1435 current_rows,
1436 })
1437 }
1438
1439 pub fn read_operational_collection(
1442 &self,
1443 request: &OperationalReadRequest,
1444 ) -> Result<OperationalReadReport, EngineError> {
1445 if request.collection_name.trim().is_empty() {
1446 return Err(EngineError::InvalidWrite(
1447 "operational read collection_name must not be empty".to_owned(),
1448 ));
1449 }
1450 if request.filters.is_empty() {
1451 return Err(EngineError::InvalidWrite(
1452 "operational read requires at least one filter clause".to_owned(),
1453 ));
1454 }
1455
1456 let conn = self.connect()?;
1457 let record = load_operational_collection_record(&conn, &request.collection_name)?
1458 .ok_or_else(|| {
1459 EngineError::InvalidWrite(format!(
1460 "operational collection '{}' is not registered",
1461 request.collection_name
1462 ))
1463 })?;
1464 validate_append_only_operational_collection(&record, "read")?;
1465 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1466 .map_err(EngineError::InvalidWrite)?;
1467 let secondary_indexes =
1468 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1469 .map_err(EngineError::InvalidWrite)?;
1470 let applied_limit = operational_read_limit(request.limit)?;
1471 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1472 if let Some(report) = execute_operational_secondary_index_read(
1473 &conn,
1474 &request.collection_name,
1475 &filters,
1476 &secondary_indexes,
1477 applied_limit,
1478 )? {
1479 return Ok(report);
1480 }
1481 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1482 }
1483
1484 pub fn rebuild_operational_current(
1487 &self,
1488 collection_name: Option<&str>,
1489 ) -> Result<OperationalRepairReport, EngineError> {
1490 let mut conn = self.connect()?;
1491 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1492 let collections = if let Some(name) = collection_name {
1493 let maybe_kind: Option<String> = tx
1494 .query_row(
1495 "SELECT kind FROM operational_collections WHERE name = ?1",
1496 [name],
1497 |row| row.get(0),
1498 )
1499 .optional()?;
1500 let Some(kind) = maybe_kind else {
1501 return Err(EngineError::InvalidWrite(format!(
1502 "operational collection '{name}' is not registered"
1503 )));
1504 };
1505 if kind != OperationalCollectionKind::LatestState.as_str() {
1506 return Err(EngineError::InvalidWrite(format!(
1507 "operational collection '{name}' is not latest_state"
1508 )));
1509 }
1510 vec![name.to_owned()]
1511 } else {
1512 let mut stmt = tx.prepare(
1513 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1514 )?;
1515 stmt.query_map([], |row| row.get::<_, String>(0))?
1516 .collect::<Result<Vec<_>, _>>()?
1517 };
1518
1519 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1520 for collection in &collections {
1521 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1522 EngineError::Bridge(format!(
1523 "operational collection '{collection}' missing during current rebuild"
1524 ))
1525 })?;
1526 let indexes = parse_operational_secondary_indexes_json(
1527 &record.secondary_indexes_json,
1528 record.kind,
1529 )
1530 .map_err(EngineError::InvalidWrite)?;
1531 if !indexes.is_empty() {
1532 rebuild_operational_secondary_index_entries(
1533 &tx,
1534 &record.name,
1535 record.kind,
1536 &indexes,
1537 )?;
1538 }
1539 }
1540
1541 persist_simple_provenance_event(
1542 &tx,
1543 "operational_current_rebuilt",
1544 collection_name.unwrap_or("*"),
1545 Some(serde_json::json!({
1546 "collections_rebuilt": collections.len(),
1547 "current_rows_rebuilt": rebuilt_rows,
1548 })),
1549 )?;
1550 tx.commit()?;
1551
1552 Ok(OperationalRepairReport {
1553 collections_rebuilt: collections.len(),
1554 current_rows_rebuilt: rebuilt_rows,
1555 })
1556 }
1557
/// Rebuilds the projection identified by `target`.
///
/// This is a thin pass-through: the call is forwarded unchanged to the
/// `rebuild_projections` method of `self.projections`, and its result is
/// returned as-is.
///
/// # Errors
/// Returns whatever [`EngineError`] the underlying projection call returns.
pub fn rebuild_projections(
    &self,
    target: ProjectionTarget,
) -> Result<ProjectionRepairReport, EngineError> {
    self.projections.rebuild_projections(target)
}
1566
/// Rebuilds projections that are missing.
///
/// This is a thin pass-through to the `rebuild_missing_projections` method of
/// `self.projections`; which rows count as missing is decided there.
///
/// # Errors
/// Returns whatever [`EngineError`] the underlying projection call returns.
pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
    self.projections.rebuild_missing_projections()
}
1572
1573 pub fn register_fts_property_schema(
1582 &self,
1583 kind: &str,
1584 property_paths: &[String],
1585 separator: Option<&str>,
1586 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1587 let specs: Vec<FtsPropertyPathSpec> = property_paths
1588 .iter()
1589 .map(|p| FtsPropertyPathSpec::scalar(p.clone()))
1590 .collect();
1591 self.register_fts_property_schema_with_entries(kind, &specs, separator, &[])
1592 }
1593
1594 pub fn register_fts_property_schema_with_entries(
1605 &self,
1606 kind: &str,
1607 entries: &[FtsPropertyPathSpec],
1608 separator: Option<&str>,
1609 exclude_paths: &[String],
1610 ) -> Result<FtsPropertySchemaRecord, EngineError> {
1611 let paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
1612 validate_fts_property_paths(&paths)?;
1613 for p in exclude_paths {
1614 if !p.starts_with("$.") {
1615 return Err(EngineError::InvalidWrite(format!(
1616 "exclude_paths entries must start with '$.' but got: {p}"
1617 )));
1618 }
1619 }
1620 let separator = separator.unwrap_or(" ");
1621 let paths_json = serialize_property_paths_json(entries, exclude_paths)?;
1622
1623 let mut conn = self.connect()?;
1624 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1625
1626 let previous_row: Option<(String, String)> = tx
1632 .query_row(
1633 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
1634 [kind],
1635 |row| {
1636 let json: String = row.get(0)?;
1637 let sep: String = row.get(1)?;
1638 Ok((json, sep))
1639 },
1640 )
1641 .optional()?;
1642 let had_previous_schema = previous_row.is_some();
1643 let previous_recursive_paths: Vec<String> = previous_row
1644 .map(|(json, sep)| crate::writer::parse_property_schema_json(&json, &sep))
1645 .map_or(Vec::new(), |schema| {
1646 schema
1647 .paths
1648 .into_iter()
1649 .filter(|p| p.mode == crate::writer::PropertyPathMode::Recursive)
1650 .map(|p| p.path)
1651 .collect()
1652 });
1653 let new_recursive_paths: Vec<&str> = entries
1654 .iter()
1655 .filter(|e| e.mode == FtsPropertyPathMode::Recursive)
1656 .map(|e| e.path.as_str())
1657 .collect();
1658 let introduces_new_recursive = new_recursive_paths
1659 .iter()
1660 .any(|p| !previous_recursive_paths.iter().any(|prev| prev == p));
1661
1662 tx.execute(
1663 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
1664 VALUES (?1, ?2, ?3) \
1665 ON CONFLICT(kind) DO UPDATE SET property_paths_json = ?2, separator = ?3",
1666 rusqlite::params![kind, paths_json, separator],
1667 )?;
1668
1669 let needs_rebuild = introduces_new_recursive || had_previous_schema;
1677 if needs_rebuild {
1678 tx.execute("DELETE FROM fts_node_properties WHERE kind = ?1", [kind])?;
1679 tx.execute(
1680 "DELETE FROM fts_node_property_positions WHERE kind = ?1",
1681 [kind],
1682 )?;
1683 crate::projection::insert_property_fts_rows_for_kind(&tx, kind)?;
1688 }
1689
1690 persist_simple_provenance_event(
1691 &tx,
1692 "fts_property_schema_registered",
1693 kind,
1694 Some(serde_json::json!({
1695 "property_paths": paths,
1696 "separator": separator,
1697 "exclude_paths": exclude_paths,
1698 "eager_rebuild": needs_rebuild,
1699 })),
1700 )?;
1701 tx.commit()?;
1702
1703 self.describe_fts_property_schema(kind)?.ok_or_else(|| {
1704 EngineError::Bridge("registered FTS property schema missing after commit".to_owned())
1705 })
1706 }
1707
/// Looks up the FTS property schema registered for `kind`.
///
/// Opens a connection and delegates to `load_fts_property_schema_record`,
/// returning its result unchanged (an `Option` wrapped in a `Result`).
///
/// # Errors
/// Propagates connection errors and whatever the loader returns.
pub fn describe_fts_property_schema(
    &self,
    kind: &str,
) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
    let conn = self.connect()?;
    load_fts_property_schema_record(&conn, kind)
}
1719
1720 pub fn list_fts_property_schemas(&self) -> Result<Vec<FtsPropertySchemaRecord>, EngineError> {
1725 let conn = self.connect()?;
1726 let mut stmt = conn.prepare(
1727 "SELECT kind, property_paths_json, separator, format_version \
1728 FROM fts_property_schemas ORDER BY kind",
1729 )?;
1730 let records = stmt
1731 .query_map([], |row| {
1732 let kind: String = row.get(0)?;
1733 let paths_json: String = row.get(1)?;
1734 let separator: String = row.get(2)?;
1735 let format_version: i64 = row.get(3)?;
1736 Ok(build_fts_property_schema_record(
1737 kind,
1738 &paths_json,
1739 separator,
1740 format_version,
1741 ))
1742 })?
1743 .collect::<Result<Vec<_>, _>>()?;
1744 Ok(records)
1745 }
1746
1747 pub fn remove_fts_property_schema(&self, kind: &str) -> Result<(), EngineError> {
1755 let mut conn = self.connect()?;
1756 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1757 let deleted = tx.execute("DELETE FROM fts_property_schemas WHERE kind = ?1", [kind])?;
1758 if deleted == 0 {
1759 return Err(EngineError::InvalidWrite(format!(
1760 "FTS property schema for kind '{kind}' is not registered"
1761 )));
1762 }
1763 persist_simple_provenance_event(&tx, "fts_property_schema_removed", kind, None)?;
1764 tx.commit()?;
1765 Ok(())
1766 }
1767
1768 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1774 let conn = self.connect()?;
1775 let profiles: Vec<(String, String, i64)> = {
1776 let mut stmt = conn.prepare(
1777 "SELECT profile, table_name, dimension \
1778 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1779 )?;
1780 stmt.query_map([], |row| {
1781 Ok((
1782 row.get::<_, String>(0)?,
1783 row.get::<_, String>(1)?,
1784 row.get::<_, i64>(2)?,
1785 ))
1786 })?
1787 .collect::<Result<Vec<_>, _>>()?
1788 };
1789
1790 for (profile, table_name, dimension) in &profiles {
1791 let dimension = usize::try_from(*dimension).map_err(|_| {
1792 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1793 })?;
1794 self.schema_manager
1795 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1796 }
1797
1798 Ok(ProjectionRepairReport {
1799 targets: vec![ProjectionTarget::Vec],
1800 rebuilt_rows: profiles.len(),
1801 notes: vec![],
1802 })
1803 }
1804
1805 #[allow(clippy::too_many_lines)]
1816 pub fn regenerate_vector_embeddings(
1817 &self,
1818 config: &VectorRegenerationConfig,
1819 ) -> Result<VectorRegenerationReport, EngineError> {
1820 self.regenerate_vector_embeddings_with_policy(config, &VectorGeneratorPolicy::default())
1821 }
1822
/// Regenerates every row of `vec_nodes_active` by running the configured
/// vector generator under `policy`.
///
/// Flow, as implemented below:
/// 1. Validate `config` and snapshot the chunks to embed; hash the generator
///    input and record a `vector_regeneration_requested` audit event.
/// 2. Run the generator outside any write transaction and validate its
///    output against the snapshot.
/// 3. Open an immediate transaction, ensure the vector profile exists,
///    re-collect the chunks and compare hashes; a mismatch is reported as
///    snapshot drift and nothing is applied.
/// 4. Persist the vector contract, replace all rows of `vec_nodes_active`,
///    record a `vector_regeneration_apply` event, and commit.
///
/// Generator, validation, capability, drift, and missing-embedding failures
/// are additionally recorded on a best-effort basis as
/// `vector_regeneration_failed` events before the error is returned.
///
/// # Errors
/// Returns the engine error derived from the `VectorRegenerationFailure` for
/// the failure classes above, [`EngineError::Schema`] for other schema
/// errors, and propagates database errors.
#[allow(clippy::too_many_lines)]
pub fn regenerate_vector_embeddings_with_policy(
    &self,
    config: &VectorRegenerationConfig,
    policy: &VectorGeneratorPolicy,
) -> Result<VectorRegenerationReport, EngineError> {
    let conn = self.connect()?;
    // From here on `config` is the validated configuration returned by the
    // validator, shadowing the caller's reference.
    let config = validate_vector_regeneration_config(&conn, config, policy)
        .map_err(|failure| failure.to_engine_error())?;
    let chunks = collect_regeneration_chunks(&conn)?;
    let payload = build_regeneration_input(&config, chunks.clone());
    // Hash of the generator input; compared again at apply time to detect
    // chunks that changed while the generator was running.
    let snapshot_hash = compute_snapshot_hash(&payload)?;
    let audit_metadata = VectorRegenerationAuditMetadata {
        profile: config.profile.clone(),
        model_identity: config.model_identity.clone(),
        model_version: config.model_version.clone(),
        chunk_count: chunks.len(),
        snapshot_hash: snapshot_hash.clone(),
        failure_class: None,
    };
    persist_vector_regeneration_event(
        &conn,
        "vector_regeneration_requested",
        &config.profile,
        &audit_metadata,
    )?;
    let notes = generator_policy_notes(policy);
    // The generator runs before the write transaction is opened.
    let generated = match run_vector_generator_bounded(&config, &payload, policy) {
        Ok(generated) => generated,
        Err(failure) => {
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
    };
    let mut embedding_map = match validate_generated_embeddings(&config, &chunks, generated) {
        Ok(embedding_map) => embedding_map,
        Err(failure) => {
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
    };

    // Rebind mutably: opening a transaction needs `&mut` access.
    let mut conn = conn;
    let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
    match self.schema_manager.ensure_vector_profile(
        &tx,
        &config.profile,
        &config.table_name,
        config.dimension,
    ) {
        Ok(()) => {}
        Err(SchemaError::MissingCapability(message)) => {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::UnsupportedVecCapability,
                message,
            );
            // Drop the transaction before the failure event is written
            // through a separate connection.
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        Err(error) => return Err(EngineError::Schema(error)),
    }
    // Re-read the chunks inside the transaction and compare with the
    // snapshot that was handed to the generator.
    let apply_chunks = collect_regeneration_chunks(&tx)?;
    let apply_payload = build_regeneration_input(&config, apply_chunks.clone());
    let apply_hash = compute_snapshot_hash(&apply_payload)?;
    if apply_hash != snapshot_hash {
        let failure = VectorRegenerationFailure::new(
            VectorRegenerationFailureClass::SnapshotDrift,
            "chunk snapshot changed during generation; retry".to_owned(),
        );
        drop(tx);
        self.persist_vector_regeneration_failure_best_effort(
            &config.profile,
            &audit_metadata,
            &failure,
        );
        return Err(failure.to_engine_error());
    }
    persist_vector_contract(&tx, &config, &snapshot_hash)?;
    // Full replacement: clear the table, then insert one row per chunk.
    tx.execute("DELETE FROM vec_nodes_active", [])?;
    let mut stmt = tx
        .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
    let mut regenerated_rows = 0usize;
    for chunk in &apply_chunks {
        // Each embedding is taken out of the map as it is written.
        let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
            // Release the statement and abandon the transaction before
            // recording the failure.
            drop(stmt);
            drop(tx);
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::MalformedGeneratorJson,
                format!(
                    "generator did not return embedding for chunk '{}'",
                    chunk.chunk_id
                ),
            );
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        };
        stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
        regenerated_rows += 1;
    }
    // The cached statement borrows `tx`; release it before committing.
    drop(stmt);
    persist_vector_regeneration_event(
        &tx,
        "vector_regeneration_apply",
        &config.profile,
        &audit_metadata,
    )?;
    tx.commit()?;

    Ok(VectorRegenerationReport {
        profile: config.profile.clone(),
        table_name: config.table_name.clone(),
        dimension: config.dimension,
        total_chunks: chunks.len(),
        regenerated_rows,
        contract_persisted: true,
        notes,
    })
}
1962
1963 fn persist_vector_regeneration_failure_best_effort(
1964 &self,
1965 profile: &str,
1966 metadata: &VectorRegenerationAuditMetadata,
1967 failure: &VectorRegenerationFailure,
1968 ) {
1969 let Ok(conn) = self.connect() else {
1970 return;
1971 };
1972 let failure_metadata = VectorRegenerationAuditMetadata {
1973 profile: metadata.profile.clone(),
1974 model_identity: metadata.model_identity.clone(),
1975 model_version: metadata.model_version.clone(),
1976 chunk_count: metadata.chunk_count,
1977 snapshot_hash: metadata.snapshot_hash.clone(),
1978 failure_class: Some(failure.failure_class_label().to_owned()),
1979 };
1980 let _ = persist_vector_regeneration_event(
1981 &conn,
1982 "vector_regeneration_failed",
1983 profile,
1984 &failure_metadata,
1985 );
1986 }
1987
1988 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1991 let conn = self.connect()?;
1992
1993 let node_logical_ids = collect_strings(
1994 &conn,
1995 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1996 source_ref,
1997 )?;
1998 let action_ids = collect_strings(
1999 &conn,
2000 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
2001 source_ref,
2002 )?;
2003 let operational_mutation_ids = collect_strings(
2004 &conn,
2005 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
2006 source_ref,
2007 )?;
2008
2009 Ok(TraceReport {
2010 source_ref: source_ref.to_owned(),
2011 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
2012 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
2013 action_rows: count_source_ref(&conn, "actions", source_ref)?,
2014 operational_mutation_rows: count_source_ref(
2015 &conn,
2016 "operational_mutations",
2017 source_ref,
2018 )?,
2019 node_logical_ids,
2020 action_ids,
2021 operational_mutation_ids,
2022 })
2023 }
2024
2025 #[allow(clippy::too_many_lines)]
2029 pub fn restore_logical_id(
2030 &self,
2031 logical_id: &str,
2032 ) -> Result<LogicalRestoreReport, EngineError> {
2033 let mut conn = self.connect()?;
2034 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2035
2036 let active_count: i64 = tx.query_row(
2037 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2038 [logical_id],
2039 |row| row.get(0),
2040 )?;
2041 if active_count > 0 {
2042 return Ok(LogicalRestoreReport {
2043 logical_id: logical_id.to_owned(),
2044 was_noop: true,
2045 restored_node_rows: 0,
2046 restored_edge_rows: 0,
2047 restored_chunk_rows: 0,
2048 restored_fts_rows: 0,
2049 restored_property_fts_rows: 0,
2050 restored_vec_rows: 0,
2051 skipped_edges: Vec::new(),
2052 notes: vec!["logical_id already active".to_owned()],
2053 });
2054 }
2055
2056 let restored_node: Option<(String, String)> = tx
2057 .query_row(
2058 "SELECT row_id, kind FROM nodes \
2059 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
2060 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
2061 [logical_id],
2062 |row| Ok((row.get(0)?, row.get(1)?)),
2063 )
2064 .optional()?;
2065 let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
2066 EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
2067 })?;
2068
2069 tx.execute(
2070 "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
2071 [restored_node_row_id.as_str()],
2072 )?;
2073
2074 let retire_scope: Option<(i64, Option<String>, i64)> = tx
2075 .query_row(
2076 "SELECT rowid, source_ref, created_at FROM provenance_events \
2077 WHERE event_type = 'node_retire' AND subject = ?1 \
2078 ORDER BY created_at DESC, rowid DESC LIMIT 1",
2079 [logical_id],
2080 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
2081 )
2082 .optional()?;
2083 let (restored_edge_rows, skipped_edges) = if let Some((
2084 retire_event_rowid,
2085 retire_source_ref,
2086 retire_created_at,
2087 )) = retire_scope
2088 {
2089 restore_validated_edges(
2090 &tx,
2091 logical_id,
2092 retire_source_ref.as_deref(),
2093 retire_created_at,
2094 retire_event_rowid,
2095 )?
2096 } else {
2097 (0, Vec::new())
2098 };
2099
2100 let restored_chunk_rows: usize = tx
2101 .query_row(
2102 "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
2103 [logical_id],
2104 |row| row.get::<_, i64>(0),
2105 )
2106 .map(i64_to_usize)?;
2107 tx.execute(
2108 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2109 [logical_id],
2110 )?;
2111 let restored_fts_rows = tx.execute(
2112 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
2113 SELECT id, node_logical_id, ?2, text_content \
2114 FROM chunks WHERE node_logical_id = ?1",
2115 rusqlite::params![logical_id, restored_kind],
2116 )?;
2117 let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;
2118
2119 tx.execute(
2121 "DELETE FROM fts_node_properties WHERE node_logical_id = ?1",
2122 [logical_id],
2123 )?;
2124 let restored_property_fts_rows =
2125 rebuild_single_node_property_fts(&tx, logical_id, &restored_kind)?;
2126
2127 persist_simple_provenance_event(
2128 &tx,
2129 "restore_logical_id",
2130 logical_id,
2131 Some(serde_json::json!({
2132 "restored_node_rows": 1,
2133 "restored_edge_rows": restored_edge_rows,
2134 "restored_chunk_rows": restored_chunk_rows,
2135 "restored_fts_rows": restored_fts_rows,
2136 "restored_property_fts_rows": restored_property_fts_rows,
2137 "restored_vec_rows": restored_vec_rows,
2138 })),
2139 )?;
2140 tx.commit()?;
2141
2142 Ok(LogicalRestoreReport {
2143 logical_id: logical_id.to_owned(),
2144 was_noop: false,
2145 restored_node_rows: 1,
2146 restored_edge_rows,
2147 restored_chunk_rows,
2148 restored_fts_rows,
2149 restored_property_fts_rows,
2150 restored_vec_rows,
2151 skipped_edges,
2152 notes: Vec::new(),
2153 })
2154 }
2155
2156 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
2160 let mut conn = self.connect()?;
2161 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2162
2163 let active_count: i64 = tx.query_row(
2164 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
2165 [logical_id],
2166 |row| row.get(0),
2167 )?;
2168 if active_count > 0 {
2169 return Ok(LogicalPurgeReport {
2170 logical_id: logical_id.to_owned(),
2171 was_noop: true,
2172 deleted_node_rows: 0,
2173 deleted_edge_rows: 0,
2174 deleted_chunk_rows: 0,
2175 deleted_fts_rows: 0,
2176 deleted_vec_rows: 0,
2177 notes: vec!["logical_id is active; purge skipped".to_owned()],
2178 });
2179 }
2180
2181 let node_rows: i64 = tx.query_row(
2182 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
2183 [logical_id],
2184 |row| row.get(0),
2185 )?;
2186 if node_rows == 0 {
2187 return Err(EngineError::InvalidWrite(format!(
2188 "logical_id '{logical_id}' does not exist"
2189 )));
2190 }
2191
2192 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
2193 let deleted_fts_rows = tx.execute(
2194 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
2195 [logical_id],
2196 )?;
2197 let deleted_edge_rows = tx.execute(
2198 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
2199 [logical_id],
2200 )?;
2201 let deleted_chunk_rows = tx.execute(
2202 "DELETE FROM chunks WHERE node_logical_id = ?1",
2203 [logical_id],
2204 )?;
2205 let deleted_node_rows =
2206 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
2207 tx.execute(
2208 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
2209 [logical_id],
2210 )?;
2211
2212 persist_simple_provenance_event(
2213 &tx,
2214 "purge_logical_id",
2215 logical_id,
2216 Some(serde_json::json!({
2217 "deleted_node_rows": deleted_node_rows,
2218 "deleted_edge_rows": deleted_edge_rows,
2219 "deleted_chunk_rows": deleted_chunk_rows,
2220 "deleted_fts_rows": deleted_fts_rows,
2221 "deleted_vec_rows": deleted_vec_rows,
2222 })),
2223 )?;
2224 tx.commit()?;
2225
2226 Ok(LogicalPurgeReport {
2227 logical_id: logical_id.to_owned(),
2228 was_noop: false,
2229 deleted_node_rows,
2230 deleted_edge_rows,
2231 deleted_chunk_rows,
2232 deleted_fts_rows,
2233 deleted_vec_rows,
2234 notes: Vec::new(),
2235 })
2236 }
2237
2238 pub fn purge_provenance_events(
2248 &self,
2249 before_timestamp: i64,
2250 options: &ProvenancePurgeOptions,
2251 ) -> Result<ProvenancePurgeReport, EngineError> {
2252 let mut conn = self.connect()?;
2253 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
2254
2255 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
2256 vec!["excise", "purge_logical_id"]
2257 } else {
2258 options
2259 .preserve_event_types
2260 .iter()
2261 .map(String::as_str)
2262 .collect()
2263 };
2264
2265 let placeholders: String = (0..preserved_types.len())
2267 .map(|i| format!("?{}", i + 2))
2268 .collect::<Vec<_>>()
2269 .join(", ");
2270 let count_query = format!(
2271 "SELECT count(*) FROM provenance_events \
2272 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
2273 );
2274 let delete_query = format!(
2275 "DELETE FROM provenance_events WHERE rowid IN (\
2276 SELECT rowid FROM provenance_events \
2277 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
2278 LIMIT 10000)"
2279 );
2280
2281 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
2282 stmt.raw_bind_parameter(1, before_timestamp)?;
2283 for (i, event_type) in preserved_types.iter().enumerate() {
2284 stmt.raw_bind_parameter(i + 2, *event_type)?;
2285 }
2286 Ok(())
2287 };
2288
2289 let events_deleted = if options.dry_run {
2290 let mut stmt = tx.prepare(&count_query)?;
2291 bind_params(&mut stmt)?;
2292 stmt.raw_query()
2293 .next()?
2294 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
2295 } else {
2296 let mut total_deleted: u64 = 0;
2297 loop {
2298 let mut stmt = tx.prepare(&delete_query)?;
2299 bind_params(&mut stmt)?;
2300 let deleted = stmt.raw_execute()?;
2301 if deleted == 0 {
2302 break;
2303 }
2304 total_deleted += deleted as u64;
2305 }
2306 total_deleted
2307 };
2308
2309 let total_after: u64 =
2310 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
2311 row.get(0)
2312 })?;
2313
2314 let oldest_remaining: Option<i64> = tx
2315 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
2316 row.get(0)
2317 })
2318 .optional()?
2319 .flatten();
2320
2321 if !options.dry_run {
2322 tx.commit()?;
2323 }
2324
2325 let events_preserved = if options.dry_run {
2328 total_after - events_deleted
2329 } else {
2330 total_after
2331 };
2332
2333 Ok(ProvenancePurgeReport {
2334 events_deleted,
2335 events_preserved,
2336 oldest_remaining,
2337 })
2338 }
2339
    /// Removes the effects of everything written under `source_ref` and returns a fresh
    /// trace of that source.
    ///
    /// All mutations run in one `IMMEDIATE` transaction: active nodes, edges and actions
    /// from the source are marked superseded, its operational mutations are deleted,
    /// chunks and vector rows of affected logical ids are removed, another version of each
    /// affected node (if one exists) is reactivated, FTS projections are rebuilt, and an
    /// `excise_source` provenance event is recorded. After commit the result of
    /// `self.trace_source(source_ref)` is returned.
    ///
    /// # Errors
    ///
    /// Propagates any connection, SQL or helper failure; the transaction is not committed
    /// in that case.
    // The function is one long sequential transaction, hence the lint allowance.
    #[allow(clippy::too_many_lines)]
    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
        let mut conn = self.connect()?;

        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        // Names of `latest_state` operational collections that have mutations from this
        // source; gathered before those mutations are deleted below.
        let affected_operational_collections = collect_strings_tx(
            &tx,
            "SELECT DISTINCT m.collection_name \
             FROM operational_mutations m \
             JOIN operational_collections c ON c.name = m.collection_name \
             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
             ORDER BY m.collection_name",
            source_ref,
        )?;

        // Capture (row_id, logical_id) of the active nodes from this source before they
        // are superseded, so the later steps know which logical ids were touched.
        let pairs: Vec<(String, String)> = {
            let mut stmt = tx.prepare(
                "SELECT row_id, logical_id FROM nodes \
                 WHERE source_ref = ?1 AND superseded_at IS NULL",
            )?;
            stmt.query_map([source_ref], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?
            .collect::<Result<Vec<_>, _>>()?
        };
        let affected_logical_ids: Vec<String> = pairs
            .iter()
            .map(|(_, logical_id)| logical_id.clone())
            .collect();

        // Supersede every still-active node, edge and action written by this source.
        tx.execute(
            "UPDATE nodes SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE edges SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE actions SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        // Presumably drops the derived "current" rows of the affected collections so they
        // can be rebuilt after the mutations are deleted — confirm against the helper.
        clear_operational_current_rows(&tx, &affected_operational_collections)?;
        tx.execute(
            "DELETE FROM operational_mutations WHERE source_ref = ?1",
            [source_ref],
        )?;
        // Vector rows are removed before chunks for each affected logical id.
        for logical_id in &affected_logical_ids {
            delete_vec_rows_for_logical_id(&tx, logical_id)?;
            tx.execute(
                "DELETE FROM chunks WHERE node_logical_id = ?1",
                [logical_id.as_str()],
            )?;
        }

        // Reactivate the most recently created other row of each excised logical id.
        // NOTE(review): the lookup does not filter on source_ref, so an earlier version
        // written by the same source could be reactivated — confirm this is intended.
        for (excised_row_id, logical_id) in &pairs {
            let prior: Option<String> = tx
                .query_row(
                    "SELECT row_id FROM nodes \
                     WHERE logical_id = ?1 AND row_id != ?2 \
                     ORDER BY created_at DESC LIMIT 1",
                    [logical_id.as_str(), excised_row_id.as_str()],
                    |row| row.get(0),
                )
                .optional()?;
            if let Some(prior_id) = prior {
                tx.execute(
                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
                    [prior_id.as_str()],
                )?;
            }
        }

        // Logical ids left without any active node lose their access metadata.
        for logical_id in &affected_logical_ids {
            let has_active_node = tx
                .query_row(
                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
                    [logical_id.as_str()],
                    |row| row.get::<_, i64>(0),
                )
                .optional()?
                .is_some();
            if !has_active_node {
                tx.execute(
                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
                    [logical_id.as_str()],
                )?;
            }
        }

        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;

        // Rebuild the chunk FTS projection from scratch: wipe it, then re-insert one row
        // per chunk whose node currently has an active version.
        tx.execute("DELETE FROM fts_nodes", [])?;
        tx.execute(
            r"
            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
            SELECT c.id, n.logical_id, n.kind, c.text_content
            FROM chunks c
            JOIN nodes n
              ON n.logical_id = c.node_logical_id
             AND n.superseded_at IS NULL
            ",
            [],
        )?;

        // Property FTS is likewise rebuilt in full within the same transaction.
        rebuild_property_fts_in_tx(&tx)?;

        // Audit record: both `subject` and `source_ref` are set to the excised source.
        tx.execute(
            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
             VALUES (?1, 'excise_source', ?2, ?2)",
            rusqlite::params![new_id(), source_ref],
        )?;

        tx.commit()?;

        self.trace_source(source_ref)
    }
2472
2473 pub fn safe_export(
2477 &self,
2478 destination_path: impl AsRef<Path>,
2479 options: SafeExportOptions,
2480 ) -> Result<SafeExportManifest, EngineError> {
2481 let destination_path = destination_path.as_ref();
2482
2483 let conn = self.connect()?;
2487
2488 if options.force_checkpoint {
2489 trace_info!("safe_export: wal checkpoint started");
2490 let (busy, log, checkpointed): (i64, i64, i64) =
2491 conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
2492 Ok((row.get(0)?, row.get(1)?, row.get(2)?))
2493 })?;
2494 if busy != 0 {
2495 trace_warn!(
2496 busy,
2497 log_frames = log,
2498 checkpointed_frames = checkpointed,
2499 "safe_export: wal checkpoint blocked by active readers"
2500 );
2501 return Err(EngineError::Bridge(format!(
2502 "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
2503 log frames={log}, checkpointed={checkpointed}; \
2504 retry export when no readers are active"
2505 )));
2506 }
2507 trace_info!(
2508 log_frames = log,
2509 checkpointed_frames = checkpointed,
2510 "safe_export: wal checkpoint completed"
2511 );
2512 }
2513
2514 let schema_version: u32 = conn
2515 .query_row(
2516 "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
2517 [],
2518 |row| row.get(0),
2519 )
2520 .unwrap_or(0);
2521
2522 if let Some(parent) = destination_path.parent() {
2525 fs::create_dir_all(parent)?;
2526 }
2527 conn.backup(DatabaseName::Main, destination_path, None)?;
2528
2529 drop(conn);
2530
2531 let page_count: u64 = {
2535 let export_conn = rusqlite::Connection::open_with_flags(
2536 destination_path,
2537 rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
2538 | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
2539 )?;
2540 export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
2541 };
2542
2543 let sha256 = {
2546 let mut file = fs::File::open(destination_path)?;
2547 let mut hasher = Sha256::new();
2548 io::copy(&mut file, &mut hasher)?;
2549 format!("{:x}", hasher.finalize())
2550 };
2551
2552 let exported_at = SystemTime::now()
2554 .duration_since(SystemTime::UNIX_EPOCH)
2555 .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
2556 .as_secs();
2557
2558 let manifest = SafeExportManifest {
2559 exported_at,
2560 sha256,
2561 schema_version,
2562 protocol_version: EXPORT_PROTOCOL_VERSION,
2563 page_count,
2564 };
2565
2566 let manifest_path = {
2568 let mut p = destination_path.to_path_buf();
2569 let stem = p
2570 .file_name()
2571 .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
2572 .ok_or_else(|| {
2573 EngineError::Bridge("destination path has no filename".to_owned())
2574 })?;
2575 p.set_file_name(stem);
2576 p
2577 };
2578 let manifest_json =
2579 serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;
2580
2581 let manifest_tmp = manifest_path.with_extension("json.tmp");
2584 if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
2585 .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
2586 {
2587 let _ = fs::remove_file(&manifest_tmp);
2588 return Err(e.into());
2589 }
2590
2591 Ok(manifest)
2592 }
2593}
2594
/// Record shape for a persisted vector embedding contract.
///
/// The field names match the columns written by `persist_vector_contract` into
/// `vector_embedding_contracts`, except `updated_at`, which has no field here.
// NOTE(review): `dead_code` is allowed, presumably because not every field is read —
// confirm whether the struct is still needed.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    /// Vector profile name the contract belongs to.
    profile: String,
    /// Name of the vector table the contract applies to.
    table_name: String,
    /// Identity of the embedding model.
    model_identity: String,
    /// Version of the embedding model.
    model_version: String,
    /// Number of components in each embedding.
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    /// Generator command, stored as a JSON string.
    generator_command_json: String,
    /// Timestamp at which the contract was applied.
    applied_at: i64,
    /// Hash of the regeneration input snapshot.
    snapshot_hash: String,
    /// Version of the contract record format.
    contract_format_version: i64,
}
2611
/// One chunk of text included in a vector regeneration input.
///
/// Populated by `collect_regeneration_chunks` from `chunks` joined with active `nodes`.
/// The struct is serialized as part of the payload hashed by `compute_snapshot_hash`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    /// `chunks.id`.
    chunk_id: String,
    /// Logical id of the node that owns the chunk.
    node_logical_id: String,
    /// Kind of the owning node.
    kind: String,
    /// Text of the chunk.
    text_content: String,
    /// `chunks.byte_start`, if recorded.
    byte_start: Option<i64>,
    /// `chunks.byte_end`, if recorded.
    byte_end: Option<i64>,
    /// `source_ref` of the owning node, if any.
    source_ref: Option<String>,
    /// `chunks.created_at`.
    created_at: i64,
}
2623
/// Serializable payload describing what to embed: the contract settings plus the chunks.
///
/// Built by `build_regeneration_input` and hashed by `compute_snapshot_hash`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    profile: String,
    table_name: String,
    model_identity: String,
    model_version: String,
    /// Number of components expected in each embedding.
    dimension: usize,
    normalization_policy: String,
    chunking_policy: String,
    preprocessing_policy: String,
    /// Chunks to embed.
    chunks: Vec<VectorRegenerationInputChunk>,
}
2636
/// A single embedding paired with the id of the chunk it was produced for.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbedding {
    /// Id of the chunk this embedding belongs to.
    chunk_id: String,
    /// Embedding components; checked for length and finiteness by
    /// `validate_generated_embeddings`.
    embedding: Vec<f32>,
}
2642
/// Collection of generated embeddings, wrapped in an object with an `embeddings` field.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbeddings {
    /// One entry per chunk; `validate_generated_embeddings` requires the count to equal
    /// the number of input chunks.
    embeddings: Vec<GeneratedEmbedding>,
}
2647
/// Classification of a vector regeneration failure.
///
/// Each class has a fixed human-readable label (see `label`) and a retryability flag
/// (see `retryable`).
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    /// Labelled `invalid contract`.
    InvalidContract,
    /// Labelled `payload too large`.
    PayloadTooLarge,
    /// Labelled `generator timeout`.
    GeneratorTimeout,
    /// Labelled `generator stdout overflow`.
    GeneratorStdoutOverflow,
    /// Labelled `generator stderr overflow`.
    GeneratorStderrOverflow,
    /// Labelled `generator nonzero exit`.
    GeneratorNonzeroExit,
    /// Labelled `malformed generator json`.
    MalformedGeneratorJson,
    /// Labelled `snapshot drift`; the only retryable class.
    SnapshotDrift,
    /// Labelled `unsupported vec capability`.
    UnsupportedVecCapability,
}

impl VectorRegenerationFailureClass {
    /// Returns the lower-case, space-separated label used in error messages.
    fn label(self) -> &'static str {
        use VectorRegenerationFailureClass as Class;

        let text = match self {
            Class::InvalidContract => "invalid contract",
            Class::PayloadTooLarge => "payload too large",
            Class::GeneratorTimeout => "generator timeout",
            Class::GeneratorStdoutOverflow => "generator stdout overflow",
            Class::GeneratorStderrOverflow => "generator stderr overflow",
            Class::GeneratorNonzeroExit => "generator nonzero exit",
            Class::MalformedGeneratorJson => "malformed generator json",
            Class::SnapshotDrift => "snapshot drift",
            Class::UnsupportedVecCapability => "unsupported vec capability",
        };
        text
    }

    /// Reports whether a failure of this class is marked retryable.
    fn retryable(self) -> bool {
        // Spelled out exhaustively so a new variant forces an explicit decision here.
        match self {
            Self::SnapshotDrift => true,
            Self::InvalidContract
            | Self::PayloadTooLarge
            | Self::GeneratorTimeout
            | Self::GeneratorStdoutOverflow
            | Self::GeneratorStderrOverflow
            | Self::GeneratorNonzeroExit
            | Self::MalformedGeneratorJson
            | Self::UnsupportedVecCapability => false,
        }
    }
}
2680
2681#[derive(Clone, Debug, PartialEq, Eq)]
2682pub(crate) struct VectorRegenerationFailure {
2683 class: VectorRegenerationFailureClass,
2684 detail: String,
2685}
2686
2687impl VectorRegenerationFailure {
2688 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2689 Self {
2690 class,
2691 detail: detail.into(),
2692 }
2693 }
2694
2695 fn to_engine_error(&self) -> EngineError {
2696 let retry_suffix = if self.class.retryable() {
2697 " [retryable]"
2698 } else {
2699 ""
2700 };
2701 EngineError::Bridge(format!(
2702 "vector regeneration {}: {}{}",
2703 self.class.label(),
2704 self.detail,
2705 retry_suffix
2706 ))
2707 }
2708
2709 fn failure_class_label(&self) -> &'static str {
2710 self.class.label()
2711 }
2712}
2713
/// Metadata attached to vector regeneration provenance events.
///
/// Serialized and stored as the event's `metadata_json` by
/// `persist_vector_regeneration_event`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    profile: String,
    model_identity: String,
    model_version: String,
    /// Number of chunks involved in the regeneration.
    chunk_count: usize,
    snapshot_hash: String,
    /// Failure class label; omitted from the serialized output when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2724
/// Retention policy for an operational collection.
///
/// Deserialized from an internally tagged object: the `mode` field selects the variant
/// using its snake_case name (`keep_all`, `purge_before_seconds`, `keep_last`).
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// No retention limit.
    KeepAll,
    /// Age-based policy parameterized by `max_age_seconds`.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based policy parameterized by `max_rows`.
    KeepLast { max_rows: usize },
}
2732
2733pub fn load_vector_regeneration_config(
2736 path: impl AsRef<Path>,
2737) -> Result<VectorRegenerationConfig, EngineError> {
2738 let path = path.as_ref();
2739 let raw = fs::read_to_string(path)?;
2740 match path.extension().and_then(|ext| ext.to_str()) {
2741 Some("toml") => {
2742 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2743 }
2744 Some("json") | None => {
2745 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2746 }
2747 Some(other) => Err(EngineError::Bridge(format!(
2748 "unsupported vector regeneration config extension: {other}"
2749 ))),
2750 }
2751}
2752
2753fn validate_vector_regeneration_config(
2754 conn: &rusqlite::Connection,
2755 config: &VectorRegenerationConfig,
2756 policy: &VectorGeneratorPolicy,
2757) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2758 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2759 let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2760 if table_name != "vec_nodes_active" {
2761 return Err(VectorRegenerationFailure::new(
2762 VectorRegenerationFailureClass::InvalidContract,
2763 format!("table_name must be vec_nodes_active, got '{table_name}'"),
2764 ));
2765 }
2766 let model_identity = validate_bounded_text(
2767 "model_identity",
2768 &config.model_identity,
2769 MAX_MODEL_IDENTITY_LEN,
2770 )?;
2771 let model_version = validate_bounded_text(
2772 "model_version",
2773 &config.model_version,
2774 MAX_MODEL_VERSION_LEN,
2775 )?;
2776 if config.dimension == 0 {
2777 return Err(VectorRegenerationFailure::new(
2778 VectorRegenerationFailureClass::InvalidContract,
2779 "dimension must be greater than zero".to_owned(),
2780 ));
2781 }
2782 let normalization_policy = validate_bounded_text(
2783 "normalization_policy",
2784 &config.normalization_policy,
2785 MAX_POLICY_LEN,
2786 )?;
2787 let chunking_policy =
2788 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2789 let preprocessing_policy = validate_bounded_text(
2790 "preprocessing_policy",
2791 &config.preprocessing_policy,
2792 MAX_POLICY_LEN,
2793 )?;
2794 let generator_command = validate_generator_command(&config.generator_command, policy)?;
2795
2796 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2797 && existing_dimension != config.dimension
2798 {
2799 return Err(VectorRegenerationFailure::new(
2800 VectorRegenerationFailureClass::InvalidContract,
2801 format!(
2802 "dimension {} does not match existing vector profile dimension {}",
2803 config.dimension, existing_dimension
2804 ),
2805 ));
2806 }
2807
2808 validate_existing_contract_version(conn, &profile)?;
2809
2810 let normalized = VectorRegenerationConfig {
2811 profile,
2812 table_name,
2813 model_identity,
2814 model_version,
2815 dimension: config.dimension,
2816 normalization_policy,
2817 chunking_policy,
2818 preprocessing_policy,
2819 generator_command,
2820 };
2821 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2822 VectorRegenerationFailure::new(
2823 VectorRegenerationFailureClass::InvalidContract,
2824 error.to_string(),
2825 )
2826 })?;
2827 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2828 return Err(VectorRegenerationFailure::new(
2829 VectorRegenerationFailureClass::InvalidContract,
2830 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2831 ));
2832 }
2833
2834 Ok(normalized)
2835}
2836
2837#[allow(clippy::cast_possible_wrap)]
2838fn persist_vector_contract(
2839 conn: &rusqlite::Connection,
2840 config: &VectorRegenerationConfig,
2841 snapshot_hash: &str,
2842) -> Result<(), EngineError> {
2843 let generator_command_json = serde_json::to_string(&config.generator_command)
2844 .map_err(|error| EngineError::Bridge(error.to_string()))?;
2845 conn.execute(
2846 r"
2847 INSERT OR REPLACE INTO vector_embedding_contracts (
2848 profile,
2849 table_name,
2850 model_identity,
2851 model_version,
2852 dimension,
2853 normalization_policy,
2854 chunking_policy,
2855 preprocessing_policy,
2856 generator_command_json,
2857 applied_at,
2858 snapshot_hash,
2859 contract_format_version,
2860 updated_at
2861 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2862 ",
2863 rusqlite::params![
2864 config.profile.as_str(),
2865 config.table_name.as_str(),
2866 config.model_identity.as_str(),
2867 config.model_version.as_str(),
2868 config.dimension as i64,
2869 config.normalization_policy.as_str(),
2870 config.chunking_policy.as_str(),
2871 config.preprocessing_policy.as_str(),
2872 generator_command_json,
2873 snapshot_hash,
2874 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2875 ],
2876 )?;
2877 Ok(())
2878}
2879
2880fn persist_vector_regeneration_event(
2881 conn: &rusqlite::Connection,
2882 event_type: &str,
2883 subject: &str,
2884 metadata: &VectorRegenerationAuditMetadata,
2885) -> Result<(), EngineError> {
2886 let metadata_json = serialize_audit_metadata(metadata)?;
2887 conn.execute(
2888 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2889 rusqlite::params![new_id(), event_type, subject, metadata_json],
2890 )?;
2891 Ok(())
2892}
2893
2894fn persist_simple_provenance_event(
2895 conn: &rusqlite::Connection,
2896 event_type: &str,
2897 subject: &str,
2898 metadata: Option<serde_json::Value>,
2899) -> Result<(), EngineError> {
2900 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2901 conn.execute(
2902 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2903 rusqlite::params![new_id(), event_type, subject, metadata_json],
2904 )?;
2905 Ok(())
2906}
2907
2908fn count_missing_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2912 let schemas = crate::writer::load_fts_property_schemas(conn)?;
2913 if schemas.is_empty() {
2914 return Ok(0);
2915 }
2916
2917 let mut missing = 0i64;
2918 for (kind, schema) in &schemas {
2919 let mut stmt = conn.prepare(
2920 "SELECT n.logical_id, n.properties FROM nodes n \
2921 WHERE n.kind = ?1 AND n.superseded_at IS NULL \
2922 AND NOT EXISTS (SELECT 1 FROM fts_node_properties fp WHERE fp.node_logical_id = n.logical_id)",
2923 )?;
2924 let rows = stmt.query_map([kind.as_str()], |row| {
2925 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
2926 })?;
2927 for row in rows {
2928 let (_logical_id, properties_str) = row?;
2929 let props: serde_json::Value =
2930 serde_json::from_str(&properties_str).unwrap_or_default();
2931 if crate::writer::extract_property_fts(&props, schema)
2932 .0
2933 .is_some()
2934 {
2935 missing += 1;
2936 }
2937 }
2938 }
2939 Ok(missing)
2940}
2941
2942fn count_drifted_property_fts_rows(conn: &rusqlite::Connection) -> Result<i64, EngineError> {
2947 let schemas = crate::writer::load_fts_property_schemas(conn)?;
2948 if schemas.is_empty() {
2949 return Ok(0);
2950 }
2951
2952 let mut drifted = 0i64;
2953 for (kind, schema) in &schemas {
2954 let mut stmt = conn.prepare(
2955 "SELECT fp.node_logical_id, fp.text_content, n.properties \
2956 FROM fts_node_properties fp \
2957 JOIN nodes n ON n.logical_id = fp.node_logical_id AND n.superseded_at IS NULL \
2958 WHERE fp.kind = ?1 AND n.kind = ?1",
2959 )?;
2960 let rows = stmt.query_map([kind.as_str()], |row| {
2961 Ok((
2962 row.get::<_, String>(0)?,
2963 row.get::<_, String>(1)?,
2964 row.get::<_, String>(2)?,
2965 ))
2966 })?;
2967 for row in rows {
2968 let (_logical_id, stored_text, properties_str) = row?;
2969 let props: serde_json::Value =
2970 serde_json::from_str(&properties_str).unwrap_or_default();
2971 let (expected, _positions, _stats) =
2972 crate::writer::extract_property_fts(&props, schema);
2973 match expected {
2974 Some(text) if text == stored_text => {}
2975 _ => drifted += 1,
2976 }
2977 }
2978 }
2979 Ok(drifted)
2980}
2981
2982fn rebuild_property_fts_in_tx(conn: &rusqlite::Connection) -> Result<usize, EngineError> {
2984 conn.execute("DELETE FROM fts_node_properties", [])?;
2985 conn.execute("DELETE FROM fts_node_property_positions", [])?;
2986 let inserted = crate::projection::insert_property_fts_rows(
2987 conn,
2988 "SELECT logical_id, properties FROM nodes WHERE kind = ?1 AND superseded_at IS NULL",
2989 )?;
2990 Ok(inserted)
2991}
2992
2993fn rebuild_single_node_property_fts(
2996 conn: &rusqlite::Connection,
2997 logical_id: &str,
2998 kind: &str,
2999) -> Result<usize, EngineError> {
3000 let schema: Option<(String, String)> = conn
3001 .query_row(
3002 "SELECT property_paths_json, separator FROM fts_property_schemas WHERE kind = ?1",
3003 [kind],
3004 |row| {
3005 let paths_json: String = row.get(0)?;
3006 let separator: String = row.get(1)?;
3007 Ok((paths_json, separator))
3008 },
3009 )
3010 .optional()?;
3011 let Some((paths_json, separator)) = schema else {
3012 return Ok(0);
3013 };
3014 let parsed = crate::writer::parse_property_schema_json(&paths_json, &separator);
3015 let properties_str: Option<String> = conn
3016 .query_row(
3017 "SELECT properties FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
3018 [logical_id],
3019 |row| row.get(0),
3020 )
3021 .optional()?;
3022 let Some(properties_str) = properties_str else {
3023 return Ok(0);
3024 };
3025 let props: serde_json::Value = serde_json::from_str(&properties_str).unwrap_or_default();
3026 let (text, positions, _stats) = crate::writer::extract_property_fts(&props, &parsed);
3027 let Some(text) = text else {
3028 return Ok(0);
3029 };
3030 conn.execute(
3031 "DELETE FROM fts_node_property_positions WHERE node_logical_id = ?1",
3032 rusqlite::params![logical_id],
3033 )?;
3034 conn.execute(
3035 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) VALUES (?1, ?2, ?3)",
3036 rusqlite::params![logical_id, kind, text],
3037 )?;
3038 for pos in &positions {
3039 conn.execute(
3040 "INSERT INTO fts_node_property_positions \
3041 (node_logical_id, kind, start_offset, end_offset, leaf_path) \
3042 VALUES (?1, ?2, ?3, ?4, ?5)",
3043 rusqlite::params![
3044 logical_id,
3045 kind,
3046 i64::try_from(pos.start_offset).unwrap_or(i64::MAX),
3047 i64::try_from(pos.end_offset).unwrap_or(i64::MAX),
3048 pos.leaf_path,
3049 ],
3050 )?;
3051 }
3052 Ok(1)
3053}
3054
3055fn serialize_property_paths_json(
3056 entries: &[FtsPropertyPathSpec],
3057 exclude_paths: &[String],
3058) -> Result<String, EngineError> {
3059 let all_scalar = entries
3063 .iter()
3064 .all(|e| e.mode == FtsPropertyPathMode::Scalar);
3065 if all_scalar && exclude_paths.is_empty() {
3066 let paths: Vec<&str> = entries.iter().map(|e| e.path.as_str()).collect();
3067 return serde_json::to_string(&paths).map_err(|e| {
3068 EngineError::InvalidWrite(format!("failed to serialize property paths: {e}"))
3069 });
3070 }
3071
3072 let mut obj = serde_json::Map::new();
3073 let paths_json: Vec<serde_json::Value> = entries
3074 .iter()
3075 .map(|e| {
3076 let mode_str = match e.mode {
3077 FtsPropertyPathMode::Scalar => "scalar",
3078 FtsPropertyPathMode::Recursive => "recursive",
3079 };
3080 serde_json::json!({ "path": e.path, "mode": mode_str })
3081 })
3082 .collect();
3083 obj.insert("paths".to_owned(), serde_json::Value::Array(paths_json));
3084 if !exclude_paths.is_empty() {
3085 obj.insert("exclude_paths".to_owned(), serde_json::json!(exclude_paths));
3086 }
3087 serde_json::to_string(&serde_json::Value::Object(obj))
3088 .map_err(|e| EngineError::InvalidWrite(format!("failed to serialize property paths: {e}")))
3089}
3090
3091fn validate_fts_property_paths(paths: &[String]) -> Result<(), EngineError> {
3092 if paths.is_empty() {
3093 return Err(EngineError::InvalidWrite(
3094 "FTS property paths must not be empty".to_owned(),
3095 ));
3096 }
3097 let mut seen = std::collections::HashSet::new();
3098 for path in paths {
3099 if !path.starts_with("$.") {
3100 return Err(EngineError::InvalidWrite(format!(
3101 "FTS property path must start with '$.' but got: {path}"
3102 )));
3103 }
3104 let after_prefix = &path[2..]; let segments: Vec<&str> = after_prefix.split('.').collect();
3106 if segments.is_empty() || segments.iter().any(|s| s.is_empty()) {
3107 return Err(EngineError::InvalidWrite(format!(
3108 "FTS property path has empty segment(s): {path}"
3109 )));
3110 }
3111 for seg in &segments {
3112 if !seg.chars().all(|c| c.is_alphanumeric() || c == '_') {
3113 return Err(EngineError::InvalidWrite(format!(
3114 "FTS property path segment contains invalid characters: {path}"
3115 )));
3116 }
3117 }
3118 if !seen.insert(path) {
3119 return Err(EngineError::InvalidWrite(format!(
3120 "duplicate FTS property path: {path}"
3121 )));
3122 }
3123 }
3124 Ok(())
3125}
3126
3127fn load_fts_property_schema_record(
3128 conn: &rusqlite::Connection,
3129 kind: &str,
3130) -> Result<Option<FtsPropertySchemaRecord>, EngineError> {
3131 let row = conn
3132 .query_row(
3133 "SELECT kind, property_paths_json, separator, format_version \
3134 FROM fts_property_schemas WHERE kind = ?1",
3135 [kind],
3136 |row| {
3137 let kind: String = row.get(0)?;
3138 let paths_json: String = row.get(1)?;
3139 let separator: String = row.get(2)?;
3140 let format_version: i64 = row.get(3)?;
3141 Ok(build_fts_property_schema_record(
3142 kind,
3143 &paths_json,
3144 separator,
3145 format_version,
3146 ))
3147 },
3148 )
3149 .optional()?;
3150 Ok(row)
3151}
3152
3153fn build_fts_property_schema_record(
3159 kind: String,
3160 paths_json: &str,
3161 separator: String,
3162 format_version: i64,
3163) -> FtsPropertySchemaRecord {
3164 let schema = crate::writer::parse_property_schema_json(paths_json, &separator);
3165 let entries: Vec<FtsPropertyPathSpec> = schema
3166 .paths
3167 .into_iter()
3168 .map(|entry| FtsPropertyPathSpec {
3169 path: entry.path,
3170 mode: match entry.mode {
3171 crate::writer::PropertyPathMode::Scalar => FtsPropertyPathMode::Scalar,
3172 crate::writer::PropertyPathMode::Recursive => FtsPropertyPathMode::Recursive,
3173 },
3174 })
3175 .collect();
3176 let property_paths: Vec<String> = entries.iter().map(|e| e.path.clone()).collect();
3177 FtsPropertySchemaRecord {
3178 kind,
3179 property_paths,
3180 entries,
3181 exclude_paths: schema.exclude_paths,
3182 separator,
3183 format_version,
3184 }
3185}
3186
3187fn build_regeneration_input(
3188 config: &VectorRegenerationConfig,
3189 chunks: Vec<VectorRegenerationInputChunk>,
3190) -> VectorRegenerationInput {
3191 VectorRegenerationInput {
3192 profile: config.profile.clone(),
3193 table_name: config.table_name.clone(),
3194 model_identity: config.model_identity.clone(),
3195 model_version: config.model_version.clone(),
3196 dimension: config.dimension,
3197 normalization_policy: config.normalization_policy.clone(),
3198 chunking_policy: config.chunking_policy.clone(),
3199 preprocessing_policy: config.preprocessing_policy.clone(),
3200 chunks,
3201 }
3202}
3203
3204fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
3205 let bytes =
3206 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
3207 let mut hasher = Sha256::new();
3208 hasher.update(bytes);
3209 Ok(format!("{:x}", hasher.finalize()))
3210}
3211
3212fn collect_regeneration_chunks(
3213 conn: &rusqlite::Connection,
3214) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
3215 let mut stmt = conn.prepare(
3216 r"
3217 SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
3218 FROM chunks c
3219 JOIN nodes n
3220 ON n.logical_id = c.node_logical_id
3221 AND n.superseded_at IS NULL
3222 ORDER BY c.created_at, c.id
3223 ",
3224 )?;
3225 let chunks = stmt
3226 .query_map([], |row| {
3227 Ok(VectorRegenerationInputChunk {
3228 chunk_id: row.get(0)?,
3229 node_logical_id: row.get(1)?,
3230 kind: row.get(2)?,
3231 text_content: row.get(3)?,
3232 byte_start: row.get(4)?,
3233 byte_end: row.get(5)?,
3234 source_ref: row.get(6)?,
3235 created_at: row.get(7)?,
3236 })
3237 })?
3238 .collect::<Result<Vec<_>, _>>()?;
3239 Ok(chunks)
3240}
3241
3242fn validate_generated_embeddings(
3243 config: &VectorRegenerationConfig,
3244 chunks: &[VectorRegenerationInputChunk],
3245 generated: GeneratedEmbeddings,
3246) -> Result<std::collections::HashMap<String, Vec<u8>>, VectorRegenerationFailure> {
3247 if generated.embeddings.len() != chunks.len() {
3248 return Err(VectorRegenerationFailure::new(
3249 VectorRegenerationFailureClass::MalformedGeneratorJson,
3250 format!(
3251 "generator returned {} embedding(s) for {} chunk(s)",
3252 generated.embeddings.len(),
3253 chunks.len()
3254 ),
3255 ));
3256 }
3257
3258 let mut embedding_map = std::collections::HashMap::new();
3259 for embedding in generated.embeddings {
3260 if embedding.embedding.len() != config.dimension {
3261 return Err(VectorRegenerationFailure::new(
3262 VectorRegenerationFailureClass::MalformedGeneratorJson,
3263 format!(
3264 "embedding for chunk '{}' has dimension {}, expected {}",
3265 embedding.chunk_id,
3266 embedding.embedding.len(),
3267 config.dimension
3268 ),
3269 ));
3270 }
3271 if embedding.embedding.iter().any(|value| !value.is_finite()) {
3272 return Err(VectorRegenerationFailure::new(
3273 VectorRegenerationFailureClass::MalformedGeneratorJson,
3274 format!(
3275 "embedding for chunk '{}' contains non-finite values",
3276 embedding.chunk_id
3277 ),
3278 ));
3279 }
3280 let bytes: Vec<u8> = embedding
3281 .embedding
3282 .iter()
3283 .flat_map(|value| value.to_le_bytes())
3284 .collect();
3285 if embedding_map
3286 .insert(embedding.chunk_id.clone(), bytes)
3287 .is_some()
3288 {
3289 return Err(VectorRegenerationFailure::new(
3290 VectorRegenerationFailureClass::MalformedGeneratorJson,
3291 format!(
3292 "duplicate embedding returned for chunk '{}'",
3293 embedding.chunk_id
3294 ),
3295 ));
3296 }
3297 }
3298
3299 Ok(embedding_map)
3300}
3301
3302fn generator_policy_notes(policy: &VectorGeneratorPolicy) -> Vec<String> {
3303 let mut notes = vec!["vector embeddings regenerated from application contract".to_owned()];
3304 if !policy.allowed_executable_roots.is_empty() {
3305 notes.push("generator executable roots enforced by operator policy".to_owned());
3306 }
3307 if !policy.preserve_env_vars.is_empty() {
3308 notes.push("generator environment reduced to preserved variables".to_owned());
3309 }
3310 notes
3311}
3312
/// Identifies which of the generator's output pipes a reader-thread message
/// refers to.
enum GeneratorStream {
    /// The generator's standard output pipe.
    Stdout,
    /// The generator's standard error pipe.
    Stderr,
}
3317
/// Final outcome reported by a capped reader thread for one generator pipe.
enum StreamReadResult {
    /// The pipe reached end-of-file; carries every byte that was read.
    Complete(Vec<u8>),
    /// Reading stopped because the accumulated output would exceed the cap.
    Overflow,
    /// Reading stopped because the underlying read returned an error.
    Io(io::Error),
}
3323
3324fn validate_bounded_text(
3325 field: &str,
3326 value: &str,
3327 max_len: usize,
3328) -> Result<String, VectorRegenerationFailure> {
3329 let trimmed = value.trim();
3330 if trimmed.is_empty() {
3331 return Err(VectorRegenerationFailure::new(
3332 VectorRegenerationFailureClass::InvalidContract,
3333 format!("{field} must not be empty"),
3334 ));
3335 }
3336 if trimmed.len() > max_len {
3337 return Err(VectorRegenerationFailure::new(
3338 VectorRegenerationFailureClass::InvalidContract,
3339 format!("{field} exceeds max length {max_len}"),
3340 ));
3341 }
3342 Ok(trimmed.to_owned())
3343}
3344
3345fn validate_generator_command(
3346 command: &[String],
3347 policy: &VectorGeneratorPolicy,
3348) -> Result<Vec<String>, VectorRegenerationFailure> {
3349 if command.is_empty() {
3350 return Err(VectorRegenerationFailure::new(
3351 VectorRegenerationFailureClass::InvalidContract,
3352 "generator_command must contain at least one element".to_owned(),
3353 ));
3354 }
3355 let mut total_len = 0usize;
3356 for argument in command {
3357 if argument.is_empty() {
3358 return Err(VectorRegenerationFailure::new(
3359 VectorRegenerationFailureClass::InvalidContract,
3360 "generator_command entries must not be empty".to_owned(),
3361 ));
3362 }
3363 if argument.len() > MAX_GENERATOR_COMMAND_ARG_LEN {
3364 return Err(VectorRegenerationFailure::new(
3365 VectorRegenerationFailureClass::InvalidContract,
3366 format!(
3367 "generator_command argument exceeds max length {MAX_GENERATOR_COMMAND_ARG_LEN}"
3368 ),
3369 ));
3370 }
3371 total_len += argument.len();
3372 }
3373 if total_len > MAX_GENERATOR_COMMAND_TOTAL_LEN {
3374 return Err(VectorRegenerationFailure::new(
3375 VectorRegenerationFailureClass::InvalidContract,
3376 format!(
3377 "generator_command exceeds max serialized length {MAX_GENERATOR_COMMAND_TOTAL_LEN}"
3378 ),
3379 ));
3380 }
3381 executable_trust::validate_generator_executable(&command[0], policy)?;
3382 Ok(command.to_vec())
3383}
3384
3385fn current_vector_profile_dimension(
3386 conn: &rusqlite::Connection,
3387 profile: &str,
3388) -> Result<Option<usize>, VectorRegenerationFailure> {
3389 let dimension: Option<i64> = conn
3390 .query_row(
3391 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
3392 [profile],
3393 |row| row.get(0),
3394 )
3395 .optional()
3396 .map_err(|error| {
3397 VectorRegenerationFailure::new(
3398 VectorRegenerationFailureClass::InvalidContract,
3399 error.to_string(),
3400 )
3401 })?;
3402 dimension
3403 .map(|value| {
3404 usize::try_from(value).map_err(|_| {
3405 VectorRegenerationFailure::new(
3406 VectorRegenerationFailureClass::InvalidContract,
3407 format!("stored vector profile dimension is invalid: {value}"),
3408 )
3409 })
3410 })
3411 .transpose()
3412}
3413
3414fn validate_existing_contract_version(
3415 conn: &rusqlite::Connection,
3416 profile: &str,
3417) -> Result<(), VectorRegenerationFailure> {
3418 let version: Option<i64> = conn
3419 .query_row(
3420 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
3421 [profile],
3422 |row| row.get(0),
3423 )
3424 .optional()
3425 .map_err(|error| {
3426 VectorRegenerationFailure::new(
3427 VectorRegenerationFailureClass::InvalidContract,
3428 error.to_string(),
3429 )
3430 })?;
3431 if let Some(version) = version
3432 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
3433 {
3434 return Err(VectorRegenerationFailure::new(
3435 VectorRegenerationFailureClass::InvalidContract,
3436 format!(
3437 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
3438 ),
3439 ));
3440 }
3441 Ok(())
3442}
3443
3444fn serialize_audit_metadata(
3445 metadata: &VectorRegenerationAuditMetadata,
3446) -> Result<String, EngineError> {
3447 let json =
3448 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
3449 if json.len() > MAX_AUDIT_METADATA_BYTES {
3450 return Err(VectorRegenerationFailure::new(
3451 VectorRegenerationFailureClass::InvalidContract,
3452 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
3453 )
3454 .to_engine_error());
3455 }
3456 Ok(json)
3457}
3458
3459#[allow(clippy::too_many_lines)]
3460fn run_vector_generator_bounded(
3461 config: &VectorRegenerationConfig,
3462 payload: &VectorRegenerationInput,
3463 policy: &VectorGeneratorPolicy,
3464) -> Result<GeneratedEmbeddings, VectorRegenerationFailure> {
3465 if payload.chunks.len() > policy.max_chunks {
3466 return Err(VectorRegenerationFailure::new(
3467 VectorRegenerationFailureClass::PayloadTooLarge,
3468 format!(
3469 "chunk count {} exceeds max_chunks {}",
3470 payload.chunks.len(),
3471 policy.max_chunks
3472 ),
3473 ));
3474 }
3475
3476 let input = serde_json::to_vec(payload).map_err(|error| {
3477 VectorRegenerationFailure::new(
3478 VectorRegenerationFailureClass::MalformedGeneratorJson,
3479 error.to_string(),
3480 )
3481 })?;
3482 if input.len() > policy.max_input_bytes {
3483 return Err(VectorRegenerationFailure::new(
3484 VectorRegenerationFailureClass::PayloadTooLarge,
3485 format!(
3486 "serialized input {} bytes exceeds max_input_bytes {}",
3487 input.len(),
3488 policy.max_input_bytes
3489 ),
3490 ));
3491 }
3492
3493 let mut command = Command::new(config.generator_command.first().ok_or_else(|| {
3494 VectorRegenerationFailure::new(
3495 VectorRegenerationFailureClass::InvalidContract,
3496 "missing generator executable",
3497 )
3498 })?);
3499 command.args(config.generator_command.iter().skip(1));
3500 command.stdin(Stdio::piped());
3501 command.stdout(Stdio::piped());
3502 command.stderr(Stdio::piped());
3503 command.env_clear();
3504 for env_var in &policy.preserve_env_vars {
3505 if let Some(value) = std::env::var_os(env_var) {
3506 command.env(env_var, value);
3507 }
3508 }
3509
3510 let mut child = command.spawn().map_err(|error| {
3511 VectorRegenerationFailure::new(
3512 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3513 format!("failed to spawn generator: {error}"),
3514 )
3515 })?;
3516 if let Some(mut stdin) = child.stdin.take() {
3517 stdin.write_all(&input).map_err(|error| {
3518 VectorRegenerationFailure::new(
3519 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3520 format!("failed to write generator stdin: {error}"),
3521 )
3522 })?;
3523 } else {
3524 return Err(VectorRegenerationFailure::new(
3525 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3526 "failed to open generator stdin",
3527 ));
3528 }
3529
3530 let stdout = child.stdout.take().ok_or_else(|| {
3531 VectorRegenerationFailure::new(
3532 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3533 "failed to open generator stdout",
3534 )
3535 })?;
3536 let stderr = child.stderr.take().ok_or_else(|| {
3537 VectorRegenerationFailure::new(
3538 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3539 "failed to open generator stderr",
3540 )
3541 })?;
3542
3543 let (tx, rx) = mpsc::channel();
3544 let stdout_handle = spawn_capped_reader(
3545 stdout,
3546 policy.max_stdout_bytes,
3547 GeneratorStream::Stdout,
3548 tx.clone(),
3549 );
3550 let stderr_handle =
3551 spawn_capped_reader(stderr, policy.max_stderr_bytes, GeneratorStream::Stderr, tx);
3552
3553 let start = Instant::now();
3554 let timeout = Duration::from_millis(policy.timeout_ms);
3555 let mut stdout_bytes: Option<Vec<u8>> = None;
3556 let mut stderr_bytes: Option<Vec<u8>> = None;
3557 let mut status = None;
3558 let mut stream_error: Option<VectorRegenerationFailure> = None;
3559
3560 while status.is_none() && stream_error.is_none() {
3561 while let Ok((stream, result)) = rx.try_recv() {
3562 match (stream, result) {
3563 (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3564 stdout_bytes = Some(bytes);
3565 }
3566 (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3567 stderr_bytes = Some(bytes);
3568 }
3569 (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3570 stream_error = Some(VectorRegenerationFailure::new(
3571 VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3572 format!(
3573 "stdout exceeded max_stdout_bytes {}",
3574 policy.max_stdout_bytes
3575 ),
3576 ));
3577 }
3578 (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3579 stream_error = Some(VectorRegenerationFailure::new(
3580 VectorRegenerationFailureClass::GeneratorStderrOverflow,
3581 format!(
3582 "stderr exceeded max_stderr_bytes {}",
3583 policy.max_stderr_bytes
3584 ),
3585 ));
3586 }
3587 (_, StreamReadResult::Io(error)) => {
3588 stream_error = Some(VectorRegenerationFailure::new(
3589 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3590 format!("failed to read generator stream: {error}"),
3591 ));
3592 }
3593 }
3594 }
3595
3596 if stream_error.is_some() {
3597 let _ = child.kill();
3598 break;
3599 }
3600 if start.elapsed() > timeout {
3601 let _ = child.kill();
3602 stream_error = Some(VectorRegenerationFailure::new(
3603 VectorRegenerationFailureClass::GeneratorTimeout,
3604 format!("generator exceeded timeout after {}ms", policy.timeout_ms),
3605 ));
3606 break;
3607 }
3608 status = child.try_wait().map_err(|error| {
3609 VectorRegenerationFailure::new(
3610 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3611 format!("failed to poll generator status: {error}"),
3612 )
3613 })?;
3614 if status.is_none() {
3615 thread::sleep(Duration::from_millis(10));
3616 }
3617 }
3618
3619 let _ = child.wait();
3620 let _ = stdout_handle.join();
3621 let _ = stderr_handle.join();
3622
3623 while let Ok((stream, result)) = rx.try_recv() {
3624 match (stream, result) {
3625 (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
3626 stdout_bytes = Some(bytes);
3627 }
3628 (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
3629 stderr_bytes = Some(bytes);
3630 }
3631 (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
3632 stream_error = Some(VectorRegenerationFailure::new(
3633 VectorRegenerationFailureClass::GeneratorStdoutOverflow,
3634 format!(
3635 "stdout exceeded max_stdout_bytes {}",
3636 policy.max_stdout_bytes
3637 ),
3638 ));
3639 }
3640 (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
3641 stream_error = Some(VectorRegenerationFailure::new(
3642 VectorRegenerationFailureClass::GeneratorStderrOverflow,
3643 format!(
3644 "stderr exceeded max_stderr_bytes {}",
3645 policy.max_stderr_bytes
3646 ),
3647 ));
3648 }
3649 (_, StreamReadResult::Io(error)) => {
3650 stream_error = Some(VectorRegenerationFailure::new(
3651 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3652 format!("failed to read generator stream: {error}"),
3653 ));
3654 }
3655 }
3656 }
3657
3658 if let Some(error) = stream_error {
3659 return Err(error);
3660 }
3661
3662 let status = status.ok_or_else(|| {
3663 VectorRegenerationFailure::new(
3664 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3665 "vector generator exited without a status",
3666 )
3667 })?;
3668 if !status.success() {
3669 let stderr =
3670 truncate_error_text(&stderr_bytes.unwrap_or_default(), policy.max_stderr_bytes);
3671 return Err(VectorRegenerationFailure::new(
3672 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3673 stderr,
3674 ));
3675 }
3676
3677 let stdout = stdout_bytes.unwrap_or_default();
3678 serde_json::from_slice(&stdout).map_err(|error| {
3679 VectorRegenerationFailure::new(
3680 VectorRegenerationFailureClass::MalformedGeneratorJson,
3681 format!("decode generator output: {error}"),
3682 )
3683 })
3684}
3685
3686fn spawn_capped_reader<R: Read + Send + 'static>(
3687 mut reader: R,
3688 max_bytes: usize,
3689 stream: GeneratorStream,
3690 tx: mpsc::Sender<(GeneratorStream, StreamReadResult)>,
3691) -> thread::JoinHandle<()> {
3692 thread::spawn(move || {
3693 let mut buffer = Vec::new();
3694 let mut chunk = [0u8; 8192];
3695 loop {
3696 match reader.read(&mut chunk) {
3697 Ok(0) => {
3698 let _ = tx.send((stream, StreamReadResult::Complete(buffer)));
3699 break;
3700 }
3701 Ok(read_bytes) => {
3702 if buffer.len() + read_bytes > max_bytes {
3703 let _ = tx.send((stream, StreamReadResult::Overflow));
3704 break;
3705 }
3706 buffer.extend_from_slice(&chunk[..read_bytes]);
3707 }
3708 Err(error) => {
3709 let _ = tx.send((stream, StreamReadResult::Io(error)));
3710 break;
3711 }
3712 }
3713 }
3714 })
3715}
3716
/// Renders captured process output as text, keeping at most `max_bytes` bytes
/// of the input.
///
/// Invalid UTF-8 is replaced lossily. When `bytes` is longer than `max_bytes`
/// the excess is dropped and `" [truncated]"` is appended; a multi-byte
/// character cut by the limit shows up as a replacement character.
fn truncate_error_text(bytes: &[u8], max_bytes: usize) -> String {
    let kept = &bytes[..bytes.len().min(max_bytes)];
    let mut text = String::from_utf8_lossy(kept).into_owned();
    if kept.len() < bytes.len() {
        text.push_str(" [truncated]");
    }
    text
}
3724
3725fn count_source_ref(
3726 conn: &rusqlite::Connection,
3727 table: &str,
3728 source_ref: &str,
3729) -> Result<usize, EngineError> {
3730 let sql = match table {
3731 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3732 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3733 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3734 "operational_mutations" => {
3735 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3736 }
3737 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3738 };
3739 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3740 usize::try_from(count)
3743 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3744}
3745
3746fn rebuild_operational_current_rows(
3747 tx: &rusqlite::Transaction<'_>,
3748 collections: &[String],
3749) -> Result<usize, EngineError> {
3750 let mut rebuilt_rows = 0usize;
3751 clear_operational_current_rows(tx, collections)?;
3752 let mut ins_current = tx.prepare_cached(
3753 "INSERT INTO operational_current \
3754 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3755 VALUES (?1, ?2, ?3, ?4, ?5)",
3756 )?;
3757
3758 for collection in collections {
3759 let mut stmt = tx.prepare(
3760 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3761 FROM operational_mutations \
3762 WHERE collection_name = ?1 \
3763 ORDER BY record_key, mutation_order",
3764 )?;
3765 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3766 std::collections::HashMap::new();
3767 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3768 for row in rows {
3769 let mutation = row?;
3770 match mutation.op_kind.as_str() {
3771 "put" => {
3772 latest_by_key.insert(
3773 mutation.record_key,
3774 Some((mutation.payload_json, mutation.created_at, mutation.id)),
3775 );
3776 }
3777 "delete" => {
3778 latest_by_key.insert(mutation.record_key, None);
3779 }
3780 _ => {}
3781 }
3782 }
3783
3784 for (record_key, state) in latest_by_key {
3785 if let Some((payload_json, updated_at, last_mutation_id)) = state {
3786 ins_current.execute(rusqlite::params![
3787 collection,
3788 record_key,
3789 payload_json,
3790 updated_at,
3791 last_mutation_id,
3792 ])?;
3793 rebuilt_rows += 1;
3794 }
3795 }
3796 }
3797
3798 drop(ins_current);
3799 Ok(rebuilt_rows)
3800}
3801
3802fn clear_operational_current_rows(
3803 tx: &rusqlite::Transaction<'_>,
3804 collections: &[String],
3805) -> Result<(), EngineError> {
3806 let mut delete_current =
3807 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3808 let mut delete_secondary_current = tx.prepare_cached(
3809 "DELETE FROM operational_secondary_index_entries \
3810 WHERE collection_name = ?1 AND subject_kind = 'current'",
3811 )?;
3812 for collection in collections {
3813 delete_secondary_current.execute([collection])?;
3814 delete_current.execute([collection])?;
3815 }
3816 drop(delete_secondary_current);
3817 drop(delete_current);
3818 Ok(())
3819}
3820
3821fn clear_operational_secondary_index_entries(
3822 tx: &rusqlite::Transaction<'_>,
3823 collection_name: &str,
3824) -> Result<(), EngineError> {
3825 tx.execute(
3826 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3827 [collection_name],
3828 )?;
3829 Ok(())
3830}
3831
3832fn insert_operational_secondary_index_entry(
3833 tx: &rusqlite::Transaction<'_>,
3834 collection_name: &str,
3835 subject_kind: &str,
3836 mutation_id: &str,
3837 record_key: &str,
3838 entry: &crate::operational::OperationalSecondaryIndexEntry,
3839) -> Result<(), EngineError> {
3840 tx.execute(
3841 "INSERT INTO operational_secondary_index_entries \
3842 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3843 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3844 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3845 rusqlite::params![
3846 collection_name,
3847 entry.index_name,
3848 subject_kind,
3849 mutation_id,
3850 record_key,
3851 entry.sort_timestamp,
3852 entry.slot1_text,
3853 entry.slot1_integer,
3854 entry.slot2_text,
3855 entry.slot2_integer,
3856 entry.slot3_text,
3857 entry.slot3_integer,
3858 ],
3859 )?;
3860 Ok(())
3861}
3862
3863fn rebuild_operational_secondary_index_entries(
3864 tx: &rusqlite::Transaction<'_>,
3865 collection_name: &str,
3866 collection_kind: OperationalCollectionKind,
3867 indexes: &[OperationalSecondaryIndexDefinition],
3868) -> Result<(usize, usize), EngineError> {
3869 clear_operational_secondary_index_entries(tx, collection_name)?;
3870
3871 let mut mutation_entries_rebuilt = 0usize;
3872 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3873 let mut stmt = tx.prepare(
3874 "SELECT id, record_key, payload_json FROM operational_mutations \
3875 WHERE collection_name = ?1 ORDER BY mutation_order",
3876 )?;
3877 let rows = stmt
3878 .query_map([collection_name], |row| {
3879 Ok((
3880 row.get::<_, String>(0)?,
3881 row.get::<_, String>(1)?,
3882 row.get::<_, String>(2)?,
3883 ))
3884 })?
3885 .collect::<Result<Vec<_>, _>>()?;
3886 drop(stmt);
3887 for (mutation_id, record_key, payload_json) in rows {
3888 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3889 insert_operational_secondary_index_entry(
3890 tx,
3891 collection_name,
3892 "mutation",
3893 &mutation_id,
3894 &record_key,
3895 &entry,
3896 )?;
3897 mutation_entries_rebuilt += 1;
3898 }
3899 }
3900 }
3901
3902 let mut current_entries_rebuilt = 0usize;
3903 if collection_kind == OperationalCollectionKind::LatestState {
3904 let mut stmt = tx.prepare(
3905 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3906 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3907 )?;
3908 let rows = stmt
3909 .query_map([collection_name], |row| {
3910 Ok((
3911 row.get::<_, String>(0)?,
3912 row.get::<_, String>(1)?,
3913 row.get::<_, i64>(2)?,
3914 row.get::<_, String>(3)?,
3915 ))
3916 })?
3917 .collect::<Result<Vec<_>, _>>()?;
3918 drop(stmt);
3919 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3920 for entry in
3921 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3922 {
3923 insert_operational_secondary_index_entry(
3924 tx,
3925 collection_name,
3926 "current",
3927 &last_mutation_id,
3928 &record_key,
3929 &entry,
3930 )?;
3931 current_entries_rebuilt += 1;
3932 }
3933 }
3934 }
3935
3936 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3937}
3938
3939fn collect_strings_tx(
3940 tx: &rusqlite::Transaction<'_>,
3941 sql: &str,
3942 value: &str,
3943) -> Result<Vec<String>, EngineError> {
3944 let mut stmt = tx.prepare(sql)?;
3945 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3946 rows.collect::<Result<Vec<_>, _>>()
3947 .map_err(EngineError::from)
3948}
3949
/// Converts a SQL row count to `usize`.
///
/// # Panics
///
/// Panics when `val` cannot be represented as a `usize` (for example when it
/// is negative); callers pass `count(*)` results, which are never negative.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted: Result<usize, _> = usize::try_from(val);
    converted.expect("count(*) must be non-negative")
}
3956
3957fn collect_strings(
3964 conn: &rusqlite::Connection,
3965 sql: &str,
3966 param: &str,
3967) -> Result<Vec<String>, EngineError> {
3968 let mut stmt = conn.prepare(sql)?;
3969 let values = stmt
3970 .query_map([param], |row| row.get::<_, String>(0))?
3971 .collect::<Result<Vec<_>, _>>()?;
3972 Ok(values)
3973}
3974
3975fn collect_edge_logical_ids_for_restore(
3976 tx: &rusqlite::Transaction<'_>,
3977 logical_id: &str,
3978 retire_source_ref: Option<&str>,
3979 retire_created_at: i64,
3980 retire_event_rowid: i64,
3981) -> Result<Vec<String>, EngineError> {
3982 let mut stmt = tx.prepare(
3983 "SELECT DISTINCT e.logical_id \
3984 FROM edges e \
3985 JOIN provenance_events p \
3986 ON p.subject = e.logical_id \
3987 AND p.event_type = 'edge_retire' \
3988 AND ( \
3989 p.created_at > ?3 \
3990 OR (p.created_at = ?3 AND p.rowid >= ?4) \
3991 ) \
3992 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3993 WHERE e.superseded_at IS NOT NULL \
3994 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3995 AND NOT EXISTS ( \
3996 SELECT 1 FROM edges active \
3997 WHERE active.logical_id = e.logical_id \
3998 AND active.superseded_at IS NULL \
3999 ) \
4000 ORDER BY e.logical_id",
4001 )?;
4002 let edge_ids = stmt
4003 .query_map(
4004 rusqlite::params![
4005 logical_id,
4006 retire_source_ref,
4007 retire_created_at,
4008 retire_event_rowid
4009 ],
4010 |row| row.get::<_, String>(0),
4011 )?
4012 .collect::<Result<Vec<_>, _>>()?;
4013 Ok(edge_ids)
4014}
4015
4016fn restore_validated_edges(
4019 tx: &rusqlite::Transaction<'_>,
4020 logical_id: &str,
4021 retire_source_ref: Option<&str>,
4022 retire_created_at: i64,
4023 retire_event_rowid: i64,
4024) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
4025 let edge_logical_ids = collect_edge_logical_ids_for_restore(
4026 tx,
4027 logical_id,
4028 retire_source_ref,
4029 retire_created_at,
4030 retire_event_rowid,
4031 )?;
4032 let mut restored = 0usize;
4033 let mut skipped = Vec::new();
4034 for edge_logical_id in &edge_logical_ids {
4035 let edge_detail: Option<(String, String, String)> = tx
4036 .query_row(
4037 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
4038 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
4039 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
4040 [edge_logical_id.as_str()],
4041 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
4042 )
4043 .optional()?;
4044 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
4045 continue;
4046 };
4047 let other_endpoint = if source_lid == logical_id {
4048 &target_lid
4049 } else {
4050 &source_lid
4051 };
4052 let endpoint_active: bool = tx
4053 .query_row(
4054 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
4055 [other_endpoint.as_str()],
4056 |_| Ok(true),
4057 )
4058 .optional()?
4059 .unwrap_or(false);
4060 if !endpoint_active {
4061 skipped.push(SkippedEdge {
4062 edge_logical_id: edge_logical_id.clone(),
4063 missing_endpoint: other_endpoint.clone(),
4064 });
4065 continue;
4066 }
4067 restored += tx.execute(
4068 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
4069 [edge_row_id.as_str()],
4070 )?;
4071 }
4072 Ok((restored, skipped))
4073}
4074
/// Counts the `vec_nodes_active` rows attached to chunks of `logical_id`.
///
/// Returns `0` instead of failing when SQLite reports an error whose message
/// mentions `vec_nodes_active` or `no such module: vec0`.
///
/// # Errors
///
/// Returns `EngineError::Sqlite` for any other query failure.
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );

    let error = match outcome {
        Ok(count) => return Ok(i64_to_usize(count)),
        Err(error) => error,
    };

    let vec_table_unavailable = matches!(
        &error,
        rusqlite::Error::SqliteFailure(_, Some(msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
    );
    if vec_table_unavailable {
        Ok(0)
    } else {
        Err(EngineError::Sqlite(error))
    }
}
4096
4097#[cfg(not(feature = "sqlite-vec"))]
4098#[allow(clippy::unnecessary_wraps)]
4099fn count_vec_rows_for_logical_id(
4100 _tx: &rusqlite::Transaction<'_>,
4101 _logical_id: &str,
4102) -> Result<usize, EngineError> {
4103 Ok(0)
4104}
4105
/// Deletes the `vec_nodes_active` rows attached to chunks of `logical_id` and
/// returns how many rows were removed.
///
/// Returns `0` instead of failing when SQLite reports an error whose message
/// mentions `vec_nodes_active` or `no such module: vec0`.
///
/// # Errors
///
/// Returns `EngineError::Sqlite` for any other failure.
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let outcome = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );

    let error = match outcome {
        Ok(deleted) => return Ok(deleted),
        Err(error) => error,
    };

    let vec_table_unavailable = matches!(
        &error,
        rusqlite::Error::SqliteFailure(_, Some(msg))
            if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0")
    );
    if vec_table_unavailable {
        Ok(0)
    } else {
        Err(EngineError::Sqlite(error))
    }
}
4125
4126#[cfg(not(feature = "sqlite-vec"))]
4127#[allow(clippy::unnecessary_wraps)]
4128fn delete_vec_rows_for_logical_id(
4129 _tx: &rusqlite::Transaction<'_>,
4130 _logical_id: &str,
4131) -> Result<usize, EngineError> {
4132 Ok(0)
4133}
4134
4135fn ensure_operational_collection_registered(
4136 conn: &rusqlite::Connection,
4137 collection_name: &str,
4138) -> Result<(), EngineError> {
4139 if load_operational_collection_record(conn, collection_name)?.is_none() {
4140 return Err(EngineError::InvalidWrite(format!(
4141 "operational collection '{collection_name}' is not registered"
4142 )));
4143 }
4144 Ok(())
4145}
4146
4147fn load_operational_collection_record(
4148 conn: &rusqlite::Connection,
4149 name: &str,
4150) -> Result<Option<OperationalCollectionRecord>, EngineError> {
4151 conn.query_row(
4152 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4153 FROM operational_collections WHERE name = ?1",
4154 [name],
4155 map_operational_collection_row,
4156 )
4157 .optional()
4158 .map_err(EngineError::Sqlite)
4159}
4160
4161fn validate_append_only_operational_collection(
4162 record: &OperationalCollectionRecord,
4163 operation: &str,
4164) -> Result<(), EngineError> {
4165 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4166 return Err(EngineError::InvalidWrite(format!(
4167 "operational collection '{}' must be append_only_log to {operation}",
4168 record.name
4169 )));
4170 }
4171 Ok(())
4172}
4173
4174#[derive(Clone, Debug, PartialEq, Eq)]
4175struct CompiledOperationalReadFilter {
4176 field: String,
4177 condition: OperationalReadCondition,
4178}
4179
4180#[derive(Clone, Debug)]
4181struct MatchedAppendOnlySecondaryIndexRead<'a> {
4182 index_name: &'a str,
4183 value_filter: &'a CompiledOperationalReadFilter,
4184 time_range: Option<&'a CompiledOperationalReadFilter>,
4185}
4186
/// The typed comparison carried by a compiled operational read filter.
#[derive(Clone, Debug, PartialEq, Eq)]
enum OperationalReadCondition {
    /// Match a string field against this exact value.
    ExactString(String),
    /// Match an integer or timestamp field against this exact value.
    ExactInteger(i64),
    /// Match a string field by prefix.
    Prefix(String),
    /// Match an integer or timestamp field against a range; either bound may
    /// be absent.
    Range {
        /// Lower bound, if any.
        lower: Option<i64>,
        /// Upper bound, if any.
        upper: Option<i64>,
    },
}
4197
4198fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
4199 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
4200 if applied_limit == 0 {
4201 return Err(EngineError::InvalidWrite(
4202 "operational read limit must be greater than zero".to_owned(),
4203 ));
4204 }
4205 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
4206}
4207
4208fn parse_operational_filter_fields(
4209 filter_fields_json: &str,
4210) -> Result<Vec<OperationalFilterField>, String> {
4211 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
4212 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
4213 let mut seen = std::collections::HashSet::new();
4214 for field in &fields {
4215 if field.name.trim().is_empty() {
4216 return Err("filter_fields_json field names must not be empty".to_owned());
4217 }
4218 if !seen.insert(field.name.as_str()) {
4219 return Err(format!(
4220 "filter_fields_json contains duplicate field '{}'",
4221 field.name
4222 ));
4223 }
4224 if field.modes.is_empty() {
4225 return Err(format!(
4226 "filter_fields_json field '{}' must declare at least one mode",
4227 field.name
4228 ));
4229 }
4230 if field.modes.contains(&OperationalFilterMode::Prefix)
4231 && field.field_type != OperationalFilterFieldType::String
4232 {
4233 return Err(format!(
4234 "filter field '{}' only supports prefix for string types",
4235 field.name
4236 ));
4237 }
4238 }
4239 Ok(fields)
4240}
4241
4242fn compile_operational_read_filters(
4243 filters: &[OperationalFilterClause],
4244 declared_fields: &[OperationalFilterField],
4245) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
4246 let field_map = declared_fields
4247 .iter()
4248 .map(|field| (field.name.as_str(), field))
4249 .collect::<std::collections::HashMap<_, _>>();
4250 filters
4251 .iter()
4252 .map(|filter| match filter {
4253 OperationalFilterClause::Exact { field, value } => {
4254 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4255 EngineError::InvalidWrite(format!(
4256 "operational read filter uses undeclared field '{field}'"
4257 ))
4258 })?;
4259 if !declared.modes.contains(&OperationalFilterMode::Exact) {
4260 return Err(EngineError::InvalidWrite(format!(
4261 "operational read field '{field}' does not allow exact filters"
4262 )));
4263 }
4264 let condition = match (declared.field_type, value) {
4265 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
4266 OperationalReadCondition::ExactString(value.clone())
4267 }
4268 (
4269 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
4270 OperationalFilterValue::Integer(value),
4271 ) => OperationalReadCondition::ExactInteger(*value),
4272 _ => {
4273 return Err(EngineError::InvalidWrite(format!(
4274 "operational read field '{field}' received a value with the wrong type"
4275 )));
4276 }
4277 };
4278 Ok(CompiledOperationalReadFilter {
4279 field: field.clone(),
4280 condition,
4281 })
4282 }
4283 OperationalFilterClause::Prefix { field, value } => {
4284 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4285 EngineError::InvalidWrite(format!(
4286 "operational read filter uses undeclared field '{field}'"
4287 ))
4288 })?;
4289 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
4290 return Err(EngineError::InvalidWrite(format!(
4291 "operational read field '{field}' does not allow prefix filters"
4292 )));
4293 }
4294 if declared.field_type != OperationalFilterFieldType::String {
4295 return Err(EngineError::InvalidWrite(format!(
4296 "operational read field '{field}' only supports prefix filters for strings"
4297 )));
4298 }
4299 Ok(CompiledOperationalReadFilter {
4300 field: field.clone(),
4301 condition: OperationalReadCondition::Prefix(value.clone()),
4302 })
4303 }
4304 OperationalFilterClause::Range {
4305 field,
4306 lower,
4307 upper,
4308 } => {
4309 let declared = field_map.get(field.as_str()).ok_or_else(|| {
4310 EngineError::InvalidWrite(format!(
4311 "operational read filter uses undeclared field '{field}'"
4312 ))
4313 })?;
4314 if !declared.modes.contains(&OperationalFilterMode::Range) {
4315 return Err(EngineError::InvalidWrite(format!(
4316 "operational read field '{field}' does not allow range filters"
4317 )));
4318 }
4319 if !matches!(
4320 declared.field_type,
4321 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
4322 ) {
4323 return Err(EngineError::InvalidWrite(format!(
4324 "operational read field '{field}' only supports range filters for integer/timestamp fields"
4325 )));
4326 }
4327 if lower.is_none() && upper.is_none() {
4328 return Err(EngineError::InvalidWrite(format!(
4329 "operational read range filter for '{field}' must specify a lower or upper bound"
4330 )));
4331 }
4332 Ok(CompiledOperationalReadFilter {
4333 field: field.clone(),
4334 condition: OperationalReadCondition::Range {
4335 lower: *lower,
4336 upper: *upper,
4337 },
4338 })
4339 }
4340 })
4341 .collect()
4342}
4343
4344fn match_append_only_secondary_index_read<'a>(
4345 filters: &'a [CompiledOperationalReadFilter],
4346 indexes: &'a [OperationalSecondaryIndexDefinition],
4347) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
4348 indexes.iter().find_map(|index| {
4349 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
4350 name,
4351 field,
4352 value_type,
4353 time_field,
4354 } = index
4355 else {
4356 return None;
4357 };
4358 if !(1..=2).contains(&filters.len()) {
4359 return None;
4360 }
4361
4362 let mut value_filter = None;
4363 let mut time_range = None;
4364 for filter in filters {
4365 if filter.field == *field {
4366 let supported = matches!(
4367 (&filter.condition, value_type),
4368 (
4369 OperationalReadCondition::ExactString(_)
4370 | OperationalReadCondition::Prefix(_),
4371 crate::operational::OperationalSecondaryIndexValueType::String
4372 ) | (
4373 OperationalReadCondition::ExactInteger(_),
4374 crate::operational::OperationalSecondaryIndexValueType::Integer
4375 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
4376 )
4377 );
4378 if !supported || value_filter.is_some() {
4379 return None;
4380 }
4381 value_filter = Some(filter);
4382 continue;
4383 }
4384 if filter.field == *time_field {
4385 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
4386 || time_range.is_some()
4387 {
4388 return None;
4389 }
4390 time_range = Some(filter);
4391 continue;
4392 }
4393 return None;
4394 }
4395
4396 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
4397 index_name: name.as_str(),
4398 value_filter,
4399 time_range,
4400 })
4401 })
4402}
4403
4404fn execute_operational_secondary_index_read(
4405 conn: &rusqlite::Connection,
4406 collection_name: &str,
4407 filters: &[CompiledOperationalReadFilter],
4408 indexes: &[OperationalSecondaryIndexDefinition],
4409 applied_limit: usize,
4410) -> Result<Option<OperationalReadReport>, EngineError> {
4411 use rusqlite::types::Value;
4412
4413 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
4414 return Ok(None);
4415 };
4416
4417 let mut sql = String::from(
4418 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4419 FROM operational_secondary_index_entries s \
4420 JOIN operational_mutations m ON m.id = s.mutation_id \
4421 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
4422 );
4423 let mut params = vec![
4424 Value::from(collection_name.to_owned()),
4425 Value::from(matched.index_name.to_owned()),
4426 ];
4427
4428 match &matched.value_filter.condition {
4429 OperationalReadCondition::ExactString(value) => {
4430 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
4431 params.push(Value::from(value.clone()));
4432 }
4433 OperationalReadCondition::Prefix(value) => {
4434 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
4435 params.push(Value::from(glob_prefix_pattern(value)));
4436 }
4437 OperationalReadCondition::ExactInteger(value) => {
4438 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
4439 params.push(Value::from(*value));
4440 }
4441 OperationalReadCondition::Range { .. } => return Ok(None),
4442 }
4443
4444 if let Some(time_range) = matched.time_range
4445 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
4446 {
4447 if let Some(lower) = lower {
4448 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
4449 params.push(Value::from(*lower));
4450 }
4451 if let Some(upper) = upper {
4452 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
4453 params.push(Value::from(*upper));
4454 }
4455 }
4456
4457 let _ = write!(
4458 sql,
4459 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
4460 params.len() + 1
4461 );
4462 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4463 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4464 )?));
4465
4466 let mut stmt = conn.prepare(&sql)?;
4467 let mut rows = stmt
4468 .query_map(
4469 rusqlite::params_from_iter(params),
4470 map_operational_mutation_row,
4471 )?
4472 .collect::<Result<Vec<_>, _>>()?;
4473 let was_limited = rows.len() > applied_limit;
4474 if was_limited {
4475 rows.truncate(applied_limit);
4476 }
4477
4478 Ok(Some(OperationalReadReport {
4479 collection_name: collection_name.to_owned(),
4480 row_count: rows.len(),
4481 applied_limit,
4482 was_limited,
4483 rows,
4484 }))
4485}
4486
4487fn execute_operational_filtered_read(
4488 conn: &rusqlite::Connection,
4489 collection_name: &str,
4490 filters: &[CompiledOperationalReadFilter],
4491 applied_limit: usize,
4492) -> Result<OperationalReadReport, EngineError> {
4493 use rusqlite::types::Value;
4494
4495 let mut sql = String::from(
4496 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
4497 FROM operational_mutations m ",
4498 );
4499 let mut params = vec![Value::from(collection_name.to_owned())];
4500 for (index, filter) in filters.iter().enumerate() {
4501 let _ = write!(
4502 sql,
4503 "JOIN operational_filter_values f{index} \
4504 ON f{index}.mutation_id = m.id \
4505 AND f{index}.collection_name = m.collection_name "
4506 );
4507 match &filter.condition {
4508 OperationalReadCondition::ExactString(value) => {
4509 let _ = write!(
4510 sql,
4511 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
4512 params.len() + 1,
4513 params.len() + 2
4514 );
4515 params.push(Value::from(filter.field.clone()));
4516 params.push(Value::from(value.clone()));
4517 }
4518 OperationalReadCondition::ExactInteger(value) => {
4519 let _ = write!(
4520 sql,
4521 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
4522 params.len() + 1,
4523 params.len() + 2
4524 );
4525 params.push(Value::from(filter.field.clone()));
4526 params.push(Value::from(*value));
4527 }
4528 OperationalReadCondition::Prefix(value) => {
4529 let _ = write!(
4530 sql,
4531 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
4532 params.len() + 1,
4533 params.len() + 2
4534 );
4535 params.push(Value::from(filter.field.clone()));
4536 params.push(Value::from(glob_prefix_pattern(value)));
4537 }
4538 OperationalReadCondition::Range { lower, upper } => {
4539 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
4540 params.push(Value::from(filter.field.clone()));
4541 if let Some(lower) = lower {
4542 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
4543 params.push(Value::from(*lower));
4544 }
4545 if let Some(upper) = upper {
4546 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
4547 params.push(Value::from(*upper));
4548 }
4549 }
4550 }
4551 }
4552 let _ = write!(
4553 sql,
4554 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
4555 params.len() + 1
4556 );
4557 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
4558 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
4559 )?));
4560
4561 let mut stmt = conn.prepare(&sql)?;
4562 let mut rows = stmt
4563 .query_map(
4564 rusqlite::params_from_iter(params),
4565 map_operational_mutation_row,
4566 )?
4567 .collect::<Result<Vec<_>, _>>()?;
4568 let was_limited = rows.len() > applied_limit;
4569 if was_limited {
4570 rows.truncate(applied_limit);
4571 }
4572 Ok(OperationalReadReport {
4573 collection_name: collection_name.to_owned(),
4574 row_count: rows.len(),
4575 applied_limit,
4576 was_limited,
4577 rows,
4578 })
4579}
4580
/// Builds a `GLOB` pattern that matches every string starting with `value`.
///
/// The characters `*`, `?` and `[` are each wrapped in a one-character bracket
/// class so they match literally, then a trailing `*` is appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for symbol in value.chars() {
        if matches!(symbol, '*' | '?' | '[') {
            escaped.push('[');
            escaped.push(symbol);
            escaped.push(']');
        } else {
            escaped.push(symbol);
        }
    }
    escaped.push('*');
    escaped
}
4594
/// One filterable value pulled out of a mutation payload for a declared filter field.
///
/// The extractor in this file populates exactly one of `string_value` and
/// `integer_value`, depending on the declared field type.
#[derive(Clone, Debug, PartialEq, Eq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value belongs to.
    field_name: String,
    /// Value for string-typed fields; `None` otherwise.
    string_value: Option<String>,
    /// Value for integer- or timestamp-typed fields; `None` otherwise.
    integer_value: Option<i64>,
}
4601
4602fn extract_operational_filter_values(
4603 filter_fields: &[OperationalFilterField],
4604 payload_json: &str,
4605) -> Vec<ExtractedOperationalFilterValue> {
4606 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
4607 return Vec::new();
4608 };
4609 let Some(object) = parsed.as_object() else {
4610 return Vec::new();
4611 };
4612
4613 filter_fields
4614 .iter()
4615 .filter_map(|field| {
4616 let value = object.get(&field.name)?;
4617 match field.field_type {
4618 OperationalFilterFieldType::String => {
4619 value
4620 .as_str()
4621 .map(|string_value| ExtractedOperationalFilterValue {
4622 field_name: field.name.clone(),
4623 string_value: Some(string_value.to_owned()),
4624 integer_value: None,
4625 })
4626 }
4627 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
4628 value
4629 .as_i64()
4630 .map(|integer_value| ExtractedOperationalFilterValue {
4631 field_name: field.name.clone(),
4632 string_value: None,
4633 integer_value: Some(integer_value),
4634 })
4635 }
4636 }
4637 })
4638 .collect()
4639}
4640
4641fn operational_compaction_candidates(
4642 conn: &rusqlite::Connection,
4643 retention_json: &str,
4644 collection_name: &str,
4645) -> Result<(Vec<String>, Option<i64>), EngineError> {
4646 operational_compaction_candidates_at(
4647 conn,
4648 retention_json,
4649 collection_name,
4650 current_unix_timestamp()?,
4651 )
4652}
4653
4654fn operational_compaction_candidates_at(
4655 conn: &rusqlite::Connection,
4656 retention_json: &str,
4657 collection_name: &str,
4658 now_timestamp: i64,
4659) -> Result<(Vec<String>, Option<i64>), EngineError> {
4660 let policy = parse_operational_retention_policy(retention_json)?;
4661 match policy {
4662 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4663 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4664 let before_timestamp = now_timestamp - max_age_seconds;
4665 let mut stmt = conn.prepare(
4666 "SELECT id FROM operational_mutations \
4667 WHERE collection_name = ?1 AND created_at < ?2 \
4668 ORDER BY mutation_order",
4669 )?;
4670 let mutation_ids = stmt
4671 .query_map(
4672 rusqlite::params![collection_name, before_timestamp],
4673 |row| row.get::<_, String>(0),
4674 )?
4675 .collect::<Result<Vec<_>, _>>()?;
4676 Ok((mutation_ids, Some(before_timestamp)))
4677 }
4678 OperationalRetentionPolicy::KeepLast { max_rows } => {
4679 let mut stmt = conn.prepare(
4680 "SELECT id FROM operational_mutations \
4681 WHERE collection_name = ?1 \
4682 ORDER BY mutation_order DESC",
4683 )?;
4684 let ordered_ids = stmt
4685 .query_map([collection_name], |row| row.get::<_, String>(0))?
4686 .collect::<Result<Vec<_>, _>>()?;
4687 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4688 }
4689 }
4690}
4691
4692fn parse_operational_retention_policy(
4693 retention_json: &str,
4694) -> Result<OperationalRetentionPolicy, EngineError> {
4695 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4696 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4697 match policy {
4698 OperationalRetentionPolicy::KeepAll => Ok(policy),
4699 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4700 if max_age_seconds <= 0 {
4701 return Err(EngineError::InvalidWrite(
4702 "retention_json max_age_seconds must be greater than zero".to_owned(),
4703 ));
4704 }
4705 Ok(policy)
4706 }
4707 OperationalRetentionPolicy::KeepLast { max_rows } => {
4708 if max_rows == 0 {
4709 return Err(EngineError::InvalidWrite(
4710 "retention_json max_rows must be greater than zero".to_owned(),
4711 ));
4712 }
4713 Ok(policy)
4714 }
4715 }
4716}
4717
4718fn load_operational_retention_records(
4719 conn: &rusqlite::Connection,
4720 collection_names: Option<&[String]>,
4721 max_collections: Option<usize>,
4722) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4723 let limit = max_collections.unwrap_or(usize::MAX);
4724 if limit == 0 {
4725 return Err(EngineError::InvalidWrite(
4726 "max_collections must be greater than zero".to_owned(),
4727 ));
4728 }
4729
4730 let mut records = Vec::new();
4731 if let Some(collection_names) = collection_names {
4732 for name in collection_names.iter().take(limit) {
4733 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4734 EngineError::InvalidWrite(format!(
4735 "operational collection '{name}' is not registered"
4736 ))
4737 })?;
4738 records.push(record);
4739 }
4740 return Ok(records);
4741 }
4742
4743 let mut stmt = conn.prepare(
4744 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4745 FROM operational_collections ORDER BY name",
4746 )?;
4747 let rows = stmt
4748 .query_map([], map_operational_collection_row)?
4749 .take(limit)
4750 .collect::<Result<Vec<_>, _>>()?;
4751 Ok(rows)
4752}
4753
4754fn last_operational_retention_run_at(
4755 conn: &rusqlite::Connection,
4756 collection_name: &str,
4757) -> Result<Option<i64>, EngineError> {
4758 conn.query_row(
4759 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4760 [collection_name],
4761 |row| row.get(0),
4762 )
4763 .optional()
4764 .map_err(EngineError::Sqlite)
4765 .map(Option::flatten)
4766}
4767
4768fn count_operational_mutations_for_collection(
4769 conn: &rusqlite::Connection,
4770 collection_name: &str,
4771) -> Result<usize, EngineError> {
4772 let count: i64 = conn.query_row(
4773 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4774 [collection_name],
4775 |row| row.get(0),
4776 )?;
4777 usize::try_from(count).map_err(|_| {
4778 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4779 })
4780}
4781
4782fn retention_action_kind_and_limit(
4783 policy: &OperationalRetentionPolicy,
4784) -> (OperationalRetentionActionKind, Option<usize>) {
4785 match policy {
4786 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4787 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4788 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4789 }
4790 OperationalRetentionPolicy::KeepLast { max_rows } => {
4791 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4792 }
4793 }
4794}
4795
4796fn plan_operational_retention_item(
4797 conn: &rusqlite::Connection,
4798 record: &OperationalCollectionRecord,
4799 now_timestamp: i64,
4800) -> Result<OperationalRetentionPlanItem, EngineError> {
4801 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4802 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4803 return Ok(OperationalRetentionPlanItem {
4804 collection_name: record.name.clone(),
4805 action_kind: OperationalRetentionActionKind::Noop,
4806 candidate_deletions: 0,
4807 before_timestamp: None,
4808 max_rows: None,
4809 last_run_at,
4810 });
4811 }
4812 let policy = parse_operational_retention_policy(&record.retention_json)?;
4813 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4814 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4815 conn,
4816 &record.retention_json,
4817 &record.name,
4818 now_timestamp,
4819 )?;
4820 Ok(OperationalRetentionPlanItem {
4821 collection_name: record.name.clone(),
4822 action_kind,
4823 candidate_deletions: candidate_ids.len(),
4824 before_timestamp,
4825 max_rows,
4826 last_run_at,
4827 })
4828}
4829
4830fn run_operational_retention_item(
4831 tx: &rusqlite::Transaction<'_>,
4832 record: &OperationalCollectionRecord,
4833 now_timestamp: i64,
4834 dry_run: bool,
4835) -> Result<OperationalRetentionRunItem, EngineError> {
4836 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4837 let mut deleted_mutations = 0usize;
4838 if record.kind == OperationalCollectionKind::AppendOnlyLog
4839 && plan.action_kind != OperationalRetentionActionKind::Noop
4840 && plan.candidate_deletions > 0
4841 && !dry_run
4842 {
4843 let (candidate_ids, _) = operational_compaction_candidates_at(
4844 tx,
4845 &record.retention_json,
4846 &record.name,
4847 now_timestamp,
4848 )?;
4849 let mut delete_stmt =
4850 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4851 for mutation_id in &candidate_ids {
4852 delete_stmt.execute([mutation_id.as_str()])?;
4853 deleted_mutations += 1;
4854 }
4855 drop(delete_stmt);
4856
4857 persist_simple_provenance_event(
4858 tx,
4859 "operational_retention_run",
4860 &record.name,
4861 Some(serde_json::json!({
4862 "action_kind": plan.action_kind,
4863 "deleted_mutations": deleted_mutations,
4864 "before_timestamp": plan.before_timestamp,
4865 "max_rows": plan.max_rows,
4866 "executed_at": now_timestamp,
4867 })),
4868 )?;
4869 }
4870
4871 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4872 let effective_deleted_mutations = if dry_run {
4873 plan.candidate_deletions
4874 } else {
4875 deleted_mutations
4876 };
4877 let rows_remaining = if dry_run {
4878 live_rows_remaining.saturating_sub(effective_deleted_mutations)
4879 } else {
4880 live_rows_remaining
4881 };
4882 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4883 tx.execute(
4884 "INSERT INTO operational_retention_runs \
4885 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4886 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4887 rusqlite::params![
4888 new_id(),
4889 record.name,
4890 now_timestamp,
4891 serde_json::to_string(&plan.action_kind)
4892 .unwrap_or_else(|_| "\"noop\"".to_owned())
4893 .trim_matches('"')
4894 .to_owned(),
4895 i32::from(dry_run),
4896 deleted_mutations,
4897 rows_remaining,
4898 serde_json::json!({
4899 "before_timestamp": plan.before_timestamp,
4900 "max_rows": plan.max_rows,
4901 })
4902 .to_string(),
4903 ],
4904 )?;
4905 }
4906
4907 Ok(OperationalRetentionRunItem {
4908 collection_name: plan.collection_name,
4909 action_kind: plan.action_kind,
4910 deleted_mutations: effective_deleted_mutations,
4911 before_timestamp: plan.before_timestamp,
4912 max_rows: plan.max_rows,
4913 rows_remaining,
4914 })
4915}
4916
4917fn current_unix_timestamp() -> Result<i64, EngineError> {
4918 let now = SystemTime::now()
4919 .duration_since(SystemTime::UNIX_EPOCH)
4920 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4921 i64::try_from(now.as_secs())
4922 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4923}
4924
4925fn map_operational_collection_row(
4926 row: &rusqlite::Row<'_>,
4927) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4928 let kind_text: String = row.get(1)?;
4929 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4930 rusqlite::Error::FromSqlConversionFailure(
4931 1,
4932 rusqlite::types::Type::Text,
4933 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4934 )
4935 })?;
4936 Ok(OperationalCollectionRecord {
4937 name: row.get(0)?,
4938 kind,
4939 schema_json: row.get(2)?,
4940 retention_json: row.get(3)?,
4941 filter_fields_json: row.get(4)?,
4942 validation_json: row.get(5)?,
4943 secondary_indexes_json: row.get(6)?,
4944 format_version: row.get(7)?,
4945 created_at: row.get(8)?,
4946 disabled_at: row.get(9)?,
4947 })
4948}
4949
4950fn map_operational_mutation_row(
4951 row: &rusqlite::Row<'_>,
4952) -> Result<OperationalMutationRow, rusqlite::Error> {
4953 Ok(OperationalMutationRow {
4954 id: row.get(0)?,
4955 collection_name: row.get(1)?,
4956 record_key: row.get(2)?,
4957 op_kind: row.get(3)?,
4958 payload_json: row.get(4)?,
4959 source_ref: row.get(5)?,
4960 created_at: row.get(6)?,
4961 })
4962}
4963
4964fn map_operational_current_row(
4965 row: &rusqlite::Row<'_>,
4966) -> Result<OperationalCurrentRow, rusqlite::Error> {
4967 Ok(OperationalCurrentRow {
4968 collection_name: row.get(0)?,
4969 record_key: row.get(1)?,
4970 payload_json: row.get(2)?,
4971 updated_at: row.get(3)?,
4972 last_mutation_id: row.get(4)?,
4973 })
4974}
4975
4976#[cfg(test)]
4977#[allow(clippy::expect_used)]
4978mod tests {
4979 use std::fs;
4980 use std::sync::Arc;
4981
4982 use fathomdb_schema::SchemaManager;
4983 use tempfile::NamedTempFile;
4984
4985 use super::{
4986 AdminService, FtsPropertyPathMode, FtsPropertyPathSpec, SafeExportOptions,
4987 VectorRegenerationConfig,
4988 };
4989 use crate::projection::ProjectionTarget;
4990 use crate::sqlite;
4991 use crate::{
4992 EngineError, ExecutionCoordinator, OperationalCollectionKind, OperationalRegisterRequest,
4993 TelemetryCounters,
4994 };
4995
4996 use fathomdb_query::QueryBuilder;
4997
4998 #[cfg(feature = "sqlite-vec")]
4999 use super::{VectorGeneratorPolicy, load_vector_regeneration_config};
5000
5001 #[allow(dead_code)]
5002 #[cfg(unix)]
5003 fn set_file_mode(path: &std::path::Path, mode: u32) {
5004 use std::os::unix::fs::PermissionsExt;
5005
5006 let mut permissions = fs::metadata(path).expect("script metadata").permissions();
5007 permissions.set_mode(mode);
5008 fs::set_permissions(path, permissions).expect("chmod");
5009 }
5010
5011 #[allow(dead_code)]
5012 #[cfg(not(unix))]
5013 fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
5014
5015 fn setup() -> (NamedTempFile, AdminService) {
5016 let db = NamedTempFile::new().expect("temp file");
5017 let schema = Arc::new(SchemaManager::new());
5018 {
5019 let conn = sqlite::open_connection(db.path()).expect("connection");
5020 schema.bootstrap(&conn).expect("bootstrap");
5021 }
5022 let service = AdminService::new(db.path(), Arc::clone(&schema));
5023 (db, service)
5024 }
5025
5026 #[test]
5027 fn check_integrity_includes_active_uniqueness_count() {
5028 let (_db, service) = setup();
5029 let report = service.check_integrity().expect("integrity check");
5030 assert_eq!(report.duplicate_active_logical_ids, 0);
5031 assert_eq!(report.operational_missing_collections, 0);
5032 assert_eq!(report.operational_missing_last_mutations, 0);
5033 }
5034
5035 #[test]
5036 fn trace_source_returns_node_logical_ids() {
5037 let (db, service) = setup();
5038 {
5039 let conn = sqlite::open_connection(db.path()).expect("conn");
5040 conn.execute(
5041 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5042 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
5043 [],
5044 )
5045 .expect("insert node");
5046 }
5047 let report = service.trace_source("source-1").expect("trace");
5048 assert_eq!(report.node_rows, 1);
5049 assert_eq!(report.node_logical_ids, vec!["lg1"]);
5050 }
5051
5052 #[test]
5053 fn trace_source_includes_operational_mutations() {
5054 let (db, service) = setup();
5055 {
5056 let conn = sqlite::open_connection(db.path()).expect("conn");
5057 conn.execute(
5058 "INSERT INTO operational_collections \
5059 (name, kind, schema_json, retention_json, format_version, created_at) \
5060 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5061 [],
5062 )
5063 .expect("insert collection");
5064 conn.execute(
5065 "INSERT INTO operational_mutations \
5066 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5067 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
5068 [],
5069 )
5070 .expect("insert mutation");
5071 }
5072
5073 let report = service.trace_source("source-1").expect("trace");
5074 assert_eq!(report.operational_mutation_rows, 1);
5075 assert_eq!(report.operational_mutation_ids, vec!["m1"]);
5076 }
5077
5078 #[test]
5079 fn excise_source_restores_prior_active_node() {
5080 let (db, service) = setup();
5081 {
5082 let conn = sqlite::open_connection(db.path()).expect("conn");
5083 conn.execute(
5084 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5085 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
5086 [],
5087 )
5088 .expect("insert v1 superseded");
5089 conn.execute(
5090 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5091 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
5092 [],
5093 )
5094 .expect("insert v2 active");
5095 }
5096 service.excise_source("source-2").expect("excise");
5097 {
5098 let conn = sqlite::open_connection(db.path()).expect("conn");
5099 let active_row_id: String = conn
5100 .query_row(
5101 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
5102 [],
5103 |row| row.get(0),
5104 )
5105 .expect("active row exists after excise");
5106 assert_eq!(active_row_id, "r1");
5107 }
5108 }
5109
5110 #[test]
5111 fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
5112 let (db, service) = setup();
5113 {
5114 let conn = sqlite::open_connection(db.path()).expect("conn");
5115 conn.execute(
5116 "INSERT INTO operational_collections \
5117 (name, kind, schema_json, retention_json, format_version, created_at) \
5118 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
5119 [],
5120 )
5121 .expect("insert collection");
5122 conn.execute(
5123 "INSERT INTO operational_mutations \
5124 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5125 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
5126 [],
5127 )
5128 .expect("insert prior mutation");
5129 conn.execute(
5130 "INSERT INTO operational_mutations \
5131 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5132 VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
5133 [],
5134 )
5135 .expect("insert excised mutation");
5136 conn.execute(
5137 "INSERT INTO operational_current \
5138 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
5139 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
5140 [],
5141 )
5142 .expect("insert current row");
5143 }
5144
5145 let traced = service
5146 .trace_source("source-2")
5147 .expect("trace before excise");
5148 assert_eq!(traced.operational_mutation_rows, 1);
5149 assert_eq!(traced.operational_mutation_ids, vec!["m2"]);
5150
5151 let excised = service.excise_source("source-2").expect("excise");
5152 assert_eq!(excised.operational_mutation_rows, 0);
5153 assert!(excised.operational_mutation_ids.is_empty());
5154
5155 {
5156 let conn = sqlite::open_connection(db.path()).expect("conn");
5157 let remaining: i64 = conn
5158 .query_row(
5159 "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
5160 [],
5161 |row| row.get(0),
5162 )
5163 .expect("remaining count");
5164 assert_eq!(remaining, 0);
5165
5166 let current: (String, String) = conn
5167 .query_row(
5168 "SELECT payload_json, last_mutation_id FROM operational_current \
5169 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5170 [],
5171 |row| Ok((row.get(0)?, row.get(1)?)),
5172 )
5173 .expect("rebuilt current row");
5174 assert_eq!(current.0, "{\"status\":\"old\"}");
5175 assert_eq!(current.1, "m1");
5176 }
5177 }
5178
5179 #[test]
5180 fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
5181 let (db, service) = setup();
5182 {
5183 let conn = sqlite::open_connection(db.path()).expect("conn");
5184 conn.execute(
5185 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5186 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5187 [],
5188 )
5189 .expect("insert node");
5190 conn.execute(
5191 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5192 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5193 [],
5194 )
5195 .expect("insert target node");
5196 conn.execute(
5197 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5198 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5199 [],
5200 )
5201 .expect("insert chunk");
5202 conn.execute(
5203 "INSERT INTO edges \
5204 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5205 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5206 [],
5207 )
5208 .expect("insert edge");
5209 conn.execute(
5210 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5211 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5212 [],
5213 )
5214 .expect("insert node retire event");
5215 conn.execute(
5216 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5217 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
5218 [],
5219 )
5220 .expect("insert edge retire event");
5221 conn.execute(
5222 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5223 [],
5224 )
5225 .expect("retire node");
5226 conn.execute(
5227 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
5228 [],
5229 )
5230 .expect("retire edge");
5231 conn.execute("DELETE FROM fts_nodes", [])
5232 .expect("clear fts");
5233 }
5234
5235 let report = service.restore_logical_id("doc-1").expect("restore");
5236 assert_eq!(report.logical_id, "doc-1");
5237 assert!(!report.was_noop);
5238 assert_eq!(report.restored_node_rows, 1);
5239 assert_eq!(report.restored_edge_rows, 1);
5240 assert_eq!(report.restored_chunk_rows, 1);
5241 assert_eq!(report.restored_fts_rows, 1);
5242
5243 let conn = sqlite::open_connection(db.path()).expect("conn");
5244 let active_node_count: i64 = conn
5245 .query_row(
5246 "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5247 [],
5248 |row| row.get(0),
5249 )
5250 .expect("active node count");
5251 assert_eq!(active_node_count, 1);
5252 let active_edge_count: i64 = conn
5253 .query_row(
5254 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5255 [],
5256 |row| row.get(0),
5257 )
5258 .expect("active edge count");
5259 assert_eq!(active_edge_count, 1);
5260 let fts_count: i64 = conn
5261 .query_row(
5262 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
5263 [],
5264 |row| row.get(0),
5265 )
5266 .expect("fts count");
5267 assert_eq!(fts_count, 1);
5268 }
5269
5270 #[test]
5271 fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
5272 let (db, service) = setup();
5273 {
5274 let conn = sqlite::open_connection(db.path()).expect("conn");
5275 conn.execute(
5276 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5277 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5278 [],
5279 )
5280 .expect("insert node");
5281 conn.execute(
5282 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5283 VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
5284 [],
5285 )
5286 .expect("insert target node");
5287 conn.execute(
5288 "INSERT INTO edges \
5289 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
5290 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
5291 [],
5292 )
5293 .expect("insert edge");
5294 conn.execute(
5295 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5296 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5297 [],
5298 )
5299 .expect("insert node retire event");
5300 conn.execute(
5301 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5302 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
5303 [],
5304 )
5305 .expect("insert edge retire event");
5306 conn.execute(
5307 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5308 [],
5309 )
5310 .expect("retire node");
5311 conn.execute(
5312 "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
5313 [],
5314 )
5315 .expect("retire edge");
5316 }
5317
5318 let report = service.restore_logical_id("doc-1").expect("restore");
5319 assert_eq!(report.restored_edge_rows, 1);
5320
5321 let conn = sqlite::open_connection(db.path()).expect("conn");
5322 let active_edge_count: i64 = conn
5323 .query_row(
5324 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
5325 [],
5326 |row| row.get(0),
5327 )
5328 .expect("active edge count");
5329 assert_eq!(active_edge_count, 1);
5330 }
5331
5332 #[test]
5333 fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
5334 let (db, service) = setup();
5335 {
5336 let conn = sqlite::open_connection(db.path()).expect("conn");
5337 conn.execute(
5338 "INSERT INTO nodes \
5339 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5340 VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
5341 [],
5342 )
5343 .expect("insert older retired node");
5344 conn.execute(
5345 "INSERT INTO nodes \
5346 (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5347 VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
5348 [],
5349 )
5350 .expect("insert newer retired node");
5351 conn.execute(
5352 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5353 VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5354 [],
5355 )
5356 .expect("insert older retire event");
5357 conn.execute(
5358 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5359 VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5360 [],
5361 )
5362 .expect("insert newer retire event");
5363 }
5364
5365 let report = service.restore_logical_id("doc-1").expect("restore");
5366
5367 assert!(!report.was_noop);
5368 let conn = sqlite::open_connection(db.path()).expect("conn");
5369 let active_row: (String, String) = conn
5370 .query_row(
5371 "SELECT row_id, properties FROM nodes \
5372 WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
5373 [],
5374 |row| Ok((row.get(0)?, row.get(1)?)),
5375 )
5376 .expect("restored active row");
5377 assert_eq!(active_row.0, "node-row-newer");
5378 assert_eq!(active_row.1, "{\"title\":\"newer\"}");
5379 }
5380
5381 #[test]
5382 fn purge_logical_id_removes_retired_content_and_records_tombstone() {
5383 let (db, service) = setup();
5384 {
5385 let conn = sqlite::open_connection(db.path()).expect("conn");
5386 conn.execute(
5387 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5388 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5389 [],
5390 )
5391 .expect("insert retired node");
5392 conn.execute(
5393 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5394 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5395 [],
5396 )
5397 .expect("insert chunk");
5398 conn.execute(
5399 "INSERT INTO edges \
5400 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
5401 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
5402 [],
5403 )
5404 .expect("insert retired edge");
5405 conn.execute(
5406 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
5407 VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
5408 [],
5409 )
5410 .expect("insert fts");
5411 }
5412
5413 let report = service.purge_logical_id("doc-1").expect("purge");
5414 assert_eq!(report.logical_id, "doc-1");
5415 assert!(!report.was_noop);
5416 assert_eq!(report.deleted_node_rows, 1);
5417 assert_eq!(report.deleted_edge_rows, 1);
5418 assert_eq!(report.deleted_chunk_rows, 1);
5419 assert_eq!(report.deleted_fts_rows, 1);
5420
5421 let conn = sqlite::open_connection(db.path()).expect("conn");
5422 let remaining_nodes: i64 = conn
5423 .query_row(
5424 "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
5425 [],
5426 |row| row.get(0),
5427 )
5428 .expect("remaining nodes");
5429 assert_eq!(remaining_nodes, 0);
5430 let remaining_edges: i64 = conn
5431 .query_row(
5432 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
5433 [],
5434 |row| row.get(0),
5435 )
5436 .expect("remaining edges");
5437 assert_eq!(remaining_edges, 0);
5438 let remaining_chunks: i64 = conn
5439 .query_row(
5440 "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
5441 [],
5442 |row| row.get(0),
5443 )
5444 .expect("remaining chunks");
5445 assert_eq!(remaining_chunks, 0);
5446 let purge_events: i64 = conn
5447 .query_row(
5448 "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
5449 [],
5450 |row| row.get(0),
5451 )
5452 .expect("purge events");
5453 assert_eq!(purge_events, 1);
5454 }
5455
5456 #[test]
5457 fn check_semantics_accepts_preserved_retired_chunks() {
5458 let (db, service) = setup();
5459 {
5460 let conn = sqlite::open_connection(db.path()).expect("conn");
5461 conn.execute(
5462 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5463 VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
5464 [],
5465 )
5466 .expect("insert retired node");
5467 conn.execute(
5468 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5469 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5470 [],
5471 )
5472 .expect("insert chunk");
5473 }
5474
5475 let report = service.check_semantics().expect("semantics");
5476 assert_eq!(report.orphaned_chunks, 0);
5477 }
5478
5479 #[test]
5480 fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
5481 let (db, service) = setup();
5482 {
5483 let conn = sqlite::open_connection(db.path()).expect("conn");
5484 conn.execute(
5485 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5486 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5487 [],
5488 )
5489 .expect("insert orphaned chunk");
5490 }
5491
5492 let report = service.check_semantics().expect("semantics");
5493 assert_eq!(report.orphaned_chunks, 1);
5494 }
5495
5496 #[cfg(feature = "sqlite-vec")]
5497 #[test]
5498 fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
5499 let (db, service) = setup();
5500 {
5501 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5502 service
5503 .schema_manager
5504 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5505 .expect("ensure vec profile");
5506 conn.execute(
5507 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5508 VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
5509 [],
5510 )
5511 .expect("insert orphaned chunk");
5512 conn.execute(
5513 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5514 [],
5515 )
5516 .expect("insert vec row");
5517 }
5518
5519 let report = service.check_semantics().expect("semantics");
5520 assert_eq!(report.orphaned_chunks, 1);
5521 assert_eq!(report.vec_rows_for_superseded_nodes, 1);
5522 }
5523
5524 #[cfg(feature = "sqlite-vec")]
5525 #[test]
5526 fn restore_logical_id_reestablishes_vector_search_without_reingest() {
5527 let (db, service) = setup();
5528 {
5529 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5530 service
5531 .schema_manager
5532 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5533 .expect("ensure vec profile");
5534 conn.execute(
5535 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5536 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5537 [],
5538 )
5539 .expect("insert retired node");
5540 conn.execute(
5541 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5542 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5543 [],
5544 )
5545 .expect("insert chunk");
5546 conn.execute(
5547 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5548 [],
5549 )
5550 .expect("insert vec row");
5551 conn.execute(
5552 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5553 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5554 [],
5555 )
5556 .expect("insert retire event");
5557 }
5558
5559 let report = service.restore_logical_id("doc-1").expect("restore");
5560 assert_eq!(report.restored_vec_rows, 1);
5561
5562 let coordinator = ExecutionCoordinator::open(
5563 db.path(),
5564 Arc::new(SchemaManager::new()),
5565 Some(4),
5566 1,
5567 Arc::new(TelemetryCounters::default()),
5568 None,
5569 )
5570 .expect("coordinator");
5571 let compiled = QueryBuilder::nodes("Document")
5572 .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5573 .compile()
5574 .expect("compile");
5575 let rows = coordinator
5576 .execute_compiled_read(&compiled)
5577 .expect("vector read");
5578 assert!(
5579 rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5580 "restore should make the preserved vec row visible again without re-ingest"
5581 );
5582 }
5583
5584 #[cfg(feature = "sqlite-vec")]
5585 #[test]
5586 fn purge_logical_id_deletes_vec_rows_for_retired_content() {
5587 let (db, service) = setup();
5588 {
5589 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5590 service
5591 .schema_manager
5592 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5593 .expect("ensure vec profile");
5594 conn.execute(
5595 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
5596 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
5597 [],
5598 )
5599 .expect("insert retired node");
5600 conn.execute(
5601 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5602 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5603 [],
5604 )
5605 .expect("insert chunk");
5606 conn.execute(
5607 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
5608 [],
5609 )
5610 .expect("insert vec row");
5611 }
5612
5613 let report = service.purge_logical_id("doc-1").expect("purge");
5614 assert_eq!(report.deleted_vec_rows, 1);
5615
5616 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5617 let vec_count: i64 = conn
5618 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
5619 row.get(0)
5620 })
5621 .expect("vec count");
5622 assert_eq!(vec_count, 0);
5623 }
5624
5625 #[cfg(feature = "sqlite-vec")]
5626 #[test]
5627 fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
5628 let (db, service) = setup();
5629 let temp_dir = tempfile::tempdir().expect("temp dir");
5630 let script_path = temp_dir.path().join("vector-generator-restore.sh");
5631 fs::write(
5632 &script_path,
5633 r#"#!/usr/bin/env bash
5634set -euo pipefail
5635python3 -c 'import json, sys
5636payload = json.load(sys.stdin)
5637json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [0.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
5638"#,
5639 )
5640 .expect("write script");
5641 set_file_mode(&script_path, 0o755);
5642
5643 {
5644 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5645 service
5646 .schema_manager
5647 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
5648 .expect("ensure vec profile");
5649 conn.execute(
5650 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
5651 VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
5652 [],
5653 )
5654 .expect("insert node");
5655 conn.execute(
5656 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
5657 VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
5658 [],
5659 )
5660 .expect("insert chunk");
5661 }
5662
5663 service
5664 .regenerate_vector_embeddings(&VectorRegenerationConfig {
5665 profile: "default".to_owned(),
5666 table_name: "vec_nodes_active".to_owned(),
5667 model_identity: "model".to_owned(),
5668 model_version: "1.0.0".to_owned(),
5669 dimension: 4,
5670 normalization_policy: "l2".to_owned(),
5671 chunking_policy: "per_chunk".to_owned(),
5672 preprocessing_policy: "trim".to_owned(),
5673 generator_command: vec![script_path.to_string_lossy().to_string()],
5674 })
5675 .expect("regenerate");
5676
5677 {
5678 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
5679 conn.execute(
5680 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
5681 VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
5682 [],
5683 )
5684 .expect("insert retire event");
5685 conn.execute(
5686 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
5687 [],
5688 )
5689 .expect("retire node");
5690 }
5691
5692 let report = service.restore_logical_id("doc-1").expect("restore");
5693 assert_eq!(report.restored_vec_rows, 1);
5694
5695 let coordinator = ExecutionCoordinator::open(
5696 db.path(),
5697 Arc::new(SchemaManager::new()),
5698 Some(4),
5699 1,
5700 Arc::new(TelemetryCounters::default()),
5701 None,
5702 )
5703 .expect("coordinator");
5704 let compiled = QueryBuilder::nodes("Document")
5705 .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
5706 .compile()
5707 .expect("compile");
5708 let rows = coordinator
5709 .execute_compiled_read(&compiled)
5710 .expect("vector read");
5711 assert!(
5712 rows.nodes.iter().any(|row| row.logical_id == "doc-1"),
5713 "restored logical_id should become visible through regenerated vectors"
5714 );
5715 }
5716
5717 #[test]
5718 fn check_semantics_clean_db_returns_zeros() {
5719 let (_db, service) = setup();
5720 let report = service.check_semantics().expect("semantics check");
5721 assert_eq!(report.orphaned_chunks, 0);
5722 assert_eq!(report.null_source_ref_nodes, 0);
5723 assert_eq!(report.broken_step_fk, 0);
5724 assert_eq!(report.broken_action_fk, 0);
5725 assert_eq!(report.stale_fts_rows, 0);
5726 assert_eq!(report.fts_rows_for_superseded_nodes, 0);
5727 assert_eq!(report.dangling_edges, 0);
5728 assert_eq!(report.orphaned_supersession_chains, 0);
5729 assert_eq!(report.stale_vec_rows, 0);
5730 assert_eq!(report.vec_rows_for_superseded_nodes, 0);
5731 assert_eq!(report.missing_operational_current_rows, 0);
5732 assert_eq!(report.stale_operational_current_rows, 0);
5733 assert_eq!(report.disabled_collection_mutations, 0);
5734 assert_eq!(report.mismatched_kind_property_fts_rows, 0);
5735 assert_eq!(report.duplicate_property_fts_rows, 0);
5736 assert_eq!(report.drifted_property_fts_rows, 0);
5737 assert!(report.warnings.is_empty());
5738 }
5739
5740 #[test]
5741 fn register_operational_collection_persists_and_emits_provenance() {
5742 let (db, service) = setup();
5743 let record = service
5744 .register_operational_collection(&OperationalRegisterRequest {
5745 name: "connector_health".to_owned(),
5746 kind: OperationalCollectionKind::LatestState,
5747 schema_json: "{}".to_owned(),
5748 retention_json: "{}".to_owned(),
5749 filter_fields_json: "[]".to_owned(),
5750 validation_json: String::new(),
5751 secondary_indexes_json: "[]".to_owned(),
5752 format_version: 1,
5753 })
5754 .expect("register collection");
5755
5756 assert_eq!(record.name, "connector_health");
5757 assert_eq!(record.kind, OperationalCollectionKind::LatestState);
5758 assert_eq!(record.schema_json, "{}");
5759 assert_eq!(record.retention_json, "{}");
5760 assert_eq!(record.filter_fields_json, "[]");
5761 assert!(record.created_at > 0);
5762 assert_eq!(record.disabled_at, None);
5763
5764 let described = service
5765 .describe_operational_collection("connector_health")
5766 .expect("describe collection")
5767 .expect("collection exists");
5768 assert_eq!(described, record);
5769
5770 let conn = sqlite::open_connection(db.path()).expect("conn");
5771 let provenance_count: i64 = conn
5772 .query_row(
5773 "SELECT count(*) FROM provenance_events \
5774 WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
5775 [],
5776 |row| row.get(0),
5777 )
5778 .expect("provenance count");
5779 assert_eq!(provenance_count, 1);
5780 }
5781
5782 #[test]
5783 fn register_and_update_operational_collection_validation_round_trip() {
5784 let (db, service) = setup();
5785 let record = service
5786 .register_operational_collection(&OperationalRegisterRequest {
5787 name: "connector_health".to_owned(),
5788 kind: OperationalCollectionKind::LatestState,
5789 schema_json: "{}".to_owned(),
5790 retention_json: "{}".to_owned(),
5791 filter_fields_json: "[]".to_owned(),
5792 validation_json: String::new(),
5793 secondary_indexes_json: "[]".to_owned(),
5794 format_version: 1,
5795 })
5796 .expect("register collection");
5797 assert_eq!(record.validation_json, "");
5798
5799 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
5800 let updated = service
5801 .update_operational_collection_validation("connector_health", validation_json)
5802 .expect("update validation");
5803 assert_eq!(updated.validation_json, validation_json);
5804
5805 let described = service
5806 .describe_operational_collection("connector_health")
5807 .expect("describe collection")
5808 .expect("collection exists");
5809 assert_eq!(described.validation_json, validation_json);
5810
5811 let conn = sqlite::open_connection(db.path()).expect("conn");
5812 let provenance_count: i64 = conn
5813 .query_row(
5814 "SELECT count(*) FROM provenance_events \
5815 WHERE event_type = 'operational_collection_validation_updated' \
5816 AND subject = 'connector_health'",
5817 [],
5818 |row| row.get(0),
5819 )
5820 .expect("provenance count");
5821 assert_eq!(provenance_count, 1);
5822 }
5823
5824 #[test]
5825 fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5826 let (db, service) = setup();
5827 let record = service
5828 .register_operational_collection(&OperationalRegisterRequest {
5829 name: "audit_log".to_owned(),
5830 kind: OperationalCollectionKind::AppendOnlyLog,
5831 schema_json: "{}".to_owned(),
5832 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5833 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5834 validation_json: String::new(),
5835 secondary_indexes_json: "[]".to_owned(),
5836 format_version: 1,
5837 })
5838 .expect("register collection");
5839 assert_eq!(record.secondary_indexes_json, "[]");
5840
5841 {
5842 let writer = crate::WriterActor::start(
5843 db.path(),
5844 Arc::new(SchemaManager::new()),
5845 crate::ProvenanceMode::Warn,
5846 Arc::new(crate::TelemetryCounters::default()),
5847 )
5848 .expect("writer");
5849 writer
5850 .submit(crate::WriteRequest {
5851 label: "secondary-index-seed".to_owned(),
5852 nodes: vec![],
5853 node_retires: vec![],
5854 edges: vec![],
5855 edge_retires: vec![],
5856 chunks: vec![],
5857 runs: vec![],
5858 steps: vec![],
5859 actions: vec![],
5860 optional_backfills: vec![],
5861 vec_inserts: vec![],
5862 operational_writes: vec![
5863 crate::OperationalWrite::Append {
5864 collection: "audit_log".to_owned(),
5865 record_key: "evt-1".to_owned(),
5866 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5867 source_ref: Some("src-1".to_owned()),
5868 },
5869 crate::OperationalWrite::Append {
5870 collection: "audit_log".to_owned(),
5871 record_key: "evt-2".to_owned(),
5872 payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5873 source_ref: Some("src-2".to_owned()),
5874 },
5875 ],
5876 })
5877 .expect("seed writes");
5878 }
5879
5880 let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5881 let updated = service
5882 .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5883 .expect("update secondary indexes");
5884 assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5885
5886 let conn = sqlite::open_connection(db.path()).expect("conn");
5887 let entry_count: i64 = conn
5888 .query_row(
5889 "SELECT count(*) FROM operational_secondary_index_entries \
5890 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5891 [],
5892 |row| row.get(0),
5893 )
5894 .expect("secondary index count");
5895 assert_eq!(entry_count, 2);
5896 conn.execute(
5897 "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5898 [],
5899 )
5900 .expect("clear index entries");
5901 drop(conn);
5902
5903 let rebuild = service
5904 .rebuild_operational_secondary_indexes("audit_log")
5905 .expect("rebuild secondary indexes");
5906 assert_eq!(rebuild.collection_name, "audit_log");
5907 assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5908 assert_eq!(rebuild.current_entries_rebuilt, 0);
5909 }
5910
5911 #[test]
5912 fn register_operational_collection_rejects_invalid_validation_contract() {
5913 let (_db, service) = setup();
5914
5915 let error = service
5916 .register_operational_collection(&OperationalRegisterRequest {
5917 name: "connector_health".to_owned(),
5918 kind: OperationalCollectionKind::LatestState,
5919 schema_json: "{}".to_owned(),
5920 retention_json: "{}".to_owned(),
5921 filter_fields_json: "[]".to_owned(),
5922 validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5923 .to_owned(),
5924 secondary_indexes_json: "[]".to_owned(),
5925 format_version: 1,
5926 })
5927 .expect_err("invalid validation contract should reject");
5928
5929 assert!(matches!(error, EngineError::InvalidWrite(_)));
5930 assert!(error.to_string().contains("minimum/maximum"));
5931 }
5932
5933 #[test]
5934 fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5935 let (db, service) = setup();
5936 service
5937 .register_operational_collection(&OperationalRegisterRequest {
5938 name: "audit_log".to_owned(),
5939 kind: OperationalCollectionKind::AppendOnlyLog,
5940 schema_json: "{}".to_owned(),
5941 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5942 filter_fields_json: "[]".to_owned(),
5943 validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5944 .to_owned(),
5945 secondary_indexes_json: "[]".to_owned(),
5946 format_version: 1,
5947 })
5948 .expect("register collection");
5949 {
5950 let writer = crate::WriterActor::start(
5951 db.path(),
5952 Arc::new(SchemaManager::new()),
5953 crate::ProvenanceMode::Warn,
5954 Arc::new(crate::TelemetryCounters::default()),
5955 )
5956 .expect("writer");
5957 writer
5958 .submit(crate::WriteRequest {
5959 label: "history-validation".to_owned(),
5960 nodes: vec![],
5961 node_retires: vec![],
5962 edges: vec![],
5963 edge_retires: vec![],
5964 chunks: vec![],
5965 runs: vec![],
5966 steps: vec![],
5967 actions: vec![],
5968 optional_backfills: vec![],
5969 vec_inserts: vec![],
5970 operational_writes: vec![
5971 crate::OperationalWrite::Append {
5972 collection: "audit_log".to_owned(),
5973 record_key: "evt-1".to_owned(),
5974 payload_json: r#"{"status":"ok"}"#.to_owned(),
5975 source_ref: Some("src-1".to_owned()),
5976 },
5977 crate::OperationalWrite::Append {
5978 collection: "audit_log".to_owned(),
5979 record_key: "evt-2".to_owned(),
5980 payload_json: r#"{"status":"bogus"}"#.to_owned(),
5981 source_ref: Some("src-2".to_owned()),
5982 },
5983 ],
5984 })
5985 .expect("write");
5986 }
5987
5988 let report = service
5989 .validate_operational_collection_history("audit_log")
5990 .expect("validate history");
5991 assert_eq!(report.collection_name, "audit_log");
5992 assert_eq!(report.checked_rows, 2);
5993 assert_eq!(report.invalid_row_count, 1);
5994 assert_eq!(report.issues.len(), 1);
5995 assert_eq!(report.issues[0].record_key, "evt-2");
5996 assert!(report.issues[0].message.contains("must be one of"));
5997
5998 let trace = service
5999 .trace_operational_collection("audit_log", None)
6000 .expect("trace");
6001 assert_eq!(trace.mutation_count, 2);
6002
6003 let conn = sqlite::open_connection(db.path()).expect("conn");
6004 let provenance_count: i64 = conn
6005 .query_row(
6006 "SELECT count(*) FROM provenance_events \
6007 WHERE event_type = 'operational_collection_history_validated' \
6008 AND subject = 'audit_log'",
6009 [],
6010 |row| row.get(0),
6011 )
6012 .expect("provenance count");
6013 assert_eq!(provenance_count, 0);
6014 }
6015
6016 #[test]
6017 fn trace_operational_collection_returns_mutations_and_current_rows() {
6018 let (db, service) = setup();
6019 service
6020 .register_operational_collection(&OperationalRegisterRequest {
6021 name: "connector_health".to_owned(),
6022 kind: OperationalCollectionKind::LatestState,
6023 schema_json: "{}".to_owned(),
6024 retention_json: "{}".to_owned(),
6025 filter_fields_json: "[]".to_owned(),
6026 validation_json: String::new(),
6027 secondary_indexes_json: "[]".to_owned(),
6028 format_version: 1,
6029 })
6030 .expect("register collection");
6031 {
6032 let writer = crate::WriterActor::start(
6033 db.path(),
6034 Arc::new(SchemaManager::new()),
6035 crate::ProvenanceMode::Warn,
6036 Arc::new(crate::TelemetryCounters::default()),
6037 )
6038 .expect("writer");
6039 writer
6040 .submit(crate::WriteRequest {
6041 label: "operational".to_owned(),
6042 nodes: vec![],
6043 node_retires: vec![],
6044 edges: vec![],
6045 edge_retires: vec![],
6046 chunks: vec![],
6047 runs: vec![],
6048 steps: vec![],
6049 actions: vec![],
6050 optional_backfills: vec![],
6051 vec_inserts: vec![],
6052 operational_writes: vec![crate::OperationalWrite::Put {
6053 collection: "connector_health".to_owned(),
6054 record_key: "gmail".to_owned(),
6055 payload_json: r#"{"status":"ok"}"#.to_owned(),
6056 source_ref: Some("src-1".to_owned()),
6057 }],
6058 })
6059 .expect("write");
6060 }
6061
6062 let report = service
6063 .trace_operational_collection("connector_health", Some("gmail"))
6064 .expect("trace");
6065 assert_eq!(report.collection_name, "connector_health");
6066 assert_eq!(report.record_key.as_deref(), Some("gmail"));
6067 assert_eq!(report.mutation_count, 1);
6068 assert_eq!(report.current_count, 1);
6069 assert_eq!(report.mutations[0].op_kind, "put");
6070 assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
6071 }
6072
6073 #[test]
6074 fn trace_operational_collection_rejects_unknown_collection() {
6075 let (_db, service) = setup();
6076
6077 let error = service
6078 .trace_operational_collection("missing_collection", None)
6079 .expect_err("unknown collection should fail");
6080
6081 assert!(matches!(error, EngineError::InvalidWrite(_)));
6082 assert!(error.to_string().contains("is not registered"));
6083 }
6084
6085 #[test]
6086 fn rebuild_operational_current_repairs_missing_latest_state_rows() {
6087 let (db, service) = setup();
6088 service
6089 .register_operational_collection(&OperationalRegisterRequest {
6090 name: "connector_health".to_owned(),
6091 kind: OperationalCollectionKind::LatestState,
6092 schema_json: "{}".to_owned(),
6093 retention_json: "{}".to_owned(),
6094 filter_fields_json: "[]".to_owned(),
6095 validation_json: String::new(),
6096 secondary_indexes_json: "[]".to_owned(),
6097 format_version: 1,
6098 })
6099 .expect("register collection");
6100 {
6101 let writer = crate::WriterActor::start(
6102 db.path(),
6103 Arc::new(SchemaManager::new()),
6104 crate::ProvenanceMode::Warn,
6105 Arc::new(crate::TelemetryCounters::default()),
6106 )
6107 .expect("writer");
6108 writer
6109 .submit(crate::WriteRequest {
6110 label: "operational".to_owned(),
6111 nodes: vec![],
6112 node_retires: vec![],
6113 edges: vec![],
6114 edge_retires: vec![],
6115 chunks: vec![],
6116 runs: vec![],
6117 steps: vec![],
6118 actions: vec![],
6119 optional_backfills: vec![],
6120 vec_inserts: vec![],
6121 operational_writes: vec![crate::OperationalWrite::Put {
6122 collection: "connector_health".to_owned(),
6123 record_key: "gmail".to_owned(),
6124 payload_json: r#"{"status":"ok"}"#.to_owned(),
6125 source_ref: Some("src-1".to_owned()),
6126 }],
6127 })
6128 .expect("write");
6129 }
6130 {
6131 let conn = sqlite::open_connection(db.path()).expect("conn");
6132 conn.execute(
6133 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6134 [],
6135 )
6136 .expect("delete current row");
6137 }
6138
6139 let before = service.check_semantics().expect("semantics before rebuild");
6140 assert_eq!(before.missing_operational_current_rows, 1);
6141
6142 let repair = service
6143 .rebuild_operational_current(Some("connector_health"))
6144 .expect("rebuild current");
6145 assert_eq!(repair.collections_rebuilt, 1);
6146 assert_eq!(repair.current_rows_rebuilt, 1);
6147
6148 let after = service.check_semantics().expect("semantics after rebuild");
6149 assert_eq!(after.missing_operational_current_rows, 0);
6150
6151 let conn = sqlite::open_connection(db.path()).expect("conn");
6152 let payload: String = conn
6153 .query_row(
6154 "SELECT payload_json FROM operational_current \
6155 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6156 [],
6157 |row| row.get(0),
6158 )
6159 .expect("restored payload");
6160 assert_eq!(payload, r#"{"status":"ok"}"#);
6161 }
6162
6163 #[test]
6164 fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
6165 let (db, service) = setup();
6166 service
6167 .register_operational_collection(&OperationalRegisterRequest {
6168 name: "connector_health".to_owned(),
6169 kind: OperationalCollectionKind::LatestState,
6170 schema_json: "{}".to_owned(),
6171 retention_json: "{}".to_owned(),
6172 filter_fields_json: "[]".to_owned(),
6173 validation_json: String::new(),
6174 secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
6175 format_version: 1,
6176 })
6177 .expect("register collection");
6178 {
6179 let writer = crate::WriterActor::start(
6180 db.path(),
6181 Arc::new(SchemaManager::new()),
6182 crate::ProvenanceMode::Warn,
6183 Arc::new(crate::TelemetryCounters::default()),
6184 )
6185 .expect("writer");
6186 writer
6187 .submit(crate::WriteRequest {
6188 label: "operational".to_owned(),
6189 nodes: vec![],
6190 node_retires: vec![],
6191 edges: vec![],
6192 edge_retires: vec![],
6193 chunks: vec![],
6194 runs: vec![],
6195 steps: vec![],
6196 actions: vec![],
6197 optional_backfills: vec![],
6198 vec_inserts: vec![],
6199 operational_writes: vec![crate::OperationalWrite::Put {
6200 collection: "connector_health".to_owned(),
6201 record_key: "gmail".to_owned(),
6202 payload_json: r#"{"status":"ok"}"#.to_owned(),
6203 source_ref: Some("src-1".to_owned()),
6204 }],
6205 })
6206 .expect("write");
6207 }
6208 {
6209 let conn = sqlite::open_connection(db.path()).expect("conn");
6210 let entry_count: i64 = conn
6211 .query_row(
6212 "SELECT count(*) FROM operational_secondary_index_entries \
6213 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6214 [],
6215 |row| row.get(0),
6216 )
6217 .expect("secondary index count before repair");
6218 assert_eq!(entry_count, 1);
6219 conn.execute(
6220 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6221 [],
6222 )
6223 .expect("delete current row");
6224 }
6225
6226 service
6227 .rebuild_operational_current(Some("connector_health"))
6228 .expect("rebuild current");
6229
6230 let conn = sqlite::open_connection(db.path()).expect("conn");
6231 let entry_count: i64 = conn
6232 .query_row(
6233 "SELECT count(*) FROM operational_secondary_index_entries \
6234 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
6235 [],
6236 |row| row.get(0),
6237 )
6238 .expect("secondary index count after repair");
6239 assert_eq!(entry_count, 1);
6240 }
6241
6242 #[test]
6243 fn operational_current_semantics_and_rebuild_follow_mutation_order() {
6244 let (db, service) = setup();
6245 {
6246 let conn = sqlite::open_connection(db.path()).expect("conn");
6247 conn.execute(
6248 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6249 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
6250 [],
6251 )
6252 .expect("seed collection");
6253 conn.execute(
6254 "INSERT INTO operational_mutations \
6255 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6256 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
6257 [],
6258 )
6259 .expect("seed first put");
6260 conn.execute(
6261 "INSERT INTO operational_mutations \
6262 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6263 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
6264 [],
6265 )
6266 .expect("seed delete");
6267 conn.execute(
6268 "INSERT INTO operational_mutations \
6269 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6270 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
6271 [],
6272 )
6273 .expect("seed final put");
6274 conn.execute(
6275 "INSERT INTO operational_current \
6276 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
6277 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
6278 [],
6279 )
6280 .expect("seed current");
6281 }
6282
6283 let before = service.check_semantics().expect("semantics before rebuild");
6284 assert_eq!(before.missing_operational_current_rows, 0);
6285 assert_eq!(before.stale_operational_current_rows, 0);
6286
6287 {
6288 let conn = sqlite::open_connection(db.path()).expect("conn");
6289 conn.execute(
6290 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6291 [],
6292 )
6293 .expect("delete current row");
6294 }
6295
6296 let missing = service.check_semantics().expect("semantics after delete");
6297 assert_eq!(missing.missing_operational_current_rows, 1);
6298 assert_eq!(missing.stale_operational_current_rows, 0);
6299
6300 service
6301 .rebuild_operational_current(Some("connector_health"))
6302 .expect("rebuild current");
6303
6304 let after = service.check_semantics().expect("semantics after rebuild");
6305 assert_eq!(after.missing_operational_current_rows, 0);
6306 assert_eq!(after.stale_operational_current_rows, 0);
6307
6308 let conn = sqlite::open_connection(db.path()).expect("conn");
6309 let payload: String = conn
6310 .query_row(
6311 "SELECT payload_json FROM operational_current \
6312 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
6313 [],
6314 |row| row.get(0),
6315 )
6316 .expect("restored payload");
6317 assert_eq!(payload, r#"{"status":"new"}"#);
6318 }
6319
6320 #[test]
6321 fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
6322 let (db, service) = setup();
6323 service
6324 .register_operational_collection(&OperationalRegisterRequest {
6325 name: "audit_log".to_owned(),
6326 kind: OperationalCollectionKind::AppendOnlyLog,
6327 schema_json: "{}".to_owned(),
6328 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6329 filter_fields_json: "[]".to_owned(),
6330 validation_json: String::new(),
6331 secondary_indexes_json: "[]".to_owned(),
6332 format_version: 1,
6333 })
6334 .expect("register collection");
6335
6336 let record = service
6337 .disable_operational_collection("audit_log")
6338 .expect("disable collection");
6339 assert_eq!(record.name, "audit_log");
6340 assert!(record.disabled_at.is_some());
6341
6342 let disabled_at = record.disabled_at.expect("disabled_at");
6343 let described = service
6344 .describe_operational_collection("audit_log")
6345 .expect("describe collection")
6346 .expect("collection exists");
6347 assert_eq!(described.disabled_at, Some(disabled_at));
6348
6349 let writer = crate::WriterActor::start(
6350 db.path(),
6351 Arc::new(SchemaManager::new()),
6352 crate::ProvenanceMode::Warn,
6353 Arc::new(crate::TelemetryCounters::default()),
6354 )
6355 .expect("writer");
6356 let error = writer
6357 .submit(crate::WriteRequest {
6358 label: "disabled-operational".to_owned(),
6359 nodes: vec![],
6360 node_retires: vec![],
6361 edges: vec![],
6362 edge_retires: vec![],
6363 chunks: vec![],
6364 runs: vec![],
6365 steps: vec![],
6366 actions: vec![],
6367 optional_backfills: vec![],
6368 vec_inserts: vec![],
6369 operational_writes: vec![crate::OperationalWrite::Append {
6370 collection: "audit_log".to_owned(),
6371 record_key: "evt-1".to_owned(),
6372 payload_json: r#"{"type":"sync"}"#.to_owned(),
6373 source_ref: Some("src-1".to_owned()),
6374 }],
6375 })
6376 .expect_err("disabled collection should reject writes");
6377 assert!(matches!(error, EngineError::InvalidWrite(_)));
6378 assert!(error.to_string().contains("is disabled"));
6379
6380 let conn = sqlite::open_connection(db.path()).expect("conn");
6381 let provenance_count: i64 = conn
6382 .query_row(
6383 "SELECT count(*) FROM provenance_events \
6384 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
6385 [],
6386 |row| row.get(0),
6387 )
6388 .expect("provenance count");
6389 assert_eq!(provenance_count, 1);
6390 }
6391
6392 #[test]
6393 fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
6394 let (db, service) = setup();
6395 {
6396 let conn = sqlite::open_connection(db.path()).expect("conn");
6397 conn.execute(
6398 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6399 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
6400 [],
6401 )
6402 .expect("seed collection");
6403 conn.execute(
6404 "INSERT INTO operational_mutations \
6405 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6406 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
6407 [],
6408 )
6409 .expect("seed event 1");
6410 conn.execute(
6411 "INSERT INTO operational_mutations \
6412 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6413 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
6414 [],
6415 )
6416 .expect("seed event 2");
6417 conn.execute(
6418 "INSERT INTO operational_mutations \
6419 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6420 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
6421 [],
6422 )
6423 .expect("seed event 3");
6424 }
6425
6426 let report = service
6427 .purge_operational_collection("audit_log", 250)
6428 .expect("purge collection");
6429 assert_eq!(report.collection_name, "audit_log");
6430 assert_eq!(report.deleted_mutations, 2);
6431 assert_eq!(report.before_timestamp, 250);
6432
6433 let conn = sqlite::open_connection(db.path()).expect("conn");
6434 let remaining: Vec<String> = {
6435 let mut stmt = conn
6436 .prepare(
6437 "SELECT id FROM operational_mutations \
6438 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6439 )
6440 .expect("stmt");
6441 stmt.query_map([], |row| row.get(0))
6442 .expect("rows")
6443 .collect::<Result<_, _>>()
6444 .expect("collect")
6445 };
6446 assert_eq!(remaining, vec!["evt-3".to_owned()]);
6447 let provenance_count: i64 = conn
6448 .query_row(
6449 "SELECT count(*) FROM provenance_events \
6450 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
6451 [],
6452 |row| row.get(0),
6453 )
6454 .expect("provenance count");
6455 assert_eq!(provenance_count, 1);
6456 }
6457
6458 #[test]
6459 fn compact_operational_collection_dry_run_reports_without_mutation() {
6460 let (db, service) = setup();
6461 {
6462 let conn = sqlite::open_connection(db.path()).expect("conn");
6463 conn.execute(
6464 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6465 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6466 [],
6467 )
6468 .expect("seed collection");
6469 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6470 conn.execute(
6471 "INSERT INTO operational_mutations \
6472 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6473 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6474 rusqlite::params![
6475 format!("evt-{index}"),
6476 format!("{{\"seq\":{index}}}"),
6477 created_at,
6478 index,
6479 ],
6480 )
6481 .expect("seed event");
6482 }
6483 }
6484
6485 let report = service
6486 .compact_operational_collection("audit_log", true)
6487 .expect("compact collection");
6488 assert_eq!(report.collection_name, "audit_log");
6489 assert_eq!(report.deleted_mutations, 1);
6490 assert!(report.dry_run);
6491 assert_eq!(report.before_timestamp, None);
6492
6493 let conn = sqlite::open_connection(db.path()).expect("conn");
6494 let remaining_count: i64 = conn
6495 .query_row(
6496 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
6497 [],
6498 |row| row.get(0),
6499 )
6500 .expect("remaining count");
6501 assert_eq!(remaining_count, 3);
6502 let provenance_count: i64 = conn
6503 .query_row(
6504 "SELECT count(*) FROM provenance_events \
6505 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6506 [],
6507 |row| row.get(0),
6508 )
6509 .expect("provenance count");
6510 assert_eq!(provenance_count, 0);
6511 }
6512
6513 #[test]
6514 fn compact_operational_collection_keep_last_deletes_oldest_rows() {
6515 let (db, service) = setup();
6516 {
6517 let conn = sqlite::open_connection(db.path()).expect("conn");
6518 conn.execute(
6519 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6520 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6521 [],
6522 )
6523 .expect("seed collection");
6524 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
6525 conn.execute(
6526 "INSERT INTO operational_mutations \
6527 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6528 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6529 rusqlite::params![
6530 format!("evt-{index}"),
6531 format!("{{\"seq\":{index}}}"),
6532 created_at,
6533 index,
6534 ],
6535 )
6536 .expect("seed event");
6537 }
6538 }
6539
6540 let report = service
6541 .compact_operational_collection("audit_log", false)
6542 .expect("compact collection");
6543 assert_eq!(report.deleted_mutations, 1);
6544 assert!(!report.dry_run);
6545
6546 let conn = sqlite::open_connection(db.path()).expect("conn");
6547 let remaining: Vec<String> = {
6548 let mut stmt = conn
6549 .prepare(
6550 "SELECT id FROM operational_mutations \
6551 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
6552 )
6553 .expect("stmt");
6554 stmt.query_map([], |row| row.get(0))
6555 .expect("rows")
6556 .collect::<Result<_, _>>()
6557 .expect("collect")
6558 };
6559 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
6560 let provenance_count: i64 = conn
6561 .query_row(
6562 "SELECT count(*) FROM provenance_events \
6563 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
6564 [],
6565 |row| row.get(0),
6566 )
6567 .expect("provenance count");
6568 assert_eq!(provenance_count, 1);
6569 }
6570
    /// Walks a `keep_last` retention policy (`max_rows` = 2) through planning,
    /// a dry run, and a real run against a collection seeded with three rows.
    ///
    /// Planning and the dry run must each report one candidate deletion without
    /// touching the data; the real run must leave `evt-2` and `evt-3` and write
    /// an `operational_retention_runs` row whose `executed_at` is `1_000`.
    #[test]
    fn plan_and_run_operational_retention_keep_last() {
        let (db, service) = setup();
        {
            // Seed the collection and three append rows directly through SQL.
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
                conn.execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
            }
        }

        // Phase 1: the plan alone describes the pending work.
        let plan = service
            .plan_operational_retention(1_000, None, Some(10))
            .expect("plan retention");
        assert_eq!(plan.collections_examined, 1);
        assert_eq!(plan.items[0].collection_name, "audit_log");
        assert_eq!(
            plan.items[0].action_kind,
            crate::operational::OperationalRetentionActionKind::KeepLast
        );
        assert_eq!(plan.items[0].candidate_deletions, 1);
        assert_eq!(plan.items[0].max_rows, Some(2));
        // No retention run has been recorded yet.
        assert_eq!(plan.items[0].last_run_at, None);

        // Phase 2: a dry run reports the same deletion but must change nothing.
        let dry_run = service
            .run_operational_retention(1_000, None, Some(10), true)
            .expect("dry-run retention");
        assert!(dry_run.dry_run);
        assert_eq!(dry_run.collections_acted_on, 1);
        assert_eq!(dry_run.items[0].deleted_mutations, 1);
        assert_eq!(dry_run.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("remaining count after dry run");
        assert_eq!(remaining_count, 3);
        let retention_run_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("retention run count");
        assert_eq!(retention_run_count, 0);
        // Close the verification connection before the real run.
        // NOTE(review): presumably so it cannot hold the database while the
        // service writes; confirm against the locking configuration.
        drop(conn);

        // Phase 3: the real run performs the deletion.
        let executed = service
            .run_operational_retention(1_000, None, Some(10), false)
            .expect("execute retention");
        assert_eq!(executed.collections_acted_on, 1);
        assert_eq!(executed.items[0].deleted_mutations, 1);
        assert_eq!(executed.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
        // The recorded run carries the `1_000` passed as the first argument.
        let last_run_at: i64 = conn
            .query_row(
                "SELECT executed_at FROM operational_retention_runs \
                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
                [],
                |row| row.get(0),
            )
            .expect("last run at");
        assert_eq!(last_run_at, 1_000);
    }
6669
6670 #[test]
6671 fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6672 let (db, service) = setup();
6673 let conn = sqlite::open_connection(db.path()).expect("conn");
6674 conn.execute(
6675 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6676 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6677 [],
6678 )
6679 .expect("seed collection");
6680 for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6681 conn.execute(
6682 "INSERT INTO operational_mutations \
6683 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6684 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6685 rusqlite::params![
6686 format!("evt-{index}"),
6687 format!("{{\"seq\":{index}}}"),
6688 created_at,
6689 index,
6690 ],
6691 )
6692 .expect("seed event");
6693 }
6694 drop(conn);
6695
6696 let dry_run = service
6697 .run_operational_retention(1_000, None, Some(10), true)
6698 .expect("dry-run retention");
6699 assert!(dry_run.dry_run);
6700 assert_eq!(dry_run.collections_acted_on, 0);
6701 assert_eq!(dry_run.items[0].deleted_mutations, 0);
6702 assert_eq!(dry_run.items[0].rows_remaining, 2);
6703 }
6704
6705 #[test]
6706 fn compact_operational_collection_rejects_latest_state() {
6707 let (_db, service) = setup();
6708 service
6709 .register_operational_collection(&OperationalRegisterRequest {
6710 name: "connector_health".to_owned(),
6711 kind: OperationalCollectionKind::LatestState,
6712 schema_json: "{}".to_owned(),
6713 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6714 filter_fields_json: "[]".to_owned(),
6715 validation_json: String::new(),
6716 secondary_indexes_json: "[]".to_owned(),
6717 format_version: 1,
6718 })
6719 .expect("register collection");
6720
6721 let error = service
6722 .compact_operational_collection("connector_health", false)
6723 .expect_err("latest_state compaction should be rejected");
6724 assert!(matches!(error, EngineError::InvalidWrite(_)));
6725 assert!(error.to_string().contains("append_only_log"));
6726 }
6727
6728 #[test]
6729 fn register_operational_collection_persists_filter_fields_json() {
6730 let (_db, service) = setup();
6731
6732 let record = service
6733 .register_operational_collection(&OperationalRegisterRequest {
6734 name: "audit_log".to_owned(),
6735 kind: OperationalCollectionKind::AppendOnlyLog,
6736 schema_json: "{}".to_owned(),
6737 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6738 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6739 validation_json: String::new(),
6740 secondary_indexes_json: "[]".to_owned(),
6741 format_version: 1,
6742 })
6743 .expect("register collection");
6744
6745 assert_eq!(
6746 record.filter_fields_json,
6747 r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6748 );
6749 }
6750
6751 #[test]
6752 fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6753 let (db, service) = setup();
6754 service
6755 .register_operational_collection(&OperationalRegisterRequest {
6756 name: "audit_log".to_owned(),
6757 kind: OperationalCollectionKind::AppendOnlyLog,
6758 schema_json: "{}".to_owned(),
6759 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6760 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6761 validation_json: String::new(),
6762 secondary_indexes_json: "[]".to_owned(),
6763 format_version: 1,
6764 })
6765 .expect("register collection");
6766 {
6767 let writer = crate::WriterActor::start(
6768 db.path(),
6769 Arc::new(SchemaManager::new()),
6770 crate::ProvenanceMode::Warn,
6771 Arc::new(crate::TelemetryCounters::default()),
6772 )
6773 .expect("writer");
6774 writer
6775 .submit(crate::WriteRequest {
6776 label: "operational".to_owned(),
6777 nodes: vec![],
6778 node_retires: vec![],
6779 edges: vec![],
6780 edge_retires: vec![],
6781 chunks: vec![],
6782 runs: vec![],
6783 steps: vec![],
6784 actions: vec![],
6785 optional_backfills: vec![],
6786 vec_inserts: vec![],
6787 operational_writes: vec![
6788 crate::OperationalWrite::Append {
6789 collection: "audit_log".to_owned(),
6790 record_key: "evt-1".to_owned(),
6791 payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6792 source_ref: Some("src-1".to_owned()),
6793 },
6794 crate::OperationalWrite::Append {
6795 collection: "audit_log".to_owned(),
6796 record_key: "evt-2".to_owned(),
6797 payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6798 source_ref: Some("src-2".to_owned()),
6799 },
6800 crate::OperationalWrite::Append {
6801 collection: "audit_log".to_owned(),
6802 record_key: "evt-3".to_owned(),
6803 payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6804 source_ref: Some("src-3".to_owned()),
6805 },
6806 ],
6807 })
6808 .expect("write");
6809 }
6810
6811 let report = service
6812 .read_operational_collection(&crate::operational::OperationalReadRequest {
6813 collection_name: "audit_log".to_owned(),
6814 filters: vec![
6815 crate::operational::OperationalFilterClause::Prefix {
6816 field: "actor".to_owned(),
6817 value: "alice".to_owned(),
6818 },
6819 crate::operational::OperationalFilterClause::Range {
6820 field: "ts".to_owned(),
6821 lower: Some(150),
6822 upper: Some(250),
6823 },
6824 ],
6825 limit: Some(10),
6826 })
6827 .expect("filtered read");
6828
6829 assert_eq!(report.collection_name, "audit_log");
6830 assert_eq!(report.row_count, 1);
6831 assert!(!report.was_limited);
6832 assert_eq!(report.rows.len(), 1);
6833 assert_eq!(report.rows[0].record_key, "evt-2");
6834 assert_eq!(
6835 report.rows[0].payload_json,
6836 r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6837 );
6838 }
6839
6840 #[test]
6841 fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6842 let (db, service) = setup();
6843 service
6844 .register_operational_collection(&OperationalRegisterRequest {
6845 name: "audit_log".to_owned(),
6846 kind: OperationalCollectionKind::AppendOnlyLog,
6847 schema_json: "{}".to_owned(),
6848 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6849 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6850 validation_json: String::new(),
6851 secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6852 format_version: 1,
6853 })
6854 .expect("register collection");
6855 {
6856 let writer = crate::WriterActor::start(
6857 db.path(),
6858 Arc::new(SchemaManager::new()),
6859 crate::ProvenanceMode::Warn,
6860 Arc::new(crate::TelemetryCounters::default()),
6861 )
6862 .expect("writer");
6863 writer
6864 .submit(crate::WriteRequest {
6865 label: "operational".to_owned(),
6866 nodes: vec![],
6867 node_retires: vec![],
6868 edges: vec![],
6869 edge_retires: vec![],
6870 chunks: vec![],
6871 runs: vec![],
6872 steps: vec![],
6873 actions: vec![],
6874 optional_backfills: vec![],
6875 vec_inserts: vec![],
6876 operational_writes: vec![
6877 crate::OperationalWrite::Append {
6878 collection: "audit_log".to_owned(),
6879 record_key: "evt-1".to_owned(),
6880 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6881 source_ref: Some("src-1".to_owned()),
6882 },
6883 crate::OperationalWrite::Append {
6884 collection: "audit_log".to_owned(),
6885 record_key: "evt-2".to_owned(),
6886 payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6887 source_ref: Some("src-2".to_owned()),
6888 },
6889 ],
6890 })
6891 .expect("write");
6892 }
6893 let conn = sqlite::open_connection(db.path()).expect("conn");
6894 conn.execute(
6895 "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6896 [],
6897 )
6898 .expect("clear filter values");
6899 drop(conn);
6900
6901 let report = service
6902 .read_operational_collection(&crate::operational::OperationalReadRequest {
6903 collection_name: "audit_log".to_owned(),
6904 filters: vec![
6905 crate::operational::OperationalFilterClause::Prefix {
6906 field: "actor".to_owned(),
6907 value: "alice".to_owned(),
6908 },
6909 crate::operational::OperationalFilterClause::Range {
6910 field: "ts".to_owned(),
6911 lower: Some(150),
6912 upper: Some(250),
6913 },
6914 ],
6915 limit: Some(10),
6916 })
6917 .expect("secondary-index read");
6918
6919 assert_eq!(report.row_count, 1);
6920 assert_eq!(report.rows[0].record_key, "evt-2");
6921 }
6922
6923 #[test]
6924 fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6925 let (_db, service) = setup();
6926 service
6927 .register_operational_collection(&OperationalRegisterRequest {
6928 name: "connector_health".to_owned(),
6929 kind: OperationalCollectionKind::LatestState,
6930 schema_json: "{}".to_owned(),
6931 retention_json: "{}".to_owned(),
6932 filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6933 .to_owned(),
6934 validation_json: String::new(),
6935 secondary_indexes_json: "[]".to_owned(),
6936 format_version: 1,
6937 })
6938 .expect("register collection");
6939
6940 let latest_state_error = service
6941 .read_operational_collection(&crate::operational::OperationalReadRequest {
6942 collection_name: "connector_health".to_owned(),
6943 filters: vec![crate::operational::OperationalFilterClause::Exact {
6944 field: "status".to_owned(),
6945 value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6946 }],
6947 limit: Some(10),
6948 })
6949 .expect_err("latest_state filtered reads should be rejected");
6950 assert!(latest_state_error.to_string().contains("append_only_log"));
6951
6952 service
6953 .register_operational_collection(&OperationalRegisterRequest {
6954 name: "audit_log".to_owned(),
6955 kind: OperationalCollectionKind::AppendOnlyLog,
6956 schema_json: "{}".to_owned(),
6957 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6958 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6959 .to_owned(),
6960 validation_json: String::new(),
6961 secondary_indexes_json: "[]".to_owned(),
6962 format_version: 1,
6963 })
6964 .expect("register append-only collection");
6965
6966 let undeclared_error = service
6967 .read_operational_collection(&crate::operational::OperationalReadRequest {
6968 collection_name: "audit_log".to_owned(),
6969 filters: vec![crate::operational::OperationalFilterClause::Exact {
6970 field: "missing".to_owned(),
6971 value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6972 }],
6973 limit: Some(10),
6974 })
6975 .expect_err("undeclared field should be rejected");
6976 assert!(undeclared_error.to_string().contains("undeclared"));
6977 }
6978
6979 #[test]
6980 fn read_operational_collection_applies_limit_and_reports_truncation() {
6981 let (db, service) = setup();
6982 service
6983 .register_operational_collection(&OperationalRegisterRequest {
6984 name: "audit_log".to_owned(),
6985 kind: OperationalCollectionKind::AppendOnlyLog,
6986 schema_json: "{}".to_owned(),
6987 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6988 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6989 .to_owned(),
6990 validation_json: String::new(),
6991 secondary_indexes_json: "[]".to_owned(),
6992 format_version: 1,
6993 })
6994 .expect("register collection");
6995 {
6996 let writer = crate::WriterActor::start(
6997 db.path(),
6998 Arc::new(SchemaManager::new()),
6999 crate::ProvenanceMode::Warn,
7000 Arc::new(crate::TelemetryCounters::default()),
7001 )
7002 .expect("writer");
7003 writer
7004 .submit(crate::WriteRequest {
7005 label: "operational".to_owned(),
7006 nodes: vec![],
7007 node_retires: vec![],
7008 edges: vec![],
7009 edge_retires: vec![],
7010 chunks: vec![],
7011 runs: vec![],
7012 steps: vec![],
7013 actions: vec![],
7014 optional_backfills: vec![],
7015 vec_inserts: vec![],
7016 operational_writes: vec![
7017 crate::OperationalWrite::Append {
7018 collection: "audit_log".to_owned(),
7019 record_key: "evt-1".to_owned(),
7020 payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
7021 source_ref: Some("src-1".to_owned()),
7022 },
7023 crate::OperationalWrite::Append {
7024 collection: "audit_log".to_owned(),
7025 record_key: "evt-2".to_owned(),
7026 payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
7027 source_ref: Some("src-2".to_owned()),
7028 },
7029 ],
7030 })
7031 .expect("write");
7032 }
7033
7034 let report = service
7035 .read_operational_collection(&crate::operational::OperationalReadRequest {
7036 collection_name: "audit_log".to_owned(),
7037 filters: vec![crate::operational::OperationalFilterClause::Prefix {
7038 field: "actor".to_owned(),
7039 value: "alice".to_owned(),
7040 }],
7041 limit: Some(1),
7042 })
7043 .expect("limited read");
7044
7045 assert_eq!(report.row_count, 1);
7046 assert_eq!(report.applied_limit, 1);
7047 assert!(report.was_limited);
7048 assert_eq!(report.rows[0].record_key, "evt-2");
7049 }
7050
    /// Starts from hand-built `operational_collections` / `operational_mutations`
    /// tables that have no filter-field column (labelled "pre-v10 schema" by the
    /// seeding step) and checks that a filter contract can be added afterwards.
    ///
    /// Before the contract is set, a filtered read is rejected as `undeclared`;
    /// after `update_operational_collection_filters`, a `Range` read on `ts`
    /// returns the mutation that was already present.
    #[test]
    fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
        let db = NamedTempFile::new().expect("temp db");
        let conn = sqlite::open_connection(db.path()).expect("conn");
        // Create the two tables by hand and seed one collection with one event.
        conn.execute_batch(
            r#"
            CREATE TABLE operational_collections (
                name TEXT PRIMARY KEY,
                kind TEXT NOT NULL,
                schema_json TEXT NOT NULL,
                retention_json TEXT NOT NULL,
                format_version INTEGER NOT NULL DEFAULT 1,
                created_at INTEGER NOT NULL DEFAULT 100,
                disabled_at INTEGER
            );
            CREATE TABLE operational_mutations (
                id TEXT PRIMARY KEY,
                collection_name TEXT NOT NULL,
                record_key TEXT NOT NULL,
                op_kind TEXT NOT NULL,
                payload_json TEXT NOT NULL,
                source_ref TEXT,
                created_at INTEGER NOT NULL DEFAULT 100,
                mutation_order INTEGER NOT NULL DEFAULT 1
            );
            INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
            VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
            INSERT INTO operational_mutations
                (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
            VALUES
                ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
            "#,
        )
        .expect("seed pre-v10 schema");
        // Close the seeding connection before the service touches the file.
        drop(conn);

        // NOTE(review): the service is presumably expected to bring the schema
        // up to date on first use; that step is not visible in this test.
        let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
        // No filter contract exists yet, so filtering on `actor` must fail.
        let pre_update = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Exact {
                    field: "actor".to_owned(),
                    value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
                }],
                limit: Some(10),
            })
            .expect_err("read should reject undeclared fields before migration update");
        assert!(pre_update.to_string().contains("undeclared"));

        // Declare `actor` and `ts` as filter fields on the existing collection.
        let updated = service
            .update_operational_collection_filters(
                "audit_log",
                r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
            )
            .expect("update filter contract");
        assert!(updated.filter_fields_json.contains("\"actor\""));

        // The seeded payload has `ts` = 0, so both range bounds are 0; the row
        // written before the contract existed must be returned.
        let report = service
            .read_operational_collection(&crate::operational::OperationalReadRequest {
                collection_name: "audit_log".to_owned(),
                filters: vec![crate::operational::OperationalFilterClause::Range {
                    field: "ts".to_owned(),
                    lower: Some(0),
                    upper: Some(0),
                }],
                limit: Some(10),
            })
            .expect("read after explicit filter update");
        assert_eq!(report.row_count, 1);
        assert_eq!(report.rows[0].record_key, "evt-1");
    }
7122
/// `check_semantics` must report exactly one stale vec row, and mention it in the
/// warnings, when `vec_nodes_active` holds a row for `ghost-chunk` and no chunks were seeded.
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_stale_vec_rows() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    // Seed inside a scope so the seeding connection is dropped before the service runs.
    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        schema
            .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 3)
            .expect("vec profile");

        // Serialize the three f32 components as consecutive little-endian bytes.
        let mut embedding_blob: Vec<u8> = Vec::new();
        for component in &[0.1f32, 0.2f32, 0.3f32] {
            embedding_blob.extend_from_slice(&component.to_le_bytes());
        }

        seed_conn
            .execute(
                "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
                rusqlite::params![embedding_blob],
            )
            .expect("insert stale vec row");
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    let semantics = admin.check_semantics().expect("semantics check");

    assert_eq!(semantics.stale_vec_rows, 1);
    let mentions_stale_vec = semantics
        .warnings
        .iter()
        .any(|warning| warning.contains("stale vec"));
    assert!(mentions_stale_vec, "warning must mention stale vec");
}
7155
/// With only a `vector_profiles` metadata row present, `restore_vector_profiles` must
/// report the `Vec` projection target with one rebuilt row and leave exactly one
/// `vec_nodes_active` entry in `sqlite_schema`.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_vector_profiles_recreates_vec_table_from_metadata() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    // Insert the profile row directly instead of going through `ensure_vector_profile`.
    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        seed_conn
            .execute(
                "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
                 VALUES ('default', 'vec_nodes_active', 3, 1)",
                [],
            )
            .expect("insert vector profile");
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    let restore_report = admin
        .restore_vector_profiles()
        .expect("restore vector profiles");

    let expected_targets = vec![crate::projection::ProjectionTarget::Vec];
    assert_eq!(restore_report.targets, expected_targets);
    assert_eq!(restore_report.rebuilt_rows, 1);

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let schema_entries: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
            [],
            |row| row.get(0),
        )
        .expect("vec schema count");
    assert_eq!(schema_entries, 1, "vec table should exist after restore");
}
7192
/// The same `VectorRegenerationConfig`, serialized once as JSON and once as TOML, must
/// load back equal to the original through `load_vector_regeneration_config`.
#[cfg(feature = "sqlite-vec")]
#[test]
fn load_vector_regeneration_config_supports_json_and_toml() {
    let dir = tempfile::tempdir().expect("temp dir");
    let root = dir.path();
    let json_path = root.join("regen.json");
    let toml_path = root.join("regen.toml");

    let expected = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "model-a".to_owned(),
        model_version: "1.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec!["/bin/echo".to_owned()],
    };

    // Write both encodings first, then parse them, so each file is read back from disk.
    let json_text = serde_json::to_string(&expected).expect("json");
    fs::write(&json_path, json_text).expect("write json");
    let toml_text = toml::to_string(&expected).expect("toml");
    fs::write(&toml_path, toml_text).expect("write toml");

    let from_json = load_vector_regeneration_config(&json_path).expect("json parse");
    let from_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");

    assert_eq!(from_json, expected);
    assert_eq!(from_toml, expected);
}
7221
/// Built without `sqlite-vec`, regeneration must fail with "unsupported vec capability"
/// and still leave one `vector_regeneration_requested` and one
/// `vector_regeneration_failed` provenance event for the `default` profile.
#[cfg(all(not(feature = "sqlite-vec"), unix))]
#[test]
fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-no-vec.sh");

    // Generator that emits one fixed 4-dimensional embedding per chunk it is given.
    let generator_source = r#"#!/usr/bin/env bash
set -euo pipefail
python3 -c 'import json, sys
payload = json.load(sys.stdin)
embeddings = [{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]} for chunk in payload["chunks"]]
json.dump({"embeddings": embeddings}, sys.stdout)'
"#;
    fs::write(&script_path, generator_source).expect("write generator script");
    set_file_mode(&script_path, 0o755);

    {
        let seed_conn = sqlite::open_connection(db.path()).expect("connection");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        let seed_statements = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                "insert node",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                "insert chunk",
            ),
        ];
        for &(sql, label) in &seed_statements {
            seed_conn.execute(sql, []).expect(label);
        }
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "test-model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec![script_path.to_string_lossy().into_owned()],
    };
    let error = admin
        .regenerate_vector_embeddings(&config)
        .expect_err("sqlite-vec capability should be required");

    let message = error.to_string();
    assert!(message.contains("unsupported vec capability"));

    let audit_conn = sqlite::open_connection(db.path()).expect("connection");
    // Runs a single-value count query; `label` becomes the panic message on failure.
    let count_rows = |sql: &str, label: &str| -> i64 {
        audit_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };

    let request_count = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
        "request count",
    );
    assert_eq!(request_count, 1);

    let failed_count = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
        "failed count",
    );
    assert_eq!(failed_count, 1);

    let metadata_json: String = audit_conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failed metadata");
    assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
}
7303
/// Happy path: a generator script produces one embedding per chunk, and regeneration
/// must report two regenerated rows, persist a single embedding contract, and record
/// one `vector_regeneration_requested` and one `vector_regeneration_apply` event.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_rebuilds_embeddings_from_generator() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator.sh");

    // The embedding depends on whether the chunk text mentions "budget".
    fs::write(
        &script_path,
        r#"#!/usr/bin/env bash
set -euo pipefail
python3 -c 'import json, sys
payload = json.load(sys.stdin)
embeddings = []
for chunk in payload["chunks"]:
    text = chunk["text_content"].lower()
    if "budget" in text:
        embedding = [1.0, 0.0, 0.0, 0.0]
    else:
        embedding = [0.0, 1.0, 0.0, 0.0]
    embeddings.append({"chunk_id": chunk["chunk_id"], "embedding": embedding})
json.dump({"embeddings": embeddings}, sys.stdout)'
"#,
    )
    .expect("write generator script");
    set_file_mode(&script_path, 0o755);

    // Seed one node with two chunks; no vector profile or contract exists yet.
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk 1");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
            [],
        )
        .expect("insert chunk 2");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let report = service
        .regenerate_vector_embeddings(&VectorRegenerationConfig {
            profile: "default".to_owned(),
            table_name: "vec_nodes_active".to_owned(),
            model_identity: "test-model".to_owned(),
            model_version: "1.0.0".to_owned(),
            dimension: 4,
            normalization_policy: "l2".to_owned(),
            chunking_policy: "per_chunk".to_owned(),
            preprocessing_policy: "trim".to_owned(),
            generator_command: vec![script_path.to_string_lossy().to_string()],
        })
        .expect("regenerate vectors");

    assert_eq!(report.profile, "default");
    assert_eq!(report.table_name, "vec_nodes_active");
    assert_eq!(report.dimension, 4);
    assert_eq!(report.total_chunks, 2);
    assert_eq!(report.regenerated_rows, 2);
    assert!(report.contract_persisted);

    // Verify the persisted state through an independent connection.
    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let vec_count: i64 = conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get(0)
        })
        .expect("vec count");
    assert_eq!(vec_count, 2);

    let contract_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("contract count");
    assert_eq!(contract_count, 1);
    let applied_at: i64 = conn
        .query_row(
            "SELECT applied_at FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("applied_at");
    assert!(applied_at > 0);
    let snapshot_hash: String = conn
        .query_row(
            "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("snapshot_hash");
    assert!(!snapshot_hash.is_empty());
    let contract_format_version: i64 = conn
        .query_row(
            "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("contract_format_version");
    assert_eq!(contract_format_version, 1);

    // Audit trail: one request event and one apply event for the profile.
    let request_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("request audit count");
    assert_eq!(request_count, 1);
    let apply_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("apply audit count");
    assert_eq!(apply_count, 1);
    let apply_metadata: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("apply metadata");
    assert!(apply_metadata.contains("\"profile\":\"default\""));
    assert!(apply_metadata.contains("\"snapshot_hash\":"));
}
7443
/// When the generator exits nonzero, the previously stored contract (`old-model`,
/// `old-snapshot`) and the existing vec row must be left untouched, and a single
/// `vector_regeneration_failed` event with the matching failure class must be recorded.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_failure_leaves_contract_and_vec_rows_unchanged() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-fail.sh");

    // Generator that writes to stderr and exits with status 17.
    fs::write(
        &script_path,
        "#!/usr/bin/env bash\nset -euo pipefail\necho 'generator boom' >&2\nexit 17\n",
    )
    .expect("write failing script");
    set_file_mode(&script_path, 0o755);

    // Seed a node, a chunk, an existing contract and one existing vec row.
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
        schema
            .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
            .expect("ensure vec profile");
        conn.execute(
            r"
            INSERT INTO vector_embedding_contracts (
                profile,
                table_name,
                model_identity,
                model_version,
                dimension,
                normalization_policy,
                chunking_policy,
                preprocessing_policy,
                generator_command_json,
                applied_at,
                snapshot_hash
            ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
            ",
            rusqlite::params![
                "default",
                "vec_nodes_active",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[\"/bin/echo\"]",
                111,
                "old-snapshot"
            ],
        )
        .expect("seed contract");
        // 16 zero bytes: four f32 components for the 4-dimensional profile.
        conn.execute(
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            [],
        )
        .expect("seed vec row");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "new-model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy::default(),
        )
        .expect_err("generator should fail");

    assert!(error.to_string().contains("generator nonzero exit"));

    // The seeded contract and vec row must survive the failed run unchanged.
    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let model_identity: String = conn
        .query_row(
            "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("model identity");
    assert_eq!(model_identity, "old-model");
    let snapshot_hash: String = conn
        .query_row(
            "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("snapshot hash");
    assert_eq!(snapshot_hash, "old-snapshot");
    let vec_count: i64 = conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get(0)
        })
        .expect("vec count");
    assert_eq!(vec_count, 1);
    let failure_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failure count");
    assert_eq!(failure_count, 1);
    let failure_metadata: String = conn
        .query_row(
            "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failure metadata");
    assert!(failure_metadata.contains("\"failure_class\":\"generator nonzero exit\""));
}
7575
/// If the chunk set changes while the generator is running, regeneration must fail with
/// a retryable "snapshot drift" error, write no contract and no vec rows, and record a
/// single `vector_regeneration_failed` event.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_snapshot_drift_is_retryable_and_non_mutating() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-drift.sh");
    let db_path = db.path().to_string_lossy().to_string();

    // The generator itself inserts `chunk-2` into the database before answering, which
    // is what creates the drift. `{db_path:?}` embeds the path as a quoted string literal.
    fs::write(
        &script_path,
        format!(
            r#"#!/usr/bin/env bash
set -euo pipefail
python3 -c 'import json, sqlite3, sys
payload = json.load(sys.stdin)
conn = sqlite3.connect({db_path:?})
conn.execute("INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES (?, ?, ?, ?)", ("chunk-2", "doc-1", "late arriving text", 101))
conn.commit()
conn.close()
embeddings = [{{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}} for chunk in payload["chunks"]]
json.dump({{"embeddings": embeddings}}, sys.stdout)'
"#,
        ),
    )
    .expect("write drift script");
    set_file_mode(&script_path, 0o755);

    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
        schema
            .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
            .expect("ensure vec profile");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "test-model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy::default(),
        )
        .expect_err("snapshot drift should fail");

    assert!(
        error
            .to_string()
            .contains("vector regeneration snapshot drift:")
    );
    assert!(error.to_string().contains("[retryable]"));

    // Nothing may have been persisted apart from the failure audit event.
    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let contract_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM vector_embedding_contracts",
            [],
            |row| row.get(0),
        )
        .expect("contract count");
    assert_eq!(contract_count, 0);
    let vec_count: i64 = conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get(0)
        })
        .expect("vec count");
    assert_eq!(vec_count, 0);
    let failure_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failure count");
    assert_eq!(failure_count, 1);
}
7673
/// A generator that sleeps for one second under a 50 ms policy timeout must make
/// regeneration fail with a "generator timeout" error.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_times_out_and_kills_generator() {
    let (_db, service) = setup();
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-timeout.sh");

    // The script would eventually print valid output, but only after the timeout.
    fs::write(
        &script_path,
        "#!/usr/bin/env bash\nset -euo pipefail\nsleep 1\nprintf '{\"embeddings\":[]}'\n",
    )
    .expect("write timeout script");
    set_file_mode(&script_path, 0o755);

    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy {
                // Far shorter than the script's one-second sleep.
                timeout_ms: 50,
                max_stdout_bytes: 1024,
                max_stderr_bytes: 1024,
                max_input_bytes: 1024,
                max_chunks: 10,
                require_absolute_executable: true,
                reject_world_writable_executable: true,
                allowed_executable_roots: vec![],
                preserve_env_vars: vec![],
            },
        )
        .expect_err("generator should time out");
    assert!(error.to_string().contains("generator timeout"));
}
7716
/// A generator that writes 5000 bytes to stdout under a 128-byte stdout limit must make
/// regeneration fail with a "stdout overflow" error.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_rejects_oversized_stdout() {
    let (_db, service) = setup();
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-stdout.sh");

    fs::write(
        &script_path,
        "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stdout.write(\"x\" * 5000)'\n",
    )
    .expect("write stdout script");
    set_file_mode(&script_path, 0o755);

    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy {
                timeout_ms: 1000,
                // Much smaller than the 5000 bytes the script emits.
                max_stdout_bytes: 128,
                max_stderr_bytes: 1024,
                max_input_bytes: 1024,
                max_chunks: 10,
                require_absolute_executable: true,
                reject_world_writable_executable: true,
                allowed_executable_roots: vec![],
                preserve_env_vars: vec![],
            },
        )
        .expect_err("generator stdout should overflow");
    assert!(error.to_string().contains("stdout overflow"));
}
7759
/// A generator that writes 5000 bytes to stderr and exits with status 7, under a
/// 128-byte stderr limit, must make regeneration fail with a "stderr overflow" error.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_rejects_oversized_stderr() {
    let (_db, service) = setup();
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-stderr.sh");

    fs::write(
        &script_path,
        "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stderr.write(\"e\" * 5000); sys.exit(7)'\n",
    )
    .expect("write stderr script");
    set_file_mode(&script_path, 0o755);

    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy {
                timeout_ms: 1000,
                max_stdout_bytes: 1024,
                // Much smaller than the 5000 bytes the script emits on stderr.
                max_stderr_bytes: 128,
                max_input_bytes: 1024,
                max_chunks: 10,
                require_absolute_executable: true,
                reject_world_writable_executable: true,
                allowed_executable_roots: vec![],
                preserve_env_vars: vec![],
            },
        )
        .expect_err("generator stderr should overflow");
    assert!(error.to_string().contains("stderr overflow"));
}
7802
/// With `max_input_bytes` set to 32 and a single long chunk seeded, regeneration must
/// fail with a "payload too large" error.
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_oversized_input_before_spawn() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        let seed_statements = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                "insert node",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'this chunk is intentionally long to exceed the configured input limit', 100)",
                "insert chunk",
            ),
        ];
        for &(sql, label) in &seed_statements {
            seed_conn.execute(sql, []).expect(label);
        }
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec!["/bin/echo".to_owned()],
    };
    // Only the input budget is tight; the seeded chunk text alone is longer than 32 bytes.
    let tight_input_policy = VectorGeneratorPolicy {
        timeout_ms: 1000,
        max_stdout_bytes: 1024,
        max_stderr_bytes: 1024,
        max_input_bytes: 32,
        max_chunks: 10,
        require_absolute_executable: true,
        reject_world_writable_executable: true,
        allowed_executable_roots: vec![],
        preserve_env_vars: vec![],
    };

    let error = admin
        .regenerate_vector_embeddings_with_policy(&config, &tight_input_policy)
        .expect_err("input size should be rejected before spawn");
    let message = error.to_string();
    assert!(message.contains("payload too large"));
}
7854
/// With `max_chunks` set to 1 and two chunks seeded, regeneration must fail with a
/// "payload too large" error.
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_excessive_chunk_count_before_spawn() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        let seed_statements = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                "insert node",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-1', 'doc-1', 'a', 100)",
                "insert chunk 1",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-2', 'doc-1', 'b', 101)",
                "insert chunk 2",
            ),
        ];
        for &(sql, label) in &seed_statements {
            seed_conn.execute(sql, []).expect(label);
        }
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    let config = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec!["/bin/echo".to_owned()],
    };
    // The byte budget is generous here; only the chunk-count limit is below the seeded data.
    let single_chunk_policy = VectorGeneratorPolicy {
        timeout_ms: 1000,
        max_stdout_bytes: 1024,
        max_stderr_bytes: 1024,
        max_input_bytes: 2048,
        max_chunks: 1,
        require_absolute_executable: true,
        reject_world_writable_executable: true,
        allowed_executable_roots: vec![],
        preserve_env_vars: vec![],
    };

    let error = admin
        .regenerate_vector_embeddings_with_policy(&config, &single_chunk_policy)
        .expect_err("chunk count should be rejected before spawn");
    let message = error.to_string();
    assert!(message.contains("payload too large"));
}
7910
/// When the generator prints output that is not JSON, regeneration must fail with a
/// "decode generator output" error, keep the previously stored contract (`old-model`)
/// and the existing vec row, and record a single `vector_regeneration_failed` event.
///
/// Unix-only: the generator is a `#!/usr/bin/env bash` script made executable through a
/// POSIX file mode, so the test cannot run on other platforms.
#[cfg(all(feature = "sqlite-vec", unix))]
#[test]
fn regenerate_vector_embeddings_malformed_json_leaves_contract_and_vec_rows_unchanged() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());
    let temp_dir = tempfile::tempdir().expect("temp dir");
    let script_path = temp_dir.path().join("vector-generator-bad-json.sh");

    // Generator that exits successfully but prints a non-JSON payload.
    fs::write(
        &script_path,
        "#!/usr/bin/env bash\nset -euo pipefail\nprintf 'not-json'\n",
    )
    .expect("write bad json script");
    set_file_mode(&script_path, 0o755);

    // Seed a node, a chunk, an existing contract and one existing vec row.
    {
        let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&conn).expect("bootstrap");
        conn.execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
        conn.execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
            [],
        )
        .expect("insert chunk");
        schema
            .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
            .expect("ensure vec profile");
        conn.execute(
            r"
            INSERT INTO vector_embedding_contracts (
                profile,
                table_name,
                model_identity,
                model_version,
                dimension,
                normalization_policy,
                chunking_policy,
                preprocessing_policy,
                generator_command_json,
                applied_at,
                snapshot_hash
            ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
            ",
            rusqlite::params![
                "default",
                "vec_nodes_active",
                "old-model",
                "0.9.0",
                4,
                "l2",
                "per_chunk",
                "trim",
                "[\"/bin/echo\"]",
                111,
                "old-snapshot"
            ],
        )
        .expect("seed contract");
        // 16 zero bytes: four f32 components for the 4-dimensional profile.
        conn.execute(
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            [],
        )
        .expect("seed vec row");
    }

    let service = AdminService::new(db.path(), Arc::clone(&schema));
    let error = service
        .regenerate_vector_embeddings_with_policy(
            &VectorRegenerationConfig {
                profile: "default".to_owned(),
                table_name: "vec_nodes_active".to_owned(),
                model_identity: "new-model".to_owned(),
                model_version: "1.0.0".to_owned(),
                dimension: 4,
                normalization_policy: "l2".to_owned(),
                chunking_policy: "per_chunk".to_owned(),
                preprocessing_policy: "trim".to_owned(),
                generator_command: vec![script_path.to_string_lossy().to_string()],
            },
            &VectorGeneratorPolicy::default(),
        )
        .expect_err("bad json should fail");

    assert!(error.to_string().contains("decode generator output"));

    // The seeded contract and vec row must survive the failed run unchanged.
    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    let model_identity: String = conn
        .query_row(
            "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("model identity");
    assert_eq!(model_identity, "old-model");
    let vec_count: i64 = conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get(0)
        })
        .expect("vec count");
    assert_eq!(vec_count, 1);
    let failure_count: i64 = conn
        .query_row(
            "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
            [],
            |row| row.get(0),
        )
        .expect("failure count");
    assert_eq!(failure_count, 1);
}
8026
/// A profile name made only of whitespace must be rejected with an "invalid contract"
/// error, leaving both `vector_embedding_contracts` and `provenance_events` empty.
#[cfg(feature = "sqlite-vec")]
#[test]
fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
    let db = NamedTempFile::new().expect("temp file");
    let schema = Arc::new(SchemaManager::new());

    {
        let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
        schema.bootstrap(&seed_conn).expect("bootstrap");
        let seed_statements = [
            (
                "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
                 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
                "insert node",
            ),
            (
                "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
                 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
                "insert chunk",
            ),
        ];
        for &(sql, label) in &seed_statements {
            seed_conn.execute(sql, []).expect(label);
        }
    }

    let admin = AdminService::new(db.path(), Arc::clone(&schema));
    // Every field is valid except the profile, which is a single space.
    let blank_profile_config = VectorRegenerationConfig {
        profile: " ".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "test-model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec!["/bin/echo".to_owned()],
    };
    let error = admin
        .regenerate_vector_embeddings(&blank_profile_config)
        .expect_err("whitespace profile should be rejected");

    let message = error.to_string();
    assert!(message.contains("invalid contract"));

    let verify_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Runs a single-value count query; `label` becomes the panic message on failure.
    let count_rows = |sql: &str, label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };

    let contract_count = count_rows(
        "SELECT count(*) FROM vector_embedding_contracts",
        "contract count",
    );
    assert_eq!(contract_count, 0);

    let provenance_count = count_rows("SELECT count(*) FROM provenance_events", "provenance count");
    assert_eq!(provenance_count, 0);
}
8081
8082 #[cfg(feature = "sqlite-vec")]
8083 #[test]
8084 fn regenerate_vector_embeddings_rejects_world_writable_executable_when_policy_requires_it() {
8085 let (_db, service) = setup();
8086 let temp_dir = tempfile::tempdir().expect("temp dir");
8087 let script_path = temp_dir.path().join("vector-generator-world-writable.sh");
8088
8089 fs::write(
8090 &script_path,
8091 "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
8092 )
8093 .expect("write script");
8094 set_file_mode(&script_path, 0o777);
8095
8096 let error = service
8097 .regenerate_vector_embeddings_with_policy(
8098 &VectorRegenerationConfig {
8099 profile: "default".to_owned(),
8100 table_name: "vec_nodes_active".to_owned(),
8101 model_identity: "model".to_owned(),
8102 model_version: "1.0.0".to_owned(),
8103 dimension: 4,
8104 normalization_policy: "l2".to_owned(),
8105 chunking_policy: "per_chunk".to_owned(),
8106 preprocessing_policy: "trim".to_owned(),
8107 generator_command: vec![script_path.to_string_lossy().to_string()],
8108 },
8109 &VectorGeneratorPolicy::default(),
8110 )
8111 .expect_err("world-writable executable should be rejected");
8112
8113 assert!(error.to_string().contains("world-writable executable"));
8114 }
8115
8116 #[cfg(feature = "sqlite-vec")]
8117 #[test]
8118 fn regenerate_vector_embeddings_rejects_executable_outside_allowlisted_roots() {
8119 let (_db, service) = setup();
8120 let temp_dir = tempfile::tempdir().expect("temp dir");
8121 let allowed_dir = tempfile::tempdir().expect("allowed dir");
8122 let script_path = temp_dir.path().join("vector-generator-outside-root.sh");
8123
8124 fs::write(
8125 &script_path,
8126 "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
8127 )
8128 .expect("write script");
8129 set_file_mode(&script_path, 0o755);
8130
8131 let error = service
8132 .regenerate_vector_embeddings_with_policy(
8133 &VectorRegenerationConfig {
8134 profile: "default".to_owned(),
8135 table_name: "vec_nodes_active".to_owned(),
8136 model_identity: "model".to_owned(),
8137 model_version: "1.0.0".to_owned(),
8138 dimension: 4,
8139 normalization_policy: "l2".to_owned(),
8140 chunking_policy: "per_chunk".to_owned(),
8141 preprocessing_policy: "trim".to_owned(),
8142 generator_command: vec![script_path.to_string_lossy().to_string()],
8143 },
8144 &VectorGeneratorPolicy {
8145 timeout_ms: 1000,
8146 max_stdout_bytes: 1024,
8147 max_stderr_bytes: 1024,
8148 max_input_bytes: 1024,
8149 max_chunks: 10,
8150 require_absolute_executable: true,
8151 reject_world_writable_executable: true,
8152 allowed_executable_roots: vec![
8153 allowed_dir.path().to_string_lossy().to_string(),
8154 ],
8155 preserve_env_vars: vec![],
8156 },
8157 )
8158 .expect_err("disallowed root should be rejected");
8159
8160 assert!(
8161 error
8162 .to_string()
8163 .contains("outside allowed executable roots")
8164 );
8165 }
8166
8167 #[cfg(feature = "sqlite-vec")]
8168 #[test]
8169 fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
8170 let db = NamedTempFile::new().expect("temp file");
8171 let schema = Arc::new(SchemaManager::new());
8172 {
8173 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8174 schema.bootstrap(&conn).expect("bootstrap");
8175 conn.execute(
8176 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8177 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8178 [],
8179 )
8180 .expect("insert node");
8181 conn.execute(
8182 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8183 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8184 [],
8185 )
8186 .expect("insert chunk");
8187 conn.execute(
8188 r"
8189 INSERT INTO vector_embedding_contracts (
8190 profile,
8191 table_name,
8192 model_identity,
8193 model_version,
8194 dimension,
8195 normalization_policy,
8196 chunking_policy,
8197 preprocessing_policy,
8198 generator_command_json,
8199 applied_at,
8200 snapshot_hash,
8201 contract_format_version,
8202 updated_at
8203 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
8204 ",
8205 rusqlite::params![
8206 "default",
8207 "vec_nodes_active",
8208 "old-model",
8209 "0.9.0",
8210 4,
8211 "l2",
8212 "per_chunk",
8213 "trim",
8214 "[\"/bin/echo\"]",
8215 111,
8216 "old-snapshot",
8217 99,
8218 111,
8219 ],
8220 )
8221 .expect("seed future contract");
8222 }
8223
8224 let service = AdminService::new(db.path(), Arc::clone(&schema));
8225 let error = service
8226 .regenerate_vector_embeddings(&VectorRegenerationConfig {
8227 profile: "default".to_owned(),
8228 table_name: "vec_nodes_active".to_owned(),
8229 model_identity: "test-model".to_owned(),
8230 model_version: "1.0.0".to_owned(),
8231 dimension: 4,
8232 normalization_policy: "l2".to_owned(),
8233 chunking_policy: "per_chunk".to_owned(),
8234 preprocessing_policy: "trim".to_owned(),
8235 generator_command: vec!["/bin/echo".to_owned()],
8236 })
8237 .expect_err("future contract version should be rejected");
8238
8239 assert!(error.to_string().contains("unsupported"));
8240 assert!(error.to_string().contains("format version"));
8241 }
8242
8243 #[cfg(feature = "sqlite-vec")]
8244 #[test]
8245 fn regenerate_vector_embeddings_clears_environment_except_preserved_vars() {
8246 let db = NamedTempFile::new().expect("temp file");
8247 let schema = Arc::new(SchemaManager::new());
8248 let temp_dir = tempfile::tempdir().expect("temp dir");
8249 let script_path = temp_dir.path().join("vector-generator-env.sh");
8250 {
8251 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8252 schema.bootstrap(&conn).expect("bootstrap");
8253 conn.execute(
8254 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8255 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
8256 [],
8257 )
8258 .expect("insert node");
8259 conn.execute(
8260 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8261 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
8262 [],
8263 )
8264 .expect("insert chunk");
8265 }
8266
8267 fs::write(
8268 &script_path,
8269 r#"#!/usr/bin/env bash
8270set -euo pipefail
8271if [[ "${VECTOR_TEST_SECRET:-}" != "expected" ]]; then
8272 echo "missing secret" >&2
8273 exit 9
8274fi
8275python3 -c 'import json, sys
8276payload = json.load(sys.stdin)
8277json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
8278"#,
8279 )
8280 .expect("write script");
8281 set_file_mode(&script_path, 0o755);
8282
8283 let service = AdminService::new(db.path(), Arc::clone(&schema));
8284 unsafe {
8285 std::env::set_var("VECTOR_TEST_SECRET", "expected");
8286 }
8287 let missing_env = service
8288 .regenerate_vector_embeddings_with_policy(
8289 &VectorRegenerationConfig {
8290 profile: "default".to_owned(),
8291 table_name: "vec_nodes_active".to_owned(),
8292 model_identity: "model".to_owned(),
8293 model_version: "1.0.0".to_owned(),
8294 dimension: 4,
8295 normalization_policy: "l2".to_owned(),
8296 chunking_policy: "per_chunk".to_owned(),
8297 preprocessing_policy: "trim".to_owned(),
8298 generator_command: vec![script_path.to_string_lossy().to_string()],
8299 },
8300 &VectorGeneratorPolicy::default(),
8301 )
8302 .expect_err("non-preserved env var should be dropped");
8303 assert!(missing_env.to_string().contains("nonzero exit"));
8304
8305 let report = service
8306 .regenerate_vector_embeddings_with_policy(
8307 &VectorRegenerationConfig {
8308 profile: "default".to_owned(),
8309 table_name: "vec_nodes_active".to_owned(),
8310 model_identity: "model".to_owned(),
8311 model_version: "1.0.0".to_owned(),
8312 dimension: 4,
8313 normalization_policy: "l2".to_owned(),
8314 chunking_policy: "per_chunk".to_owned(),
8315 preprocessing_policy: "trim".to_owned(),
8316 generator_command: vec![script_path.to_string_lossy().to_string()],
8317 },
8318 &VectorGeneratorPolicy {
8319 timeout_ms: 1000,
8320 max_stdout_bytes: 1024,
8321 max_stderr_bytes: 1024,
8322 max_input_bytes: 4096,
8323 max_chunks: 10,
8324 require_absolute_executable: true,
8325 reject_world_writable_executable: true,
8326 allowed_executable_roots: vec![],
8327 preserve_env_vars: vec!["VECTOR_TEST_SECRET".to_owned()],
8328 },
8329 )
8330 .expect("preserved env var should allow success");
8331 assert_eq!(report.regenerated_rows, 1);
8332 unsafe {
8333 std::env::remove_var("VECTOR_TEST_SECRET");
8334 }
8335 }
8336
8337 #[test]
8338 fn check_semantics_detects_orphaned_chunk() {
8339 let (db, service) = setup();
8340 {
8341 let conn = sqlite::open_connection(db.path()).expect("conn");
8343 conn.execute(
8344 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8345 VALUES ('c1', 'ghost-node', 'text', 100)",
8346 [],
8347 )
8348 .expect("insert orphaned chunk");
8349 }
8350 let report = service.check_semantics().expect("semantics check");
8351 assert_eq!(report.orphaned_chunks, 1);
8352 }
8353
8354 #[test]
8355 fn check_semantics_detects_null_source_ref() {
8356 let (db, service) = setup();
8357 {
8358 let conn = sqlite::open_connection(db.path()).expect("conn");
8359 conn.execute(
8360 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
8361 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
8362 [],
8363 )
8364 .expect("insert node with null source_ref");
8365 }
8366 let report = service.check_semantics().expect("semantics check");
8367 assert_eq!(report.null_source_ref_nodes, 1);
8368 }
8369
8370 #[test]
8371 fn check_semantics_detects_broken_step_fk() {
8372 let (db, service) = setup();
8373 {
8374 let conn = sqlite::open_connection(db.path()).expect("conn");
8377 conn.execute_batch("PRAGMA foreign_keys = OFF;")
8378 .expect("disable FK");
8379 conn.execute(
8380 "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
8381 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
8382 [],
8383 )
8384 .expect("insert step with ghost run_id");
8385 }
8386 let report = service.check_semantics().expect("semantics check");
8387 assert_eq!(report.broken_step_fk, 1);
8388 }
8389
8390 #[test]
8391 fn check_semantics_detects_broken_action_fk() {
8392 let (db, service) = setup();
8393 {
8394 let conn = sqlite::open_connection(db.path()).expect("conn");
8395 conn.execute_batch("PRAGMA foreign_keys = OFF;")
8396 .expect("disable FK");
8397 conn.execute(
8398 "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
8399 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
8400 [],
8401 )
8402 .expect("insert action with ghost step_id");
8403 }
8404 let report = service.check_semantics().expect("semantics check");
8405 assert_eq!(report.broken_action_fk, 1);
8406 }
8407
8408 #[test]
8409 fn check_semantics_detects_stale_fts_rows() {
8410 let (db, service) = setup();
8411 {
8412 let conn = sqlite::open_connection(db.path()).expect("conn");
8413 conn.execute(
8416 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8417 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
8418 [],
8419 )
8420 .expect("insert stale FTS row");
8421 }
8422 let report = service.check_semantics().expect("semantics check");
8423 assert_eq!(report.stale_fts_rows, 1);
8424 }
8425
8426 #[test]
8427 fn check_semantics_detects_fts_rows_for_superseded_nodes() {
8428 let (db, service) = setup();
8429 {
8430 let conn = sqlite::open_connection(db.path()).expect("conn");
8431 conn.execute(
8433 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8434 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
8435 [],
8436 )
8437 .expect("insert superseded node");
8438 conn.execute(
8440 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
8441 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
8442 [],
8443 )
8444 .expect("insert FTS row for superseded node");
8445 }
8446 let report = service.check_semantics().expect("semantics check");
8447 assert_eq!(report.fts_rows_for_superseded_nodes, 1);
8448 }
8449
8450 #[test]
8451 fn check_semantics_detects_dangling_edges() {
8452 let (db, service) = setup();
8453 {
8454 let conn = sqlite::open_connection(db.path()).expect("conn");
8455 conn.execute_batch("PRAGMA foreign_keys = OFF;")
8456 .expect("disable FK");
8457 conn.execute(
8459 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8460 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
8461 [],
8462 )
8463 .expect("insert source node");
8464 conn.execute(
8465 "INSERT INTO edges \
8466 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
8467 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
8468 [],
8469 )
8470 .expect("insert dangling edge");
8471 }
8472 let report = service.check_semantics().expect("semantics check");
8473 assert_eq!(report.dangling_edges, 1);
8474 }
8475
8476 #[test]
8477 fn check_semantics_detects_orphaned_supersession_chains() {
8478 let (db, service) = setup();
8479 {
8480 let conn = sqlite::open_connection(db.path()).expect("conn");
8481 conn.execute(
8483 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8484 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
8485 [],
8486 )
8487 .expect("insert fully superseded node");
8488 }
8489 let report = service.check_semantics().expect("semantics check");
8490 assert_eq!(report.orphaned_supersession_chains, 1);
8491 }
8492
8493 #[test]
8494 fn check_semantics_detects_mismatched_kind_property_fts_rows() {
8495 let (db, service) = setup();
8496 {
8497 let conn = sqlite::open_connection(db.path()).expect("conn");
8498 conn.execute(
8500 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8501 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8502 [],
8503 )
8504 .expect("insert node");
8505 conn.execute(
8507 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8508 VALUES ('goal-1', 'WrongKind', 'Ship v2')",
8509 [],
8510 )
8511 .expect("insert mismatched property FTS row");
8512 }
8513 let report = service.check_semantics().expect("semantics check");
8514 assert_eq!(report.mismatched_kind_property_fts_rows, 1);
8515 }
8516
8517 #[test]
8518 fn check_semantics_detects_duplicate_property_fts_rows() {
8519 let (db, service) = setup();
8520 {
8521 let conn = sqlite::open_connection(db.path()).expect("conn");
8522 conn.execute(
8523 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8524 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'src-1')",
8525 [],
8526 )
8527 .expect("insert node");
8528 conn.execute(
8530 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8531 VALUES ('goal-1', 'Goal', 'Ship v2')",
8532 [],
8533 )
8534 .expect("insert first property FTS row");
8535 conn.execute(
8536 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8537 VALUES ('goal-1', 'Goal', 'Ship v2 duplicate')",
8538 [],
8539 )
8540 .expect("insert duplicate property FTS row");
8541 }
8542 let report = service.check_semantics().expect("semantics check");
8543 assert_eq!(report.duplicate_property_fts_rows, 1);
8544 }
8545
8546 #[test]
8547 fn check_semantics_detects_drifted_property_fts_text() {
8548 let (db, service) = setup();
8549 {
8550 let conn = sqlite::open_connection(db.path()).expect("conn");
8551 conn.execute(
8552 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8553 VALUES ('Goal', '[\"$.name\"]', ' ')",
8554 [],
8555 )
8556 .expect("register schema");
8557 conn.execute(
8558 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8559 VALUES ('r1', 'goal-1', 'Goal', '{\"name\":\"Current name\"}', 100, 'src-1')",
8560 [],
8561 )
8562 .expect("insert node");
8563 conn.execute(
8565 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8566 VALUES ('goal-1', 'Goal', 'Old stale name')",
8567 [],
8568 )
8569 .expect("insert stale property FTS row");
8570 }
8571 let report = service.check_semantics().expect("semantics check");
8572 assert_eq!(report.drifted_property_fts_rows, 1);
8573 }
8574
8575 #[test]
8576 fn check_semantics_detects_property_fts_row_that_should_not_exist() {
8577 let (db, service) = setup();
8578 {
8579 let conn = sqlite::open_connection(db.path()).expect("conn");
8580 conn.execute(
8581 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
8582 VALUES ('Goal', '[\"$.searchable\"]', ' ')",
8583 [],
8584 )
8585 .expect("register schema");
8586 conn.execute(
8588 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8589 VALUES ('r1', 'goal-1', 'Goal', '{\"other\":\"field\"}', 100, 'src-1')",
8590 [],
8591 )
8592 .expect("insert node");
8593 conn.execute(
8595 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
8596 VALUES ('goal-1', 'Goal', 'phantom text')",
8597 [],
8598 )
8599 .expect("insert phantom property FTS row");
8600 }
8601 let report = service.check_semantics().expect("semantics check");
8602 assert_eq!(
8603 report.drifted_property_fts_rows, 1,
8604 "row that should not exist must be counted as drifted"
8605 );
8606 }
8607
8608 #[test]
8609 fn safe_export_writes_manifest_with_sha256() {
8610 let (_db, service) = setup();
8611 let export_dir = tempfile::TempDir::new().expect("temp dir");
8612 let export_path = export_dir.path().join("backup.db");
8613
8614 let manifest = service
8615 .safe_export(
8616 &export_path,
8617 SafeExportOptions {
8618 force_checkpoint: false,
8619 },
8620 )
8621 .expect("export");
8622
8623 assert!(export_path.exists(), "exported db should exist");
8624 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
8625 assert!(
8626 manifest_path.exists(),
8627 "manifest file should exist at {}",
8628 manifest_path.display()
8629 );
8630 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
8631 assert!(
8632 manifest.exported_at > 0,
8633 "exported_at should be a unix timestamp"
8634 );
8635 assert_eq!(
8636 manifest.schema_version,
8637 SchemaManager::new().current_version().0,
8638 "schema_version should match the live schema version"
8639 );
8640 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
8641 assert!(manifest.page_count > 0, "page_count should be positive");
8642 }
8643
8644 #[test]
8645 fn safe_export_preserves_operational_validation_contracts() {
8646 let (_db, service) = setup();
8647 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
8648 service
8649 .register_operational_collection(&OperationalRegisterRequest {
8650 name: "connector_health".to_owned(),
8651 kind: OperationalCollectionKind::LatestState,
8652 schema_json: "{}".to_owned(),
8653 retention_json: "{}".to_owned(),
8654 filter_fields_json: "[]".to_owned(),
8655 validation_json: validation_json.to_owned(),
8656 secondary_indexes_json: "[]".to_owned(),
8657 format_version: 1,
8658 })
8659 .expect("register collection");
8660
8661 let export_dir = tempfile::TempDir::new().expect("temp dir");
8662 let export_path = export_dir.path().join("backup.db");
8663 service
8664 .safe_export(
8665 &export_path,
8666 SafeExportOptions {
8667 force_checkpoint: false,
8668 },
8669 )
8670 .expect("export");
8671
8672 let exported = sqlite::open_connection(&export_path).expect("exported conn");
8673 let exported_validation_json: String = exported
8674 .query_row(
8675 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
8676 [],
8677 |row| row.get(0),
8678 )
8679 .expect("validation_json");
8680 assert_eq!(exported_validation_json, validation_json);
8681 }
8682
8683 #[test]
8684 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
8685 let (_db, service) = setup();
8686 let export_dir = tempfile::TempDir::new().expect("temp dir");
8687 let export_path = export_dir.path().join("no-wal.db");
8688
8689 let manifest = service
8691 .safe_export(
8692 &export_path,
8693 SafeExportOptions {
8694 force_checkpoint: false,
8695 },
8696 )
8697 .expect("export with no checkpoint");
8698
8699 assert!(
8700 manifest.page_count > 0,
8701 "page_count must be populated regardless of checkpoint mode"
8702 );
8703 assert_eq!(
8704 manifest.schema_version,
8705 SchemaManager::new().current_version().0
8706 );
8707 assert_eq!(manifest.protocol_version, 1);
8708 }
8709
8710 #[test]
8711 fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
8712 let (db, service) = setup();
8713 let conn = sqlite::open_connection(db.path()).expect("conn");
8714 let journal_mode: String = conn
8715 .query_row("PRAGMA journal_mode=WAL", [], |row| row.get(0))
8716 .expect("enable wal");
8717 assert_eq!(journal_mode.to_lowercase(), "wal");
8718 let auto_checkpoint_pages: i64 = conn
8719 .query_row("PRAGMA wal_autocheckpoint=0", [], |row| row.get(0))
8720 .expect("disable auto checkpoint");
8721 assert_eq!(auto_checkpoint_pages, 0);
8722 conn.execute(
8723 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8724 VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
8725 [],
8726 )
8727 .expect("insert wal-backed node");
8728
8729 let export_dir = tempfile::TempDir::new().expect("temp dir");
8730 let export_path = export_dir.path().join("wal-backed.db");
8731 service
8732 .safe_export(
8733 &export_path,
8734 SafeExportOptions {
8735 force_checkpoint: false,
8736 },
8737 )
8738 .expect("export wal-backed db");
8739
8740 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8741 let exported_count: i64 = exported
8742 .query_row(
8743 "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
8744 [],
8745 |row| row.get(0),
8746 )
8747 .expect("count exported nodes");
8748 assert_eq!(
8749 exported_count, 1,
8750 "safe_export must include committed rows that are still resident in the WAL"
8751 );
8752 }
8753
8754 #[test]
8755 fn excise_source_removes_searchable_content_after_excision() {
8756 let (db, service) = setup();
8757 {
8758 let conn = sqlite::open_connection(db.path()).expect("conn");
8759 conn.execute(
8760 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8761 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8762 [],
8763 )
8764 .expect("insert v1");
8765 conn.execute(
8766 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8767 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8768 [],
8769 )
8770 .expect("insert v2");
8771 conn.execute(
8772 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8773 VALUES ('ck1', 'lg1', 'hello world', 100)",
8774 [],
8775 )
8776 .expect("insert chunk");
8777 }
8778 service.excise_source("source-2").expect("excise");
8779 {
8780 let conn = sqlite::open_connection(db.path()).expect("conn");
8781 let fts_count: i64 = conn
8782 .query_row(
8783 "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
8784 [],
8785 |row| row.get(0),
8786 )
8787 .expect("fts count");
8788 assert_eq!(
8789 fts_count, 0,
8790 "excised content should not remain searchable after excise"
8791 );
8792 }
8793 }
8794
8795 #[cfg(feature = "sqlite-vec")]
8796 #[test]
8797 fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
8798 let (db, service) = setup();
8799 {
8800 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8801 service
8802 .schema_manager
8803 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
8804 .expect("ensure vec profile");
8805 conn.execute(
8806 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
8807 VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
8808 [],
8809 )
8810 .expect("insert v1");
8811 conn.execute(
8812 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
8813 VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
8814 [],
8815 )
8816 .expect("insert v2");
8817 conn.execute(
8818 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
8819 VALUES ('ck1', 'lg1', 'new content', 200)",
8820 [],
8821 )
8822 .expect("insert chunk");
8823 conn.execute(
8824 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
8825 [],
8826 )
8827 .expect("insert vec row");
8828 }
8829
8830 service.excise_source("source-2").expect("excise");
8831
8832 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
8833 let active_row: String = conn
8834 .query_row(
8835 "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
8836 [],
8837 |row| row.get(0),
8838 )
8839 .expect("restored active row");
8840 assert_eq!(active_row, "r1");
8841 let chunk_count: i64 = conn
8842 .query_row(
8843 "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
8844 [],
8845 |row| row.get(0),
8846 )
8847 .expect("chunk count");
8848 assert_eq!(
8849 chunk_count, 0,
8850 "excised source content must not survive as chunks"
8851 );
8852 let vec_count: i64 = conn
8853 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
8854 row.get(0)
8855 })
8856 .expect("vec count");
8857 assert_eq!(vec_count, 0, "excised source vec rows must be removed");
8858 let fts_count: i64 = conn
8859 .query_row(
8860 "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
8861 [],
8862 |row| row.get(0),
8863 )
8864 .expect("fts count");
8865 assert_eq!(
8866 fts_count, 0,
8867 "excised source content must not remain searchable"
8868 );
8869 }
8870
8871 #[test]
8872 fn export_page_count_matches_exported_file() {
8873 let (_db, service) = setup();
8874 let export_dir = tempfile::TempDir::new().expect("temp dir");
8875 let export_path = export_dir.path().join("page-count.db");
8876
8877 let manifest = service
8878 .safe_export(
8879 &export_path,
8880 SafeExportOptions {
8881 force_checkpoint: false,
8882 },
8883 )
8884 .expect("export");
8885
8886 let exported = sqlite::open_connection(&export_path).expect("open exported db");
8887 let actual_page_count: u64 = exported
8888 .query_row("PRAGMA page_count", [], |row| row.get(0))
8889 .expect("page_count from exported file");
8890
8891 assert_eq!(
8892 manifest.page_count, actual_page_count,
8893 "manifest page_count must match the exported file's PRAGMA page_count"
8894 );
8895 }
8896
8897 #[test]
8898 fn no_temp_file_after_successful_export() {
8899 let (_db, service) = setup();
8900 let export_dir = tempfile::TempDir::new().expect("temp dir");
8901 let export_path = export_dir.path().join("no-tmp.db");
8902
8903 service
8904 .safe_export(
8905 &export_path,
8906 SafeExportOptions {
8907 force_checkpoint: false,
8908 },
8909 )
8910 .expect("export");
8911
8912 let tmp_files: Vec<_> = fs::read_dir(export_dir.path())
8913 .expect("read export dir")
8914 .filter_map(Result::ok)
8915 .filter(|e| e.path().extension().is_some_and(|ext| ext == "tmp"))
8916 .collect();
8917
8918 assert!(
8919 tmp_files.is_empty(),
8920 "no .tmp files should remain after a successful export, found: {tmp_files:?}"
8921 );
8922 }
8923
8924 #[test]
8925 fn export_manifest_is_valid_json() {
8926 let (_db, service) = setup();
8927 let export_dir = tempfile::TempDir::new().expect("temp dir");
8928 let export_path = export_dir.path().join("valid-json.db");
8929
8930 service
8931 .safe_export(
8932 &export_path,
8933 SafeExportOptions {
8934 force_checkpoint: false,
8935 },
8936 )
8937 .expect("export");
8938
8939 let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
8940 let manifest_contents = fs::read_to_string(&manifest_path).expect("read manifest");
8941 let parsed: serde_json::Value =
8942 serde_json::from_str(&manifest_contents).expect("manifest must be valid JSON");
8943
8944 assert!(
8945 parsed.get("exported_at").is_some(),
8946 "manifest must contain exported_at"
8947 );
8948 assert!(
8949 parsed.get("sha256").is_some(),
8950 "manifest must contain sha256"
8951 );
8952 assert!(
8953 parsed.get("schema_version").is_some(),
8954 "manifest must contain schema_version"
8955 );
8956 assert!(
8957 parsed.get("protocol_version").is_some(),
8958 "manifest must contain protocol_version"
8959 );
8960 assert!(
8961 parsed.get("page_count").is_some(),
8962 "manifest must contain page_count"
8963 );
8964 }
8965
8966 #[test]
8967 fn provenance_purge_dry_run_reports_counts() {
8968 let (db, service) = setup();
8969 {
8970 let conn = sqlite::open_connection(db.path()).expect("conn");
8971 conn.execute(
8972 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8973 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
8974 [],
8975 )
8976 .expect("insert p1");
8977 conn.execute(
8978 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8979 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
8980 [],
8981 )
8982 .expect("insert p2");
8983 conn.execute(
8984 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
8985 VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
8986 [],
8987 )
8988 .expect("insert p3");
8989 }
8990
8991 let options = super::ProvenancePurgeOptions {
8992 dry_run: true,
8993 preserve_event_types: Vec::new(),
8994 };
8995 let report = service
8996 .purge_provenance_events(250, &options)
8997 .expect("dry run purge");
8998
8999 assert_eq!(report.events_deleted, 2);
9000 assert_eq!(report.events_preserved, 1);
9001 assert!(report.oldest_remaining.is_some());
9002
9003 let conn = sqlite::open_connection(db.path()).expect("conn");
9004 let total: i64 = conn
9005 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
9006 row.get(0)
9007 })
9008 .expect("count");
9009 assert_eq!(total, 3, "dry_run must not delete any events");
9010 }
9011
9012 #[test]
9013 fn provenance_purge_deletes_old_events() {
9014 let (db, service) = setup();
9015 {
9016 let conn = sqlite::open_connection(db.path()).expect("conn");
9017 conn.execute(
9018 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9019 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9020 [],
9021 )
9022 .expect("insert p1");
9023 conn.execute(
9024 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9025 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
9026 [],
9027 )
9028 .expect("insert p2");
9029 }
9030
9031 let options = super::ProvenancePurgeOptions {
9032 dry_run: false,
9033 preserve_event_types: Vec::new(),
9034 };
9035 let report = service
9036 .purge_provenance_events(150, &options)
9037 .expect("purge");
9038
9039 assert_eq!(report.events_deleted, 1);
9040 assert_eq!(report.events_preserved, 1);
9041 assert_eq!(report.oldest_remaining, Some(200));
9042
9043 let conn = sqlite::open_connection(db.path()).expect("conn");
9044 let remaining: i64 = conn
9045 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
9046 row.get(0)
9047 })
9048 .expect("count");
9049 assert_eq!(remaining, 1);
9050 }
9051
9052 #[test]
9053 fn provenance_purge_preserves_specified_types() {
9054 let (db, service) = setup();
9055 {
9056 let conn = sqlite::open_connection(db.path()).expect("conn");
9057 conn.execute(
9058 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9059 VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
9060 [],
9061 )
9062 .expect("insert p1");
9063 conn.execute(
9064 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9065 VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
9066 [],
9067 )
9068 .expect("insert p2");
9069 conn.execute(
9070 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9071 VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
9072 [],
9073 )
9074 .expect("insert p3");
9075 }
9076
9077 let options = super::ProvenancePurgeOptions {
9078 dry_run: false,
9079 preserve_event_types: Vec::new(),
9080 };
9081 let report = service
9082 .purge_provenance_events(500, &options)
9083 .expect("purge");
9084
9085 assert_eq!(report.events_deleted, 2);
9086 assert_eq!(report.events_preserved, 1);
9087
9088 let conn = sqlite::open_connection(db.path()).expect("conn");
9089 let remaining_type: String = conn
9090 .query_row("SELECT event_type FROM provenance_events", [], |row| {
9091 row.get(0)
9092 })
9093 .expect("remaining event type");
9094 assert_eq!(remaining_type, "excise");
9095 }
9096
9097 #[test]
9098 fn provenance_purge_noop_with_zero_timestamp() {
9099 let (db, service) = setup();
9100 {
9101 let conn = sqlite::open_connection(db.path()).expect("conn");
9102 conn.execute(
9103 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
9104 VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
9105 [],
9106 )
9107 .expect("insert p1");
9108 }
9109
9110 let options = super::ProvenancePurgeOptions {
9111 dry_run: false,
9112 preserve_event_types: Vec::new(),
9113 };
9114 let report = service.purge_provenance_events(0, &options).expect("purge");
9115
9116 assert_eq!(report.events_deleted, 0);
9117 assert_eq!(report.events_preserved, 1);
9118 assert_eq!(report.oldest_remaining, Some(100));
9119 }
9120
9121 #[test]
9122 fn restore_skips_edge_when_counterpart_purged() {
9123 let (db, service) = setup();
9124 {
9125 let conn = sqlite::open_connection(db.path()).expect("conn");
9126 conn.execute(
9128 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9129 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9130 [],
9131 )
9132 .expect("insert node A");
9133 conn.execute(
9134 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9135 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9136 [],
9137 )
9138 .expect("insert node B");
9139 conn.execute(
9141 "INSERT INTO edges \
9142 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9143 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9144 [],
9145 )
9146 .expect("insert edge");
9147 conn.execute(
9149 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9150 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9151 [],
9152 )
9153 .expect("insert retire event A");
9154 conn.execute(
9155 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9156 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9157 [],
9158 )
9159 .expect("insert edge retire event");
9160 conn.execute(
9161 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9162 [],
9163 )
9164 .expect("retire node A");
9165 conn.execute(
9166 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9167 [],
9168 )
9169 .expect("retire node B");
9170 conn.execute(
9171 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9172 [],
9173 )
9174 .expect("retire edge");
9175 conn.execute("DELETE FROM nodes WHERE logical_id = 'doc-2'", [])
9178 .expect("purge node B rows");
9179 }
9180
9181 let report = service.restore_logical_id("doc-1").expect("restore A");
9183 assert!(!report.was_noop);
9184 assert_eq!(report.restored_node_rows, 1);
9185 assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
9186 assert_eq!(report.skipped_edges.len(), 1);
9187 assert_eq!(report.skipped_edges[0].edge_logical_id, "edge-1");
9188 assert_eq!(report.skipped_edges[0].missing_endpoint, "doc-2");
9189
9190 let conn = sqlite::open_connection(db.path()).expect("conn");
9192 let active_edge_count: i64 = conn
9193 .query_row(
9194 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9195 [],
9196 |row| row.get(0),
9197 )
9198 .expect("active edge count");
9199 assert_eq!(active_edge_count, 0, "edge must remain retired");
9200 }
9201
9202 #[test]
9203 fn restore_restores_edges_to_active_nodes() {
9204 let (db, service) = setup();
9205 {
9206 let conn = sqlite::open_connection(db.path()).expect("conn");
9207 conn.execute(
9209 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9210 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9211 [],
9212 )
9213 .expect("insert node A");
9214 conn.execute(
9215 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9216 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9217 [],
9218 )
9219 .expect("insert node B");
9220 conn.execute(
9222 "INSERT INTO edges \
9223 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9224 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9225 [],
9226 )
9227 .expect("insert edge");
9228 conn.execute(
9230 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9231 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9232 [],
9233 )
9234 .expect("insert retire event A");
9235 conn.execute(
9236 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9237 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9238 [],
9239 )
9240 .expect("insert edge retire event");
9241 conn.execute(
9242 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9243 [],
9244 )
9245 .expect("retire node A");
9246 conn.execute(
9247 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9248 [],
9249 )
9250 .expect("retire edge");
9251 }
9252
9253 let report = service.restore_logical_id("doc-1").expect("restore A");
9255 assert!(!report.was_noop);
9256 assert_eq!(report.restored_node_rows, 1);
9257 assert!(report.restored_edge_rows > 0, "edge should be restored");
9258 assert!(
9259 report.skipped_edges.is_empty(),
9260 "no edges should be skipped"
9261 );
9262
9263 let conn = sqlite::open_connection(db.path()).expect("conn");
9264 let active_edge_count: i64 = conn
9265 .query_row(
9266 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9267 [],
9268 |row| row.get(0),
9269 )
9270 .expect("active edge count");
9271 assert_eq!(active_edge_count, 1, "edge must be active");
9272 }
9273
9274 #[test]
9275 fn restore_restores_edges_when_both_restored() {
9276 let (db, service) = setup();
9277 {
9278 let conn = sqlite::open_connection(db.path()).expect("conn");
9279 conn.execute(
9281 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9282 VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
9283 [],
9284 )
9285 .expect("insert node A");
9286 conn.execute(
9287 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9288 VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
9289 [],
9290 )
9291 .expect("insert node B");
9292 conn.execute(
9294 "INSERT INTO edges \
9295 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
9296 VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
9297 [],
9298 )
9299 .expect("insert edge");
9300 conn.execute(
9302 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9303 VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9304 [],
9305 )
9306 .expect("insert retire event A");
9307 conn.execute(
9308 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9309 VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
9310 [],
9311 )
9312 .expect("insert retire event B");
9313 conn.execute(
9314 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9315 VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
9316 [],
9317 )
9318 .expect("insert edge retire event");
9319 conn.execute(
9320 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9321 [],
9322 )
9323 .expect("retire node A");
9324 conn.execute(
9325 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
9326 [],
9327 )
9328 .expect("retire node B");
9329 conn.execute(
9330 "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
9331 [],
9332 )
9333 .expect("retire edge");
9334 }
9335
9336 let report_b = service.restore_logical_id("doc-2").expect("restore B");
9338 assert!(!report_b.was_noop);
9339
9340 let report_a = service.restore_logical_id("doc-1").expect("restore A");
9342 assert!(!report_a.was_noop);
9343 assert_eq!(report_a.restored_node_rows, 1);
9344 assert!(
9345 report_a.restored_edge_rows > 0,
9346 "edge should be restored when both endpoints active"
9347 );
9348 assert!(
9349 report_a.skipped_edges.is_empty(),
9350 "no edges should be skipped"
9351 );
9352
9353 let conn = sqlite::open_connection(db.path()).expect("conn");
9354 let active_edge_count: i64 = conn
9355 .query_row(
9356 "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
9357 [],
9358 |row| row.get(0),
9359 )
9360 .expect("active edge count");
9361 assert_eq!(
9362 active_edge_count, 1,
9363 "edge must be active after both endpoints restored"
9364 );
9365 }
9366
9367 #[test]
9370 fn fts_property_schema_crud_round_trip() {
9371 let (_db, service) = setup();
9372
9373 let record = service
9375 .register_fts_property_schema(
9376 "Meeting",
9377 &["$.title".to_owned(), "$.summary".to_owned()],
9378 None,
9379 )
9380 .expect("register");
9381 assert_eq!(record.kind, "Meeting");
9382 assert_eq!(record.property_paths, vec!["$.title", "$.summary"]);
9383 assert_eq!(record.separator, " ");
9384 assert_eq!(record.format_version, 1);
9385
9386 let described = service
9388 .describe_fts_property_schema("Meeting")
9389 .expect("describe")
9390 .expect("should exist");
9391 assert_eq!(described, record);
9392
9393 let missing = service
9395 .describe_fts_property_schema("NoSuchKind")
9396 .expect("describe missing");
9397 assert!(missing.is_none());
9398
9399 let list = service.list_fts_property_schemas().expect("list");
9401 assert_eq!(list.len(), 1);
9402 assert_eq!(list[0].kind, "Meeting");
9403
9404 let updated = service
9406 .register_fts_property_schema(
9407 "Meeting",
9408 &["$.title".to_owned(), "$.notes".to_owned()],
9409 Some("\n"),
9410 )
9411 .expect("update");
9412 assert_eq!(updated.property_paths, vec!["$.title", "$.notes"]);
9413 assert_eq!(updated.separator, "\n");
9414
9415 service
9417 .remove_fts_property_schema("Meeting")
9418 .expect("remove");
9419 let after_remove = service
9420 .describe_fts_property_schema("Meeting")
9421 .expect("describe after remove");
9422 assert!(after_remove.is_none());
9423
9424 let err = service.remove_fts_property_schema("Meeting");
9426 assert!(err.is_err());
9427 }
9428
9429 #[test]
9430 fn describe_fts_property_schema_round_trips_recursive_entries() {
9431 let (_db, service) = setup();
9432
9433 let entries = vec![
9434 FtsPropertyPathSpec::scalar("$.title"),
9435 FtsPropertyPathSpec::recursive("$.payload"),
9436 ];
9437 let exclude = vec!["$.payload.private".to_owned()];
9438 let registered = service
9439 .register_fts_property_schema_with_entries(
9440 "KnowledgeItem",
9441 &entries,
9442 Some(" "),
9443 &exclude,
9444 )
9445 .expect("register recursive");
9446
9447 assert_eq!(registered.entries, entries);
9450 assert_eq!(registered.exclude_paths, exclude);
9451 assert_eq!(registered.property_paths, vec!["$.title", "$.payload"]);
9452
9453 let described = service
9454 .describe_fts_property_schema("KnowledgeItem")
9455 .expect("describe")
9456 .expect("should exist");
9457 assert_eq!(described.kind, "KnowledgeItem");
9458 assert_eq!(described.entries, entries);
9459 assert_eq!(described.exclude_paths, exclude);
9460 assert_eq!(described.property_paths, vec!["$.title", "$.payload"]);
9461 assert_eq!(described.separator, " ");
9462 assert_eq!(described.format_version, 1);
9463 }
9464
9465 #[test]
9466 fn list_fts_property_schemas_round_trips_recursive_entries() {
9467 let (_db, service) = setup();
9468
9469 let entries = vec![
9470 FtsPropertyPathSpec::scalar("$.title"),
9471 FtsPropertyPathSpec::recursive("$.payload"),
9472 ];
9473 let exclude = vec!["$.payload.secret".to_owned()];
9474 service
9475 .register_fts_property_schema_with_entries(
9476 "KnowledgeItem",
9477 &entries,
9478 Some(" "),
9479 &exclude,
9480 )
9481 .expect("register recursive");
9482
9483 let listed = service.list_fts_property_schemas().expect("list");
9484 assert_eq!(listed.len(), 1);
9485 let record = &listed[0];
9486 assert_eq!(record.kind, "KnowledgeItem");
9487 assert_eq!(record.entries, entries);
9488 assert_eq!(record.exclude_paths, exclude);
9489 assert_eq!(record.property_paths, vec!["$.title", "$.payload"]);
9490 }
9491
9492 #[test]
9493 fn describe_fts_property_schema_round_trips_scalar_only_entries() {
9494 let (_db, service) = setup();
9495
9496 service
9497 .register_fts_property_schema(
9498 "Meeting",
9499 &["$.title".to_owned(), "$.summary".to_owned()],
9500 None,
9501 )
9502 .expect("register scalar");
9503
9504 let described = service
9505 .describe_fts_property_schema("Meeting")
9506 .expect("describe")
9507 .expect("should exist");
9508 assert_eq!(described.property_paths, vec!["$.title", "$.summary"]);
9509 assert_eq!(described.entries.len(), 2);
9510 for entry in &described.entries {
9511 assert_eq!(
9512 entry.mode,
9513 FtsPropertyPathMode::Scalar,
9514 "scalar-only schema should deserialize every entry as Scalar"
9515 );
9516 }
9517 assert!(described.exclude_paths.is_empty());
9518 }
9519
9520 #[test]
9521 fn restore_reestablishes_property_fts_visibility() {
9522 let (db, service) = setup();
9523 {
9524 let conn = sqlite::open_connection(db.path()).expect("conn");
9525 conn.execute(
9527 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9528 VALUES ('Document', '[\"$.title\", \"$.body\"]', ' ')",
9529 [],
9530 )
9531 .expect("register schema");
9532 conn.execute(
9534 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9535 VALUES ('row-1', 'doc-1', 'Document', '{\"title\":\"Budget\",\"body\":\"Q3 forecast\"}', 100, 'seed')",
9536 [],
9537 )
9538 .expect("insert node");
9539 conn.execute(
9541 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
9542 VALUES ('chunk-1', 'doc-1', 'budget text', 100)",
9543 [],
9544 )
9545 .expect("insert chunk");
9546 conn.execute(
9548 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9549 VALUES ('doc-1', 'Document', 'Budget Q3 forecast')",
9550 [],
9551 )
9552 .expect("insert property fts");
9553 conn.execute(
9555 "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
9556 VALUES ('evt-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
9557 [],
9558 )
9559 .expect("retire event");
9560 conn.execute(
9561 "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
9562 [],
9563 )
9564 .expect("supersede");
9565 conn.execute("DELETE FROM fts_nodes", [])
9566 .expect("clear chunk fts");
9567 conn.execute("DELETE FROM fts_node_properties", [])
9568 .expect("clear property fts");
9569 }
9570
9571 let report = service.restore_logical_id("doc-1").expect("restore");
9572 assert_eq!(report.restored_property_fts_rows, 1);
9573
9574 let conn = sqlite::open_connection(db.path()).expect("conn");
9576 let prop_fts_count: i64 = conn
9577 .query_row(
9578 "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9579 [],
9580 |row| row.get(0),
9581 )
9582 .expect("prop fts count");
9583 assert_eq!(prop_fts_count, 1, "property FTS must be restored");
9584
9585 let text: String = conn
9586 .query_row(
9587 "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'doc-1'",
9588 [],
9589 |row| row.get(0),
9590 )
9591 .expect("prop fts text");
9592 assert_eq!(text, "Budget Q3 forecast");
9593 }
9594
9595 #[test]
9596 fn safe_export_preserves_fts_property_schemas() {
9597 let (_db, service) = setup();
9598 service
9599 .register_fts_property_schema(
9600 "Goal",
9601 &["$.name".to_owned(), "$.rationale".to_owned()],
9602 None,
9603 )
9604 .expect("register schema");
9605
9606 let export_dir = tempfile::TempDir::new().expect("temp dir");
9607 let export_path = export_dir.path().join("backup.db");
9608 service
9609 .safe_export(
9610 &export_path,
9611 SafeExportOptions {
9612 force_checkpoint: false,
9613 },
9614 )
9615 .expect("export");
9616
9617 let exported_conn = rusqlite::Connection::open(&export_path).expect("open exported db");
9619 let kind: String = exported_conn
9620 .query_row(
9621 "SELECT kind FROM fts_property_schemas WHERE kind = 'Goal'",
9622 [],
9623 |row| row.get(0),
9624 )
9625 .expect("schema must exist in export");
9626 assert_eq!(kind, "Goal");
9627 let paths_json: String = exported_conn
9628 .query_row(
9629 "SELECT property_paths_json FROM fts_property_schemas WHERE kind = 'Goal'",
9630 [],
9631 |row| row.get(0),
9632 )
9633 .expect("paths must exist");
9634 let paths: Vec<String> = serde_json::from_str(&paths_json).expect("valid json");
9635 assert_eq!(paths, vec!["$.name", "$.rationale"]);
9636 }
9637
9638 #[test]
9639 #[allow(clippy::too_many_lines)]
9640 fn export_recovery_rebuilds_property_fts_from_canonical_state() {
9641 let (db, service) = setup();
9642 service
9644 .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9645 .expect("register");
9646 {
9647 let conn = sqlite::open_connection(db.path()).expect("conn");
9648 conn.execute(
9649 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9650 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9651 [],
9652 )
9653 .expect("insert node 1");
9654 conn.execute(
9655 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9656 VALUES ('goal-1', 'Goal', 'Ship v2')",
9657 [],
9658 )
9659 .expect("insert property FTS row 1");
9660 conn.execute(
9661 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9662 VALUES ('row-2', 'goal-2', 'Goal', '{\"name\":\"Launch redesign\"}', 100, 'seed')",
9663 [],
9664 )
9665 .expect("insert node 2");
9666 conn.execute(
9667 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9668 VALUES ('goal-2', 'Goal', 'Launch redesign')",
9669 [],
9670 )
9671 .expect("insert property FTS row 2");
9672 }
9673
9674 let export_dir = tempfile::TempDir::new().expect("temp dir");
9676 let export_path = export_dir.path().join("backup.db");
9677 service
9678 .safe_export(
9679 &export_path,
9680 SafeExportOptions {
9681 force_checkpoint: false,
9682 },
9683 )
9684 .expect("export");
9685
9686 {
9690 let conn = rusqlite::Connection::open(&export_path).expect("open export");
9691 conn.execute(
9692 "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9693 [],
9694 )
9695 .expect("delete old row");
9696 conn.execute(
9697 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9698 VALUES ('goal-1', 'Goal', 'completely wrong stale text')",
9699 [],
9700 )
9701 .expect("insert corrupted row");
9702 conn.execute(
9703 "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-2'",
9704 [],
9705 )
9706 .expect("delete goal-2 row");
9707 }
9708
9709 let schema = Arc::new(SchemaManager::new());
9711 let exported_service = AdminService::new(&export_path, Arc::clone(&schema));
9712 exported_service
9713 .rebuild_projections(ProjectionTarget::Fts)
9714 .expect("rebuild");
9715
9716 let coordinator = ExecutionCoordinator::open(
9719 &export_path,
9720 Arc::clone(&schema),
9721 None,
9722 1,
9723 Arc::new(TelemetryCounters::default()),
9724 None,
9725 )
9726 .expect("coordinator");
9727
9728 let compiled = QueryBuilder::nodes("Goal")
9729 .text_search("Ship", 10)
9730 .limit(10)
9731 .compile()
9732 .expect("compile");
9733 let rows = coordinator
9734 .execute_compiled_read(&compiled)
9735 .expect("execute read");
9736 assert_eq!(rows.nodes.len(), 1);
9737 assert_eq!(rows.nodes[0].logical_id, "goal-1");
9738
9739 let compiled2 = QueryBuilder::nodes("Goal")
9741 .text_search("redesign", 10)
9742 .limit(10)
9743 .compile()
9744 .expect("compile");
9745 let rows2 = coordinator
9746 .execute_compiled_read(&compiled2)
9747 .expect("execute read");
9748 assert_eq!(rows2.nodes.len(), 1);
9749 assert_eq!(rows2.nodes[0].logical_id, "goal-2");
9750
9751 let compiled3 = QueryBuilder::nodes("Goal")
9753 .text_search("stale", 10)
9754 .limit(10)
9755 .compile()
9756 .expect("compile");
9757 let rows3 = coordinator
9758 .execute_compiled_read(&compiled3)
9759 .expect("execute read");
9760 assert_eq!(
9761 rows3.nodes.len(),
9762 0,
9763 "corrupted text must not appear in search after rebuild"
9764 );
9765
9766 let integrity = exported_service.check_integrity().expect("integrity");
9768 assert_eq!(integrity.missing_property_fts_rows, 0);
9769 let semantics = exported_service.check_semantics().expect("semantics");
9770 assert_eq!(semantics.drifted_property_fts_rows, 0);
9771 assert_eq!(semantics.orphaned_property_fts_rows, 0);
9772 assert_eq!(semantics.duplicate_property_fts_rows, 0);
9773 }
9774
9775 #[test]
9776 fn check_integrity_no_false_positives_for_empty_extraction() {
9777 let (db, service) = setup();
9778 {
9779 let conn = sqlite::open_connection(db.path()).expect("conn");
9780 conn.execute(
9782 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9783 VALUES ('Ticket', '[\"$.searchable\"]', ' ')",
9784 [],
9785 )
9786 .expect("register schema");
9787 conn.execute(
9790 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9791 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"status\":\"open\"}', 100, 'seed')",
9792 [],
9793 )
9794 .expect("insert node");
9795 }
9796
9797 let report = service.check_integrity().expect("integrity");
9798 assert_eq!(
9799 report.missing_property_fts_rows, 0,
9800 "node with no extractable values must not be counted as missing"
9801 );
9802 }
9803
9804 #[test]
9805 fn check_integrity_detects_genuinely_missing_property_fts_rows() {
9806 let (db, service) = setup();
9807 {
9808 let conn = sqlite::open_connection(db.path()).expect("conn");
9809 conn.execute(
9810 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9811 VALUES ('Ticket', '[\"$.title\"]', ' ')",
9812 [],
9813 )
9814 .expect("register schema");
9815 conn.execute(
9817 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9818 VALUES ('row-1', 'ticket-1', 'Ticket', '{\"title\":\"fix login bug\"}', 100, 'seed')",
9819 [],
9820 )
9821 .expect("insert node");
9822 }
9823
9824 let report = service.check_integrity().expect("integrity");
9825 assert_eq!(
9826 report.missing_property_fts_rows, 1,
9827 "node with extractable values but no property FTS row must be detected"
9828 );
9829 }
9830
9831 #[test]
9832 fn rebuild_projections_fts_restores_missing_property_fts_rows() {
9833 let (db, service) = setup();
9834 {
9835 let conn = sqlite::open_connection(db.path()).expect("conn");
9836 conn.execute(
9837 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9838 VALUES ('Goal', '[\"$.name\"]', ' ')",
9839 [],
9840 )
9841 .expect("register schema");
9842 conn.execute(
9843 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9844 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9845 [],
9846 )
9847 .expect("insert node");
9848 }
9850
9851 let report = service
9852 .rebuild_projections(ProjectionTarget::Fts)
9853 .expect("rebuild");
9854 assert!(
9855 report.rebuilt_rows >= 1,
9856 "rebuild must insert at least one property FTS row"
9857 );
9858
9859 let conn = sqlite::open_connection(db.path()).expect("conn");
9860 let text: String = conn
9861 .query_row(
9862 "SELECT text_content FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9863 [],
9864 |row| row.get(0),
9865 )
9866 .expect("property FTS row must exist after rebuild");
9867 assert_eq!(text, "Ship v2");
9868 }
9869
9870 #[test]
9871 fn rebuild_missing_projections_fills_gap_for_deleted_property_fts_row() {
9872 let (db, service) = setup();
9873 {
9874 let conn = sqlite::open_connection(db.path()).expect("conn");
9875 conn.execute(
9876 "INSERT INTO fts_property_schemas (kind, property_paths_json, separator) \
9877 VALUES ('Goal', '[\"$.name\"]', ' ')",
9878 [],
9879 )
9880 .expect("register schema");
9881 conn.execute(
9882 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9883 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9884 [],
9885 )
9886 .expect("insert node");
9887 conn.execute(
9889 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9890 VALUES ('goal-1', 'Goal', 'Ship v2')",
9891 [],
9892 )
9893 .expect("insert property fts");
9894 conn.execute(
9895 "DELETE FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9896 [],
9897 )
9898 .expect("delete property fts");
9899 }
9900
9901 let report = service
9902 .rebuild_missing_projections()
9903 .expect("rebuild missing");
9904 assert!(
9905 report.rebuilt_rows >= 1,
9906 "missing rebuild must insert the gap-fill row"
9907 );
9908
9909 let conn = sqlite::open_connection(db.path()).expect("conn");
9910 let count: i64 = conn
9911 .query_row(
9912 "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9913 [],
9914 |row| row.get(0),
9915 )
9916 .expect("count");
9917 assert_eq!(
9918 count, 1,
9919 "gap-fill must restore exactly one property FTS row"
9920 );
9921 }
9922
9923 #[test]
9924 fn remove_schema_then_rebuild_cleans_stale_property_fts_rows() {
9925 let (db, service) = setup();
9926 service
9927 .register_fts_property_schema("Goal", &["$.name".to_owned()], None)
9928 .expect("register");
9929 {
9930 let conn = sqlite::open_connection(db.path()).expect("conn");
9931 conn.execute(
9932 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
9933 VALUES ('row-1', 'goal-1', 'Goal', '{\"name\":\"Ship v2\"}', 100, 'seed')",
9934 [],
9935 )
9936 .expect("insert node");
9937 conn.execute(
9939 "INSERT INTO fts_node_properties (node_logical_id, kind, text_content) \
9940 VALUES ('goal-1', 'Goal', 'Ship v2')",
9941 [],
9942 )
9943 .expect("insert property fts");
9944 }
9945
9946 service.remove_fts_property_schema("Goal").expect("remove");
9948
9949 let semantics = service.check_semantics().expect("semantics");
9951 assert_eq!(
9952 semantics.orphaned_property_fts_rows, 1,
9953 "stale property FTS rows must be detected after schema removal"
9954 );
9955
9956 service
9958 .rebuild_projections(ProjectionTarget::Fts)
9959 .expect("rebuild");
9960
9961 let conn = sqlite::open_connection(db.path()).expect("conn");
9962 let count: i64 = conn
9963 .query_row(
9964 "SELECT count(*) FROM fts_node_properties WHERE node_logical_id = 'goal-1'",
9965 [],
9966 |row| row.get(0),
9967 )
9968 .expect("count");
9969 assert_eq!(
9970 count, 0,
9971 "rebuild after schema removal must delete stale property FTS rows"
9972 );
9973 }
9974
9975 mod validate_fts_property_paths_tests {
9976 use super::super::validate_fts_property_paths;
9977
9978 #[test]
9979 fn valid_simple_path() {
9980 assert!(validate_fts_property_paths(&["$.name".to_owned()]).is_ok());
9981 }
9982
9983 #[test]
9984 fn valid_nested_path() {
9985 assert!(validate_fts_property_paths(&["$.address.city".to_owned()]).is_ok());
9986 }
9987
9988 #[test]
9989 fn valid_underscore_segment() {
9990 assert!(validate_fts_property_paths(&["$.a_b".to_owned()]).is_ok());
9991 }
9992
9993 #[test]
9994 fn rejects_bare_prefix() {
9995 let result = validate_fts_property_paths(&["$.".to_owned()]);
9996 assert!(result.is_err(), "path '$.' must be rejected");
9997 }
9998
9999 #[test]
10000 fn rejects_double_dot() {
10001 let result = validate_fts_property_paths(&["$..x".to_owned()]);
10002 assert!(result.is_err(), "path '$..x' must be rejected");
10003 }
10004
10005 #[test]
10006 fn rejects_trailing_dot() {
10007 let result = validate_fts_property_paths(&["$.foo.".to_owned()]);
10008 assert!(result.is_err(), "path '$.foo.' must be rejected");
10009 }
10010
10011 #[test]
10012 fn rejects_space_in_segment() {
10013 let result = validate_fts_property_paths(&["$.foo bar".to_owned()]);
10014 assert!(result.is_err(), "path '$.foo bar' must be rejected");
10015 }
10016
10017 #[test]
10018 fn rejects_bracket_syntax() {
10019 let result = validate_fts_property_paths(&["$.foo[0]".to_owned()]);
10020 assert!(result.is_err(), "path '$.foo[0]' must be rejected");
10021 }
10022
10023 #[test]
10024 fn rejects_duplicates() {
10025 let result = validate_fts_property_paths(&["$.name".to_owned(), "$.name".to_owned()]);
10026 assert!(result.is_err(), "duplicate paths must be rejected");
10027 }
10028
10029 #[test]
10030 fn rejects_empty_list() {
10031 let result = validate_fts_property_paths(&[]);
10032 assert!(result.is_err(), "empty path list must be rejected");
10033 }
10034 }
10035}