1use std::fmt::Write as _;
2use std::fs;
3use std::io::{self, Read, Write};
4use std::path::{Path, PathBuf};
5use std::process::{Command, Stdio};
6use std::sync::Arc;
7use std::sync::mpsc;
8use std::thread;
9use std::time::{Duration, Instant, SystemTime};
10
11use fathomdb_schema::{SchemaError, SchemaManager};
12use rusqlite::{DatabaseName, OptionalExtension, TransactionBehavior};
13use serde::{Deserialize, Serialize};
14use sha2::{Digest, Sha256};
15
16use crate::{
17 EngineError, ProjectionRepairReport, ProjectionService, executable_trust,
18 ids::new_id,
19 operational::{
20 OperationalCollectionKind, OperationalCollectionRecord, OperationalCompactionReport,
21 OperationalCurrentRow, OperationalFilterClause, OperationalFilterField,
22 OperationalFilterFieldType, OperationalFilterMode, OperationalFilterValue,
23 OperationalHistoryValidationIssue, OperationalHistoryValidationReport,
24 OperationalMutationRow, OperationalPurgeReport, OperationalReadReport,
25 OperationalReadRequest, OperationalRegisterRequest, OperationalRepairReport,
26 OperationalRetentionActionKind, OperationalRetentionPlanItem,
27 OperationalRetentionPlanReport, OperationalRetentionRunItem, OperationalRetentionRunReport,
28 OperationalSecondaryIndexDefinition, OperationalSecondaryIndexRebuildReport,
29 OperationalTraceReport, extract_secondary_index_entries_for_current,
30 extract_secondary_index_entries_for_mutation, parse_operational_secondary_indexes_json,
31 parse_operational_validation_contract, validate_operational_payload_against_contract,
32 },
33 projection::ProjectionTarget,
34 sqlite,
35};
36
/// Outcome of the physical and structural checks performed by
/// [`AdminService::check_integrity`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct IntegrityReport {
    /// `true` when the first row of `PRAGMA integrity_check` is `ok`.
    pub physical_ok: bool,
    /// `true` when `pragma_foreign_key_check` reports no violations.
    pub foreign_keys_ok: bool,
    /// Chunks belonging to an active node that have no `fts_nodes` row.
    pub missing_fts_rows: usize,
    /// Number of `logical_id`s with more than one non-superseded node row.
    pub duplicate_active_logical_ids: usize,
    /// Rows in `operational_mutations` plus rows in `operational_current`
    /// whose collection name is not present in `operational_collections`.
    pub operational_missing_collections: usize,
    /// `operational_current` rows whose `last_mutation_id` matches no mutation.
    pub operational_missing_last_mutations: usize,
    /// One message per non-zero counter above (the two boolean checks do not
    /// produce a warning).
    pub warnings: Vec<String>,
}
48
/// Options accepted by the safe-export routine.
///
/// NOTE(review): the export implementation is not visible in this part of the
/// file; field semantics are inferred from the names — confirm against it.
#[derive(Clone, Copy, Debug)]
pub struct SafeExportOptions {
    /// Presumably forces a WAL checkpoint before the export is taken — TODO
    /// confirm. Enabled by [`Default`].
    pub force_checkpoint: bool,
}

impl Default for SafeExportOptions {
    /// Returns options with `force_checkpoint` switched on.
    fn default() -> Self {
        let force_checkpoint = true;
        Self { force_checkpoint }
    }
}
65
/// Version number of the export protocol.
///
/// NOTE(review): presumably the value recorded in
/// `SafeExportManifest::protocol_version` — confirm at the export call site.
const EXPORT_PROTOCOL_VERSION: u32 = 1;
68
/// Serializable manifest describing a safe export.
///
/// NOTE(review): the code that fills this struct is not visible here; the
/// field descriptions are inferred from names and types — confirm.
#[derive(Clone, Debug, Serialize)]
pub struct SafeExportManifest {
    /// Export time; presumably seconds since the Unix epoch — TODO confirm unit.
    pub exported_at: u64,
    /// Presumably the hex-encoded SHA-256 digest of the exported file.
    pub sha256: String,
    /// Schema version of the exported database.
    pub schema_version: u32,
    /// Export protocol version (see `EXPORT_PROTOCOL_VERSION`).
    pub protocol_version: u32,
    /// Presumably the SQLite page count of the exported database.
    pub page_count: u64,
}
83
/// Rows associated with a single `source_ref`.
///
/// NOTE(review): the tracing routine is not visible here; descriptions are
/// inferred from the field names — confirm against the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct TraceReport {
    /// The `source_ref` that was traced.
    pub source_ref: String,
    /// Number of node rows found.
    pub node_rows: usize,
    /// Number of edge rows found.
    pub edge_rows: usize,
    /// Number of action rows found.
    pub action_rows: usize,
    /// Number of operational mutation rows found.
    pub operational_mutation_rows: usize,
    /// Logical ids of the nodes found.
    pub node_logical_ids: Vec<String>,
    /// Ids of the actions found.
    pub action_ids: Vec<String>,
    /// Ids of the operational mutations found.
    pub operational_mutation_ids: Vec<String>,
}
96
/// An edge that was skipped, reported inside [`LogicalRestoreReport`].
///
/// NOTE(review): presumably skipped because one endpoint node was not
/// available during a restore — confirm against the restore routine.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SkippedEdge {
    /// Logical id of the skipped edge.
    pub edge_logical_id: String,
    /// Identifier of the endpoint that was missing.
    pub missing_endpoint: String,
}
103
/// Result of restoring a single logical id.
///
/// NOTE(review): the restore routine is not visible here; descriptions are
/// inferred from the field names — confirm against the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalRestoreReport {
    /// Logical id the restore was requested for.
    pub logical_id: String,
    /// Presumably `true` when nothing needed to be restored.
    pub was_noop: bool,
    /// Number of node rows restored.
    pub restored_node_rows: usize,
    /// Number of edge rows restored.
    pub restored_edge_rows: usize,
    /// Number of chunk rows restored.
    pub restored_chunk_rows: usize,
    /// Number of FTS rows restored.
    pub restored_fts_rows: usize,
    /// Number of vector rows restored.
    pub restored_vec_rows: usize,
    /// Edges that were not restored, with the endpoint that was missing.
    pub skipped_edges: Vec<SkippedEdge>,
    /// Free-form notes produced during the restore.
    pub notes: Vec<String>,
}
117
/// Result of purging a single logical id.
///
/// NOTE(review): the purge routine is not visible here; descriptions are
/// inferred from the field names — confirm against the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct LogicalPurgeReport {
    /// Logical id the purge was requested for.
    pub logical_id: String,
    /// Presumably `true` when nothing needed to be deleted.
    pub was_noop: bool,
    /// Number of node rows deleted.
    pub deleted_node_rows: usize,
    /// Number of edge rows deleted.
    pub deleted_edge_rows: usize,
    /// Number of chunk rows deleted.
    pub deleted_chunk_rows: usize,
    /// Number of FTS rows deleted.
    pub deleted_fts_rows: usize,
    /// Number of vector rows deleted.
    pub deleted_vec_rows: usize,
    /// Free-form notes produced during the purge.
    pub notes: Vec<String>,
}
130
/// Options for purging provenance events.
///
/// NOTE(review): the purge routine is not visible here; descriptions are
/// inferred from the field names — confirm against the consumer.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct ProvenancePurgeOptions {
    /// Presumably: report what would be deleted without deleting anything.
    pub dry_run: bool,
    /// Event types to keep; deserializes to an empty list when omitted.
    #[serde(default)]
    pub preserve_event_types: Vec<String>,
}
138
/// Result of a provenance purge.
///
/// NOTE(review): the purge routine is not visible here; descriptions are
/// inferred from the field names — confirm against the producer.
#[derive(Clone, Debug, Serialize)]
pub struct ProvenancePurgeReport {
    /// Number of provenance events deleted.
    pub events_deleted: u64,
    /// Number of provenance events kept.
    pub events_preserved: u64,
    /// Presumably the timestamp of the oldest remaining event, `None` when
    /// no events remain — TODO confirm.
    pub oldest_remaining: Option<i64>,
}
146
/// Administrative entry point for a single database file.
///
/// Each operation opens its own connection through `connect`, which also
/// bootstraps the schema on that connection.
#[derive(Debug)]
pub struct AdminService {
    /// Path of the database file every connection is opened against.
    database_path: PathBuf,
    /// Schema manager used to bootstrap each freshly opened connection.
    schema_manager: Arc<SchemaManager>,
    /// Projection service created over the same path and schema manager.
    projections: ProjectionService,
}
154
/// Counters produced by [`AdminService::check_semantics`].
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct SemanticReport {
    /// Chunks whose `node_logical_id` matches no node row at all.
    pub orphaned_chunks: usize,
    /// Active (non-superseded) nodes whose `source_ref` is NULL.
    pub null_source_ref_nodes: usize,
    /// Steps whose `run_id` matches no run.
    pub broken_step_fk: usize,
    /// Actions whose `step_id` matches no step.
    pub broken_action_fk: usize,
    /// `fts_nodes` rows whose `chunk_id` matches no chunk.
    pub stale_fts_rows: usize,
    /// `fts_nodes` rows whose `node_logical_id` has no active node row.
    pub fts_rows_for_superseded_nodes: usize,
    /// Active edges whose source or target has no active node row.
    pub dangling_edges: usize,
    /// Logical ids for which every node row is superseded.
    pub orphaned_supersession_chains: usize,
    /// `vec_nodes_active` rows whose `chunk_id` matches no chunk. Always `0`
    /// without the `sqlite-vec` feature, or when the vec table/module is
    /// unavailable.
    pub stale_vec_rows: usize,
    /// `vec_nodes_active` rows whose chunk points at a logical id with no
    /// node rows at all (the query does not look at `superseded_at`).
    /// Always `0` without the `sqlite-vec` feature, or when the vec
    /// table/module is unavailable.
    pub vec_rows_for_superseded_nodes: usize,
    /// Keys in `latest_state` collections whose newest mutation is a `put`
    /// but which have no `operational_current` row.
    pub missing_operational_current_rows: usize,
    /// `operational_current` rows in `latest_state` collections that do not
    /// match their last mutation or have been overtaken by a newer mutation.
    pub stale_operational_current_rows: usize,
    /// Mutations created after their collection's `disabled_at` timestamp.
    pub disabled_collection_mutations: usize,
    /// `node_access_metadata` rows whose `logical_id` matches no node row.
    pub orphaned_last_access_metadata_rows: usize,
    /// One formatted message per non-zero counter above.
    pub warnings: Vec<String>,
}
188
/// Describes how vector embeddings are to be regenerated.
///
/// NOTE(review): the regeneration routine is not visible here; descriptions
/// are inferred from the field names — confirm against the consumer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorRegenerationConfig {
    /// Name of the vector profile.
    pub profile: String,
    /// Name of the vector table to write.
    pub table_name: String,
    /// Identity of the embedding model.
    pub model_identity: String,
    /// Version of the embedding model.
    pub model_version: String,
    /// Embedding dimension.
    pub dimension: usize,
    /// Identifier of the normalization policy.
    pub normalization_policy: String,
    /// Identifier of the chunking policy.
    pub chunking_policy: String,
    /// Identifier of the preprocessing policy.
    pub preprocessing_policy: String,
    /// Generator command line; presumably the executable followed by its
    /// arguments — TODO confirm.
    pub generator_command: Vec<String>,
}
203
/// Result of a vector regeneration run.
///
/// NOTE(review): the regeneration routine is not visible here; descriptions
/// are inferred from the field names — confirm against the producer.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
pub struct VectorRegenerationReport {
    /// Name of the vector profile that was regenerated.
    pub profile: String,
    /// Name of the vector table that was written.
    pub table_name: String,
    /// Embedding dimension used.
    pub dimension: usize,
    /// Number of chunks considered.
    pub total_chunks: usize,
    /// Number of vector rows regenerated.
    pub regenerated_rows: usize,
    /// Whether the vector contract was persisted.
    pub contract_persisted: bool,
    /// Free-form notes produced during the run.
    pub notes: Vec<String>,
}
215
/// Resource and trust limits for the external vector generator.
///
/// NOTE(review): enforcement is not visible in this part of the file; the
/// field meanings are inferred from the names — confirm against the runner.
/// Default values are taken from the `Default` impl.
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[serde(rename_all = "snake_case")]
pub struct VectorGeneratorPolicy {
    /// Generator timeout in milliseconds (default `300_000`).
    pub timeout_ms: u64,
    /// Upper bound on generator stdout, in bytes (default 64 MiB).
    pub max_stdout_bytes: usize,
    /// Upper bound on generator stderr, in bytes (default 1 MiB).
    pub max_stderr_bytes: usize,
    /// Upper bound on the input sent to the generator, in bytes (default 64 MiB).
    pub max_input_bytes: usize,
    /// Upper bound on the number of chunks (default `1_000_000`).
    pub max_chunks: usize,
    /// Presumably requires the generator executable to be an absolute path.
    /// Deserializes to `true` when omitted.
    #[serde(default = "default_require_absolute_executable")]
    pub require_absolute_executable: bool,
    /// Presumably rejects a world-writable generator executable.
    /// Deserializes to `true` when omitted.
    #[serde(default = "default_reject_world_writable_executable")]
    pub reject_world_writable_executable: bool,
    /// Presumably the directories the executable must live under.
    /// Deserializes to an empty list when omitted.
    #[serde(default)]
    pub allowed_executable_roots: Vec<String>,
    /// Presumably the environment variables passed through to the generator.
    /// Deserializes to an empty list when omitted.
    #[serde(default)]
    pub preserve_env_vars: Vec<String>,
}
234
235impl Default for VectorGeneratorPolicy {
236 fn default() -> Self {
237 Self {
238 timeout_ms: 300_000,
239 max_stdout_bytes: 64 * 1024 * 1024,
240 max_stderr_bytes: 1024 * 1024,
241 max_input_bytes: 64 * 1024 * 1024,
242 max_chunks: 1_000_000,
243 require_absolute_executable: true,
244 reject_world_writable_executable: true,
245 allowed_executable_roots: vec![],
246 preserve_env_vars: vec![],
247 }
248 }
249}
250
/// Serde default for `VectorGeneratorPolicy::require_absolute_executable`:
/// the check is on unless the input turns it off explicitly.
const fn default_require_absolute_executable() -> bool {
    true
}
254
/// Serde default for `VectorGeneratorPolicy::reject_world_writable_executable`:
/// the check is on unless the input turns it off explicitly.
const fn default_reject_world_writable_executable() -> bool {
    true
}
258
// NOTE(review): none of these constants is referenced in the visible part of
// this file; the descriptions below are inferred from the names — confirm at
// the use sites.

/// Format version of the persisted vector contract.
const CURRENT_VECTOR_CONTRACT_FORMAT_VERSION: i64 = 1;
/// Maximum length of a vector profile name.
const MAX_PROFILE_LEN: usize = 128;
/// Maximum length of a model identity string.
const MAX_MODEL_IDENTITY_LEN: usize = 256;
/// Maximum length of a model version string.
const MAX_MODEL_VERSION_LEN: usize = 128;
/// Maximum length of a policy identifier.
const MAX_POLICY_LEN: usize = 128;
/// Maximum length of a single generator command argument.
const MAX_GENERATOR_COMMAND_ARG_LEN: usize = 4096;
/// Maximum combined length of the generator command (16 KiB).
const MAX_GENERATOR_COMMAND_TOTAL_LEN: usize = 16 * 1024;
/// Maximum size of the serialized contract JSON (32 KiB).
const MAX_CONTRACT_JSON_BYTES: usize = 32 * 1024;
/// Maximum size of audit metadata, in bytes.
const MAX_AUDIT_METADATA_BYTES: usize = 2048;
/// Row limit applied to operational reads that do not specify one.
const DEFAULT_OPERATIONAL_READ_LIMIT: usize = 100;
/// Largest row limit an operational read may request.
const MAX_OPERATIONAL_READ_LIMIT: usize = 1000;
270
/// Cheaply cloneable handle to a shared [`AdminService`].
#[derive(Clone, Debug)]
pub struct AdminHandle {
    /// Reference-counted service shared by every clone of this handle.
    inner: Arc<AdminService>,
}
276
277impl AdminHandle {
278 #[must_use]
280 pub fn new(service: AdminService) -> Self {
281 Self {
282 inner: Arc::new(service),
283 }
284 }
285
286 #[must_use]
288 pub fn service(&self) -> Arc<AdminService> {
289 Arc::clone(&self.inner)
290 }
291}
292
293impl AdminService {
294 #[must_use]
296 pub fn new(path: impl AsRef<Path>, schema_manager: Arc<SchemaManager>) -> Self {
297 let database_path = path.as_ref().to_path_buf();
298 let projections = ProjectionService::new(&database_path, Arc::clone(&schema_manager));
299 Self {
300 database_path,
301 schema_manager,
302 projections,
303 }
304 }
305
306 fn connect(&self) -> Result<rusqlite::Connection, EngineError> {
307 #[cfg(feature = "sqlite-vec")]
308 let conn = sqlite::open_connection_with_vec(&self.database_path)?;
309 #[cfg(not(feature = "sqlite-vec"))]
310 let conn = sqlite::open_connection(&self.database_path)?;
311 self.schema_manager.bootstrap(&conn)?;
312 Ok(conn)
313 }
314
315 pub fn check_integrity(&self) -> Result<IntegrityReport, EngineError> {
318 let conn = self.connect()?;
319
320 let physical_result: String =
321 conn.query_row("PRAGMA integrity_check", [], |row| row.get(0))?;
322 let foreign_key_count: i64 =
323 conn.query_row("SELECT count(*) FROM pragma_foreign_key_check", [], |row| {
324 row.get(0)
325 })?;
326 let missing_fts_rows: i64 = conn.query_row(
327 r"
328 SELECT count(*)
329 FROM chunks c
330 JOIN nodes n
331 ON n.logical_id = c.node_logical_id
332 AND n.superseded_at IS NULL
333 WHERE NOT EXISTS (
334 SELECT 1
335 FROM fts_nodes f
336 WHERE f.chunk_id = c.id
337 )
338 ",
339 [],
340 |row| row.get(0),
341 )?;
342 let duplicate_active: i64 = conn.query_row(
343 r"
344 SELECT count(*)
345 FROM (
346 SELECT logical_id
347 FROM nodes
348 WHERE superseded_at IS NULL
349 GROUP BY logical_id
350 HAVING count(*) > 1
351 )
352 ",
353 [],
354 |row| row.get(0),
355 )?;
356 let operational_missing_collections: i64 = conn.query_row(
357 r"
358 SELECT (
359 SELECT count(*)
360 FROM operational_mutations m
361 LEFT JOIN operational_collections c ON c.name = m.collection_name
362 WHERE c.name IS NULL
363 ) + (
364 SELECT count(*)
365 FROM operational_current oc
366 LEFT JOIN operational_collections c ON c.name = oc.collection_name
367 WHERE c.name IS NULL
368 )
369 ",
370 [],
371 |row| row.get(0),
372 )?;
373 let operational_missing_last_mutations: i64 = conn.query_row(
374 r"
375 SELECT count(*)
376 FROM operational_current oc
377 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
378 WHERE m.id IS NULL
379 ",
380 [],
381 |row| row.get(0),
382 )?;
383
384 let mut warnings = Vec::new();
385 if missing_fts_rows > 0 {
386 warnings.push("missing FTS projections detected".to_owned());
387 }
388 if duplicate_active > 0 {
389 warnings.push("duplicate active logical_ids detected".to_owned());
390 }
391 if operational_missing_collections > 0 {
392 warnings.push("operational rows reference missing collections".to_owned());
393 }
394 if operational_missing_last_mutations > 0 {
395 warnings.push("operational current rows reference missing last mutations".to_owned());
396 }
397
398 Ok(IntegrityReport {
403 physical_ok: physical_result == "ok",
404 foreign_keys_ok: foreign_key_count == 0,
405 missing_fts_rows: i64_to_usize(missing_fts_rows),
406 duplicate_active_logical_ids: i64_to_usize(duplicate_active),
407 operational_missing_collections: i64_to_usize(operational_missing_collections),
408 operational_missing_last_mutations: i64_to_usize(operational_missing_last_mutations),
409 warnings,
410 })
411 }
412
413 #[allow(clippy::too_many_lines)]
416 pub fn check_semantics(&self) -> Result<SemanticReport, EngineError> {
417 let conn = self.connect()?;
418
419 let orphaned_chunks: i64 = conn.query_row(
420 r"
421 SELECT count(*)
422 FROM chunks c
423 WHERE NOT EXISTS (
424 SELECT 1 FROM nodes n
425 WHERE n.logical_id = c.node_logical_id
426 )
427 ",
428 [],
429 |row| row.get(0),
430 )?;
431
432 let null_source_ref_nodes: i64 = conn.query_row(
433 "SELECT count(*) FROM nodes WHERE source_ref IS NULL AND superseded_at IS NULL",
434 [],
435 |row| row.get(0),
436 )?;
437
438 let broken_step_fk: i64 = conn.query_row(
439 r"
440 SELECT count(*) FROM steps s
441 WHERE NOT EXISTS (SELECT 1 FROM runs r WHERE r.id = s.run_id)
442 ",
443 [],
444 |row| row.get(0),
445 )?;
446
447 let broken_action_fk: i64 = conn.query_row(
448 r"
449 SELECT count(*) FROM actions a
450 WHERE NOT EXISTS (SELECT 1 FROM steps s WHERE s.id = a.step_id)
451 ",
452 [],
453 |row| row.get(0),
454 )?;
455
456 let stale_fts_rows: i64 = conn.query_row(
457 r"
458 SELECT count(*) FROM fts_nodes f
459 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = f.chunk_id)
460 ",
461 [],
462 |row| row.get(0),
463 )?;
464
465 let fts_rows_for_superseded_nodes: i64 = conn.query_row(
466 r"
467 SELECT count(*) FROM fts_nodes f
468 WHERE NOT EXISTS (
469 SELECT 1 FROM nodes n
470 WHERE n.logical_id = f.node_logical_id AND n.superseded_at IS NULL
471 )
472 ",
473 [],
474 |row| row.get(0),
475 )?;
476
477 let dangling_edges: i64 = conn.query_row(
478 r"
479 SELECT count(*) FROM edges e
480 WHERE e.superseded_at IS NULL AND (
481 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.source_logical_id AND n.superseded_at IS NULL)
482 OR
483 NOT EXISTS (SELECT 1 FROM nodes n WHERE n.logical_id = e.target_logical_id AND n.superseded_at IS NULL)
484 )
485 ",
486 [],
487 |row| row.get(0),
488 )?;
489
490 let orphaned_supersession_chains: i64 = conn.query_row(
491 r"
492 SELECT count(*) FROM (
493 SELECT logical_id FROM nodes
494 GROUP BY logical_id
495 HAVING count(*) > 0 AND sum(CASE WHEN superseded_at IS NULL THEN 1 ELSE 0 END) = 0
496 )
497 ",
498 [],
499 |row| row.get(0),
500 )?;
501
502 #[cfg(feature = "sqlite-vec")]
504 let stale_vec_rows: i64 = match conn.query_row(
505 r"
506 SELECT count(*) FROM vec_nodes_active v
507 WHERE NOT EXISTS (SELECT 1 FROM chunks c WHERE c.id = v.chunk_id)
508 ",
509 [],
510 |row| row.get(0),
511 ) {
512 Ok(n) => n,
513 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
514 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
515 {
516 0
517 }
518 Err(e) => return Err(EngineError::Sqlite(e)),
519 };
520 #[cfg(not(feature = "sqlite-vec"))]
521 let stale_vec_rows: i64 = 0;
522
523 #[cfg(feature = "sqlite-vec")]
524 let vec_rows_for_superseded_nodes: i64 = match conn.query_row(
525 r"
526 SELECT count(*) FROM vec_nodes_active v
527 JOIN chunks c ON c.id = v.chunk_id
528 WHERE NOT EXISTS (
529 SELECT 1 FROM nodes n
530 WHERE n.logical_id = c.node_logical_id
531 )
532 ",
533 [],
534 |row| row.get(0),
535 ) {
536 Ok(n) => n,
537 Err(rusqlite::Error::SqliteFailure(_, Some(ref msg)))
538 if msg.contains("vec_nodes_active") || msg.contains("no such module: vec0") =>
539 {
540 0
541 }
542 Err(e) => return Err(EngineError::Sqlite(e)),
543 };
544 #[cfg(not(feature = "sqlite-vec"))]
545 let vec_rows_for_superseded_nodes: i64 = 0;
546 let missing_operational_current_rows: i64 = conn.query_row(
547 r"
548 SELECT count(*)
549 FROM operational_mutations m
550 JOIN operational_collections c
551 ON c.name = m.collection_name
552 AND c.kind = 'latest_state'
553 WHERE m.op_kind = 'put'
554 AND NOT EXISTS (
555 SELECT 1
556 FROM operational_mutations newer
557 WHERE newer.collection_name = m.collection_name
558 AND newer.record_key = m.record_key
559 AND newer.mutation_order > m.mutation_order
560 )
561 AND NOT EXISTS (
562 SELECT 1
563 FROM operational_current oc
564 WHERE oc.collection_name = m.collection_name
565 AND oc.record_key = m.record_key
566 )
567 ",
568 [],
569 |row| row.get(0),
570 )?;
571 let stale_operational_current_rows: i64 = conn.query_row(
572 r"
573 SELECT count(*)
574 FROM operational_current oc
575 JOIN operational_collections c
576 ON c.name = oc.collection_name
577 AND c.kind = 'latest_state'
578 LEFT JOIN operational_mutations m ON m.id = oc.last_mutation_id
579 WHERE m.id IS NULL
580 OR m.collection_name != oc.collection_name
581 OR m.record_key != oc.record_key
582 OR m.op_kind != 'put'
583 OR m.payload_json != oc.payload_json
584 OR EXISTS (
585 SELECT 1
586 FROM operational_mutations newer
587 WHERE newer.collection_name = oc.collection_name
588 AND newer.record_key = oc.record_key
589 AND newer.mutation_order > m.mutation_order
590 )
591 ",
592 [],
593 |row| row.get(0),
594 )?;
595 let disabled_collection_mutations: i64 = conn.query_row(
596 r"
597 SELECT count(*)
598 FROM operational_mutations m
599 JOIN operational_collections c ON c.name = m.collection_name
600 WHERE c.disabled_at IS NOT NULL AND m.created_at > c.disabled_at
601 ",
602 [],
603 |row| row.get(0),
604 )?;
605 let orphaned_last_access_metadata_rows: i64 = conn.query_row(
606 r"
607 SELECT count(*)
608 FROM node_access_metadata am
609 WHERE NOT EXISTS (
610 SELECT 1 FROM nodes n WHERE n.logical_id = am.logical_id
611 )
612 ",
613 [],
614 |row| row.get(0),
615 )?;
616
617 let mut warnings = Vec::new();
618 if orphaned_chunks > 0 {
619 warnings.push(format!(
620 "{orphaned_chunks} orphaned chunk(s) with no surviving node history"
621 ));
622 }
623 if null_source_ref_nodes > 0 {
624 warnings.push(format!(
625 "{null_source_ref_nodes} active node(s) with null source_ref"
626 ));
627 }
628 if broken_step_fk > 0 {
629 warnings.push(format!(
630 "{broken_step_fk} step(s) referencing non-existent run"
631 ));
632 }
633 if broken_action_fk > 0 {
634 warnings.push(format!(
635 "{broken_action_fk} action(s) referencing non-existent step"
636 ));
637 }
638 if stale_fts_rows > 0 {
639 warnings.push(format!(
640 "{stale_fts_rows} stale FTS row(s) referencing missing chunk"
641 ));
642 }
643 if fts_rows_for_superseded_nodes > 0 {
644 warnings.push(format!(
645 "{fts_rows_for_superseded_nodes} FTS row(s) for superseded node(s)"
646 ));
647 }
648 if dangling_edges > 0 {
649 warnings.push(format!(
650 "{dangling_edges} active edge(s) with missing endpoint node"
651 ));
652 }
653 if orphaned_supersession_chains > 0 {
654 warnings.push(format!(
655 "{orphaned_supersession_chains} logical_id(s) with all versions superseded"
656 ));
657 }
658 if stale_vec_rows > 0 {
659 warnings.push(format!(
660 "{stale_vec_rows} stale vec row(s) referencing missing chunk"
661 ));
662 }
663 if vec_rows_for_superseded_nodes > 0 {
664 warnings.push(format!(
665 "{vec_rows_for_superseded_nodes} vec row(s) whose node history is missing"
666 ));
667 }
668 if missing_operational_current_rows > 0 {
669 warnings.push(format!(
670 "{missing_operational_current_rows} latest-state key(s) missing operational_current rows"
671 ));
672 }
673 if stale_operational_current_rows > 0 {
674 warnings.push(format!(
675 "{stale_operational_current_rows} stale operational_current row(s)"
676 ));
677 }
678 if disabled_collection_mutations > 0 {
679 warnings.push(format!(
680 "{disabled_collection_mutations} mutation(s) were written after collection disable"
681 ));
682 }
683 if orphaned_last_access_metadata_rows > 0 {
684 warnings.push(format!(
685 "{orphaned_last_access_metadata_rows} last_access metadata row(s) reference missing node history"
686 ));
687 }
688
689 Ok(SemanticReport {
690 orphaned_chunks: i64_to_usize(orphaned_chunks),
691 null_source_ref_nodes: i64_to_usize(null_source_ref_nodes),
692 broken_step_fk: i64_to_usize(broken_step_fk),
693 broken_action_fk: i64_to_usize(broken_action_fk),
694 stale_fts_rows: i64_to_usize(stale_fts_rows),
695 fts_rows_for_superseded_nodes: i64_to_usize(fts_rows_for_superseded_nodes),
696 dangling_edges: i64_to_usize(dangling_edges),
697 orphaned_supersession_chains: i64_to_usize(orphaned_supersession_chains),
698 stale_vec_rows: i64_to_usize(stale_vec_rows),
699 vec_rows_for_superseded_nodes: i64_to_usize(vec_rows_for_superseded_nodes),
700 missing_operational_current_rows: i64_to_usize(missing_operational_current_rows),
701 stale_operational_current_rows: i64_to_usize(stale_operational_current_rows),
702 disabled_collection_mutations: i64_to_usize(disabled_collection_mutations),
703 orphaned_last_access_metadata_rows: i64_to_usize(orphaned_last_access_metadata_rows),
704 warnings,
705 })
706 }
707
708 pub fn register_operational_collection(
711 &self,
712 request: &OperationalRegisterRequest,
713 ) -> Result<OperationalCollectionRecord, EngineError> {
714 if request.name.trim().is_empty() {
715 return Err(EngineError::InvalidWrite(
716 "operational collection name must not be empty".to_owned(),
717 ));
718 }
719 if request.schema_json.is_empty() {
720 return Err(EngineError::InvalidWrite(
721 "operational collection schema_json must not be empty".to_owned(),
722 ));
723 }
724 if request.retention_json.is_empty() {
725 return Err(EngineError::InvalidWrite(
726 "operational collection retention_json must not be empty".to_owned(),
727 ));
728 }
729 if request.filter_fields_json.is_empty() {
730 return Err(EngineError::InvalidWrite(
731 "operational collection filter_fields_json must not be empty".to_owned(),
732 ));
733 }
734 parse_operational_validation_contract(&request.validation_json)
735 .map_err(EngineError::InvalidWrite)?;
736 parse_operational_secondary_indexes_json(&request.secondary_indexes_json, request.kind)
737 .map_err(EngineError::InvalidWrite)?;
738 if request.format_version <= 0 {
739 return Err(EngineError::InvalidWrite(
740 "operational collection format_version must be positive".to_owned(),
741 ));
742 }
743 parse_operational_filter_fields(&request.filter_fields_json)
744 .map_err(EngineError::InvalidWrite)?;
745
746 let mut conn = self.connect()?;
747 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
748 tx.execute(
749 "INSERT INTO operational_collections \
750 (name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at) \
751 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, unixepoch())",
752 rusqlite::params![
753 request.name.as_str(),
754 request.kind.as_str(),
755 request.schema_json.as_str(),
756 request.retention_json.as_str(),
757 request.filter_fields_json.as_str(),
758 request.validation_json.as_str(),
759 request.secondary_indexes_json.as_str(),
760 request.format_version,
761 ],
762 )?;
763 persist_simple_provenance_event(
764 &tx,
765 "operational_collection_registered",
766 request.name.as_str(),
767 Some(serde_json::json!({
768 "kind": request.kind.as_str(),
769 "format_version": request.format_version,
770 })),
771 )?;
772 tx.commit()?;
773
774 self.describe_operational_collection(&request.name)?
775 .ok_or_else(|| {
776 EngineError::Bridge("registered collection missing after commit".to_owned())
777 })
778 }
779
780 pub fn describe_operational_collection(
783 &self,
784 name: &str,
785 ) -> Result<Option<OperationalCollectionRecord>, EngineError> {
786 let conn = self.connect()?;
787 load_operational_collection_record(&conn, name)
788 }
789
790 pub fn update_operational_collection_filters(
794 &self,
795 name: &str,
796 filter_fields_json: &str,
797 ) -> Result<OperationalCollectionRecord, EngineError> {
798 if filter_fields_json.is_empty() {
799 return Err(EngineError::InvalidWrite(
800 "operational collection filter_fields_json must not be empty".to_owned(),
801 ));
802 }
803 let declared_fields = parse_operational_filter_fields(filter_fields_json)
804 .map_err(EngineError::InvalidWrite)?;
805
806 let mut conn = self.connect()?;
807 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
808 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
809 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
810 })?;
811 tx.execute(
812 "UPDATE operational_collections SET filter_fields_json = ?2 WHERE name = ?1",
813 rusqlite::params![name, filter_fields_json],
814 )?;
815 tx.execute(
816 "DELETE FROM operational_filter_values WHERE collection_name = ?1",
817 [name],
818 )?;
819
820 let mut mutation_stmt = tx.prepare(
821 "SELECT id, payload_json FROM operational_mutations \
822 WHERE collection_name = ?1 ORDER BY mutation_order",
823 )?;
824 let mutations = mutation_stmt
825 .query_map([name], |row| {
826 Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
827 })?
828 .collect::<Result<Vec<_>, _>>()?;
829 drop(mutation_stmt);
830
831 let mut insert_filter_value = tx.prepare_cached(
832 "INSERT INTO operational_filter_values \
833 (mutation_id, collection_name, field_name, string_value, integer_value) \
834 VALUES (?1, ?2, ?3, ?4, ?5)",
835 )?;
836 let mut inserted_values = 0usize;
837 for (mutation_id, payload_json) in &mutations {
838 for filter_value in
839 extract_operational_filter_values(&declared_fields, payload_json.as_str())
840 {
841 insert_filter_value.execute(rusqlite::params![
842 mutation_id,
843 name,
844 filter_value.field_name,
845 filter_value.string_value,
846 filter_value.integer_value,
847 ])?;
848 inserted_values += 1;
849 }
850 }
851 drop(insert_filter_value);
852
853 persist_simple_provenance_event(
854 &tx,
855 "operational_collection_filter_fields_updated",
856 name,
857 Some(serde_json::json!({
858 "field_count": declared_fields.len(),
859 "mutations_backfilled": mutations.len(),
860 "inserted_filter_values": inserted_values,
861 })),
862 )?;
863 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
864 EngineError::Bridge("operational collection missing after filter update".to_owned())
865 })?;
866 tx.commit()?;
867 Ok(updated)
868 }
869
870 pub fn update_operational_collection_validation(
873 &self,
874 name: &str,
875 validation_json: &str,
876 ) -> Result<OperationalCollectionRecord, EngineError> {
877 parse_operational_validation_contract(validation_json)
878 .map_err(EngineError::InvalidWrite)?;
879
880 let mut conn = self.connect()?;
881 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
882 load_operational_collection_record(&tx, name)?.ok_or_else(|| {
883 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
884 })?;
885 tx.execute(
886 "UPDATE operational_collections SET validation_json = ?2 WHERE name = ?1",
887 rusqlite::params![name, validation_json],
888 )?;
889 persist_simple_provenance_event(
890 &tx,
891 "operational_collection_validation_updated",
892 name,
893 Some(serde_json::json!({
894 "has_validation": !validation_json.is_empty(),
895 })),
896 )?;
897 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
898 EngineError::Bridge("operational collection missing after validation update".to_owned())
899 })?;
900 tx.commit()?;
901 Ok(updated)
902 }
903
904 pub fn update_operational_collection_secondary_indexes(
908 &self,
909 name: &str,
910 secondary_indexes_json: &str,
911 ) -> Result<OperationalCollectionRecord, EngineError> {
912 let mut conn = self.connect()?;
913 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
914 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
915 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
916 })?;
917 let indexes = parse_operational_secondary_indexes_json(secondary_indexes_json, record.kind)
918 .map_err(EngineError::InvalidWrite)?;
919 tx.execute(
920 "UPDATE operational_collections SET secondary_indexes_json = ?2 WHERE name = ?1",
921 rusqlite::params![name, secondary_indexes_json],
922 )?;
923 let (mutation_entries_rebuilt, current_entries_rebuilt) =
924 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
925 persist_simple_provenance_event(
926 &tx,
927 "operational_collection_secondary_indexes_updated",
928 name,
929 Some(serde_json::json!({
930 "index_count": indexes.len(),
931 "mutation_entries_rebuilt": mutation_entries_rebuilt,
932 "current_entries_rebuilt": current_entries_rebuilt,
933 })),
934 )?;
935 let updated = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
936 EngineError::Bridge(
937 "operational collection missing after secondary index update".to_owned(),
938 )
939 })?;
940 tx.commit()?;
941 Ok(updated)
942 }
943
944 pub fn rebuild_operational_secondary_indexes(
947 &self,
948 name: &str,
949 ) -> Result<OperationalSecondaryIndexRebuildReport, EngineError> {
950 let mut conn = self.connect()?;
951 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
952 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
953 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
954 })?;
955 let indexes =
956 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
957 .map_err(EngineError::InvalidWrite)?;
958 let (mutation_entries_rebuilt, current_entries_rebuilt) =
959 rebuild_operational_secondary_index_entries(&tx, &record.name, record.kind, &indexes)?;
960 persist_simple_provenance_event(
961 &tx,
962 "operational_secondary_indexes_rebuilt",
963 name,
964 Some(serde_json::json!({
965 "index_count": indexes.len(),
966 "mutation_entries_rebuilt": mutation_entries_rebuilt,
967 "current_entries_rebuilt": current_entries_rebuilt,
968 })),
969 )?;
970 tx.commit()?;
971 Ok(OperationalSecondaryIndexRebuildReport {
972 collection_name: name.to_owned(),
973 mutation_entries_rebuilt,
974 current_entries_rebuilt,
975 })
976 }
977
978 pub fn validate_operational_collection_history(
981 &self,
982 name: &str,
983 ) -> Result<OperationalHistoryValidationReport, EngineError> {
984 let conn = self.connect()?;
985 let record = load_operational_collection_record(&conn, name)?.ok_or_else(|| {
986 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
987 })?;
988 let Some(contract) = parse_operational_validation_contract(&record.validation_json)
989 .map_err(EngineError::InvalidWrite)?
990 else {
991 return Err(EngineError::InvalidWrite(format!(
992 "operational collection '{name}' has no validation_json configured"
993 )));
994 };
995
996 let mut stmt = conn.prepare(
997 "SELECT id, record_key, op_kind, payload_json FROM operational_mutations \
998 WHERE collection_name = ?1 ORDER BY mutation_order",
999 )?;
1000 let rows = stmt
1001 .query_map([name], |row| {
1002 Ok((
1003 row.get::<_, String>(0)?,
1004 row.get::<_, String>(1)?,
1005 row.get::<_, String>(2)?,
1006 row.get::<_, String>(3)?,
1007 ))
1008 })?
1009 .collect::<Result<Vec<_>, _>>()?;
1010 drop(stmt);
1011
1012 let mut checked_rows = 0usize;
1013 let mut issues = Vec::new();
1014 for (mutation_id, record_key, op_kind, payload_json) in rows {
1015 if op_kind == "delete" {
1016 continue;
1017 }
1018 checked_rows += 1;
1019 if let Err(message) =
1020 validate_operational_payload_against_contract(&contract, payload_json.as_str())
1021 {
1022 issues.push(OperationalHistoryValidationIssue {
1023 mutation_id,
1024 record_key,
1025 op_kind,
1026 message,
1027 });
1028 }
1029 }
1030
1031 Ok(OperationalHistoryValidationReport {
1032 collection_name: name.to_owned(),
1033 checked_rows,
1034 invalid_row_count: issues.len(),
1035 issues,
1036 })
1037 }
1038
1039 pub fn disable_operational_collection(
1042 &self,
1043 name: &str,
1044 ) -> Result<OperationalCollectionRecord, EngineError> {
1045 let mut conn = self.connect()?;
1046 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1047 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1048 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1049 })?;
1050 let changed = if record.disabled_at.is_none() {
1051 tx.execute(
1052 "UPDATE operational_collections SET disabled_at = unixepoch() WHERE name = ?1",
1053 [name],
1054 )?;
1055 true
1056 } else {
1057 false
1058 };
1059 let record = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1060 EngineError::Bridge("operational collection missing after disable".to_owned())
1061 })?;
1062 persist_simple_provenance_event(
1063 &tx,
1064 "operational_collection_disabled",
1065 name,
1066 Some(serde_json::json!({
1067 "disabled_at": record.disabled_at,
1068 "changed": changed,
1069 })),
1070 )?;
1071 tx.commit()?;
1072 Ok(record)
1073 }
1074
1075 pub fn compact_operational_collection(
1078 &self,
1079 name: &str,
1080 dry_run: bool,
1081 ) -> Result<OperationalCompactionReport, EngineError> {
1082 let mut conn = self.connect()?;
1083 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1084 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1085 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1086 })?;
1087 validate_append_only_operational_collection(&collection, "compact")?;
1088 let (mutation_ids, before_timestamp) =
1089 operational_compaction_candidates(&tx, &collection.retention_json, name)?;
1090 if dry_run {
1091 drop(tx);
1092 return Ok(OperationalCompactionReport {
1093 collection_name: name.to_owned(),
1094 deleted_mutations: mutation_ids.len(),
1095 dry_run: true,
1096 before_timestamp,
1097 });
1098 }
1099 let mut delete_stmt =
1100 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
1101 for mutation_id in &mutation_ids {
1102 delete_stmt.execute([mutation_id.as_str()])?;
1103 }
1104 drop(delete_stmt);
1105 persist_simple_provenance_event(
1106 &tx,
1107 "operational_collection_compacted",
1108 name,
1109 Some(serde_json::json!({
1110 "deleted_mutations": mutation_ids.len(),
1111 "before_timestamp": before_timestamp,
1112 })),
1113 )?;
1114 tx.commit()?;
1115 Ok(OperationalCompactionReport {
1116 collection_name: name.to_owned(),
1117 deleted_mutations: mutation_ids.len(),
1118 dry_run: false,
1119 before_timestamp,
1120 })
1121 }
1122
1123 pub fn purge_operational_collection(
1126 &self,
1127 name: &str,
1128 before_timestamp: i64,
1129 ) -> Result<OperationalPurgeReport, EngineError> {
1130 let mut conn = self.connect()?;
1131 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1132 let collection = load_operational_collection_record(&tx, name)?.ok_or_else(|| {
1133 EngineError::InvalidWrite(format!("operational collection '{name}' is not registered"))
1134 })?;
1135 validate_append_only_operational_collection(&collection, "purge")?;
1136 let deleted_mutations = tx.execute(
1137 "DELETE FROM operational_mutations WHERE collection_name = ?1 AND created_at < ?2",
1138 rusqlite::params![name, before_timestamp],
1139 )?;
1140 persist_simple_provenance_event(
1141 &tx,
1142 "operational_collection_purged",
1143 name,
1144 Some(serde_json::json!({
1145 "deleted_mutations": deleted_mutations,
1146 "before_timestamp": before_timestamp,
1147 })),
1148 )?;
1149 tx.commit()?;
1150 Ok(OperationalPurgeReport {
1151 collection_name: name.to_owned(),
1152 deleted_mutations,
1153 before_timestamp,
1154 })
1155 }
1156
1157 pub fn plan_operational_retention(
1160 &self,
1161 now_timestamp: i64,
1162 collection_names: Option<&[String]>,
1163 max_collections: Option<usize>,
1164 ) -> Result<OperationalRetentionPlanReport, EngineError> {
1165 let conn = self.connect()?;
1166 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1167 let mut items = Vec::with_capacity(records.len());
1168 for record in records {
1169 items.push(plan_operational_retention_item(
1170 &conn,
1171 &record,
1172 now_timestamp,
1173 )?);
1174 }
1175 Ok(OperationalRetentionPlanReport {
1176 planned_at: now_timestamp,
1177 collections_examined: items.len(),
1178 items,
1179 })
1180 }
1181
1182 pub fn run_operational_retention(
1185 &self,
1186 now_timestamp: i64,
1187 collection_names: Option<&[String]>,
1188 max_collections: Option<usize>,
1189 dry_run: bool,
1190 ) -> Result<OperationalRetentionRunReport, EngineError> {
1191 let mut conn = self.connect()?;
1192 let records = load_operational_retention_records(&conn, collection_names, max_collections)?;
1193 let mut items = Vec::with_capacity(records.len());
1194 let mut collections_acted_on = 0usize;
1195
1196 for record in records {
1197 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1198 let item = run_operational_retention_item(&tx, &record, now_timestamp, dry_run)?;
1199 if item.deleted_mutations > 0 {
1200 collections_acted_on += 1;
1201 }
1202 if dry_run || item.action_kind == OperationalRetentionActionKind::Noop {
1203 drop(tx);
1204 } else {
1205 tx.commit()?;
1206 }
1207 items.push(item);
1208 }
1209
1210 Ok(OperationalRetentionRunReport {
1211 executed_at: now_timestamp,
1212 collections_examined: items.len(),
1213 collections_acted_on,
1214 dry_run,
1215 items,
1216 })
1217 }
1218
1219 pub fn trace_operational_collection(
1222 &self,
1223 collection_name: &str,
1224 record_key: Option<&str>,
1225 ) -> Result<OperationalTraceReport, EngineError> {
1226 let conn = self.connect()?;
1227 ensure_operational_collection_registered(&conn, collection_name)?;
1228 let mutations = if let Some(record_key) = record_key {
1229 let mut stmt = conn.prepare(
1230 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1231 FROM operational_mutations \
1232 WHERE collection_name = ?1 AND record_key = ?2 \
1233 ORDER BY mutation_order",
1234 )?;
1235 stmt.query_map([collection_name, record_key], map_operational_mutation_row)?
1236 .collect::<Result<Vec<_>, _>>()?
1237 } else {
1238 let mut stmt = conn.prepare(
1239 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
1240 FROM operational_mutations \
1241 WHERE collection_name = ?1 \
1242 ORDER BY mutation_order",
1243 )?;
1244 stmt.query_map([collection_name], map_operational_mutation_row)?
1245 .collect::<Result<Vec<_>, _>>()?
1246 };
1247 let current_rows = if let Some(record_key) = record_key {
1248 let mut stmt = conn.prepare(
1249 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1250 FROM operational_current \
1251 WHERE collection_name = ?1 AND record_key = ?2 \
1252 ORDER BY updated_at, record_key",
1253 )?;
1254 stmt.query_map([collection_name, record_key], map_operational_current_row)?
1255 .collect::<Result<Vec<_>, _>>()?
1256 } else {
1257 let mut stmt = conn.prepare(
1258 "SELECT collection_name, record_key, payload_json, updated_at, last_mutation_id \
1259 FROM operational_current \
1260 WHERE collection_name = ?1 \
1261 ORDER BY updated_at, record_key",
1262 )?;
1263 stmt.query_map([collection_name], map_operational_current_row)?
1264 .collect::<Result<Vec<_>, _>>()?
1265 };
1266
1267 Ok(OperationalTraceReport {
1268 collection_name: collection_name.to_owned(),
1269 record_key: record_key.map(str::to_owned),
1270 mutation_count: mutations.len(),
1271 current_count: current_rows.len(),
1272 mutations,
1273 current_rows,
1274 })
1275 }
1276
1277 pub fn read_operational_collection(
1280 &self,
1281 request: &OperationalReadRequest,
1282 ) -> Result<OperationalReadReport, EngineError> {
1283 if request.collection_name.trim().is_empty() {
1284 return Err(EngineError::InvalidWrite(
1285 "operational read collection_name must not be empty".to_owned(),
1286 ));
1287 }
1288 if request.filters.is_empty() {
1289 return Err(EngineError::InvalidWrite(
1290 "operational read requires at least one filter clause".to_owned(),
1291 ));
1292 }
1293
1294 let conn = self.connect()?;
1295 let record = load_operational_collection_record(&conn, &request.collection_name)?
1296 .ok_or_else(|| {
1297 EngineError::InvalidWrite(format!(
1298 "operational collection '{}' is not registered",
1299 request.collection_name
1300 ))
1301 })?;
1302 validate_append_only_operational_collection(&record, "read")?;
1303 let declared_fields = parse_operational_filter_fields(&record.filter_fields_json)
1304 .map_err(EngineError::InvalidWrite)?;
1305 let secondary_indexes =
1306 parse_operational_secondary_indexes_json(&record.secondary_indexes_json, record.kind)
1307 .map_err(EngineError::InvalidWrite)?;
1308 let applied_limit = operational_read_limit(request.limit)?;
1309 let filters = compile_operational_read_filters(&request.filters, &declared_fields)?;
1310 if let Some(report) = execute_operational_secondary_index_read(
1311 &conn,
1312 &request.collection_name,
1313 &filters,
1314 &secondary_indexes,
1315 applied_limit,
1316 )? {
1317 return Ok(report);
1318 }
1319 execute_operational_filtered_read(&conn, &request.collection_name, &filters, applied_limit)
1320 }
1321
1322 pub fn rebuild_operational_current(
1325 &self,
1326 collection_name: Option<&str>,
1327 ) -> Result<OperationalRepairReport, EngineError> {
1328 let mut conn = self.connect()?;
1329 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1330 let collections = if let Some(name) = collection_name {
1331 let maybe_kind: Option<String> = tx
1332 .query_row(
1333 "SELECT kind FROM operational_collections WHERE name = ?1",
1334 [name],
1335 |row| row.get(0),
1336 )
1337 .optional()?;
1338 let Some(kind) = maybe_kind else {
1339 return Err(EngineError::InvalidWrite(format!(
1340 "operational collection '{name}' is not registered"
1341 )));
1342 };
1343 if kind != OperationalCollectionKind::LatestState.as_str() {
1344 return Err(EngineError::InvalidWrite(format!(
1345 "operational collection '{name}' is not latest_state"
1346 )));
1347 }
1348 vec![name.to_owned()]
1349 } else {
1350 let mut stmt = tx.prepare(
1351 "SELECT name FROM operational_collections WHERE kind = 'latest_state' ORDER BY name",
1352 )?;
1353 stmt.query_map([], |row| row.get::<_, String>(0))?
1354 .collect::<Result<Vec<_>, _>>()?
1355 };
1356
1357 let rebuilt_rows = rebuild_operational_current_rows(&tx, &collections)?;
1358 for collection in &collections {
1359 let record = load_operational_collection_record(&tx, collection)?.ok_or_else(|| {
1360 EngineError::Bridge(format!(
1361 "operational collection '{collection}' missing during current rebuild"
1362 ))
1363 })?;
1364 let indexes = parse_operational_secondary_indexes_json(
1365 &record.secondary_indexes_json,
1366 record.kind,
1367 )
1368 .map_err(EngineError::InvalidWrite)?;
1369 if !indexes.is_empty() {
1370 rebuild_operational_secondary_index_entries(
1371 &tx,
1372 &record.name,
1373 record.kind,
1374 &indexes,
1375 )?;
1376 }
1377 }
1378
1379 persist_simple_provenance_event(
1380 &tx,
1381 "operational_current_rebuilt",
1382 collection_name.unwrap_or("*"),
1383 Some(serde_json::json!({
1384 "collections_rebuilt": collections.len(),
1385 "current_rows_rebuilt": rebuilt_rows,
1386 })),
1387 )?;
1388 tx.commit()?;
1389
1390 Ok(OperationalRepairReport {
1391 collections_rebuilt: collections.len(),
1392 current_rows_rebuilt: rebuilt_rows,
1393 })
1394 }
1395
    /// Rebuilds the projections selected by `target`.
    ///
    /// Thin wrapper: the call is forwarded unchanged to
    /// `self.projections.rebuild_projections` and its result is returned
    /// as is.
    ///
    /// # Errors
    ///
    /// Returns whatever error the projection service reports.
    pub fn rebuild_projections(
        &self,
        target: ProjectionTarget,
    ) -> Result<ProjectionRepairReport, EngineError> {
        self.projections.rebuild_projections(target)
    }
1404
    /// Rebuilds missing projections by delegating to
    /// `self.projections.rebuild_missing_projections`.
    ///
    /// # Errors
    ///
    /// Returns whatever error the projection service reports.
    pub fn rebuild_missing_projections(&self) -> Result<ProjectionRepairReport, EngineError> {
        self.projections.rebuild_missing_projections()
    }
1410
1411 pub fn restore_vector_profiles(&self) -> Result<ProjectionRepairReport, EngineError> {
1417 let conn = self.connect()?;
1418 let profiles: Vec<(String, String, i64)> = {
1419 let mut stmt = conn.prepare(
1420 "SELECT profile, table_name, dimension \
1421 FROM vector_profiles WHERE enabled = 1 ORDER BY profile",
1422 )?;
1423 stmt.query_map([], |row| {
1424 Ok((
1425 row.get::<_, String>(0)?,
1426 row.get::<_, String>(1)?,
1427 row.get::<_, i64>(2)?,
1428 ))
1429 })?
1430 .collect::<Result<Vec<_>, _>>()?
1431 };
1432
1433 for (profile, table_name, dimension) in &profiles {
1434 let dimension = usize::try_from(*dimension).map_err(|_| {
1435 EngineError::Bridge(format!("invalid vector profile dimension: {dimension}"))
1436 })?;
1437 self.schema_manager
1438 .ensure_vector_profile(&conn, profile, table_name, dimension)?;
1439 }
1440
1441 Ok(ProjectionRepairReport {
1442 targets: vec![ProjectionTarget::Vec],
1443 rebuilt_rows: profiles.len(),
1444 notes: vec![],
1445 })
1446 }
1447
1448 #[allow(clippy::too_many_lines)]
1459 pub fn regenerate_vector_embeddings(
1460 &self,
1461 config: &VectorRegenerationConfig,
1462 ) -> Result<VectorRegenerationReport, EngineError> {
1463 self.regenerate_vector_embeddings_with_policy(config, &VectorGeneratorPolicy::default())
1464 }
1465
    #[allow(clippy::too_many_lines)]
    /// Regenerates the contents of `vec_nodes_active` by running the
    /// configured vector generator under `policy`.
    ///
    /// Flow, in order:
    /// 1. Validate `config`, snapshot the current chunks and hash the
    ///    generator payload.
    /// 2. Record a `vector_regeneration_requested` event on the plain
    ///    connection (outside any transaction).
    /// 3. Run the generator and validate the embeddings it returned.
    /// 4. In an immediate transaction: ensure the vector profile exists,
    ///    re-snapshot the chunks and compare hashes, persist the contract,
    ///    replace every row of `vec_nodes_active`, record a
    ///    `vector_regeneration_apply` event and commit.
    ///
    /// Generator failures, validation failures, a missing vec capability,
    /// snapshot drift and a missing embedding are additionally recorded on a
    /// best-effort basis through
    /// `persist_vector_regeneration_failure_best_effort` before the error is
    /// returned.
    ///
    /// # Errors
    ///
    /// Returns the engine error derived from the `VectorRegenerationFailure`
    /// for the failure classes above, [`EngineError::Schema`] for other schema
    /// manager errors, and propagates database failures.
    pub fn regenerate_vector_embeddings_with_policy(
        &self,
        config: &VectorRegenerationConfig,
        policy: &VectorGeneratorPolicy,
    ) -> Result<VectorRegenerationReport, EngineError> {
        let conn = self.connect()?;
        // Shadow the caller's config with the validated form.
        let config = validate_vector_regeneration_config(&conn, config, policy)
            .map_err(|failure| failure.to_engine_error())?;
        let chunks = collect_regeneration_chunks(&conn)?;
        let payload = build_regeneration_input(&config, chunks.clone());
        // Hash of the payload handed to the generator; compared again inside
        // the apply transaction to detect concurrent chunk changes.
        let snapshot_hash = compute_snapshot_hash(&payload)?;
        let audit_metadata = VectorRegenerationAuditMetadata {
            profile: config.profile.clone(),
            model_identity: config.model_identity.clone(),
            model_version: config.model_version.clone(),
            chunk_count: chunks.len(),
            snapshot_hash: snapshot_hash.clone(),
            failure_class: None,
        };
        persist_vector_regeneration_event(
            &conn,
            "vector_regeneration_requested",
            &config.profile,
            &audit_metadata,
        )?;
        let notes = generator_policy_notes(policy);
        let generated = match run_vector_generator_bounded(&config, &payload, policy) {
            Ok(generated) => generated,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };
        let mut embedding_map = match validate_generated_embeddings(&config, &chunks, generated) {
            Ok(embedding_map) => embedding_map,
            Err(failure) => {
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
        };

        // The write transaction is opened only now, after the generator has
        // finished.
        let mut conn = conn;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        match self.schema_manager.ensure_vector_profile(
            &tx,
            &config.profile,
            &config.table_name,
            config.dimension,
        ) {
            Ok(()) => {}
            Err(SchemaError::MissingCapability(message)) => {
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::UnsupportedVecCapability,
                    message,
                );
                // The failure helper opens its own connection, so the
                // transaction is released first (presumably so that write is
                // not blocked by this one).
                drop(tx);
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            }
            Err(error) => return Err(EngineError::Schema(error)),
        }
        // Re-read the chunks inside the transaction and verify nothing changed
        // while the generator was running.
        let apply_chunks = collect_regeneration_chunks(&tx)?;
        let apply_payload = build_regeneration_input(&config, apply_chunks.clone());
        let apply_hash = compute_snapshot_hash(&apply_payload)?;
        if apply_hash != snapshot_hash {
            let failure = VectorRegenerationFailure::new(
                VectorRegenerationFailureClass::SnapshotDrift,
                "chunk snapshot changed during generation; retry".to_owned(),
            );
            drop(tx);
            self.persist_vector_regeneration_failure_best_effort(
                &config.profile,
                &audit_metadata,
                &failure,
            );
            return Err(failure.to_engine_error());
        }
        persist_vector_contract(&tx, &config, &snapshot_hash)?;
        // Full replace: clear the table, then insert one row per chunk.
        tx.execute("DELETE FROM vec_nodes_active", [])?;
        let mut stmt = tx
            .prepare_cached("INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES (?1, ?2)")?;
        let mut regenerated_rows = 0usize;
        for chunk in &apply_chunks {
            let Some(embedding) = embedding_map.remove(&chunk.chunk_id) else {
                // Release the statement and roll back before recording the
                // failure.
                drop(stmt);
                drop(tx);
                let failure = VectorRegenerationFailure::new(
                    VectorRegenerationFailureClass::MalformedGeneratorJson,
                    format!(
                        "generator did not return embedding for chunk '{}'",
                        chunk.chunk_id
                    ),
                );
                self.persist_vector_regeneration_failure_best_effort(
                    &config.profile,
                    &audit_metadata,
                    &failure,
                );
                return Err(failure.to_engine_error());
            };
            stmt.execute(rusqlite::params![chunk.chunk_id.as_str(), embedding])?;
            regenerated_rows += 1;
        }
        // The statement borrows `tx`; release it before committing.
        drop(stmt);
        persist_vector_regeneration_event(
            &tx,
            "vector_regeneration_apply",
            &config.profile,
            &audit_metadata,
        )?;
        tx.commit()?;

        Ok(VectorRegenerationReport {
            profile: config.profile.clone(),
            table_name: config.table_name.clone(),
            dimension: config.dimension,
            // Count from the pre-generation snapshot; it equals the applied
            // snapshot because the hashes matched.
            total_chunks: chunks.len(),
            regenerated_rows,
            contract_persisted: true,
            notes,
        })
    }
1605
1606 fn persist_vector_regeneration_failure_best_effort(
1607 &self,
1608 profile: &str,
1609 metadata: &VectorRegenerationAuditMetadata,
1610 failure: &VectorRegenerationFailure,
1611 ) {
1612 let Ok(conn) = self.connect() else {
1613 return;
1614 };
1615 let failure_metadata = VectorRegenerationAuditMetadata {
1616 profile: metadata.profile.clone(),
1617 model_identity: metadata.model_identity.clone(),
1618 model_version: metadata.model_version.clone(),
1619 chunk_count: metadata.chunk_count,
1620 snapshot_hash: metadata.snapshot_hash.clone(),
1621 failure_class: Some(failure.failure_class_label().to_owned()),
1622 };
1623 let _ = persist_vector_regeneration_event(
1624 &conn,
1625 "vector_regeneration_failed",
1626 profile,
1627 &failure_metadata,
1628 );
1629 }
1630
1631 pub fn trace_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
1634 let conn = self.connect()?;
1635
1636 let node_logical_ids = collect_strings(
1637 &conn,
1638 "SELECT logical_id FROM nodes WHERE source_ref = ?1 ORDER BY created_at",
1639 source_ref,
1640 )?;
1641 let action_ids = collect_strings(
1642 &conn,
1643 "SELECT id FROM actions WHERE source_ref = ?1 ORDER BY created_at",
1644 source_ref,
1645 )?;
1646 let operational_mutation_ids = collect_strings(
1647 &conn,
1648 "SELECT id FROM operational_mutations WHERE source_ref = ?1 ORDER BY mutation_order",
1649 source_ref,
1650 )?;
1651
1652 Ok(TraceReport {
1653 source_ref: source_ref.to_owned(),
1654 node_rows: count_source_ref(&conn, "nodes", source_ref)?,
1655 edge_rows: count_source_ref(&conn, "edges", source_ref)?,
1656 action_rows: count_source_ref(&conn, "actions", source_ref)?,
1657 operational_mutation_rows: count_source_ref(
1658 &conn,
1659 "operational_mutations",
1660 source_ref,
1661 )?,
1662 node_logical_ids,
1663 action_ids,
1664 operational_mutation_ids,
1665 })
1666 }
1667
    #[allow(clippy::too_many_lines)]
    /// Reactivates the most recently superseded node row of `logical_id`.
    ///
    /// If the logical id already has an active row (`superseded_at IS NULL`)
    /// the call is a no-op and the report says so. Otherwise, inside one
    /// immediate transaction, it:
    /// - clears `superseded_at` on the latest superseded row,
    /// - restores edges via `restore_validated_edges`, scoped by the latest
    ///   `node_retire` provenance event for this logical id (skipped when no
    ///   such event exists),
    /// - rebuilds the `fts_nodes` rows for the logical id from `chunks`,
    /// - records a `restore_logical_id` provenance event.
    ///
    /// # Errors
    ///
    /// Returns [`EngineError::InvalidWrite`] when the logical id has no
    /// superseded row to restore; propagates database failures.
    pub fn restore_logical_id(
        &self,
        logical_id: &str,
    ) -> Result<LogicalRestoreReport, EngineError> {
        let mut conn = self.connect()?;
        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;

        let active_count: i64 = tx.query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
            [logical_id],
            |row| row.get(0),
        )?;
        if active_count > 0 {
            // Nothing to restore; `tx` is dropped without committing.
            return Ok(LogicalRestoreReport {
                logical_id: logical_id.to_owned(),
                was_noop: true,
                restored_node_rows: 0,
                restored_edge_rows: 0,
                restored_chunk_rows: 0,
                restored_fts_rows: 0,
                restored_vec_rows: 0,
                skipped_edges: Vec::new(),
                notes: vec!["logical_id already active".to_owned()],
            });
        }

        // Pick the most recently superseded version; `created_at` and `rowid`
        // break ties.
        let restored_node: Option<(String, String)> = tx
            .query_row(
                "SELECT row_id, kind FROM nodes \
                 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
                 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?)),
            )
            .optional()?;
        let (restored_node_row_id, restored_kind) = restored_node.ok_or_else(|| {
            EngineError::InvalidWrite(format!("logical_id '{logical_id}' is not retired"))
        })?;

        tx.execute(
            "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
            [restored_node_row_id.as_str()],
        )?;

        // Latest retire event for this logical id; its rowid, source_ref and
        // timestamp are handed to the edge restore.
        let retire_scope: Option<(i64, Option<String>, i64)> = tx
            .query_row(
                "SELECT rowid, source_ref, created_at FROM provenance_events \
                 WHERE event_type = 'node_retire' AND subject = ?1 \
                 ORDER BY created_at DESC, rowid DESC LIMIT 1",
                [logical_id],
                |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
            )
            .optional()?;
        let (restored_edge_rows, skipped_edges) = if let Some((
            retire_event_rowid,
            retire_source_ref,
            retire_created_at,
        )) = retire_scope
        {
            restore_validated_edges(
                &tx,
                logical_id,
                retire_source_ref.as_deref(),
                retire_created_at,
                retire_event_rowid,
            )?
        } else {
            // No retire event recorded: no edges are restored.
            (0, Vec::new())
        };

        // This is a count of the chunks that exist for the logical id; no
        // chunk row is modified here.
        let restored_chunk_rows: usize = tx
            .query_row(
                "SELECT count(*) FROM chunks WHERE node_logical_id = ?1",
                [logical_id],
                |row| row.get::<_, i64>(0),
            )
            .map(i64_to_usize)?;
        // Rebuild the FTS rows for this logical id: delete first so the insert
        // below cannot duplicate existing rows.
        tx.execute(
            "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
            [logical_id],
        )?;
        let restored_fts_rows = tx.execute(
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             SELECT id, node_logical_id, ?2, text_content \
             FROM chunks WHERE node_logical_id = ?1",
            rusqlite::params![logical_id, restored_kind],
        )?;
        let restored_vec_rows = count_vec_rows_for_logical_id(&tx, logical_id)?;

        persist_simple_provenance_event(
            &tx,
            "restore_logical_id",
            logical_id,
            Some(serde_json::json!({
                "restored_node_rows": 1,
                "restored_edge_rows": restored_edge_rows,
                "restored_chunk_rows": restored_chunk_rows,
                "restored_fts_rows": restored_fts_rows,
                "restored_vec_rows": restored_vec_rows,
            })),
        )?;
        tx.commit()?;

        Ok(LogicalRestoreReport {
            logical_id: logical_id.to_owned(),
            was_noop: false,
            restored_node_rows: 1,
            restored_edge_rows,
            restored_chunk_rows,
            restored_fts_rows,
            restored_vec_rows,
            skipped_edges,
            notes: Vec::new(),
        })
    }
1787
1788 pub fn purge_logical_id(&self, logical_id: &str) -> Result<LogicalPurgeReport, EngineError> {
1792 let mut conn = self.connect()?;
1793 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1794
1795 let active_count: i64 = tx.query_row(
1796 "SELECT count(*) FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL",
1797 [logical_id],
1798 |row| row.get(0),
1799 )?;
1800 if active_count > 0 {
1801 return Ok(LogicalPurgeReport {
1802 logical_id: logical_id.to_owned(),
1803 was_noop: true,
1804 deleted_node_rows: 0,
1805 deleted_edge_rows: 0,
1806 deleted_chunk_rows: 0,
1807 deleted_fts_rows: 0,
1808 deleted_vec_rows: 0,
1809 notes: vec!["logical_id is active; purge skipped".to_owned()],
1810 });
1811 }
1812
1813 let node_rows: i64 = tx.query_row(
1814 "SELECT count(*) FROM nodes WHERE logical_id = ?1",
1815 [logical_id],
1816 |row| row.get(0),
1817 )?;
1818 if node_rows == 0 {
1819 return Err(EngineError::InvalidWrite(format!(
1820 "logical_id '{logical_id}' does not exist"
1821 )));
1822 }
1823
1824 let deleted_vec_rows = delete_vec_rows_for_logical_id(&tx, logical_id)?;
1825 let deleted_fts_rows = tx.execute(
1826 "DELETE FROM fts_nodes WHERE node_logical_id = ?1",
1827 [logical_id],
1828 )?;
1829 let deleted_edge_rows = tx.execute(
1830 "DELETE FROM edges WHERE source_logical_id = ?1 OR target_logical_id = ?1",
1831 [logical_id],
1832 )?;
1833 let deleted_chunk_rows = tx.execute(
1834 "DELETE FROM chunks WHERE node_logical_id = ?1",
1835 [logical_id],
1836 )?;
1837 let deleted_node_rows =
1838 tx.execute("DELETE FROM nodes WHERE logical_id = ?1", [logical_id])?;
1839 tx.execute(
1840 "DELETE FROM node_access_metadata WHERE logical_id = ?1",
1841 [logical_id],
1842 )?;
1843
1844 persist_simple_provenance_event(
1845 &tx,
1846 "purge_logical_id",
1847 logical_id,
1848 Some(serde_json::json!({
1849 "deleted_node_rows": deleted_node_rows,
1850 "deleted_edge_rows": deleted_edge_rows,
1851 "deleted_chunk_rows": deleted_chunk_rows,
1852 "deleted_fts_rows": deleted_fts_rows,
1853 "deleted_vec_rows": deleted_vec_rows,
1854 })),
1855 )?;
1856 tx.commit()?;
1857
1858 Ok(LogicalPurgeReport {
1859 logical_id: logical_id.to_owned(),
1860 was_noop: false,
1861 deleted_node_rows,
1862 deleted_edge_rows,
1863 deleted_chunk_rows,
1864 deleted_fts_rows,
1865 deleted_vec_rows,
1866 notes: Vec::new(),
1867 })
1868 }
1869
1870 pub fn purge_provenance_events(
1880 &self,
1881 before_timestamp: i64,
1882 options: &ProvenancePurgeOptions,
1883 ) -> Result<ProvenancePurgeReport, EngineError> {
1884 let mut conn = self.connect()?;
1885 let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
1886
1887 let preserved_types: Vec<&str> = if options.preserve_event_types.is_empty() {
1888 vec!["excise", "purge_logical_id"]
1889 } else {
1890 options
1891 .preserve_event_types
1892 .iter()
1893 .map(String::as_str)
1894 .collect()
1895 };
1896
1897 let placeholders: String = (0..preserved_types.len())
1899 .map(|i| format!("?{}", i + 2))
1900 .collect::<Vec<_>>()
1901 .join(", ");
1902 let count_query = format!(
1903 "SELECT count(*) FROM provenance_events \
1904 WHERE created_at < ?1 AND event_type NOT IN ({placeholders})"
1905 );
1906 let delete_query = format!(
1907 "DELETE FROM provenance_events WHERE rowid IN (\
1908 SELECT rowid FROM provenance_events \
1909 WHERE created_at < ?1 AND event_type NOT IN ({placeholders}) \
1910 LIMIT 10000)"
1911 );
1912
1913 let bind_params = |stmt: &mut rusqlite::Statement<'_>| -> Result<(), rusqlite::Error> {
1914 stmt.raw_bind_parameter(1, before_timestamp)?;
1915 for (i, event_type) in preserved_types.iter().enumerate() {
1916 stmt.raw_bind_parameter(i + 2, *event_type)?;
1917 }
1918 Ok(())
1919 };
1920
1921 let events_deleted = if options.dry_run {
1922 let mut stmt = tx.prepare(&count_query)?;
1923 bind_params(&mut stmt)?;
1924 stmt.raw_query()
1925 .next()?
1926 .map_or(0, |row| row.get::<_, u64>(0).unwrap_or(0))
1927 } else {
1928 let mut total_deleted: u64 = 0;
1929 loop {
1930 let mut stmt = tx.prepare(&delete_query)?;
1931 bind_params(&mut stmt)?;
1932 let deleted = stmt.raw_execute()?;
1933 if deleted == 0 {
1934 break;
1935 }
1936 total_deleted += deleted as u64;
1937 }
1938 total_deleted
1939 };
1940
1941 let total_after: u64 =
1942 tx.query_row("SELECT count(*) FROM provenance_events", [], |row| {
1943 row.get(0)
1944 })?;
1945
1946 let oldest_remaining: Option<i64> = tx
1947 .query_row("SELECT MIN(created_at) FROM provenance_events", [], |row| {
1948 row.get(0)
1949 })
1950 .optional()?
1951 .flatten();
1952
1953 if !options.dry_run {
1954 tx.commit()?;
1955 }
1956
1957 let events_preserved = if options.dry_run {
1960 total_after - events_deleted
1961 } else {
1962 total_after
1963 };
1964
1965 Ok(ProvenancePurgeReport {
1966 events_deleted,
1967 events_preserved,
1968 oldest_remaining,
1969 })
1970 }
1971
    #[allow(clippy::too_many_lines)]
    /// Removes the active contribution of `source_ref` from the store and
    /// returns a fresh trace of what remains for that source.
    ///
    /// In one immediate transaction this:
    /// - supersedes active nodes, edges and actions carrying `source_ref`,
    /// - deletes the source's operational mutations and rebuilds the current
    ///   rows of the affected `latest_state` collections,
    /// - deletes vec rows and chunks of the affected logical ids,
    /// - reactivates, per excised node, the most recently created other row
    ///   with the same logical id (if any),
    /// - drops access metadata of logical ids left without an active node,
    /// - rebuilds `fts_nodes` in full from chunks joined to active nodes,
    /// - records an `excise_source` provenance event.
    ///
    /// # Errors
    ///
    /// Propagates database failures and any error from the final
    /// `trace_source` call, which runs after the commit.
    pub fn excise_source(&self, source_ref: &str) -> Result<TraceReport, EngineError> {
        let mut conn = self.connect()?;

        let tx = conn.transaction_with_behavior(TransactionBehavior::Immediate)?;
        // Must be collected before the mutations are deleted below.
        let affected_operational_collections = collect_strings_tx(
            &tx,
            "SELECT DISTINCT m.collection_name \
             FROM operational_mutations m \
             JOIN operational_collections c ON c.name = m.collection_name \
             WHERE m.source_ref = ?1 AND c.kind = 'latest_state' \
             ORDER BY m.collection_name",
            source_ref,
        )?;

        // (row_id, logical_id) of every active node written by this source,
        // captured before the rows are superseded.
        let pairs: Vec<(String, String)> = {
            let mut stmt = tx.prepare(
                "SELECT row_id, logical_id FROM nodes \
                 WHERE source_ref = ?1 AND superseded_at IS NULL",
            )?;
            stmt.query_map([source_ref], |row| {
                Ok((row.get::<_, String>(0)?, row.get::<_, String>(1)?))
            })?
            .collect::<Result<Vec<_>, _>>()?
        };
        let affected_logical_ids: Vec<String> = pairs
            .iter()
            .map(|(_, logical_id)| logical_id.clone())
            .collect();

        // Supersede everything that is currently active for this source.
        tx.execute(
            "UPDATE nodes SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE edges SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        tx.execute(
            "UPDATE actions SET superseded_at = unixepoch() \
             WHERE source_ref = ?1 AND superseded_at IS NULL",
            [source_ref],
        )?;
        clear_operational_current_rows(&tx, &affected_operational_collections)?;
        tx.execute(
            "DELETE FROM operational_mutations WHERE source_ref = ?1",
            [source_ref],
        )?;
        for logical_id in &affected_logical_ids {
            delete_vec_rows_for_logical_id(&tx, logical_id)?;
            tx.execute(
                "DELETE FROM chunks WHERE node_logical_id = ?1",
                [logical_id.as_str()],
            )?;
        }

        // Reactivate the most recently created other version of each excised
        // node, when one exists.
        // NOTE(review): the lookup does not filter on `source_ref`, so the
        // reactivated row may itself have been written by the excised
        // source — confirm whether that is intended.
        for (excised_row_id, logical_id) in &pairs {
            let prior: Option<String> = tx
                .query_row(
                    "SELECT row_id FROM nodes \
                     WHERE logical_id = ?1 AND row_id != ?2 \
                     ORDER BY created_at DESC LIMIT 1",
                    [logical_id.as_str(), excised_row_id.as_str()],
                    |row| row.get(0),
                )
                .optional()?;
            if let Some(prior_id) = prior {
                tx.execute(
                    "UPDATE nodes SET superseded_at = NULL WHERE row_id = ?1",
                    [prior_id.as_str()],
                )?;
            }
        }

        // Access metadata is only kept for logical ids that still have an
        // active node after the reactivation pass.
        for logical_id in &affected_logical_ids {
            let has_active_node = tx
                .query_row(
                    "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
                    [logical_id.as_str()],
                    |row| row.get::<_, i64>(0),
                )
                .optional()?
                .is_some();
            if !has_active_node {
                tx.execute(
                    "DELETE FROM node_access_metadata WHERE logical_id = ?1",
                    [logical_id.as_str()],
                )?;
            }
        }

        rebuild_operational_current_rows(&tx, &affected_operational_collections)?;

        // Full FTS rebuild: wipe the table and repopulate it from the chunks
        // of nodes that are active now.
        tx.execute("DELETE FROM fts_nodes", [])?;
        tx.execute(
            r"
            INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content)
            SELECT c.id, n.logical_id, n.kind, c.text_content
            FROM chunks c
            JOIN nodes n
              ON n.logical_id = c.node_logical_id
             AND n.superseded_at IS NULL
            ",
            [],
        )?;

        // The source ref is stored as both the subject and the source_ref of
        // the event.
        tx.execute(
            "INSERT INTO provenance_events (id, event_type, subject, source_ref) \
             VALUES (?1, 'excise_source', ?2, ?2)",
            rusqlite::params![new_id(), source_ref],
        )?;

        tx.commit()?;

        // Runs on a new connection, after the commit.
        self.trace_source(source_ref)
    }
2101
    /// Exports the database to `destination_path` and writes a JSON manifest
    /// next to it.
    ///
    /// Steps, in order:
    /// 1. When `options.force_checkpoint` is set, run
    ///    `PRAGMA wal_checkpoint(FULL)` and fail if it reports busy.
    /// 2. Read the highest applied schema migration version.
    /// 3. Create the destination's parent directory and copy the main
    ///    database with the SQLite backup API.
    /// 4. Reopen the exported file read-only to read its page count, then
    ///    hash the file with SHA-256.
    /// 5. Write `<file name>.export-manifest.json` through a temporary file
    ///    that is renamed into place.
    ///
    /// # Errors
    ///
    /// Returns [`EngineError::Bridge`] when the checkpoint is blocked, when
    /// the system clock is before the Unix epoch, when the destination has no
    /// file name, or when the manifest cannot be serialized. Database and I/O
    /// failures are propagated.
    pub fn safe_export(
        &self,
        destination_path: impl AsRef<Path>,
        options: SafeExportOptions,
    ) -> Result<SafeExportManifest, EngineError> {
        let destination_path = destination_path.as_ref();

        let conn = self.connect()?;

        if options.force_checkpoint {
            trace_info!("safe_export: wal checkpoint started");
            // The pragma returns one row: (busy, log frames, checkpointed frames).
            let (busy, log, checkpointed): (i64, i64, i64) =
                conn.query_row("PRAGMA wal_checkpoint(FULL)", [], |row| {
                    Ok((row.get(0)?, row.get(1)?, row.get(2)?))
                })?;
            if busy != 0 {
                trace_warn!(
                    busy,
                    log_frames = log,
                    checkpointed_frames = checkpointed,
                    "safe_export: wal checkpoint blocked by active readers"
                );
                return Err(EngineError::Bridge(format!(
                    "WAL checkpoint blocked: {busy} active reader(s) prevented a full checkpoint; \
                     log frames={log}, checkpointed={checkpointed}; \
                     retry export when no readers are active"
                )));
            }
            trace_info!(
                log_frames = log,
                checkpointed_frames = checkpointed,
                "safe_export: wal checkpoint completed"
            );
        }

        // Any failure of this query (not only a missing table) is reported as
        // schema version 0.
        let schema_version: u32 = conn
            .query_row(
                "SELECT COALESCE(MAX(version), 0) FROM fathom_schema_migrations",
                [],
                |row| row.get(0),
            )
            .unwrap_or(0);

        if let Some(parent) = destination_path.parent() {
            fs::create_dir_all(parent)?;
        }
        conn.backup(DatabaseName::Main, destination_path, None)?;

        // Close the source connection before inspecting the exported file.
        drop(conn);

        let page_count: u64 = {
            // Read the page count from the exported copy, not the source.
            let export_conn = rusqlite::Connection::open_with_flags(
                destination_path,
                rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY
                    | rusqlite::OpenFlags::SQLITE_OPEN_NO_MUTEX,
            )?;
            export_conn.query_row("PRAGMA page_count", [], |row| row.get(0))?
        };

        let sha256 = {
            // Stream the file through the hasher instead of loading it whole.
            let mut file = fs::File::open(destination_path)?;
            let mut hasher = Sha256::new();
            io::copy(&mut file, &mut hasher)?;
            format!("{:x}", hasher.finalize())
        };

        // Seconds since the Unix epoch.
        let exported_at = SystemTime::now()
            .duration_since(SystemTime::UNIX_EPOCH)
            .map_err(|e| EngineError::Bridge(format!("system clock error: {e}")))?
            .as_secs();

        let manifest = SafeExportManifest {
            exported_at,
            sha256,
            schema_version,
            protocol_version: EXPORT_PROTOCOL_VERSION,
            page_count,
        };

        let manifest_path = {
            // Same directory as the export; the full file name (extension
            // included) is kept as the prefix of the manifest name.
            let mut p = destination_path.to_path_buf();
            let stem = p
                .file_name()
                .map(|n| format!("{}.export-manifest.json", n.to_string_lossy()))
                .ok_or_else(|| {
                    EngineError::Bridge("destination path has no filename".to_owned())
                })?;
            p.set_file_name(stem);
            p
        };
        let manifest_json =
            serde_json::to_string(&manifest).map_err(|e| EngineError::Bridge(e.to_string()))?;

        // Write to `<manifest>.tmp` first and rename it into place; on failure
        // the temporary file is removed on a best-effort basis.
        let manifest_tmp = manifest_path.with_extension("json.tmp");
        if let Err(e) = fs::write(&manifest_tmp, &manifest_json)
            .and_then(|()| fs::rename(&manifest_tmp, &manifest_path))
        {
            let _ = fs::remove_file(&manifest_tmp);
            return Err(e.into());
        }

        Ok(manifest)
    }
2222}
2223
/// In-memory shape of one embedding contract, keyed by vector profile.
///
/// The field names match the columns that `persist_vector_contract` writes to
/// `vector_embedding_contracts`; that statement additionally sets `updated_at`,
/// which has no counterpart here.
// NOTE(review): the `dead_code` allowance suggests nothing reads this record
// yet — confirm before removing either the attribute or the type.
#[allow(dead_code)]
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
struct VectorEmbeddingContractRecord {
    /// Vector profile the contract applies to.
    profile: String,
    /// Name of the vector table the contract targets.
    table_name: String,
    /// Identity of the embedding model.
    model_identity: String,
    /// Version of the embedding model.
    model_version: String,
    /// Number of components in each embedding vector.
    dimension: usize,
    /// Free-form description of the normalization policy.
    normalization_policy: String,
    /// Free-form description of the chunking policy.
    chunking_policy: String,
    /// Free-form description of the preprocessing policy.
    preprocessing_policy: String,
    /// Generator command line, stored as a JSON document.
    generator_command_json: String,
    /// Time the contract was applied; the persisting statement uses `unixepoch()`.
    applied_at: i64,
    /// Hash of the regeneration input the contract was applied against.
    snapshot_hash: String,
    /// Format version of the persisted contract.
    contract_format_version: i64,
}
2240
/// One chunk of text handed to the embedding generator.
///
/// Rows are produced by `collect_regeneration_chunks`, which joins `chunks`
/// with the active (`superseded_at IS NULL`) row in `nodes`.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInputChunk {
    /// Chunk identifier (`chunks.id`).
    chunk_id: String,
    /// Logical id of the node the chunk belongs to.
    node_logical_id: String,
    /// Kind of the owning node (`nodes.kind`).
    kind: String,
    /// Text that should be embedded.
    text_content: String,
    /// Optional start offset stored with the chunk.
    byte_start: Option<i64>,
    /// Optional end offset stored with the chunk.
    byte_end: Option<i64>,
    /// Optional source reference taken from the owning node.
    source_ref: Option<String>,
    /// Creation timestamp stored with the chunk.
    created_at: i64,
}
2252
/// Complete payload for one regeneration run.
///
/// The value is serialized to JSON twice in this file: as the bytes hashed by
/// `compute_snapshot_hash` and as the stdin payload written by
/// `run_vector_generator_bounded`. Changing the fields or their order therefore
/// changes both the generator input and the snapshot hash.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationInput {
    /// Vector profile being regenerated.
    profile: String,
    /// Name of the vector table being regenerated.
    table_name: String,
    /// Identity of the embedding model.
    model_identity: String,
    /// Version of the embedding model.
    model_version: String,
    /// Number of components expected in every embedding.
    dimension: usize,
    /// Normalization policy copied from the configuration.
    normalization_policy: String,
    /// Chunking policy copied from the configuration.
    chunking_policy: String,
    /// Preprocessing policy copied from the configuration.
    preprocessing_policy: String,
    /// Chunks to embed.
    chunks: Vec<VectorRegenerationInputChunk>,
}
2265
/// A single embedding produced by the generator for one chunk.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbedding {
    /// Identifier of the chunk this embedding belongs to.
    chunk_id: String,
    /// Embedding components; validated for length and finiteness by
    /// `validate_generated_embeddings`.
    embedding: Vec<f32>,
}
2271
/// Top-level document decoded from the generator's stdout.
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
struct GeneratedEmbeddings {
    /// One entry per embedded chunk.
    embeddings: Vec<GeneratedEmbedding>,
}
2276
/// Coarse classification of why a vector regeneration attempt failed.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub(crate) enum VectorRegenerationFailureClass {
    /// The configuration or persisted contract failed validation.
    InvalidContract,
    /// The chunk count or serialized input exceeded the policy limits.
    PayloadTooLarge,
    /// The generator ran longer than the policy timeout.
    GeneratorTimeout,
    /// The generator wrote more stdout than the policy allows.
    GeneratorStdoutOverflow,
    /// The generator wrote more stderr than the policy allows.
    GeneratorStderrOverflow,
    /// The generator could not be run to a successful exit (spawn, pipe, poll,
    /// or non-zero exit status).
    GeneratorNonzeroExit,
    /// The generator payload could not be encoded, or its output could not be
    /// decoded or failed validation.
    MalformedGeneratorJson,
    /// The only class reported as retryable by `retryable`.
    // NOTE(review): presumably raised when the chunk snapshot changed while the
    // generator was running — confirm against the caller.
    SnapshotDrift,
    // NOTE(review): not produced anywhere in the visible code; presumably the
    // vector extension is unavailable — confirm against the caller.
    UnsupportedVecCapability,
}
2289
2290impl VectorRegenerationFailureClass {
2291 fn label(self) -> &'static str {
2292 match self {
2293 Self::InvalidContract => "invalid contract",
2294 Self::PayloadTooLarge => "payload too large",
2295 Self::GeneratorTimeout => "generator timeout",
2296 Self::GeneratorStdoutOverflow => "generator stdout overflow",
2297 Self::GeneratorStderrOverflow => "generator stderr overflow",
2298 Self::GeneratorNonzeroExit => "generator nonzero exit",
2299 Self::MalformedGeneratorJson => "malformed generator json",
2300 Self::SnapshotDrift => "snapshot drift",
2301 Self::UnsupportedVecCapability => "unsupported vec capability",
2302 }
2303 }
2304
2305 fn retryable(self) -> bool {
2306 matches!(self, Self::SnapshotDrift)
2307 }
2308}
2309
/// A classified vector regeneration failure with a human-readable detail.
#[derive(Clone, Debug, PartialEq, Eq)]
pub(crate) struct VectorRegenerationFailure {
    /// Coarse failure category; drives the label and the retryable marker.
    class: VectorRegenerationFailureClass,
    /// Free-form description appended after the class label in error text.
    detail: String,
}
2315
2316impl VectorRegenerationFailure {
2317 pub(crate) fn new(class: VectorRegenerationFailureClass, detail: impl Into<String>) -> Self {
2318 Self {
2319 class,
2320 detail: detail.into(),
2321 }
2322 }
2323
2324 fn to_engine_error(&self) -> EngineError {
2325 let retry_suffix = if self.class.retryable() {
2326 " [retryable]"
2327 } else {
2328 ""
2329 };
2330 EngineError::Bridge(format!(
2331 "vector regeneration {}: {}{}",
2332 self.class.label(),
2333 self.detail,
2334 retry_suffix
2335 ))
2336 }
2337
2338 fn failure_class_label(&self) -> &'static str {
2339 self.class.label()
2340 }
2341}
2342
/// Metadata attached to vector regeneration provenance events.
///
/// Serialized to JSON by `serialize_audit_metadata`, which also enforces a
/// size limit on the result.
#[derive(Clone, Debug, PartialEq, Eq, Serialize)]
struct VectorRegenerationAuditMetadata {
    /// Vector profile the event refers to.
    profile: String,
    /// Identity of the embedding model.
    model_identity: String,
    /// Version of the embedding model.
    model_version: String,
    /// Number of chunks involved in the run.
    chunk_count: usize,
    /// Snapshot hash associated with the run.
    snapshot_hash: String,
    /// Failure class label, if any; the key is omitted from the JSON when `None`.
    #[serde(skip_serializing_if = "Option::is_none")]
    failure_class: Option<String>,
}
2353
/// Retention policy for an operational collection.
///
/// Deserialized from an internally tagged document: the `mode` field selects
/// the variant using snake_case names (`keep_all`, `purge_before_seconds`,
/// `keep_last`), and the remaining fields carry that variant's parameters.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Deserialize)]
#[serde(tag = "mode", rename_all = "snake_case")]
enum OperationalRetentionPolicy {
    /// Retain every row.
    KeepAll,
    /// Age-based policy parameterized by `max_age_seconds`.
    // NOTE(review): presumably rows older than this many seconds are purged —
    // confirm against the retention planner.
    PurgeBeforeSeconds { max_age_seconds: i64 },
    /// Count-based policy parameterized by `max_rows`.
    // NOTE(review): presumably only the newest `max_rows` rows are kept —
    // confirm against the retention planner.
    KeepLast { max_rows: usize },
}
2361
2362pub fn load_vector_regeneration_config(
2365 path: impl AsRef<Path>,
2366) -> Result<VectorRegenerationConfig, EngineError> {
2367 let path = path.as_ref();
2368 let raw = fs::read_to_string(path)?;
2369 match path.extension().and_then(|ext| ext.to_str()) {
2370 Some("toml") => {
2371 toml::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2372 }
2373 Some("json") | None => {
2374 serde_json::from_str(&raw).map_err(|error| EngineError::Bridge(error.to_string()))
2375 }
2376 Some(other) => Err(EngineError::Bridge(format!(
2377 "unsupported vector regeneration config extension: {other}"
2378 ))),
2379 }
2380}
2381
2382fn validate_vector_regeneration_config(
2383 conn: &rusqlite::Connection,
2384 config: &VectorRegenerationConfig,
2385 policy: &VectorGeneratorPolicy,
2386) -> Result<VectorRegenerationConfig, VectorRegenerationFailure> {
2387 let profile = validate_bounded_text("profile", &config.profile, MAX_PROFILE_LEN)?;
2388 let table_name = validate_bounded_text("table_name", &config.table_name, MAX_PROFILE_LEN)?;
2389 if table_name != "vec_nodes_active" {
2390 return Err(VectorRegenerationFailure::new(
2391 VectorRegenerationFailureClass::InvalidContract,
2392 format!("table_name must be vec_nodes_active, got '{table_name}'"),
2393 ));
2394 }
2395 let model_identity = validate_bounded_text(
2396 "model_identity",
2397 &config.model_identity,
2398 MAX_MODEL_IDENTITY_LEN,
2399 )?;
2400 let model_version = validate_bounded_text(
2401 "model_version",
2402 &config.model_version,
2403 MAX_MODEL_VERSION_LEN,
2404 )?;
2405 if config.dimension == 0 {
2406 return Err(VectorRegenerationFailure::new(
2407 VectorRegenerationFailureClass::InvalidContract,
2408 "dimension must be greater than zero".to_owned(),
2409 ));
2410 }
2411 let normalization_policy = validate_bounded_text(
2412 "normalization_policy",
2413 &config.normalization_policy,
2414 MAX_POLICY_LEN,
2415 )?;
2416 let chunking_policy =
2417 validate_bounded_text("chunking_policy", &config.chunking_policy, MAX_POLICY_LEN)?;
2418 let preprocessing_policy = validate_bounded_text(
2419 "preprocessing_policy",
2420 &config.preprocessing_policy,
2421 MAX_POLICY_LEN,
2422 )?;
2423 let generator_command = validate_generator_command(&config.generator_command, policy)?;
2424
2425 if let Some(existing_dimension) = current_vector_profile_dimension(conn, &profile)?
2426 && existing_dimension != config.dimension
2427 {
2428 return Err(VectorRegenerationFailure::new(
2429 VectorRegenerationFailureClass::InvalidContract,
2430 format!(
2431 "dimension {} does not match existing vector profile dimension {}",
2432 config.dimension, existing_dimension
2433 ),
2434 ));
2435 }
2436
2437 validate_existing_contract_version(conn, &profile)?;
2438
2439 let normalized = VectorRegenerationConfig {
2440 profile,
2441 table_name,
2442 model_identity,
2443 model_version,
2444 dimension: config.dimension,
2445 normalization_policy,
2446 chunking_policy,
2447 preprocessing_policy,
2448 generator_command,
2449 };
2450 let serialized = serde_json::to_vec(&normalized).map_err(|error| {
2451 VectorRegenerationFailure::new(
2452 VectorRegenerationFailureClass::InvalidContract,
2453 error.to_string(),
2454 )
2455 })?;
2456 if serialized.len() > MAX_CONTRACT_JSON_BYTES {
2457 return Err(VectorRegenerationFailure::new(
2458 VectorRegenerationFailureClass::InvalidContract,
2459 format!("serialized contract exceeds {MAX_CONTRACT_JSON_BYTES} bytes"),
2460 ));
2461 }
2462
2463 Ok(normalized)
2464}
2465
2466#[allow(clippy::cast_possible_wrap)]
2467fn persist_vector_contract(
2468 conn: &rusqlite::Connection,
2469 config: &VectorRegenerationConfig,
2470 snapshot_hash: &str,
2471) -> Result<(), EngineError> {
2472 let generator_command_json = serde_json::to_string(&config.generator_command)
2473 .map_err(|error| EngineError::Bridge(error.to_string()))?;
2474 conn.execute(
2475 r"
2476 INSERT OR REPLACE INTO vector_embedding_contracts (
2477 profile,
2478 table_name,
2479 model_identity,
2480 model_version,
2481 dimension,
2482 normalization_policy,
2483 chunking_policy,
2484 preprocessing_policy,
2485 generator_command_json,
2486 applied_at,
2487 snapshot_hash,
2488 contract_format_version,
2489 updated_at
2490 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, unixepoch(), ?10, ?11, unixepoch())
2491 ",
2492 rusqlite::params![
2493 config.profile.as_str(),
2494 config.table_name.as_str(),
2495 config.model_identity.as_str(),
2496 config.model_version.as_str(),
2497 config.dimension as i64,
2498 config.normalization_policy.as_str(),
2499 config.chunking_policy.as_str(),
2500 config.preprocessing_policy.as_str(),
2501 generator_command_json,
2502 snapshot_hash,
2503 CURRENT_VECTOR_CONTRACT_FORMAT_VERSION,
2504 ],
2505 )?;
2506 Ok(())
2507}
2508
2509fn persist_vector_regeneration_event(
2510 conn: &rusqlite::Connection,
2511 event_type: &str,
2512 subject: &str,
2513 metadata: &VectorRegenerationAuditMetadata,
2514) -> Result<(), EngineError> {
2515 let metadata_json = serialize_audit_metadata(metadata)?;
2516 conn.execute(
2517 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2518 rusqlite::params![new_id(), event_type, subject, metadata_json],
2519 )?;
2520 Ok(())
2521}
2522
2523fn persist_simple_provenance_event(
2524 conn: &rusqlite::Connection,
2525 event_type: &str,
2526 subject: &str,
2527 metadata: Option<serde_json::Value>,
2528) -> Result<(), EngineError> {
2529 let metadata_json = metadata.map(|value| value.to_string()).unwrap_or_default();
2530 conn.execute(
2531 "INSERT INTO provenance_events (id, event_type, subject, metadata_json) VALUES (?1, ?2, ?3, ?4)",
2532 rusqlite::params![new_id(), event_type, subject, metadata_json],
2533 )?;
2534 Ok(())
2535}
2536
2537fn build_regeneration_input(
2538 config: &VectorRegenerationConfig,
2539 chunks: Vec<VectorRegenerationInputChunk>,
2540) -> VectorRegenerationInput {
2541 VectorRegenerationInput {
2542 profile: config.profile.clone(),
2543 table_name: config.table_name.clone(),
2544 model_identity: config.model_identity.clone(),
2545 model_version: config.model_version.clone(),
2546 dimension: config.dimension,
2547 normalization_policy: config.normalization_policy.clone(),
2548 chunking_policy: config.chunking_policy.clone(),
2549 preprocessing_policy: config.preprocessing_policy.clone(),
2550 chunks,
2551 }
2552}
2553
2554fn compute_snapshot_hash(payload: &VectorRegenerationInput) -> Result<String, EngineError> {
2555 let bytes =
2556 serde_json::to_vec(payload).map_err(|error| EngineError::Bridge(error.to_string()))?;
2557 let mut hasher = Sha256::new();
2558 hasher.update(bytes);
2559 Ok(format!("{:x}", hasher.finalize()))
2560}
2561
2562fn collect_regeneration_chunks(
2563 conn: &rusqlite::Connection,
2564) -> Result<Vec<VectorRegenerationInputChunk>, EngineError> {
2565 let mut stmt = conn.prepare(
2566 r"
2567 SELECT c.id, c.node_logical_id, n.kind, c.text_content, c.byte_start, c.byte_end, n.source_ref, c.created_at
2568 FROM chunks c
2569 JOIN nodes n
2570 ON n.logical_id = c.node_logical_id
2571 AND n.superseded_at IS NULL
2572 ORDER BY c.created_at, c.id
2573 ",
2574 )?;
2575 let chunks = stmt
2576 .query_map([], |row| {
2577 Ok(VectorRegenerationInputChunk {
2578 chunk_id: row.get(0)?,
2579 node_logical_id: row.get(1)?,
2580 kind: row.get(2)?,
2581 text_content: row.get(3)?,
2582 byte_start: row.get(4)?,
2583 byte_end: row.get(5)?,
2584 source_ref: row.get(6)?,
2585 created_at: row.get(7)?,
2586 })
2587 })?
2588 .collect::<Result<Vec<_>, _>>()?;
2589 Ok(chunks)
2590}
2591
2592fn validate_generated_embeddings(
2593 config: &VectorRegenerationConfig,
2594 chunks: &[VectorRegenerationInputChunk],
2595 generated: GeneratedEmbeddings,
2596) -> Result<std::collections::HashMap<String, Vec<u8>>, VectorRegenerationFailure> {
2597 if generated.embeddings.len() != chunks.len() {
2598 return Err(VectorRegenerationFailure::new(
2599 VectorRegenerationFailureClass::MalformedGeneratorJson,
2600 format!(
2601 "generator returned {} embedding(s) for {} chunk(s)",
2602 generated.embeddings.len(),
2603 chunks.len()
2604 ),
2605 ));
2606 }
2607
2608 let mut embedding_map = std::collections::HashMap::new();
2609 for embedding in generated.embeddings {
2610 if embedding.embedding.len() != config.dimension {
2611 return Err(VectorRegenerationFailure::new(
2612 VectorRegenerationFailureClass::MalformedGeneratorJson,
2613 format!(
2614 "embedding for chunk '{}' has dimension {}, expected {}",
2615 embedding.chunk_id,
2616 embedding.embedding.len(),
2617 config.dimension
2618 ),
2619 ));
2620 }
2621 if embedding.embedding.iter().any(|value| !value.is_finite()) {
2622 return Err(VectorRegenerationFailure::new(
2623 VectorRegenerationFailureClass::MalformedGeneratorJson,
2624 format!(
2625 "embedding for chunk '{}' contains non-finite values",
2626 embedding.chunk_id
2627 ),
2628 ));
2629 }
2630 let bytes: Vec<u8> = embedding
2631 .embedding
2632 .iter()
2633 .flat_map(|value| value.to_le_bytes())
2634 .collect();
2635 if embedding_map
2636 .insert(embedding.chunk_id.clone(), bytes)
2637 .is_some()
2638 {
2639 return Err(VectorRegenerationFailure::new(
2640 VectorRegenerationFailureClass::MalformedGeneratorJson,
2641 format!(
2642 "duplicate embedding returned for chunk '{}'",
2643 embedding.chunk_id
2644 ),
2645 ));
2646 }
2647 }
2648
2649 Ok(embedding_map)
2650}
2651
2652fn generator_policy_notes(policy: &VectorGeneratorPolicy) -> Vec<String> {
2653 let mut notes = vec!["vector embeddings regenerated from application contract".to_owned()];
2654 if !policy.allowed_executable_roots.is_empty() {
2655 notes.push("generator executable roots enforced by operator policy".to_owned());
2656 }
2657 if !policy.preserve_env_vars.is_empty() {
2658 notes.push("generator environment reduced to preserved variables".to_owned());
2659 }
2660 notes
2661}
2662
/// Identifies which generator pipe a reader-thread message refers to.
enum GeneratorStream {
    /// The generator's standard output.
    Stdout,
    /// The generator's standard error.
    Stderr,
}
2667
/// Final outcome reported by a capped reader thread (see `spawn_capped_reader`).
enum StreamReadResult {
    /// The stream reached end-of-file within the byte limit; carries the bytes read.
    Complete(Vec<u8>),
    /// The stream produced more bytes than the limit allows.
    Overflow,
    /// Reading the stream failed with an I/O error.
    Io(io::Error),
}
2673
2674fn validate_bounded_text(
2675 field: &str,
2676 value: &str,
2677 max_len: usize,
2678) -> Result<String, VectorRegenerationFailure> {
2679 let trimmed = value.trim();
2680 if trimmed.is_empty() {
2681 return Err(VectorRegenerationFailure::new(
2682 VectorRegenerationFailureClass::InvalidContract,
2683 format!("{field} must not be empty"),
2684 ));
2685 }
2686 if trimmed.len() > max_len {
2687 return Err(VectorRegenerationFailure::new(
2688 VectorRegenerationFailureClass::InvalidContract,
2689 format!("{field} exceeds max length {max_len}"),
2690 ));
2691 }
2692 Ok(trimmed.to_owned())
2693}
2694
2695fn validate_generator_command(
2696 command: &[String],
2697 policy: &VectorGeneratorPolicy,
2698) -> Result<Vec<String>, VectorRegenerationFailure> {
2699 if command.is_empty() {
2700 return Err(VectorRegenerationFailure::new(
2701 VectorRegenerationFailureClass::InvalidContract,
2702 "generator_command must contain at least one element".to_owned(),
2703 ));
2704 }
2705 let mut total_len = 0usize;
2706 for argument in command {
2707 if argument.is_empty() {
2708 return Err(VectorRegenerationFailure::new(
2709 VectorRegenerationFailureClass::InvalidContract,
2710 "generator_command entries must not be empty".to_owned(),
2711 ));
2712 }
2713 if argument.len() > MAX_GENERATOR_COMMAND_ARG_LEN {
2714 return Err(VectorRegenerationFailure::new(
2715 VectorRegenerationFailureClass::InvalidContract,
2716 format!(
2717 "generator_command argument exceeds max length {MAX_GENERATOR_COMMAND_ARG_LEN}"
2718 ),
2719 ));
2720 }
2721 total_len += argument.len();
2722 }
2723 if total_len > MAX_GENERATOR_COMMAND_TOTAL_LEN {
2724 return Err(VectorRegenerationFailure::new(
2725 VectorRegenerationFailureClass::InvalidContract,
2726 format!(
2727 "generator_command exceeds max serialized length {MAX_GENERATOR_COMMAND_TOTAL_LEN}"
2728 ),
2729 ));
2730 }
2731 executable_trust::validate_generator_executable(&command[0], policy)?;
2732 Ok(command.to_vec())
2733}
2734
2735fn current_vector_profile_dimension(
2736 conn: &rusqlite::Connection,
2737 profile: &str,
2738) -> Result<Option<usize>, VectorRegenerationFailure> {
2739 let dimension: Option<i64> = conn
2740 .query_row(
2741 "SELECT dimension FROM vector_profiles WHERE profile = ?1 AND enabled = 1",
2742 [profile],
2743 |row| row.get(0),
2744 )
2745 .optional()
2746 .map_err(|error| {
2747 VectorRegenerationFailure::new(
2748 VectorRegenerationFailureClass::InvalidContract,
2749 error.to_string(),
2750 )
2751 })?;
2752 dimension
2753 .map(|value| {
2754 usize::try_from(value).map_err(|_| {
2755 VectorRegenerationFailure::new(
2756 VectorRegenerationFailureClass::InvalidContract,
2757 format!("stored vector profile dimension is invalid: {value}"),
2758 )
2759 })
2760 })
2761 .transpose()
2762}
2763
2764fn validate_existing_contract_version(
2765 conn: &rusqlite::Connection,
2766 profile: &str,
2767) -> Result<(), VectorRegenerationFailure> {
2768 let version: Option<i64> = conn
2769 .query_row(
2770 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = ?1",
2771 [profile],
2772 |row| row.get(0),
2773 )
2774 .optional()
2775 .map_err(|error| {
2776 VectorRegenerationFailure::new(
2777 VectorRegenerationFailureClass::InvalidContract,
2778 error.to_string(),
2779 )
2780 })?;
2781 if let Some(version) = version
2782 && version > CURRENT_VECTOR_CONTRACT_FORMAT_VERSION
2783 {
2784 return Err(VectorRegenerationFailure::new(
2785 VectorRegenerationFailureClass::InvalidContract,
2786 format!(
2787 "persisted contract format version {version} is unsupported; supported version is {CURRENT_VECTOR_CONTRACT_FORMAT_VERSION}"
2788 ),
2789 ));
2790 }
2791 Ok(())
2792}
2793
2794fn serialize_audit_metadata(
2795 metadata: &VectorRegenerationAuditMetadata,
2796) -> Result<String, EngineError> {
2797 let json =
2798 serde_json::to_string(metadata).map_err(|error| EngineError::Bridge(error.to_string()))?;
2799 if json.len() > MAX_AUDIT_METADATA_BYTES {
2800 return Err(VectorRegenerationFailure::new(
2801 VectorRegenerationFailureClass::InvalidContract,
2802 format!("audit metadata exceeds {MAX_AUDIT_METADATA_BYTES} bytes"),
2803 )
2804 .to_engine_error());
2805 }
2806 Ok(json)
2807}
2808
2809#[allow(clippy::too_many_lines)]
2810fn run_vector_generator_bounded(
2811 config: &VectorRegenerationConfig,
2812 payload: &VectorRegenerationInput,
2813 policy: &VectorGeneratorPolicy,
2814) -> Result<GeneratedEmbeddings, VectorRegenerationFailure> {
2815 if payload.chunks.len() > policy.max_chunks {
2816 return Err(VectorRegenerationFailure::new(
2817 VectorRegenerationFailureClass::PayloadTooLarge,
2818 format!(
2819 "chunk count {} exceeds max_chunks {}",
2820 payload.chunks.len(),
2821 policy.max_chunks
2822 ),
2823 ));
2824 }
2825
2826 let input = serde_json::to_vec(payload).map_err(|error| {
2827 VectorRegenerationFailure::new(
2828 VectorRegenerationFailureClass::MalformedGeneratorJson,
2829 error.to_string(),
2830 )
2831 })?;
2832 if input.len() > policy.max_input_bytes {
2833 return Err(VectorRegenerationFailure::new(
2834 VectorRegenerationFailureClass::PayloadTooLarge,
2835 format!(
2836 "serialized input {} bytes exceeds max_input_bytes {}",
2837 input.len(),
2838 policy.max_input_bytes
2839 ),
2840 ));
2841 }
2842
2843 let mut command = Command::new(config.generator_command.first().ok_or_else(|| {
2844 VectorRegenerationFailure::new(
2845 VectorRegenerationFailureClass::InvalidContract,
2846 "missing generator executable",
2847 )
2848 })?);
2849 command.args(config.generator_command.iter().skip(1));
2850 command.stdin(Stdio::piped());
2851 command.stdout(Stdio::piped());
2852 command.stderr(Stdio::piped());
2853 command.env_clear();
2854 for env_var in &policy.preserve_env_vars {
2855 if let Some(value) = std::env::var_os(env_var) {
2856 command.env(env_var, value);
2857 }
2858 }
2859
2860 let mut child = command.spawn().map_err(|error| {
2861 VectorRegenerationFailure::new(
2862 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2863 format!("failed to spawn generator: {error}"),
2864 )
2865 })?;
2866 if let Some(mut stdin) = child.stdin.take() {
2867 stdin.write_all(&input).map_err(|error| {
2868 VectorRegenerationFailure::new(
2869 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2870 format!("failed to write generator stdin: {error}"),
2871 )
2872 })?;
2873 } else {
2874 return Err(VectorRegenerationFailure::new(
2875 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2876 "failed to open generator stdin",
2877 ));
2878 }
2879
2880 let stdout = child.stdout.take().ok_or_else(|| {
2881 VectorRegenerationFailure::new(
2882 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2883 "failed to open generator stdout",
2884 )
2885 })?;
2886 let stderr = child.stderr.take().ok_or_else(|| {
2887 VectorRegenerationFailure::new(
2888 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2889 "failed to open generator stderr",
2890 )
2891 })?;
2892
2893 let (tx, rx) = mpsc::channel();
2894 let stdout_handle = spawn_capped_reader(
2895 stdout,
2896 policy.max_stdout_bytes,
2897 GeneratorStream::Stdout,
2898 tx.clone(),
2899 );
2900 let stderr_handle =
2901 spawn_capped_reader(stderr, policy.max_stderr_bytes, GeneratorStream::Stderr, tx);
2902
2903 let start = Instant::now();
2904 let timeout = Duration::from_millis(policy.timeout_ms);
2905 let mut stdout_bytes: Option<Vec<u8>> = None;
2906 let mut stderr_bytes: Option<Vec<u8>> = None;
2907 let mut status = None;
2908 let mut stream_error: Option<VectorRegenerationFailure> = None;
2909
2910 while status.is_none() && stream_error.is_none() {
2911 while let Ok((stream, result)) = rx.try_recv() {
2912 match (stream, result) {
2913 (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
2914 stdout_bytes = Some(bytes);
2915 }
2916 (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
2917 stderr_bytes = Some(bytes);
2918 }
2919 (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
2920 stream_error = Some(VectorRegenerationFailure::new(
2921 VectorRegenerationFailureClass::GeneratorStdoutOverflow,
2922 format!(
2923 "stdout exceeded max_stdout_bytes {}",
2924 policy.max_stdout_bytes
2925 ),
2926 ));
2927 }
2928 (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
2929 stream_error = Some(VectorRegenerationFailure::new(
2930 VectorRegenerationFailureClass::GeneratorStderrOverflow,
2931 format!(
2932 "stderr exceeded max_stderr_bytes {}",
2933 policy.max_stderr_bytes
2934 ),
2935 ));
2936 }
2937 (_, StreamReadResult::Io(error)) => {
2938 stream_error = Some(VectorRegenerationFailure::new(
2939 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2940 format!("failed to read generator stream: {error}"),
2941 ));
2942 }
2943 }
2944 }
2945
2946 if stream_error.is_some() {
2947 let _ = child.kill();
2948 break;
2949 }
2950 if start.elapsed() > timeout {
2951 let _ = child.kill();
2952 stream_error = Some(VectorRegenerationFailure::new(
2953 VectorRegenerationFailureClass::GeneratorTimeout,
2954 format!("generator exceeded timeout after {}ms", policy.timeout_ms),
2955 ));
2956 break;
2957 }
2958 status = child.try_wait().map_err(|error| {
2959 VectorRegenerationFailure::new(
2960 VectorRegenerationFailureClass::GeneratorNonzeroExit,
2961 format!("failed to poll generator status: {error}"),
2962 )
2963 })?;
2964 if status.is_none() {
2965 thread::sleep(Duration::from_millis(10));
2966 }
2967 }
2968
2969 let _ = child.wait();
2970 let _ = stdout_handle.join();
2971 let _ = stderr_handle.join();
2972
2973 while let Ok((stream, result)) = rx.try_recv() {
2974 match (stream, result) {
2975 (GeneratorStream::Stdout, StreamReadResult::Complete(bytes)) => {
2976 stdout_bytes = Some(bytes);
2977 }
2978 (GeneratorStream::Stderr, StreamReadResult::Complete(bytes)) => {
2979 stderr_bytes = Some(bytes);
2980 }
2981 (GeneratorStream::Stdout, StreamReadResult::Overflow) => {
2982 stream_error = Some(VectorRegenerationFailure::new(
2983 VectorRegenerationFailureClass::GeneratorStdoutOverflow,
2984 format!(
2985 "stdout exceeded max_stdout_bytes {}",
2986 policy.max_stdout_bytes
2987 ),
2988 ));
2989 }
2990 (GeneratorStream::Stderr, StreamReadResult::Overflow) => {
2991 stream_error = Some(VectorRegenerationFailure::new(
2992 VectorRegenerationFailureClass::GeneratorStderrOverflow,
2993 format!(
2994 "stderr exceeded max_stderr_bytes {}",
2995 policy.max_stderr_bytes
2996 ),
2997 ));
2998 }
2999 (_, StreamReadResult::Io(error)) => {
3000 stream_error = Some(VectorRegenerationFailure::new(
3001 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3002 format!("failed to read generator stream: {error}"),
3003 ));
3004 }
3005 }
3006 }
3007
3008 if let Some(error) = stream_error {
3009 return Err(error);
3010 }
3011
3012 let status = status.ok_or_else(|| {
3013 VectorRegenerationFailure::new(
3014 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3015 "vector generator exited without a status",
3016 )
3017 })?;
3018 if !status.success() {
3019 let stderr =
3020 truncate_error_text(&stderr_bytes.unwrap_or_default(), policy.max_stderr_bytes);
3021 return Err(VectorRegenerationFailure::new(
3022 VectorRegenerationFailureClass::GeneratorNonzeroExit,
3023 stderr,
3024 ));
3025 }
3026
3027 let stdout = stdout_bytes.unwrap_or_default();
3028 serde_json::from_slice(&stdout).map_err(|error| {
3029 VectorRegenerationFailure::new(
3030 VectorRegenerationFailureClass::MalformedGeneratorJson,
3031 format!("decode generator output: {error}"),
3032 )
3033 })
3034}
3035
3036fn spawn_capped_reader<R: Read + Send + 'static>(
3037 mut reader: R,
3038 max_bytes: usize,
3039 stream: GeneratorStream,
3040 tx: mpsc::Sender<(GeneratorStream, StreamReadResult)>,
3041) -> thread::JoinHandle<()> {
3042 thread::spawn(move || {
3043 let mut buffer = Vec::new();
3044 let mut chunk = [0u8; 8192];
3045 loop {
3046 match reader.read(&mut chunk) {
3047 Ok(0) => {
3048 let _ = tx.send((stream, StreamReadResult::Complete(buffer)));
3049 break;
3050 }
3051 Ok(read_bytes) => {
3052 if buffer.len() + read_bytes > max_bytes {
3053 let _ = tx.send((stream, StreamReadResult::Overflow));
3054 break;
3055 }
3056 buffer.extend_from_slice(&chunk[..read_bytes]);
3057 }
3058 Err(error) => {
3059 let _ = tx.send((stream, StreamReadResult::Io(error)));
3060 break;
3061 }
3062 }
3063 }
3064 })
3065}
3066
/// Converts `bytes` to text, keeping at most the first `max_bytes` bytes.
///
/// Invalid UTF-8 is replaced lossily. When the input is longer than
/// `max_bytes`, only the first `max_bytes` bytes are converted and
/// ` [truncated]` is appended; a multi-byte character cut at the boundary
/// becomes a replacement character.
fn truncate_error_text(bytes: &[u8], max_bytes: usize) -> String {
    match bytes.get(..max_bytes) {
        Some(kept) if kept.len() < bytes.len() => {
            let mut text = String::from_utf8_lossy(kept).into_owned();
            text.push_str(" [truncated]");
            text
        }
        // Either the input fits exactly or it is shorter than the limit.
        _ => String::from_utf8_lossy(bytes).into_owned(),
    }
}
3074
3075fn count_source_ref(
3076 conn: &rusqlite::Connection,
3077 table: &str,
3078 source_ref: &str,
3079) -> Result<usize, EngineError> {
3080 let sql = match table {
3081 "nodes" => "SELECT count(*) FROM nodes WHERE source_ref = ?1",
3082 "edges" => "SELECT count(*) FROM edges WHERE source_ref = ?1",
3083 "actions" => "SELECT count(*) FROM actions WHERE source_ref = ?1",
3084 "operational_mutations" => {
3085 "SELECT count(*) FROM operational_mutations WHERE source_ref = ?1"
3086 }
3087 other => return Err(EngineError::Bridge(format!("unknown table: {other}"))),
3088 };
3089 let count: i64 = conn.query_row(sql, [source_ref], |row| row.get(0))?;
3090 usize::try_from(count)
3093 .map_err(|_| EngineError::Bridge(format!("count overflow for table {table}: {count}")))
3094}
3095
3096fn rebuild_operational_current_rows(
3097 tx: &rusqlite::Transaction<'_>,
3098 collections: &[String],
3099) -> Result<usize, EngineError> {
3100 let mut rebuilt_rows = 0usize;
3101 clear_operational_current_rows(tx, collections)?;
3102 let mut ins_current = tx.prepare_cached(
3103 "INSERT INTO operational_current \
3104 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
3105 VALUES (?1, ?2, ?3, ?4, ?5)",
3106 )?;
3107
3108 for collection in collections {
3109 let mut stmt = tx.prepare(
3110 "SELECT id, collection_name, record_key, op_kind, payload_json, source_ref, created_at \
3111 FROM operational_mutations \
3112 WHERE collection_name = ?1 \
3113 ORDER BY record_key, mutation_order",
3114 )?;
3115 let mut latest_by_key: std::collections::HashMap<String, Option<(String, i64, String)>> =
3116 std::collections::HashMap::new();
3117 let rows = stmt.query_map([collection], map_operational_mutation_row)?;
3118 for row in rows {
3119 let mutation = row?;
3120 match mutation.op_kind.as_str() {
3121 "put" => {
3122 latest_by_key.insert(
3123 mutation.record_key,
3124 Some((mutation.payload_json, mutation.created_at, mutation.id)),
3125 );
3126 }
3127 "delete" => {
3128 latest_by_key.insert(mutation.record_key, None);
3129 }
3130 _ => {}
3131 }
3132 }
3133
3134 for (record_key, state) in latest_by_key {
3135 if let Some((payload_json, updated_at, last_mutation_id)) = state {
3136 ins_current.execute(rusqlite::params![
3137 collection,
3138 record_key,
3139 payload_json,
3140 updated_at,
3141 last_mutation_id,
3142 ])?;
3143 rebuilt_rows += 1;
3144 }
3145 }
3146 }
3147
3148 drop(ins_current);
3149 Ok(rebuilt_rows)
3150}
3151
3152fn clear_operational_current_rows(
3153 tx: &rusqlite::Transaction<'_>,
3154 collections: &[String],
3155) -> Result<(), EngineError> {
3156 let mut delete_current =
3157 tx.prepare_cached("DELETE FROM operational_current WHERE collection_name = ?1")?;
3158 let mut delete_secondary_current = tx.prepare_cached(
3159 "DELETE FROM operational_secondary_index_entries \
3160 WHERE collection_name = ?1 AND subject_kind = 'current'",
3161 )?;
3162 for collection in collections {
3163 delete_secondary_current.execute([collection])?;
3164 delete_current.execute([collection])?;
3165 }
3166 drop(delete_secondary_current);
3167 drop(delete_current);
3168 Ok(())
3169}
3170
3171fn clear_operational_secondary_index_entries(
3172 tx: &rusqlite::Transaction<'_>,
3173 collection_name: &str,
3174) -> Result<(), EngineError> {
3175 tx.execute(
3176 "DELETE FROM operational_secondary_index_entries WHERE collection_name = ?1",
3177 [collection_name],
3178 )?;
3179 Ok(())
3180}
3181
3182fn insert_operational_secondary_index_entry(
3183 tx: &rusqlite::Transaction<'_>,
3184 collection_name: &str,
3185 subject_kind: &str,
3186 mutation_id: &str,
3187 record_key: &str,
3188 entry: &crate::operational::OperationalSecondaryIndexEntry,
3189) -> Result<(), EngineError> {
3190 tx.execute(
3191 "INSERT INTO operational_secondary_index_entries \
3192 (collection_name, index_name, subject_kind, mutation_id, record_key, sort_timestamp, \
3193 slot1_text, slot1_integer, slot2_text, slot2_integer, slot3_text, slot3_integer) \
3194 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12)",
3195 rusqlite::params![
3196 collection_name,
3197 entry.index_name,
3198 subject_kind,
3199 mutation_id,
3200 record_key,
3201 entry.sort_timestamp,
3202 entry.slot1_text,
3203 entry.slot1_integer,
3204 entry.slot2_text,
3205 entry.slot2_integer,
3206 entry.slot3_text,
3207 entry.slot3_integer,
3208 ],
3209 )?;
3210 Ok(())
3211}
3212
3213fn rebuild_operational_secondary_index_entries(
3214 tx: &rusqlite::Transaction<'_>,
3215 collection_name: &str,
3216 collection_kind: OperationalCollectionKind,
3217 indexes: &[OperationalSecondaryIndexDefinition],
3218) -> Result<(usize, usize), EngineError> {
3219 clear_operational_secondary_index_entries(tx, collection_name)?;
3220
3221 let mut mutation_entries_rebuilt = 0usize;
3222 if collection_kind == OperationalCollectionKind::AppendOnlyLog {
3223 let mut stmt = tx.prepare(
3224 "SELECT id, record_key, payload_json FROM operational_mutations \
3225 WHERE collection_name = ?1 ORDER BY mutation_order",
3226 )?;
3227 let rows = stmt
3228 .query_map([collection_name], |row| {
3229 Ok((
3230 row.get::<_, String>(0)?,
3231 row.get::<_, String>(1)?,
3232 row.get::<_, String>(2)?,
3233 ))
3234 })?
3235 .collect::<Result<Vec<_>, _>>()?;
3236 drop(stmt);
3237 for (mutation_id, record_key, payload_json) in rows {
3238 for entry in extract_secondary_index_entries_for_mutation(indexes, &payload_json) {
3239 insert_operational_secondary_index_entry(
3240 tx,
3241 collection_name,
3242 "mutation",
3243 &mutation_id,
3244 &record_key,
3245 &entry,
3246 )?;
3247 mutation_entries_rebuilt += 1;
3248 }
3249 }
3250 }
3251
3252 let mut current_entries_rebuilt = 0usize;
3253 if collection_kind == OperationalCollectionKind::LatestState {
3254 let mut stmt = tx.prepare(
3255 "SELECT record_key, payload_json, updated_at, last_mutation_id FROM operational_current \
3256 WHERE collection_name = ?1 ORDER BY updated_at DESC, record_key",
3257 )?;
3258 let rows = stmt
3259 .query_map([collection_name], |row| {
3260 Ok((
3261 row.get::<_, String>(0)?,
3262 row.get::<_, String>(1)?,
3263 row.get::<_, i64>(2)?,
3264 row.get::<_, String>(3)?,
3265 ))
3266 })?
3267 .collect::<Result<Vec<_>, _>>()?;
3268 drop(stmt);
3269 for (record_key, payload_json, updated_at, last_mutation_id) in rows {
3270 for entry in
3271 extract_secondary_index_entries_for_current(indexes, &payload_json, updated_at)
3272 {
3273 insert_operational_secondary_index_entry(
3274 tx,
3275 collection_name,
3276 "current",
3277 &last_mutation_id,
3278 &record_key,
3279 &entry,
3280 )?;
3281 current_entries_rebuilt += 1;
3282 }
3283 }
3284 }
3285
3286 Ok((mutation_entries_rebuilt, current_entries_rebuilt))
3287}
3288
3289fn collect_strings_tx(
3290 tx: &rusqlite::Transaction<'_>,
3291 sql: &str,
3292 value: &str,
3293) -> Result<Vec<String>, EngineError> {
3294 let mut stmt = tx.prepare(sql)?;
3295 let rows = stmt.query_map([value], |row| row.get::<_, String>(0))?;
3296 rows.collect::<Result<Vec<_>, _>>()
3297 .map_err(EngineError::from)
3298}
3299
/// Converts a SQL `count(*)` result to `usize`.
///
/// # Panics
/// Panics if `val` does not fit in `usize` (for example, if it is negative).
// `expect` is intentional: a failing conversion here is treated as a broken invariant.
#[allow(clippy::expect_used)]
fn i64_to_usize(val: i64) -> usize {
    let converted: Result<usize, _> = val.try_into();
    converted.expect("count(*) must be non-negative")
}
3306
3307fn collect_strings(
3314 conn: &rusqlite::Connection,
3315 sql: &str,
3316 param: &str,
3317) -> Result<Vec<String>, EngineError> {
3318 let mut stmt = conn.prepare(sql)?;
3319 let values = stmt
3320 .query_map([param], |row| row.get::<_, String>(0))?
3321 .collect::<Result<Vec<_>, _>>()?;
3322 Ok(values)
3323}
3324
3325fn collect_edge_logical_ids_for_restore(
3326 tx: &rusqlite::Transaction<'_>,
3327 logical_id: &str,
3328 retire_source_ref: Option<&str>,
3329 retire_created_at: i64,
3330 retire_event_rowid: i64,
3331) -> Result<Vec<String>, EngineError> {
3332 let mut stmt = tx.prepare(
3333 "SELECT DISTINCT e.logical_id \
3334 FROM edges e \
3335 JOIN provenance_events p \
3336 ON p.subject = e.logical_id \
3337 AND p.event_type = 'edge_retire' \
3338 AND ( \
3339 p.created_at > ?3 \
3340 OR (p.created_at = ?3 AND p.rowid >= ?4) \
3341 ) \
3342 AND ((?2 IS NULL AND p.source_ref IS NULL) OR p.source_ref = ?2) \
3343 WHERE e.superseded_at IS NOT NULL \
3344 AND (e.source_logical_id = ?1 OR e.target_logical_id = ?1) \
3345 AND NOT EXISTS ( \
3346 SELECT 1 FROM edges active \
3347 WHERE active.logical_id = e.logical_id \
3348 AND active.superseded_at IS NULL \
3349 ) \
3350 ORDER BY e.logical_id",
3351 )?;
3352 let edge_ids = stmt
3353 .query_map(
3354 rusqlite::params![
3355 logical_id,
3356 retire_source_ref,
3357 retire_created_at,
3358 retire_event_rowid
3359 ],
3360 |row| row.get::<_, String>(0),
3361 )?
3362 .collect::<Result<Vec<_>, _>>()?;
3363 Ok(edge_ids)
3364}
3365
3366fn restore_validated_edges(
3369 tx: &rusqlite::Transaction<'_>,
3370 logical_id: &str,
3371 retire_source_ref: Option<&str>,
3372 retire_created_at: i64,
3373 retire_event_rowid: i64,
3374) -> Result<(usize, Vec<SkippedEdge>), EngineError> {
3375 let edge_logical_ids = collect_edge_logical_ids_for_restore(
3376 tx,
3377 logical_id,
3378 retire_source_ref,
3379 retire_created_at,
3380 retire_event_rowid,
3381 )?;
3382 let mut restored = 0usize;
3383 let mut skipped = Vec::new();
3384 for edge_logical_id in &edge_logical_ids {
3385 let edge_detail: Option<(String, String, String)> = tx
3386 .query_row(
3387 "SELECT row_id, source_logical_id, target_logical_id FROM edges \
3388 WHERE logical_id = ?1 AND superseded_at IS NOT NULL \
3389 ORDER BY superseded_at DESC, created_at DESC, rowid DESC LIMIT 1",
3390 [edge_logical_id.as_str()],
3391 |row| Ok((row.get(0)?, row.get(1)?, row.get(2)?)),
3392 )
3393 .optional()?;
3394 let Some((edge_row_id, source_lid, target_lid)) = edge_detail else {
3395 continue;
3396 };
3397 let other_endpoint = if source_lid == logical_id {
3398 &target_lid
3399 } else {
3400 &source_lid
3401 };
3402 let endpoint_active: bool = tx
3403 .query_row(
3404 "SELECT 1 FROM nodes WHERE logical_id = ?1 AND superseded_at IS NULL LIMIT 1",
3405 [other_endpoint.as_str()],
3406 |_| Ok(true),
3407 )
3408 .optional()?
3409 .unwrap_or(false);
3410 if !endpoint_active {
3411 skipped.push(SkippedEdge {
3412 edge_logical_id: edge_logical_id.clone(),
3413 missing_endpoint: other_endpoint.clone(),
3414 });
3415 continue;
3416 }
3417 restored += tx.execute(
3418 "UPDATE edges SET superseded_at = NULL WHERE row_id = ?1",
3419 [edge_row_id.as_str()],
3420 )?;
3421 }
3422 Ok((restored, skipped))
3423}
3424
/// Counts rows of `vec_nodes_active` whose chunk belongs to the node `logical_id`.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero rows instead of an error.
///
/// # Errors
/// Any other SQLite failure is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn count_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let counted = tx.query_row(
        "SELECT count(*) FROM vec_nodes_active v \
         JOIN chunks c ON c.id = v.chunk_id \
         WHERE c.node_logical_id = ?1",
        [logical_id],
        |row| row.get::<_, i64>(0),
    );
    match counted {
        Ok(rows) => Ok(i64_to_usize(rows)),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref detail)))
            if detail.contains("no such module: vec0") || detail.contains("vec_nodes_active") =>
        {
            Ok(0)
        }
        Err(other) => Err(EngineError::Sqlite(other)),
    }
}
3446
3447#[cfg(not(feature = "sqlite-vec"))]
3448#[allow(clippy::unnecessary_wraps)]
3449fn count_vec_rows_for_logical_id(
3450 _tx: &rusqlite::Transaction<'_>,
3451 _logical_id: &str,
3452) -> Result<usize, EngineError> {
3453 Ok(0)
3454}
3455
/// Deletes rows of `vec_nodes_active` whose chunk belongs to the node
/// `logical_id` and returns how many rows were removed.
///
/// A SQLite failure whose message mentions `vec_nodes_active` or
/// `no such module: vec0` is reported as zero rows instead of an error.
///
/// # Errors
/// Any other SQLite failure is returned as [`EngineError::Sqlite`].
#[cfg(feature = "sqlite-vec")]
fn delete_vec_rows_for_logical_id(
    tx: &rusqlite::Transaction<'_>,
    logical_id: &str,
) -> Result<usize, EngineError> {
    let deleted = tx.execute(
        "DELETE FROM vec_nodes_active \
         WHERE chunk_id IN (SELECT id FROM chunks WHERE node_logical_id = ?1)",
        [logical_id],
    );
    match deleted {
        Ok(rows) => Ok(rows),
        Err(rusqlite::Error::SqliteFailure(_, Some(ref detail)))
            if detail.contains("no such module: vec0") || detail.contains("vec_nodes_active") =>
        {
            Ok(0)
        }
        Err(other) => Err(EngineError::Sqlite(other)),
    }
}
3475
3476#[cfg(not(feature = "sqlite-vec"))]
3477#[allow(clippy::unnecessary_wraps)]
3478fn delete_vec_rows_for_logical_id(
3479 _tx: &rusqlite::Transaction<'_>,
3480 _logical_id: &str,
3481) -> Result<usize, EngineError> {
3482 Ok(0)
3483}
3484
3485fn ensure_operational_collection_registered(
3486 conn: &rusqlite::Connection,
3487 collection_name: &str,
3488) -> Result<(), EngineError> {
3489 if load_operational_collection_record(conn, collection_name)?.is_none() {
3490 return Err(EngineError::InvalidWrite(format!(
3491 "operational collection '{collection_name}' is not registered"
3492 )));
3493 }
3494 Ok(())
3495}
3496
3497fn load_operational_collection_record(
3498 conn: &rusqlite::Connection,
3499 name: &str,
3500) -> Result<Option<OperationalCollectionRecord>, EngineError> {
3501 conn.query_row(
3502 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
3503 FROM operational_collections WHERE name = ?1",
3504 [name],
3505 map_operational_collection_row,
3506 )
3507 .optional()
3508 .map_err(EngineError::Sqlite)
3509}
3510
3511fn validate_append_only_operational_collection(
3512 record: &OperationalCollectionRecord,
3513 operation: &str,
3514) -> Result<(), EngineError> {
3515 if record.kind != OperationalCollectionKind::AppendOnlyLog {
3516 return Err(EngineError::InvalidWrite(format!(
3517 "operational collection '{}' must be append_only_log to {operation}",
3518 record.name
3519 )));
3520 }
3521 Ok(())
3522}
3523
3524#[derive(Clone, Debug, PartialEq, Eq)]
3525struct CompiledOperationalReadFilter {
3526 field: String,
3527 condition: OperationalReadCondition,
3528}
3529
3530#[derive(Clone, Debug)]
3531struct MatchedAppendOnlySecondaryIndexRead<'a> {
3532 index_name: &'a str,
3533 value_filter: &'a CompiledOperationalReadFilter,
3534 time_range: Option<&'a CompiledOperationalReadFilter>,
3535}
3536
/// Typed comparison carried by a compiled operational read filter.
#[derive(Debug, Clone, Eq, PartialEq)]
enum OperationalReadCondition {
    /// The string value must equal the given text.
    ExactString(String),
    /// The integer value must equal the given number.
    ExactInteger(i64),
    /// The string value must start with the given text.
    Prefix(String),
    /// The integer value must lie within the given inclusive bounds; a `None`
    /// bound is left open.
    Range {
        lower: Option<i64>,
        upper: Option<i64>,
    },
}
3547
3548fn operational_read_limit(limit: Option<usize>) -> Result<usize, EngineError> {
3549 let applied_limit = limit.unwrap_or(DEFAULT_OPERATIONAL_READ_LIMIT);
3550 if applied_limit == 0 {
3551 return Err(EngineError::InvalidWrite(
3552 "operational read limit must be greater than zero".to_owned(),
3553 ));
3554 }
3555 Ok(applied_limit.min(MAX_OPERATIONAL_READ_LIMIT))
3556}
3557
3558fn parse_operational_filter_fields(
3559 filter_fields_json: &str,
3560) -> Result<Vec<OperationalFilterField>, String> {
3561 let fields: Vec<OperationalFilterField> = serde_json::from_str(filter_fields_json)
3562 .map_err(|error| format!("invalid filter_fields_json: {error}"))?;
3563 let mut seen = std::collections::HashSet::new();
3564 for field in &fields {
3565 if field.name.trim().is_empty() {
3566 return Err("filter_fields_json field names must not be empty".to_owned());
3567 }
3568 if !seen.insert(field.name.as_str()) {
3569 return Err(format!(
3570 "filter_fields_json contains duplicate field '{}'",
3571 field.name
3572 ));
3573 }
3574 if field.modes.is_empty() {
3575 return Err(format!(
3576 "filter_fields_json field '{}' must declare at least one mode",
3577 field.name
3578 ));
3579 }
3580 if field.modes.contains(&OperationalFilterMode::Prefix)
3581 && field.field_type != OperationalFilterFieldType::String
3582 {
3583 return Err(format!(
3584 "filter field '{}' only supports prefix for string types",
3585 field.name
3586 ));
3587 }
3588 }
3589 Ok(fields)
3590}
3591
3592fn compile_operational_read_filters(
3593 filters: &[OperationalFilterClause],
3594 declared_fields: &[OperationalFilterField],
3595) -> Result<Vec<CompiledOperationalReadFilter>, EngineError> {
3596 let field_map = declared_fields
3597 .iter()
3598 .map(|field| (field.name.as_str(), field))
3599 .collect::<std::collections::HashMap<_, _>>();
3600 filters
3601 .iter()
3602 .map(|filter| match filter {
3603 OperationalFilterClause::Exact { field, value } => {
3604 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3605 EngineError::InvalidWrite(format!(
3606 "operational read filter uses undeclared field '{field}'"
3607 ))
3608 })?;
3609 if !declared.modes.contains(&OperationalFilterMode::Exact) {
3610 return Err(EngineError::InvalidWrite(format!(
3611 "operational read field '{field}' does not allow exact filters"
3612 )));
3613 }
3614 let condition = match (declared.field_type, value) {
3615 (OperationalFilterFieldType::String, OperationalFilterValue::String(value)) => {
3616 OperationalReadCondition::ExactString(value.clone())
3617 }
3618 (
3619 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp,
3620 OperationalFilterValue::Integer(value),
3621 ) => OperationalReadCondition::ExactInteger(*value),
3622 _ => {
3623 return Err(EngineError::InvalidWrite(format!(
3624 "operational read field '{field}' received a value with the wrong type"
3625 )));
3626 }
3627 };
3628 Ok(CompiledOperationalReadFilter {
3629 field: field.clone(),
3630 condition,
3631 })
3632 }
3633 OperationalFilterClause::Prefix { field, value } => {
3634 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3635 EngineError::InvalidWrite(format!(
3636 "operational read filter uses undeclared field '{field}'"
3637 ))
3638 })?;
3639 if !declared.modes.contains(&OperationalFilterMode::Prefix) {
3640 return Err(EngineError::InvalidWrite(format!(
3641 "operational read field '{field}' does not allow prefix filters"
3642 )));
3643 }
3644 if declared.field_type != OperationalFilterFieldType::String {
3645 return Err(EngineError::InvalidWrite(format!(
3646 "operational read field '{field}' only supports prefix filters for strings"
3647 )));
3648 }
3649 Ok(CompiledOperationalReadFilter {
3650 field: field.clone(),
3651 condition: OperationalReadCondition::Prefix(value.clone()),
3652 })
3653 }
3654 OperationalFilterClause::Range {
3655 field,
3656 lower,
3657 upper,
3658 } => {
3659 let declared = field_map.get(field.as_str()).ok_or_else(|| {
3660 EngineError::InvalidWrite(format!(
3661 "operational read filter uses undeclared field '{field}'"
3662 ))
3663 })?;
3664 if !declared.modes.contains(&OperationalFilterMode::Range) {
3665 return Err(EngineError::InvalidWrite(format!(
3666 "operational read field '{field}' does not allow range filters"
3667 )));
3668 }
3669 if !matches!(
3670 declared.field_type,
3671 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp
3672 ) {
3673 return Err(EngineError::InvalidWrite(format!(
3674 "operational read field '{field}' only supports range filters for integer/timestamp fields"
3675 )));
3676 }
3677 if lower.is_none() && upper.is_none() {
3678 return Err(EngineError::InvalidWrite(format!(
3679 "operational read range filter for '{field}' must specify a lower or upper bound"
3680 )));
3681 }
3682 Ok(CompiledOperationalReadFilter {
3683 field: field.clone(),
3684 condition: OperationalReadCondition::Range {
3685 lower: *lower,
3686 upper: *upper,
3687 },
3688 })
3689 }
3690 })
3691 .collect()
3692}
3693
3694fn match_append_only_secondary_index_read<'a>(
3695 filters: &'a [CompiledOperationalReadFilter],
3696 indexes: &'a [OperationalSecondaryIndexDefinition],
3697) -> Option<MatchedAppendOnlySecondaryIndexRead<'a>> {
3698 indexes.iter().find_map(|index| {
3699 let OperationalSecondaryIndexDefinition::AppendOnlyFieldTime {
3700 name,
3701 field,
3702 value_type,
3703 time_field,
3704 } = index
3705 else {
3706 return None;
3707 };
3708 if !(1..=2).contains(&filters.len()) {
3709 return None;
3710 }
3711
3712 let mut value_filter = None;
3713 let mut time_range = None;
3714 for filter in filters {
3715 if filter.field == *field {
3716 let supported = matches!(
3717 (&filter.condition, value_type),
3718 (
3719 OperationalReadCondition::ExactString(_)
3720 | OperationalReadCondition::Prefix(_),
3721 crate::operational::OperationalSecondaryIndexValueType::String
3722 ) | (
3723 OperationalReadCondition::ExactInteger(_),
3724 crate::operational::OperationalSecondaryIndexValueType::Integer
3725 | crate::operational::OperationalSecondaryIndexValueType::Timestamp
3726 )
3727 );
3728 if !supported || value_filter.is_some() {
3729 return None;
3730 }
3731 value_filter = Some(filter);
3732 continue;
3733 }
3734 if filter.field == *time_field {
3735 if !matches!(filter.condition, OperationalReadCondition::Range { .. })
3736 || time_range.is_some()
3737 {
3738 return None;
3739 }
3740 time_range = Some(filter);
3741 continue;
3742 }
3743 return None;
3744 }
3745
3746 value_filter.map(|value_filter| MatchedAppendOnlySecondaryIndexRead {
3747 index_name: name.as_str(),
3748 value_filter,
3749 time_range,
3750 })
3751 })
3752}
3753
3754fn execute_operational_secondary_index_read(
3755 conn: &rusqlite::Connection,
3756 collection_name: &str,
3757 filters: &[CompiledOperationalReadFilter],
3758 indexes: &[OperationalSecondaryIndexDefinition],
3759 applied_limit: usize,
3760) -> Result<Option<OperationalReadReport>, EngineError> {
3761 use rusqlite::types::Value;
3762
3763 let Some(matched) = match_append_only_secondary_index_read(filters, indexes) else {
3764 return Ok(None);
3765 };
3766
3767 let mut sql = String::from(
3768 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3769 FROM operational_secondary_index_entries s \
3770 JOIN operational_mutations m ON m.id = s.mutation_id \
3771 WHERE s.collection_name = ?1 AND s.index_name = ?2 AND s.subject_kind = 'mutation' ",
3772 );
3773 let mut params = vec![
3774 Value::from(collection_name.to_owned()),
3775 Value::from(matched.index_name.to_owned()),
3776 ];
3777
3778 match &matched.value_filter.condition {
3779 OperationalReadCondition::ExactString(value) => {
3780 let _ = write!(sql, "AND s.slot1_text = ?{} ", params.len() + 1);
3781 params.push(Value::from(value.clone()));
3782 }
3783 OperationalReadCondition::Prefix(value) => {
3784 let _ = write!(sql, "AND s.slot1_text GLOB ?{} ", params.len() + 1);
3785 params.push(Value::from(glob_prefix_pattern(value)));
3786 }
3787 OperationalReadCondition::ExactInteger(value) => {
3788 let _ = write!(sql, "AND s.slot1_integer = ?{} ", params.len() + 1);
3789 params.push(Value::from(*value));
3790 }
3791 OperationalReadCondition::Range { .. } => return Ok(None),
3792 }
3793
3794 if let Some(time_range) = matched.time_range
3795 && let OperationalReadCondition::Range { lower, upper } = &time_range.condition
3796 {
3797 if let Some(lower) = lower {
3798 let _ = write!(sql, "AND s.sort_timestamp >= ?{} ", params.len() + 1);
3799 params.push(Value::from(*lower));
3800 }
3801 if let Some(upper) = upper {
3802 let _ = write!(sql, "AND s.sort_timestamp <= ?{} ", params.len() + 1);
3803 params.push(Value::from(*upper));
3804 }
3805 }
3806
3807 let _ = write!(
3808 sql,
3809 "ORDER BY s.sort_timestamp DESC, m.mutation_order DESC LIMIT ?{}",
3810 params.len() + 1
3811 );
3812 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
3813 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
3814 )?));
3815
3816 let mut stmt = conn.prepare(&sql)?;
3817 let mut rows = stmt
3818 .query_map(
3819 rusqlite::params_from_iter(params),
3820 map_operational_mutation_row,
3821 )?
3822 .collect::<Result<Vec<_>, _>>()?;
3823 let was_limited = rows.len() > applied_limit;
3824 if was_limited {
3825 rows.truncate(applied_limit);
3826 }
3827
3828 Ok(Some(OperationalReadReport {
3829 collection_name: collection_name.to_owned(),
3830 row_count: rows.len(),
3831 applied_limit,
3832 was_limited,
3833 rows,
3834 }))
3835}
3836
3837fn execute_operational_filtered_read(
3838 conn: &rusqlite::Connection,
3839 collection_name: &str,
3840 filters: &[CompiledOperationalReadFilter],
3841 applied_limit: usize,
3842) -> Result<OperationalReadReport, EngineError> {
3843 use rusqlite::types::Value;
3844
3845 let mut sql = String::from(
3846 "SELECT m.id, m.collection_name, m.record_key, m.op_kind, m.payload_json, m.source_ref, m.created_at \
3847 FROM operational_mutations m ",
3848 );
3849 let mut params = vec![Value::from(collection_name.to_owned())];
3850 for (index, filter) in filters.iter().enumerate() {
3851 let _ = write!(
3852 sql,
3853 "JOIN operational_filter_values f{index} \
3854 ON f{index}.mutation_id = m.id \
3855 AND f{index}.collection_name = m.collection_name "
3856 );
3857 match &filter.condition {
3858 OperationalReadCondition::ExactString(value) => {
3859 let _ = write!(
3860 sql,
3861 "AND f{index}.field_name = ?{} AND f{index}.string_value = ?{} ",
3862 params.len() + 1,
3863 params.len() + 2
3864 );
3865 params.push(Value::from(filter.field.clone()));
3866 params.push(Value::from(value.clone()));
3867 }
3868 OperationalReadCondition::ExactInteger(value) => {
3869 let _ = write!(
3870 sql,
3871 "AND f{index}.field_name = ?{} AND f{index}.integer_value = ?{} ",
3872 params.len() + 1,
3873 params.len() + 2
3874 );
3875 params.push(Value::from(filter.field.clone()));
3876 params.push(Value::from(*value));
3877 }
3878 OperationalReadCondition::Prefix(value) => {
3879 let _ = write!(
3880 sql,
3881 "AND f{index}.field_name = ?{} AND f{index}.string_value GLOB ?{} ",
3882 params.len() + 1,
3883 params.len() + 2
3884 );
3885 params.push(Value::from(filter.field.clone()));
3886 params.push(Value::from(glob_prefix_pattern(value)));
3887 }
3888 OperationalReadCondition::Range { lower, upper } => {
3889 let _ = write!(sql, "AND f{index}.field_name = ?{} ", params.len() + 1);
3890 params.push(Value::from(filter.field.clone()));
3891 if let Some(lower) = lower {
3892 let _ = write!(sql, "AND f{index}.integer_value >= ?{} ", params.len() + 1);
3893 params.push(Value::from(*lower));
3894 }
3895 if let Some(upper) = upper {
3896 let _ = write!(sql, "AND f{index}.integer_value <= ?{} ", params.len() + 1);
3897 params.push(Value::from(*upper));
3898 }
3899 }
3900 }
3901 }
3902 let _ = write!(
3903 sql,
3904 "WHERE m.collection_name = ?1 ORDER BY m.mutation_order DESC LIMIT ?{}",
3905 params.len() + 1
3906 );
3907 params.push(Value::from(i64::try_from(applied_limit + 1).map_err(
3908 |_| EngineError::Bridge("operational read limit overflow".to_owned()),
3909 )?));
3910
3911 let mut stmt = conn.prepare(&sql)?;
3912 let mut rows = stmt
3913 .query_map(
3914 rusqlite::params_from_iter(params),
3915 map_operational_mutation_row,
3916 )?
3917 .collect::<Result<Vec<_>, _>>()?;
3918 let was_limited = rows.len() > applied_limit;
3919 if was_limited {
3920 rows.truncate(applied_limit);
3921 }
3922 Ok(OperationalReadReport {
3923 collection_name: collection_name.to_owned(),
3924 row_count: rows.len(),
3925 applied_limit,
3926 was_limited,
3927 rows,
3928 })
3929}
3930
/// Builds a SQLite `GLOB` pattern that matches any string starting with `value`.
///
/// The characters `*`, `?` and `[` are wrapped in a single-character bracket
/// class so they are matched literally, and a trailing `*` is appended.
fn glob_prefix_pattern(value: &str) -> String {
    let mut escaped = String::with_capacity(value.len() + 1);
    for symbol in value.chars() {
        if matches!(symbol, '*' | '?' | '[') {
            escaped.push('[');
            escaped.push(symbol);
            escaped.push(']');
        } else {
            escaped.push(symbol);
        }
    }
    escaped.push('*');
    escaped
}
3944
/// A single filterable value pulled out of a mutation payload for one declared
/// filter field. Only the slot matching the field's declared type is populated.
#[derive(Debug, Clone, Eq, PartialEq)]
struct ExtractedOperationalFilterValue {
    /// Name of the declared filter field the value belongs to.
    field_name: String,
    /// Populated for string-typed fields.
    string_value: Option<String>,
    /// Populated for integer- and timestamp-typed fields.
    integer_value: Option<i64>,
}
3951
3952fn extract_operational_filter_values(
3953 filter_fields: &[OperationalFilterField],
3954 payload_json: &str,
3955) -> Vec<ExtractedOperationalFilterValue> {
3956 let Ok(parsed) = serde_json::from_str::<serde_json::Value>(payload_json) else {
3957 return Vec::new();
3958 };
3959 let Some(object) = parsed.as_object() else {
3960 return Vec::new();
3961 };
3962
3963 filter_fields
3964 .iter()
3965 .filter_map(|field| {
3966 let value = object.get(&field.name)?;
3967 match field.field_type {
3968 OperationalFilterFieldType::String => {
3969 value
3970 .as_str()
3971 .map(|string_value| ExtractedOperationalFilterValue {
3972 field_name: field.name.clone(),
3973 string_value: Some(string_value.to_owned()),
3974 integer_value: None,
3975 })
3976 }
3977 OperationalFilterFieldType::Integer | OperationalFilterFieldType::Timestamp => {
3978 value
3979 .as_i64()
3980 .map(|integer_value| ExtractedOperationalFilterValue {
3981 field_name: field.name.clone(),
3982 string_value: None,
3983 integer_value: Some(integer_value),
3984 })
3985 }
3986 }
3987 })
3988 .collect()
3989}
3990
3991fn operational_compaction_candidates(
3992 conn: &rusqlite::Connection,
3993 retention_json: &str,
3994 collection_name: &str,
3995) -> Result<(Vec<String>, Option<i64>), EngineError> {
3996 operational_compaction_candidates_at(
3997 conn,
3998 retention_json,
3999 collection_name,
4000 current_unix_timestamp()?,
4001 )
4002}
4003
4004fn operational_compaction_candidates_at(
4005 conn: &rusqlite::Connection,
4006 retention_json: &str,
4007 collection_name: &str,
4008 now_timestamp: i64,
4009) -> Result<(Vec<String>, Option<i64>), EngineError> {
4010 let policy = parse_operational_retention_policy(retention_json)?;
4011 match policy {
4012 OperationalRetentionPolicy::KeepAll => Ok((Vec::new(), None)),
4013 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4014 let before_timestamp = now_timestamp - max_age_seconds;
4015 let mut stmt = conn.prepare(
4016 "SELECT id FROM operational_mutations \
4017 WHERE collection_name = ?1 AND created_at < ?2 \
4018 ORDER BY mutation_order",
4019 )?;
4020 let mutation_ids = stmt
4021 .query_map(
4022 rusqlite::params![collection_name, before_timestamp],
4023 |row| row.get::<_, String>(0),
4024 )?
4025 .collect::<Result<Vec<_>, _>>()?;
4026 Ok((mutation_ids, Some(before_timestamp)))
4027 }
4028 OperationalRetentionPolicy::KeepLast { max_rows } => {
4029 let mut stmt = conn.prepare(
4030 "SELECT id FROM operational_mutations \
4031 WHERE collection_name = ?1 \
4032 ORDER BY mutation_order DESC",
4033 )?;
4034 let ordered_ids = stmt
4035 .query_map([collection_name], |row| row.get::<_, String>(0))?
4036 .collect::<Result<Vec<_>, _>>()?;
4037 Ok((ordered_ids.into_iter().skip(max_rows).collect(), None))
4038 }
4039 }
4040}
4041
4042fn parse_operational_retention_policy(
4043 retention_json: &str,
4044) -> Result<OperationalRetentionPolicy, EngineError> {
4045 let policy: OperationalRetentionPolicy = serde_json::from_str(retention_json)
4046 .map_err(|error| EngineError::InvalidWrite(format!("invalid retention_json: {error}")))?;
4047 match policy {
4048 OperationalRetentionPolicy::KeepAll => Ok(policy),
4049 OperationalRetentionPolicy::PurgeBeforeSeconds { max_age_seconds } => {
4050 if max_age_seconds <= 0 {
4051 return Err(EngineError::InvalidWrite(
4052 "retention_json max_age_seconds must be greater than zero".to_owned(),
4053 ));
4054 }
4055 Ok(policy)
4056 }
4057 OperationalRetentionPolicy::KeepLast { max_rows } => {
4058 if max_rows == 0 {
4059 return Err(EngineError::InvalidWrite(
4060 "retention_json max_rows must be greater than zero".to_owned(),
4061 ));
4062 }
4063 Ok(policy)
4064 }
4065 }
4066}
4067
4068fn load_operational_retention_records(
4069 conn: &rusqlite::Connection,
4070 collection_names: Option<&[String]>,
4071 max_collections: Option<usize>,
4072) -> Result<Vec<OperationalCollectionRecord>, EngineError> {
4073 let limit = max_collections.unwrap_or(usize::MAX);
4074 if limit == 0 {
4075 return Err(EngineError::InvalidWrite(
4076 "max_collections must be greater than zero".to_owned(),
4077 ));
4078 }
4079
4080 let mut records = Vec::new();
4081 if let Some(collection_names) = collection_names {
4082 for name in collection_names.iter().take(limit) {
4083 let record = load_operational_collection_record(conn, name)?.ok_or_else(|| {
4084 EngineError::InvalidWrite(format!(
4085 "operational collection '{name}' is not registered"
4086 ))
4087 })?;
4088 records.push(record);
4089 }
4090 return Ok(records);
4091 }
4092
4093 let mut stmt = conn.prepare(
4094 "SELECT name, kind, schema_json, retention_json, filter_fields_json, validation_json, secondary_indexes_json, format_version, created_at, disabled_at \
4095 FROM operational_collections ORDER BY name",
4096 )?;
4097 let rows = stmt
4098 .query_map([], map_operational_collection_row)?
4099 .take(limit)
4100 .collect::<Result<Vec<_>, _>>()?;
4101 Ok(rows)
4102}
4103
4104fn last_operational_retention_run_at(
4105 conn: &rusqlite::Connection,
4106 collection_name: &str,
4107) -> Result<Option<i64>, EngineError> {
4108 conn.query_row(
4109 "SELECT MAX(executed_at) FROM operational_retention_runs WHERE collection_name = ?1",
4110 [collection_name],
4111 |row| row.get(0),
4112 )
4113 .optional()
4114 .map_err(EngineError::Sqlite)
4115 .map(Option::flatten)
4116}
4117
4118fn count_operational_mutations_for_collection(
4119 conn: &rusqlite::Connection,
4120 collection_name: &str,
4121) -> Result<usize, EngineError> {
4122 let count: i64 = conn.query_row(
4123 "SELECT count(*) FROM operational_mutations WHERE collection_name = ?1",
4124 [collection_name],
4125 |row| row.get(0),
4126 )?;
4127 usize::try_from(count).map_err(|_| {
4128 EngineError::Bridge(format!("count overflow for collection {collection_name}"))
4129 })
4130}
4131
4132fn retention_action_kind_and_limit(
4133 policy: &OperationalRetentionPolicy,
4134) -> (OperationalRetentionActionKind, Option<usize>) {
4135 match policy {
4136 OperationalRetentionPolicy::KeepAll => (OperationalRetentionActionKind::Noop, None),
4137 OperationalRetentionPolicy::PurgeBeforeSeconds { .. } => {
4138 (OperationalRetentionActionKind::PurgeBeforeSeconds, None)
4139 }
4140 OperationalRetentionPolicy::KeepLast { max_rows } => {
4141 (OperationalRetentionActionKind::KeepLast, Some(*max_rows))
4142 }
4143 }
4144}
4145
4146fn plan_operational_retention_item(
4147 conn: &rusqlite::Connection,
4148 record: &OperationalCollectionRecord,
4149 now_timestamp: i64,
4150) -> Result<OperationalRetentionPlanItem, EngineError> {
4151 let last_run_at = last_operational_retention_run_at(conn, &record.name)?;
4152 if record.kind != OperationalCollectionKind::AppendOnlyLog {
4153 return Ok(OperationalRetentionPlanItem {
4154 collection_name: record.name.clone(),
4155 action_kind: OperationalRetentionActionKind::Noop,
4156 candidate_deletions: 0,
4157 before_timestamp: None,
4158 max_rows: None,
4159 last_run_at,
4160 });
4161 }
4162 let policy = parse_operational_retention_policy(&record.retention_json)?;
4163 let (action_kind, max_rows) = retention_action_kind_and_limit(&policy);
4164 let (candidate_ids, before_timestamp) = operational_compaction_candidates_at(
4165 conn,
4166 &record.retention_json,
4167 &record.name,
4168 now_timestamp,
4169 )?;
4170 Ok(OperationalRetentionPlanItem {
4171 collection_name: record.name.clone(),
4172 action_kind,
4173 candidate_deletions: candidate_ids.len(),
4174 before_timestamp,
4175 max_rows,
4176 last_run_at,
4177 })
4178}
4179
4180fn run_operational_retention_item(
4181 tx: &rusqlite::Transaction<'_>,
4182 record: &OperationalCollectionRecord,
4183 now_timestamp: i64,
4184 dry_run: bool,
4185) -> Result<OperationalRetentionRunItem, EngineError> {
4186 let plan = plan_operational_retention_item(tx, record, now_timestamp)?;
4187 let mut deleted_mutations = 0usize;
4188 if record.kind == OperationalCollectionKind::AppendOnlyLog
4189 && plan.action_kind != OperationalRetentionActionKind::Noop
4190 && plan.candidate_deletions > 0
4191 && !dry_run
4192 {
4193 let (candidate_ids, _) = operational_compaction_candidates_at(
4194 tx,
4195 &record.retention_json,
4196 &record.name,
4197 now_timestamp,
4198 )?;
4199 let mut delete_stmt =
4200 tx.prepare_cached("DELETE FROM operational_mutations WHERE id = ?1")?;
4201 for mutation_id in &candidate_ids {
4202 delete_stmt.execute([mutation_id.as_str()])?;
4203 deleted_mutations += 1;
4204 }
4205 drop(delete_stmt);
4206
4207 persist_simple_provenance_event(
4208 tx,
4209 "operational_retention_run",
4210 &record.name,
4211 Some(serde_json::json!({
4212 "action_kind": plan.action_kind,
4213 "deleted_mutations": deleted_mutations,
4214 "before_timestamp": plan.before_timestamp,
4215 "max_rows": plan.max_rows,
4216 "executed_at": now_timestamp,
4217 })),
4218 )?;
4219 }
4220
4221 let live_rows_remaining = count_operational_mutations_for_collection(tx, &record.name)?;
4222 let effective_deleted_mutations = if dry_run {
4223 plan.candidate_deletions
4224 } else {
4225 deleted_mutations
4226 };
4227 let rows_remaining = if dry_run {
4228 live_rows_remaining.saturating_sub(effective_deleted_mutations)
4229 } else {
4230 live_rows_remaining
4231 };
4232 if !dry_run && plan.action_kind != OperationalRetentionActionKind::Noop {
4233 tx.execute(
4234 "INSERT INTO operational_retention_runs \
4235 (id, collection_name, executed_at, action_kind, dry_run, deleted_mutations, rows_remaining, metadata_json) \
4236 VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8)",
4237 rusqlite::params![
4238 new_id(),
4239 record.name,
4240 now_timestamp,
4241 serde_json::to_string(&plan.action_kind)
4242 .unwrap_or_else(|_| "\"noop\"".to_owned())
4243 .trim_matches('"')
4244 .to_owned(),
4245 i32::from(dry_run),
4246 deleted_mutations,
4247 rows_remaining,
4248 serde_json::json!({
4249 "before_timestamp": plan.before_timestamp,
4250 "max_rows": plan.max_rows,
4251 })
4252 .to_string(),
4253 ],
4254 )?;
4255 }
4256
4257 Ok(OperationalRetentionRunItem {
4258 collection_name: plan.collection_name,
4259 action_kind: plan.action_kind,
4260 deleted_mutations: effective_deleted_mutations,
4261 before_timestamp: plan.before_timestamp,
4262 max_rows: plan.max_rows,
4263 rows_remaining,
4264 })
4265}
4266
4267fn current_unix_timestamp() -> Result<i64, EngineError> {
4268 let now = SystemTime::now()
4269 .duration_since(SystemTime::UNIX_EPOCH)
4270 .map_err(|error| EngineError::Bridge(format!("system clock error: {error}")))?;
4271 i64::try_from(now.as_secs())
4272 .map_err(|_| EngineError::Bridge("unix timestamp overflow".to_owned()))
4273}
4274
4275fn map_operational_collection_row(
4276 row: &rusqlite::Row<'_>,
4277) -> Result<OperationalCollectionRecord, rusqlite::Error> {
4278 let kind_text: String = row.get(1)?;
4279 let kind = OperationalCollectionKind::try_from(kind_text.as_str()).map_err(|message| {
4280 rusqlite::Error::FromSqlConversionFailure(
4281 1,
4282 rusqlite::types::Type::Text,
4283 Box::new(io::Error::new(io::ErrorKind::InvalidData, message)),
4284 )
4285 })?;
4286 Ok(OperationalCollectionRecord {
4287 name: row.get(0)?,
4288 kind,
4289 schema_json: row.get(2)?,
4290 retention_json: row.get(3)?,
4291 filter_fields_json: row.get(4)?,
4292 validation_json: row.get(5)?,
4293 secondary_indexes_json: row.get(6)?,
4294 format_version: row.get(7)?,
4295 created_at: row.get(8)?,
4296 disabled_at: row.get(9)?,
4297 })
4298}
4299
4300fn map_operational_mutation_row(
4301 row: &rusqlite::Row<'_>,
4302) -> Result<OperationalMutationRow, rusqlite::Error> {
4303 Ok(OperationalMutationRow {
4304 id: row.get(0)?,
4305 collection_name: row.get(1)?,
4306 record_key: row.get(2)?,
4307 op_kind: row.get(3)?,
4308 payload_json: row.get(4)?,
4309 source_ref: row.get(5)?,
4310 created_at: row.get(6)?,
4311 })
4312}
4313
4314fn map_operational_current_row(
4315 row: &rusqlite::Row<'_>,
4316) -> Result<OperationalCurrentRow, rusqlite::Error> {
4317 Ok(OperationalCurrentRow {
4318 collection_name: row.get(0)?,
4319 record_key: row.get(1)?,
4320 payload_json: row.get(2)?,
4321 updated_at: row.get(3)?,
4322 last_mutation_id: row.get(4)?,
4323 })
4324}
4325
4326#[cfg(test)]
4327#[allow(clippy::expect_used)]
4328mod tests {
4329 use std::fs;
4330 use std::sync::Arc;
4331
4332 use fathomdb_schema::SchemaManager;
4333 use tempfile::NamedTempFile;
4334
4335 use super::{AdminService, SafeExportOptions, VectorRegenerationConfig};
4336 use crate::sqlite;
4337 use crate::{EngineError, OperationalCollectionKind, OperationalRegisterRequest};
4338
4339 #[cfg(feature = "sqlite-vec")]
4340 use fathomdb_query::QueryBuilder;
4341
4342 #[cfg(feature = "sqlite-vec")]
4343 use super::{VectorGeneratorPolicy, load_vector_regeneration_config};
4344
4345 #[cfg(feature = "sqlite-vec")]
4346 use crate::ExecutionCoordinator;
4347
4348 #[cfg(feature = "sqlite-vec")]
4349 use crate::TelemetryCounters;
4350
/// Sets the Unix permission bits of `path` to `mode`.
///
/// Panics if the file metadata cannot be read or the permissions cannot be
/// applied.
#[allow(dead_code)]
#[cfg(unix)]
fn set_file_mode(path: &std::path::Path, mode: u32) {
    use std::os::unix::fs::PermissionsExt;

    let metadata = fs::metadata(path).expect("script metadata");
    let mut perms = metadata.permissions();
    perms.set_mode(mode);
    fs::set_permissions(path, perms).expect("chmod");
}

/// Non-Unix stand-in for `set_file_mode`: accepts the same arguments and does
/// nothing.
#[allow(dead_code)]
#[cfg(not(unix))]
fn set_file_mode(_path: &std::path::Path, _mode: u32) {}
4364
4365 fn setup() -> (NamedTempFile, AdminService) {
4366 let db = NamedTempFile::new().expect("temp file");
4367 let schema = Arc::new(SchemaManager::new());
4368 {
4369 let conn = sqlite::open_connection(db.path()).expect("connection");
4370 schema.bootstrap(&conn).expect("bootstrap");
4371 }
4372 let service = AdminService::new(db.path(), Arc::clone(&schema));
4373 (db, service)
4374 }
4375
/// A freshly bootstrapped database reports zero duplicate active logical ids
/// and zero operational integrity gaps.
#[test]
fn check_integrity_includes_active_uniqueness_count() {
    let (_db_file, admin) = setup();

    let integrity = admin.check_integrity().expect("integrity check");

    assert_eq!(integrity.duplicate_active_logical_ids, 0);
    assert_eq!(integrity.operational_missing_collections, 0);
    assert_eq!(integrity.operational_missing_last_mutations, 0);
}
4384
/// Tracing a source reports the row count and logical ids of the nodes that
/// carry that `source_ref`.
#[test]
fn trace_source_returns_node_logical_ids() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 'source-1')",
            [],
        )
        .expect("insert node");
    drop(seed_conn);

    let trace = admin.trace_source("source-1").expect("trace");
    assert_eq!(trace.node_rows, 1);
    assert_eq!(trace.node_logical_ids, vec!["lg1"]);
}
4401
/// Tracing a source also reports operational mutations written with that
/// `source_ref`.
#[test]
fn trace_source_includes_operational_mutations() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"ok\"}', 'source-1', 100, 1)",
            "insert mutation",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let trace = admin.trace_source("source-1").expect("trace");
    assert_eq!(trace.operational_mutation_rows, 1);
    assert_eq!(trace.operational_mutation_ids, vec!["m1"]);
}
4427
/// Excising the source of the active node revision leaves the earlier,
/// previously superseded revision as the active row.
#[test]
fn excise_source_restores_prior_active_node() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
            "insert v1 superseded",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
            "insert v2 active",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    admin.excise_source("source-2").expect("excise");

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let surviving_row_id: String = verify_conn
        .query_row(
            "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )
        .expect("active row exists after excise");
    assert_eq!(surviving_row_id, "r1");
}
4459
/// Excising a source removes its operational mutations and rebuilds the
/// latest-state current row from the surviving mutation.
#[test]
fn excise_source_deletes_operational_mutations_and_repairs_latest_state_current() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO operational_collections \
             (name, kind, schema_json, retention_json, format_version, created_at) \
             VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
            "insert collection",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'source-1', 100, 1)",
            "insert prior mutation",
        ),
        (
            "INSERT INTO operational_mutations \
             (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
             VALUES ('m2', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'source-2', 200, 2)",
            "insert excised mutation",
        ),
        (
            "INSERT INTO operational_current \
             (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
             VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 200, 'm2')",
            "insert current row",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    // Before excision the trace sees exactly the mutation written by source-2.
    let before = admin
        .trace_source("source-2")
        .expect("trace before excise");
    assert_eq!(before.operational_mutation_rows, 1);
    assert_eq!(before.operational_mutation_ids, vec!["m2"]);

    let after = admin.excise_source("source-2").expect("excise");
    assert_eq!(after.operational_mutation_rows, 0);
    assert!(after.operational_mutation_ids.is_empty());

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let leftover_mutations: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM operational_mutations WHERE source_ref = 'source-2'",
            [],
            |row| row.get(0),
        )
        .expect("remaining count");
    assert_eq!(leftover_mutations, 0);

    let (payload_json, last_mutation_id): (String, String) = verify_conn
        .query_row(
            "SELECT payload_json, last_mutation_id FROM operational_current \
             WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .expect("rebuilt current row");
    assert_eq!(payload_json, "{\"status\":\"old\"}");
    assert_eq!(last_mutation_id, "m1");
}
4528
/// Restoring a retired logical id reactivates its node and attached edge and
/// repopulates the FTS row for its chunk.
#[test]
fn restore_logical_id_reestablishes_last_pre_retire_content_and_attached_edges() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
        ("DELETE FROM fts_nodes", "clear fts"),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let restored = admin.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.logical_id, "doc-1");
    assert!(!restored.was_noop);
    assert_eq!(restored.restored_node_rows, 1);
    assert_eq!(restored.restored_edge_rows, 1);
    assert_eq!(restored.restored_chunk_rows, 1);
    assert_eq!(restored.restored_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let count_rows = |sql: &str, label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };

    let active_nodes = count_rows(
        "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
        "active node count",
    );
    assert_eq!(active_nodes, 1);

    let active_edges = count_rows(
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
        "active edge count",
    );
    assert_eq!(active_edges, 1);

    let fts_rows = count_rows(
        "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'chunk-1'",
        "fts count",
    );
    assert_eq!(fts_rows, 1);
}
4619
/// An edge retired one tick after its node (201 vs. 200) is still restored
/// together with the node.
#[test]
fn restore_logical_id_restores_edges_retired_after_the_node_retire_event() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-topic', 'topic-1', 'Topic', '{}', 100, 'seed')",
            "insert target node",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 'seed')",
            "insert edge",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert node retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 201, '')",
            "insert edge retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
        (
            "UPDATE edges SET superseded_at = 201 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let restored = admin.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_edge_rows, 1);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let active_edges: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get(0),
        )
        .expect("active edge count");
    assert_eq!(active_edges, 1);
}
4681
/// With two retired revisions sharing identical timestamps, restore is
/// expected to reactivate the one inserted last (`node-row-newer`).
#[test]
fn restore_logical_id_prefers_latest_retired_revision_when_timestamps_tie() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-older', 'doc-1', 'Document', '{\"title\":\"older\"}', 100, 200, 'forget-1')",
            "insert older retired node",
        ),
        (
            "INSERT INTO nodes \
             (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-newer', 'doc-1', 'Document', '{\"title\":\"newer\"}', 100, 200, 'forget-1')",
            "insert newer retired node",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-older', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert older retire event",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-newer', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert newer retire event",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let restored = admin.restore_logical_id("doc-1").expect("restore");
    assert!(!restored.was_noop);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let (row_id, properties): (String, String) = verify_conn
        .query_row(
            "SELECT row_id, properties FROM nodes \
             WHERE logical_id = 'doc-1' AND superseded_at IS NULL",
            [],
            |row| Ok((row.get(0)?, row.get(1)?)),
        )
        .expect("restored active row");
    assert_eq!(row_id, "node-row-newer");
    assert_eq!(properties, "{\"title\":\"newer\"}");
}
4730
/// Purging a retired logical id deletes its node, edge, chunk and FTS rows and
/// leaves exactly one `purge_logical_id` provenance event behind.
#[test]
fn purge_logical_id_removes_retired_content_and_records_tombstone() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'topic-1', 'TAGGED', '{}', 100, 200, 'seed')",
            "insert retired edge",
        ),
        (
            "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
             VALUES ('chunk-1', 'doc-1', 'Document', 'budget narrative')",
            "insert fts",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let purged = admin.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.logical_id, "doc-1");
    assert!(!purged.was_noop);
    assert_eq!(purged.deleted_node_rows, 1);
    assert_eq!(purged.deleted_edge_rows, 1);
    assert_eq!(purged.deleted_chunk_rows, 1);
    assert_eq!(purged.deleted_fts_rows, 1);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let count_rows = |sql: &str, label: &str| -> i64 {
        verify_conn
            .query_row(sql, [], |row| row.get(0))
            .expect(label)
    };

    let nodes_left = count_rows(
        "SELECT count(*) FROM nodes WHERE logical_id = 'doc-1'",
        "remaining nodes",
    );
    assert_eq!(nodes_left, 0);

    let edges_left = count_rows(
        "SELECT count(*) FROM edges WHERE logical_id = 'edge-1'",
        "remaining edges",
    );
    assert_eq!(edges_left, 0);

    let chunks_left = count_rows(
        "SELECT count(*) FROM chunks WHERE id = 'chunk-1'",
        "remaining chunks",
    );
    assert_eq!(chunks_left, 0);

    let tombstones = count_rows(
        "SELECT count(*) FROM provenance_events WHERE event_type = 'purge_logical_id' AND subject = 'doc-1'",
        "purge events",
    );
    assert_eq!(tombstones, 1);
}
4805
/// A chunk whose node exists only as a retired row is not counted as orphaned.
#[test]
fn check_semantics_accepts_preserved_retired_chunks() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let semantics = admin.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 0);
}
4828
/// A chunk that references a logical id with no node rows at all is counted as
/// orphaned.
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_chunks() {
    let (db_file, admin) = setup();

    let seed_conn = sqlite::open_connection(db_file.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
            [],
        )
        .expect("insert orphaned chunk");
    drop(seed_conn);

    let semantics = admin.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
}
4845
/// A vector row attached to a chunk whose logical id has no node rows is
/// reported both as an orphaned chunk and as a vec row for a superseded node.
#[cfg(feature = "sqlite-vec")]
#[test]
fn check_semantics_detects_missing_retired_node_history_for_preserved_vec_rows() {
    let (db_file, admin) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    admin
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'ghost-doc', 'budget narrative', 100)",
            "insert orphaned chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let semantics = admin.check_semantics().expect("semantics");
    assert_eq!(semantics.orphaned_chunks, 1);
    assert_eq!(semantics.vec_rows_for_superseded_nodes, 1);
}
4873
/// After restoring a retired node, its preserved vector row is reachable
/// through vector search again without re-ingesting the content.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_reestablishes_vector_search_without_reingest() {
    let (db_file, admin) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    admin
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let restored = admin.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    let read_schema = Arc::new(SchemaManager::new());
    let telemetry = Arc::new(TelemetryCounters::default());
    let coordinator =
        ExecutionCoordinator::open(db_file.path(), read_schema, Some(4), 1, telemetry)
            .expect("coordinator");
    let vector_query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&vector_query)
        .expect("vector read");

    let doc_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_visible,
        "restore should make the preserved vec row visible again without re-ingest"
    );
}
4932
/// Purging a retired logical id also removes the vector rows attached to its
/// chunks.
#[cfg(feature = "sqlite-vec")]
#[test]
fn purge_logical_id_deletes_vec_rows_for_retired_content() {
    let (db_file, admin) = setup();

    let seed_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    admin
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 200, 'seed')",
            "insert retired node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let purged = admin.purge_logical_id("doc-1").expect("purge");
    assert_eq!(purged.deleted_vec_rows, 1);

    let verify_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    let remaining_vec_rows: i64 = verify_conn
        .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
            row.get(0)
        })
        .expect("vec count");
    assert_eq!(remaining_vec_rows, 0);
}
4973
/// Vectors produced by the external regeneration command become searchable
/// again once the retired node they belong to is restored.
#[cfg(feature = "sqlite-vec")]
#[test]
fn restore_logical_id_restores_visibility_of_regenerated_vectors() {
    let (db_file, admin) = setup();

    // Stub generator: echoes one zero embedding for the first chunk it is given.
    let script_dir = tempfile::tempdir().expect("temp dir");
    let generator_script = script_dir.path().join("vector-generator-restore.sh");
    let script_body = r#"#!/usr/bin/env bash
set -euo pipefail
python3 -c 'import json, sys
payload = json.load(sys.stdin)
json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [0.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
"#;
    fs::write(&generator_script, script_body).expect("write script");
    set_file_mode(&generator_script, 0o755);

    let seed_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    admin
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-1', 'doc-1', 'Document', '{\"title\":\"Budget\"}', 100, 'seed')",
            "insert node",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('chunk-1', 'doc-1', 'budget narrative', 100)",
            "insert chunk",
        ),
    ];
    for (sql, label) in seed_statements {
        seed_conn.execute(sql, []).expect(label);
    }
    drop(seed_conn);

    let regeneration = VectorRegenerationConfig {
        profile: "default".to_owned(),
        table_name: "vec_nodes_active".to_owned(),
        model_identity: "model".to_owned(),
        model_version: "1.0.0".to_owned(),
        dimension: 4,
        normalization_policy: "l2".to_owned(),
        chunking_policy: "per_chunk".to_owned(),
        preprocessing_policy: "trim".to_owned(),
        generator_command: vec![generator_script.to_string_lossy().to_string()],
    };
    admin
        .regenerate_vector_embeddings(&regeneration)
        .expect("regenerate");

    // Retire the node only after its vectors have been regenerated.
    let retire_conn = crate::sqlite::open_connection_with_vec(db_file.path()).expect("vec conn");
    let retire_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-node-retire', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node",
        ),
    ];
    for (sql, label) in retire_statements {
        retire_conn.execute(sql, []).expect(label);
    }
    drop(retire_conn);

    let restored = admin.restore_logical_id("doc-1").expect("restore");
    assert_eq!(restored.restored_vec_rows, 1);

    let read_schema = Arc::new(SchemaManager::new());
    let telemetry = Arc::new(TelemetryCounters::default());
    let coordinator =
        ExecutionCoordinator::open(db_file.path(), read_schema, Some(4), 1, telemetry)
            .expect("coordinator");
    let vector_query = QueryBuilder::nodes("Document")
        .vector_search("[0.0, 0.0, 0.0, 0.0]", 5)
        .compile()
        .expect("compile");
    let results = coordinator
        .execute_compiled_read(&vector_query)
        .expect("vector read");

    let doc_visible = results
        .nodes
        .iter()
        .any(|node| node.logical_id == "doc-1");
    assert!(
        doc_visible,
        "restored logical_id should become visible through regenerated vectors"
    );
}
5064
/// On a freshly bootstrapped database every semantic-check counter is zero and
/// no warnings are produced.
#[test]
fn check_semantics_clean_db_returns_zeros() {
    let (_db_file, admin) = setup();

    let semantics = admin.check_semantics().expect("semantics check");

    // Graph and chunk consistency.
    assert_eq!(semantics.orphaned_chunks, 0);
    assert_eq!(semantics.null_source_ref_nodes, 0);
    assert_eq!(semantics.broken_step_fk, 0);
    assert_eq!(semantics.broken_action_fk, 0);

    // Full-text projection.
    assert_eq!(semantics.stale_fts_rows, 0);
    assert_eq!(semantics.fts_rows_for_superseded_nodes, 0);

    // Edges and supersession history.
    assert_eq!(semantics.dangling_edges, 0);
    assert_eq!(semantics.orphaned_supersession_chains, 0);

    // Vector projection.
    assert_eq!(semantics.stale_vec_rows, 0);
    assert_eq!(semantics.vec_rows_for_superseded_nodes, 0);

    // Operational store.
    assert_eq!(semantics.missing_operational_current_rows, 0);
    assert_eq!(semantics.stale_operational_current_rows, 0);
    assert_eq!(semantics.disabled_collection_mutations, 0);

    assert!(semantics.warnings.is_empty());
}
5084
/// Registering an operational collection returns the stored record, makes it
/// describable, and emits exactly one registration provenance event.
#[test]
fn register_operational_collection_persists_and_emits_provenance() {
    let (db_file, admin) = setup();

    let request = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: "[]".to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    let registered = admin
        .register_operational_collection(&request)
        .expect("register collection");

    assert_eq!(registered.name, "connector_health");
    assert_eq!(registered.kind, OperationalCollectionKind::LatestState);
    assert_eq!(registered.schema_json, "{}");
    assert_eq!(registered.retention_json, "{}");
    assert_eq!(registered.filter_fields_json, "[]");
    assert!(registered.created_at > 0);
    assert_eq!(registered.disabled_at, None);

    let described = admin
        .describe_operational_collection("connector_health")
        .expect("describe collection")
        .expect("collection exists");
    assert_eq!(described, registered);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let registration_events: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_registered' AND subject = 'connector_health'",
            [],
            |row| row.get(0),
        )
        .expect("provenance count");
    assert_eq!(registration_events, 1);
}
5126
/// A collection registered with an empty validation contract can have the
/// contract updated; the new contract is returned, persisted, and recorded as
/// exactly one provenance event.
#[test]
fn register_and_update_operational_collection_validation_round_trip() {
    let (db_file, admin) = setup();

    let request = OperationalRegisterRequest {
        name: "connector_health".to_owned(),
        kind: OperationalCollectionKind::LatestState,
        schema_json: "{}".to_owned(),
        retention_json: "{}".to_owned(),
        filter_fields_json: "[]".to_owned(),
        validation_json: String::new(),
        secondary_indexes_json: "[]".to_owned(),
        format_version: 1,
    };
    let registered = admin
        .register_operational_collection(&request)
        .expect("register collection");
    assert_eq!(registered.validation_json, "");

    let contract = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
    let after_update = admin
        .update_operational_collection_validation("connector_health", contract)
        .expect("update validation");
    assert_eq!(after_update.validation_json, contract);

    let described = admin
        .describe_operational_collection("connector_health")
        .expect("describe collection")
        .expect("collection exists");
    assert_eq!(described.validation_json, contract);

    let verify_conn = sqlite::open_connection(db_file.path()).expect("conn");
    let update_events: i64 = verify_conn
        .query_row(
            "SELECT count(*) FROM provenance_events \
             WHERE event_type = 'operational_collection_validation_updated' \
             AND subject = 'connector_health'",
            [],
            |row| row.get(0),
        )
        .expect("provenance count");
    assert_eq!(update_events, 1);
}
5168
5169 #[test]
5170 fn register_update_and_rebuild_operational_secondary_indexes_round_trip() {
5171 let (db, service) = setup();
5172 let record = service
5173 .register_operational_collection(&OperationalRegisterRequest {
5174 name: "audit_log".to_owned(),
5175 kind: OperationalCollectionKind::AppendOnlyLog,
5176 schema_json: "{}".to_owned(),
5177 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5178 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
5179 validation_json: String::new(),
5180 secondary_indexes_json: "[]".to_owned(),
5181 format_version: 1,
5182 })
5183 .expect("register collection");
5184 assert_eq!(record.secondary_indexes_json, "[]");
5185
5186 {
5187 let writer = crate::WriterActor::start(
5188 db.path(),
5189 Arc::new(SchemaManager::new()),
5190 crate::ProvenanceMode::Warn,
5191 Arc::new(crate::TelemetryCounters::default()),
5192 )
5193 .expect("writer");
5194 writer
5195 .submit(crate::WriteRequest {
5196 label: "secondary-index-seed".to_owned(),
5197 nodes: vec![],
5198 node_retires: vec![],
5199 edges: vec![],
5200 edge_retires: vec![],
5201 chunks: vec![],
5202 runs: vec![],
5203 steps: vec![],
5204 actions: vec![],
5205 optional_backfills: vec![],
5206 vec_inserts: vec![],
5207 operational_writes: vec![
5208 crate::OperationalWrite::Append {
5209 collection: "audit_log".to_owned(),
5210 record_key: "evt-1".to_owned(),
5211 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
5212 source_ref: Some("src-1".to_owned()),
5213 },
5214 crate::OperationalWrite::Append {
5215 collection: "audit_log".to_owned(),
5216 record_key: "evt-2".to_owned(),
5217 payload_json: r#"{"actor":"bob","ts":200}"#.to_owned(),
5218 source_ref: Some("src-2".to_owned()),
5219 },
5220 ],
5221 })
5222 .expect("seed writes");
5223 }
5224
5225 let secondary_indexes_json = r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#;
5226 let updated = service
5227 .update_operational_collection_secondary_indexes("audit_log", secondary_indexes_json)
5228 .expect("update secondary indexes");
5229 assert_eq!(updated.secondary_indexes_json, secondary_indexes_json);
5230
5231 let conn = sqlite::open_connection(db.path()).expect("conn");
5232 let entry_count: i64 = conn
5233 .query_row(
5234 "SELECT count(*) FROM operational_secondary_index_entries \
5235 WHERE collection_name = 'audit_log' AND index_name = 'actor_ts'",
5236 [],
5237 |row| row.get(0),
5238 )
5239 .expect("secondary index count");
5240 assert_eq!(entry_count, 2);
5241 conn.execute(
5242 "DELETE FROM operational_secondary_index_entries WHERE collection_name = 'audit_log'",
5243 [],
5244 )
5245 .expect("clear index entries");
5246 drop(conn);
5247
5248 let rebuild = service
5249 .rebuild_operational_secondary_indexes("audit_log")
5250 .expect("rebuild secondary indexes");
5251 assert_eq!(rebuild.collection_name, "audit_log");
5252 assert_eq!(rebuild.mutation_entries_rebuilt, 2);
5253 assert_eq!(rebuild.current_entries_rebuilt, 0);
5254 }
5255
5256 #[test]
5257 fn register_operational_collection_rejects_invalid_validation_contract() {
5258 let (_db, service) = setup();
5259
5260 let error = service
5261 .register_operational_collection(&OperationalRegisterRequest {
5262 name: "connector_health".to_owned(),
5263 kind: OperationalCollectionKind::LatestState,
5264 schema_json: "{}".to_owned(),
5265 retention_json: "{}".to_owned(),
5266 filter_fields_json: "[]".to_owned(),
5267 validation_json: r#"{"format_version":1,"mode":"enforce","fields":[{"name":"status","type":"string","minimum":0}]}"#
5268 .to_owned(),
5269 secondary_indexes_json: "[]".to_owned(),
5270 format_version: 1,
5271 })
5272 .expect_err("invalid validation contract should reject");
5273
5274 assert!(matches!(error, EngineError::InvalidWrite(_)));
5275 assert!(error.to_string().contains("minimum/maximum"));
5276 }
5277
5278 #[test]
5279 fn validate_operational_collection_history_reports_invalid_rows_without_mutation() {
5280 let (db, service) = setup();
5281 service
5282 .register_operational_collection(&OperationalRegisterRequest {
5283 name: "audit_log".to_owned(),
5284 kind: OperationalCollectionKind::AppendOnlyLog,
5285 schema_json: "{}".to_owned(),
5286 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5287 filter_fields_json: "[]".to_owned(),
5288 validation_json: r#"{"format_version":1,"mode":"disabled","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#
5289 .to_owned(),
5290 secondary_indexes_json: "[]".to_owned(),
5291 format_version: 1,
5292 })
5293 .expect("register collection");
5294 {
5295 let writer = crate::WriterActor::start(
5296 db.path(),
5297 Arc::new(SchemaManager::new()),
5298 crate::ProvenanceMode::Warn,
5299 Arc::new(crate::TelemetryCounters::default()),
5300 )
5301 .expect("writer");
5302 writer
5303 .submit(crate::WriteRequest {
5304 label: "history-validation".to_owned(),
5305 nodes: vec![],
5306 node_retires: vec![],
5307 edges: vec![],
5308 edge_retires: vec![],
5309 chunks: vec![],
5310 runs: vec![],
5311 steps: vec![],
5312 actions: vec![],
5313 optional_backfills: vec![],
5314 vec_inserts: vec![],
5315 operational_writes: vec![
5316 crate::OperationalWrite::Append {
5317 collection: "audit_log".to_owned(),
5318 record_key: "evt-1".to_owned(),
5319 payload_json: r#"{"status":"ok"}"#.to_owned(),
5320 source_ref: Some("src-1".to_owned()),
5321 },
5322 crate::OperationalWrite::Append {
5323 collection: "audit_log".to_owned(),
5324 record_key: "evt-2".to_owned(),
5325 payload_json: r#"{"status":"bogus"}"#.to_owned(),
5326 source_ref: Some("src-2".to_owned()),
5327 },
5328 ],
5329 })
5330 .expect("write");
5331 }
5332
5333 let report = service
5334 .validate_operational_collection_history("audit_log")
5335 .expect("validate history");
5336 assert_eq!(report.collection_name, "audit_log");
5337 assert_eq!(report.checked_rows, 2);
5338 assert_eq!(report.invalid_row_count, 1);
5339 assert_eq!(report.issues.len(), 1);
5340 assert_eq!(report.issues[0].record_key, "evt-2");
5341 assert!(report.issues[0].message.contains("must be one of"));
5342
5343 let trace = service
5344 .trace_operational_collection("audit_log", None)
5345 .expect("trace");
5346 assert_eq!(trace.mutation_count, 2);
5347
5348 let conn = sqlite::open_connection(db.path()).expect("conn");
5349 let provenance_count: i64 = conn
5350 .query_row(
5351 "SELECT count(*) FROM provenance_events \
5352 WHERE event_type = 'operational_collection_history_validated' \
5353 AND subject = 'audit_log'",
5354 [],
5355 |row| row.get(0),
5356 )
5357 .expect("provenance count");
5358 assert_eq!(provenance_count, 0);
5359 }
5360
5361 #[test]
5362 fn trace_operational_collection_returns_mutations_and_current_rows() {
5363 let (db, service) = setup();
5364 service
5365 .register_operational_collection(&OperationalRegisterRequest {
5366 name: "connector_health".to_owned(),
5367 kind: OperationalCollectionKind::LatestState,
5368 schema_json: "{}".to_owned(),
5369 retention_json: "{}".to_owned(),
5370 filter_fields_json: "[]".to_owned(),
5371 validation_json: String::new(),
5372 secondary_indexes_json: "[]".to_owned(),
5373 format_version: 1,
5374 })
5375 .expect("register collection");
5376 {
5377 let writer = crate::WriterActor::start(
5378 db.path(),
5379 Arc::new(SchemaManager::new()),
5380 crate::ProvenanceMode::Warn,
5381 Arc::new(crate::TelemetryCounters::default()),
5382 )
5383 .expect("writer");
5384 writer
5385 .submit(crate::WriteRequest {
5386 label: "operational".to_owned(),
5387 nodes: vec![],
5388 node_retires: vec![],
5389 edges: vec![],
5390 edge_retires: vec![],
5391 chunks: vec![],
5392 runs: vec![],
5393 steps: vec![],
5394 actions: vec![],
5395 optional_backfills: vec![],
5396 vec_inserts: vec![],
5397 operational_writes: vec![crate::OperationalWrite::Put {
5398 collection: "connector_health".to_owned(),
5399 record_key: "gmail".to_owned(),
5400 payload_json: r#"{"status":"ok"}"#.to_owned(),
5401 source_ref: Some("src-1".to_owned()),
5402 }],
5403 })
5404 .expect("write");
5405 }
5406
5407 let report = service
5408 .trace_operational_collection("connector_health", Some("gmail"))
5409 .expect("trace");
5410 assert_eq!(report.collection_name, "connector_health");
5411 assert_eq!(report.record_key.as_deref(), Some("gmail"));
5412 assert_eq!(report.mutation_count, 1);
5413 assert_eq!(report.current_count, 1);
5414 assert_eq!(report.mutations[0].op_kind, "put");
5415 assert_eq!(report.current_rows[0].payload_json, r#"{"status":"ok"}"#);
5416 }
5417
5418 #[test]
5419 fn trace_operational_collection_rejects_unknown_collection() {
5420 let (_db, service) = setup();
5421
5422 let error = service
5423 .trace_operational_collection("missing_collection", None)
5424 .expect_err("unknown collection should fail");
5425
5426 assert!(matches!(error, EngineError::InvalidWrite(_)));
5427 assert!(error.to_string().contains("is not registered"));
5428 }
5429
5430 #[test]
5431 fn rebuild_operational_current_repairs_missing_latest_state_rows() {
5432 let (db, service) = setup();
5433 service
5434 .register_operational_collection(&OperationalRegisterRequest {
5435 name: "connector_health".to_owned(),
5436 kind: OperationalCollectionKind::LatestState,
5437 schema_json: "{}".to_owned(),
5438 retention_json: "{}".to_owned(),
5439 filter_fields_json: "[]".to_owned(),
5440 validation_json: String::new(),
5441 secondary_indexes_json: "[]".to_owned(),
5442 format_version: 1,
5443 })
5444 .expect("register collection");
5445 {
5446 let writer = crate::WriterActor::start(
5447 db.path(),
5448 Arc::new(SchemaManager::new()),
5449 crate::ProvenanceMode::Warn,
5450 Arc::new(crate::TelemetryCounters::default()),
5451 )
5452 .expect("writer");
5453 writer
5454 .submit(crate::WriteRequest {
5455 label: "operational".to_owned(),
5456 nodes: vec![],
5457 node_retires: vec![],
5458 edges: vec![],
5459 edge_retires: vec![],
5460 chunks: vec![],
5461 runs: vec![],
5462 steps: vec![],
5463 actions: vec![],
5464 optional_backfills: vec![],
5465 vec_inserts: vec![],
5466 operational_writes: vec![crate::OperationalWrite::Put {
5467 collection: "connector_health".to_owned(),
5468 record_key: "gmail".to_owned(),
5469 payload_json: r#"{"status":"ok"}"#.to_owned(),
5470 source_ref: Some("src-1".to_owned()),
5471 }],
5472 })
5473 .expect("write");
5474 }
5475 {
5476 let conn = sqlite::open_connection(db.path()).expect("conn");
5477 conn.execute(
5478 "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5479 [],
5480 )
5481 .expect("delete current row");
5482 }
5483
5484 let before = service.check_semantics().expect("semantics before rebuild");
5485 assert_eq!(before.missing_operational_current_rows, 1);
5486
5487 let repair = service
5488 .rebuild_operational_current(Some("connector_health"))
5489 .expect("rebuild current");
5490 assert_eq!(repair.collections_rebuilt, 1);
5491 assert_eq!(repair.current_rows_rebuilt, 1);
5492
5493 let after = service.check_semantics().expect("semantics after rebuild");
5494 assert_eq!(after.missing_operational_current_rows, 0);
5495
5496 let conn = sqlite::open_connection(db.path()).expect("conn");
5497 let payload: String = conn
5498 .query_row(
5499 "SELECT payload_json FROM operational_current \
5500 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
5501 [],
5502 |row| row.get(0),
5503 )
5504 .expect("restored payload");
5505 assert_eq!(payload, r#"{"status":"ok"}"#);
5506 }
5507
    /// Checks that rebuilding `operational_current` for a `latest_state`
    /// collection with a `latest_state_field` secondary index leaves exactly
    /// one `subject_kind = 'current'` index entry after the current row was
    /// deleted by hand.
    #[test]
    fn rebuild_operational_current_restores_latest_state_secondary_index_entries() {
        let (db, service) = setup();
        // Register the collection with one index (`status_current`) over `status`.
        service
            .register_operational_collection(&OperationalRegisterRequest {
                name: "connector_health".to_owned(),
                kind: OperationalCollectionKind::LatestState,
                schema_json: "{}".to_owned(),
                retention_json: "{}".to_owned(),
                filter_fields_json: "[]".to_owned(),
                validation_json: String::new(),
                secondary_indexes_json: r#"[{"name":"status_current","kind":"latest_state_field","field":"status","value_type":"string"}]"#.to_owned(),
                format_version: 1,
            })
            .expect("register collection");
        // Write one row through the writer; the block scope drops the writer
        // before the database is inspected.
        {
            let writer = crate::WriterActor::start(
                db.path(),
                Arc::new(SchemaManager::new()),
                crate::ProvenanceMode::Warn,
                Arc::new(crate::TelemetryCounters::default()),
            )
            .expect("writer");
            writer
                .submit(crate::WriteRequest {
                    label: "operational".to_owned(),
                    nodes: vec![],
                    node_retires: vec![],
                    edges: vec![],
                    edge_retires: vec![],
                    chunks: vec![],
                    runs: vec![],
                    steps: vec![],
                    actions: vec![],
                    optional_backfills: vec![],
                    vec_inserts: vec![],
                    operational_writes: vec![crate::OperationalWrite::Put {
                        collection: "connector_health".to_owned(),
                        record_key: "gmail".to_owned(),
                        payload_json: r#"{"status":"ok"}"#.to_owned(),
                        source_ref: Some("src-1".to_owned()),
                    }],
                })
                .expect("write");
        }
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // Baseline: the write produced one index entry for the current row.
            let entry_count: i64 = conn
                .query_row(
                    "SELECT count(*) FROM operational_secondary_index_entries \
                     WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
                    [],
                    |row| row.get(0),
                )
                .expect("secondary index count before repair");
            assert_eq!(entry_count, 1);
            // Only the current row is deleted here; this statement does not
            // touch the index table itself.
            conn.execute(
                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
            )
            .expect("delete current row");
        }

        service
            .rebuild_operational_current(Some("connector_health"))
            .expect("rebuild current");

        // After the rebuild there must be exactly one current-row index entry
        // (neither missing nor duplicated).
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let entry_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_secondary_index_entries \
                 WHERE collection_name = 'connector_health' AND subject_kind = 'current'",
                [],
                |row| row.get(0),
            )
            .expect("secondary index count after repair");
        assert_eq!(entry_count, 1);
    }
5586
    /// Seeds a put / delete / put history directly in SQL and checks that both
    /// `check_semantics` and `rebuild_operational_current` treat the mutation
    /// with the highest `mutation_order` as the latest state.
    ///
    /// All three mutations share `created_at = 100`, and their ids (`m3`,
    /// `m2`, `m1`) sort in the opposite direction to `mutation_order` —
    /// presumably so that neither timestamp nor id ordering can stand in for
    /// `mutation_order`.
    #[test]
    fn operational_current_semantics_and_rebuild_follow_mutation_order() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // The collection is inserted by hand rather than registered via the service.
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('connector_health', 'latest_state', '{}', '{}', 1, 100)",
                [],
            )
            .expect("seed collection");
            // mutation_order 1: initial put ("old").
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m3', 'connector_health', 'gmail', 'put', '{\"status\":\"old\"}', 'src-1', 100, 1)",
                [],
            )
            .expect("seed first put");
            // mutation_order 2: delete of the same key.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m2', 'connector_health', 'gmail', 'delete', '', 'src-2', 100, 2)",
                [],
            )
            .expect("seed delete");
            // mutation_order 3: final put ("new"), which is the expected latest state.
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('m1', 'connector_health', 'gmail', 'put', '{\"status\":\"new\"}', 'src-3', 100, 3)",
                [],
            )
            .expect("seed final put");
            // Current row consistent with the final put (`last_mutation_id = 'm1'`).
            conn.execute(
                "INSERT INTO operational_current \
                 (collection_name, record_key, payload_json, updated_at, last_mutation_id) \
                 VALUES ('connector_health', 'gmail', '{\"status\":\"new\"}', 100, 'm1')",
                [],
            )
            .expect("seed current");
        }

        // Checkpoint 1: the seeded state is reported as neither missing nor stale.
        let before = service.check_semantics().expect("semantics before rebuild");
        assert_eq!(before.missing_operational_current_rows, 0);
        assert_eq!(before.stale_operational_current_rows, 0);

        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "DELETE FROM operational_current WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
            )
            .expect("delete current row");
        }

        // Checkpoint 2: with the current row gone, one row is reported missing.
        let missing = service.check_semantics().expect("semantics after delete");
        assert_eq!(missing.missing_operational_current_rows, 1);
        assert_eq!(missing.stale_operational_current_rows, 0);

        service
            .rebuild_operational_current(Some("connector_health"))
            .expect("rebuild current");

        // Checkpoint 3: the rebuild clears the finding.
        let after = service.check_semantics().expect("semantics after rebuild");
        assert_eq!(after.missing_operational_current_rows, 0);
        assert_eq!(after.stale_operational_current_rows, 0);

        // The rebuilt row must carry the payload of the mutation_order = 3 put.
        let conn = sqlite::open_connection(db.path()).expect("conn");
        let payload: String = conn
            .query_row(
                "SELECT payload_json FROM operational_current \
                 WHERE collection_name = 'connector_health' AND record_key = 'gmail'",
                [],
                |row| row.get(0),
            )
            .expect("restored payload");
        assert_eq!(payload, r#"{"status":"new"}"#);
    }
5664
5665 #[test]
5666 fn disable_operational_collection_sets_disabled_at_and_emits_provenance() {
5667 let (db, service) = setup();
5668 service
5669 .register_operational_collection(&OperationalRegisterRequest {
5670 name: "audit_log".to_owned(),
5671 kind: OperationalCollectionKind::AppendOnlyLog,
5672 schema_json: "{}".to_owned(),
5673 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
5674 filter_fields_json: "[]".to_owned(),
5675 validation_json: String::new(),
5676 secondary_indexes_json: "[]".to_owned(),
5677 format_version: 1,
5678 })
5679 .expect("register collection");
5680
5681 let record = service
5682 .disable_operational_collection("audit_log")
5683 .expect("disable collection");
5684 assert_eq!(record.name, "audit_log");
5685 assert!(record.disabled_at.is_some());
5686
5687 let disabled_at = record.disabled_at.expect("disabled_at");
5688 let described = service
5689 .describe_operational_collection("audit_log")
5690 .expect("describe collection")
5691 .expect("collection exists");
5692 assert_eq!(described.disabled_at, Some(disabled_at));
5693
5694 let writer = crate::WriterActor::start(
5695 db.path(),
5696 Arc::new(SchemaManager::new()),
5697 crate::ProvenanceMode::Warn,
5698 Arc::new(crate::TelemetryCounters::default()),
5699 )
5700 .expect("writer");
5701 let error = writer
5702 .submit(crate::WriteRequest {
5703 label: "disabled-operational".to_owned(),
5704 nodes: vec![],
5705 node_retires: vec![],
5706 edges: vec![],
5707 edge_retires: vec![],
5708 chunks: vec![],
5709 runs: vec![],
5710 steps: vec![],
5711 actions: vec![],
5712 optional_backfills: vec![],
5713 vec_inserts: vec![],
5714 operational_writes: vec![crate::OperationalWrite::Append {
5715 collection: "audit_log".to_owned(),
5716 record_key: "evt-1".to_owned(),
5717 payload_json: r#"{"type":"sync"}"#.to_owned(),
5718 source_ref: Some("src-1".to_owned()),
5719 }],
5720 })
5721 .expect_err("disabled collection should reject writes");
5722 assert!(matches!(error, EngineError::InvalidWrite(_)));
5723 assert!(error.to_string().contains("is disabled"));
5724
5725 let conn = sqlite::open_connection(db.path()).expect("conn");
5726 let provenance_count: i64 = conn
5727 .query_row(
5728 "SELECT count(*) FROM provenance_events \
5729 WHERE event_type = 'operational_collection_disabled' AND subject = 'audit_log'",
5730 [],
5731 |row| row.get(0),
5732 )
5733 .expect("provenance count");
5734 assert_eq!(provenance_count, 1);
5735 }
5736
    /// Seeds three append mutations with `created_at` 100, 200 and 300, purges
    /// with a cutoff of 250, and checks that the two older rows are deleted,
    /// only `evt-3` remains, and one `operational_collection_purged`
    /// provenance event is stored.
    #[test]
    fn purge_operational_collection_deletes_append_only_rows_before_cutoff() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            // The collection and its history are seeded directly in SQL so the
            // timestamps are fully controlled by the test.
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_all\"}', 1, 100)",
                [],
            )
            .expect("seed collection");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-1', 'audit_log', 'evt-1', 'append', '{\"seq\":1}', 'src-1', 100, 1)",
                [],
            )
            .expect("seed event 1");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-2', 'audit_log', 'evt-2', 'append', '{\"seq\":2}', 'src-2', 200, 2)",
                [],
            )
            .expect("seed event 2");
            conn.execute(
                "INSERT INTO operational_mutations \
                 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                 VALUES ('evt-3', 'audit_log', 'evt-3', 'append', '{\"seq\":3}', 'src-3', 300, 3)",
                [],
            )
            .expect("seed event 3");
        }

        // Cutoff 250 falls between evt-2 (200) and evt-3 (300).
        let report = service
            .purge_operational_collection("audit_log", 250)
            .expect("purge collection");
        assert_eq!(report.collection_name, "audit_log");
        assert_eq!(report.deleted_mutations, 2);
        assert_eq!(report.before_timestamp, 250);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        // The prepared statement is confined to this block so it is dropped
        // before `conn` is used again below.
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-3".to_owned()]);
        // A real purge stores exactly one provenance event.
        let provenance_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM provenance_events \
                 WHERE event_type = 'operational_collection_purged' AND subject = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("provenance count");
        assert_eq!(provenance_count, 1);
    }
5802
5803 #[test]
5804 fn compact_operational_collection_dry_run_reports_without_mutation() {
5805 let (db, service) = setup();
5806 {
5807 let conn = sqlite::open_connection(db.path()).expect("conn");
5808 conn.execute(
5809 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5810 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
5811 [],
5812 )
5813 .expect("seed collection");
5814 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
5815 conn.execute(
5816 "INSERT INTO operational_mutations \
5817 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5818 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
5819 rusqlite::params![
5820 format!("evt-{index}"),
5821 format!("{{\"seq\":{index}}}"),
5822 created_at,
5823 index,
5824 ],
5825 )
5826 .expect("seed event");
5827 }
5828 }
5829
5830 let report = service
5831 .compact_operational_collection("audit_log", true)
5832 .expect("compact collection");
5833 assert_eq!(report.collection_name, "audit_log");
5834 assert_eq!(report.deleted_mutations, 1);
5835 assert!(report.dry_run);
5836 assert_eq!(report.before_timestamp, None);
5837
5838 let conn = sqlite::open_connection(db.path()).expect("conn");
5839 let remaining_count: i64 = conn
5840 .query_row(
5841 "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
5842 [],
5843 |row| row.get(0),
5844 )
5845 .expect("remaining count");
5846 assert_eq!(remaining_count, 3);
5847 let provenance_count: i64 = conn
5848 .query_row(
5849 "SELECT count(*) FROM provenance_events \
5850 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
5851 [],
5852 |row| row.get(0),
5853 )
5854 .expect("provenance count");
5855 assert_eq!(provenance_count, 0);
5856 }
5857
5858 #[test]
5859 fn compact_operational_collection_keep_last_deletes_oldest_rows() {
5860 let (db, service) = setup();
5861 {
5862 let conn = sqlite::open_connection(db.path()).expect("conn");
5863 conn.execute(
5864 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
5865 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
5866 [],
5867 )
5868 .expect("seed collection");
5869 for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
5870 conn.execute(
5871 "INSERT INTO operational_mutations \
5872 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
5873 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
5874 rusqlite::params![
5875 format!("evt-{index}"),
5876 format!("{{\"seq\":{index}}}"),
5877 created_at,
5878 index,
5879 ],
5880 )
5881 .expect("seed event");
5882 }
5883 }
5884
5885 let report = service
5886 .compact_operational_collection("audit_log", false)
5887 .expect("compact collection");
5888 assert_eq!(report.deleted_mutations, 1);
5889 assert!(!report.dry_run);
5890
5891 let conn = sqlite::open_connection(db.path()).expect("conn");
5892 let remaining: Vec<String> = {
5893 let mut stmt = conn
5894 .prepare(
5895 "SELECT id FROM operational_mutations \
5896 WHERE collection_name = 'audit_log' ORDER BY mutation_order",
5897 )
5898 .expect("stmt");
5899 stmt.query_map([], |row| row.get(0))
5900 .expect("rows")
5901 .collect::<Result<_, _>>()
5902 .expect("collect")
5903 };
5904 assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
5905 let provenance_count: i64 = conn
5906 .query_row(
5907 "SELECT count(*) FROM provenance_events \
5908 WHERE event_type = 'operational_collection_compacted' AND subject = 'audit_log'",
5909 [],
5910 |row| row.get(0),
5911 )
5912 .expect("provenance count");
5913 assert_eq!(provenance_count, 1);
5914 }
5915
    /// Walks a `keep_last` (max 2 rows) collection holding three mutations
    /// through the three retention entry points in order: plan, dry run, and
    /// real run. The plan and the dry run must report one candidate deletion
    /// without changing anything; the real run must delete the oldest row and
    /// store a retention run whose `executed_at` is 1_000.
    #[test]
    fn plan_and_run_operational_retention_keep_last() {
        let (db, service) = setup();
        {
            let conn = sqlite::open_connection(db.path()).expect("conn");
            conn.execute(
                "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
                 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
                [],
            )
            .expect("seed collection");
            // Three appends: `index` doubles as id suffix, payload sequence
            // number, and mutation_order.
            for (index, created_at) in [(1_i64, 100_i64), (2, 200), (3, 300)] {
                conn.execute(
                    "INSERT INTO operational_mutations \
                     (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
                     VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
                    rusqlite::params![
                        format!("evt-{index}"),
                        format!("{{\"seq\":{index}}}"),
                        created_at,
                        index,
                    ],
                )
                .expect("seed event");
            }
        }

        // Phase 1: planning only describes what would happen.
        let plan = service
            .plan_operational_retention(1_000, None, Some(10))
            .expect("plan retention");
        assert_eq!(plan.collections_examined, 1);
        assert_eq!(plan.items[0].collection_name, "audit_log");
        assert_eq!(
            plan.items[0].action_kind,
            crate::operational::OperationalRetentionActionKind::KeepLast
        );
        assert_eq!(plan.items[0].candidate_deletions, 1);
        assert_eq!(plan.items[0].max_rows, Some(2));
        // No retention run has been stored yet.
        assert_eq!(plan.items[0].last_run_at, None);

        // Phase 2: the dry run reports the same deletion but must not apply it.
        let dry_run = service
            .run_operational_retention(1_000, None, Some(10), true)
            .expect("dry-run retention");
        assert!(dry_run.dry_run);
        assert_eq!(dry_run.collections_acted_on, 1);
        assert_eq!(dry_run.items[0].deleted_mutations, 1);
        assert_eq!(dry_run.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        let remaining_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_mutations WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("remaining count after dry run");
        assert_eq!(remaining_count, 3);
        let retention_run_count: i64 = conn
            .query_row(
                "SELECT count(*) FROM operational_retention_runs WHERE collection_name = 'audit_log'",
                [],
                |row| row.get(0),
            )
            .expect("retention run count");
        assert_eq!(retention_run_count, 0);
        // Close the inspection connection before the real run.
        drop(conn);

        // Phase 3: the real run deletes the oldest row.
        let executed = service
            .run_operational_retention(1_000, None, Some(10), false)
            .expect("execute retention");
        assert_eq!(executed.collections_acted_on, 1);
        assert_eq!(executed.items[0].deleted_mutations, 1);
        assert_eq!(executed.items[0].rows_remaining, 2);

        let conn = sqlite::open_connection(db.path()).expect("conn");
        // The statement lives in its own block so it is dropped before the
        // connection is queried again.
        let remaining: Vec<String> = {
            let mut stmt = conn
                .prepare(
                    "SELECT id FROM operational_mutations \
                     WHERE collection_name = 'audit_log' ORDER BY mutation_order",
                )
                .expect("stmt");
            stmt.query_map([], |row| row.get(0))
                .expect("rows")
                .collect::<Result<_, _>>()
                .expect("collect")
        };
        assert_eq!(remaining, vec!["evt-2".to_owned(), "evt-3".to_owned()]);
        // The stored run's `executed_at` equals the first argument passed to
        // `run_operational_retention` (1_000).
        let last_run_at: i64 = conn
            .query_row(
                "SELECT executed_at FROM operational_retention_runs \
                 WHERE collection_name = 'audit_log' ORDER BY executed_at DESC LIMIT 1",
                [],
                |row| row.get(0),
            )
            .expect("last run at");
        assert_eq!(last_run_at, 1_000);
    }
6014
6015 #[test]
6016 fn dry_run_operational_retention_does_not_mark_noop_collection_as_acted_on() {
6017 let (db, service) = setup();
6018 let conn = sqlite::open_connection(db.path()).expect("conn");
6019 conn.execute(
6020 "INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at) \
6021 VALUES ('audit_log', 'append_only_log', '{}', '{\"mode\":\"keep_last\",\"max_rows\":2}', 1, 100)",
6022 [],
6023 )
6024 .expect("seed collection");
6025 for (index, created_at) in [(1_i64, 100_i64), (2, 200)] {
6026 conn.execute(
6027 "INSERT INTO operational_mutations \
6028 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order) \
6029 VALUES (?1, 'audit_log', ?1, 'append', ?2, 'src', ?3, ?4)",
6030 rusqlite::params![
6031 format!("evt-{index}"),
6032 format!("{{\"seq\":{index}}}"),
6033 created_at,
6034 index,
6035 ],
6036 )
6037 .expect("seed event");
6038 }
6039 drop(conn);
6040
6041 let dry_run = service
6042 .run_operational_retention(1_000, None, Some(10), true)
6043 .expect("dry-run retention");
6044 assert!(dry_run.dry_run);
6045 assert_eq!(dry_run.collections_acted_on, 0);
6046 assert_eq!(dry_run.items[0].deleted_mutations, 0);
6047 assert_eq!(dry_run.items[0].rows_remaining, 2);
6048 }
6049
6050 #[test]
6051 fn compact_operational_collection_rejects_latest_state() {
6052 let (_db, service) = setup();
6053 service
6054 .register_operational_collection(&OperationalRegisterRequest {
6055 name: "connector_health".to_owned(),
6056 kind: OperationalCollectionKind::LatestState,
6057 schema_json: "{}".to_owned(),
6058 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6059 filter_fields_json: "[]".to_owned(),
6060 validation_json: String::new(),
6061 secondary_indexes_json: "[]".to_owned(),
6062 format_version: 1,
6063 })
6064 .expect("register collection");
6065
6066 let error = service
6067 .compact_operational_collection("connector_health", false)
6068 .expect_err("latest_state compaction should be rejected");
6069 assert!(matches!(error, EngineError::InvalidWrite(_)));
6070 assert!(error.to_string().contains("append_only_log"));
6071 }
6072
6073 #[test]
6074 fn register_operational_collection_persists_filter_fields_json() {
6075 let (_db, service) = setup();
6076
6077 let record = service
6078 .register_operational_collection(&OperationalRegisterRequest {
6079 name: "audit_log".to_owned(),
6080 kind: OperationalCollectionKind::AppendOnlyLog,
6081 schema_json: "{}".to_owned(),
6082 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6083 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6084 validation_json: String::new(),
6085 secondary_indexes_json: "[]".to_owned(),
6086 format_version: 1,
6087 })
6088 .expect("register collection");
6089
6090 assert_eq!(
6091 record.filter_fields_json,
6092 r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#
6093 );
6094 }
6095
6096 #[test]
6097 fn read_operational_collection_filters_append_only_rows_by_declared_fields() {
6098 let (db, service) = setup();
6099 service
6100 .register_operational_collection(&OperationalRegisterRequest {
6101 name: "audit_log".to_owned(),
6102 kind: OperationalCollectionKind::AppendOnlyLog,
6103 schema_json: "{}".to_owned(),
6104 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6105 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"seq","type":"integer","modes":["exact","range"]},{"name":"ts","type":"timestamp","modes":["exact","range"]}]"#.to_owned(),
6106 validation_json: String::new(),
6107 secondary_indexes_json: "[]".to_owned(),
6108 format_version: 1,
6109 })
6110 .expect("register collection");
6111 {
6112 let writer = crate::WriterActor::start(
6113 db.path(),
6114 Arc::new(SchemaManager::new()),
6115 crate::ProvenanceMode::Warn,
6116 Arc::new(crate::TelemetryCounters::default()),
6117 )
6118 .expect("writer");
6119 writer
6120 .submit(crate::WriteRequest {
6121 label: "operational".to_owned(),
6122 nodes: vec![],
6123 node_retires: vec![],
6124 edges: vec![],
6125 edge_retires: vec![],
6126 chunks: vec![],
6127 runs: vec![],
6128 steps: vec![],
6129 actions: vec![],
6130 optional_backfills: vec![],
6131 vec_inserts: vec![],
6132 operational_writes: vec![
6133 crate::OperationalWrite::Append {
6134 collection: "audit_log".to_owned(),
6135 record_key: "evt-1".to_owned(),
6136 payload_json: r#"{"actor":"alice","seq":1,"ts":100}"#.to_owned(),
6137 source_ref: Some("src-1".to_owned()),
6138 },
6139 crate::OperationalWrite::Append {
6140 collection: "audit_log".to_owned(),
6141 record_key: "evt-2".to_owned(),
6142 payload_json: r#"{"actor":"alice-admin","seq":2,"ts":200}"#.to_owned(),
6143 source_ref: Some("src-2".to_owned()),
6144 },
6145 crate::OperationalWrite::Append {
6146 collection: "audit_log".to_owned(),
6147 record_key: "evt-3".to_owned(),
6148 payload_json: r#"{"actor":"bob","seq":3,"ts":300}"#.to_owned(),
6149 source_ref: Some("src-3".to_owned()),
6150 },
6151 ],
6152 })
6153 .expect("write");
6154 }
6155
6156 let report = service
6157 .read_operational_collection(&crate::operational::OperationalReadRequest {
6158 collection_name: "audit_log".to_owned(),
6159 filters: vec![
6160 crate::operational::OperationalFilterClause::Prefix {
6161 field: "actor".to_owned(),
6162 value: "alice".to_owned(),
6163 },
6164 crate::operational::OperationalFilterClause::Range {
6165 field: "ts".to_owned(),
6166 lower: Some(150),
6167 upper: Some(250),
6168 },
6169 ],
6170 limit: Some(10),
6171 })
6172 .expect("filtered read");
6173
6174 assert_eq!(report.collection_name, "audit_log");
6175 assert_eq!(report.row_count, 1);
6176 assert!(!report.was_limited);
6177 assert_eq!(report.rows.len(), 1);
6178 assert_eq!(report.rows[0].record_key, "evt-2");
6179 assert_eq!(
6180 report.rows[0].payload_json,
6181 r#"{"actor":"alice-admin","seq":2,"ts":200}"#
6182 );
6183 }
6184
6185 #[test]
6186 fn read_operational_collection_uses_secondary_index_when_filter_values_are_missing() {
6187 let (db, service) = setup();
6188 service
6189 .register_operational_collection(&OperationalRegisterRequest {
6190 name: "audit_log".to_owned(),
6191 kind: OperationalCollectionKind::AppendOnlyLog,
6192 schema_json: "{}".to_owned(),
6193 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6194 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact","prefix"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#.to_owned(),
6195 validation_json: String::new(),
6196 secondary_indexes_json: r#"[{"name":"actor_ts","kind":"append_only_field_time","field":"actor","value_type":"string","time_field":"ts"}]"#.to_owned(),
6197 format_version: 1,
6198 })
6199 .expect("register collection");
6200 {
6201 let writer = crate::WriterActor::start(
6202 db.path(),
6203 Arc::new(SchemaManager::new()),
6204 crate::ProvenanceMode::Warn,
6205 Arc::new(crate::TelemetryCounters::default()),
6206 )
6207 .expect("writer");
6208 writer
6209 .submit(crate::WriteRequest {
6210 label: "operational".to_owned(),
6211 nodes: vec![],
6212 node_retires: vec![],
6213 edges: vec![],
6214 edge_retires: vec![],
6215 chunks: vec![],
6216 runs: vec![],
6217 steps: vec![],
6218 actions: vec![],
6219 optional_backfills: vec![],
6220 vec_inserts: vec![],
6221 operational_writes: vec![
6222 crate::OperationalWrite::Append {
6223 collection: "audit_log".to_owned(),
6224 record_key: "evt-1".to_owned(),
6225 payload_json: r#"{"actor":"alice","ts":100}"#.to_owned(),
6226 source_ref: Some("src-1".to_owned()),
6227 },
6228 crate::OperationalWrite::Append {
6229 collection: "audit_log".to_owned(),
6230 record_key: "evt-2".to_owned(),
6231 payload_json: r#"{"actor":"alice-admin","ts":200}"#.to_owned(),
6232 source_ref: Some("src-2".to_owned()),
6233 },
6234 ],
6235 })
6236 .expect("write");
6237 }
6238 let conn = sqlite::open_connection(db.path()).expect("conn");
6239 conn.execute(
6240 "DELETE FROM operational_filter_values WHERE collection_name = 'audit_log'",
6241 [],
6242 )
6243 .expect("clear filter values");
6244 drop(conn);
6245
6246 let report = service
6247 .read_operational_collection(&crate::operational::OperationalReadRequest {
6248 collection_name: "audit_log".to_owned(),
6249 filters: vec![
6250 crate::operational::OperationalFilterClause::Prefix {
6251 field: "actor".to_owned(),
6252 value: "alice".to_owned(),
6253 },
6254 crate::operational::OperationalFilterClause::Range {
6255 field: "ts".to_owned(),
6256 lower: Some(150),
6257 upper: Some(250),
6258 },
6259 ],
6260 limit: Some(10),
6261 })
6262 .expect("secondary-index read");
6263
6264 assert_eq!(report.row_count, 1);
6265 assert_eq!(report.rows[0].record_key, "evt-2");
6266 }
6267
6268 #[test]
6269 fn read_operational_collection_rejects_undeclared_fields_and_latest_state_collections() {
6270 let (_db, service) = setup();
6271 service
6272 .register_operational_collection(&OperationalRegisterRequest {
6273 name: "connector_health".to_owned(),
6274 kind: OperationalCollectionKind::LatestState,
6275 schema_json: "{}".to_owned(),
6276 retention_json: "{}".to_owned(),
6277 filter_fields_json: r#"[{"name":"status","type":"string","modes":["exact"]}]"#
6278 .to_owned(),
6279 validation_json: String::new(),
6280 secondary_indexes_json: "[]".to_owned(),
6281 format_version: 1,
6282 })
6283 .expect("register collection");
6284
6285 let latest_state_error = service
6286 .read_operational_collection(&crate::operational::OperationalReadRequest {
6287 collection_name: "connector_health".to_owned(),
6288 filters: vec![crate::operational::OperationalFilterClause::Exact {
6289 field: "status".to_owned(),
6290 value: crate::operational::OperationalFilterValue::String("ok".to_owned()),
6291 }],
6292 limit: Some(10),
6293 })
6294 .expect_err("latest_state filtered reads should be rejected");
6295 assert!(latest_state_error.to_string().contains("append_only_log"));
6296
6297 service
6298 .register_operational_collection(&OperationalRegisterRequest {
6299 name: "audit_log".to_owned(),
6300 kind: OperationalCollectionKind::AppendOnlyLog,
6301 schema_json: "{}".to_owned(),
6302 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6303 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["exact"]}]"#
6304 .to_owned(),
6305 validation_json: String::new(),
6306 secondary_indexes_json: "[]".to_owned(),
6307 format_version: 1,
6308 })
6309 .expect("register append-only collection");
6310
6311 let undeclared_error = service
6312 .read_operational_collection(&crate::operational::OperationalReadRequest {
6313 collection_name: "audit_log".to_owned(),
6314 filters: vec![crate::operational::OperationalFilterClause::Exact {
6315 field: "missing".to_owned(),
6316 value: crate::operational::OperationalFilterValue::String("x".to_owned()),
6317 }],
6318 limit: Some(10),
6319 })
6320 .expect_err("undeclared field should be rejected");
6321 assert!(undeclared_error.to_string().contains("undeclared"));
6322 }
6323
6324 #[test]
6325 fn read_operational_collection_applies_limit_and_reports_truncation() {
6326 let (db, service) = setup();
6327 service
6328 .register_operational_collection(&OperationalRegisterRequest {
6329 name: "audit_log".to_owned(),
6330 kind: OperationalCollectionKind::AppendOnlyLog,
6331 schema_json: "{}".to_owned(),
6332 retention_json: r#"{"mode":"keep_all"}"#.to_owned(),
6333 filter_fields_json: r#"[{"name":"actor","type":"string","modes":["prefix"]}]"#
6334 .to_owned(),
6335 validation_json: String::new(),
6336 secondary_indexes_json: "[]".to_owned(),
6337 format_version: 1,
6338 })
6339 .expect("register collection");
6340 {
6341 let writer = crate::WriterActor::start(
6342 db.path(),
6343 Arc::new(SchemaManager::new()),
6344 crate::ProvenanceMode::Warn,
6345 Arc::new(crate::TelemetryCounters::default()),
6346 )
6347 .expect("writer");
6348 writer
6349 .submit(crate::WriteRequest {
6350 label: "operational".to_owned(),
6351 nodes: vec![],
6352 node_retires: vec![],
6353 edges: vec![],
6354 edge_retires: vec![],
6355 chunks: vec![],
6356 runs: vec![],
6357 steps: vec![],
6358 actions: vec![],
6359 optional_backfills: vec![],
6360 vec_inserts: vec![],
6361 operational_writes: vec![
6362 crate::OperationalWrite::Append {
6363 collection: "audit_log".to_owned(),
6364 record_key: "evt-1".to_owned(),
6365 payload_json: r#"{"actor":"alice-1"}"#.to_owned(),
6366 source_ref: Some("src-1".to_owned()),
6367 },
6368 crate::OperationalWrite::Append {
6369 collection: "audit_log".to_owned(),
6370 record_key: "evt-2".to_owned(),
6371 payload_json: r#"{"actor":"alice-2"}"#.to_owned(),
6372 source_ref: Some("src-2".to_owned()),
6373 },
6374 ],
6375 })
6376 .expect("write");
6377 }
6378
6379 let report = service
6380 .read_operational_collection(&crate::operational::OperationalReadRequest {
6381 collection_name: "audit_log".to_owned(),
6382 filters: vec![crate::operational::OperationalFilterClause::Prefix {
6383 field: "actor".to_owned(),
6384 value: "alice".to_owned(),
6385 }],
6386 limit: Some(1),
6387 })
6388 .expect("limited read");
6389
6390 assert_eq!(report.row_count, 1);
6391 assert_eq!(report.applied_limit, 1);
6392 assert!(report.was_limited);
6393 assert_eq!(report.rows[0].record_key, "evt-2");
6394 }
6395
6396 #[test]
6397 fn preexisting_operational_collection_can_gain_filter_contract_after_upgrade() {
6398 let db = NamedTempFile::new().expect("temp db");
6399 let conn = sqlite::open_connection(db.path()).expect("conn");
6400 conn.execute_batch(
6401 r#"
6402 CREATE TABLE operational_collections (
6403 name TEXT PRIMARY KEY,
6404 kind TEXT NOT NULL,
6405 schema_json TEXT NOT NULL,
6406 retention_json TEXT NOT NULL,
6407 format_version INTEGER NOT NULL DEFAULT 1,
6408 created_at INTEGER NOT NULL DEFAULT 100,
6409 disabled_at INTEGER
6410 );
6411 CREATE TABLE operational_mutations (
6412 id TEXT PRIMARY KEY,
6413 collection_name TEXT NOT NULL,
6414 record_key TEXT NOT NULL,
6415 op_kind TEXT NOT NULL,
6416 payload_json TEXT NOT NULL,
6417 source_ref TEXT,
6418 created_at INTEGER NOT NULL DEFAULT 100,
6419 mutation_order INTEGER NOT NULL DEFAULT 1
6420 );
6421 INSERT INTO operational_collections (name, kind, schema_json, retention_json, format_version, created_at)
6422 VALUES ('audit_log', 'append_only_log', '{}', '{"mode":"keep_all"}', 1, 100);
6423 INSERT INTO operational_mutations
6424 (id, collection_name, record_key, op_kind, payload_json, source_ref, created_at, mutation_order)
6425 VALUES
6426 ('evt-1', 'audit_log', 'evt-1', 'append', '{"actor":"alice","ts":0}', 'src-1', 100, 1);
6427 "#,
6428 )
6429 .expect("seed pre-v10 schema");
6430 drop(conn);
6431
6432 let service = AdminService::new(db.path(), Arc::new(SchemaManager::new()));
6433 let pre_update = service
6434 .read_operational_collection(&crate::operational::OperationalReadRequest {
6435 collection_name: "audit_log".to_owned(),
6436 filters: vec![crate::operational::OperationalFilterClause::Exact {
6437 field: "actor".to_owned(),
6438 value: crate::operational::OperationalFilterValue::String("alice".to_owned()),
6439 }],
6440 limit: Some(10),
6441 })
6442 .expect_err("read should reject undeclared fields before migration update");
6443 assert!(pre_update.to_string().contains("undeclared"));
6444
6445 let updated = service
6446 .update_operational_collection_filters(
6447 "audit_log",
6448 r#"[{"name":"actor","type":"string","modes":["exact"]},{"name":"ts","type":"timestamp","modes":["range"]}]"#,
6449 )
6450 .expect("update filter contract");
6451 assert!(updated.filter_fields_json.contains("\"actor\""));
6452
6453 let report = service
6454 .read_operational_collection(&crate::operational::OperationalReadRequest {
6455 collection_name: "audit_log".to_owned(),
6456 filters: vec![crate::operational::OperationalFilterClause::Range {
6457 field: "ts".to_owned(),
6458 lower: Some(0),
6459 upper: Some(0),
6460 }],
6461 limit: Some(10),
6462 })
6463 .expect("read after explicit filter update");
6464 assert_eq!(report.row_count, 1);
6465 assert_eq!(report.rows[0].record_key, "evt-1");
6466 }
6467
6468 #[cfg(feature = "sqlite-vec")]
6469 #[test]
6470 fn check_semantics_detects_stale_vec_rows() {
6471 use crate::sqlite::open_connection_with_vec;
6472
6473 let db = NamedTempFile::new().expect("temp file");
6474 let schema = Arc::new(SchemaManager::new());
6475 {
6476 let conn = open_connection_with_vec(db.path()).expect("vec conn");
6477 schema.bootstrap(&conn).expect("bootstrap");
6478 schema
6479 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 3)
6480 .expect("vec profile");
6481 let bytes: Vec<u8> = [0.1f32, 0.2f32, 0.3f32]
6483 .iter()
6484 .flat_map(|f| f.to_le_bytes())
6485 .collect();
6486 conn.execute(
6487 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ghost-chunk', ?1)",
6488 rusqlite::params![bytes],
6489 )
6490 .expect("insert stale vec row");
6491 }
6492 let service = AdminService::new(db.path(), Arc::clone(&schema));
6493 let report = service.check_semantics().expect("semantics check");
6494 assert_eq!(report.stale_vec_rows, 1);
6495 assert!(
6496 report.warnings.iter().any(|w| w.contains("stale vec")),
6497 "warning must mention stale vec"
6498 );
6499 }
6500
6501 #[cfg(feature = "sqlite-vec")]
6502 #[test]
6503 fn restore_vector_profiles_recreates_vec_table_from_metadata() {
6504 let db = NamedTempFile::new().expect("temp file");
6505 let schema = Arc::new(SchemaManager::new());
6506 {
6507 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6508 schema.bootstrap(&conn).expect("bootstrap");
6509 conn.execute(
6510 "INSERT INTO vector_profiles (profile, table_name, dimension, enabled) \
6511 VALUES ('default', 'vec_nodes_active', 3, 1)",
6512 [],
6513 )
6514 .expect("insert vector profile");
6515 }
6516
6517 let service = AdminService::new(db.path(), Arc::clone(&schema));
6518 let report = service
6519 .restore_vector_profiles()
6520 .expect("restore vector profiles");
6521 assert_eq!(
6522 report.targets,
6523 vec![crate::projection::ProjectionTarget::Vec]
6524 );
6525 assert_eq!(report.rebuilt_rows, 1);
6526
6527 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6528 let count: i64 = conn
6529 .query_row(
6530 "SELECT count(*) FROM sqlite_schema WHERE name = 'vec_nodes_active'",
6531 [],
6532 |row| row.get(0),
6533 )
6534 .expect("vec schema count");
6535 assert_eq!(count, 1, "vec table should exist after restore");
6536 }
6537
6538 #[cfg(feature = "sqlite-vec")]
6539 #[test]
6540 fn load_vector_regeneration_config_supports_json_and_toml() {
6541 let dir = tempfile::tempdir().expect("temp dir");
6542 let json_path = dir.path().join("regen.json");
6543 let toml_path = dir.path().join("regen.toml");
6544
6545 let config = VectorRegenerationConfig {
6546 profile: "default".to_owned(),
6547 table_name: "vec_nodes_active".to_owned(),
6548 model_identity: "model-a".to_owned(),
6549 model_version: "1.0".to_owned(),
6550 dimension: 4,
6551 normalization_policy: "l2".to_owned(),
6552 chunking_policy: "per_chunk".to_owned(),
6553 preprocessing_policy: "trim".to_owned(),
6554 generator_command: vec!["/bin/echo".to_owned()],
6555 };
6556
6557 fs::write(&json_path, serde_json::to_string(&config).expect("json")).expect("write json");
6558 fs::write(&toml_path, toml::to_string(&config).expect("toml")).expect("write toml");
6559
6560 let parsed_json = load_vector_regeneration_config(&json_path).expect("json parse");
6561 let parsed_toml = load_vector_regeneration_config(&toml_path).expect("toml parse");
6562
6563 assert_eq!(parsed_json, config);
6564 assert_eq!(parsed_toml, config);
6565 }
6566
6567 #[cfg(all(not(feature = "sqlite-vec"), unix))]
6568 #[test]
6569 fn regenerate_vector_embeddings_unsupported_vec_capability_writes_request_and_failed_audit() {
6570 let db = NamedTempFile::new().expect("temp file");
6571 let schema = Arc::new(SchemaManager::new());
6572 let temp_dir = tempfile::tempdir().expect("temp dir");
6573 let script_path = temp_dir.path().join("vector-generator-no-vec.sh");
6574
6575 fs::write(
6576 &script_path,
6577 r#"#!/usr/bin/env bash
6578set -euo pipefail
6579python3 -c 'import json, sys
6580payload = json.load(sys.stdin)
6581embeddings = [{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]} for chunk in payload["chunks"]]
6582json.dump({"embeddings": embeddings}, sys.stdout)'
6583"#,
6584 )
6585 .expect("write generator script");
6586 set_file_mode(&script_path, 0o755);
6587
6588 {
6589 let conn = sqlite::open_connection(db.path()).expect("connection");
6590 schema.bootstrap(&conn).expect("bootstrap");
6591 conn.execute(
6592 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6593 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6594 [],
6595 )
6596 .expect("insert node");
6597 conn.execute(
6598 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6599 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6600 [],
6601 )
6602 .expect("insert chunk");
6603 }
6604
6605 let service = AdminService::new(db.path(), Arc::clone(&schema));
6606 let error = service
6607 .regenerate_vector_embeddings(&VectorRegenerationConfig {
6608 profile: "default".to_owned(),
6609 table_name: "vec_nodes_active".to_owned(),
6610 model_identity: "test-model".to_owned(),
6611 model_version: "1.0.0".to_owned(),
6612 dimension: 4,
6613 normalization_policy: "l2".to_owned(),
6614 chunking_policy: "per_chunk".to_owned(),
6615 preprocessing_policy: "trim".to_owned(),
6616 generator_command: vec![script_path.to_string_lossy().to_string()],
6617 })
6618 .expect_err("sqlite-vec capability should be required");
6619
6620 assert!(error.to_string().contains("unsupported vec capability"));
6621
6622 let conn = sqlite::open_connection(db.path()).expect("connection");
6623 let request_count: i64 = conn
6624 .query_row(
6625 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6626 [],
6627 |row| row.get(0),
6628 )
6629 .expect("request count");
6630 assert_eq!(request_count, 1);
6631 let failed_count: i64 = conn
6632 .query_row(
6633 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6634 [],
6635 |row| row.get(0),
6636 )
6637 .expect("failed count");
6638 assert_eq!(failed_count, 1);
6639 let metadata_json: String = conn
6640 .query_row(
6641 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6642 [],
6643 |row| row.get(0),
6644 )
6645 .expect("failed metadata");
6646 assert!(metadata_json.contains("\"failure_class\":\"unsupported vec capability\""));
6647 }
6648
6649 #[cfg(feature = "sqlite-vec")]
6650 #[test]
6651 fn regenerate_vector_embeddings_rebuilds_embeddings_from_generator() {
6652 let db = NamedTempFile::new().expect("temp file");
6653 let schema = Arc::new(SchemaManager::new());
6654 let temp_dir = tempfile::tempdir().expect("temp dir");
6655 let script_path = temp_dir.path().join("vector-generator.sh");
6656
6657 fs::write(
6658 &script_path,
6659 r#"#!/usr/bin/env bash
6660set -euo pipefail
6661python3 -c 'import json, sys
6662payload = json.load(sys.stdin)
6663embeddings = []
6664for chunk in payload["chunks"]:
6665 text = chunk["text_content"].lower()
6666 if "budget" in text:
6667 embedding = [1.0, 0.0, 0.0, 0.0]
6668 else:
6669 embedding = [0.0, 1.0, 0.0, 0.0]
6670 embeddings.append({"chunk_id": chunk["chunk_id"], "embedding": embedding})
6671json.dump({"embeddings": embeddings}, sys.stdout)'
6672"#,
6673 )
6674 .expect("write generator script");
6675 set_file_mode(&script_path, 0o755);
6676
6677 {
6678 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6679 schema.bootstrap(&conn).expect("bootstrap");
6680 conn.execute(
6681 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6682 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6683 [],
6684 )
6685 .expect("insert node");
6686 conn.execute(
6687 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6688 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6689 [],
6690 )
6691 .expect("insert chunk 1");
6692 conn.execute(
6693 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6694 VALUES ('chunk-2', 'doc-1', 'travel plan', 101)",
6695 [],
6696 )
6697 .expect("insert chunk 2");
6698 }
6699
6700 let service = AdminService::new(db.path(), Arc::clone(&schema));
6701 let report = service
6702 .regenerate_vector_embeddings(&VectorRegenerationConfig {
6703 profile: "default".to_owned(),
6704 table_name: "vec_nodes_active".to_owned(),
6705 model_identity: "test-model".to_owned(),
6706 model_version: "1.0.0".to_owned(),
6707 dimension: 4,
6708 normalization_policy: "l2".to_owned(),
6709 chunking_policy: "per_chunk".to_owned(),
6710 preprocessing_policy: "trim".to_owned(),
6711 generator_command: vec![script_path.to_string_lossy().to_string()],
6712 })
6713 .expect("regenerate vectors");
6714
6715 assert_eq!(report.profile, "default");
6716 assert_eq!(report.table_name, "vec_nodes_active");
6717 assert_eq!(report.dimension, 4);
6718 assert_eq!(report.total_chunks, 2);
6719 assert_eq!(report.regenerated_rows, 2);
6720 assert!(report.contract_persisted);
6721
6722 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6723 let vec_count: i64 = conn
6724 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6725 row.get(0)
6726 })
6727 .expect("vec count");
6728 assert_eq!(vec_count, 2);
6729
6730 let contract_count: i64 = conn
6731 .query_row(
6732 "SELECT count(*) FROM vector_embedding_contracts WHERE profile = 'default'",
6733 [],
6734 |row| row.get(0),
6735 )
6736 .expect("contract count");
6737 assert_eq!(contract_count, 1);
6738 let applied_at: i64 = conn
6739 .query_row(
6740 "SELECT applied_at FROM vector_embedding_contracts WHERE profile = 'default'",
6741 [],
6742 |row| row.get(0),
6743 )
6744 .expect("applied_at");
6745 assert!(applied_at > 0);
6746 let snapshot_hash: String = conn
6747 .query_row(
6748 "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
6749 [],
6750 |row| row.get(0),
6751 )
6752 .expect("snapshot_hash");
6753 assert!(!snapshot_hash.is_empty());
6754 let contract_format_version: i64 = conn
6755 .query_row(
6756 "SELECT contract_format_version FROM vector_embedding_contracts WHERE profile = 'default'",
6757 [],
6758 |row| row.get(0),
6759 )
6760 .expect("contract_format_version");
6761 assert_eq!(contract_format_version, 1);
6762 let request_count: i64 = conn
6763 .query_row(
6764 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_requested' AND subject = 'default'",
6765 [],
6766 |row| row.get(0),
6767 )
6768 .expect("request audit count");
6769 assert_eq!(request_count, 1);
6770 let apply_count: i64 = conn
6771 .query_row(
6772 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6773 [],
6774 |row| row.get(0),
6775 )
6776 .expect("apply audit count");
6777 assert_eq!(apply_count, 1);
6778 let apply_metadata: String = conn
6779 .query_row(
6780 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_apply' AND subject = 'default'",
6781 [],
6782 |row| row.get(0),
6783 )
6784 .expect("apply metadata");
6785 assert!(apply_metadata.contains("\"profile\":\"default\""));
6786 assert!(apply_metadata.contains("\"snapshot_hash\":"));
6787 }
6788
6789 #[cfg(feature = "sqlite-vec")]
6790 #[test]
6791 fn regenerate_vector_embeddings_failure_leaves_contract_and_vec_rows_unchanged() {
6792 let db = NamedTempFile::new().expect("temp file");
6793 let schema = Arc::new(SchemaManager::new());
6794 let temp_dir = tempfile::tempdir().expect("temp dir");
6795 let script_path = temp_dir.path().join("vector-generator-fail.sh");
6796
6797 fs::write(
6798 &script_path,
6799 "#!/usr/bin/env bash\nset -euo pipefail\necho 'generator boom' >&2\nexit 17\n",
6800 )
6801 .expect("write failing script");
6802 set_file_mode(&script_path, 0o755);
6803
6804 {
6805 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6806 schema.bootstrap(&conn).expect("bootstrap");
6807 conn.execute(
6808 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6809 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6810 [],
6811 )
6812 .expect("insert node");
6813 conn.execute(
6814 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6815 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6816 [],
6817 )
6818 .expect("insert chunk");
6819 schema
6820 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
6821 .expect("ensure vec profile");
6822 conn.execute(
6823 r"
6824 INSERT INTO vector_embedding_contracts (
6825 profile,
6826 table_name,
6827 model_identity,
6828 model_version,
6829 dimension,
6830 normalization_policy,
6831 chunking_policy,
6832 preprocessing_policy,
6833 generator_command_json,
6834 applied_at,
6835 snapshot_hash
6836 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
6837 ",
6838 rusqlite::params![
6839 "default",
6840 "vec_nodes_active",
6841 "old-model",
6842 "0.9.0",
6843 4,
6844 "l2",
6845 "per_chunk",
6846 "trim",
6847 "[\"/bin/echo\"]",
6848 111,
6849 "old-snapshot"
6850 ],
6851 )
6852 .expect("seed contract");
6853 conn.execute(
6854 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
6855 [],
6856 )
6857 .expect("seed vec row");
6858 }
6859
6860 let service = AdminService::new(db.path(), Arc::clone(&schema));
6861 let error = service
6862 .regenerate_vector_embeddings_with_policy(
6863 &VectorRegenerationConfig {
6864 profile: "default".to_owned(),
6865 table_name: "vec_nodes_active".to_owned(),
6866 model_identity: "new-model".to_owned(),
6867 model_version: "1.0.0".to_owned(),
6868 dimension: 4,
6869 normalization_policy: "l2".to_owned(),
6870 chunking_policy: "per_chunk".to_owned(),
6871 preprocessing_policy: "trim".to_owned(),
6872 generator_command: vec![script_path.to_string_lossy().to_string()],
6873 },
6874 &VectorGeneratorPolicy::default(),
6875 )
6876 .expect_err("generator should fail");
6877
6878 assert!(error.to_string().contains("generator nonzero exit"));
6879
6880 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6881 let model_identity: String = conn
6882 .query_row(
6883 "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
6884 [],
6885 |row| row.get(0),
6886 )
6887 .expect("model identity");
6888 assert_eq!(model_identity, "old-model");
6889 let snapshot_hash: String = conn
6890 .query_row(
6891 "SELECT snapshot_hash FROM vector_embedding_contracts WHERE profile = 'default'",
6892 [],
6893 |row| row.get(0),
6894 )
6895 .expect("snapshot hash");
6896 assert_eq!(snapshot_hash, "old-snapshot");
6897 let vec_count: i64 = conn
6898 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
6899 row.get(0)
6900 })
6901 .expect("vec count");
6902 assert_eq!(vec_count, 1);
6903 let failure_count: i64 = conn
6904 .query_row(
6905 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6906 [],
6907 |row| row.get(0),
6908 )
6909 .expect("failure count");
6910 assert_eq!(failure_count, 1);
6911 let failure_metadata: String = conn
6912 .query_row(
6913 "SELECT metadata_json FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
6914 [],
6915 |row| row.get(0),
6916 )
6917 .expect("failure metadata");
6918 assert!(failure_metadata.contains("\"failure_class\":\"generator nonzero exit\""));
6919 }
6920
6921 #[cfg(feature = "sqlite-vec")]
6922 #[test]
6923 fn regenerate_vector_embeddings_snapshot_drift_is_retryable_and_non_mutating() {
6924 let db = NamedTempFile::new().expect("temp file");
6925 let schema = Arc::new(SchemaManager::new());
6926 let temp_dir = tempfile::tempdir().expect("temp dir");
6927 let script_path = temp_dir.path().join("vector-generator-drift.sh");
6928 let db_path = db.path().to_string_lossy().to_string();
6929
6930 fs::write(
6931 &script_path,
6932 format!(
6933 r#"#!/usr/bin/env bash
6934set -euo pipefail
6935python3 -c 'import json, sqlite3, sys
6936payload = json.load(sys.stdin)
6937conn = sqlite3.connect({db_path:?})
6938conn.execute("INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES (?, ?, ?, ?)", ("chunk-2", "doc-1", "late arriving text", 101))
6939conn.commit()
6940conn.close()
6941embeddings = [{{"chunk_id": chunk["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}} for chunk in payload["chunks"]]
6942json.dump({{"embeddings": embeddings}}, sys.stdout)'
6943"#,
6944 ),
6945 )
6946 .expect("write drift script");
6947 set_file_mode(&script_path, 0o755);
6948
6949 {
6950 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6951 schema.bootstrap(&conn).expect("bootstrap");
6952 conn.execute(
6953 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
6954 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
6955 [],
6956 )
6957 .expect("insert node");
6958 conn.execute(
6959 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
6960 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
6961 [],
6962 )
6963 .expect("insert chunk");
6964 schema
6965 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
6966 .expect("ensure vec profile");
6967 }
6968
6969 let service = AdminService::new(db.path(), Arc::clone(&schema));
6970 let error = service
6971 .regenerate_vector_embeddings_with_policy(
6972 &VectorRegenerationConfig {
6973 profile: "default".to_owned(),
6974 table_name: "vec_nodes_active".to_owned(),
6975 model_identity: "test-model".to_owned(),
6976 model_version: "1.0.0".to_owned(),
6977 dimension: 4,
6978 normalization_policy: "l2".to_owned(),
6979 chunking_policy: "per_chunk".to_owned(),
6980 preprocessing_policy: "trim".to_owned(),
6981 generator_command: vec![script_path.to_string_lossy().to_string()],
6982 },
6983 &VectorGeneratorPolicy::default(),
6984 )
6985 .expect_err("snapshot drift should fail");
6986
6987 assert!(
6988 error
6989 .to_string()
6990 .contains("vector regeneration snapshot drift:")
6991 );
6992 assert!(error.to_string().contains("[retryable]"));
6993
6994 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
6995 let contract_count: i64 = conn
6996 .query_row(
6997 "SELECT count(*) FROM vector_embedding_contracts",
6998 [],
6999 |row| row.get(0),
7000 )
7001 .expect("contract count");
7002 assert_eq!(contract_count, 0);
7003 let vec_count: i64 = conn
7004 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7005 row.get(0)
7006 })
7007 .expect("vec count");
7008 assert_eq!(vec_count, 0);
7009 let failure_count: i64 = conn
7010 .query_row(
7011 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7012 [],
7013 |row| row.get(0),
7014 )
7015 .expect("failure count");
7016 assert_eq!(failure_count, 1);
7017 }
7018
7019 #[cfg(feature = "sqlite-vec")]
7020 #[test]
7021 fn regenerate_vector_embeddings_times_out_and_kills_generator() {
7022 let (_db, service) = setup();
7023 let temp_dir = tempfile::tempdir().expect("temp dir");
7024 let script_path = temp_dir.path().join("vector-generator-timeout.sh");
7025
7026 fs::write(
7027 &script_path,
7028 "#!/usr/bin/env bash\nset -euo pipefail\nsleep 1\nprintf '{\"embeddings\":[]}'\n",
7029 )
7030 .expect("write timeout script");
7031 set_file_mode(&script_path, 0o755);
7032
7033 let error = service
7034 .regenerate_vector_embeddings_with_policy(
7035 &VectorRegenerationConfig {
7036 profile: "default".to_owned(),
7037 table_name: "vec_nodes_active".to_owned(),
7038 model_identity: "model".to_owned(),
7039 model_version: "1.0.0".to_owned(),
7040 dimension: 4,
7041 normalization_policy: "l2".to_owned(),
7042 chunking_policy: "per_chunk".to_owned(),
7043 preprocessing_policy: "trim".to_owned(),
7044 generator_command: vec![script_path.to_string_lossy().to_string()],
7045 },
7046 &VectorGeneratorPolicy {
7047 timeout_ms: 50,
7048 max_stdout_bytes: 1024,
7049 max_stderr_bytes: 1024,
7050 max_input_bytes: 1024,
7051 max_chunks: 10,
7052 require_absolute_executable: true,
7053 reject_world_writable_executable: true,
7054 allowed_executable_roots: vec![],
7055 preserve_env_vars: vec![],
7056 },
7057 )
7058 .expect_err("generator should time out");
7059 assert!(error.to_string().contains("generator timeout"));
7060 }
7061
7062 #[cfg(feature = "sqlite-vec")]
7063 #[test]
7064 fn regenerate_vector_embeddings_rejects_oversized_stdout() {
7065 let (_db, service) = setup();
7066 let temp_dir = tempfile::tempdir().expect("temp dir");
7067 let script_path = temp_dir.path().join("vector-generator-stdout.sh");
7068
7069 fs::write(
7070 &script_path,
7071 "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stdout.write(\"x\" * 5000)'\n",
7072 )
7073 .expect("write stdout script");
7074 set_file_mode(&script_path, 0o755);
7075
7076 let error = service
7077 .regenerate_vector_embeddings_with_policy(
7078 &VectorRegenerationConfig {
7079 profile: "default".to_owned(),
7080 table_name: "vec_nodes_active".to_owned(),
7081 model_identity: "model".to_owned(),
7082 model_version: "1.0.0".to_owned(),
7083 dimension: 4,
7084 normalization_policy: "l2".to_owned(),
7085 chunking_policy: "per_chunk".to_owned(),
7086 preprocessing_policy: "trim".to_owned(),
7087 generator_command: vec![script_path.to_string_lossy().to_string()],
7088 },
7089 &VectorGeneratorPolicy {
7090 timeout_ms: 1000,
7091 max_stdout_bytes: 128,
7092 max_stderr_bytes: 1024,
7093 max_input_bytes: 1024,
7094 max_chunks: 10,
7095 require_absolute_executable: true,
7096 reject_world_writable_executable: true,
7097 allowed_executable_roots: vec![],
7098 preserve_env_vars: vec![],
7099 },
7100 )
7101 .expect_err("generator stdout should overflow");
7102 assert!(error.to_string().contains("stdout overflow"));
7103 }
7104
7105 #[cfg(feature = "sqlite-vec")]
7106 #[test]
7107 fn regenerate_vector_embeddings_rejects_oversized_stderr() {
7108 let (_db, service) = setup();
7109 let temp_dir = tempfile::tempdir().expect("temp dir");
7110 let script_path = temp_dir.path().join("vector-generator-stderr.sh");
7111
7112 fs::write(
7113 &script_path,
7114 "#!/usr/bin/env bash\nset -euo pipefail\npython3 -c 'import sys; sys.stderr.write(\"e\" * 5000); sys.exit(7)'\n",
7115 )
7116 .expect("write stderr script");
7117 set_file_mode(&script_path, 0o755);
7118
7119 let error = service
7120 .regenerate_vector_embeddings_with_policy(
7121 &VectorRegenerationConfig {
7122 profile: "default".to_owned(),
7123 table_name: "vec_nodes_active".to_owned(),
7124 model_identity: "model".to_owned(),
7125 model_version: "1.0.0".to_owned(),
7126 dimension: 4,
7127 normalization_policy: "l2".to_owned(),
7128 chunking_policy: "per_chunk".to_owned(),
7129 preprocessing_policy: "trim".to_owned(),
7130 generator_command: vec![script_path.to_string_lossy().to_string()],
7131 },
7132 &VectorGeneratorPolicy {
7133 timeout_ms: 1000,
7134 max_stdout_bytes: 1024,
7135 max_stderr_bytes: 128,
7136 max_input_bytes: 1024,
7137 max_chunks: 10,
7138 require_absolute_executable: true,
7139 reject_world_writable_executable: true,
7140 allowed_executable_roots: vec![],
7141 preserve_env_vars: vec![],
7142 },
7143 )
7144 .expect_err("generator stderr should overflow");
7145 assert!(error.to_string().contains("stderr overflow"));
7146 }
7147
7148 #[cfg(feature = "sqlite-vec")]
7149 #[test]
7150 fn regenerate_vector_embeddings_rejects_oversized_input_before_spawn() {
7151 let db = NamedTempFile::new().expect("temp file");
7152 let schema = Arc::new(SchemaManager::new());
7153 {
7154 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7155 schema.bootstrap(&conn).expect("bootstrap");
7156 conn.execute(
7157 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7158 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7159 [],
7160 )
7161 .expect("insert node");
7162 conn.execute(
7163 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7164 VALUES ('chunk-1', 'doc-1', 'this chunk is intentionally long to exceed the configured input limit', 100)",
7165 [],
7166 )
7167 .expect("insert chunk");
7168 }
7169
7170 let service = AdminService::new(db.path(), Arc::clone(&schema));
7171 let error = service
7172 .regenerate_vector_embeddings_with_policy(
7173 &VectorRegenerationConfig {
7174 profile: "default".to_owned(),
7175 table_name: "vec_nodes_active".to_owned(),
7176 model_identity: "model".to_owned(),
7177 model_version: "1.0.0".to_owned(),
7178 dimension: 4,
7179 normalization_policy: "l2".to_owned(),
7180 chunking_policy: "per_chunk".to_owned(),
7181 preprocessing_policy: "trim".to_owned(),
7182 generator_command: vec!["/bin/echo".to_owned()],
7183 },
7184 &VectorGeneratorPolicy {
7185 timeout_ms: 1000,
7186 max_stdout_bytes: 1024,
7187 max_stderr_bytes: 1024,
7188 max_input_bytes: 32,
7189 max_chunks: 10,
7190 require_absolute_executable: true,
7191 reject_world_writable_executable: true,
7192 allowed_executable_roots: vec![],
7193 preserve_env_vars: vec![],
7194 },
7195 )
7196 .expect_err("input size should be rejected before spawn");
7197 assert!(error.to_string().contains("payload too large"));
7198 }
7199
7200 #[cfg(feature = "sqlite-vec")]
7201 #[test]
7202 fn regenerate_vector_embeddings_rejects_excessive_chunk_count_before_spawn() {
7203 let db = NamedTempFile::new().expect("temp file");
7204 let schema = Arc::new(SchemaManager::new());
7205 {
7206 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7207 schema.bootstrap(&conn).expect("bootstrap");
7208 conn.execute(
7209 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7210 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7211 [],
7212 )
7213 .expect("insert node");
7214 conn.execute(
7215 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-1', 'doc-1', 'a', 100)",
7216 [],
7217 )
7218 .expect("insert chunk 1");
7219 conn.execute(
7220 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) VALUES ('chunk-2', 'doc-1', 'b', 101)",
7221 [],
7222 )
7223 .expect("insert chunk 2");
7224 }
7225
7226 let service = AdminService::new(db.path(), Arc::clone(&schema));
7227 let error = service
7228 .regenerate_vector_embeddings_with_policy(
7229 &VectorRegenerationConfig {
7230 profile: "default".to_owned(),
7231 table_name: "vec_nodes_active".to_owned(),
7232 model_identity: "model".to_owned(),
7233 model_version: "1.0.0".to_owned(),
7234 dimension: 4,
7235 normalization_policy: "l2".to_owned(),
7236 chunking_policy: "per_chunk".to_owned(),
7237 preprocessing_policy: "trim".to_owned(),
7238 generator_command: vec!["/bin/echo".to_owned()],
7239 },
7240 &VectorGeneratorPolicy {
7241 timeout_ms: 1000,
7242 max_stdout_bytes: 1024,
7243 max_stderr_bytes: 1024,
7244 max_input_bytes: 2048,
7245 max_chunks: 1,
7246 require_absolute_executable: true,
7247 reject_world_writable_executable: true,
7248 allowed_executable_roots: vec![],
7249 preserve_env_vars: vec![],
7250 },
7251 )
7252 .expect_err("chunk count should be rejected before spawn");
7253 assert!(error.to_string().contains("payload too large"));
7254 }
7255
7256 #[cfg(feature = "sqlite-vec")]
7257 #[test]
7258 fn regenerate_vector_embeddings_malformed_json_leaves_contract_and_vec_rows_unchanged() {
7259 let db = NamedTempFile::new().expect("temp file");
7260 let schema = Arc::new(SchemaManager::new());
7261 let temp_dir = tempfile::tempdir().expect("temp dir");
7262 let script_path = temp_dir.path().join("vector-generator-bad-json.sh");
7263
7264 fs::write(
7265 &script_path,
7266 "#!/usr/bin/env bash\nset -euo pipefail\nprintf 'not-json'\n",
7267 )
7268 .expect("write bad json script");
7269 set_file_mode(&script_path, 0o755);
7270
7271 {
7272 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7273 schema.bootstrap(&conn).expect("bootstrap");
7274 conn.execute(
7275 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7276 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7277 [],
7278 )
7279 .expect("insert node");
7280 conn.execute(
7281 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7282 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7283 [],
7284 )
7285 .expect("insert chunk");
7286 schema
7287 .ensure_vector_profile(&conn, "default", "vec_nodes_active", 4)
7288 .expect("ensure vec profile");
7289 conn.execute(
7290 r"
7291 INSERT INTO vector_embedding_contracts (
7292 profile,
7293 table_name,
7294 model_identity,
7295 model_version,
7296 dimension,
7297 normalization_policy,
7298 chunking_policy,
7299 preprocessing_policy,
7300 generator_command_json,
7301 applied_at,
7302 snapshot_hash
7303 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11)
7304 ",
7305 rusqlite::params![
7306 "default",
7307 "vec_nodes_active",
7308 "old-model",
7309 "0.9.0",
7310 4,
7311 "l2",
7312 "per_chunk",
7313 "trim",
7314 "[\"/bin/echo\"]",
7315 111,
7316 "old-snapshot"
7317 ],
7318 )
7319 .expect("seed contract");
7320 conn.execute(
7321 "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('chunk-1', zeroblob(16))",
7322 [],
7323 )
7324 .expect("seed vec row");
7325 }
7326
7327 let service = AdminService::new(db.path(), Arc::clone(&schema));
7328 let error = service
7329 .regenerate_vector_embeddings_with_policy(
7330 &VectorRegenerationConfig {
7331 profile: "default".to_owned(),
7332 table_name: "vec_nodes_active".to_owned(),
7333 model_identity: "new-model".to_owned(),
7334 model_version: "1.0.0".to_owned(),
7335 dimension: 4,
7336 normalization_policy: "l2".to_owned(),
7337 chunking_policy: "per_chunk".to_owned(),
7338 preprocessing_policy: "trim".to_owned(),
7339 generator_command: vec![script_path.to_string_lossy().to_string()],
7340 },
7341 &VectorGeneratorPolicy::default(),
7342 )
7343 .expect_err("bad json should fail");
7344
7345 assert!(error.to_string().contains("decode generator output"));
7346
7347 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7348 let model_identity: String = conn
7349 .query_row(
7350 "SELECT model_identity FROM vector_embedding_contracts WHERE profile = 'default'",
7351 [],
7352 |row| row.get(0),
7353 )
7354 .expect("model identity");
7355 assert_eq!(model_identity, "old-model");
7356 let vec_count: i64 = conn
7357 .query_row("SELECT count(*) FROM vec_nodes_active", [], |row| {
7358 row.get(0)
7359 })
7360 .expect("vec count");
7361 assert_eq!(vec_count, 1);
7362 let failure_count: i64 = conn
7363 .query_row(
7364 "SELECT count(*) FROM provenance_events WHERE event_type = 'vector_regeneration_failed' AND subject = 'default'",
7365 [],
7366 |row| row.get(0),
7367 )
7368 .expect("failure count");
7369 assert_eq!(failure_count, 1);
7370 }
7371
7372 #[cfg(feature = "sqlite-vec")]
7373 #[test]
7374 fn regenerate_vector_embeddings_rejects_whitespace_only_profile_before_mutation() {
7375 let db = NamedTempFile::new().expect("temp file");
7376 let schema = Arc::new(SchemaManager::new());
7377 {
7378 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7379 schema.bootstrap(&conn).expect("bootstrap");
7380 conn.execute(
7381 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7382 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7383 [],
7384 )
7385 .expect("insert node");
7386 conn.execute(
7387 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7388 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7389 [],
7390 )
7391 .expect("insert chunk");
7392 }
7393
7394 let service = AdminService::new(db.path(), Arc::clone(&schema));
7395 let error = service
7396 .regenerate_vector_embeddings(&VectorRegenerationConfig {
7397 profile: " ".to_owned(),
7398 table_name: "vec_nodes_active".to_owned(),
7399 model_identity: "test-model".to_owned(),
7400 model_version: "1.0.0".to_owned(),
7401 dimension: 4,
7402 normalization_policy: "l2".to_owned(),
7403 chunking_policy: "per_chunk".to_owned(),
7404 preprocessing_policy: "trim".to_owned(),
7405 generator_command: vec!["/bin/echo".to_owned()],
7406 })
7407 .expect_err("whitespace profile should be rejected");
7408
7409 assert!(error.to_string().contains("invalid contract"));
7410 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7411 let contract_count: i64 = conn
7412 .query_row(
7413 "SELECT count(*) FROM vector_embedding_contracts",
7414 [],
7415 |row| row.get(0),
7416 )
7417 .expect("contract count");
7418 assert_eq!(contract_count, 0);
7419 let provenance_count: i64 = conn
7420 .query_row("SELECT count(*) FROM provenance_events", [], |row| {
7421 row.get(0)
7422 })
7423 .expect("provenance count");
7424 assert_eq!(provenance_count, 0);
7425 }
7426
7427 #[cfg(feature = "sqlite-vec")]
7428 #[test]
7429 fn regenerate_vector_embeddings_rejects_world_writable_executable_when_policy_requires_it() {
7430 let (_db, service) = setup();
7431 let temp_dir = tempfile::tempdir().expect("temp dir");
7432 let script_path = temp_dir.path().join("vector-generator-world-writable.sh");
7433
7434 fs::write(
7435 &script_path,
7436 "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7437 )
7438 .expect("write script");
7439 set_file_mode(&script_path, 0o777);
7440
7441 let error = service
7442 .regenerate_vector_embeddings_with_policy(
7443 &VectorRegenerationConfig {
7444 profile: "default".to_owned(),
7445 table_name: "vec_nodes_active".to_owned(),
7446 model_identity: "model".to_owned(),
7447 model_version: "1.0.0".to_owned(),
7448 dimension: 4,
7449 normalization_policy: "l2".to_owned(),
7450 chunking_policy: "per_chunk".to_owned(),
7451 preprocessing_policy: "trim".to_owned(),
7452 generator_command: vec![script_path.to_string_lossy().to_string()],
7453 },
7454 &VectorGeneratorPolicy::default(),
7455 )
7456 .expect_err("world-writable executable should be rejected");
7457
7458 assert!(error.to_string().contains("world-writable executable"));
7459 }
7460
7461 #[cfg(feature = "sqlite-vec")]
7462 #[test]
7463 fn regenerate_vector_embeddings_rejects_executable_outside_allowlisted_roots() {
7464 let (_db, service) = setup();
7465 let temp_dir = tempfile::tempdir().expect("temp dir");
7466 let allowed_dir = tempfile::tempdir().expect("allowed dir");
7467 let script_path = temp_dir.path().join("vector-generator-outside-root.sh");
7468
7469 fs::write(
7470 &script_path,
7471 "#!/usr/bin/env bash\nset -euo pipefail\nprintf '{\"embeddings\":[]}'\n",
7472 )
7473 .expect("write script");
7474 set_file_mode(&script_path, 0o755);
7475
7476 let error = service
7477 .regenerate_vector_embeddings_with_policy(
7478 &VectorRegenerationConfig {
7479 profile: "default".to_owned(),
7480 table_name: "vec_nodes_active".to_owned(),
7481 model_identity: "model".to_owned(),
7482 model_version: "1.0.0".to_owned(),
7483 dimension: 4,
7484 normalization_policy: "l2".to_owned(),
7485 chunking_policy: "per_chunk".to_owned(),
7486 preprocessing_policy: "trim".to_owned(),
7487 generator_command: vec![script_path.to_string_lossy().to_string()],
7488 },
7489 &VectorGeneratorPolicy {
7490 timeout_ms: 1000,
7491 max_stdout_bytes: 1024,
7492 max_stderr_bytes: 1024,
7493 max_input_bytes: 1024,
7494 max_chunks: 10,
7495 require_absolute_executable: true,
7496 reject_world_writable_executable: true,
7497 allowed_executable_roots: vec![
7498 allowed_dir.path().to_string_lossy().to_string(),
7499 ],
7500 preserve_env_vars: vec![],
7501 },
7502 )
7503 .expect_err("disallowed root should be rejected");
7504
7505 assert!(
7506 error
7507 .to_string()
7508 .contains("outside allowed executable roots")
7509 );
7510 }
7511
7512 #[cfg(feature = "sqlite-vec")]
7513 #[test]
7514 fn regenerate_vector_embeddings_rejects_future_contract_format_version() {
7515 let db = NamedTempFile::new().expect("temp file");
7516 let schema = Arc::new(SchemaManager::new());
7517 {
7518 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7519 schema.bootstrap(&conn).expect("bootstrap");
7520 conn.execute(
7521 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7522 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7523 [],
7524 )
7525 .expect("insert node");
7526 conn.execute(
7527 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7528 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7529 [],
7530 )
7531 .expect("insert chunk");
7532 conn.execute(
7533 r"
7534 INSERT INTO vector_embedding_contracts (
7535 profile,
7536 table_name,
7537 model_identity,
7538 model_version,
7539 dimension,
7540 normalization_policy,
7541 chunking_policy,
7542 preprocessing_policy,
7543 generator_command_json,
7544 applied_at,
7545 snapshot_hash,
7546 contract_format_version,
7547 updated_at
7548 ) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9, ?10, ?11, ?12, ?13)
7549 ",
7550 rusqlite::params![
7551 "default",
7552 "vec_nodes_active",
7553 "old-model",
7554 "0.9.0",
7555 4,
7556 "l2",
7557 "per_chunk",
7558 "trim",
7559 "[\"/bin/echo\"]",
7560 111,
7561 "old-snapshot",
7562 99,
7563 111,
7564 ],
7565 )
7566 .expect("seed future contract");
7567 }
7568
7569 let service = AdminService::new(db.path(), Arc::clone(&schema));
7570 let error = service
7571 .regenerate_vector_embeddings(&VectorRegenerationConfig {
7572 profile: "default".to_owned(),
7573 table_name: "vec_nodes_active".to_owned(),
7574 model_identity: "test-model".to_owned(),
7575 model_version: "1.0.0".to_owned(),
7576 dimension: 4,
7577 normalization_policy: "l2".to_owned(),
7578 chunking_policy: "per_chunk".to_owned(),
7579 preprocessing_policy: "trim".to_owned(),
7580 generator_command: vec!["/bin/echo".to_owned()],
7581 })
7582 .expect_err("future contract version should be rejected");
7583
7584 assert!(error.to_string().contains("unsupported"));
7585 assert!(error.to_string().contains("format version"));
7586 }
7587
7588 #[cfg(feature = "sqlite-vec")]
7589 #[test]
7590 fn regenerate_vector_embeddings_clears_environment_except_preserved_vars() {
7591 let db = NamedTempFile::new().expect("temp file");
7592 let schema = Arc::new(SchemaManager::new());
7593 let temp_dir = tempfile::tempdir().expect("temp dir");
7594 let script_path = temp_dir.path().join("vector-generator-env.sh");
7595 {
7596 let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
7597 schema.bootstrap(&conn).expect("bootstrap");
7598 conn.execute(
7599 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7600 VALUES ('row-1', 'doc-1', 'Document', '{}', 100, 'source-1')",
7601 [],
7602 )
7603 .expect("insert node");
7604 conn.execute(
7605 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7606 VALUES ('chunk-1', 'doc-1', 'budget discussion', 100)",
7607 [],
7608 )
7609 .expect("insert chunk");
7610 }
7611
7612 fs::write(
7613 &script_path,
7614 r#"#!/usr/bin/env bash
7615set -euo pipefail
7616if [[ "${VECTOR_TEST_SECRET:-}" != "expected" ]]; then
7617 echo "missing secret" >&2
7618 exit 9
7619fi
7620python3 -c 'import json, sys
7621payload = json.load(sys.stdin)
7622json.dump({"embeddings": [{"chunk_id": payload["chunks"][0]["chunk_id"], "embedding": [1.0, 0.0, 0.0, 0.0]}]}, sys.stdout)'
7623"#,
7624 )
7625 .expect("write script");
7626 set_file_mode(&script_path, 0o755);
7627
7628 let service = AdminService::new(db.path(), Arc::clone(&schema));
7629 unsafe {
7630 std::env::set_var("VECTOR_TEST_SECRET", "expected");
7631 }
7632 let missing_env = service
7633 .regenerate_vector_embeddings_with_policy(
7634 &VectorRegenerationConfig {
7635 profile: "default".to_owned(),
7636 table_name: "vec_nodes_active".to_owned(),
7637 model_identity: "model".to_owned(),
7638 model_version: "1.0.0".to_owned(),
7639 dimension: 4,
7640 normalization_policy: "l2".to_owned(),
7641 chunking_policy: "per_chunk".to_owned(),
7642 preprocessing_policy: "trim".to_owned(),
7643 generator_command: vec![script_path.to_string_lossy().to_string()],
7644 },
7645 &VectorGeneratorPolicy::default(),
7646 )
7647 .expect_err("non-preserved env var should be dropped");
7648 assert!(missing_env.to_string().contains("nonzero exit"));
7649
7650 let report = service
7651 .regenerate_vector_embeddings_with_policy(
7652 &VectorRegenerationConfig {
7653 profile: "default".to_owned(),
7654 table_name: "vec_nodes_active".to_owned(),
7655 model_identity: "model".to_owned(),
7656 model_version: "1.0.0".to_owned(),
7657 dimension: 4,
7658 normalization_policy: "l2".to_owned(),
7659 chunking_policy: "per_chunk".to_owned(),
7660 preprocessing_policy: "trim".to_owned(),
7661 generator_command: vec![script_path.to_string_lossy().to_string()],
7662 },
7663 &VectorGeneratorPolicy {
7664 timeout_ms: 1000,
7665 max_stdout_bytes: 1024,
7666 max_stderr_bytes: 1024,
7667 max_input_bytes: 4096,
7668 max_chunks: 10,
7669 require_absolute_executable: true,
7670 reject_world_writable_executable: true,
7671 allowed_executable_roots: vec![],
7672 preserve_env_vars: vec!["VECTOR_TEST_SECRET".to_owned()],
7673 },
7674 )
7675 .expect("preserved env var should allow success");
7676 assert_eq!(report.regenerated_rows, 1);
7677 unsafe {
7678 std::env::remove_var("VECTOR_TEST_SECRET");
7679 }
7680 }
7681
7682 #[test]
7683 fn check_semantics_detects_orphaned_chunk() {
7684 let (db, service) = setup();
7685 {
7686 let conn = sqlite::open_connection(db.path()).expect("conn");
7688 conn.execute(
7689 "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
7690 VALUES ('c1', 'ghost-node', 'text', 100)",
7691 [],
7692 )
7693 .expect("insert orphaned chunk");
7694 }
7695 let report = service.check_semantics().expect("semantics check");
7696 assert_eq!(report.orphaned_chunks, 1);
7697 }
7698
7699 #[test]
7700 fn check_semantics_detects_null_source_ref() {
7701 let (db, service) = setup();
7702 {
7703 let conn = sqlite::open_connection(db.path()).expect("conn");
7704 conn.execute(
7705 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at) \
7706 VALUES ('r1', 'lg1', 'Meeting', '{}', 100)",
7707 [],
7708 )
7709 .expect("insert node with null source_ref");
7710 }
7711 let report = service.check_semantics().expect("semantics check");
7712 assert_eq!(report.null_source_ref_nodes, 1);
7713 }
7714
7715 #[test]
7716 fn check_semantics_detects_broken_step_fk() {
7717 let (db, service) = setup();
7718 {
7719 let conn = sqlite::open_connection(db.path()).expect("conn");
7722 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7723 .expect("disable FK");
7724 conn.execute(
7725 "INSERT INTO steps (id, run_id, kind, status, properties, created_at) \
7726 VALUES ('s1', 'ghost-run', 'llm', 'completed', '{}', 100)",
7727 [],
7728 )
7729 .expect("insert step with ghost run_id");
7730 }
7731 let report = service.check_semantics().expect("semantics check");
7732 assert_eq!(report.broken_step_fk, 1);
7733 }
7734
7735 #[test]
7736 fn check_semantics_detects_broken_action_fk() {
7737 let (db, service) = setup();
7738 {
7739 let conn = sqlite::open_connection(db.path()).expect("conn");
7740 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7741 .expect("disable FK");
7742 conn.execute(
7743 "INSERT INTO actions (id, step_id, kind, status, properties, created_at) \
7744 VALUES ('a1', 'ghost-step', 'emit', 'completed', '{}', 100)",
7745 [],
7746 )
7747 .expect("insert action with ghost step_id");
7748 }
7749 let report = service.check_semantics().expect("semantics check");
7750 assert_eq!(report.broken_action_fk, 1);
7751 }
7752
7753 #[test]
7754 fn check_semantics_detects_stale_fts_rows() {
7755 let (db, service) = setup();
7756 {
7757 let conn = sqlite::open_connection(db.path()).expect("conn");
7758 conn.execute(
7761 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7762 VALUES ('ghost-chunk', 'any-node', 'Meeting', 'stale content')",
7763 [],
7764 )
7765 .expect("insert stale FTS row");
7766 }
7767 let report = service.check_semantics().expect("semantics check");
7768 assert_eq!(report.stale_fts_rows, 1);
7769 }
7770
7771 #[test]
7772 fn check_semantics_detects_fts_rows_for_superseded_nodes() {
7773 let (db, service) = setup();
7774 {
7775 let conn = sqlite::open_connection(db.path()).expect("conn");
7776 conn.execute(
7778 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7779 VALUES ('r1', 'lg-sup', 'Meeting', '{}', 100, 200, 'src-1')",
7780 [],
7781 )
7782 .expect("insert superseded node");
7783 conn.execute(
7785 "INSERT INTO fts_nodes (chunk_id, node_logical_id, kind, text_content) \
7786 VALUES ('ck-x', 'lg-sup', 'Meeting', 'superseded content')",
7787 [],
7788 )
7789 .expect("insert FTS row for superseded node");
7790 }
7791 let report = service.check_semantics().expect("semantics check");
7792 assert_eq!(report.fts_rows_for_superseded_nodes, 1);
7793 }
7794
7795 #[test]
7796 fn check_semantics_detects_dangling_edges() {
7797 let (db, service) = setup();
7798 {
7799 let conn = sqlite::open_connection(db.path()).expect("conn");
7800 conn.execute_batch("PRAGMA foreign_keys = OFF;")
7801 .expect("disable FK");
7802 conn.execute(
7804 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
7805 VALUES ('r1', 'lg-src', 'Meeting', '{}', 100, 'src-1')",
7806 [],
7807 )
7808 .expect("insert source node");
7809 conn.execute(
7810 "INSERT INTO edges \
7811 (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
7812 VALUES ('e1', 'edge-1', 'lg-src', 'ghost-target', 'LINKS', '{}', 100, 'src-1')",
7813 [],
7814 )
7815 .expect("insert dangling edge");
7816 }
7817 let report = service.check_semantics().expect("semantics check");
7818 assert_eq!(report.dangling_edges, 1);
7819 }
7820
7821 #[test]
7822 fn check_semantics_detects_orphaned_supersession_chains() {
7823 let (db, service) = setup();
7824 {
7825 let conn = sqlite::open_connection(db.path()).expect("conn");
7826 conn.execute(
7828 "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
7829 VALUES ('r1', 'lg-orphaned', 'Meeting', '{}', 100, 200, 'src-1')",
7830 [],
7831 )
7832 .expect("insert fully superseded node");
7833 }
7834 let report = service.check_semantics().expect("semantics check");
7835 assert_eq!(report.orphaned_supersession_chains, 1);
7836 }
7837
7838 #[test]
7839 fn safe_export_writes_manifest_with_sha256() {
7840 let (_db, service) = setup();
7841 let export_dir = tempfile::TempDir::new().expect("temp dir");
7842 let export_path = export_dir.path().join("backup.db");
7843
7844 let manifest = service
7845 .safe_export(
7846 &export_path,
7847 SafeExportOptions {
7848 force_checkpoint: false,
7849 },
7850 )
7851 .expect("export");
7852
7853 assert!(export_path.exists(), "exported db should exist");
7854 let manifest_path = export_dir.path().join("backup.db.export-manifest.json");
7855 assert!(
7856 manifest_path.exists(),
7857 "manifest file should exist at {}",
7858 manifest_path.display()
7859 );
7860 assert_eq!(manifest.sha256.len(), 64, "sha256 should be 64 hex chars");
7861 assert!(
7862 manifest.exported_at > 0,
7863 "exported_at should be a unix timestamp"
7864 );
7865 assert_eq!(
7866 manifest.schema_version,
7867 SchemaManager::new().current_version().0,
7868 "schema_version should match the live schema version"
7869 );
7870 assert_eq!(manifest.protocol_version, 1, "protocol_version should be 1");
7871 assert!(manifest.page_count > 0, "page_count should be positive");
7872 }
7873
7874 #[test]
7875 fn safe_export_preserves_operational_validation_contracts() {
7876 let (_db, service) = setup();
7877 let validation_json = r#"{"format_version":1,"mode":"enforce","additional_properties":false,"fields":[{"name":"status","type":"string","required":true,"enum":["ok","failed"]}]}"#;
7878 service
7879 .register_operational_collection(&OperationalRegisterRequest {
7880 name: "connector_health".to_owned(),
7881 kind: OperationalCollectionKind::LatestState,
7882 schema_json: "{}".to_owned(),
7883 retention_json: "{}".to_owned(),
7884 filter_fields_json: "[]".to_owned(),
7885 validation_json: validation_json.to_owned(),
7886 secondary_indexes_json: "[]".to_owned(),
7887 format_version: 1,
7888 })
7889 .expect("register collection");
7890
7891 let export_dir = tempfile::TempDir::new().expect("temp dir");
7892 let export_path = export_dir.path().join("backup.db");
7893 service
7894 .safe_export(
7895 &export_path,
7896 SafeExportOptions {
7897 force_checkpoint: false,
7898 },
7899 )
7900 .expect("export");
7901
7902 let exported = sqlite::open_connection(&export_path).expect("exported conn");
7903 let exported_validation_json: String = exported
7904 .query_row(
7905 "SELECT validation_json FROM operational_collections WHERE name = 'connector_health'",
7906 [],
7907 |row| row.get(0),
7908 )
7909 .expect("validation_json");
7910 assert_eq!(exported_validation_json, validation_json);
7911 }
7912
7913 #[test]
7914 fn safe_export_force_checkpoint_false_skips_wal_pragma() {
7915 let (_db, service) = setup();
7916 let export_dir = tempfile::TempDir::new().expect("temp dir");
7917 let export_path = export_dir.path().join("no-wal.db");
7918
7919 let manifest = service
7921 .safe_export(
7922 &export_path,
7923 SafeExportOptions {
7924 force_checkpoint: false,
7925 },
7926 )
7927 .expect("export with no checkpoint");
7928
7929 assert!(
7930 manifest.page_count > 0,
7931 "page_count must be populated regardless of checkpoint mode"
7932 );
7933 assert_eq!(
7934 manifest.schema_version,
7935 SchemaManager::new().current_version().0
7936 );
7937 assert_eq!(manifest.protocol_version, 1);
7938 }
7939
/// A row written while the database is in WAL mode with automatic
/// checkpointing disabled must still be present in an export taken with
/// `force_checkpoint: false`.
#[test]
fn safe_export_force_checkpoint_false_still_captures_wal_backed_changes() {
    let (db, service) = setup();

    // This connection is deliberately kept open until the end of the test.
    // NOTE(review): presumably so that closing it cannot checkpoint the WAL
    // before the export runs — confirm.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let mode = conn
        .query_row("PRAGMA journal_mode=WAL", [], |row| {
            row.get::<_, String>(0)
        })
        .expect("enable wal");
    assert_eq!(mode.to_lowercase(), "wal");
    let autocheckpoint = conn
        .query_row("PRAGMA wal_autocheckpoint=0", [], |row| {
            row.get::<_, i64>(0)
        })
        .expect("disable auto checkpoint");
    assert_eq!(autocheckpoint, 0);
    conn.execute(
        "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
         VALUES ('r-wal', 'lg-wal', 'Meeting', '{}', 100, 'src-wal')",
        [],
    )
    .expect("insert wal-backed node");

    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let target = export_dir.path().join("wal-backed.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };
    service
        .safe_export(&target, options)
        .expect("export wal-backed db");

    // Read the exported file directly and look for the row inserted above.
    let exported = sqlite::open_connection(&target).expect("open exported db");
    let exported_count = exported
        .query_row(
            "SELECT count(*) FROM nodes WHERE logical_id = 'lg-wal'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("count exported nodes");
    assert_eq!(
        exported_count, 1,
        "safe_export must include committed rows that are still resident in the WAL"
    );
}
7983
/// After excising `source-2`, the chunk `ck1` attached to `lg1` must have no
/// rows left in `fts_nodes`.
#[test]
fn excise_source_removes_searchable_content_after_excision() {
    let (db, service) = setup();

    // Seed two versions of `lg1` (the first already superseded) plus one chunk.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
            "insert v1",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
            "insert v2",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('ck1', 'lg1', 'hello world', 100)",
            "insert chunk",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    // Close the seeding connection before the service does its work.
    drop(seed_conn);

    service.excise_source("source-2").expect("excise");

    let verify_conn = sqlite::open_connection(db.path()).expect("conn");
    let fts_count = verify_conn
        .query_row(
            "SELECT count(*) FROM fts_nodes WHERE chunk_id = 'ck1'",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("fts count");
    assert_eq!(
        fts_count, 0,
        "excised content should not remain searchable after excise"
    );
}
8024
/// With the `sqlite-vec` feature enabled, excising the source of the active
/// version of `lg1` must make `r1` the active row again and leave no chunk,
/// vector, or FTS rows behind.
#[cfg(feature = "sqlite-vec")]
#[test]
fn excise_source_cleans_chunks_and_vec_rows_for_excised_version() {
    let (db, service) = setup();

    // Seed: vector profile, two versions of `lg1`, one chunk and its vec row.
    let seed_conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    service
        .schema_manager
        .ensure_vector_profile(&seed_conn, "default", "vec_nodes_active", 4)
        .expect("ensure vec profile");
    let seed_statements = [
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, superseded_at, source_ref) \
             VALUES ('r1', 'lg1', 'Meeting', '{}', 100, 200, 'source-1')",
            "insert v1",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('r2', 'lg1', 'Meeting', '{}', 200, 'source-2')",
            "insert v2",
        ),
        (
            "INSERT INTO chunks (id, node_logical_id, text_content, created_at) \
             VALUES ('ck1', 'lg1', 'new content', 200)",
            "insert chunk",
        ),
        (
            "INSERT INTO vec_nodes_active (chunk_id, embedding) VALUES ('ck1', zeroblob(16))",
            "insert vec row",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    service.excise_source("source-2").expect("excise");

    let conn = crate::sqlite::open_connection_with_vec(db.path()).expect("vec conn");
    // Small helper for the single-value count queries below.
    let count_rows = |sql: &str, context: &str| -> i64 {
        conn.query_row(sql, [], |row| row.get(0)).expect(context)
    };

    let active_row = conn
        .query_row(
            "SELECT row_id FROM nodes WHERE logical_id = 'lg1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, String>(0),
        )
        .expect("restored active row");
    assert_eq!(active_row, "r1");

    let chunk_count = count_rows(
        "SELECT count(*) FROM chunks WHERE node_logical_id = 'lg1'",
        "chunk count",
    );
    assert_eq!(
        chunk_count, 0,
        "excised source content must not survive as chunks"
    );

    let vec_count = count_rows("SELECT count(*) FROM vec_nodes_active", "vec count");
    assert_eq!(vec_count, 0, "excised source vec rows must be removed");

    let fts_count = count_rows(
        "SELECT count(*) FROM fts_nodes WHERE node_logical_id = 'lg1'",
        "fts count",
    );
    assert_eq!(
        fts_count, 0,
        "excised source content must not remain searchable"
    );
}
8100
/// The `page_count` recorded in the export manifest must equal what
/// `PRAGMA page_count` reports on the exported database file.
#[test]
fn export_page_count_matches_exported_file() {
    let (_db, service) = setup();
    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let target = export_dir.path().join("page-count.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };

    let manifest = service.safe_export(&target, options).expect("export");

    // Ask the exported file itself how many pages it contains.
    let exported = sqlite::open_connection(&target).expect("open exported db");
    let actual_page_count = exported
        .query_row("PRAGMA page_count", [], |row| row.get::<_, u64>(0))
        .expect("page_count from exported file");

    assert_eq!(
        manifest.page_count, actual_page_count,
        "manifest page_count must match the exported file's PRAGMA page_count"
    );
}
8126
/// A successful export must not leave any `*.tmp` entries in the target
/// directory.
#[test]
fn no_temp_file_after_successful_export() {
    let (_db, service) = setup();
    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let target = export_dir.path().join("no-tmp.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };

    service.safe_export(&target, options).expect("export");

    // Collect every readable directory entry whose extension is `tmp`;
    // entries that fail to read are ignored.
    let mut tmp_files = Vec::new();
    for entry in fs::read_dir(export_dir.path()).expect("read export dir") {
        if let Ok(entry) = entry {
            if matches!(entry.path().extension(), Some(ext) if ext == "tmp") {
                tmp_files.push(entry);
            }
        }
    }

    assert!(
        tmp_files.is_empty(),
        "no .tmp files should remain after a successful export, found: {tmp_files:?}"
    );
}
8153
/// The manifest file written next to the export must parse as JSON and
/// contain each of the expected top-level keys.
#[test]
fn export_manifest_is_valid_json() {
    let (_db, service) = setup();
    let export_dir = tempfile::TempDir::new().expect("temp dir");
    let target = export_dir.path().join("valid-json.db");
    let options = SafeExportOptions {
        force_checkpoint: false,
    };

    service.safe_export(&target, options).expect("export");

    let manifest_path = export_dir.path().join("valid-json.db.export-manifest.json");
    let raw_manifest = fs::read_to_string(&manifest_path).expect("read manifest");
    let manifest_json: serde_json::Value =
        serde_json::from_str(&raw_manifest).expect("manifest must be valid JSON");

    // Each required key is paired with the failure message used when it is absent.
    let required_keys = [
        ("exported_at", "manifest must contain exported_at"),
        ("sha256", "manifest must contain sha256"),
        ("schema_version", "manifest must contain schema_version"),
        ("protocol_version", "manifest must contain protocol_version"),
        ("page_count", "manifest must contain page_count"),
    ];
    for (key, message) in required_keys {
        assert!(manifest_json.get(key).is_some(), "{message}");
    }
}
8195
/// A dry-run purge with cutoff 250 must report the two older events as
/// deleted and one as preserved, while leaving all three rows in the table.
#[test]
fn provenance_purge_dry_run_reports_counts() {
    let (db, service) = setup();

    // Seed three events created at 100, 200 and 300.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
            "insert p1",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
            "insert p2",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p3', 'excise', 'lg3', 'src-1', 300)",
            "insert p3",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let options = super::ProvenancePurgeOptions {
        dry_run: true,
        preserve_event_types: Vec::new(),
    };
    let report = service
        .purge_provenance_events(250, &options)
        .expect("dry run purge");

    assert_eq!(report.events_deleted, 2);
    assert_eq!(report.events_preserved, 1);
    assert!(report.oldest_remaining.is_some());

    // The report is only a preview: the table must still hold every row.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let total = conn
        .query_row("SELECT count(*) FROM provenance_events", [], |row| {
            row.get::<_, i64>(0)
        })
        .expect("count");
    assert_eq!(total, 3, "dry_run must not delete any events");
}
8241
/// A real purge with cutoff 150 must delete the event created at 100 and
/// keep the one created at 200.
#[test]
fn provenance_purge_deletes_old_events() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
            "insert p1",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p2', 'node_insert', 'lg2', 'src-1', 200)",
            "insert p2",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let report = service
        .purge_provenance_events(150, &options)
        .expect("purge");

    assert_eq!(report.events_deleted, 1);
    assert_eq!(report.events_preserved, 1);
    assert_eq!(report.oldest_remaining, Some(200));

    // Confirm the deletion actually happened in the table.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let remaining = conn
        .query_row("SELECT count(*) FROM provenance_events", [], |row| {
            row.get::<_, i64>(0)
        })
        .expect("count");
    assert_eq!(remaining, 1);
}
8281
/// With three equally old events and a cutoff of 500, the purge must delete
/// both `node_insert` events and keep the `excise` event.
#[test]
fn provenance_purge_preserves_specified_types() {
    let (db, service) = setup();

    // All three events share created_at = 100; only their types differ.
    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'excise', 'lg1', 'src-1', 100)",
            "insert p1",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p2', 'node_insert', 'lg2', 'src-1', 100)",
            "insert p2",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p3', 'node_insert', 'lg3', 'src-1', 100)",
            "insert p3",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    // NOTE(review): the preserve list is empty even though the test name says
    // "specified" types; the assertions below presumably rely on the service
    // preserving `excise` events by default — confirm against
    // `purge_provenance_events`.
    let options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let report = service
        .purge_provenance_events(500, &options)
        .expect("purge");

    assert_eq!(report.events_deleted, 2);
    assert_eq!(report.events_preserved, 1);

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let remaining_type = conn
        .query_row("SELECT event_type FROM provenance_events", [], |row| {
            row.get::<_, String>(0)
        })
        .expect("remaining event type");
    assert_eq!(remaining_type, "excise");
}
8326
/// A purge with cutoff 0 must delete nothing and report the single seeded
/// event (created at 100) as preserved and as the oldest remaining.
#[test]
fn provenance_purge_noop_with_zero_timestamp() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    seed_conn
        .execute(
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at) \
             VALUES ('p1', 'node_insert', 'lg1', 'src-1', 100)",
            [],
        )
        .expect("insert p1");
    drop(seed_conn);

    let options = super::ProvenancePurgeOptions {
        dry_run: false,
        preserve_event_types: Vec::new(),
    };
    let report = service.purge_provenance_events(0, &options).expect("purge");

    assert_eq!(report.events_deleted, 0);
    assert_eq!(report.events_preserved, 1);
    assert_eq!(report.oldest_remaining, Some(100));
}
8350
/// Restoring `doc-1` after the rows of its edge counterpart `doc-2` were
/// deleted must restore the node, skip `edge-1`, and report `doc-2` as the
/// missing endpoint.
#[test]
fn restore_skips_edge_when_counterpart_purged() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        // Two nodes and one edge connecting them.
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        // Retire events for node A and the edge.
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        // Mark both nodes and the edge as superseded at 200.
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
            "retire node B",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
        // Remove node B entirely so the edge has no counterpart to attach to.
        (
            "DELETE FROM nodes WHERE logical_id = 'doc-2'",
            "purge node B rows",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let report = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!report.was_noop);
    assert_eq!(report.restored_node_rows, 1);
    assert_eq!(report.restored_edge_rows, 0, "edge should not be restored");
    assert_eq!(report.skipped_edges.len(), 1);
    let skipped = &report.skipped_edges[0];
    assert_eq!(skipped.edge_logical_id, "edge-1");
    assert_eq!(skipped.missing_endpoint, "doc-2");

    // The edge row itself must still be marked as superseded.
    let conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_count = conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(active_edge_count, 0, "edge must remain retired");
}
8431
/// Restoring `doc-1` while its edge counterpart `doc-2` was never retired
/// must bring `edge-1` back as an active edge with nothing skipped.
#[test]
fn restore_restores_edges_to_active_nodes() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        // Two nodes and one edge connecting them.
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        // Retire events for node A and the edge.
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        // Only node A and the edge are superseded; node B is left untouched.
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    let report = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!report.was_noop);
    assert_eq!(report.restored_node_rows, 1);
    assert!(report.restored_edge_rows > 0, "edge should be restored");
    assert!(
        report.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_count = conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(active_edge_count, 1, "edge must be active");
}
8503
/// When both endpoints were retired, restoring `doc-2` first and then `doc-1`
/// must leave `edge-1` active, with no edges skipped by the second restore.
#[test]
fn restore_restores_edges_when_both_restored() {
    let (db, service) = setup();

    let seed_conn = sqlite::open_connection(db.path()).expect("conn");
    let seed_statements = [
        // Two nodes and one edge connecting them.
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-a', 'doc-1', 'Document', '{}', 100, 'seed')",
            "insert node A",
        ),
        (
            "INSERT INTO nodes (row_id, logical_id, kind, properties, created_at, source_ref) \
             VALUES ('node-row-b', 'doc-2', 'Document', '{}', 100, 'seed')",
            "insert node B",
        ),
        (
            "INSERT INTO edges \
             (row_id, logical_id, source_logical_id, target_logical_id, kind, properties, created_at, source_ref) \
             VALUES ('edge-row-1', 'edge-1', 'doc-1', 'doc-2', 'RELATED', '{}', 100, 'seed')",
            "insert edge",
        ),
        // Retire events for both nodes and the edge.
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-a', 'node_retire', 'doc-1', 'forget-1', 200, '')",
            "insert retire event A",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-retire-b', 'node_retire', 'doc-2', 'forget-1', 200, '')",
            "insert retire event B",
        ),
        (
            "INSERT INTO provenance_events (id, event_type, subject, source_ref, created_at, metadata_json) \
             VALUES ('evt-edge-retire', 'edge_retire', 'edge-1', 'forget-1', 200, '')",
            "insert edge retire event",
        ),
        // Mark both nodes and the edge as superseded at 200.
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-1'",
            "retire node A",
        ),
        (
            "UPDATE nodes SET superseded_at = 200 WHERE logical_id = 'doc-2'",
            "retire node B",
        ),
        (
            "UPDATE edges SET superseded_at = 200 WHERE logical_id = 'edge-1'",
            "retire edge",
        ),
    ];
    for (sql, context) in seed_statements {
        seed_conn.execute(sql, []).expect(context);
    }
    drop(seed_conn);

    // Restore the counterpart first so it is active when A is restored.
    let report_b = service.restore_logical_id("doc-2").expect("restore B");
    assert!(!report_b.was_noop);

    let report_a = service.restore_logical_id("doc-1").expect("restore A");
    assert!(!report_a.was_noop);
    assert_eq!(report_a.restored_node_rows, 1);
    assert!(
        report_a.restored_edge_rows > 0,
        "edge should be restored when both endpoints active"
    );
    assert!(
        report_a.skipped_edges.is_empty(),
        "no edges should be skipped"
    );

    let conn = sqlite::open_connection(db.path()).expect("conn");
    let active_edge_count = conn
        .query_row(
            "SELECT count(*) FROM edges WHERE logical_id = 'edge-1' AND superseded_at IS NULL",
            [],
            |row| row.get::<_, i64>(0),
        )
        .expect("active edge count");
    assert_eq!(
        active_edge_count, 1,
        "edge must be active after both endpoints restored"
    );
}
8596}