1use super::*;
2use crate::storage::unified::metadata::{MetadataFilter, MetadataValue};
3
4impl RedDB {
5 pub fn is_replica_role(&self) -> bool {
6 matches!(
7 self.options.replication.role,
8 crate::replication::ReplicationRole::Replica { .. }
9 )
10 }
11
12 pub fn enforce_retention_policy(&self) -> Result<(), Box<dyn std::error::Error>> {
13 if self.options.read_only || self.is_replica_role() {
14 return Ok(());
15 }
16
17 if self.options.mode == StorageMode::Persistent {
20 let Some(path) = self.path() else {
21 return Ok(());
22 };
23
24 let Ok(mut metadata) = self.load_or_bootstrap_physical_metadata(true) else {
25 return Ok(());
26 };
27
28 self.prune_export_registry(&mut metadata.exports);
29 metadata.save_for_data_path(path)?;
30 }
31
32 let _ = self.sweep_ttl_expired_entities()?;
33
34 Ok(())
35 }
36
37 pub(crate) fn ttl_expired_entities_now(
38 &self,
39 ) -> Result<Vec<(String, EntityId)>, Box<dyn std::error::Error>> {
40 self.ttl_expired_entities_at(current_unix_ms())
41 }
42
43 pub fn replica_allows_entity_at_read(
44 &self,
45 collection: &str,
46 entity: &crate::storage::UnifiedEntity,
47 ) -> bool {
48 if !self.is_replica_role() {
49 return true;
50 }
51 !self.entity_expired_at(collection, entity, current_unix_ms())
52 }
53
54 fn sweep_ttl_expired_entities(&self) -> Result<usize, Box<dyn std::error::Error>> {
55 let to_delete = self.ttl_expired_entities_now()?;
56
57 let mut deleted = 0usize;
58 for (collection, id) in to_delete {
59 match self.store.delete(&collection, id) {
60 Ok(true) => deleted = deleted.saturating_add(1),
61 Ok(false) => {}
62 Err(err) => {
63 return Err(format!(
64 "failed deleting expired entity {id} from collection '{collection}': {err:?}"
65 )
66 .into());
67 }
68 }
69 }
70
71 Ok(deleted)
72 }
73
74 fn ttl_expired_entities_at(
75 &self,
76 now_ms: u64,
77 ) -> Result<Vec<(String, EntityId)>, Box<dyn std::error::Error>> {
78 let mut to_delete = Vec::<(String, EntityId)>::new();
79
80 let mut absolute_expired = self.expired_entities_by_expires_at(now_ms)?;
81 to_delete.append(&mut absolute_expired);
82
83 let mut relative_expired = self.expired_entities_by_ttl(now_ms)?;
84 to_delete.append(&mut relative_expired);
85
86 to_delete.sort_unstable();
87 to_delete.dedup();
88
89 Ok(to_delete)
90 }
91
92 fn entity_expired_at(
93 &self,
94 collection: &str,
95 entity: &crate::storage::UnifiedEntity,
96 now_ms: u64,
97 ) -> bool {
98 let Some(metadata) = self.store.get_metadata(collection, entity.id) else {
99 return false;
100 };
101
102 if metadata
103 .get("_expires_at")
104 .and_then(Self::metadata_u64)
105 .is_some_and(|expires_at_ms| expires_at_ms <= now_ms)
106 {
107 return true;
108 }
109
110 let ttl_ms = metadata.get("_ttl_ms").and_then(Self::metadata_u64);
111 let ttl_secs = if ttl_ms.is_none() {
112 metadata.get("_ttl").and_then(|value| {
113 Self::metadata_u64(value).and_then(|value_secs| value_secs.checked_mul(1000))
114 })
115 } else {
116 None
117 };
118
119 let Some(ttl_ms) = ttl_ms.or(ttl_secs) else {
120 return false;
121 };
122 entity
123 .created_at
124 .saturating_mul(1000)
125 .saturating_add(ttl_ms)
126 <= now_ms
127 }
128
129 fn expired_entities_by_expires_at(
130 &self,
131 now_ms: u64,
132 ) -> Result<Vec<(String, EntityId)>, Box<dyn std::error::Error>> {
133 let mut ids = self.store.filter_metadata_all(&[(
134 "_expires_at".to_string(),
135 MetadataFilter::Le(MetadataValue::Timestamp(now_ms)),
136 )]);
137
138 if let Ok(now_ms_i64) = i64::try_from(now_ms) {
139 ids.extend(self.store.filter_metadata_all(&[(
140 "_expires_at".to_string(),
141 MetadataFilter::Le(MetadataValue::Int(now_ms_i64)),
142 )]));
143 }
144
145 let now_ms_f64 = now_ms as f64;
146 if now_ms_f64.is_finite() {
147 ids.extend(self.store.filter_metadata_all(&[(
148 "_expires_at".to_string(),
149 MetadataFilter::Le(MetadataValue::Float(now_ms_f64)),
150 )]));
151 }
152
153 Ok(ids)
154 }
155
156 fn expired_entities_by_ttl(
157 &self,
158 now_ms: u64,
159 ) -> Result<Vec<(String, EntityId)>, Box<dyn std::error::Error>> {
160 let mut candidates = Vec::<(String, EntityId)>::new();
161
162 let ttl_ms_candidates = self
163 .store
164 .filter_metadata_all(&[("_ttl_ms".to_string(), MetadataFilter::IsNotNull)]);
165 candidates.extend(ttl_ms_candidates);
166
167 let ttl_candidates = self
168 .store
169 .filter_metadata_all(&[("_ttl".to_string(), MetadataFilter::IsNotNull)]);
170 candidates.extend(ttl_candidates);
171
172 if candidates.is_empty() {
173 return Ok(Vec::new());
174 }
175
176 candidates.sort_unstable();
177 candidates.dedup();
178
179 let mut expired = Vec::<(String, EntityId)>::new();
180 for (collection, entity_id) in candidates {
181 let Some(entity) = self.store.get(&collection, entity_id) else {
182 continue;
183 };
184
185 let Some(metadata) = self.store.get_metadata(&collection, entity_id) else {
186 continue;
187 };
188
189 let ttl_ms = metadata.get("_ttl_ms").and_then(Self::metadata_u64);
190 let ttl_secs = if ttl_ms.is_none() {
191 metadata.get("_ttl").and_then(|value| {
192 Self::metadata_u64(value).and_then(|value_secs| value_secs.checked_mul(1000))
193 })
194 } else {
195 None
196 };
197
198 let Some(ttl_ms) = ttl_ms.or(ttl_secs) else {
199 continue;
200 };
201
202 let created_at_ms = entity.created_at.saturating_mul(1000);
203 let expiry_ms = created_at_ms.saturating_add(ttl_ms);
204 if expiry_ms <= now_ms {
205 expired.push((collection, entity_id));
206 }
207 }
208
209 Ok(expired)
210 }
211
212 fn metadata_u64(value: &MetadataValue) -> Option<u64> {
213 match value {
214 MetadataValue::Int(v) if *v >= 0 => Some(*v as u64),
215 MetadataValue::Timestamp(v) => Some(*v),
216 MetadataValue::Float(v) => {
217 if !v.is_finite() || !v.is_sign_positive() || v.fract().abs() >= f64::EPSILON {
218 return None;
219 }
220 if *v > u64::MAX as f64 {
221 return None;
222 }
223 Some(v.trunc() as u64)
224 }
225 MetadataValue::String(v) => v.parse::<u64>().ok(),
226 _ => None,
227 }
228 }
229
230 pub fn node(&self, collection: impl Into<String>, label: impl Into<String>) -> NodeBuilder {
243 NodeBuilder::new(
244 self.store.clone(),
245 self.preprocessors.clone(),
246 collection,
247 label,
248 )
249 }
250
251 pub fn edge(&self, collection: impl Into<String>, label: impl Into<String>) -> EdgeBuilder {
263 EdgeBuilder::new(
264 self.store.clone(),
265 self.preprocessors.clone(),
266 collection,
267 label,
268 )
269 }
270
271 pub fn vector(&self, collection: impl Into<String>) -> VectorBuilder {
282 VectorBuilder::new(self.store.clone(), self.preprocessors.clone(), collection)
283 }
284
285 pub fn row(&self, table: impl Into<String>, columns: Vec<(&str, Value)>) -> RowBuilder {
296 RowBuilder::new(
297 self.store.clone(),
298 self.preprocessors.clone(),
299 table,
300 columns,
301 )
302 }
303
304 pub fn doc(&self, collection: impl Into<String>) -> DocumentBuilder {
318 DocumentBuilder::new(self.store.clone(), self.preprocessors.clone(), collection)
319 }
320
321 pub fn kv(
332 &self,
333 collection: impl Into<String>,
334 key: impl Into<String>,
335 value: Value,
336 ) -> KvBuilder {
337 KvBuilder::new(
338 self.store.clone(),
339 self.preprocessors.clone(),
340 collection,
341 key,
342 value,
343 )
344 }
345
346 pub fn get_kv(&self, collection: &str, key: &str) -> Option<(Value, EntityId)> {
350 let manager = self.store.get_collection(collection)?;
351 let entities = manager.query_all(|_| true);
352 for entity in entities {
353 if let EntityData::Row(ref row) = entity.data {
354 if let Some(ref named) = row.named {
355 if let Some(Value::Text(ref k)) = named.get("key") {
356 if &**k == key {
357 let value = named.get("value").cloned().unwrap_or(Value::Null);
358 return Some((value, entity.id));
359 }
360 }
361 }
362 }
363 }
364 None
365 }
366
367 pub fn delete_kv(
369 &self,
370 collection: &str,
371 key: &str,
372 ) -> Result<bool, super::super::error::DevXError> {
373 let Some((_, id)) = self.get_kv(collection, key) else {
374 return Ok(false);
375 };
376 self.store
377 .delete(collection, id)
378 .map_err(|err| super::super::error::DevXError::Storage(format!("{err:?}")))?;
379 Ok(true)
380 }
381
382 pub(crate) fn with_initialized_metadata(self) -> Result<Self, Box<dyn std::error::Error>> {
383 if self.options.mode == StorageMode::Persistent {
384 if let Ok(metadata) = self.load_or_bootstrap_physical_metadata(false) {
386 crate::reserved_fields::validate_physical_metadata_contracts(&metadata)
387 .map_err(|err| err.to_string())?;
388 }
389 }
393 self.load_collection_ttl_defaults_from_metadata();
394 self.load_hypertables_from_metadata();
398 self.recover_queue_pending_state();
399 Ok(self)
400 }
401
402 pub(crate) fn persist_metadata(&self) -> Result<(), Box<dyn std::error::Error>> {
403 if self.options.mode != StorageMode::Persistent || self.options.read_only {
404 return Ok(());
405 }
406 if self.options.storage_profile.deploy_profile == crate::storage::DeployProfile::Embedded
407 && self.options.storage_profile.packaging
408 == crate::storage::StoragePackaging::SingleFile
409 {
410 return Ok(());
411 }
412 let Some(path) = self.path() else {
413 return Ok(());
414 };
415
416 let previous = self.load_or_bootstrap_physical_metadata(false).ok();
417 let collection_roots = self.physical_collection_roots();
418 let indexes = self
419 .native_physical_state()
420 .map(|state| self.physical_index_state_from_native_state(&state, previous.as_ref()))
421 .unwrap_or_else(|| self.physical_index_state());
422 let mut metadata = PhysicalMetadataFile::from_state(
423 self.options.clone(),
424 self.catalog_snapshot(),
425 collection_roots,
426 indexes,
427 previous.as_ref(),
428 );
429 metadata.collection_ttl_defaults_ms = self.collection_ttl_defaults_snapshot();
430 metadata.hypertables = self.hypertable_registry_snapshot();
434 metadata.save_for_data_path(path)?;
435 self.persist_native_physical_header(&metadata)?;
436 Ok(())
437 }
438
439 fn bootstrap_metadata_from_native_state(&self) -> Result<bool, Box<dyn std::error::Error>> {
440 if self.options.mode != StorageMode::Persistent || self.options.read_only {
441 return Ok(false);
442 }
443 let Some(path) = self.path() else {
444 return Ok(false);
445 };
446 let Some(native_state) = self.native_physical_state() else {
447 return Ok(false);
448 };
449 if !Self::native_state_is_bootstrap_complete(&native_state) {
450 return Ok(false);
451 }
452
453 let previous = PhysicalMetadataFile::load_for_data_path(path).ok();
454 let metadata = self.metadata_from_native_state(&native_state, previous.as_ref());
455 metadata.save_for_data_path(path)?;
456 self.persist_native_physical_header(&metadata)?;
457 Ok(true)
458 }
459
460 pub fn rebuild_physical_metadata_from_native_state(
463 &self,
464 ) -> Result<bool, Box<dyn std::error::Error>> {
465 self.bootstrap_metadata_from_native_state()
466 }
467
468 pub(crate) fn native_state_is_bootstrap_complete(native_state: &NativePhysicalState) -> bool {
469 let registry_complete = native_state.registry.as_ref().map(|registry| {
470 registry.collections_complete
471 && registry.indexes_complete
472 && registry.graph_projections_complete
473 && registry.analytics_jobs_complete
474 && registry.vector_artifacts_complete
475 });
476 let recovery_complete = native_state
477 .recovery
478 .as_ref()
479 .map(|recovery| recovery.snapshots_complete && recovery.exports_complete);
480 let catalog_complete = native_state
481 .catalog
482 .as_ref()
483 .map(|catalog| catalog.collections_complete);
484
485 registry_complete == Some(true)
486 && recovery_complete == Some(true)
487 && catalog_complete == Some(true)
488 }
489
/// Loads the physical metadata file for this database, rebuilding it from
/// the native physical state when the file is behind or cannot be loaded.
///
/// `persist_bootstrapped` controls whether rebuilt metadata is written back
/// (metadata file plus native header); nothing is written on read-only
/// instances regardless of the flag.
///
/// # Errors
/// Fails outside persistent mode, when no data path is available, when the
/// file cannot be loaded and the native state cannot stand in for it, or
/// when writing rebuilt metadata fails.
pub(crate) fn load_or_bootstrap_physical_metadata(
    &self,
    persist_bootstrapped: bool,
) -> Result<PhysicalMetadataFile, Box<dyn std::error::Error>> {
    if self.options.mode != StorageMode::Persistent {
        return Err("physical metadata requires persistent mode".into());
    }
    let Some(path) = self.path() else {
        return Err("database path is not available".into());
    };
    let native_state = self.native_physical_state();

    match PhysicalMetadataFile::load_for_data_path(path) {
        Ok(metadata) => {
            if let Some(native_state) = native_state.as_ref() {
                let inspection = Self::inspect_native_header_against_metadata(
                    native_state.header,
                    &metadata,
                );
                // Only the "native ahead" policy triggers a rebuild; the
                // loaded file seeds the rebuilt metadata as `previous`.
                if Self::repair_policy_for_inspection(&inspection)
                    == NativeHeaderRepairPolicy::NativeAheadOfMetadata
                {
                    let bootstrapped =
                        self.metadata_from_native_state(native_state, Some(&metadata));
                    if persist_bootstrapped && !self.options.read_only {
                        bootstrapped.save_for_data_path(path)?;
                        self.persist_native_physical_header(&bootstrapped)?;
                    }
                    return Ok(bootstrapped);
                }
            }
            Ok(metadata)
        }
        Err(err) => {
            // Without a native state there is nothing to rebuild from.
            let Some(native_state) = native_state else {
                return Err(err.into());
            };
            // Sequence 0 with no registry, catalog or recovery section is
            // accepted for bootstrap even though it is not "complete".
            let is_fresh_empty = native_state.header.sequence == 0
                && native_state.registry.is_none()
                && native_state.catalog.is_none()
                && native_state.recovery.is_none();
            if !is_fresh_empty && !Self::native_state_is_bootstrap_complete(&native_state) {
                return Err(err.into());
            }
            let metadata = self.metadata_from_native_state(&native_state, None);
            if persist_bootstrapped && !self.options.read_only {
                metadata.save_for_data_path(path)?;
                self.persist_native_physical_header(&metadata)?;
            }
            Ok(metadata)
        }
    }
}
558
559 pub(crate) fn physical_metadata_preference(&self) -> Option<&'static str> {
560 let path = self.path()?;
561 let native_state = self.native_physical_state();
562 let metadata = PhysicalMetadataFile::load_for_data_path(path).ok();
563
564 match (metadata, native_state) {
565 (Some(metadata), Some(native_state)) => {
566 let inspection =
567 Self::inspect_native_header_against_metadata(native_state.header, &metadata);
568 match Self::repair_policy_for_inspection(&inspection) {
569 NativeHeaderRepairPolicy::InSync => Some("sidecar_current"),
570 NativeHeaderRepairPolicy::RepairNativeFromMetadata => Some("sidecar_current"),
571 NativeHeaderRepairPolicy::NativeAheadOfMetadata => Some("native_ahead"),
572 }
573 }
574 (Some(_), None) => Some("sidecar_only"),
575 (None, Some(_)) => Some("sidecar_missing_native_available"),
576 (None, None) => Some("sidecar_missing_no_native"),
577 }
578 }
579
580 fn metadata_from_native_state(
581 &self,
582 native_state: &NativePhysicalState,
583 previous: Option<&PhysicalMetadataFile>,
584 ) -> PhysicalMetadataFile {
585 let now = SystemTime::now()
586 .duration_since(UNIX_EPOCH)
587 .unwrap_or_default()
588 .as_millis();
589 let catalog = self.catalog_snapshot();
590 let catalog_name = catalog.name.clone();
591 let catalog_total_entities = catalog.total_entities;
592 let catalog_total_collections = catalog.total_collections;
593 let indexes = self.physical_index_state();
594
595 let mut manifest =
596 crate::api::SchemaManifest::now(self.options.clone(), catalog.total_collections);
597 manifest.updated_at_unix_ms = now;
598
599 let manifest_events = native_state
600 .manifest
601 .as_ref()
602 .map(|summary| {
603 summary
604 .recent_events
605 .iter()
606 .map(|event| crate::physical::ManifestEvent {
607 collection: event.collection.clone(),
608 object_key: event.object_key.clone(),
609 kind: match event.kind.as_str() {
610 "insert" => crate::physical::ManifestEventKind::Insert,
611 "update" => crate::physical::ManifestEventKind::Update,
612 "remove" => crate::physical::ManifestEventKind::Remove,
613 _ => crate::physical::ManifestEventKind::Checkpoint,
614 },
615 block: crate::physical::BlockReference {
616 index: event.block_index,
617 checksum: event.block_checksum,
618 },
619 snapshot_min: event.snapshot_min,
620 snapshot_max: event.snapshot_max,
621 })
622 .collect()
623 })
624 .unwrap_or_default();
625
626 let graph_projections = native_state
627 .registry
628 .as_ref()
629 .and_then(|registry| {
630 registry.graph_projections_complete.then(|| {
631 registry
632 .graph_projections
633 .iter()
634 .map(|projection| crate::physical::PhysicalGraphProjection {
635 name: projection.name.clone(),
636 created_at_unix_ms: projection.created_at_unix_ms,
637 updated_at_unix_ms: projection.updated_at_unix_ms,
638 state: "materialized".to_string(),
639 source: projection.source.clone(),
640 node_labels: projection.node_labels.clone(),
641 node_types: projection.node_types.clone(),
642 edge_labels: projection.edge_labels.clone(),
643 last_materialized_sequence: projection.last_materialized_sequence,
644 })
645 .collect()
646 })
647 })
648 .or_else(|| previous.map(|metadata| metadata.graph_projections.clone()))
649 .unwrap_or_default();
650
651 let analytics_jobs = native_state
652 .registry
653 .as_ref()
654 .and_then(|registry| {
655 registry.analytics_jobs_complete.then(|| {
656 registry
657 .analytics_jobs
658 .iter()
659 .map(|job| crate::physical::PhysicalAnalyticsJob {
660 id: job.id.clone(),
661 kind: job.kind.clone(),
662 state: job.state.clone(),
663 projection: job.projection.clone(),
664 created_at_unix_ms: job.created_at_unix_ms,
665 updated_at_unix_ms: job.updated_at_unix_ms,
666 last_run_sequence: job.last_run_sequence,
667 metadata: job.metadata.clone(),
668 })
669 .collect()
670 })
671 })
672 .or_else(|| previous.map(|metadata| metadata.analytics_jobs.clone()))
673 .unwrap_or_default();
674
675 let exports = native_state
676 .recovery
677 .as_ref()
678 .and_then(|recovery| {
679 recovery.exports_complete.then(|| {
680 recovery
681 .exports
682 .iter()
683 .map(|export| crate::physical::ExportDescriptor {
684 name: export.name.clone(),
685 created_at_unix_ms: export.created_at_unix_ms,
686 snapshot_id: export.snapshot_id,
687 superblock_sequence: export.superblock_sequence,
688 data_path: self
689 .path()
690 .map(|path| {
691 crate::physical::PhysicalMetadataFile::export_data_path_for(
692 path,
693 &export.name,
694 )
695 .display()
696 .to_string()
697 })
698 .unwrap_or_default(),
699 metadata_path: self
700 .path()
701 .map(|path| {
702 let export_data_path =
703 crate::physical::PhysicalMetadataFile::export_data_path_for(
704 path,
705 &export.name,
706 );
707 crate::physical::PhysicalMetadataFile::metadata_path_for(
708 &export_data_path,
709 )
710 .display()
711 .to_string()
712 })
713 .unwrap_or_default(),
714 collection_count: export.collection_count as usize,
715 total_entities: export.total_entities as usize,
716 })
717 .collect()
718 })
719 })
720 .or_else(|| previous.map(|metadata| metadata.exports.clone()))
721 .unwrap_or_default();
722
723 let snapshots = native_state
724 .recovery
725 .as_ref()
726 .and_then(|recovery| {
727 recovery.snapshots_complete.then(|| {
728 recovery
729 .snapshots
730 .iter()
731 .map(|snapshot| crate::physical::SnapshotDescriptor {
732 snapshot_id: snapshot.snapshot_id,
733 created_at_unix_ms: snapshot.created_at_unix_ms,
734 superblock_sequence: snapshot.superblock_sequence,
735 collection_count: snapshot.collection_count as usize,
736 total_entities: snapshot.total_entities as usize,
737 })
738 .collect()
739 })
740 })
741 .or_else(|| previous.map(|metadata| metadata.snapshots.clone()))
742 .unwrap_or_else(|| {
743 vec![crate::physical::SnapshotDescriptor {
744 snapshot_id: native_state.header.sequence,
745 created_at_unix_ms: now,
746 superblock_sequence: native_state.header.sequence,
747 collection_count: catalog_total_collections,
748 total_entities: catalog_total_entities,
749 }]
750 });
751
752 let catalog_stats = native_state
753 .catalog
754 .as_ref()
755 .and_then(|native_catalog| {
756 native_catalog.collections_complete.then(|| {
757 native_catalog
758 .collections
759 .iter()
760 .map(|collection| {
761 (
762 collection.name.clone(),
763 crate::api::CollectionStats {
764 entities: collection.entities as usize,
765 cross_refs: collection.cross_refs as usize,
766 segments: collection.segments as usize,
767 },
768 )
769 })
770 .collect::<BTreeMap<_, _>>()
771 })
772 })
773 .or_else(|| previous.map(|metadata| metadata.catalog.stats_by_collection.clone()))
774 .unwrap_or_else(|| catalog.stats_by_collection.clone());
775
776 PhysicalMetadataFile {
777 protocol_version: crate::physical::PHYSICAL_METADATA_PROTOCOL_VERSION.to_string(),
778 generated_at_unix_ms: now,
779 last_loaded_from: Some("native_bootstrap".to_string()),
780 last_healed_at_unix_ms: Some(now),
781 manifest,
782 catalog: crate::api::CatalogSnapshot {
783 name: catalog_name,
784 total_entities: native_state
785 .catalog
786 .as_ref()
787 .map(|summary| summary.total_entities as usize)
788 .unwrap_or(catalog_total_entities),
789 total_collections: native_state
790 .catalog
791 .as_ref()
792 .map(|summary| summary.collection_count as usize)
793 .unwrap_or(catalog_total_collections),
794 stats_by_collection: catalog_stats,
795 updated_at: SystemTime::now(),
796 },
797 manifest_events,
798 collection_ttl_defaults_ms: previous
799 .map(|metadata| metadata.collection_ttl_defaults_ms.clone())
800 .unwrap_or_default(),
801 collection_contracts: previous
802 .map(|metadata| metadata.collection_contracts.clone())
803 .unwrap_or_default(),
804 hypertables: previous
805 .map(|metadata| metadata.hypertables.clone())
806 .unwrap_or_default(),
807 tree_definitions: previous
808 .map(|metadata| metadata.tree_definitions.clone())
809 .unwrap_or_default(),
810 indexes,
811 graph_projections,
812 analytics_jobs,
813 exports,
814 superblock: crate::physical::SuperblockHeader {
815 format_version: native_state.header.format_version,
816 sequence: native_state.header.sequence,
817 copies: crate::physical::DEFAULT_SUPERBLOCK_COPIES,
818 manifest: crate::physical::ManifestPointers {
819 oldest: crate::physical::BlockReference {
820 index: native_state.header.manifest_oldest_root,
821 checksum: 0,
822 },
823 newest: crate::physical::BlockReference {
824 index: native_state.header.manifest_root,
825 checksum: 0,
826 },
827 },
828 free_set: crate::physical::BlockReference {
829 index: native_state.header.free_set_root,
830 checksum: 0,
831 },
832 collection_roots: native_state.collection_roots.clone(),
833 },
834 snapshots,
835 }
836 }
837
838 pub(crate) fn reconcile_index_states_with_native_artifacts(
839 &self,
840 mut indexes: Vec<PhysicalIndexState>,
841 ) -> Vec<PhysicalIndexState> {
842 let native_artifacts = self
843 .native_physical_state()
844 .and_then(|state| state.registry)
845 .map(|registry| registry.vector_artifacts)
846 .unwrap_or_default();
847 for index in &mut indexes {
848 let Some(collection) = index.collection.as_deref() else {
849 continue;
850 };
851 let Some(artifact_kind) = Self::native_artifact_kind_for_index(index.kind) else {
852 continue;
853 };
854 let Some(artifact) = native_artifacts.iter().find(|artifact| {
855 artifact.collection == collection && artifact.artifact_kind == artifact_kind
856 }) else {
857 index.build_state = "metadata-only".to_string();
858 continue;
859 };
860 index.entries = artifact.vector_count as usize;
861 index.estimated_memory_bytes = artifact.serialized_bytes;
862 index.backend = format!("{}+native-artifact", index_backend_name(index.kind));
863 index.artifact_kind = Some(artifact.artifact_kind.clone());
864 index.artifact_checksum = Some(artifact.checksum);
865 index.build_state = "artifact-published".to_string();
866 if let Some(pages) = self.native_vector_artifact_pages() {
867 index.artifact_root_page = pages
868 .into_iter()
869 .find(|page| {
870 page.collection == artifact.collection
871 && page.artifact_kind == artifact.artifact_kind
872 })
873 .map(|page| page.root_page);
874 }
875 }
876 indexes
877 }
878
879 pub(crate) fn warmup_native_vector_artifact_for_index(
880 &self,
881 index: &PhysicalIndexState,
882 ) -> Result<(), String> {
883 let Some(collection) = index.collection.as_deref() else {
884 return Ok(());
885 };
886 let Some(artifact_kind) = Self::native_artifact_kind_for_index(index.kind) else {
887 return Ok(());
888 };
889 self.warmup_native_vector_artifact(collection, Some(artifact_kind))?;
890 Ok(())
891 }
892
893 pub(crate) fn apply_runtime_native_artifact_to_index_state(
894 &self,
895 index: &mut PhysicalIndexState,
896 ) -> Result<(), String> {
897 let Some(collection) = index.collection.as_deref() else {
898 return Ok(());
899 };
900 let Some(artifact_kind) = Self::native_artifact_kind_for_index(index.kind) else {
901 return Ok(());
902 };
903 let artifact = self.inspect_native_vector_artifact(collection, Some(artifact_kind))?;
904 index.entries = artifact
905 .graph_edge_count
906 .or(artifact.text_posting_count)
907 .unwrap_or(artifact.node_count) as usize;
908 index.estimated_memory_bytes = artifact.byte_len;
909 index.backend = format!("{}+native-artifact", index_backend_name(index.kind));
910 index.artifact_kind = Some(artifact.artifact_kind.clone());
911 index.artifact_checksum = Some(artifact.checksum);
912 index.build_state = "ready".to_string();
913 index.artifact_root_page = self
914 .native_vector_artifact_pages()
915 .and_then(|pages| {
916 pages.into_iter().find(|page| {
917 page.collection == artifact.collection
918 && page.artifact_kind == artifact.artifact_kind
919 })
920 })
921 .map(|page| page.root_page);
922 Ok(())
923 }
924
925 pub(crate) fn physical_index_state_from_native_state(
926 &self,
927 native_state: &NativePhysicalState,
928 previous: Option<&PhysicalMetadataFile>,
929 ) -> Vec<PhysicalIndexState> {
930 let mut fresh = self.physical_index_state();
931 let Some(registry) = native_state.registry.as_ref() else {
932 if let Some(previous) = previous {
933 for index in &previous.indexes {
934 if !fresh.iter().any(|candidate| candidate.name == index.name) {
935 fresh.push(index.clone());
936 }
937 }
938 }
939 return fresh;
940 };
941
942 for index in &mut fresh {
943 if let Some(native) = registry
944 .indexes
945 .iter()
946 .find(|candidate| candidate.name == index.name)
947 {
948 index.enabled = native.enabled;
949 index.last_refresh_ms = native.last_refresh_ms;
950 index.backend = native.backend.clone();
951 index.entries = native.entries as usize;
952 index.estimated_memory_bytes = native.estimated_memory_bytes;
953 if index.artifact_kind.is_none() {
954 index.artifact_kind = Self::native_artifact_kind_for_index(index.kind)
955 .map(|value| value.to_string());
956 }
957 if index.build_state == "catalog-derived" {
958 index.build_state = "registry-loaded".to_string();
959 }
960 }
961 }
962
963 for native in ®istry.indexes {
964 if fresh.iter().any(|index| index.name == native.name) {
965 continue;
966 }
967 let Some(kind) = Self::index_kind_from_str(&native.kind) else {
968 continue;
969 };
970 fresh.push(PhysicalIndexState {
971 name: native.name.clone(),
972 kind,
973 collection: native.collection.clone(),
974 enabled: native.enabled,
975 entries: native.entries as usize,
976 estimated_memory_bytes: native.estimated_memory_bytes,
977 last_refresh_ms: native.last_refresh_ms,
978 backend: native.backend.clone(),
979 artifact_kind: Self::native_artifact_kind_for_index(kind)
980 .map(|value| value.to_string()),
981 artifact_root_page: None,
982 artifact_checksum: None,
983 build_state: "registry-loaded".to_string(),
984 });
985 }
986
987 if !registry.indexes_complete {
988 if let Some(previous) = previous {
989 for index in &previous.indexes {
990 if !fresh.iter().any(|candidate| candidate.name == index.name) {
991 fresh.push(index.clone());
992 }
993 }
994 }
995 }
996
997 fresh
998 }
999
1000 pub(crate) fn graph_projections_from_native_state(
1001 &self,
1002 native_state: &NativePhysicalState,
1003 ) -> Vec<PhysicalGraphProjection> {
1004 native_state
1005 .registry
1006 .as_ref()
1007 .map(|registry| {
1008 registry
1009 .graph_projections
1010 .iter()
1011 .map(|projection| PhysicalGraphProjection {
1012 name: projection.name.clone(),
1013 created_at_unix_ms: projection.created_at_unix_ms,
1014 updated_at_unix_ms: projection.updated_at_unix_ms,
1015 state: "materialized".to_string(),
1016 source: projection.source.clone(),
1017 node_labels: projection.node_labels.clone(),
1018 node_types: projection.node_types.clone(),
1019 edge_labels: projection.edge_labels.clone(),
1020 last_materialized_sequence: projection.last_materialized_sequence,
1021 })
1022 .collect()
1023 })
1024 .unwrap_or_default()
1025 }
1026
1027 pub(crate) fn analytics_jobs_from_native_state(
1028 &self,
1029 native_state: &NativePhysicalState,
1030 ) -> Vec<PhysicalAnalyticsJob> {
1031 native_state
1032 .registry
1033 .as_ref()
1034 .map(|registry| {
1035 registry
1036 .analytics_jobs
1037 .iter()
1038 .map(|job| PhysicalAnalyticsJob {
1039 id: job.id.clone(),
1040 kind: job.kind.clone(),
1041 state: job.state.clone(),
1042 projection: job.projection.clone(),
1043 created_at_unix_ms: job.created_at_unix_ms,
1044 updated_at_unix_ms: job.updated_at_unix_ms,
1045 last_run_sequence: job.last_run_sequence,
1046 metadata: job.metadata.clone(),
1047 })
1048 .collect()
1049 })
1050 .unwrap_or_default()
1051 }
1052
1053 pub(crate) fn exports_from_native_state(
1054 &self,
1055 native_state: &NativePhysicalState,
1056 ) -> Vec<ExportDescriptor> {
1057 native_state
1058 .recovery
1059 .as_ref()
1060 .map(|recovery| {
1061 recovery
1062 .exports
1063 .iter()
1064 .map(|export| ExportDescriptor {
1065 name: export.name.clone(),
1066 created_at_unix_ms: export.created_at_unix_ms,
1067 snapshot_id: export.snapshot_id,
1068 superblock_sequence: export.superblock_sequence,
1069 data_path: self
1070 .path()
1071 .map(|path| {
1072 crate::physical::PhysicalMetadataFile::export_data_path_for(
1073 path,
1074 &export.name,
1075 )
1076 .display()
1077 .to_string()
1078 })
1079 .unwrap_or_default(),
1080 metadata_path: self
1081 .path()
1082 .map(|path| {
1083 let export_data_path =
1084 crate::physical::PhysicalMetadataFile::export_data_path_for(
1085 path,
1086 &export.name,
1087 );
1088 crate::physical::PhysicalMetadataFile::metadata_path_for(
1089 &export_data_path,
1090 )
1091 .display()
1092 .to_string()
1093 })
1094 .unwrap_or_default(),
1095 collection_count: export.collection_count as usize,
1096 total_entities: export.total_entities as usize,
1097 })
1098 .collect()
1099 })
1100 .unwrap_or_default()
1101 }
1102
1103 pub(crate) fn snapshots_from_native_state(
1104 &self,
1105 native_state: &NativePhysicalState,
1106 ) -> Vec<crate::physical::SnapshotDescriptor> {
1107 let snapshots: Vec<_> = native_state
1108 .recovery
1109 .as_ref()
1110 .map(|recovery| {
1111 recovery
1112 .snapshots
1113 .iter()
1114 .map(|snapshot| crate::physical::SnapshotDescriptor {
1115 snapshot_id: snapshot.snapshot_id,
1116 created_at_unix_ms: snapshot.created_at_unix_ms,
1117 superblock_sequence: snapshot.superblock_sequence,
1118 collection_count: snapshot.collection_count as usize,
1119 total_entities: snapshot.total_entities as usize,
1120 })
1121 .collect()
1122 })
1123 .unwrap_or_default();
1124 if !snapshots.is_empty() {
1125 return snapshots;
1126 }
1127
1128 let now = SystemTime::now()
1129 .duration_since(UNIX_EPOCH)
1130 .unwrap_or_default()
1131 .as_millis();
1132 let (collection_count, total_entities) = native_state
1133 .catalog
1134 .as_ref()
1135 .map(|catalog| {
1136 (
1137 catalog.collection_count as usize,
1138 catalog.total_entities as usize,
1139 )
1140 })
1141 .unwrap_or_else(|| {
1142 let catalog = self.catalog_snapshot();
1143 (catalog.total_collections, catalog.total_entities)
1144 });
1145
1146 vec![crate::physical::SnapshotDescriptor {
1147 snapshot_id: native_state.header.sequence,
1148 created_at_unix_ms: now,
1149 superblock_sequence: native_state.header.sequence,
1150 collection_count,
1151 total_entities,
1152 }]
1153 }
1154
1155 fn index_kind_from_str(value: &str) -> Option<crate::index::IndexKind> {
1156 match value {
1157 "btree" => Some(crate::index::IndexKind::BTree),
1158 "vector.hnsw" => Some(crate::index::IndexKind::VectorHnsw),
1159 "vector.inverted" => Some(crate::index::IndexKind::VectorInverted),
1160 "vector.turbo" => Some(crate::index::IndexKind::VectorTurbo),
1161 "graph.adjacency" => Some(crate::index::IndexKind::GraphAdjacency),
1162 "text.fulltext" => Some(crate::index::IndexKind::FullText),
1163 "document.pathvalue" => Some(crate::index::IndexKind::DocumentPathValue),
1164 "search.hybrid" => Some(crate::index::IndexKind::HybridSearch),
1165 _ => None,
1166 }
1167 }
1168
1169 pub(crate) fn native_artifact_kind_for_index(kind: IndexKind) -> Option<&'static str> {
1170 match kind {
1171 IndexKind::VectorHnsw => Some("hnsw"),
1172 IndexKind::VectorInverted => Some("ivf"),
1173 IndexKind::VectorTurbo => Some("turboquant"),
1174 IndexKind::GraphAdjacency => Some("graph.adjacency"),
1175 IndexKind::FullText => Some("text.fulltext"),
1176 IndexKind::DocumentPathValue => Some("document.pathvalue"),
1177 _ => None,
1178 }
1179 }
1180
1181 fn index_is_declared(&self, name: &str) -> bool {
1182 self.physical_metadata()
1183 .map(|metadata| metadata.indexes.iter().any(|index| index.name == name))
1184 .unwrap_or(false)
1185 }
1186
1187 pub(crate) fn graph_projection_is_declared(&self, name: &str) -> bool {
1188 self.physical_metadata()
1189 .map(|metadata| {
1190 metadata
1191 .graph_projections
1192 .iter()
1193 .any(|projection| projection.name == name)
1194 })
1195 .unwrap_or(false)
1196 }
1197
1198 pub(crate) fn graph_projection_is_operational(&self, name: &str) -> bool {
1199 self.operational_graph_projections()
1200 .into_iter()
1201 .any(|projection| projection.name == name && projection.state == "materialized")
1202 }
1203
/// Builds the identifier of an analytics job as `<kind>::<scope>`.
///
/// The scope is the projection name when one is given and the literal
/// `global` otherwise.
pub(crate) fn analytics_job_id(kind: &str, projection: Option<&str>) -> String {
    let scope = projection.unwrap_or("global");
    format!("{kind}::{scope}")
}
1210
1211 pub(crate) fn update_physical_metadata<T, F>(
1212 &self,
1213 mutator: F,
1214 ) -> Result<T, Box<dyn std::error::Error>>
1215 where
1216 F: FnOnce(&mut PhysicalMetadataFile) -> T,
1217 {
1218 if self.options.mode != StorageMode::Persistent {
1219 return Err("physical metadata operations require persistent mode".into());
1220 }
1221 if self.options.read_only {
1222 return Err("physical metadata operations are not allowed in read-only mode".into());
1223 }
1224 let Some(path) = self.path() else {
1225 return Err("database path is not available".into());
1226 };
1227
1228 let mut metadata = self.load_or_bootstrap_physical_metadata(true)?;
1229
1230 if metadata.indexes.is_empty() {
1231 metadata.indexes = self.physical_index_state();
1232 }
1233 metadata.superblock.collection_roots = self.physical_collection_roots();
1234
1235 let result = mutator(&mut metadata);
1236 metadata.save_for_data_path(path)?;
1237 self.persist_native_physical_header(&metadata)?;
1238 Ok(result)
1239 }
1240
1241 pub(crate) fn persist_native_physical_header(
1242 &self,
1243 metadata: &PhysicalMetadataFile,
1244 ) -> Result<(), Box<dyn std::error::Error>> {
1245 if !self.paged_mode {
1246 return Ok(());
1247 }
1248
1249 let existing_page = self
1250 .store
1251 .physical_file_header()
1252 .map(|header| header.collection_roots_page)
1253 .filter(|page| *page != 0);
1254 let existing_registry_page = self
1255 .store
1256 .physical_file_header()
1257 .map(|header| header.registry_page)
1258 .filter(|page| *page != 0);
1259 let existing_recovery_page = self
1260 .store
1261 .physical_file_header()
1262 .map(|header| header.recovery_page)
1263 .filter(|page| *page != 0);
1264 let existing_catalog_page = self
1265 .store
1266 .physical_file_header()
1267 .map(|header| header.catalog_page)
1268 .filter(|page| *page != 0);
1269 let existing_metadata_state_page = self
1270 .store
1271 .physical_file_header()
1272 .map(|header| header.metadata_state_page)
1273 .filter(|page| *page != 0);
1274 let existing_vector_artifact_page = self
1275 .store
1276 .physical_file_header()
1277 .map(|header| header.vector_artifact_page)
1278 .filter(|page| *page != 0);
1279 let existing_manifest_page = self
1280 .store
1281 .physical_file_header()
1282 .map(|header| header.manifest_page)
1283 .filter(|page| *page != 0);
1284 let (manifest_page, manifest_checksum) = self.store.write_native_manifest_summary(
1285 metadata.superblock.sequence,
1286 &metadata.manifest_events,
1287 existing_manifest_page,
1288 )?;
1289 let (collection_roots_page, collection_roots_checksum) = self
1290 .store
1291 .write_native_collection_roots(&metadata.superblock.collection_roots, existing_page)?;
1292 let registry_summary = self.native_registry_summary_from_metadata(metadata);
1293 let (registry_page, registry_checksum) = self
1294 .store
1295 .write_native_registry_summary(®istry_summary, existing_registry_page)?;
1296 let recovery_summary = Self::native_recovery_summary_from_metadata(metadata);
1297 let (recovery_page, recovery_checksum) = self
1298 .store
1299 .write_native_recovery_summary(&recovery_summary, existing_recovery_page)?;
1300 let catalog_summary = Self::native_catalog_summary_from_metadata(metadata);
1301 let (catalog_page, catalog_checksum) = self
1302 .store
1303 .write_native_catalog_summary(&catalog_summary, existing_catalog_page)?;
1304 let metadata_state_summary = Self::native_metadata_state_summary_from_metadata(metadata);
1305 let (metadata_state_page, metadata_state_checksum) =
1306 self.store.write_native_metadata_state_summary(
1307 &metadata_state_summary,
1308 existing_metadata_state_page,
1309 )?;
1310 let vector_artifact_records = self.native_vector_artifact_records();
1311 let vector_artifact_payloads = vector_artifact_records
1312 .iter()
1313 .map(|(summary, bytes)| {
1314 (
1315 summary.collection.clone(),
1316 summary.artifact_kind.clone(),
1317 bytes.clone(),
1318 )
1319 })
1320 .collect::<Vec<_>>();
1321 let (vector_artifact_page, vector_artifact_checksum, _vector_artifact_pages) =
1322 self.store.write_native_vector_artifact_store(
1323 &vector_artifact_payloads,
1324 existing_vector_artifact_page,
1325 )?;
1326 let mut header = Self::native_header_from_metadata(metadata);
1327 header.manifest_page = manifest_page;
1328 header.manifest_checksum = manifest_checksum;
1329 header.collection_roots_page = collection_roots_page;
1330 header.collection_roots_checksum = collection_roots_checksum;
1331 header.registry_page = registry_page;
1332 header.registry_checksum = registry_checksum;
1333 header.recovery_page = recovery_page;
1334 header.recovery_checksum = recovery_checksum;
1335 header.catalog_page = catalog_page;
1336 header.catalog_checksum = catalog_checksum;
1337 header.metadata_state_page = metadata_state_page;
1338 header.metadata_state_checksum = metadata_state_checksum;
1339 header.vector_artifact_page = vector_artifact_page;
1340 header.vector_artifact_checksum = vector_artifact_checksum;
1341 self.store.update_physical_file_header(header)?;
1342 self.store.persist()?;
1343 Ok(())
1344 }
1345
1346 pub(crate) fn native_header_from_metadata(
1347 metadata: &PhysicalMetadataFile,
1348 ) -> PhysicalFileHeader {
1349 PhysicalFileHeader {
1350 format_version: metadata.superblock.format_version,
1351 sequence: metadata.superblock.sequence,
1352 manifest_oldest_root: metadata.superblock.manifest.oldest.index,
1353 manifest_root: metadata.superblock.manifest.newest.index,
1354 free_set_root: metadata.superblock.free_set.index,
1355 manifest_page: 0,
1356 manifest_checksum: 0,
1357 collection_roots_page: 0,
1358 collection_roots_checksum: 0,
1359 collection_root_count: metadata.superblock.collection_roots.len() as u32,
1360 snapshot_count: metadata.snapshots.len() as u32,
1361 index_count: metadata.indexes.len() as u32,
1362 catalog_collection_count: metadata.catalog.total_collections as u32,
1363 catalog_total_entities: metadata.catalog.total_entities as u64,
1364 export_count: metadata.exports.len() as u32,
1365 graph_projection_count: metadata.graph_projections.len() as u32,
1366 analytics_job_count: metadata.analytics_jobs.len() as u32,
1367 manifest_event_count: metadata.manifest_events.len() as u32,
1368 registry_page: 0,
1369 registry_checksum: 0,
1370 recovery_page: 0,
1371 recovery_checksum: 0,
1372 catalog_page: 0,
1373 catalog_checksum: 0,
1374 metadata_state_page: 0,
1375 metadata_state_checksum: 0,
1376 vector_artifact_page: 0,
1377 vector_artifact_checksum: 0,
1378 }
1379 }
1380
1381 fn recover_queue_pending_state(&self) {
1382 const QUEUE_META_COLLECTION: &str = "red_queue_meta";
1383
1384 let Some(manager) = self.store.get_collection(QUEUE_META_COLLECTION) else {
1385 return;
1386 };
1387
1388 let pending_rows = manager.query_all(|entity| {
1389 entity.data.as_row().is_some_and(|row| {
1390 matches!(
1391 row.get_field("kind"),
1392 Some(crate::storage::schema::Value::Text(kind))
1393 if matches!(&**kind, "queue_pending" | "queue_pending_lc")
1394 )
1395 })
1396 });
1397
1398 for row in pending_rows {
1399 let _ = self.store.delete(QUEUE_META_COLLECTION, row.id);
1400 }
1401 }
1402}
1403
/// Returns the current wall-clock time as milliseconds since the Unix epoch.
///
/// A clock set before the epoch yields `0`, and a value too large for `u64`
/// is clamped to `u64::MAX`.
fn current_unix_ms() -> u64 {
    let since_epoch = SystemTime::now()
        .duration_since(UNIX_EPOCH)
        .unwrap_or_default();
    let millis = since_epoch.as_millis();

    if millis > u128::from(u64::MAX) {
        u64::MAX
    } else {
        millis as u64
    }
}