Skip to main content

nodedb_spatial/
persist.rs

1//! R-tree checkpoint/restore for durable persistence.
2//!
3//! Follows the same pattern as the vector engine's HNSW checkpoint:
4//! serialize all entries to bytes (MessagePack) → write to storage →
5//! restore via bulk_load on cold start.
6//!
7//! Storage key scheme (in redb under Namespace::Spatial):
8//! - `{collection}\x00{field}\x00rtree` → serialized R-tree entries
9//! - `{collection}\x00{field}\x00meta` → SpatialIndexMeta
10
11use nodedb_types::BoundingBox;
12use serde::{Deserialize, Serialize};
13
14use crate::rtree::{RTree, RTreeEntry};
15
16/// Metadata for a persisted spatial index.
17#[derive(Debug, Clone, Serialize, Deserialize)]
18pub struct SpatialIndexMeta {
19    /// Collection this index belongs to.
20    pub collection: String,
21    /// Geometry field name being indexed.
22    pub field: String,
23    /// Index type.
24    pub index_type: SpatialIndexType,
25    /// Number of entries at last checkpoint.
26    pub entry_count: usize,
27    /// Bounding box of all indexed geometries (spatial extent).
28    pub extent: Option<BoundingBox>,
29}
30
31/// Type of spatial index.
32#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
33pub enum SpatialIndexType {
34    RTree,
35    Geohash,
36}
37
38impl SpatialIndexType {
39    pub fn as_str(&self) -> &'static str {
40        match self {
41            Self::RTree => "rtree",
42            Self::Geohash => "geohash",
43        }
44    }
45}
46
47impl std::fmt::Display for SpatialIndexType {
48    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
49        f.write_str(self.as_str())
50    }
51}
52
53/// Serialized R-tree snapshot for checkpoint.
54#[derive(Debug, Serialize, Deserialize)]
55pub struct RTreeSnapshot {
56    pub entries: Vec<RTreeEntry>,
57}
58
59impl RTree {
60    /// Serialize the R-tree to bytes for checkpointing.
61    ///
62    /// Collects all entries and serializes via MessagePack. On restore,
63    /// `from_checkpoint` bulk-loads entries for optimal packing.
64    pub fn checkpoint_to_bytes(&self) -> Result<Vec<u8>, RTreeCheckpointError> {
65        let snapshot = RTreeSnapshot {
66            entries: self.entries().into_iter().cloned().collect(),
67        };
68        rmp_serde::to_vec_named(&snapshot).map_err(RTreeCheckpointError::Serialize)
69    }
70
71    /// Restore an R-tree from checkpoint bytes.
72    ///
73    /// Uses bulk_load (STR packing) for better node packing than
74    /// sequential insert. Returns None if bytes are corrupted.
75    pub fn from_checkpoint(bytes: &[u8]) -> Result<Self, RTreeCheckpointError> {
76        let snapshot: RTreeSnapshot =
77            rmp_serde::from_slice(bytes).map_err(RTreeCheckpointError::Deserialize)?;
78        Ok(RTree::bulk_load(snapshot.entries))
79    }
80}
81
82/// Build the storage key for an R-tree checkpoint.
83///
84/// Format: `{collection}\0{field}\0rtree`
85pub fn rtree_storage_key(collection: &str, field: &str) -> Vec<u8> {
86    let mut key = Vec::with_capacity(collection.len() + field.len() + 8);
87    key.extend_from_slice(collection.as_bytes());
88    key.push(0);
89    key.extend_from_slice(field.as_bytes());
90    key.push(0);
91    key.extend_from_slice(b"rtree");
92    key
93}
94
95/// Build the storage key for spatial index metadata.
96///
97/// Format: `{collection}\0{field}\0meta`
98pub fn meta_storage_key(collection: &str, field: &str) -> Vec<u8> {
99    let mut key = Vec::with_capacity(collection.len() + field.len() + 7);
100    key.extend_from_slice(collection.as_bytes());
101    key.push(0);
102    key.extend_from_slice(field.as_bytes());
103    key.push(0);
104    key.extend_from_slice(b"meta");
105    key
106}
107
108/// Serialize index metadata to bytes.
109pub fn serialize_meta(meta: &SpatialIndexMeta) -> Result<Vec<u8>, RTreeCheckpointError> {
110    rmp_serde::to_vec_named(meta).map_err(RTreeCheckpointError::Serialize)
111}
112
113/// Deserialize index metadata from bytes.
114pub fn deserialize_meta(bytes: &[u8]) -> Result<SpatialIndexMeta, RTreeCheckpointError> {
115    rmp_serde::from_slice(bytes).map_err(RTreeCheckpointError::Deserialize)
116}
117
118/// Errors during R-tree checkpoint operations.
119#[derive(Debug, thiserror::Error)]
120pub enum RTreeCheckpointError {
121    #[error("R-tree checkpoint serialization failed: {0}")]
122    Serialize(rmp_serde::encode::Error),
123    #[error("R-tree checkpoint deserialization failed: {0}")]
124    Deserialize(rmp_serde::decode::Error),
125}
126
127#[cfg(test)]
128mod tests {
129    use super::*;
130
131    fn make_entry(id: u64, lng: f64, lat: f64) -> RTreeEntry {
132        RTreeEntry {
133            id,
134            bbox: BoundingBox::from_point(lng, lat),
135        }
136    }
137
138    #[test]
139    fn checkpoint_roundtrip_empty() {
140        let tree = RTree::new();
141        let bytes = tree.checkpoint_to_bytes().unwrap();
142        let restored = RTree::from_checkpoint(&bytes).unwrap();
143        assert_eq!(restored.len(), 0);
144    }
145
146    #[test]
147    fn checkpoint_roundtrip_entries() {
148        let mut tree = RTree::new();
149        for i in 0..100 {
150            tree.insert(make_entry(i, (i as f64) * 0.5, (i as f64) * 0.3));
151        }
152        assert_eq!(tree.len(), 100);
153
154        let bytes = tree.checkpoint_to_bytes().unwrap();
155        let restored = RTree::from_checkpoint(&bytes).unwrap();
156        assert_eq!(restored.len(), 100);
157
158        // All entries should be searchable.
159        let all = restored.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
160        assert_eq!(all.len(), 100);
161    }
162
163    #[test]
164    fn checkpoint_preserves_ids() {
165        let mut tree = RTree::new();
166        tree.insert(make_entry(42, 10.0, 20.0));
167        tree.insert(make_entry(99, 30.0, 40.0));
168
169        let bytes = tree.checkpoint_to_bytes().unwrap();
170        let restored = RTree::from_checkpoint(&bytes).unwrap();
171
172        let results = restored.search(&BoundingBox::new(5.0, 15.0, 15.0, 25.0));
173        assert_eq!(results.len(), 1);
174        assert_eq!(results[0].id, 42);
175    }
176
177    #[test]
178    fn corrupted_bytes_returns_error() {
179        assert!(RTree::from_checkpoint(&[0xFF, 0xFF, 0xFF]).is_err());
180    }
181
182    #[test]
183    fn meta_roundtrip() {
184        let meta = SpatialIndexMeta {
185            collection: "buildings".to_string(),
186            field: "geom".to_string(),
187            index_type: SpatialIndexType::RTree,
188            entry_count: 1000,
189            extent: Some(BoundingBox::new(-180.0, -90.0, 180.0, 90.0)),
190        };
191        let bytes = serialize_meta(&meta).unwrap();
192        let restored = deserialize_meta(&bytes).unwrap();
193        assert_eq!(restored.collection, "buildings");
194        assert_eq!(restored.entry_count, 1000);
195        assert_eq!(restored.index_type, SpatialIndexType::RTree);
196    }
197
198    #[test]
199    fn storage_key_format() {
200        let key = rtree_storage_key("buildings", "geom");
201        assert_eq!(key, b"buildings\0geom\0rtree");
202
203        let meta_key = meta_storage_key("buildings", "geom");
204        assert_eq!(meta_key, b"buildings\0geom\0meta");
205    }
206
207    #[test]
208    fn checkpoint_size_reasonable() {
209        let mut tree = RTree::new();
210        for i in 0..1000 {
211            tree.insert(make_entry(i, (i as f64) * 0.01, (i as f64) * 0.01));
212        }
213        let bytes = tree.checkpoint_to_bytes().unwrap();
214        // Each entry: id(8) + 4 f64(32) = ~40 bytes + msgpack overhead.
215        // 1000 entries ≈ 40-60KB is reasonable.
216        assert!(
217            bytes.len() < 100_000,
218            "checkpoint too large: {} bytes",
219            bytes.len()
220        );
221        assert!(
222            bytes.len() > 10_000,
223            "checkpoint too small: {} bytes",
224            bytes.len()
225        );
226    }
227}