Skip to main content

nodedb_spatial/
persist.rs

1//! R-tree checkpoint/restore for durable persistence.
2//!
3//! Follows the same pattern as the vector engine's HNSW checkpoint:
4//! serialize all entries to bytes (MessagePack) → write to storage →
5//! restore via bulk_load on cold start.
6//!
7//! Storage key scheme (in redb under Namespace::Spatial):
8//! - `{collection}\x00{field}\x00rtree` → serialized R-tree entries
9//! - `{collection}\x00{field}\x00meta` → SpatialIndexMeta
10
11use nodedb_types::BoundingBox;
12use serde::{Deserialize, Serialize};
13use zerompk::{FromMessagePack, ToMessagePack};
14
15use crate::rtree::{RTree, RTreeEntry};
16
17/// Metadata for a persisted spatial index.
18#[derive(Debug, Clone, Serialize, Deserialize, ToMessagePack, FromMessagePack)]
19pub struct SpatialIndexMeta {
20    /// Collection this index belongs to.
21    pub collection: String,
22    /// Geometry field name being indexed.
23    pub field: String,
24    /// Index type.
25    pub index_type: SpatialIndexType,
26    /// Number of entries at last checkpoint.
27    pub entry_count: usize,
28    /// Bounding box of all indexed geometries (spatial extent).
29    pub extent: Option<BoundingBox>,
30}
31
32/// Type of spatial index.
33#[derive(
34    Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToMessagePack, FromMessagePack,
35)]
36#[msgpack(c_enum)]
37#[repr(u8)]
38pub enum SpatialIndexType {
39    RTree = 0,
40    Geohash = 1,
41}
42
43impl SpatialIndexType {
44    pub fn as_str(&self) -> &'static str {
45        match self {
46            Self::RTree => "rtree",
47            Self::Geohash => "geohash",
48        }
49    }
50}
51
52impl std::fmt::Display for SpatialIndexType {
53    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
54        f.write_str(self.as_str())
55    }
56}
57
58/// Magic header for rkyv-serialized R-tree snapshots (6 bytes).
59const RTREE_RKYV_MAGIC: &[u8; 6] = b"RKSPT\0";
60
61/// Serialized R-tree snapshot (legacy MessagePack).
62#[derive(Debug, Serialize, Deserialize, ToMessagePack, FromMessagePack)]
63pub struct RTreeSnapshot {
64    pub entries: Vec<RTreeEntry>,
65}
66
67/// rkyv-serialized R-tree snapshot.
68#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
69struct RTreeSnapshotRkyv {
70    entries: Vec<RTreeEntry>,
71}
72
73impl RTree {
74    /// Serialize the R-tree to rkyv bytes (with magic header) for checkpointing.
75    pub fn checkpoint_to_bytes(&self) -> Result<Vec<u8>, RTreeCheckpointError> {
76        let snapshot = RTreeSnapshotRkyv {
77            entries: self.entries().into_iter().cloned().collect(),
78        };
79        let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&snapshot)
80            .map_err(|e| RTreeCheckpointError::RkyvSerialize(e.to_string()))?;
81        let mut buf = Vec::with_capacity(RTREE_RKYV_MAGIC.len() + rkyv_bytes.len());
82        buf.extend_from_slice(RTREE_RKYV_MAGIC);
83        buf.extend_from_slice(&rkyv_bytes);
84        Ok(buf)
85    }
86
87    /// Restore an R-tree from checkpoint bytes.
88    ///
89    /// Auto-detects format: rkyv (magic `RKSPT\0`) or legacy MessagePack.
90    /// Uses bulk_load (STR packing) for optimal node packing.
91    pub fn from_checkpoint(bytes: &[u8]) -> Result<Self, RTreeCheckpointError> {
92        if bytes.len() > RTREE_RKYV_MAGIC.len()
93            && &bytes[..RTREE_RKYV_MAGIC.len()] == RTREE_RKYV_MAGIC
94        {
95            let rkyv_bytes = &bytes[RTREE_RKYV_MAGIC.len()..];
96            let mut aligned = rkyv::util::AlignedVec::<16>::with_capacity(rkyv_bytes.len());
97            aligned.extend_from_slice(rkyv_bytes);
98            let snapshot: RTreeSnapshotRkyv =
99                rkyv::from_bytes::<RTreeSnapshotRkyv, rkyv::rancor::Error>(&aligned)
100                    .map_err(|e| RTreeCheckpointError::RkyvDeserialize(e.to_string()))?;
101            return Ok(RTree::bulk_load(snapshot.entries));
102        }
103        // Legacy MessagePack fallback.
104        let snapshot: RTreeSnapshot =
105            zerompk::from_msgpack(bytes).map_err(RTreeCheckpointError::Deserialize)?;
106        Ok(RTree::bulk_load(snapshot.entries))
107    }
108}
109
110/// Build the storage key for an R-tree checkpoint.
111///
112/// Format: `{collection}\0{field}\0rtree`
113pub fn rtree_storage_key(collection: &str, field: &str) -> Vec<u8> {
114    let mut key = Vec::with_capacity(collection.len() + field.len() + 8);
115    key.extend_from_slice(collection.as_bytes());
116    key.push(0);
117    key.extend_from_slice(field.as_bytes());
118    key.push(0);
119    key.extend_from_slice(b"rtree");
120    key
121}
122
123/// Build the storage key for spatial index metadata.
124///
125/// Format: `{collection}\0{field}\0meta`
126pub fn meta_storage_key(collection: &str, field: &str) -> Vec<u8> {
127    let mut key = Vec::with_capacity(collection.len() + field.len() + 7);
128    key.extend_from_slice(collection.as_bytes());
129    key.push(0);
130    key.extend_from_slice(field.as_bytes());
131    key.push(0);
132    key.extend_from_slice(b"meta");
133    key
134}
135
136/// Serialize index metadata to bytes.
137pub fn serialize_meta(meta: &SpatialIndexMeta) -> Result<Vec<u8>, RTreeCheckpointError> {
138    zerompk::to_msgpack_vec(meta).map_err(RTreeCheckpointError::Serialize)
139}
140
141/// Deserialize index metadata from bytes.
142pub fn deserialize_meta(bytes: &[u8]) -> Result<SpatialIndexMeta, RTreeCheckpointError> {
143    zerompk::from_msgpack(bytes).map_err(RTreeCheckpointError::Deserialize)
144}
145
146/// Errors during R-tree checkpoint operations.
147#[derive(Debug, thiserror::Error)]
148pub enum RTreeCheckpointError {
149    #[error("R-tree checkpoint serialization failed: {0}")]
150    Serialize(zerompk::Error),
151    #[error("R-tree checkpoint deserialization failed: {0}")]
152    Deserialize(zerompk::Error),
153    #[error("R-tree rkyv serialization failed: {0}")]
154    RkyvSerialize(String),
155    #[error("R-tree rkyv deserialization failed: {0}")]
156    RkyvDeserialize(String),
157}
158
159#[cfg(test)]
160mod tests {
161    use super::*;
162
163    fn make_entry(id: u64, lng: f64, lat: f64) -> RTreeEntry {
164        RTreeEntry {
165            id,
166            bbox: BoundingBox::from_point(lng, lat),
167        }
168    }
169
170    #[test]
171    fn checkpoint_roundtrip_empty() {
172        let tree = RTree::new();
173        let bytes = tree.checkpoint_to_bytes().unwrap();
174        let restored = RTree::from_checkpoint(&bytes).unwrap();
175        assert_eq!(restored.len(), 0);
176    }
177
178    #[test]
179    fn checkpoint_roundtrip_entries() {
180        let mut tree = RTree::new();
181        for i in 0..100 {
182            tree.insert(make_entry(i, (i as f64) * 0.5, (i as f64) * 0.3));
183        }
184        assert_eq!(tree.len(), 100);
185
186        let bytes = tree.checkpoint_to_bytes().unwrap();
187        let restored = RTree::from_checkpoint(&bytes).unwrap();
188        assert_eq!(restored.len(), 100);
189
190        // All entries should be searchable.
191        let all = restored.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
192        assert_eq!(all.len(), 100);
193    }
194
195    #[test]
196    fn checkpoint_preserves_ids() {
197        let mut tree = RTree::new();
198        tree.insert(make_entry(42, 10.0, 20.0));
199        tree.insert(make_entry(99, 30.0, 40.0));
200
201        let bytes = tree.checkpoint_to_bytes().unwrap();
202        let restored = RTree::from_checkpoint(&bytes).unwrap();
203
204        let results = restored.search(&BoundingBox::new(5.0, 15.0, 15.0, 25.0));
205        assert_eq!(results.len(), 1);
206        assert_eq!(results[0].id, 42);
207    }
208
209    #[test]
210    fn corrupted_bytes_returns_error() {
211        assert!(RTree::from_checkpoint(&[0xFF, 0xFF, 0xFF]).is_err());
212    }
213
214    #[test]
215    fn meta_roundtrip() {
216        let meta = SpatialIndexMeta {
217            collection: "buildings".to_string(),
218            field: "geom".to_string(),
219            index_type: SpatialIndexType::RTree,
220            entry_count: 1000,
221            extent: Some(BoundingBox::new(-180.0, -90.0, 180.0, 90.0)),
222        };
223        let bytes = serialize_meta(&meta).unwrap();
224        let restored = deserialize_meta(&bytes).unwrap();
225        assert_eq!(restored.collection, "buildings");
226        assert_eq!(restored.entry_count, 1000);
227        assert_eq!(restored.index_type, SpatialIndexType::RTree);
228    }
229
230    #[test]
231    fn storage_key_format() {
232        let key = rtree_storage_key("buildings", "geom");
233        assert_eq!(key, b"buildings\0geom\0rtree");
234
235        let meta_key = meta_storage_key("buildings", "geom");
236        assert_eq!(meta_key, b"buildings\0geom\0meta");
237    }
238
239    #[test]
240    fn checkpoint_size_reasonable() {
241        let mut tree = RTree::new();
242        for i in 0..1000 {
243            tree.insert(make_entry(i, (i as f64) * 0.01, (i as f64) * 0.01));
244        }
245        let bytes = tree.checkpoint_to_bytes().unwrap();
246        // Each entry: id(8) + 4 f64(32) = ~40 bytes + msgpack overhead.
247        // 1000 entries ≈ 40-60KB is reasonable.
248        assert!(
249            bytes.len() < 100_000,
250            "checkpoint too large: {} bytes",
251            bytes.len()
252        );
253        assert!(
254            bytes.len() > 10_000,
255            "checkpoint too small: {} bytes",
256            bytes.len()
257        );
258    }
259}