Skip to main content

nodedb_spatial/
persist.rs

1// SPDX-License-Identifier: Apache-2.0
2
3//! R-tree checkpoint/restore for durable persistence.
4//!
5//! ## On-disk framing
6//!
7//! **Plaintext** R-tree checkpoints start with the 6-byte magic `RKSPT\0`.
8//! The first 4 bytes are never `SEGV`, so detection is unambiguous.
9//!
10//! **Encrypted** checkpoints use the same SEGV framing as the vector engine:
11//!
12//! ```text
13//! [SEGV (4B)] [version_u16_le (2B)] [cipher_alg_u8 (1B)] [kid_u8 (1B)]
14//! [epoch (4B)] [reserved (4B)] [AES-256-GCM ciphertext of the inner payload]
15//! ```
16//!
17//! The inner payload is either the raw rkyv bytes for R-tree or the msgpack
18//! bytes for geohash — the existing plaintext format. The nonce is
19//! `(epoch, lsn=0)`, and the 16-byte preamble is used as AAD.
20//!
21//! Storage key scheme (in redb under Namespace::Spatial):
22//! - `{collection}\x00{field}\x00rtree` → serialized R-tree entries
23//! - `{collection}\x00{field}\x00meta`  → SpatialIndexMeta
24
25use nodedb_types::BoundingBox;
26use serde::{Deserialize, Serialize};
27use zerompk::{FromMessagePack, ToMessagePack};
28
29use crate::rtree::{RTree, RTreeEntry};
30
31// ── SEGV framing constants ─────────────────────────────────────────────────
32//
33// The encrypted envelope itself lives in `nodedb_wal::crypto`; only the
34// magic constant is local to this module.
35
36/// Magic bytes identifying an encrypted spatial checkpoint. Shared with
37/// `nodedb-vector`'s collection checkpoint format.
38const SEGV_MAGIC: [u8; 4] = *b"SEGV";
39
40// ── Plaintext inner-format constants ───────────────────────────────────────
41
42/// Magic header for rkyv-serialized R-tree snapshots (6 bytes).
43const RTREE_RKYV_MAGIC: &[u8; 6] = b"RKSPT\0";
44
45/// Current format version for rkyv-serialized R-tree snapshots.
46pub const RTREE_FORMAT_VERSION: u8 = 1;
47
48// ── SEGV framing helpers ───────────────────────────────────────────────────
49
50fn encrypt_payload(
51    key: &nodedb_wal::crypto::WalEncryptionKey,
52    plaintext: &[u8],
53) -> Result<Vec<u8>, RTreeCheckpointError> {
54    nodedb_wal::crypto::encrypt_segment_envelope(key, &SEGV_MAGIC, plaintext)
55        .map_err(|e| RTreeCheckpointError::EncryptionFailed(e.to_string()))
56}
57
58fn decrypt_payload(
59    key: &nodedb_wal::crypto::WalEncryptionKey,
60    blob: &[u8],
61) -> Result<Vec<u8>, RTreeCheckpointError> {
62    nodedb_wal::crypto::decrypt_segment_envelope(key, &SEGV_MAGIC, blob)
63        .map_err(|e| RTreeCheckpointError::DecryptionFailed(e.to_string()))
64}
65
66/// Encrypt a geohash msgpack payload (called from `geohash_index.rs`).
67pub(crate) fn encrypt_geohash_payload(
68    key: &nodedb_wal::crypto::WalEncryptionKey,
69    plaintext: &[u8],
70) -> Result<Vec<u8>, RTreeCheckpointError> {
71    encrypt_payload(key, plaintext)
72}
73
74/// Decrypt a geohash msgpack payload (called from `geohash_index.rs`).
75pub(crate) fn decrypt_geohash_payload(
76    key: &nodedb_wal::crypto::WalEncryptionKey,
77    blob: &[u8],
78) -> Result<Vec<u8>, RTreeCheckpointError> {
79    decrypt_payload(key, blob)
80}
81
82// ── Metadata types ─────────────────────────────────────────────────────────
83
84/// Metadata for a persisted spatial index.
85#[derive(Debug, Clone, Serialize, Deserialize, ToMessagePack, FromMessagePack)]
86pub struct SpatialIndexMeta {
87    /// Collection this index belongs to.
88    pub collection: String,
89    /// Geometry field name being indexed.
90    pub field: String,
91    /// Index type.
92    pub index_type: SpatialIndexType,
93    /// Number of entries at last checkpoint.
94    pub entry_count: u64,
95    /// Bounding box of all indexed geometries (spatial extent).
96    pub extent: Option<BoundingBox>,
97}
98
99/// Type of spatial index.
100#[derive(
101    Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize, ToMessagePack, FromMessagePack,
102)]
103#[msgpack(c_enum)]
104#[repr(u8)]
105#[non_exhaustive]
106pub enum SpatialIndexType {
107    RTree = 0,
108    Geohash = 1,
109}
110
111impl SpatialIndexType {
112    pub fn as_str(&self) -> &'static str {
113        match self {
114            Self::RTree => "rtree",
115            Self::Geohash => "geohash",
116        }
117    }
118}
119
120impl std::fmt::Display for SpatialIndexType {
121    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
122        f.write_str(self.as_str())
123    }
124}
125
126// ── rkyv snapshot type ─────────────────────────────────────────────────────
127
128/// rkyv-serialized R-tree snapshot.
129#[derive(rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
130struct RTreeSnapshotRkyv {
131    entries: Vec<RTreeEntry>,
132}
133
134// ── RTree checkpoint impl ──────────────────────────────────────────────────
135
136impl RTree {
137    /// Serialize the R-tree to bytes for checkpointing.
138    ///
139    /// When `kek` is `Some`, the inner rkyv payload is wrapped in an AES-256-GCM
140    /// encrypted SEGV envelope. When `None`, the raw rkyv bytes (with `RKSPT\0`
141    /// inner magic) are returned (existing plaintext format).
142    pub fn checkpoint_to_bytes(
143        &self,
144        kek: Option<&nodedb_wal::crypto::WalEncryptionKey>,
145    ) -> Result<Vec<u8>, RTreeCheckpointError> {
146        let snapshot = RTreeSnapshotRkyv {
147            entries: self.entries().into_iter().cloned().collect(),
148        };
149        let rkyv_bytes = rkyv::to_bytes::<rkyv::rancor::Error>(&snapshot)
150            .map_err(|e| RTreeCheckpointError::RkyvSerialize(e.to_string()))?;
151
152        // Build inner plaintext: magic + version + rkyv payload.
153        let inner_len = RTREE_RKYV_MAGIC.len() + 1 + rkyv_bytes.len();
154        let _guard = self
155            .governor()
156            .and_then(|gov| gov.reserve(nodedb_mem::EngineId::Spatial, inner_len).ok());
157        let mut inner = Vec::with_capacity(inner_len);
158        inner.extend_from_slice(RTREE_RKYV_MAGIC);
159        inner.push(RTREE_FORMAT_VERSION);
160        inner.extend_from_slice(&rkyv_bytes);
161
162        if let Some(key) = kek {
163            return encrypt_payload(key, &inner);
164        }
165
166        Ok(inner)
167    }
168
169    /// Restore an R-tree from checkpoint bytes.
170    ///
171    /// `kek` controls the expected framing:
172    /// - `None` → file must be plaintext (starting with `RKSPT\0`). If it
173    ///   starts with `SEGV`, returns `Err(MissingKek)`.
174    /// - `Some(key)` → encryption is **required**. If the file starts with
175    ///   `SEGV`, it is decrypted. If plaintext, returns `Err(KekRequired)`.
176    pub fn from_checkpoint(
177        bytes: &[u8],
178        kek: Option<&nodedb_wal::crypto::WalEncryptionKey>,
179    ) -> Result<Self, RTreeCheckpointError> {
180        let is_encrypted = bytes.len() >= 4 && bytes[0..4] == SEGV_MAGIC;
181
182        let inner: Vec<u8>;
183        let inner_ref: &[u8];
184
185        if is_encrypted {
186            if let Some(key) = kek {
187                inner = decrypt_payload(key, bytes)?;
188                inner_ref = &inner;
189            } else {
190                return Err(RTreeCheckpointError::MissingKek);
191            }
192        } else if kek.is_some() {
193            return Err(RTreeCheckpointError::KekRequired);
194        } else {
195            inner_ref = bytes;
196        }
197
198        Self::decode_plaintext_inner(inner_ref)
199    }
200
201    fn decode_plaintext_inner(bytes: &[u8]) -> Result<Self, RTreeCheckpointError> {
202        let header_len = RTREE_RKYV_MAGIC.len() + 1; // magic + version byte
203        if bytes.len() <= header_len || &bytes[..RTREE_RKYV_MAGIC.len()] != RTREE_RKYV_MAGIC {
204            return Err(RTreeCheckpointError::UnrecognizedFormat);
205        }
206        let version = bytes[RTREE_RKYV_MAGIC.len()];
207        if version != RTREE_FORMAT_VERSION {
208            return Err(RTreeCheckpointError::UnsupportedVersion {
209                found: version,
210                expected: RTREE_FORMAT_VERSION,
211            });
212        }
213        let rkyv_bytes = &bytes[header_len..];
214        let mut aligned = rkyv::util::AlignedVec::<16>::with_capacity(rkyv_bytes.len());
215        aligned.extend_from_slice(rkyv_bytes);
216        let snapshot: RTreeSnapshotRkyv =
217            rkyv::from_bytes::<RTreeSnapshotRkyv, rkyv::rancor::Error>(&aligned)
218                .map_err(|e| RTreeCheckpointError::RkyvDeserialize(e.to_string()))?;
219        Ok(RTree::bulk_load(snapshot.entries))
220    }
221}
222
223// ── Storage key helpers ────────────────────────────────────────────────────
224
225/// Build the storage key for an R-tree checkpoint.
226///
227/// Format: `{collection}\0{field}\0rtree`
228pub fn rtree_storage_key(collection: &str, field: &str) -> Vec<u8> {
229    // no-governor: fixed-tiny storage key; bounded by collection+field name lengths (cold path)
230    let mut key = Vec::with_capacity(collection.len() + field.len() + 8);
231    key.extend_from_slice(collection.as_bytes());
232    key.push(0);
233    key.extend_from_slice(field.as_bytes());
234    key.push(0);
235    key.extend_from_slice(b"rtree");
236    key
237}
238
239/// Build the storage key for spatial index metadata.
240///
241/// Format: `{collection}\0{field}\0meta`
242pub fn meta_storage_key(collection: &str, field: &str) -> Vec<u8> {
243    // no-governor: fixed-tiny storage key; bounded by collection+field name lengths (cold path)
244    let mut key = Vec::with_capacity(collection.len() + field.len() + 7);
245    key.extend_from_slice(collection.as_bytes());
246    key.push(0);
247    key.extend_from_slice(field.as_bytes());
248    key.push(0);
249    key.extend_from_slice(b"meta");
250    key
251}
252
253/// Serialize index metadata to bytes.
254pub fn serialize_meta(meta: &SpatialIndexMeta) -> Result<Vec<u8>, RTreeCheckpointError> {
255    zerompk::to_msgpack_vec(meta).map_err(RTreeCheckpointError::Serialize)
256}
257
258/// Deserialize index metadata from bytes.
259pub fn deserialize_meta(bytes: &[u8]) -> Result<SpatialIndexMeta, RTreeCheckpointError> {
260    zerompk::from_msgpack(bytes).map_err(RTreeCheckpointError::Deserialize)
261}
262
263// ── Error type ─────────────────────────────────────────────────────────────
264
265/// Errors during R-tree checkpoint operations.
266#[derive(Debug, thiserror::Error)]
267#[non_exhaustive]
268pub enum RTreeCheckpointError {
269    #[error("R-tree checkpoint serialization failed: {0}")]
270    Serialize(zerompk::Error),
271    #[error("R-tree checkpoint deserialization failed: {0}")]
272    Deserialize(zerompk::Error),
273    #[error("R-tree rkyv serialization failed: {0}")]
274    RkyvSerialize(String),
275    #[error("R-tree rkyv deserialization failed: {0}")]
276    RkyvDeserialize(String),
277    #[error("unsupported R-tree checkpoint version {found}; expected {expected}")]
278    UnsupportedVersion { found: u8, expected: u8 },
279    #[error("unrecognized R-tree checkpoint format (missing RKSPT\\0 magic)")]
280    UnrecognizedFormat,
281    /// Checkpoint is encrypted (starts with `SEGV`) but no KEK was supplied.
282    #[error(
283        "spatial checkpoint is encrypted but no encryption key was provided; \
284         cannot load an encrypted checkpoint without a key"
285    )]
286    MissingKek,
287    /// Checkpoint is plaintext but a KEK is configured (policy violation).
288    #[error(
289        "spatial checkpoint is plaintext but an encryption key is configured; \
290         refusing to load an unencrypted checkpoint when encryption is required"
291    )]
292    KekRequired,
293    /// AES-256-GCM encryption failed.
294    #[error("spatial checkpoint encryption failed: {0}")]
295    EncryptionFailed(String),
296    /// AES-256-GCM decryption failed.
297    #[error("spatial checkpoint decryption failed: {0}")]
298    DecryptionFailed(String),
299}
300
301// ── Tests ──────────────────────────────────────────────────────────────────
302
303#[cfg(test)]
304mod tests {
305    use super::*;
306
307    fn make_entry(id: u64, lng: f64, lat: f64) -> RTreeEntry {
308        RTreeEntry {
309            id,
310            bbox: BoundingBox::from_point(lng, lat),
311        }
312    }
313
314    #[test]
315    fn checkpoint_roundtrip_empty() {
316        let tree = RTree::new();
317        let bytes = tree.checkpoint_to_bytes(None).unwrap();
318        let restored = RTree::from_checkpoint(&bytes, None).unwrap();
319        assert_eq!(restored.len(), 0);
320    }
321
322    #[test]
323    fn checkpoint_roundtrip_entries() {
324        let mut tree = RTree::new();
325        for i in 0..100 {
326            tree.insert(make_entry(i, (i as f64) * 0.5, (i as f64) * 0.3));
327        }
328        assert_eq!(tree.len(), 100);
329
330        let bytes = tree.checkpoint_to_bytes(None).unwrap();
331        let restored = RTree::from_checkpoint(&bytes, None).unwrap();
332        assert_eq!(restored.len(), 100);
333
334        // All entries should be searchable.
335        let all = restored.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
336        assert_eq!(all.len(), 100);
337    }
338
339    #[test]
340    fn checkpoint_preserves_ids() {
341        let mut tree = RTree::new();
342        tree.insert(make_entry(42, 10.0, 20.0));
343        tree.insert(make_entry(99, 30.0, 40.0));
344
345        let bytes = tree.checkpoint_to_bytes(None).unwrap();
346        let restored = RTree::from_checkpoint(&bytes, None).unwrap();
347
348        let results = restored.search(&BoundingBox::new(5.0, 15.0, 15.0, 25.0));
349        assert_eq!(results.len(), 1);
350        assert_eq!(results[0].id, 42);
351    }
352
353    #[test]
354    fn corrupted_bytes_returns_error() {
355        assert!(matches!(
356            RTree::from_checkpoint(&[0xFF, 0xFF, 0xFF], None),
357            Err(RTreeCheckpointError::UnrecognizedFormat)
358        ));
359    }
360
361    #[test]
362    fn meta_roundtrip() {
363        let meta = SpatialIndexMeta {
364            collection: "buildings".to_string(),
365            field: "geom".to_string(),
366            index_type: SpatialIndexType::RTree,
367            entry_count: 1000,
368            extent: Some(BoundingBox::new(-180.0, -90.0, 180.0, 90.0)),
369        };
370        let bytes = serialize_meta(&meta).unwrap();
371        let restored = deserialize_meta(&bytes).unwrap();
372        assert_eq!(restored.collection, "buildings");
373        assert_eq!(restored.entry_count, 1000);
374        assert_eq!(restored.index_type, SpatialIndexType::RTree);
375    }
376
377    #[test]
378    fn storage_key_format() {
379        let key = rtree_storage_key("buildings", "geom");
380        assert_eq!(key, b"buildings\0geom\0rtree");
381
382        let meta_key = meta_storage_key("buildings", "geom");
383        assert_eq!(meta_key, b"buildings\0geom\0meta");
384    }
385
386    #[test]
387    fn checkpoint_size_reasonable() {
388        let mut tree = RTree::new();
389        for i in 0..1000 {
390            tree.insert(make_entry(i, (i as f64) * 0.01, (i as f64) * 0.01));
391        }
392        let bytes = tree.checkpoint_to_bytes(None).unwrap();
393        // Each entry: id(8) + 4 f64(32) = ~40 bytes + rkyv overhead.
394        // 1000 entries ≈ 40-60KB is reasonable.
395        assert!(
396            bytes.len() < 100_000,
397            "checkpoint too large: {} bytes",
398            bytes.len()
399        );
400        assert!(
401            bytes.len() > 10_000,
402            "checkpoint too small: {} bytes",
403            bytes.len()
404        );
405    }
406
407    #[test]
408    fn golden_header_layout() {
409        let mut tree = RTree::new();
410        tree.insert(make_entry(1, 10.0, 20.0));
411        let bytes = tree.checkpoint_to_bytes(None).unwrap();
412        // Magic at bytes[0..6].
413        assert_eq!(&bytes[0..6], b"RKSPT\0");
414        // Version byte at bytes[6].
415        assert_eq!(bytes[6], super::RTREE_FORMAT_VERSION);
416        // rkyv payload follows immediately.
417        assert!(bytes.len() > 7);
418    }
419
420    #[test]
421    fn version_mismatch_returns_error() {
422        let mut tree = RTree::new();
423        tree.insert(make_entry(1, 10.0, 20.0));
424        let mut bytes = tree.checkpoint_to_bytes(None).unwrap();
425        // Corrupt the version byte to an unsupported value.
426        bytes[6] = 0;
427        match RTree::from_checkpoint(&bytes, None) {
428            Err(RTreeCheckpointError::UnsupportedVersion { found, expected }) => {
429                assert_eq!(found, 0);
430                assert_eq!(expected, super::RTREE_FORMAT_VERSION);
431            }
432            Err(other) => panic!("unexpected error: {other}"),
433            Ok(_) => panic!("expected UnsupportedVersion error, got Ok"),
434        }
435    }
436
437    fn make_test_kek() -> nodedb_wal::crypto::WalEncryptionKey {
438        nodedb_wal::crypto::WalEncryptionKey::from_bytes(&[0x42u8; 32]).unwrap()
439    }
440
441    #[test]
442    fn spatial_rtree_checkpoint_encrypted_at_rest() {
443        let kek = make_test_kek();
444        let mut tree = RTree::new();
445        for i in 0..50 {
446            tree.insert(make_entry(i, i as f64, i as f64 * 0.5));
447        }
448
449        let enc_bytes = tree.checkpoint_to_bytes(Some(&kek)).unwrap();
450
451        // Encrypted blob must start with SEGV, not RKSPT.
452        assert_eq!(&enc_bytes[0..4], b"SEGV");
453
454        // Round-trip: decrypt and verify all entries survive.
455        let restored = RTree::from_checkpoint(&enc_bytes, Some(&kek)).unwrap();
456        assert_eq!(restored.len(), 50);
457        let all = restored.search(&BoundingBox::new(-180.0, -90.0, 180.0, 90.0));
458        assert_eq!(all.len(), 50);
459    }
460
461    #[test]
462    fn spatial_rtree_refuses_plaintext_when_kek_required() {
463        let kek = make_test_kek();
464        let mut tree = RTree::new();
465        tree.insert(make_entry(1, 10.0, 20.0));
466
467        // Write plaintext checkpoint.
468        let plain_bytes = tree.checkpoint_to_bytes(None).unwrap();
469
470        // Attempting to load with a KEK must be refused.
471        assert!(matches!(
472            RTree::from_checkpoint(&plain_bytes, Some(&kek)),
473            Err(RTreeCheckpointError::KekRequired)
474        ));
475    }
476
477    #[test]
478    fn spatial_rtree_refuses_encrypted_without_kek() {
479        let kek = make_test_kek();
480        let mut tree = RTree::new();
481        tree.insert(make_entry(1, 10.0, 20.0));
482
483        let enc_bytes = tree.checkpoint_to_bytes(Some(&kek)).unwrap();
484
485        // Loading without a key must be refused.
486        assert!(matches!(
487            RTree::from_checkpoint(&enc_bytes, None),
488            Err(RTreeCheckpointError::MissingKek)
489        ));
490    }
491
492    #[test]
493    fn spatial_rtree_tampered_ciphertext_rejected() {
494        let kek = make_test_kek();
495        let mut tree = RTree::new();
496        tree.insert(make_entry(1, 10.0, 20.0));
497
498        let mut enc_bytes = tree.checkpoint_to_bytes(Some(&kek)).unwrap();
499        // Flip a byte in the ciphertext region (after the 16-byte preamble).
500        enc_bytes[20] ^= 0xFF;
501
502        assert!(matches!(
503            RTree::from_checkpoint(&enc_bytes, Some(&kek)),
504            Err(RTreeCheckpointError::DecryptionFailed(_))
505        ));
506    }
507}