Skip to main content

nodedb_cluster/distributed_spatial/
shard_routing.rs

1//! Spatial-aware shard routing.
2//!
3//! Maintains per-shard bounding box metadata — the spatial extent of all
4//! geometries on each shard. On spatial query, only fan out to shards
5//! whose bounding box overlaps the query region.
6//!
7//! Updated at flush time: each shard reports its spatial extent to the
8//! routing table. Bounding box union is cheap (min of mins, max of maxes).
9
10use nodedb_types::BoundingBox;
11use serde::{Deserialize, Serialize};
12use std::collections::HashMap;
13
14/// Per-shard spatial extent for a collection.
15#[derive(Debug, Clone, Serialize, Deserialize)]
16pub struct ShardSpatialExtent {
17    pub shard_id: u16,
18    /// Bounding box covering all geometries in this shard for this collection.
19    /// None if the shard has no spatial data for this collection.
20    pub extent: Option<BoundingBox>,
21    /// Number of spatial entries on this shard.
22    pub entry_count: usize,
23}
24
25/// Per-collection spatial routing metadata.
26///
27/// Tracks which shards have spatial data and their extent. Used by the
28/// coordinator to avoid fanning out to shards that can't possibly match.
29pub struct SpatialRoutingTable {
30    /// collection_name → shard_id → extent.
31    extents: HashMap<String, HashMap<u16, ShardSpatialExtent>>,
32}
33
34impl SpatialRoutingTable {
35    pub fn new() -> Self {
36        Self {
37            extents: HashMap::new(),
38        }
39    }
40
41    /// Update the spatial extent for a shard.
42    pub fn update_extent(
43        &mut self,
44        collection: &str,
45        shard_id: u16,
46        extent: Option<BoundingBox>,
47        entry_count: usize,
48    ) {
49        let entry = ShardSpatialExtent {
50            shard_id,
51            extent,
52            entry_count,
53        };
54        self.extents
55            .entry(collection.to_string())
56            .or_default()
57            .insert(shard_id, entry);
58    }
59
60    /// Find which shards might contain results for a spatial query.
61    ///
62    /// Returns shard IDs whose extent overlaps the query bounding box.
63    /// If no extent data is available for a shard, it is conservatively
64    /// included (we don't know what's there, so we must check).
65    pub fn route_query(
66        &self,
67        collection: &str,
68        query_bbox: &BoundingBox,
69        all_shard_ids: &[u16],
70    ) -> Vec<u16> {
71        let Some(shard_extents) = self.extents.get(collection) else {
72            // No extent data at all — fan out to all shards (conservative).
73            return all_shard_ids.to_vec();
74        };
75
76        let mut target_shards = Vec::new();
77        for &shard_id in all_shard_ids {
78            match shard_extents.get(&shard_id) {
79                Some(ext) => {
80                    match &ext.extent {
81                        Some(bbox) if !bbox.intersects(query_bbox) => {
82                            // Skip — shard extent doesn't overlap query.
83                        }
84                        _ => target_shards.push(shard_id),
85                    }
86                }
87                None => {
88                    // No extent data for this shard — include conservatively.
89                    target_shards.push(shard_id);
90                }
91            }
92        }
93        target_shards
94    }
95
96    /// Get the total number of spatial entries across all shards for a collection.
97    pub fn total_entries(&self, collection: &str) -> usize {
98        self.extents
99            .get(collection)
100            .map(|shards| shards.values().map(|e| e.entry_count).sum())
101            .unwrap_or(0)
102    }
103}
104
105impl Default for SpatialRoutingTable {
106    fn default() -> Self {
107        Self::new()
108    }
109}
110
111#[cfg(test)]
112mod tests {
113    use super::*;
114
115    #[test]
116    fn route_filters_non_overlapping() {
117        let mut table = SpatialRoutingTable::new();
118        // Shard 0: buildings in NYC area.
119        table.update_extent(
120            "buildings",
121            0,
122            Some(BoundingBox::new(-74.1, 40.6, -73.9, 40.8)),
123            1000,
124        );
125        // Shard 1: buildings in London area.
126        table.update_extent(
127            "buildings",
128            1,
129            Some(BoundingBox::new(-0.2, 51.4, 0.1, 51.6)),
130            500,
131        );
132        // Shard 2: buildings in Tokyo.
133        table.update_extent(
134            "buildings",
135            2,
136            Some(BoundingBox::new(139.6, 35.6, 139.8, 35.8)),
137            300,
138        );
139
140        // Query near NYC — should only route to shard 0.
141        let query = BoundingBox::new(-74.05, 40.7, -73.95, 40.8);
142        let targets = table.route_query("buildings", &query, &[0, 1, 2]);
143        assert_eq!(targets, vec![0]);
144    }
145
146    #[test]
147    fn route_includes_unknown_shards() {
148        let mut table = SpatialRoutingTable::new();
149        table.update_extent(
150            "buildings",
151            0,
152            Some(BoundingBox::new(0.0, 0.0, 10.0, 10.0)),
153            100,
154        );
155        // Shard 1 has no extent data.
156
157        let query = BoundingBox::new(5.0, 5.0, 15.0, 15.0);
158        let targets = table.route_query("buildings", &query, &[0, 1]);
159        // Both shards included: 0 overlaps, 1 is unknown.
160        assert_eq!(targets.len(), 2);
161    }
162
163    #[test]
164    fn route_no_extent_data_fans_out_all() {
165        let table = SpatialRoutingTable::new();
166        let targets =
167            table.route_query("unknown", &BoundingBox::new(0.0, 0.0, 1.0, 1.0), &[0, 1, 2]);
168        assert_eq!(targets, vec![0, 1, 2]);
169    }
170
171    #[test]
172    fn total_entries() {
173        let mut table = SpatialRoutingTable::new();
174        table.update_extent("col", 0, Some(BoundingBox::new(0.0, 0.0, 1.0, 1.0)), 100);
175        table.update_extent("col", 1, Some(BoundingBox::new(2.0, 2.0, 3.0, 3.0)), 200);
176        assert_eq!(table.total_entries("col"), 300);
177    }
178}