Skip to main content

sedona_spatial_join/partitioning/
rtree.rs

1// Licensed to the Apache Software Foundation (ASF) under one
2// or more contributor license agreements.  See the NOTICE file
3// distributed with this work for additional information
4// regarding copyright ownership.  The ASF licenses this file
5// to you under the Apache License, Version 2.0 (the
6// "License"); you may not use this file except in compliance
7// with the License.  You may obtain a copy of the License at
8//
9//   http://www.apache.org/licenses/LICENSE-2.0
10//
11// Unless required by applicable law or agreed to in writing,
12// software distributed under the License is distributed on an
13// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
14// KIND, either express or implied.  See the License for the
15// specific language governing permissions and limitations
16// under the License.
17
18//! RTree-based spatial partitioning implementation.
19//!
20//! This module provides an RTree-based implementation for spatial partitioning,
21//! designed for out-of-core spatial joins. Unlike KDB-tree partitioning which
22//! builds the partition structure from sample data, RTree partitioning takes
23//! a pre-defined set of partition boundaries (rectangles) and uses an RTree
24//! index to efficiently determine which partition a given bounding box belongs to.
25//!
26//! # Partitioning Strategy
27//!
28//! The RTree partitioner follows these rules:
29//!
30//! 1. **Single Partition Assignment**: Each bounding box is assigned to exactly one partition.
31//! 2. **Intersection-Based Assignment**: The partitioner finds which partition boundaries
32//!    intersect with the input bounding box and produces a [`SpatialPartition::Regular`] result.
33//! 3. **Multi-partition Handling**: When a bbox intersects multiple partitions, it's assigned
34//!    to [`SpatialPartition::Multi`].
35//! 4. **None-partition Handling**: If a bbox doesn't intersect any partition boundary, it's assigned
36//!    to [`SpatialPartition::None`].
37
38use std::sync::Arc;
39
40use datafusion_common::Result;
41use geo::Rect;
42use geo_index::rtree::{sort::HilbertSort, RTree, RTreeBuilder, RTreeIndex};
43use sedona_geometry::bounding_box::BoundingBox;
44
45use crate::partitioning::util::{
46    bbox_to_f32_rect, bbox_to_geo_rect, make_rect, rect_intersection_area,
47};
48use crate::partitioning::{SpatialPartition, SpatialPartitioner};
49
50/// RTree-based spatial partitioner that uses pre-defined partition boundaries.
51///
52/// This partitioner constructs an RTree index over a set of partition boundaries
53/// (rectangles) and uses it to efficiently determine which partition a given
54/// bounding box belongs to based on spatial intersection.
55#[derive(Clone)]
56pub struct RTreePartitioner {
57    inner: Arc<RawRTreePartitioner>,
58}
59
60impl RTreePartitioner {
61    /// Create a new RTree partitioner from a collection of partition boundaries.
62    ///
63    /// # Arguments
64    /// * `boundaries` - A vector of bounding boxes representing partition boundaries.
65    ///   Each bounding box defines the spatial extent of one partition. The partition ID
66    ///   is the index in this vector.
67    ///
68    /// # Errors
69    /// Returns an error if any boundary has wraparound coordinates (not supported)
70    ///
71    /// # Example
72    /// ```rust
73    /// use sedona_geometry::bounding_box::BoundingBox;
74    /// use sedona_spatial_join::partitioning::rtree::RTreePartitioner;
75    ///
76    /// let boundaries = vec![
77    ///     BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),
78    ///     BoundingBox::xy((50.0, 100.0), (0.0, 50.0)),
79    /// ];
80    /// let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
81    /// ```
82    pub fn try_new(boundaries: Vec<BoundingBox>) -> Result<Self> {
83        let inner = RawRTreePartitioner::try_new(boundaries)?;
84        Ok(Self {
85            inner: Arc::new(inner),
86        })
87    }
88
89    /// Create a new RTree partitioner with a custom node size.
90    pub fn try_new_with_node_size(boundaries: Vec<BoundingBox>, node_size: u16) -> Result<Self> {
91        let inner = RawRTreePartitioner::build(boundaries, Some(node_size))?;
92        Ok(Self {
93            inner: Arc::new(inner),
94        })
95    }
96
97    /// Return the number of levels in the underlying RTree.
98    pub fn depth(&self) -> usize {
99        self.inner.depth()
100    }
101}
102
103impl SpatialPartitioner for RTreePartitioner {
104    fn num_regular_partitions(&self) -> usize {
105        self.inner.num_regular_partitions()
106    }
107
108    fn partition(&self, bbox: &BoundingBox) -> Result<SpatialPartition> {
109        self.inner.partition(bbox)
110    }
111
112    fn partition_no_multi(&self, bbox: &BoundingBox) -> Result<SpatialPartition> {
113        self.inner.partition_no_multi(bbox)
114    }
115
116    fn box_clone(&self) -> Box<dyn SpatialPartitioner> {
117        Box::new(self.clone())
118    }
119}
120
121struct RawRTreePartitioner {
122    /// The RTree index storing partition boundaries as f32 rectangles
123    rtree: RTree<f32>,
124    /// Flat representation of partition boundaries for overlap calculations
125    boundaries: Vec<Rect<f32>>,
126    /// Number of partitions (excluding None and Multi)
127    num_partitions: usize,
128    /// Map from RTree index to original partition index
129    partition_map: Vec<usize>,
130}
131
132impl RawRTreePartitioner {
133    fn try_new(boundaries: Vec<BoundingBox>) -> Result<Self> {
134        Self::build(boundaries, None)
135    }
136
137    fn build(boundaries: Vec<BoundingBox>, node_size: Option<u16>) -> Result<Self> {
138        let num_partitions = boundaries.len();
139
140        // Filter valid boundaries and keep track of original indices
141        let mut valid_boundaries = Vec::with_capacity(num_partitions);
142        let mut partition_map = Vec::with_capacity(num_partitions);
143
144        for (i, bbox) in boundaries.iter().enumerate() {
145            if let Some(rect) = bbox_to_f32_rect(bbox)? {
146                valid_boundaries.push(rect);
147                partition_map.push(i);
148            }
149        }
150
151        let num_valid = valid_boundaries.len();
152
153        // Build RTree index with partition boundaries
154        let mut rtree_builder = match node_size {
155            Some(size) => RTreeBuilder::<f32>::new_with_node_size(num_valid as u32, size),
156            None => RTreeBuilder::<f32>::new(num_valid as u32),
157        };
158
159        let mut rects = Vec::with_capacity(num_valid);
160        for (min_x, min_y, max_x, max_y) in valid_boundaries {
161            rtree_builder.add(min_x, min_y, max_x, max_y);
162            rects.push(make_rect(min_x, min_y, max_x, max_y));
163        }
164
165        let rtree = rtree_builder.finish::<HilbertSort>();
166
167        Ok(RawRTreePartitioner {
168            rtree,
169            boundaries: rects,
170            num_partitions,
171            partition_map,
172        })
173    }
174
175    fn num_regular_partitions(&self) -> usize {
176        self.num_partitions
177    }
178
179    fn depth(&self) -> usize {
180        self.rtree.num_levels()
181    }
182
183    fn partition(&self, bbox: &BoundingBox) -> Result<SpatialPartition> {
184        // Convert bbox to f32 for RTree query with proper bounds handling
185        let (min_x, min_y, max_x, max_y) = match bbox_to_f32_rect(bbox)? {
186            Some(rect) => rect,
187            None => return Ok(SpatialPartition::None),
188        };
189
190        // Query RTree for intersecting partitions
191        let intersecting_partitions = self.rtree.search(min_x, min_y, max_x, max_y);
192
193        // Handle different cases based on number of intersecting partitions
194        match intersecting_partitions.len() {
195            0 => {
196                // No intersection with any partition -> None
197                Ok(SpatialPartition::None)
198            }
199            1 => {
200                // Single intersection -> Regular partition
201                let rtree_index = intersecting_partitions[0];
202                Ok(SpatialPartition::Regular(
203                    self.partition_map[rtree_index as usize] as u32,
204                ))
205            }
206            _ => {
207                // Multiple intersections -> Always return Multi
208                Ok(SpatialPartition::Multi)
209            }
210        }
211    }
212
213    fn partition_no_multi(&self, bbox: &BoundingBox) -> Result<SpatialPartition> {
214        let rect = match bbox_to_geo_rect(bbox)? {
215            Some(rect) => rect,
216            None => return Ok(SpatialPartition::None),
217        };
218        let min = rect.min();
219        let max = rect.max();
220        let intersecting_partitions = self.rtree.search(min.x, min.y, max.x, max.y);
221
222        if intersecting_partitions.is_empty() {
223            return Ok(SpatialPartition::None);
224        }
225
226        let mut best_partition = None;
227        let mut best_area = -1.0_f32;
228        for &partition_id in &intersecting_partitions {
229            let boundary = &self.boundaries[partition_id as usize];
230            let area = rect_intersection_area(boundary, &rect);
231            if area > best_area {
232                best_area = area;
233                best_partition = Some(partition_id);
234            }
235        }
236
237        Ok(match best_partition {
238            Some(id) => SpatialPartition::Regular(self.partition_map[id as usize] as u32),
239            None => SpatialPartition::None,
240        })
241    }
242}
243
244#[cfg(test)]
245mod tests {
246    use sedona_geometry::interval::{Interval, IntervalTrait};
247
248    use super::*;
249
250    #[test]
251    fn test_rtree_partitioner_creation() {
252        let boundaries = vec![
253            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),
254            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)),
255        ];
256        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
257        assert_eq!(partitioner.num_regular_partitions(), 2);
258    }
259
260    #[test]
261    fn test_rtree_partitioner_empty_boundaries() {
262        let boundaries: Vec<BoundingBox> = vec![];
263        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
264        assert_eq!(partitioner.num_regular_partitions(), 0);
265    }
266
267    #[test]
268    fn test_partition_with_empty_boundaries_returns_none() {
269        let boundaries: Vec<BoundingBox> = vec![];
270        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
271
272        let bbox = BoundingBox::xy((0.0, 1.0), (0.0, 1.0));
273        let result = partitioner.partition(&bbox).unwrap();
274        assert_eq!(result, SpatialPartition::None);
275    }
276
277    #[test]
278    fn test_rtree_partitioner_wraparound_boundary() {
279        // Create a boundary with wraparound (crossing anti-meridian)
280        let boundaries = vec![
281            BoundingBox::xy((170.0, -170.0), (0.0, 50.0)), // This represents wraparound
282        ];
283        let result = RTreePartitioner::try_new(boundaries);
284        assert!(result.is_err());
285    }
286
287    #[test]
288    fn test_partition_single_intersection() {
289        let boundaries = vec![
290            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),   // Partition 0
291            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)), // Partition 1
292            BoundingBox::xy((0.0, 50.0), (50.0, 100.0)), // Partition 2
293            BoundingBox::xy((50.0, 100.0), (50.0, 100.0)), // Partition 3
294        ];
295        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
296
297        // Test bbox that falls entirely in partition 0
298        let bbox = BoundingBox::xy((10.0, 20.0), (10.0, 20.0));
299        let result = partitioner.partition(&bbox).unwrap();
300        assert_eq!(result, SpatialPartition::Regular(0));
301
302        // Test bbox that falls entirely in partition 3
303        let bbox = BoundingBox::xy((60.0, 80.0), (60.0, 80.0));
304        let result = partitioner.partition(&bbox).unwrap();
305        assert_eq!(result, SpatialPartition::Regular(3));
306    }
307
308    #[test]
309    fn test_partition_multiple_intersections() {
310        let boundaries = vec![
311            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),   // Partition 0
312            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)), // Partition 1
313        ];
314        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
315
316        // Test bbox that spans both partitions - should return Multi
317        let bbox = BoundingBox::xy((30.0, 60.0), (10.0, 20.0));
318        let result = partitioner.partition(&bbox).unwrap();
319        assert_eq!(result, SpatialPartition::Multi);
320
321        assert!(matches!(
322            partitioner.partition_no_multi(&bbox).unwrap(),
323            SpatialPartition::Regular(_)
324        ));
325    }
326
327    #[test]
328    fn test_partition_none() {
329        let boundaries = vec![
330            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),   // Partition 0
331            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)), // Partition 1
332        ];
333        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
334
335        // Test bbox that doesn't intersect any partition
336        let bbox = BoundingBox::xy((200.0, 300.0), (200.0, 300.0));
337        let result = partitioner.partition(&bbox).unwrap();
338        assert_eq!(result, SpatialPartition::None);
339    }
340
341    #[test]
342    fn test_partition_wraparound_input() {
343        let boundaries = vec![BoundingBox::xy((0.0, 50.0), (0.0, 50.0))];
344        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
345
346        // Test with wraparound input bbox (should be rejected)
347        let bbox = BoundingBox::xy((170.0, -170.0), (10.0, 20.0));
348        let result = partitioner.partition(&bbox);
349        assert!(result.is_err());
350    }
351
352    #[test]
353    fn test_num_partitions() {
354        let boundaries = vec![
355            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),
356            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)),
357            BoundingBox::xy((0.0, 50.0), (50.0, 100.0)),
358        ];
359        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
360        assert_eq!(partitioner.num_regular_partitions(), 3);
361    }
362
363    #[test]
364    fn test_partition_boundary_edge() {
365        let boundaries = vec![
366            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),
367            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)),
368        ];
369        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
370
371        // Test bbox exactly on the boundary between two partitions
372        let bbox = BoundingBox::xy((45.0, 55.0), (10.0, 20.0));
373        let result = partitioner.partition(&bbox).unwrap();
374        // Should be Multi since it spans multiple partitions
375        assert_eq!(result, SpatialPartition::Multi);
376    }
377
378    #[test]
379    fn test_rtree_partitioner_empty_bbox_build() {
380        let boundaries = vec![
381            BoundingBox::xy((0.0, 50.0), (0.0, 50.0)),
382            BoundingBox::xy(Interval::empty(), Interval::empty()),
383            BoundingBox::xy((50.0, 100.0), (0.0, 50.0)),
384        ];
385        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
386        assert_eq!(partitioner.num_regular_partitions(), 3);
387
388        // Verify that the third partition (index 2) is correctly mapped
389        let query = BoundingBox::xy((60.0, 70.0), (10.0, 20.0));
390        assert_eq!(
391            partitioner.partition(&query).unwrap(),
392            SpatialPartition::Regular(2)
393        );
394    }
395
396    #[test]
397    fn test_rtree_partitioner_empty_bbox_query() {
398        let boundaries = vec![BoundingBox::xy((0.0, 50.0), (0.0, 50.0))];
399        let partitioner = RTreePartitioner::try_new(boundaries).unwrap();
400
401        let bbox = BoundingBox::xy(Interval::empty(), Interval::empty());
402        assert_eq!(
403            partitioner.partition(&bbox).unwrap(),
404            SpatialPartition::None
405        );
406        assert_eq!(
407            partitioner.partition_no_multi(&bbox).unwrap(),
408            SpatialPartition::None
409        );
410    }
411}