geo_index/rtree/
index.rs

1use std::marker::PhantomData;
2
3use bytemuck::cast_slice;
4
5use crate::error::{GeoIndexError, Result};
6use crate::indices::Indices;
7use crate::r#type::IndexableNum;
8use crate::rtree::constants::VERSION;
9use crate::rtree::util::compute_num_nodes;
10
11/// Common metadata to describe an RTree
12///
13/// You can use the metadata to infer the total byte size of a tree given the provided criteria.
14/// See [`data_buffer_length`][Self::data_buffer_length].
15#[derive(Debug, Clone, PartialEq)]
16pub struct RTreeMetadata<N: IndexableNum> {
17    node_size: u16,
18    num_items: u32,
19    num_nodes: usize,
20    level_bounds: Vec<usize>,
21    pub(crate) nodes_byte_length: usize,
22    pub(crate) indices_byte_length: usize,
23    phantom: PhantomData<N>,
24}
25
26impl<N: IndexableNum> RTreeMetadata<N> {
27    /// Construct a new [`RTreeMetadata`] from a number of items and node size.
28    pub fn new(num_items: u32, node_size: u16) -> Self {
29        assert!((2..=65535).contains(&node_size));
30
31        let (num_nodes, level_bounds) = compute_num_nodes(num_items, node_size);
32
33        let indices_bytes_per_element = if num_nodes < 16384 { 2 } else { 4 };
34        let nodes_byte_length = num_nodes * 4 * N::BYTES_PER_ELEMENT;
35        let indices_byte_length = num_nodes * indices_bytes_per_element;
36
37        Self {
38            node_size,
39            num_items,
40            num_nodes,
41            level_bounds,
42            nodes_byte_length,
43            indices_byte_length,
44            phantom: PhantomData,
45        }
46    }
47
48    /// Construct a new [`RTreeMetadata`] from an existing byte slice conforming to the "flatbush
49    /// ABI", such as what [`RTreeBuilder`] generates.
50    pub fn from_slice(data: &[u8]) -> Result<Self> {
51        if data.len() < 8 {
52            return Err(GeoIndexError::General(format!(
53                "Expected at least 8 bytes but received {}",
54                data.len()
55            )));
56        }
57
58        let magic = data[0];
59        if magic != 0xfb {
60            return Err(GeoIndexError::General(
61                "Data not in Flatbush format.".to_string(),
62            ));
63        }
64
65        let version_and_type = data[1];
66        let version = version_and_type >> 4;
67        if version != VERSION {
68            return Err(GeoIndexError::General(
69                format!("Got v{} data when expected v{}.", version, VERSION).to_string(),
70            ));
71        }
72
73        let type_ = version_and_type & 0x0f;
74        if type_ != N::TYPE_INDEX {
75            return Err(GeoIndexError::General(
76                format!(
77                    "Got type {} data when expected type {}.",
78                    type_,
79                    N::TYPE_INDEX
80                )
81                .to_string(),
82            ));
83        }
84
85        let node_size: u16 = cast_slice(&data[2..4])[0];
86        let num_items: u32 = cast_slice(&data[4..8])[0];
87
88        let slf = Self::new(num_items, node_size);
89        if slf.data_buffer_length() != data.len() {
90            return Err(GeoIndexError::General(format!(
91                "Expected {} bytes but received byte slice with {} bytes",
92                slf.data_buffer_length(),
93                data.len()
94            )));
95        }
96
97        Ok(slf)
98    }
99
100    /// The maximum number of items per node.
101    pub fn node_size(&self) -> u16 {
102        self.node_size
103    }
104
105    /// The number of items indexed in the tree.
106    pub fn num_items(&self) -> u32 {
107        self.num_items
108    }
109
110    /// The total number of nodes at all levels in the tree.
111    pub fn num_nodes(&self) -> usize {
112        self.num_nodes
113    }
114
115    /// The offsets into [`RTreeIndex::boxes`][crate::rtree::RTreeIndex::boxes] where each level's
116    /// boxes starts and ends. The tree is laid out bottom-up, and there's an implicit initial 0.
117    /// So the boxes of the lowest level of the tree are located from
118    /// `boxes[0..self.level_bounds()[0]]`.
119    pub fn level_bounds(&self) -> &[usize] {
120        &self.level_bounds
121    }
122
123    /// The number of bytes that an RTree with this metadata would have.
124    ///
125    /// ```
126    /// use geo_index::rtree::RTreeMetadata;
127    ///
128    /// let metadata = RTreeMetadata::<f64>::new(25000, 16);
129    /// assert_eq!(metadata.data_buffer_length(), 960_092);
130    /// ```
131    pub fn data_buffer_length(&self) -> usize {
132        8 + self.nodes_byte_length + self.indices_byte_length
133    }
134
135    /// Access the slice of boxes from the data buffer this metadata represents.
136    pub fn boxes_slice<'a>(&self, data: &'a [u8]) -> &'a [N] {
137        cast_slice(&data[8..8 + self.nodes_byte_length])
138    }
139
140    /// Access the slice of indices from the data buffer this metadata represents.
141    pub fn indices_slice<'a>(&self, data: &'a [u8]) -> Indices<'a> {
142        let indices_buf = &data
143            [8 + self.nodes_byte_length..8 + self.nodes_byte_length + self.indices_byte_length];
144        Indices::new(indices_buf, self.num_nodes)
145    }
146}
147
148/// An owned RTree buffer.
149///
150/// Usually this will be created from scratch via [`RTreeBuilder`][crate::rtree::RTreeBuilder].
151#[derive(Debug, Clone, PartialEq)]
152pub struct RTree<N: IndexableNum> {
153    pub(crate) buffer: Vec<u8>,
154    pub(crate) metadata: RTreeMetadata<N>,
155}
156
157impl<N: IndexableNum> RTree<N> {
158    /// Access the underlying buffer of this RTree.
159    ///
160    /// This buffer can then be persisted and passed to `RTreeRef::try_new`.
161    pub fn into_inner(self) -> Vec<u8> {
162        self.buffer
163    }
164}
165
166impl<N: IndexableNum> AsRef<[u8]> for RTree<N> {
167    fn as_ref(&self) -> &[u8] {
168        &self.buffer
169    }
170}
171
172/// A reference on an external RTree buffer.
173///
174/// Usually this will be created from an [`RTree`] via its [`as_ref`][RTree::as_ref]
175/// method, but it can also be created from any existing data buffer.
176#[derive(Debug, Clone, PartialEq)]
177pub struct RTreeRef<'a, N: IndexableNum> {
178    pub(crate) boxes: &'a [N],
179    pub(crate) indices: Indices<'a>,
180    pub(crate) metadata: RTreeMetadata<N>,
181}
182
183impl<'a, N: IndexableNum> RTreeRef<'a, N> {
184    /// Construct a new RTree from an external byte slice.
185    ///
186    /// This byte slice must conform to the "flatbush ABI", that is, the ABI originally implemented
187    /// by the JavaScript [`flatbush` library](https://github.com/mourner/flatbush). You can
188    /// extract such a buffer either via [`RTree::into_inner`] or from the `.data` attribute
189    /// of the JavaScript `Flatbush` object.
190    pub fn try_new<T: AsRef<[u8]>>(data: &'a T) -> Result<Self> {
191        let data = data.as_ref();
192        let metadata = RTreeMetadata::from_slice(data)?;
193        let boxes = metadata.boxes_slice(data);
194        let indices = metadata.indices_slice(data);
195
196        Ok(Self {
197            boxes,
198            indices,
199            metadata,
200        })
201    }
202
203    /// Construct a new RTreeRef without doing any validation
204    ///
205    /// # Safety
206    ///
207    /// `metadata` must be valid for this data buffer.
208    pub unsafe fn new_unchecked<T: AsRef<[u8]>>(
209        data: &'a T,
210        metadata: RTreeMetadata<N>,
211    ) -> Result<Self> {
212        let data = data.as_ref();
213        let boxes = metadata.boxes_slice(data);
214        let indices = metadata.indices_slice(data);
215
216        Ok(Self {
217            boxes,
218            indices,
219            metadata,
220        })
221    }
222}
223
224#[cfg(test)]
225mod tests {
226    use crate::rtree::sort::{HilbertSort, STRSort, Sort};
227    use crate::rtree::{RTreeBuilder, RTreeIndex};
228
229    use super::*;
230
231    #[test]
232    fn rejects_short_buffers() {
233        assert!(RTreeMetadata::<f64>::from_slice(&[]).is_err());
234        assert!(RTreeMetadata::<f64>::from_slice(&[0; 7]).is_err());
235    }
236
237    fn linspace(start: usize, stop: usize, num: usize, endpoint: bool) -> Vec<f64> {
238        let div = if endpoint { num - 1 } else { num };
239        let step = (stop - start) as f64 / div as f64;
240        (0..num).map(|i| start as f64 + step * i as f64).collect()
241    }
242
243    #[test]
244    // https://github.com/mourner/flatbush/pull/65
245    fn quicksort_should_work_with_an_inbalanced_dataset() {
246        _quicksort_should_work_with_an_inbalanced_dataset::<HilbertSort>();
247        _quicksort_should_work_with_an_inbalanced_dataset::<STRSort>();
248    }
249
250    fn _quicksort_should_work_with_an_inbalanced_dataset<S: Sort<f64>>() {
251        let n = 15000;
252        let mut builder = RTreeBuilder::new(2 * n);
253
254        let items = linspace(0, 1000, n as usize, true);
255        let items2 = linspace(0, 1000, n as usize, true);
256
257        for item in items {
258            builder.add(item, 0., item, 0.);
259        }
260
261        for item in items2 {
262            builder.add(item, 0., item, 0.);
263        }
264
265        let index = builder.finish::<S>();
266
267        index.search(-100., -1., 15000., 1.);
268    }
269}