geo_index/kdtree/
index.rs

1use std::marker::PhantomData;
2
3use bytemuck::cast_slice;
4
5use crate::error::{GeoIndexError, Result};
6use crate::indices::Indices;
7use crate::kdtree::constants::{KDBUSH_HEADER_SIZE, KDBUSH_MAGIC, KDBUSH_VERSION};
8use crate::r#type::IndexableNum;
9
10/// Common metadata to describe a KDTree
11///
12/// You can use the metadata to infer the total byte size of a tree given the provided criteria.
13/// See [`data_buffer_length`][Self::data_buffer_length].
14#[derive(Debug, Clone, Copy, PartialEq)]
15pub struct KDTreeMetadata<N: IndexableNum> {
16    node_size: u16,
17    num_items: u32,
18    phantom: PhantomData<N>,
19    pub(crate) indices_byte_size: usize,
20    pub(crate) pad_coords_byte_size: usize,
21    pub(crate) coords_byte_size: usize,
22}
23
24impl<N: IndexableNum> KDTreeMetadata<N> {
25    /// Construct a new [`KDTreeMetadata`] from a number of items and node size.
26    pub fn new(num_items: u32, node_size: u16) -> Self {
27        assert!((2..=65535).contains(&node_size));
28
29        let coords_byte_size = (num_items as usize) * 2 * N::BYTES_PER_ELEMENT;
30        let indices_bytes_per_element = if num_items < 65536 { 2 } else { 4 };
31        let indices_byte_size = (num_items as usize) * indices_bytes_per_element;
32        let pad_coords_byte_size = (8 - (indices_byte_size % 8)) % 8;
33
34        Self {
35            node_size,
36            num_items,
37            phantom: PhantomData,
38            indices_byte_size,
39            pad_coords_byte_size,
40            coords_byte_size,
41        }
42    }
43
44    /// Construct a new [`KDTreeMetadata`] from an existing byte slice conforming to the "kdbush
45    /// ABI", such as what [`KDTreeBuilder`] generates.
46    pub fn from_slice(data: &[u8]) -> Result<Self> {
47        if data.len() < KDBUSH_HEADER_SIZE {
48            return Err(GeoIndexError::General(format!(
49                "Expected at least {} bytes but received {}",
50                KDBUSH_HEADER_SIZE,
51                data.len()
52            )));
53        }
54
55        if data[0] != KDBUSH_MAGIC {
56            return Err(GeoIndexError::General(
57                "Data not in Kdbush format.".to_string(),
58            ));
59        }
60
61        let version_and_type = data[1];
62        let version = version_and_type >> 4;
63        if version != KDBUSH_VERSION {
64            return Err(GeoIndexError::General(
65                format!("Got v{} data when expected v{}.", version, KDBUSH_VERSION).to_string(),
66            ));
67        }
68
69        let type_ = version_and_type & 0x0f;
70        if type_ != N::TYPE_INDEX {
71            return Err(GeoIndexError::General(
72                format!(
73                    "Got type {} data when expected type {}.",
74                    type_,
75                    N::TYPE_INDEX
76                )
77                .to_string(),
78            ));
79        }
80
81        let node_size: u16 = cast_slice(&data[2..4])[0];
82        let num_items: u32 = cast_slice(&data[4..8])[0];
83
84        let slf = Self::new(num_items, node_size);
85        if slf.data_buffer_length() != data.len() {
86            return Err(GeoIndexError::General(format!(
87                "Expected {} bytes but received byte slice with {} bytes",
88                slf.data_buffer_length(),
89                data.len()
90            )));
91        }
92
93        Ok(slf)
94    }
95
96    /// The maximum number of items per node.
97    pub fn node_size(&self) -> u16 {
98        self.node_size
99    }
100
101    /// The number of items indexed in the tree.
102    pub fn num_items(&self) -> u32 {
103        self.num_items
104    }
105
106    /// The number of bytes that a KDTree with this metadata would have.
107    ///
108    /// ```
109    /// use geo_index::kdtree::KDTreeMetadata;
110    ///
111    /// let metadata = KDTreeMetadata::<f64>::new(25000, 16);
112    /// assert_eq!(metadata.data_buffer_length(), 450_008);
113    /// ```
114    pub fn data_buffer_length(&self) -> usize {
115        KDBUSH_HEADER_SIZE
116            + self.coords_byte_size
117            + self.indices_byte_size
118            + self.pad_coords_byte_size
119    }
120
121    /// Access the slice of coordinates from the data buffer this metadata represents.
122    pub fn coords_slice<'a>(&self, data: &'a [u8]) -> &'a [N] {
123        let coords_byte_start =
124            KDBUSH_HEADER_SIZE + self.indices_byte_size + self.pad_coords_byte_size;
125        let coords_byte_end = KDBUSH_HEADER_SIZE
126            + self.indices_byte_size
127            + self.pad_coords_byte_size
128            + self.coords_byte_size;
129        cast_slice(&data[coords_byte_start..coords_byte_end])
130    }
131
132    /// Access the slice of indices from the data buffer this metadata represents.
133    pub fn indices_slice<'a>(&self, data: &'a [u8]) -> Indices<'a> {
134        let indices_buf = &data[KDBUSH_HEADER_SIZE..KDBUSH_HEADER_SIZE + self.indices_byte_size];
135
136        if self.num_items < 65536 {
137            Indices::U16(cast_slice(indices_buf))
138        } else {
139            Indices::U32(cast_slice(indices_buf))
140        }
141    }
142}
143
144/// An owned KDTree buffer.
145///
146/// Usually this will be created from scratch via [`KDTreeBuilder`][crate::kdtree::KDTreeBuilder].
147#[derive(Debug, Clone, PartialEq)]
148pub struct KDTree<N: IndexableNum> {
149    pub(crate) buffer: Vec<u8>,
150    pub(crate) metadata: KDTreeMetadata<N>,
151}
152
153impl<N: IndexableNum> KDTree<N> {
154    /// Consume this KDTree, returning the underlying buffer.
155    pub fn into_inner(self) -> Vec<u8> {
156        self.buffer
157    }
158}
159
160impl<N: IndexableNum> AsRef<[u8]> for KDTree<N> {
161    fn as_ref(&self) -> &[u8] {
162        &self.buffer
163    }
164}
165
166/// A reference on an external KDTree buffer.
167#[derive(Debug, Clone, PartialEq)]
168pub struct KDTreeRef<'a, N: IndexableNum> {
169    pub(crate) coords: &'a [N],
170    pub(crate) indices: Indices<'a>,
171    pub(crate) metadata: KDTreeMetadata<N>,
172}
173
174impl<'a, N: IndexableNum> KDTreeRef<'a, N> {
175    /// Construct a new KDTreeRef from an external byte slice.
176    ///
177    /// This byte slice must conform to the "kdbush ABI", that is, the ABI originally implemented
178    /// by the JavaScript [`kdbush` library](https://github.com/mourner/kdbush). You can extract
179    /// such a buffer either via [`KDTree::into_inner`] or from the `.data` attribute of the
180    /// JavaScript `KDBush` object.
181    pub fn try_new<T: AsRef<[u8]>>(data: &'a T) -> Result<Self> {
182        let data = data.as_ref();
183        let metadata = KDTreeMetadata::from_slice(data)?;
184        let coords = metadata.coords_slice(data);
185        let indices = metadata.indices_slice(data);
186
187        Ok(Self {
188            coords,
189            indices,
190            metadata,
191        })
192    }
193
194    /// Construct a new KDTreeRef without doing any validation
195    ///
196    /// # Safety
197    ///
198    /// `metadata` must be valid for this data buffer.
199    pub unsafe fn new_unchecked<T: AsRef<[u8]>>(
200        data: &'a T,
201        metadata: KDTreeMetadata<N>,
202    ) -> Result<Self> {
203        let data = data.as_ref();
204        let coords = metadata.coords_slice(data);
205        let indices = metadata.indices_slice(data);
206
207        Ok(Self {
208            coords,
209            indices,
210            metadata,
211        })
212    }
213}
214
215#[cfg(test)]
216mod tests {
217    use super::*;
218
219    #[test]
220    fn rejects_short_buffers() {
221        assert!(KDTreeMetadata::<f64>::from_slice(&[]).is_err());
222        assert!(KDTreeMetadata::<f64>::from_slice(&[0; 7]).is_err());
223    }
224}