cashmere/
lib.rs

1#![deny(unsafe_op_in_unsafe_fn)]
2
3use std::borrow::Borrow;
4use std::collections::hash_map::Entry::{Occupied, Vacant};
5use std::collections::{hash_map, HashMap};
6#[cfg(feature = "full_validation")]
7use std::fmt::Debug;
8use std::hash::Hash;
9use std::mem::{replace, swap};
10use std::ops::{Deref, DerefMut};
11use std::ptr::null_mut;
12
13#[cfg(feature = "full_validation")]
14use crate::node::{validate_tree, MutableBinaryNode, ViewingBinaryTreeIter};
15use crate::node::{
16    Consumable, Height, InsertDuplicates, KdBoxNodeParent, KdBuildableNode, KdInsertable, KdNode,
17    Ownable, Parent, ScapegoatKdNode, Statistic, Tree, TreeHeightBound, Value, WithHeight,
18};
19use crate::search::{AdhocKdSearcher, KdSearchable, KdSearcher};
20use crate::types::CountedIter;
21pub use crate::types::{Ratio, RatioT};
22use crate::value::{KdValue, KdValuePlus, ValueStatisticPlus};
23
24mod node;
25pub mod search;
26pub(crate) mod types;
27pub mod value;
28
29struct NewKdInsert<'a, V, Node> {
30    value: V,
31    created: &'a mut *mut Node,
32}
33
34impl<'a, V, Node> KdInsertable<Node> for NewKdInsert<'a, V, Node>
35where
36    V: KdValue,
37    Node: KdNode<Value = V>,
38{
39    fn value(&self) -> &V {
40        &self.value
41    }
42
43    fn swap_value_with(&mut self, _receiving_node: *mut Node, val: &mut V) {
44        swap(&mut self.value, val)
45    }
46
47    fn node(self, dim: usize) -> Node::Ownership {
48        let mut node = Node::new(self.value, dim);
49        *self.created = node.deref_mut();
50        node
51    }
52}
53
54pub struct KdValueMapImpl<K, V, Node, Balance: TreeHeightBound = Ratio<5, 4>>
55where
56    V: KdValue,
57    Node: KdNode<Value = KdValuePlus<V, K>>,
58{
59    tree: Node::Tree,
60    items: HashMap<K, *mut Node>,
61    balance: Balance,
62}
63
64pub type KdValueMap<K, V, Stat = (), Balance = Ratio<5, 4>> = KdValueMapImpl<
65    K,
66    V,
67    KdBoxNodeParent<KdValuePlus<V, K>, WithHeight<ValueStatisticPlus<Stat>>>,
68    Balance,
69>;
70
71impl<K, V, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
72where
73    K: Clone + Eq + Hash,
74    V: KdValue,
75    Node: KdNode<Value = KdValuePlus<V, K>>,
76    Balance: TreeHeightBound,
77{
78    pub fn new() -> Self
79    where
80        Balance: Default,
81    {
82        Default::default()
83    }
84
85    pub fn with_balance(balance: Balance) -> Self {
86        Self {
87            balance,
88            ..Default::default()
89        }
90    }
91
92    pub fn len(&self) -> usize {
93        self.items.len()
94    }
95
96    pub fn is_empty(&self) -> bool {
97        self.len() == 0
98    }
99
100    pub fn clear(&mut self) {
101        self.tree.take();
102        self.items.clear();
103    }
104
105    pub fn contains_key(&mut self, key: &K) -> bool {
106        self.items.contains_key(key)
107    }
108
109    pub fn keys(&self) -> impl Iterator<Item = &K> {
110        self.items.keys()
111    }
112
113    pub fn into_keys(self) -> impl Iterator<Item = K> {
114        self.items.into_keys()
115    }
116
117    pub fn values(&self) -> impl Iterator<Item = &V> {
118        self.into_iter().map(|(_, v)| v)
119    }
120}
121
122impl<K, V, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
123where
124    K: Clone + Eq + Hash,
125    V: KdValue,
126    Node: KdNode<Value = KdValuePlus<V, K>>,
127    Node::Ownership: Consumable<Value = KdValuePlus<V, K>>,
128    Balance: TreeHeightBound,
129{
130    pub fn drain(self) -> impl Iterator<Item = (K, V)> {
131        CountedIter::new(
132            self.tree.into_iter().map(|node| {
133                let KdValuePlus { val: v, plus: k } = node.consume();
134                (k, v)
135            }),
136            self.items.len(),
137        )
138    }
139
140    pub fn into_values(self) -> impl Iterator<Item = V> {
141        CountedIter::new(
142            self.tree.into_iter().map(|node| node.consume().val),
143            self.items.len(),
144        )
145    }
146}
147
148impl<K, V, Stat, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
149where
150    K: Clone + Eq + Hash,
151    V: KdValue,
152    Stat: Height,
153    Node: KdNode<Value = KdValuePlus<V, K>> + Statistic<Stat = Stat>,
154    Balance: TreeHeightBound,
155{
156    fn validate_height(&self) {
157        let population: usize;
158        let actual_height: isize;
159        let max_height: isize;
160        debug_assert!(
161            {
162                population = self.len();
163                actual_height = self
164                    .tree
165                    .as_ref()
166                    .map_or(-1, |root| root.stat().height() as isize);
167                max_height = self.balance.height_budget(population) as isize;
168                actual_height <= max_height + 1
169            },
170            "tree with {population} nodes has height {actual_height} which is more than 1 over the \
171            allowed {max_height}"
172        );
173    }
174}
175
176impl<K, V, Stat, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
177where
178    K: Clone + Eq + Hash,
179    V: KdValue,
180    Stat: Height,
181    Node: KdNode<Value = KdValuePlus<V, K>> + Statistic<Stat = Stat>,
182    Balance: TreeHeightBound,
183{
184    fn validate(&self) {
185        self.validate_height();
186    }
187}
188
189impl<K, V, Stat, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
190where
191    K: Clone + Eq + Hash,
192    V: KdValue,
193    Stat: Height,
194    Node: ScapegoatKdNode<Value = KdValuePlus<V, K>> + Parent + Statistic<Stat = Stat>,
195    <Node as Ownable>::Ownership: KdInsertable<Node> + Consumable<Value = KdValuePlus<V, K>>,
196    Balance: TreeHeightBound,
197{
198    /// When a node's value is removed from the tree, the node itself is often not removed; instead
199    /// another node is removed and its value swapped into the target node. When this happens we
200    /// have to ensure the pointers in `items` are updated for the swapped value still in the tree.
201    ///
202    /// SAFETY: `actually_removed` is a still-owned `Node` that was just removed from the tree by
203    /// removing its value with `Node::remove_node(removal_target)`; thus, `removal_target` points
204    /// either to a still live node inside the tree, or to the same node as `actually_removed`.
205    unsafe fn track_removal_swap(
206        &mut self,
207        removal_target: *mut Node,
208        actually_removed: *const Node,
209    ) {
210        if actually_removed != removal_target {
211            // The node we triggered the removal of now has a new value. Track that key in items.
212            // SAFETY: `removal_target` is still in the tree. No references into the tree exist.
213            let node_new_key = &unsafe { &*removal_target }.value().plus;
214            // SAFETY: Because we just found this key inside the tree, its corresponding entry must
215            // exist in items.
216            let new_key_ptr = unsafe { self.items.get_mut(node_new_key).unwrap_unchecked() };
217            // The key there used to live in the removed node.
218            debug_assert!(actually_removed == *new_key_ptr);
219            // Associate that key with the node it's in now.
220            *new_key_ptr = removal_target;
221        }
222    }
223
224    pub fn insert(&mut self, key: K, value: V) -> Option<V> {
225        let current_size = self.len();
226        match self.items.entry(key) {
227            Vacant(entry) => {
228                let mut created_node: *mut Node = null_mut();
229                Node::tree_insert(
230                    &mut self.tree,
231                    self.balance,
232                    current_size + 1,
233                    NewKdInsert {
234                        value: KdValuePlus {
235                            val: value,
236                            plus: entry.key().clone(),
237                        },
238                        created: &mut created_node,
239                    },
240                    InsertDuplicates,
241                );
242                entry.insert(created_node);
243                self.validate();
244                None
245            }
246            Occupied(mut entry) => {
247                let removal_target = *entry.get();
248                // SAFETY: No references into the tree exist.
249                let mut reinsert = unsafe { Node::remove_node(removal_target) }
250                    // SAFETY: We know at least one item is in the tree because we found a
251                    // corresponding value in `items`. Furthermore, `remove_node` only returns None
252                    // when the node to be removed is the sole item in its tree, which means it
253                    // lives in `head`.
254                    .unwrap_or_else(|| unsafe { self.tree.take().unwrap_unchecked() });
255                // Fix up the pointer for this value's node in case the value was swapped
256                *entry.get_mut() = reinsert.deref_mut();
257                // Fix up the removal-target node too
258                // SAFETY: We just did the removal.
259                unsafe {
260                    self.track_removal_swap(removal_target, reinsert.deref());
261                }
262                // If the whole tree is still unbalanced after a removal, rebuild one of its deepest
263                // leaves.
264                self.maybe_rebuild_after_removal();
265                // Swap the k-d value
266                let old_value = replace(&mut reinsert.value_mut().val, value);
267                Node::tree_insert(
268                    &mut self.tree,
269                    self.balance,
270                    current_size,
271                    reinsert,
272                    InsertDuplicates,
273                );
274                self.validate();
275                Some(old_value)
276            }
277        }
278    }
279
280    pub fn try_insert(&mut self, key: K, value: V) -> Result<(), &V> {
281        let current_size = self.len();
282        match self.items.entry(key) {
283            Vacant(entry) => {
284                let mut created_node: *mut Node = null_mut();
285                Node::tree_insert(
286                    &mut self.tree,
287                    self.balance,
288                    current_size + 1,
289                    NewKdInsert {
290                        value: KdValuePlus {
291                            val: value,
292                            plus: entry.key().clone(),
293                        },
294                        created: &mut created_node,
295                    },
296                    InsertDuplicates,
297                );
298                entry.insert(created_node);
299                self.validate();
300                Ok(())
301            }
302            // SAFETY: No other references into the tree exist.
303            Occupied(entry) => Err(&<Node as Value>::value(unsafe { &**entry.get() }).val),
304        }
305    }
306
307    fn maybe_rebuild_after_removal(&mut self) {
308        Node::tree_maybe_rebuild_deepest_leaf(
309            self.balance.height_budget(self.len()),
310            &mut self.tree,
311            self.balance,
312        );
313        self.validate();
314    }
315
316    // TODO(widders): instead of borrowable to K, make the lookup key any borrowable type of K like
317    //  HashMap does it. same for contains etc.
318    pub fn remove<BK: Borrow<K>>(&mut self, key: BK) -> Option<V> {
319        self.items.remove(key.borrow()).map(|removal_target| {
320            // SAFETY: No references into the tree exist.
321            let removed_node = unsafe { Node::remove_node(removal_target) }
322                // SAFETY: We know at least one item is in the tree because we found a corresponding
323                // value in `items`. Furthermore, `remove_node` only returns None when the node to
324                // be removed is the sole item in its tree, which means it lives in `head`.
325                .unwrap_or_else(unsafe { || self.tree.take().unwrap_unchecked() });
326            // SAFETY: We just did the removal.
327            unsafe {
328                self.track_removal_swap(removal_target, removed_node.deref());
329            }
330            let removed_val = removed_node.consume().val;
331            // If the whole tree is unbalanced after a shrink, rebuild one of its deepest leaves.
332            self.maybe_rebuild_after_removal();
333            removed_val
334        })
335    }
336
337    // TODO(widders): insertable entries would be nice
338}
339
340impl<K, V, Stat, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
341where
342    V: KdValue,
343    Stat: Default,
344    Node: KdSearchable<KdValuePlus<V, K>, WithHeight<Stat>>
345        + KdNode<Value = KdValuePlus<V, K>>
346        + Statistic<Stat = WithHeight<Stat>>,
347    Balance: TreeHeightBound,
348{
349    pub fn search_values_with<S>(&self, searcher: &mut S)
350    where
351        S: KdSearcher<KdValuePlus<V, K>, Stat>,
352    {
353        let mut adapter = AdhocKdSearcher {
354            searcher,
355            visitor: |s: &mut S, v: &KdValuePlus<V, K>| s.visit(v),
356            guide: |s: &S, plane: &V::Dimension, dim: usize, stat: &WithHeight<Stat>| {
357                s.guide_search(plane, dim, &stat.stat)
358            },
359            recheck: |searcher: &S| searcher.recheck(),
360        };
361        if let Some(root) = self.tree.as_ref() {
362            root.search_with(&mut adapter)
363        }
364    }
365
366    pub fn search_values<S>(&self, mut searcher: S) -> S
367    where
368        S: KdSearcher<KdValuePlus<V, K>, Stat>,
369    {
370        self.search_values_with(&mut searcher);
371        searcher
372    }
373}
374
375impl<K, V, Node, Balance> Default for KdValueMapImpl<K, V, Node, Balance>
376where
377    V: KdValue,
378    Node: KdNode<Value = KdValuePlus<V, K>>,
379    Balance: TreeHeightBound + Default,
380{
381    fn default() -> Self {
382        Balance::validate();
383        Self {
384            tree: Default::default(),
385            items: Default::default(),
386            balance: Default::default(),
387        }
388    }
389}
390
391impl<K, V, Node, Balance> FromIterator<(K, V)> for KdValueMapImpl<K, V, Node, Balance>
392where
393    K: Clone + Eq + Hash,
394    V: KdValue,
395    Node: KdBuildableNode<Value = KdValuePlus<V, K>>,
396    <Node as Ownable>::Ownership: KdInsertable<Node>,
397    Balance: TreeHeightBound,
398{
399    fn from_iter<Is>(items: Is) -> Self
400    where
401        Is: IntoIterator<Item = (K, V)>,
402    {
403        let mut res: Self = Default::default();
404        let items_building: Vec<_> = items
405            .into_iter()
406            .map(|(k, val)| KdValuePlus { plus: k, val })
407            .collect();
408        let mut nodes_building: Vec<Node::Ownership> = Vec::new();
409        // Go over the key/values in reverse so we will only add the last for each duplicated key.
410        for val in items_building.into_iter().rev() {
411            match res.items.entry(val.plus.clone()) {
412                Occupied(_) => {}
413                Vacant(entry) => {
414                    let mut node = Node::new(val, 0);
415                    entry.insert(node.deref_mut());
416                    nodes_building.push(node);
417                }
418            }
419        }
420        Node::build_from(nodes_building, 0).map(|new_root| res.tree.replace(new_root));
421        res
422    }
423}
424
425#[cfg(feature = "full_validation")]
426impl<K, V, Node, Balance> KdValueMapImpl<K, V, Node, Balance>
427where
428    K: Eq + Hash,
429    V: KdValue,
430    Node: ScapegoatKdNode<Value = KdValuePlus<V, K>> + MutableBinaryNode,
431    Node::Stat: Height + Eq + Debug,
432    Balance: TreeHeightBound + Default,
433{
434    pub fn fully_validate(&mut self) {
435        // Check all invariants inside the tree
436        validate_tree(&mut self.tree);
437        // Check that all nodes in the tree have an entry in items
438        let mut count = 0usize;
439        for node in ViewingBinaryTreeIter::new(&self.tree) {
440            assert!(self
441                .items
442                .get(&node.value().plus)
443                .is_some_and(|ptr| *ptr == node as *const Node as *mut Node));
444            count += 1;
445        }
446        // Check that there are no entries in items that don't have a matching node in the tree.
447        // If any duplicate keys existed in the tree, we would have found an entry with a
448        // mismatched pointer in the prior loop, so we can be confident that the count of nodes is
449        // equal to the count of unique keys at this point.
450        assert_eq!(count, self.items.len());
451    }
452}
453
454pub struct KdValueIterator<'a, K, V, Node>
455where
456    K: 'a,
457    V: 'a + KdValue,
458    Node: Value<Value = KdValuePlus<V, K>>,
459{
460    iter: hash_map::Iter<'a, K, *mut Node>,
461}
462
463impl<'a, K, V, Node> Iterator for KdValueIterator<'a, K, V, Node>
464where
465    V: KdValue,
466    Node: Value<Value = KdValuePlus<V, K>>,
467{
468    type Item = (&'a K, &'a V);
469
470    fn next(&mut self) -> Option<Self::Item> {
471        self.iter
472            .next()
473            // SAFETY: Each entry in the items table points to a live node in the tree, which cannot
474            // change because we hold a reference.
475            .map(|(key, node_ptr)| (key, &unsafe { &*(*node_ptr as *const Node) }.value().val))
476    }
477
478    fn size_hint(&self) -> (usize, Option<usize>) {
479        self.iter.size_hint()
480    }
481}
482
483impl<'a, K, V, Node, Balance> IntoIterator for &'a KdValueMapImpl<K, V, Node, Balance>
484where
485    K: 'a + Clone + Eq + Hash,
486    V: 'a + KdValue,
487    Node: KdNode<Value = KdValuePlus<V, K>>,
488    Balance: TreeHeightBound,
489{
490    type Item = (&'a K, &'a V);
491    type IntoIter = KdValueIterator<'a, K, V, Node>;
492
493    fn into_iter(self) -> KdValueIterator<'a, K, V, Node> {
494        KdValueIterator {
495            iter: self.items.iter(),
496        }
497    }
498}
499
500// TODO(widders): MORE FASTER: stem-and-leaf tree
501// TODO(widders): plan to eliminate Ord requirement so we can use raw floating points, which is
502//  probably good for vectorization
503// TODO(widders): nonduplicate vs duplicate trees, parentless nodes
504// TODO(widders): -> KdKeyMap
505// TODO(widders): -> KdSet
506// TODO(widders): cubes and bounding-box metrics
507// TODO(widders): move tree_*() impls into trees? maybe not