Skip to main content

amt/
indexing.rs

1//! Indexed retrieval.
2
3use std::collections::hash_map::Entry;
4use std::collections::HashMap;
5
6/// Burkhard-Keller metric tree over 32-bit keys with Hamming distance.
7///
8/// Supports radius-bounded nearest-neighbor queries via triangle-inequality
9/// pruning. For phonetic fingerprints, typical query time at radius ≤ 4
10/// over 10⁴-10⁶ entries is in the low milliseconds.
11///
12/// # Example
13///
14/// ```
15/// use amt::{encode_token, BKTree};
16///
17/// let mut tree: BKTree<String> = BKTree::new();
18/// for name in ["Khaled", "Khalid", "Ahmed", "Robert"] {
19///     let code = encode_token(name);
20///     for &sp in &code.spectrals {
21///         tree.add(sp, name.to_string());
22///     }
23/// }
24///
25/// let query = encode_token("Khaleed");
26/// let hits = tree.query(query.spectrals[0], 4);
27/// assert!(hits.iter().any(|(_, name)| name.as_str() == "Khaled"));
28/// ```
29#[derive(Debug)]
30pub struct BKTree<T> {
31    root: Option<Box<BKNode<T>>>,
32    len: usize,
33}
34
35#[derive(Debug)]
36struct BKNode<T> {
37    key: u32,
38    payload: T,
39    children: HashMap<u32, Box<BKNode<T>>>,
40}
41
42impl<T> Default for BKTree<T> {
43    fn default() -> Self {
44        Self::new()
45    }
46}
47
48impl<T> BKTree<T> {
49    /// Create an empty tree.
50    #[must_use]
51    pub fn new() -> Self {
52        Self { root: None, len: 0 }
53    }
54
55    /// Number of entries currently in the tree.
56    #[must_use]
57    pub fn len(&self) -> usize {
58        self.len
59    }
60
61    /// True if the tree has no entries.
62    #[must_use]
63    pub fn is_empty(&self) -> bool {
64        self.len == 0
65    }
66
67    /// Insert `(key, payload)`.
68    pub fn add(&mut self, key: u32, payload: T) {
69        self.len += 1;
70        match &mut self.root {
71            None => {
72                self.root = Some(Box::new(BKNode {
73                    key,
74                    payload,
75                    children: HashMap::new(),
76                }));
77            }
78            Some(root) => {
79                let mut node: &mut BKNode<T> = root;
80                loop {
81                    let d = (node.key ^ key).count_ones();
82                    match node.children.entry(d) {
83                        Entry::Occupied(slot) => {
84                            node = slot.into_mut();
85                        }
86                        Entry::Vacant(slot) => {
87                            slot.insert(Box::new(BKNode {
88                                key,
89                                payload,
90                                children: HashMap::new(),
91                            }));
92                            return;
93                        }
94                    }
95                }
96            }
97        }
98    }
99
100    /// Return all entries within `radius` Hamming distance of `key`,
101    /// sorted by distance (closest first).
102    #[must_use]
103    pub fn query(&self, key: u32, radius: u32) -> Vec<(u32, &T)> {
104        let mut results: Vec<(u32, &T)> = Vec::new();
105        let mut stack: Vec<&BKNode<T>> = Vec::new();
106
107        if let Some(root) = &self.root {
108            stack.push(root);
109        }
110
111        while let Some(node) = stack.pop() {
112            let d = (node.key ^ key).count_ones();
113            if d <= radius {
114                results.push((d, &node.payload));
115            }
116            let lo = d.saturating_sub(radius);
117            let hi = d.saturating_add(radius);
118            for (&edge, child) in &node.children {
119                if edge >= lo && edge <= hi {
120                    stack.push(child);
121                }
122            }
123        }
124
125        results.sort_by_key(|&(d, _)| d);
126        results
127    }
128}