amt/indexing.rs
1//! Indexed retrieval.
2
3use std::collections::hash_map::Entry;
4use std::collections::HashMap;
5
6/// Burkhard-Keller metric tree over 32-bit keys with Hamming distance.
7///
8/// Supports radius-bounded nearest-neighbor queries via triangle-inequality
9/// pruning. For phonetic fingerprints, typical query time at radius ≤ 4
10/// over 10⁴-10⁶ entries is in the low milliseconds.
11///
12/// # Example
13///
14/// ```
15/// use amt::{encode_token, BKTree};
16///
17/// let mut tree: BKTree<String> = BKTree::new();
18/// for name in ["Khaled", "Khalid", "Ahmed", "Robert"] {
19/// let code = encode_token(name);
20/// for &sp in &code.spectrals {
21/// tree.add(sp, name.to_string());
22/// }
23/// }
24///
25/// let query = encode_token("Khaleed");
26/// let hits = tree.query(query.spectrals[0], 4);
27/// assert!(hits.iter().any(|(_, name)| name.as_str() == "Khaled"));
28/// ```
29#[derive(Debug)]
30pub struct BKTree<T> {
31 root: Option<Box<BKNode<T>>>,
32 len: usize,
33}
34
35#[derive(Debug)]
36struct BKNode<T> {
37 key: u32,
38 payload: T,
39 children: HashMap<u32, Box<BKNode<T>>>,
40}
41
42impl<T> Default for BKTree<T> {
43 fn default() -> Self {
44 Self::new()
45 }
46}
47
48impl<T> BKTree<T> {
49 /// Create an empty tree.
50 #[must_use]
51 pub fn new() -> Self {
52 Self { root: None, len: 0 }
53 }
54
55 /// Number of entries currently in the tree.
56 #[must_use]
57 pub fn len(&self) -> usize {
58 self.len
59 }
60
61 /// True if the tree has no entries.
62 #[must_use]
63 pub fn is_empty(&self) -> bool {
64 self.len == 0
65 }
66
67 /// Insert `(key, payload)`.
68 pub fn add(&mut self, key: u32, payload: T) {
69 self.len += 1;
70 match &mut self.root {
71 None => {
72 self.root = Some(Box::new(BKNode {
73 key,
74 payload,
75 children: HashMap::new(),
76 }));
77 }
78 Some(root) => {
79 let mut node: &mut BKNode<T> = root;
80 loop {
81 let d = (node.key ^ key).count_ones();
82 match node.children.entry(d) {
83 Entry::Occupied(slot) => {
84 node = slot.into_mut();
85 }
86 Entry::Vacant(slot) => {
87 slot.insert(Box::new(BKNode {
88 key,
89 payload,
90 children: HashMap::new(),
91 }));
92 return;
93 }
94 }
95 }
96 }
97 }
98 }
99
100 /// Return all entries within `radius` Hamming distance of `key`,
101 /// sorted by distance (closest first).
102 #[must_use]
103 pub fn query(&self, key: u32, radius: u32) -> Vec<(u32, &T)> {
104 let mut results: Vec<(u32, &T)> = Vec::new();
105 let mut stack: Vec<&BKNode<T>> = Vec::new();
106
107 if let Some(root) = &self.root {
108 stack.push(root);
109 }
110
111 while let Some(node) = stack.pop() {
112 let d = (node.key ^ key).count_ones();
113 if d <= radius {
114 results.push((d, &node.payload));
115 }
116 let lo = d.saturating_sub(radius);
117 let hi = d.saturating_add(radius);
118 for (&edge, child) in &node.children {
119 if edge >= lo && edge <= hi {
120 stack.push(child);
121 }
122 }
123 }
124
125 results.sort_by_key(|&(d, _)| d);
126 results
127 }
128}