fuzzy_search/
bk.rs

1use std::collections::{HashMap, VecDeque};
2
3struct Node {
4    term: String,
5    children: HashMap<usize, Node>,
6}
7
8impl Node {
9    fn new(term: String) -> Self {
10        Self {
11            term,
12            children: HashMap::default(),
13        }
14    }
15}
16
17pub struct BkTree<E: Fn(&str, &str) -> usize> {
18    root: Option<Node>,
19    edit_distance: E,
20}
21
22pub struct TreeLookup<'q, E: Fn(&str, &str) -> usize> {
23    choices: VecDeque<&'q Node>,
24    edit_distance: &'q E,
25    query: &'q str,
26    max_edits: usize,
27}
28
29impl<'q, E: Fn(&str, &str) -> usize> Iterator for TreeLookup<'q, E> {
30    type Item = String;
31
32    fn next(&mut self) -> Option<Self::Item> {
33        while let Some(choice) = self.choices.pop_front() {
34            let edits = (self.edit_distance)(&choice.term, self.query);
35
36            // Enqueue
37            let (lower, upper) = (
38                edits.saturating_sub(self.max_edits),
39                edits.saturating_add(self.max_edits),
40            );
41            for (dist, child) in choice.children.iter() {
42                if &lower <= dist && dist <= &upper {
43                    self.choices.push_back(child);
44                }
45            }
46
47            // Return neighbor
48            if edits <= self.max_edits {
49                return Some(choice.term.to_string());
50            }
51        }
52        None
53    }
54}
55
56impl<E: Fn(&str, &str) -> usize> BkTree<E> {
57    pub fn new(edit_distance: E) -> Self {
58        Self {
59            root: None,
60            edit_distance,
61        }
62    }
63
64    pub fn insert(&mut self, choice: String) {
65        match self.root {
66            None => {
67                self.root = Some(Node::new(choice));
68            }
69            Some(ref mut root) => {
70                let mut cursor = root;
71                loop {
72                    if cursor.term == choice {
73                        break;
74                    }
75                    let dist = (self.edit_distance)(&cursor.term, &choice);
76                    if cursor.children.get(&dist).is_none() {
77                        cursor.children.insert(dist, Node::new(choice));
78                        break;
79                    }
80                    cursor = cursor.children.get_mut(&dist).unwrap();
81                }
82            }
83        }
84    }
85
86    pub fn fuzzy_search<'q>(&'q self, query: &'q str, max_edits: usize) -> TreeLookup<'q, E> {
87        TreeLookup {
88            choices: match &self.root {
89                None => VecDeque::new(),
90                Some(root) => VecDeque::from(vec![root]),
91            },
92            edit_distance: &self.edit_distance,
93            query,
94            max_edits,
95        }
96    }
97}
98
99#[cfg(test)]
100mod tests {
101    mod tree {
102        mod insert {
103            use crate::{bk::BkTree, distance::levenshtein};
104
105            #[test]
106            fn test() {
107                let mut tree = BkTree::new(levenshtein);
108                tree.insert("apple".into());
109                assert_eq!(tree.root.as_ref().unwrap().term, "apple");
110                tree.insert("apply".into());
111                assert_eq!(
112                    tree.root.as_ref().unwrap().children.get(&1).unwrap().term,
113                    "apply"
114                );
115            }
116        }
117    }
118}