graph_api_simplegraph/index/
full_text.rs1use fastbloom::BloomFilter;
2use rphonetic::{Encoder, Metaphone};
3use std::collections::HashMap;
4use std::hash::Hash;
5
6#[derive(Default, Debug)]
16pub struct FullTextIndex<V>
17where
18 V: Eq + Hash + Copy,
19{
20 filters: HashMap<V, BloomFilter>,
21 metaphone: Metaphone,
22}
23
24impl<V> FullTextIndex<V>
25where
26 V: Eq + Hash + Copy,
27{
28 pub(crate) fn insert(&mut self, key: V, text: &str) {
37 let word_count = text.split_whitespace().count();
38 let mut filter = BloomFilter::with_false_pos(0.001).expected_items(word_count);
39
40 for word in text.split_whitespace() {
41 let encoded = self.metaphone.encode(word);
42 filter.insert(&encoded);
43 }
44
45 self.filters.insert(key, filter);
46 }
47
48 pub(crate) fn search<'a>(&'a self, search: &str) -> impl Iterator<Item = V> + 'a {
60 let search = search
61 .split_whitespace()
62 .map(|w| self.metaphone.encode(w))
63 .collect::<Vec<_>>();
64 self.filters.iter().filter_map(move |(v, filter)| {
65 for word in &search {
66 if !filter.contains(&word) {
67 return None;
68 }
69 }
70 Some(*v)
71 })
72 }
73
74 pub(crate) fn remove(&mut self, key: &V) {
79 self.filters.remove(key);
80 }
81}
82#[cfg(test)]
83mod tests {
84 use super::*;
85
86 #[test]
87 fn test_basic_insert_and_search() {
88 let mut index = FullTextIndex::default();
89 index.insert(1, "hello world");
90
91 let results: Vec<_> = index.search("hello").collect();
92 assert_eq!(results, vec![1]);
93
94 let results: Vec<_> = index.search("world").collect();
95 assert_eq!(results, vec![1]);
96 }
97
98 #[test]
99 fn test_phonetic_matching() {
100 let mut index = FullTextIndex::default();
101 index.insert(1, "phone");
102
103 let results: Vec<_> = index.search("fone").collect();
104 assert_eq!(results, vec![1]);
105 }
106
107 #[test]
108 fn test_multiple_entries() {
109 let mut index = FullTextIndex::default();
110 index.insert(1, "hello world");
111 index.insert(2, "hello there");
112 index.insert(3, "goodbye world");
113
114 let results: Vec<_> = index.search("hello").collect();
115 assert_eq!(results.len(), 2);
116 assert!(results.contains(&1));
117 assert!(results.contains(&2));
118 }
119
120 #[test]
121 fn test_remove() {
122 let mut index = FullTextIndex::default();
123 index.insert(1, "hello world");
124 index.remove(&1);
125
126 let results: Vec<_> = index.search("hello").collect();
127 assert!(results.is_empty());
128 }
129
130 #[test]
131 fn test_multi_word_search() {
132 let mut index = FullTextIndex::default();
133 index.insert(1, "hello beautiful world");
134 index.insert(2, "hello ugly world");
135
136 let results: Vec<_> = index.search("hello world").collect();
137 assert_eq!(results.len(), 2);
138
139 let results: Vec<_> = index.search("beautiful world").collect();
140 assert_eq!(results, vec![1]);
141 }
142
143 #[test]
144 fn test_empty_cases() {
145 let mut index = FullTextIndex::<i32>::default();
146 let results: Vec<_> = index.search("anything").collect();
147 assert!(results.is_empty());
148
149 index.insert(1, "");
150 let results: Vec<_> = index.search("").collect();
151 assert_eq!(results, vec![1]);
152 }
153}