1use crate::analyzer::synonym::SynonymMap;
19use crate::backend::FtsBackend;
20use crate::index::writer::FtsIndex;
21
/// Name of the metadata collection that holds both the synonym-group records
/// (keyed by group name) and the list of group names (keyed by `INDEX_SUBKEY`).
const SYNONYM_GROUPS_COLLECTION: &str = "_synonym_groups";

/// Key inside the synonym-groups collection under which the serialized list of
/// group names is stored. It shares a key space with the group names themselves.
const INDEX_SUBKEY: &str = "_index";
27
28#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
30pub struct SynonymGroupRecord {
31 pub name: String,
32 pub terms: Vec<String>,
33 pub created_at: u64,
34}
35
36impl<B: FtsBackend> FtsIndex<B> {
37 pub fn put_synonym_group(&self, tid: u64, record: &SynonymGroupRecord) -> Result<(), B::Error> {
39 let bytes = sonic_rs::to_vec(record).unwrap_or_default();
41 self.backend
42 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, &record.name, &bytes)?;
43
44 let mut names = self.read_name_index(tid)?;
46 if !names.contains(&record.name) {
47 names.push(record.name.clone());
48 self.write_name_index(tid, &names)?;
49 }
50 Ok(())
51 }
52
53 pub fn delete_synonym_group(&self, tid: u64, name: &str) -> Result<bool, B::Error> {
55 let existed = self
58 .backend
59 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, name)?
60 .is_some_and(|b| !b.is_empty());
61
62 if existed {
63 self.backend
65 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, name, &[])?;
66 let mut names = self.read_name_index(tid)?;
68 names.retain(|n| n != name);
69 self.write_name_index(tid, &names)?;
70 }
71 Ok(existed)
72 }
73
74 pub fn get_synonym_group(
76 &self,
77 tid: u64,
78 name: &str,
79 ) -> Result<Option<SynonymGroupRecord>, B::Error> {
80 match self
81 .backend
82 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, name)?
83 {
84 None => Ok(None),
85 Some(bytes) if bytes.is_empty() => Ok(None),
86 Some(bytes) => Ok(sonic_rs::from_slice::<SynonymGroupRecord>(&bytes).ok()),
87 }
88 }
89
90 pub fn list_synonym_groups(&self, tid: u64) -> Result<Vec<SynonymGroupRecord>, B::Error> {
92 let names = self.read_name_index(tid)?;
93 let mut groups = Vec::with_capacity(names.len());
94 for name in &names {
95 if let Some(rec) = self.get_synonym_group(tid, name)? {
96 groups.push(rec);
97 }
98 }
99 Ok(groups)
100 }
101
102 pub fn build_synonym_map_for_tenant(
109 &self,
110 _tid: u64,
111 all_groups: &[SynonymGroupRecord],
112 ) -> SynonymMap {
113 let mut map = SynonymMap::new();
114 for group in all_groups {
115 if group.terms.len() < 2 {
116 continue;
117 }
118 let analyzed: Vec<Vec<String>> = group
121 .terms
122 .iter()
123 .map(|t| crate::analyzer::pipeline::analyze(t))
124 .collect();
125
126 for (i, my_tokens) in analyzed.iter().enumerate() {
127 let other_tokens: Vec<&str> = analyzed
128 .iter()
129 .enumerate()
130 .filter(|(j, _)| *j != i)
131 .flat_map(|(_, ts)| ts.iter().map(|s| s.as_str()))
132 .collect();
133 for my_token in my_tokens {
134 map.add(my_token, &other_tokens);
135 }
136 }
137 }
138 map
139 }
140
141 pub fn expand_query_with_synonyms(
146 &self,
147 tid: u64,
148 tokens: Vec<String>,
149 ) -> Result<Vec<String>, B::Error> {
150 let groups = self.list_synonym_groups(tid)?;
151 if groups.is_empty() {
152 return Ok(tokens);
153 }
154 let map = self.build_synonym_map_for_tenant(tid, &groups);
155 let expanded = map.expand(&tokens);
156 Ok(expanded)
157 }
158
159 fn read_name_index(&self, tid: u64) -> Result<Vec<String>, B::Error> {
162 match self
163 .backend
164 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, INDEX_SUBKEY)?
165 {
166 None => Ok(Vec::new()),
167 Some(bytes) if bytes.is_empty() => Ok(Vec::new()),
168 Some(bytes) => Ok(sonic_rs::from_slice::<Vec<String>>(&bytes).unwrap_or_default()),
169 }
170 }
171
172 fn write_name_index(&self, tid: u64, names: &[String]) -> Result<(), B::Error> {
173 let bytes = sonic_rs::to_vec(names).unwrap_or_default();
174 self.backend
175 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, INDEX_SUBKEY, &bytes)
176 }
177}
178
#[cfg(test)]
mod tests {
    use super::SynonymGroupRecord;
    use crate::backend::memory::MemoryBackend;
    use crate::index::writer::FtsIndex;

    /// Tenant id shared by every test in this module.
    const TENANT: u64 = 1;

    /// Creates an empty index on top of the in-memory backend.
    fn new_index() -> FtsIndex<MemoryBackend> {
        FtsIndex::new(MemoryBackend::new())
    }

    /// Builds a record with the given name and terms and a zero creation timestamp.
    fn group(name: &str, terms: &[&str]) -> SynonymGroupRecord {
        let mut owned_terms = Vec::new();
        for term in terms {
            owned_terms.push(term.to_string());
        }
        SynonymGroupRecord {
            name: name.to_string(),
            terms: owned_terms,
            created_at: 0,
        }
    }

    /// Names of the groups currently listed for the test tenant, in listing order.
    fn listed_names(index: &FtsIndex<MemoryBackend>) -> Vec<String> {
        let mut names = Vec::new();
        for record in index.list_synonym_groups(TENANT).unwrap() {
            names.push(record.name);
        }
        names
    }

    /// Whether `tokens` contains exactly the token `wanted`.
    fn has_token(tokens: &[String], wanted: &str) -> bool {
        tokens.iter().any(|token| token == wanted)
    }

    #[test]
    fn put_and_get() {
        let index = new_index();
        let record = group("db_terms", &["database", "db", "datastore"]);
        index.put_synonym_group(TENANT, &record).unwrap();

        let loaded = index.get_synonym_group(TENANT, "db_terms").unwrap().unwrap();
        assert_eq!(loaded.name, "db_terms");
        assert_eq!(loaded.terms.len(), 3);
    }

    #[test]
    fn delete_removes() {
        let index = new_index();
        index.put_synonym_group(TENANT, &group("g1", &["a", "b"])).unwrap();

        // The first delete reports a removal, the second finds nothing left to remove.
        assert!(index.delete_synonym_group(TENANT, "g1").unwrap());
        assert!(!index.delete_synonym_group(TENANT, "g1").unwrap());
        assert!(index.get_synonym_group(TENANT, "g1").unwrap().is_none());
    }

    #[test]
    fn list_reflects_puts_and_deletes() {
        let index = new_index();
        index.put_synonym_group(TENANT, &group("g1", &["a", "b"])).unwrap();
        index.put_synonym_group(TENANT, &group("g2", &["x", "y"])).unwrap();
        assert_eq!(listed_names(&index).len(), 2);

        index.delete_synonym_group(TENANT, "g1").unwrap();
        assert_eq!(listed_names(&index), vec!["g2"]);
    }

    #[test]
    fn synonym_map_bidirectional() {
        let index = new_index();
        let groups = vec![group("db_terms", &["db", "database", "datastore"])];
        let map = index.build_synonym_map_for_tenant(TENANT, &groups);

        // Expected values are the analyzed forms of the configured terms.
        let from_db = map.expand(&["db".to_string()]);
        assert!(has_token(&from_db, "databas"));
        assert!(has_token(&from_db, "datastor"));

        let from_database = map.expand(&["databas".to_string()]);
        assert!(has_token(&from_database, "db"));
        assert!(has_token(&from_database, "datastor"));
    }

    #[test]
    fn expand_query_with_synonyms_no_groups() {
        let index = new_index();
        let query = vec!["hello".to_string(), "world".to_string()];

        let result = index
            .expand_query_with_synonyms(TENANT, query.clone())
            .unwrap();
        assert_eq!(result, query);
    }

    #[test]
    fn expand_query_expands_matching_token() {
        let index = new_index();
        let record = group("db_terms", &["db", "database", "datastore"]);
        index.put_synonym_group(TENANT, &record).unwrap();

        let query = vec!["db".to_string(), "perform".to_string()];
        let result = index.expand_query_with_synonyms(TENANT, query).unwrap();

        for wanted in ["db", "databas", "datastor", "perform"] {
            assert!(has_token(&result, wanted));
        }
    }
}