1use crate::analyzer::synonym::SynonymMap;
19use crate::backend::FtsBackend;
20use crate::index::writer::FtsIndex;
21
/// Meta-store collection that holds per-tenant synonym-group records.
const SYNONYM_GROUPS_COLLECTION: &str = "_synonym_groups";

/// Reserved subkey inside the collection storing the JSON array of all
/// group names for a tenant (the "name index" used by listing).
const INDEX_SUBKEY: &str = "_index";
27
/// A named group of synonymous terms, stored per tenant in the backend's
/// meta store and serialized with `sonic_rs`.
#[derive(serde::Serialize, serde::Deserialize, Debug, Clone)]
pub struct SynonymGroupRecord {
    /// Group name, unique within a tenant; used as the meta-store subkey.
    pub name: String,
    /// Raw (unanalyzed) terms; they are run through the analyzer pipeline
    /// when the synonym map is built.
    pub terms: Vec<String>,
    /// Creation timestamp — presumably Unix time; unit not shown here,
    /// confirm against callers.
    pub created_at: u64,
}
35
36impl<B: FtsBackend> FtsIndex<B> {
37 pub fn put_synonym_group(&self, tid: u64, record: &SynonymGroupRecord) -> Result<(), B::Error> {
39 let bytes = sonic_rs::to_vec(record).unwrap_or_default();
41 self.backend
42 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, &record.name, &bytes)?;
43
44 let mut names = self.read_name_index(tid)?;
46 if !names.contains(&record.name) {
47 names.push(record.name.clone());
48 self.write_name_index(tid, &names)?;
49 }
50 Ok(())
51 }
52
53 pub fn delete_synonym_group(&self, tid: u64, name: &str) -> Result<bool, B::Error> {
55 let existed = self
58 .backend
59 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, name)?
60 .is_some_and(|b| !b.is_empty());
61
62 if existed {
63 self.backend
65 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, name, &[])?;
66 let mut names = self.read_name_index(tid)?;
68 names.retain(|n| n != name);
69 self.write_name_index(tid, &names)?;
70 }
71 Ok(existed)
72 }
73
74 pub fn get_synonym_group(
76 &self,
77 tid: u64,
78 name: &str,
79 ) -> Result<Option<SynonymGroupRecord>, B::Error> {
80 match self
81 .backend
82 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, name)?
83 {
84 None => Ok(None),
85 Some(bytes) if bytes.is_empty() => Ok(None),
86 Some(bytes) => Ok(sonic_rs::from_slice::<SynonymGroupRecord>(&bytes).ok()),
87 }
88 }
89
90 pub fn list_synonym_groups(&self, tid: u64) -> Result<Vec<SynonymGroupRecord>, B::Error> {
92 let names = self.read_name_index(tid)?;
93 let mut groups = Vec::with_capacity(names.len());
95 for name in &names {
96 if let Some(rec) = self.get_synonym_group(tid, name)? {
97 groups.push(rec);
98 }
99 }
100 Ok(groups)
101 }
102
103 pub fn build_synonym_map_for_tenant(
110 &self,
111 _tid: u64,
112 all_groups: &[SynonymGroupRecord],
113 ) -> SynonymMap {
114 let mut map = SynonymMap::new();
115 for group in all_groups {
116 if group.terms.len() < 2 {
117 continue;
118 }
119 let analyzed: Vec<Vec<String>> = group
122 .terms
123 .iter()
124 .map(|t| crate::analyzer::pipeline::analyze(t))
125 .collect();
126
127 for (i, my_tokens) in analyzed.iter().enumerate() {
128 let other_tokens: Vec<&str> = analyzed
129 .iter()
130 .enumerate()
131 .filter(|(j, _)| *j != i)
132 .flat_map(|(_, ts)| ts.iter().map(|s| s.as_str()))
133 .collect();
134 for my_token in my_tokens {
135 map.add(my_token, &other_tokens);
136 }
137 }
138 }
139 map
140 }
141
142 pub fn expand_query_with_synonyms(
147 &self,
148 tid: u64,
149 tokens: Vec<String>,
150 ) -> Result<Vec<String>, B::Error> {
151 let groups = self.list_synonym_groups(tid)?;
152 eprintln!(
153 "[synonym_debug] tid={tid} tokens={tokens:?} groups_count={}",
154 groups.len()
155 );
156 if groups.is_empty() {
157 return Ok(tokens);
158 }
159 let map = self.build_synonym_map_for_tenant(tid, &groups);
160 let expanded = map.expand(&tokens);
161 eprintln!("[synonym_debug] expanded={expanded:?}");
162 Ok(expanded)
163 }
164
165 fn read_name_index(&self, tid: u64) -> Result<Vec<String>, B::Error> {
168 match self
169 .backend
170 .read_meta(tid, SYNONYM_GROUPS_COLLECTION, INDEX_SUBKEY)?
171 {
172 None => Ok(Vec::new()),
173 Some(bytes) if bytes.is_empty() => Ok(Vec::new()),
174 Some(bytes) => Ok(sonic_rs::from_slice::<Vec<String>>(&bytes).unwrap_or_default()),
175 }
176 }
177
178 fn write_name_index(&self, tid: u64, names: &[String]) -> Result<(), B::Error> {
179 let bytes = sonic_rs::to_vec(names).unwrap_or_default();
180 self.backend
181 .write_meta(tid, SYNONYM_GROUPS_COLLECTION, INDEX_SUBKEY, &bytes)
182 }
183}
184
#[cfg(test)]
mod tests {
    use crate::backend::memory::MemoryBackend;
    use crate::index::writer::FtsIndex;

    use super::SynonymGroupRecord;

    /// Tenant id shared by every test.
    const T: u64 = 1;

    /// Fresh index backed by an in-memory store.
    fn idx() -> FtsIndex<MemoryBackend> {
        FtsIndex::new(MemoryBackend::new())
    }

    /// Builds a record with `created_at = 0`.
    fn rec(name: &str, terms: &[&str]) -> SynonymGroupRecord {
        let terms = terms.iter().map(|s| s.to_string()).collect();
        SynonymGroupRecord {
            name: name.to_string(),
            terms,
            created_at: 0,
        }
    }

    #[test]
    fn put_and_get() {
        let index = idx();
        index
            .put_synonym_group(T, &rec("db_terms", &["database", "db", "datastore"]))
            .unwrap();
        let fetched = index.get_synonym_group(T, "db_terms").unwrap().unwrap();
        assert_eq!(fetched.terms.len(), 3);
        assert_eq!(fetched.name, "db_terms");
    }

    #[test]
    fn delete_removes() {
        let index = idx();
        index.put_synonym_group(T, &rec("g1", &["a", "b"])).unwrap();
        // First delete reports the group existed; the second is a no-op.
        assert!(index.delete_synonym_group(T, "g1").unwrap());
        assert!(!index.delete_synonym_group(T, "g1").unwrap());
        assert!(index.get_synonym_group(T, "g1").unwrap().is_none());
    }

    #[test]
    fn list_reflects_puts_and_deletes() {
        let index = idx();
        for (name, terms) in [("g1", ["a", "b"]), ("g2", ["x", "y"])] {
            index.put_synonym_group(T, &rec(name, &terms)).unwrap();
        }
        let list_names = |i: &FtsIndex<MemoryBackend>| -> Vec<String> {
            i.list_synonym_groups(T)
                .unwrap()
                .into_iter()
                .map(|r| r.name)
                .collect()
        };
        assert_eq!(list_names(&index).len(), 2);

        index.delete_synonym_group(T, "g1").unwrap();
        assert_eq!(list_names(&index), vec!["g2"]);
    }

    #[test]
    fn synonym_map_bidirectional() {
        let index = idx();
        let groups = [rec("db_terms", &["db", "database", "datastore"])];
        let map = index.build_synonym_map_for_tenant(T, &groups);

        // Terms pass through the analyzer (stemming) before entering the
        // map, hence the "databas"/"datastor" forms below.
        let from_db = map.expand(&["db".to_string()]);
        assert!(from_db.contains(&"databas".to_string()));
        assert!(from_db.contains(&"datastor".to_string()));

        let from_database = map.expand(&["databas".to_string()]);
        assert!(from_database.contains(&"db".to_string()));
        assert!(from_database.contains(&"datastor".to_string()));
    }

    #[test]
    fn expand_query_with_synonyms_no_groups() {
        let index = idx();
        let original = vec!["hello".to_string(), "world".to_string()];
        let result = index
            .expand_query_with_synonyms(T, original.clone())
            .unwrap();
        assert_eq!(result, original);
    }

    #[test]
    fn expand_query_expands_matching_token() {
        let index = idx();
        index
            .put_synonym_group(T, &rec("db_terms", &["db", "database", "datastore"]))
            .unwrap();
        let expanded = index
            .expand_query_with_synonyms(T, vec!["db".to_string(), "perform".to_string()])
            .unwrap();
        for expected in ["db", "databas", "datastor", "perform"] {
            assert!(expanded.contains(&expected.to_string()));
        }
    }
}