1use std::collections::HashSet;
4
5use rphonetic::{
6 Caverphone1, Caverphone2, Encoder, MatchRatingApproach, Metaphone, Nysiis, Soundex,
7};
8use serde_json::Value;
9
10use crate::functions::Function;
11use crate::interpreter::SearchResult;
12use crate::registry::register_if_enabled;
13use crate::{Context, Runtime, arg, defn};
14
15defn!(SoundexFn, vec![arg!(string)], None);
20
21impl Function for SoundexFn {
22 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
23 self.signature.validate(args, ctx)?;
24 let s = args[0].as_str().unwrap();
25 let soundex = Soundex::default();
26 let result = soundex.encode(s);
27 Ok(Value::String(result))
28 }
29}
30
31defn!(MetaphoneFn, vec![arg!(string)], None);
36
37impl Function for MetaphoneFn {
38 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
39 self.signature.validate(args, ctx)?;
40 let s = args[0].as_str().unwrap();
41 let metaphone = Metaphone::default();
42 let result = metaphone.encode(s);
43 Ok(Value::String(result))
44 }
45}
46
47defn!(DoubleMetaphoneFn, vec![arg!(string)], None);
52
53impl Function for DoubleMetaphoneFn {
54 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
55 self.signature.validate(args, ctx)?;
56 let s = args[0].as_str().unwrap();
57 let dm = rphonetic::DoubleMetaphone::default();
58 let result = dm.double_metaphone(s);
59 let primary = Value::String(result.primary());
60 let alt = result.alternate();
61 let alternate = if alt.is_empty() {
62 Value::Null
63 } else {
64 Value::String(alt)
65 };
66 Ok(Value::Array(vec![primary, alternate]))
67 }
68}
69
70defn!(NysiisFn, vec![arg!(string)], None);
75
76impl Function for NysiisFn {
77 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
78 self.signature.validate(args, ctx)?;
79 let s = args[0].as_str().unwrap();
80 let nysiis = Nysiis::default();
81 let result = nysiis.encode(s);
82 Ok(Value::String(result))
83 }
84}
85
86defn!(MatchRatingCodexFn, vec![arg!(string)], None);
91
92impl Function for MatchRatingCodexFn {
93 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
94 self.signature.validate(args, ctx)?;
95 let s = args[0].as_str().unwrap();
96 let mra = MatchRatingApproach;
97 let result = mra.encode(s);
98 Ok(Value::String(result))
99 }
100}
101
102defn!(CaverphoneFn, vec![arg!(string)], None);
107
108impl Function for CaverphoneFn {
109 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
110 self.signature.validate(args, ctx)?;
111 let s = args[0].as_str().unwrap();
112 let caverphone = Caverphone1;
113 let result = caverphone.encode(s);
114 Ok(Value::String(result))
115 }
116}
117
118defn!(Caverphone2Fn, vec![arg!(string)], None);
123
124impl Function for Caverphone2Fn {
125 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
126 self.signature.validate(args, ctx)?;
127 let s = args[0].as_str().unwrap();
128 let caverphone = Caverphone2;
129 let result = caverphone.encode(s);
130 Ok(Value::String(result))
131 }
132}
133
134defn!(SoundsLikeFn, vec![arg!(string), arg!(string)], None);
139
140impl Function for SoundsLikeFn {
141 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
142 self.signature.validate(args, ctx)?;
143 let s1 = args[0].as_str().unwrap();
144 let s2 = args[1].as_str().unwrap();
145 let soundex = Soundex::default();
146 let result = soundex.is_encoded_equals(s1, s2);
147 Ok(Value::Bool(result))
148 }
149}
150
151defn!(
156 PhoneticMatchFn,
157 vec![arg!(string), arg!(string)],
158 Some(arg!(string))
159);
160
161impl Function for PhoneticMatchFn {
162 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
163 self.signature.validate(args, ctx)?;
164 let s1 = args[0].as_str().unwrap();
165 let s2 = args[1].as_str().unwrap();
166
167 let algorithm = if args.len() > 2 {
168 args[2]
169 .as_str()
170 .map(|s| s.to_lowercase())
171 .unwrap_or_else(|| "soundex".to_string())
172 } else {
173 "soundex".to_string()
174 };
175
176 let result = match algorithm.as_str() {
177 "soundex" => {
178 let encoder = Soundex::default();
179 encoder.is_encoded_equals(s1, s2)
180 }
181 "metaphone" => {
182 let encoder = Metaphone::default();
183 encoder.encode(s1) == encoder.encode(s2)
184 }
185 "double_metaphone" | "doublemetaphone" => {
186 let encoder = rphonetic::DoubleMetaphone::default();
187 let r1 = encoder.double_metaphone(s1);
188 let r2 = encoder.double_metaphone(s2);
189 r1.primary() == r2.primary()
191 || (!r1.alternate().is_empty() && r1.alternate() == r2.primary())
192 || (!r2.alternate().is_empty() && r2.alternate() == r1.primary())
193 || (!r1.alternate().is_empty()
194 && !r2.alternate().is_empty()
195 && r1.alternate() == r2.alternate())
196 }
197 "nysiis" => {
198 let encoder = Nysiis::default();
199 encoder.encode(s1) == encoder.encode(s2)
200 }
201 "match_rating" | "mra" => {
202 let encoder = MatchRatingApproach;
203 encoder.is_encoded_equals(s1, s2)
204 }
205 "caverphone" | "caverphone1" => {
206 let encoder = Caverphone1;
207 encoder.encode(s1) == encoder.encode(s2)
208 }
209 "caverphone2" => {
210 let encoder = Caverphone2;
211 encoder.encode(s1) == encoder.encode(s2)
212 }
213 _ => {
214 let encoder = Soundex::default();
216 encoder.is_encoded_equals(s1, s2)
217 }
218 };
219
220 Ok(Value::Bool(result))
221 }
222}
223
224pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
226 register_if_enabled(runtime, "soundex", enabled, Box::new(SoundexFn::new()));
227 register_if_enabled(runtime, "metaphone", enabled, Box::new(MetaphoneFn::new()));
228 register_if_enabled(
229 runtime,
230 "double_metaphone",
231 enabled,
232 Box::new(DoubleMetaphoneFn::new()),
233 );
234 register_if_enabled(runtime, "nysiis", enabled, Box::new(NysiisFn::new()));
235 register_if_enabled(
236 runtime,
237 "match_rating_codex",
238 enabled,
239 Box::new(MatchRatingCodexFn::new()),
240 );
241 register_if_enabled(
242 runtime,
243 "caverphone",
244 enabled,
245 Box::new(CaverphoneFn::new()),
246 );
247 register_if_enabled(
248 runtime,
249 "caverphone2",
250 enabled,
251 Box::new(Caverphone2Fn::new()),
252 );
253 register_if_enabled(
254 runtime,
255 "sounds_like",
256 enabled,
257 Box::new(SoundsLikeFn::new()),
258 );
259 register_if_enabled(
260 runtime,
261 "phonetic_match",
262 enabled,
263 Box::new(PhoneticMatchFn::new()),
264 );
265}
266
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds the runtime used by every test, configured through
    /// `with_standard()` and `with_all_extensions()`.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// `soundex` of "Robert" is "R163".
    #[test]
    fn test_soundex() {
        let rt = setup_runtime();
        let compiled = rt.compile("soundex(@)").unwrap();
        let input = json!("Robert");
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_str(), Some("R163"));
    }

    /// "Rupert" yields the same Soundex code as "Robert".
    #[test]
    fn test_soundex_similar_names() {
        let rt = setup_runtime();
        let compiled = rt.compile("soundex(@)").unwrap();
        let input = json!("Rupert");
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_str(), Some("R163"));
    }

    /// `metaphone` of "Smith" is "SM0".
    #[test]
    fn test_metaphone() {
        let rt = setup_runtime();
        let compiled = rt.compile("metaphone(@)").unwrap();
        let input = json!("Smith");
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_str(), Some("SM0"));
    }

    /// `double_metaphone` returns a two-element array whose first element
    /// is a non-empty string.
    #[test]
    fn test_double_metaphone() {
        let rt = setup_runtime();
        let compiled = rt.compile("double_metaphone(@)").unwrap();
        let input = json!("Schmidt");
        let output = compiled.search(&input).unwrap();
        let codes = output.as_array().unwrap();
        assert_eq!(codes.len(), 2);
        let primary = codes[0].as_str().unwrap();
        assert!(!primary.is_empty());
    }

    /// `nysiis` returns a non-empty string.
    #[test]
    fn test_nysiis() {
        let rt = setup_runtime();
        let compiled = rt.compile("nysiis(@)").unwrap();
        let input = json!("Johnson");
        let output = compiled.search(&input).unwrap();
        let code = output.as_str().unwrap();
        assert!(!code.is_empty());
    }

    /// `match_rating_codex` returns a non-empty string.
    #[test]
    fn test_match_rating_codex() {
        let rt = setup_runtime();
        let compiled = rt.compile("match_rating_codex(@)").unwrap();
        let input = json!("Smith");
        let output = compiled.search(&input).unwrap();
        let code = output.as_str().unwrap();
        assert!(!code.is_empty());
    }

    /// `caverphone` returns a non-empty string.
    #[test]
    fn test_caverphone() {
        let rt = setup_runtime();
        let compiled = rt.compile("caverphone(@)").unwrap();
        let input = json!("Thompson");
        let output = compiled.search(&input).unwrap();
        let code = output.as_str().unwrap();
        assert!(!code.is_empty());
    }

    /// `caverphone2` returns a non-empty string.
    #[test]
    fn test_caverphone2() {
        let rt = setup_runtime();
        let compiled = rt.compile("caverphone2(@)").unwrap();
        let input = json!("Thompson");
        let output = compiled.search(&input).unwrap();
        let code = output.as_str().unwrap();
        assert!(!code.is_empty());
    }

    /// "Robert" and "Rupert" sound alike.
    #[test]
    fn test_sounds_like_true() {
        let rt = setup_runtime();
        let compiled = rt.compile("sounds_like(@[0], @[1])").unwrap();
        let input = json!(["Robert", "Rupert"]);
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_bool(), Some(true));
    }

    /// "Robert" and "Smith" do not sound alike.
    #[test]
    fn test_sounds_like_false() {
        let rt = setup_runtime();
        let compiled = rt.compile("sounds_like(@[0], @[1])").unwrap();
        let input = json!(["Robert", "Smith"]);
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_bool(), Some(false));
    }

    /// With no algorithm argument, `phonetic_match` matches "Robert" and
    /// "Rupert".
    #[test]
    fn test_phonetic_match_default() {
        let rt = setup_runtime();
        let compiled = rt.compile("phonetic_match(@[0], @[1])").unwrap();
        let input = json!(["Robert", "Rupert"]);
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_bool(), Some(true));
    }

    /// Under 'metaphone', "Smith" and "Smyth" match.
    #[test]
    fn test_phonetic_match_metaphone() {
        let rt = setup_runtime();
        let compiled = rt
            .compile("phonetic_match(@[0], @[1], 'metaphone')")
            .unwrap();
        let input = json!(["Smith", "Smyth"]);
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_bool(), Some(true));
    }

    /// Under 'nysiis', "Johnson" and "Jonson" match.
    #[test]
    fn test_phonetic_match_nysiis() {
        let rt = setup_runtime();
        let compiled = rt
            .compile("phonetic_match(@[0], @[1], 'nysiis')")
            .unwrap();
        let input = json!(["Johnson", "Jonson"]);
        let output = compiled.search(&input).unwrap();
        assert_eq!(output.as_bool(), Some(true));
    }
}