Skip to main content

jpx_core/extensions/
phonetic.rs

1//! Phonetic encoding functions.
2
3use std::collections::HashSet;
4
5use rphonetic::{
6    Caverphone1, Caverphone2, Encoder, MatchRatingApproach, Metaphone, Nysiis, Soundex,
7};
8use serde_json::Value;
9
10use crate::functions::Function;
11use crate::interpreter::SearchResult;
12use crate::registry::register_if_enabled;
13use crate::{Context, Runtime, arg, defn};
14
15// =============================================================================
16// soundex(string) -> string
17// =============================================================================
18
19defn!(SoundexFn, vec![arg!(string)], None);
20
21impl Function for SoundexFn {
22    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
23        self.signature.validate(args, ctx)?;
24        let s = args[0].as_str().unwrap();
25        let soundex = Soundex::default();
26        let result = soundex.encode(s);
27        Ok(Value::String(result))
28    }
29}
30
31// =============================================================================
32// metaphone(string) -> string
33// =============================================================================
34
35defn!(MetaphoneFn, vec![arg!(string)], None);
36
37impl Function for MetaphoneFn {
38    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
39        self.signature.validate(args, ctx)?;
40        let s = args[0].as_str().unwrap();
41        let metaphone = Metaphone::default();
42        let result = metaphone.encode(s);
43        Ok(Value::String(result))
44    }
45}
46
47// =============================================================================
48// double_metaphone(string) -> [primary, alternate]
49// =============================================================================
50
51defn!(DoubleMetaphoneFn, vec![arg!(string)], None);
52
53impl Function for DoubleMetaphoneFn {
54    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
55        self.signature.validate(args, ctx)?;
56        let s = args[0].as_str().unwrap();
57        let dm = rphonetic::DoubleMetaphone::default();
58        let result = dm.double_metaphone(s);
59        let primary = Value::String(result.primary());
60        let alt = result.alternate();
61        let alternate = if alt.is_empty() {
62            Value::Null
63        } else {
64            Value::String(alt)
65        };
66        Ok(Value::Array(vec![primary, alternate]))
67    }
68}
69
70// =============================================================================
71// nysiis(string) -> string
72// =============================================================================
73
74defn!(NysiisFn, vec![arg!(string)], None);
75
76impl Function for NysiisFn {
77    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
78        self.signature.validate(args, ctx)?;
79        let s = args[0].as_str().unwrap();
80        let nysiis = Nysiis::default();
81        let result = nysiis.encode(s);
82        Ok(Value::String(result))
83    }
84}
85
86// =============================================================================
87// match_rating_codex(string) -> string
88// =============================================================================
89
90defn!(MatchRatingCodexFn, vec![arg!(string)], None);
91
92impl Function for MatchRatingCodexFn {
93    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
94        self.signature.validate(args, ctx)?;
95        let s = args[0].as_str().unwrap();
96        let mra = MatchRatingApproach;
97        let result = mra.encode(s);
98        Ok(Value::String(result))
99    }
100}
101
102// =============================================================================
103// caverphone(string) -> string
104// =============================================================================
105
106defn!(CaverphoneFn, vec![arg!(string)], None);
107
108impl Function for CaverphoneFn {
109    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
110        self.signature.validate(args, ctx)?;
111        let s = args[0].as_str().unwrap();
112        let caverphone = Caverphone1;
113        let result = caverphone.encode(s);
114        Ok(Value::String(result))
115    }
116}
117
118// =============================================================================
119// caverphone2(string) -> string
120// =============================================================================
121
122defn!(Caverphone2Fn, vec![arg!(string)], None);
123
124impl Function for Caverphone2Fn {
125    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
126        self.signature.validate(args, ctx)?;
127        let s = args[0].as_str().unwrap();
128        let caverphone = Caverphone2;
129        let result = caverphone.encode(s);
130        Ok(Value::String(result))
131    }
132}
133
134// =============================================================================
135// sounds_like(s1, s2) -> bool
136// =============================================================================
137
138defn!(SoundsLikeFn, vec![arg!(string), arg!(string)], None);
139
140impl Function for SoundsLikeFn {
141    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
142        self.signature.validate(args, ctx)?;
143        let s1 = args[0].as_str().unwrap();
144        let s2 = args[1].as_str().unwrap();
145        let soundex = Soundex::default();
146        let result = soundex.is_encoded_equals(s1, s2);
147        Ok(Value::Bool(result))
148    }
149}
150
151// =============================================================================
152// phonetic_match(s1, s2, algorithm?) -> bool
153// =============================================================================
154
155defn!(
156    PhoneticMatchFn,
157    vec![arg!(string), arg!(string)],
158    Some(arg!(string))
159);
160
161impl Function for PhoneticMatchFn {
162    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
163        self.signature.validate(args, ctx)?;
164        let s1 = args[0].as_str().unwrap();
165        let s2 = args[1].as_str().unwrap();
166
167        let algorithm = if args.len() > 2 {
168            args[2]
169                .as_str()
170                .map(|s| s.to_lowercase())
171                .unwrap_or_else(|| "soundex".to_string())
172        } else {
173            "soundex".to_string()
174        };
175
176        let result = match algorithm.as_str() {
177            "soundex" => {
178                let encoder = Soundex::default();
179                encoder.is_encoded_equals(s1, s2)
180            }
181            "metaphone" => {
182                let encoder = Metaphone::default();
183                encoder.encode(s1) == encoder.encode(s2)
184            }
185            "double_metaphone" | "doublemetaphone" => {
186                let encoder = rphonetic::DoubleMetaphone::default();
187                let r1 = encoder.double_metaphone(s1);
188                let r2 = encoder.double_metaphone(s2);
189                // Match if primary codes match, or if any combination matches
190                r1.primary() == r2.primary()
191                    || (!r1.alternate().is_empty() && r1.alternate() == r2.primary())
192                    || (!r2.alternate().is_empty() && r2.alternate() == r1.primary())
193                    || (!r1.alternate().is_empty()
194                        && !r2.alternate().is_empty()
195                        && r1.alternate() == r2.alternate())
196            }
197            "nysiis" => {
198                let encoder = Nysiis::default();
199                encoder.encode(s1) == encoder.encode(s2)
200            }
201            "match_rating" | "mra" => {
202                let encoder = MatchRatingApproach;
203                encoder.is_encoded_equals(s1, s2)
204            }
205            "caverphone" | "caverphone1" => {
206                let encoder = Caverphone1;
207                encoder.encode(s1) == encoder.encode(s2)
208            }
209            "caverphone2" => {
210                let encoder = Caverphone2;
211                encoder.encode(s1) == encoder.encode(s2)
212            }
213            _ => {
214                // Default to soundex for unknown algorithms
215                let encoder = Soundex::default();
216                encoder.is_encoded_equals(s1, s2)
217            }
218        };
219
220        Ok(Value::Bool(result))
221    }
222}
223
224/// Register phonetic functions filtered by the enabled set.
225pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
226    register_if_enabled(runtime, "soundex", enabled, Box::new(SoundexFn::new()));
227    register_if_enabled(runtime, "metaphone", enabled, Box::new(MetaphoneFn::new()));
228    register_if_enabled(
229        runtime,
230        "double_metaphone",
231        enabled,
232        Box::new(DoubleMetaphoneFn::new()),
233    );
234    register_if_enabled(runtime, "nysiis", enabled, Box::new(NysiisFn::new()));
235    register_if_enabled(
236        runtime,
237        "match_rating_codex",
238        enabled,
239        Box::new(MatchRatingCodexFn::new()),
240    );
241    register_if_enabled(
242        runtime,
243        "caverphone",
244        enabled,
245        Box::new(CaverphoneFn::new()),
246    );
247    register_if_enabled(
248        runtime,
249        "caverphone2",
250        enabled,
251        Box::new(Caverphone2Fn::new()),
252    );
253    register_if_enabled(
254        runtime,
255        "sounds_like",
256        enabled,
257        Box::new(SoundsLikeFn::new()),
258    );
259    register_if_enabled(
260        runtime,
261        "phonetic_match",
262        enabled,
263        Box::new(PhoneticMatchFn::new()),
264    );
265}
266
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds a runtime with the standard functions and every extension enabled.
    fn setup_runtime() -> Runtime {
        let builder = Runtime::builder().with_standard().with_all_extensions();
        builder.build()
    }

    #[test]
    fn test_soundex() {
        let rt = setup_runtime();
        let query = rt.compile("soundex(@)").unwrap();
        let name = json!("Robert");
        let code = query.search(&name).unwrap();
        assert_eq!(code.as_str(), Some("R163"));
    }

    #[test]
    fn test_soundex_similar_names() {
        let rt = setup_runtime();
        let query = rt.compile("soundex(@)").unwrap();
        // "Rupert" must land on the same Soundex code as "Robert" (R163).
        let name = json!("Rupert");
        let code = query.search(&name).unwrap();
        assert_eq!(code.as_str(), Some("R163"));
    }

    #[test]
    fn test_metaphone() {
        let rt = setup_runtime();
        let query = rt.compile("metaphone(@)").unwrap();
        let name = json!("Smith");
        let code = query.search(&name).unwrap();
        assert_eq!(code.as_str(), Some("SM0"));
    }

    #[test]
    fn test_double_metaphone() {
        let rt = setup_runtime();
        let query = rt.compile("double_metaphone(@)").unwrap();
        let name = json!("Schmidt");
        let encoded = query.search(&name).unwrap();
        let pair = encoded.as_array().unwrap();
        // Always [primary, alternate].
        assert_eq!(pair.len(), 2);
        // The primary code must be a non-empty string.
        assert_ne!(pair[0].as_str().unwrap(), "");
    }

    #[test]
    fn test_nysiis() {
        let rt = setup_runtime();
        let query = rt.compile("nysiis(@)").unwrap();
        let name = json!("Johnson");
        let code = query.search(&name).unwrap();
        assert_ne!(code.as_str().unwrap(), "");
    }

    #[test]
    fn test_match_rating_codex() {
        let rt = setup_runtime();
        let query = rt.compile("match_rating_codex(@)").unwrap();
        let name = json!("Smith");
        let code = query.search(&name).unwrap();
        assert_ne!(code.as_str().unwrap(), "");
    }

    #[test]
    fn test_caverphone() {
        let rt = setup_runtime();
        let query = rt.compile("caverphone(@)").unwrap();
        let name = json!("Thompson");
        let code = query.search(&name).unwrap();
        assert_ne!(code.as_str().unwrap(), "");
    }

    #[test]
    fn test_caverphone2() {
        let rt = setup_runtime();
        let query = rt.compile("caverphone2(@)").unwrap();
        let name = json!("Thompson");
        let code = query.search(&name).unwrap();
        assert_ne!(code.as_str().unwrap(), "");
    }

    #[test]
    fn test_sounds_like_true() {
        let rt = setup_runtime();
        let query = rt.compile("sounds_like(@[0], @[1])").unwrap();
        let names = json!(["Robert", "Rupert"]);
        let verdict = query.search(&names).unwrap();
        assert_eq!(verdict.as_bool(), Some(true));
    }

    #[test]
    fn test_sounds_like_false() {
        let rt = setup_runtime();
        let query = rt.compile("sounds_like(@[0], @[1])").unwrap();
        let names = json!(["Robert", "Smith"]);
        let verdict = query.search(&names).unwrap();
        assert_eq!(verdict.as_bool(), Some(false));
    }

    #[test]
    fn test_phonetic_match_default() {
        let rt = setup_runtime();
        // No algorithm argument: exercises the default algorithm.
        let query = rt.compile("phonetic_match(@[0], @[1])").unwrap();
        let names = json!(["Robert", "Rupert"]);
        let verdict = query.search(&names).unwrap();
        assert_eq!(verdict.as_bool(), Some(true));
    }

    #[test]
    fn test_phonetic_match_metaphone() {
        let rt = setup_runtime();
        let query = rt
            .compile("phonetic_match(@[0], @[1], 'metaphone')")
            .unwrap();
        let names = json!(["Smith", "Smyth"]);
        let verdict = query.search(&names).unwrap();
        // Both spellings are expected to encode to SM0.
        assert_eq!(verdict.as_bool(), Some(true));
    }

    #[test]
    fn test_phonetic_match_nysiis() {
        let rt = setup_runtime();
        let query = rt
            .compile("phonetic_match(@[0], @[1], 'nysiis')")
            .unwrap();
        let names = json!(["Johnson", "Jonson"]);
        let verdict = query.search(&names).unwrap();
        assert_eq!(verdict.as_bool(), Some(true));
    }
}