Skip to main content

jpx_core/extensions/
phonetic.rs

1//! Phonetic encoding functions.
2
3use std::collections::HashSet;
4
5use rphonetic::{
6    Caverphone1, Caverphone2, Encoder, MatchRatingApproach, Metaphone, Nysiis, Soundex,
7};
8use serde_json::Value;
9
10use crate::functions::Function;
11use crate::interpreter::SearchResult;
12use crate::registry::register_if_enabled;
13use crate::{Context, Runtime, arg, defn};
14
15// =============================================================================
16// soundex(string) -> string
17// =============================================================================
18
19defn!(SoundexFn, vec![arg!(string)], None);
20
21impl Function for SoundexFn {
22    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
23        self.signature.validate(args, ctx)?;
24        let s = args[0].as_str().unwrap();
25        let soundex = Soundex::default();
26        let result = soundex.encode(s);
27        Ok(Value::String(result))
28    }
29}
30
31// =============================================================================
32// metaphone(string) -> string
33// =============================================================================
34
35defn!(MetaphoneFn, vec![arg!(string)], None);
36
37impl Function for MetaphoneFn {
38    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
39        self.signature.validate(args, ctx)?;
40        let s = args[0].as_str().unwrap();
41        let metaphone = Metaphone::default();
42        let result = metaphone.encode(s);
43        Ok(Value::String(result))
44    }
45}
46
47// =============================================================================
48// double_metaphone(string) -> [primary, alternate]
49// =============================================================================
50
51defn!(DoubleMetaphoneFn, vec![arg!(string)], None);
52
53impl Function for DoubleMetaphoneFn {
54    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
55        self.signature.validate(args, ctx)?;
56        let s = args[0].as_str().unwrap();
57        let dm = rphonetic::DoubleMetaphone::default();
58        let result = dm.double_metaphone(s);
59        let primary = Value::String(result.primary());
60        let alt = result.alternate();
61        let alternate = if alt.is_empty() {
62            Value::Null
63        } else {
64            Value::String(alt)
65        };
66        Ok(Value::Array(vec![primary, alternate]))
67    }
68}
69
70// =============================================================================
71// nysiis(string) -> string
72// =============================================================================
73
74defn!(NysiisFn, vec![arg!(string)], None);
75
76impl Function for NysiisFn {
77    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
78        self.signature.validate(args, ctx)?;
79        let s = args[0].as_str().unwrap();
80        let nysiis = Nysiis::default();
81        let result = nysiis.encode(s);
82        Ok(Value::String(result))
83    }
84}
85
86// =============================================================================
87// match_rating_codex(string) -> string
88// =============================================================================
89
90defn!(MatchRatingCodexFn, vec![arg!(string)], None);
91
92impl Function for MatchRatingCodexFn {
93    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
94        self.signature.validate(args, ctx)?;
95        let s = args[0].as_str().unwrap();
96        let mra = MatchRatingApproach;
97        let result = mra.encode(s);
98        Ok(Value::String(result))
99    }
100}
101
102// =============================================================================
103// caverphone(string) -> string
104// =============================================================================
105
106defn!(CaverphoneFn, vec![arg!(string)], None);
107
108impl Function for CaverphoneFn {
109    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
110        self.signature.validate(args, ctx)?;
111        let s = args[0].as_str().unwrap();
112        let caverphone = Caverphone1;
113        let result = caverphone.encode(s);
114        Ok(Value::String(result))
115    }
116}
117
118// =============================================================================
119// caverphone2(string) -> string
120// =============================================================================
121
122defn!(Caverphone2Fn, vec![arg!(string)], None);
123
124impl Function for Caverphone2Fn {
125    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
126        self.signature.validate(args, ctx)?;
127        let s = args[0].as_str().unwrap();
128        let caverphone = Caverphone2;
129        let result = caverphone.encode(s);
130        Ok(Value::String(result))
131    }
132}
133
134// =============================================================================
135// sounds_like(s1, s2) -> bool
136// =============================================================================
137
138defn!(SoundsLikeFn, vec![arg!(string), arg!(string)], None);
139
140impl Function for SoundsLikeFn {
141    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
142        self.signature.validate(args, ctx)?;
143        let s1 = args[0].as_str().unwrap();
144        let s2 = args[1].as_str().unwrap();
145        let soundex = Soundex::default();
146        let result = soundex.is_encoded_equals(s1, s2);
147        Ok(Value::Bool(result))
148    }
149}
150
151// =============================================================================
152// phonetic_match(s1, s2, algorithm?) -> bool
153// =============================================================================
154
155defn!(
156    PhoneticMatchFn,
157    vec![arg!(string), arg!(string)],
158    Some(arg!(string))
159);
160
161impl Function for PhoneticMatchFn {
162    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
163        self.signature.validate(args, ctx)?;
164        let s1 = args[0].as_str().unwrap();
165        let s2 = args[1].as_str().unwrap();
166
167        let algorithm = if args.len() > 2 {
168            args[2]
169                .as_str()
170                .map(|s| s.to_lowercase())
171                .unwrap_or_else(|| "soundex".to_string())
172        } else {
173            "soundex".to_string()
174        };
175
176        let result = match algorithm.as_str() {
177            "soundex" => {
178                let encoder = Soundex::default();
179                encoder.is_encoded_equals(s1, s2)
180            }
181            "metaphone" => {
182                let encoder = Metaphone::default();
183                encoder.encode(s1) == encoder.encode(s2)
184            }
185            "double_metaphone" | "doublemetaphone" => {
186                let encoder = rphonetic::DoubleMetaphone::default();
187                let r1 = encoder.double_metaphone(s1);
188                let r2 = encoder.double_metaphone(s2);
189                // Match if primary codes match, or if any combination matches
190                r1.primary() == r2.primary()
191                    || (!r1.alternate().is_empty() && r1.alternate() == r2.primary())
192                    || (!r2.alternate().is_empty() && r2.alternate() == r1.primary())
193                    || (!r1.alternate().is_empty()
194                        && !r2.alternate().is_empty()
195                        && r1.alternate() == r2.alternate())
196            }
197            "nysiis" => {
198                let encoder = Nysiis::default();
199                encoder.encode(s1) == encoder.encode(s2)
200            }
201            "match_rating" | "mra" => {
202                let encoder = MatchRatingApproach;
203                encoder.is_encoded_equals(s1, s2)
204            }
205            "caverphone" | "caverphone1" => {
206                let encoder = Caverphone1;
207                encoder.encode(s1) == encoder.encode(s2)
208            }
209            "caverphone2" => {
210                let encoder = Caverphone2;
211                encoder.encode(s1) == encoder.encode(s2)
212            }
213            _ => {
214                // Default to soundex for unknown algorithms
215                let encoder = Soundex::default();
216                encoder.is_encoded_equals(s1, s2)
217            }
218        };
219
220        Ok(Value::Bool(result))
221    }
222}
223
224/// Register phonetic functions filtered by the enabled set.
225pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
226    register_if_enabled(runtime, "soundex", enabled, Box::new(SoundexFn::new()));
227    register_if_enabled(runtime, "metaphone", enabled, Box::new(MetaphoneFn::new()));
228    register_if_enabled(
229        runtime,
230        "double_metaphone",
231        enabled,
232        Box::new(DoubleMetaphoneFn::new()),
233    );
234    register_if_enabled(runtime, "nysiis", enabled, Box::new(NysiisFn::new()));
235    register_if_enabled(
236        runtime,
237        "match_rating_codex",
238        enabled,
239        Box::new(MatchRatingCodexFn::new()),
240    );
241    register_if_enabled(
242        runtime,
243        "caverphone",
244        enabled,
245        Box::new(CaverphoneFn::new()),
246    );
247    register_if_enabled(
248        runtime,
249        "caverphone2",
250        enabled,
251        Box::new(Caverphone2Fn::new()),
252    );
253    register_if_enabled(
254        runtime,
255        "sounds_like",
256        enabled,
257        Box::new(SoundsLikeFn::new()),
258    );
259    register_if_enabled(
260        runtime,
261        "phonetic_match",
262        enabled,
263        Box::new(PhoneticMatchFn::new()),
264    );
265}