1use std::collections::HashSet;
4
5use serde_json::Value;
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
12defn!(LevenshteinFn, vec![arg!(string), arg!(string)], None);
14
15impl Function for LevenshteinFn {
16 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
17 self.signature.validate(args, ctx)?;
18 let s1 = args[0].as_str().unwrap();
19 let s2 = args[1].as_str().unwrap();
20 let dist = strsim::levenshtein(s1, s2);
21 Ok(number_value(dist as f64))
22 }
23}
24
25defn!(
27 NormalizedLevenshteinFn,
28 vec![arg!(string), arg!(string)],
29 None
30);
31
32impl Function for NormalizedLevenshteinFn {
33 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
34 self.signature.validate(args, ctx)?;
35 let s1 = args[0].as_str().unwrap();
36 let s2 = args[1].as_str().unwrap();
37 let sim = strsim::normalized_levenshtein(s1, s2);
38 Ok(number_value(sim))
39 }
40}
41
42defn!(DamerauLevenshteinFn, vec![arg!(string), arg!(string)], None);
44
45impl Function for DamerauLevenshteinFn {
46 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
47 self.signature.validate(args, ctx)?;
48 let s1 = args[0].as_str().unwrap();
49 let s2 = args[1].as_str().unwrap();
50 let dist = strsim::damerau_levenshtein(s1, s2);
51 Ok(number_value(dist as f64))
52 }
53}
54
55defn!(
57 NormalizedDamerauLevenshteinFn,
58 vec![arg!(string), arg!(string)],
59 None
60);
61
62impl Function for NormalizedDamerauLevenshteinFn {
63 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
64 self.signature.validate(args, ctx)?;
65 let s1 = args[0].as_str().unwrap();
66 let s2 = args[1].as_str().unwrap();
67 let sim = strsim::normalized_damerau_levenshtein(s1, s2);
68 Ok(number_value(sim))
69 }
70}
71
72defn!(JaroFn, vec![arg!(string), arg!(string)], None);
74
75impl Function for JaroFn {
76 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
77 self.signature.validate(args, ctx)?;
78 let s1 = args[0].as_str().unwrap();
79 let s2 = args[1].as_str().unwrap();
80 let sim = strsim::jaro(s1, s2);
81 Ok(number_value(sim))
82 }
83}
84
85defn!(JaroWinklerFn, vec![arg!(string), arg!(string)], None);
87
88impl Function for JaroWinklerFn {
89 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
90 self.signature.validate(args, ctx)?;
91 let s1 = args[0].as_str().unwrap();
92 let s2 = args[1].as_str().unwrap();
93 let sim = strsim::jaro_winkler(s1, s2);
94 Ok(number_value(sim))
95 }
96}
97
98defn!(SorensenDiceFn, vec![arg!(string), arg!(string)], None);
100
101impl Function for SorensenDiceFn {
102 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
103 self.signature.validate(args, ctx)?;
104 let s1 = args[0].as_str().unwrap();
105 let s2 = args[1].as_str().unwrap();
106 let sim = strsim::sorensen_dice(s1, s2);
107 Ok(number_value(sim))
108 }
109}
110
111defn!(HammingFn, vec![arg!(string), arg!(string)], None);
113
114impl Function for HammingFn {
115 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
116 self.signature.validate(args, ctx)?;
117 let s1 = args[0].as_str().unwrap();
118 let s2 = args[1].as_str().unwrap();
119 match strsim::hamming(s1, s2) {
120 Ok(dist) => Ok(number_value(dist as f64)),
121 Err(_) => Ok(Value::Null), }
123 }
124}
125
126defn!(OsaDistanceFn, vec![arg!(string), arg!(string)], None);
128
129impl Function for OsaDistanceFn {
130 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
131 self.signature.validate(args, ctx)?;
132 let s1 = args[0].as_str().unwrap();
133 let s2 = args[1].as_str().unwrap();
134 let dist = strsim::osa_distance(s1, s2);
135 Ok(number_value(dist as f64))
136 }
137}
138
139pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
141 register_if_enabled(
142 runtime,
143 "levenshtein",
144 enabled,
145 Box::new(LevenshteinFn::new()),
146 );
147 register_if_enabled(
148 runtime,
149 "normalized_levenshtein",
150 enabled,
151 Box::new(NormalizedLevenshteinFn::new()),
152 );
153 register_if_enabled(
154 runtime,
155 "damerau_levenshtein",
156 enabled,
157 Box::new(DamerauLevenshteinFn::new()),
158 );
159 register_if_enabled(
160 runtime,
161 "normalized_damerau_levenshtein",
162 enabled,
163 Box::new(NormalizedDamerauLevenshteinFn::new()),
164 );
165 register_if_enabled(runtime, "jaro", enabled, Box::new(JaroFn::new()));
166 register_if_enabled(
167 runtime,
168 "jaro_winkler",
169 enabled,
170 Box::new(JaroWinklerFn::new()),
171 );
172 register_if_enabled(
173 runtime,
174 "sorensen_dice",
175 enabled,
176 Box::new(SorensenDiceFn::new()),
177 );
178 register_if_enabled(runtime, "hamming", enabled, Box::new(HammingFn::new()));
179 register_if_enabled(
180 runtime,
181 "osa_distance",
182 enabled,
183 Box::new(OsaDistanceFn::new()),
184 );
185}
186
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::json;

    /// Builds the runtime used by every test, configured through
    /// `with_standard()` and `with_all_extensions()`.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression`, evaluates it against a `null` document, and
    /// returns the result as `f64`. Panics if any step fails or the result
    /// is not a number.
    fn eval_number(runtime: &Runtime, expression: &str) -> f64 {
        let compiled = runtime.compile(expression).unwrap();
        let value = compiled.search(&json!(null)).unwrap();
        value.as_f64().unwrap()
    }

    #[test]
    fn test_levenshtein() {
        let runtime = setup_runtime();
        let distance = eval_number(&runtime, "levenshtein('kitten', 'sitting')");
        assert_eq!(distance, 3.0);
    }

    #[test]
    fn test_levenshtein_identical() {
        let runtime = setup_runtime();
        let distance = eval_number(&runtime, "levenshtein('hello', 'hello')");
        assert_eq!(distance, 0.0);
    }

    #[test]
    fn test_normalized_levenshtein() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "normalized_levenshtein('hello', 'hello')");
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_normalized_levenshtein_different() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "normalized_levenshtein('hello', 'world')");
        assert!(score > 0.0 && score < 1.0);
    }

    #[test]
    fn test_damerau_levenshtein() {
        let runtime = setup_runtime();
        // A single adjacent transposition counts as one edit.
        let distance = eval_number(&runtime, "damerau_levenshtein('ab', 'ba')");
        assert_eq!(distance, 1.0);
    }

    #[test]
    fn test_jaro() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "jaro('hello', 'hallo')");
        assert!(score > 0.8);
    }

    #[test]
    fn test_jaro_identical() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "jaro('test', 'test')");
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_jaro_winkler() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "jaro_winkler('prefix_abc', 'prefix_xyz')");
        assert!(score > 0.7);
    }

    #[test]
    fn test_jaro_winkler_vs_jaro() {
        let runtime = setup_runtime();
        // Both expressions are compiled before either one is evaluated.
        let jw_expr = runtime.compile("jaro_winkler('hello', 'hella')").unwrap();
        let j_expr = runtime.compile("jaro('hello', 'hella')").unwrap();
        let jaro_winkler = jw_expr.search(&json!(null)).unwrap();
        let jaro = j_expr.search(&json!(null)).unwrap();
        assert!(jaro_winkler.as_f64().unwrap() >= jaro.as_f64().unwrap());
    }

    #[test]
    fn test_sorensen_dice() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "sorensen_dice('night', 'nacht')");
        assert!(score > 0.0 && score < 1.0);
    }

    #[test]
    fn test_sorensen_dice_identical() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "sorensen_dice('test', 'test')");
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_normalized_damerau_levenshtein() {
        let runtime = setup_runtime();
        let score = eval_number(
            &runtime,
            "normalized_damerau_levenshtein('hello', 'hello')",
        );
        assert_eq!(score, 1.0);
    }

    #[test]
    fn test_normalized_damerau_levenshtein_transposition() {
        let runtime = setup_runtime();
        let score = eval_number(&runtime, "normalized_damerau_levenshtein('ab', 'ba')");
        assert!(score > 0.0 && score < 1.0);
    }

    #[test]
    fn test_hamming() {
        let runtime = setup_runtime();
        let distance = eval_number(&runtime, "hamming('karolin', 'kathrin')");
        assert_eq!(distance, 3.0);
    }

    #[test]
    fn test_hamming_identical() {
        let runtime = setup_runtime();
        let distance = eval_number(&runtime, "hamming('hello', 'hello')");
        assert_eq!(distance, 0.0);
    }

    #[test]
    fn test_hamming_different_lengths() {
        let runtime = setup_runtime();
        // Inputs of unequal length yield `null` instead of a number.
        let compiled = runtime.compile("hamming('hello', 'hi')").unwrap();
        let value = compiled.search(&json!(null)).unwrap();
        assert!(value.is_null());
    }

    #[test]
    fn test_osa_distance() {
        let runtime = setup_runtime();
        // A single adjacent transposition counts as one edit.
        let distance = eval_number(&runtime, "osa_distance('ab', 'ba')");
        assert_eq!(distance, 1.0);
    }

    #[test]
    fn test_osa_distance_identical() {
        let runtime = setup_runtime();
        let distance = eval_number(&runtime, "osa_distance('hello', 'hello')");
        assert_eq!(distance, 0.0);
    }
}