Skip to main content

jpx_core/extensions/
fuzzy.rs

1//! Fuzzy string matching functions.
2
3use std::collections::HashSet;
4
5use serde_json::Value;
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
12// levenshtein(s1, s2) -> number
13defn!(LevenshteinFn, vec![arg!(string), arg!(string)], None);
14
15impl Function for LevenshteinFn {
16    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
17        self.signature.validate(args, ctx)?;
18        let s1 = args[0].as_str().unwrap();
19        let s2 = args[1].as_str().unwrap();
20        let dist = strsim::levenshtein(s1, s2);
21        Ok(number_value(dist as f64))
22    }
23}
24
25// normalized_levenshtein(s1, s2) -> number (0.0-1.0)
26defn!(
27    NormalizedLevenshteinFn,
28    vec![arg!(string), arg!(string)],
29    None
30);
31
32impl Function for NormalizedLevenshteinFn {
33    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
34        self.signature.validate(args, ctx)?;
35        let s1 = args[0].as_str().unwrap();
36        let s2 = args[1].as_str().unwrap();
37        let sim = strsim::normalized_levenshtein(s1, s2);
38        Ok(number_value(sim))
39    }
40}
41
42// damerau_levenshtein(s1, s2) -> number
43defn!(DamerauLevenshteinFn, vec![arg!(string), arg!(string)], None);
44
45impl Function for DamerauLevenshteinFn {
46    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
47        self.signature.validate(args, ctx)?;
48        let s1 = args[0].as_str().unwrap();
49        let s2 = args[1].as_str().unwrap();
50        let dist = strsim::damerau_levenshtein(s1, s2);
51        Ok(number_value(dist as f64))
52    }
53}
54
55// normalized_damerau_levenshtein(s1, s2) -> number (0.0-1.0)
56defn!(
57    NormalizedDamerauLevenshteinFn,
58    vec![arg!(string), arg!(string)],
59    None
60);
61
62impl Function for NormalizedDamerauLevenshteinFn {
63    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
64        self.signature.validate(args, ctx)?;
65        let s1 = args[0].as_str().unwrap();
66        let s2 = args[1].as_str().unwrap();
67        let sim = strsim::normalized_damerau_levenshtein(s1, s2);
68        Ok(number_value(sim))
69    }
70}
71
72// jaro(s1, s2) -> number (0.0-1.0)
73defn!(JaroFn, vec![arg!(string), arg!(string)], None);
74
75impl Function for JaroFn {
76    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
77        self.signature.validate(args, ctx)?;
78        let s1 = args[0].as_str().unwrap();
79        let s2 = args[1].as_str().unwrap();
80        let sim = strsim::jaro(s1, s2);
81        Ok(number_value(sim))
82    }
83}
84
85// jaro_winkler(s1, s2) -> number (0.0-1.0)
86defn!(JaroWinklerFn, vec![arg!(string), arg!(string)], None);
87
88impl Function for JaroWinklerFn {
89    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
90        self.signature.validate(args, ctx)?;
91        let s1 = args[0].as_str().unwrap();
92        let s2 = args[1].as_str().unwrap();
93        let sim = strsim::jaro_winkler(s1, s2);
94        Ok(number_value(sim))
95    }
96}
97
98// sorensen_dice(s1, s2) -> number (0.0-1.0)
99defn!(SorensenDiceFn, vec![arg!(string), arg!(string)], None);
100
101impl Function for SorensenDiceFn {
102    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
103        self.signature.validate(args, ctx)?;
104        let s1 = args[0].as_str().unwrap();
105        let s2 = args[1].as_str().unwrap();
106        let sim = strsim::sorensen_dice(s1, s2);
107        Ok(number_value(sim))
108    }
109}
110
111// hamming(s1, s2) -> number (returns null if strings have different lengths)
112defn!(HammingFn, vec![arg!(string), arg!(string)], None);
113
114impl Function for HammingFn {
115    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
116        self.signature.validate(args, ctx)?;
117        let s1 = args[0].as_str().unwrap();
118        let s2 = args[1].as_str().unwrap();
119        match strsim::hamming(s1, s2) {
120            Ok(dist) => Ok(number_value(dist as f64)),
121            Err(_) => Ok(Value::Null), // Different lengths
122        }
123    }
124}
125
126// osa_distance(s1, s2) -> number (Optimal String Alignment distance)
127defn!(OsaDistanceFn, vec![arg!(string), arg!(string)], None);
128
129impl Function for OsaDistanceFn {
130    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
131        self.signature.validate(args, ctx)?;
132        let s1 = args[0].as_str().unwrap();
133        let s2 = args[1].as_str().unwrap();
134        let dist = strsim::osa_distance(s1, s2);
135        Ok(number_value(dist as f64))
136    }
137}
138
139/// Register fuzzy matching functions filtered by the enabled set.
140pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
141    register_if_enabled(
142        runtime,
143        "levenshtein",
144        enabled,
145        Box::new(LevenshteinFn::new()),
146    );
147    register_if_enabled(
148        runtime,
149        "normalized_levenshtein",
150        enabled,
151        Box::new(NormalizedLevenshteinFn::new()),
152    );
153    register_if_enabled(
154        runtime,
155        "damerau_levenshtein",
156        enabled,
157        Box::new(DamerauLevenshteinFn::new()),
158    );
159    register_if_enabled(
160        runtime,
161        "normalized_damerau_levenshtein",
162        enabled,
163        Box::new(NormalizedDamerauLevenshteinFn::new()),
164    );
165    register_if_enabled(runtime, "jaro", enabled, Box::new(JaroFn::new()));
166    register_if_enabled(
167        runtime,
168        "jaro_winkler",
169        enabled,
170        Box::new(JaroWinklerFn::new()),
171    );
172    register_if_enabled(
173        runtime,
174        "sorensen_dice",
175        enabled,
176        Box::new(SorensenDiceFn::new()),
177    );
178    register_if_enabled(runtime, "hamming", enabled, Box::new(HammingFn::new()));
179    register_if_enabled(
180        runtime,
181        "osa_distance",
182        enabled,
183        Box::new(OsaDistanceFn::new()),
184    );
185}