1use std::collections::HashSet;
4
5use serde_json::{Number, Value};
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
/// How a query matched a candidate string, ordered from strongest to weakest.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchType {
    /// The lowercased value equals the lowercased query.
    Exact,
    /// The lowercased value starts with the lowercased query.
    Prefix,
    /// The lowercased value contains the lowercased query.
    Contains,
    /// The strings are merely similar (Jaro-Winkler above the threshold).
    Fuzzy,
    /// No match at all.
    None,
}

impl MatchType {
    /// Single lookup table pairing each variant with its label and base score,
    /// so the two public accessors cannot drift apart.
    fn profile(&self) -> (&'static str, i32) {
        match *self {
            MatchType::Exact => ("exact", 1000),
            MatchType::Prefix => ("prefix", 800),
            MatchType::Contains => ("contains", 600),
            MatchType::Fuzzy => ("fuzzy", 400),
            MatchType::None => ("none", 0),
        }
    }

    /// Lowercase label used when the match type is reported in results.
    fn as_str(&self) -> &'static str {
        self.profile().0
    }

    /// Unweighted score for this kind of match; `None` scores zero.
    fn base_score(&self) -> i32 {
        self.profile().1
    }
}
43
44fn score_field(value: &str, query: &str, field_weight: i32) -> (i32, MatchType) {
46 let value_lower = value.to_lowercase();
47 let query_lower = query.to_lowercase();
48
49 let match_type = if value_lower == query_lower {
50 MatchType::Exact
51 } else if value_lower.starts_with(&query_lower) {
52 MatchType::Prefix
53 } else if value_lower.contains(&query_lower) {
54 MatchType::Contains
55 } else {
56 if query.len() >= 3 && value.len() >= 3 {
58 let similarity = strsim::jaro_winkler(&value_lower, &query_lower);
59 if similarity > 0.8 {
60 MatchType::Fuzzy
61 } else {
62 MatchType::None
63 }
64 } else {
65 MatchType::None
66 }
67 };
68
69 let score = match_type.base_score() * field_weight / 10;
70 (score, match_type)
71}
72
73fn score_item(
75 item: &Value,
76 query: &str,
77 fields: &[(String, i32)],
78) -> Option<(i32, String, String)> {
79 let obj = item.as_object()?;
80
81 let mut best_score = 0;
82 let mut best_match_type = MatchType::None;
83 let mut best_field = String::new();
84
85 for (field, weight) in fields {
86 if let Some(val) = obj.get(field.as_str()) {
87 let text = match val {
88 Value::String(s) => s.clone(),
89 Value::Array(arr) => {
90 arr.iter()
92 .filter_map(|v| v.as_str().map(|s| s.to_string()))
93 .collect::<Vec<_>>()
94 .join(" ")
95 }
96 _ => continue,
97 };
98
99 let (score, match_type) = score_field(&text, query, *weight);
100 if score > best_score {
101 best_score = score;
102 best_match_type = match_type;
103 best_field = field.clone();
104 }
105 }
106 }
107
108 if best_score > 0 {
109 Some((best_score, best_match_type.as_str().to_string(), best_field))
110 } else {
111 None
112 }
113}
114
115fn parse_fields(fields_arg: &Value) -> Result<Vec<(String, i32)>, String> {
117 match fields_arg {
118 Value::String(s) => {
119 Ok(s.split(',').map(|f| (f.trim().to_string(), 10)).collect())
121 }
122 Value::Object(obj) => {
123 let mut fields = Vec::new();
125 for (k, v) in obj.iter() {
126 let weight = v.as_f64().map(|n| n as i32).unwrap_or(10);
127 fields.push((k.clone(), weight));
128 }
129 Ok(fields)
130 }
131 _ => Err("fields must be a string or object".to_string()),
132 }
133}
134
135defn!(
138 FuzzySearchFn,
139 vec![arg!(array), arg!(any), arg!(string)],
140 None
141);
142
143impl Function for FuzzySearchFn {
144 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
145 self.signature.validate(args, ctx)?;
146
147 let array = args[0].as_array().unwrap();
148 let fields = parse_fields(&args[1]).map_err(|e| crate::functions::custom_error(ctx, &e))?;
149 let query = args[2].as_str().unwrap();
150
151 if query.is_empty() {
152 return Ok(Value::Array(vec![]));
153 }
154
155 let mut results: Vec<(i32, Value)> = Vec::new();
156
157 for item in array.iter() {
158 if let Some((score, match_type, matched_field)) = score_item(item, query, &fields) {
159 let mut result_obj = serde_json::Map::new();
160 result_obj.insert("item".to_string(), item.clone());
161 result_obj.insert("score".to_string(), Value::Number(Number::from(score)));
162 result_obj.insert("match_type".to_string(), Value::String(match_type));
163 result_obj.insert("matched_field".to_string(), Value::String(matched_field));
164
165 results.push((score, Value::Object(result_obj)));
166 }
167 }
168
169 results.sort_by(|a, b| b.0.cmp(&a.0));
171
172 let result_array: Vec<Value> = results.into_iter().map(|(_, item)| item).collect();
173 Ok(Value::Array(result_array))
174 }
175}
176
177defn!(FuzzyMatchFn, vec![arg!(string), arg!(string)], None);
180
181impl Function for FuzzyMatchFn {
182 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
183 self.signature.validate(args, ctx)?;
184
185 let value = args[0].as_str().unwrap();
186 let query = args[1].as_str().unwrap();
187
188 let (score, match_type) = score_field(value, query, 10);
189
190 let mut result = serde_json::Map::new();
191 result.insert("matches".to_string(), Value::Bool(score > 0));
192 result.insert("score".to_string(), Value::Number(Number::from(score)));
193 result.insert(
194 "match_type".to_string(),
195 Value::String(match_type.as_str().to_string()),
196 );
197
198 if match_type == MatchType::Fuzzy || match_type == MatchType::None {
200 let similarity = strsim::jaro_winkler(&value.to_lowercase(), &query.to_lowercase());
201 result.insert("similarity".to_string(), number_value(similarity));
202 }
203
204 Ok(Value::Object(result))
205 }
206}
207
208defn!(FuzzyScoreFn, vec![arg!(string), arg!(string)], None);
211
212impl Function for FuzzyScoreFn {
213 fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
214 self.signature.validate(args, ctx)?;
215
216 let value = args[0].as_str().unwrap();
217 let query = args[1].as_str().unwrap();
218
219 let (score, _) = score_field(value, query, 10);
220
221 Ok(Value::Number(Number::from(score)))
222 }
223}
224
225pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
227 register_if_enabled(
228 runtime,
229 "fuzzy_search",
230 enabled,
231 Box::new(FuzzySearchFn::new()),
232 );
233 register_if_enabled(
234 runtime,
235 "fuzzy_match",
236 enabled,
237 Box::new(FuzzyMatchFn::new()),
238 );
239 register_if_enabled(
240 runtime,
241 "fuzzy_score",
242 enabled,
243 Box::new(FuzzyScoreFn::new()),
244 );
245}
246
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::{Value, json};

    /// Builds a runtime with the standard functions and all extensions.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compiles `expression` on a fresh runtime and evaluates it against
    /// `data`, panicking if either step fails.
    fn evaluate(expression: &str, data: &Value) -> Value {
        let runtime = setup_runtime();
        let compiled = runtime.compile(expression).unwrap();
        compiled.search(data).unwrap()
    }

    /// Evaluates `expression` and returns the elements of the resulting array.
    fn search_hits(expression: &str, data: &Value) -> Vec<Value> {
        match evaluate(expression, data) {
            Value::Array(items) => items,
            other => panic!("expected an array result, got {}", other),
        }
    }

    /// Reads the string stored under `key` in the object `value`.
    fn text<'a>(value: &'a Value, key: &str) -> &'a str {
        value.get(key).unwrap().as_str().unwrap()
    }

    #[test]
    fn test_fuzzy_search_exact_match() {
        let data = json!([
            {"name": "get_user", "description": "Get a user by ID"},
            {"name": "create_user", "description": "Create a new user"},
            {"name": "delete_user", "description": "Delete a user"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name,description', 'get_user')", &data);

        assert_eq!(hits.len(), 1);
        assert_eq!(text(&hits[0], "match_type"), "exact");
    }

    #[test]
    fn test_fuzzy_search_prefix_match() {
        let data = json!([
            {"name": "get_user", "description": "Get a user"},
            {"name": "get_cluster", "description": "Get cluster info"},
            {"name": "create_user", "description": "Create user"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name', 'get')", &data);

        assert_eq!(hits.len(), 2);
        for hit in &hits {
            assert_eq!(text(hit, "match_type"), "prefix");
        }
    }

    #[test]
    fn test_fuzzy_search_contains_match() {
        let data = json!([
            {"name": "get_user_info", "description": "Get user information"},
            {"name": "create_user", "description": "Create a user"},
            {"name": "list_items", "description": "List all items"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name', 'user')", &data);

        assert_eq!(hits.len(), 2);
    }

    #[test]
    fn test_fuzzy_search_description_match() {
        let data = json!([
            {"name": "foo", "description": "Manage database connections"},
            {"name": "bar", "description": "Handle user requests"},
            {"name": "baz", "description": "Process data"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name,description', 'database')", &data);

        assert_eq!(hits.len(), 1);
        assert_eq!(text(&hits[0], "matched_field"), "description");
    }

    #[test]
    fn test_fuzzy_search_with_weights() {
        let data = json!([
            {"name": "user_search", "description": "Search for items"},
            {"name": "item_list", "description": "List all users"}
        ]);

        let hits = search_hits(
            "fuzzy_search(@, `{\"name\": 10, \"description\": 5}`, 'user')",
            &data,
        );

        assert_eq!(hits.len(), 2);
        // The name match carries the larger weight, so it must rank first.
        let top_item = hits[0].get("item").unwrap();
        assert_eq!(text(top_item, "name"), "user_search");
    }

    #[test]
    fn test_fuzzy_search_no_results() {
        let data = json!([
            {"name": "foo", "description": "bar"},
            {"name": "baz", "description": "qux"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name,description', 'nonexistent')", &data);

        assert!(hits.is_empty());
    }

    #[test]
    fn test_fuzzy_search_with_tags_array() {
        let data = json!([
            {"name": "tool1", "tags": ["database", "sql"]},
            {"name": "tool2", "tags": ["cache", "redis"]},
            {"name": "tool3", "tags": ["api", "rest"]}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name,tags', 'redis')", &data);

        assert_eq!(hits.len(), 1);
        let top_item = hits[0].get("item").unwrap();
        assert_eq!(text(top_item, "name"), "tool2");
    }

    #[test]
    fn test_fuzzy_match_exact() {
        let report = evaluate("fuzzy_match('hello', 'hello')", &json!(null));

        assert!(report.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(text(&report, "match_type"), "exact");
        assert_eq!(report.get("score").unwrap().as_f64().unwrap() as i32, 1000);
    }

    #[test]
    fn test_fuzzy_match_prefix() {
        let report = evaluate("fuzzy_match('hello_world', 'hello')", &json!(null));

        assert!(report.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(text(&report, "match_type"), "prefix");
    }

    #[test]
    fn test_fuzzy_match_no_match() {
        let report = evaluate("fuzzy_match('hello', 'xyz')", &json!(null));

        assert!(!report.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(text(&report, "match_type"), "none");
    }

    #[test]
    fn test_fuzzy_score() {
        let score_of = |expression: &str| evaluate(expression, &json!(null)).as_f64().unwrap();

        let exact = score_of("fuzzy_score('hello', 'hello')");
        let prefix = score_of("fuzzy_score('hello_world', 'hello')");
        let contains = score_of("fuzzy_score('say_hello_world', 'hello')");

        // Stronger match tiers must score strictly higher.
        assert!(exact > prefix);
        assert!(prefix > contains);
    }

    #[test]
    fn test_fuzzy_search_case_insensitive() {
        let data = json!([
            {"name": "GetUser", "description": "GET user data"},
            {"name": "createuser", "description": "create USER"}
        ]);

        let hits = search_hits("fuzzy_search(@, 'name,description', 'USER')", &data);

        assert_eq!(hits.len(), 2);
    }
}