Skip to main content

jpx_core/extensions/
discovery.rs

1//! Discovery and search functions for JSON arrays.
2
3use std::collections::HashSet;
4
5use serde_json::{Number, Value};
6
7use crate::functions::{Function, number_value};
8use crate::interpreter::SearchResult;
9use crate::registry::register_if_enabled;
10use crate::{Context, Runtime, arg, defn};
11
/// How a candidate string matched the query, listed from most to least relevant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum MatchType {
    /// Candidate equals the query (case-insensitively).
    Exact,
    /// Candidate starts with the query.
    Prefix,
    /// Candidate contains the query somewhere inside it.
    Contains,
    /// Candidate is only approximately similar to the query.
    Fuzzy,
    /// No match at all.
    None,
}

impl MatchType {
    /// Label and unweighted score for each tier, kept side by side so the
    /// two tables cannot drift apart.
    fn profile(self) -> (&'static str, i32) {
        match self {
            Self::Exact => ("exact", 1000),
            Self::Prefix => ("prefix", 800),
            Self::Contains => ("contains", 600),
            Self::Fuzzy => ("fuzzy", 400),
            Self::None => ("none", 0),
        }
    }

    /// Lowercase label used in the JSON results (`"exact"`, `"prefix"`, ...).
    fn as_str(&self) -> &'static str {
        self.profile().0
    }

    /// Score of this tier before any field weight is applied.
    fn base_score(&self) -> i32 {
        self.profile().1
    }
}
43
44/// Calculate match score for a single field value against query
45fn score_field(value: &str, query: &str, field_weight: i32) -> (i32, MatchType) {
46    let value_lower = value.to_lowercase();
47    let query_lower = query.to_lowercase();
48
49    let match_type = if value_lower == query_lower {
50        MatchType::Exact
51    } else if value_lower.starts_with(&query_lower) {
52        MatchType::Prefix
53    } else if value_lower.contains(&query_lower) {
54        MatchType::Contains
55    } else {
56        // Try fuzzy matching for longer strings
57        if query.len() >= 3 && value.len() >= 3 {
58            let similarity = strsim::jaro_winkler(&value_lower, &query_lower);
59            if similarity > 0.8 {
60                MatchType::Fuzzy
61            } else {
62                MatchType::None
63            }
64        } else {
65            MatchType::None
66        }
67    };
68
69    let score = match_type.base_score() * field_weight / 10;
70    (score, match_type)
71}
72
73/// Score an item against a query across multiple fields
74fn score_item(
75    item: &Value,
76    query: &str,
77    fields: &[(String, i32)],
78) -> Option<(i32, String, String)> {
79    let obj = item.as_object()?;
80
81    let mut best_score = 0;
82    let mut best_match_type = MatchType::None;
83    let mut best_field = String::new();
84
85    for (field, weight) in fields {
86        if let Some(val) = obj.get(field.as_str()) {
87            let text = match val {
88                Value::String(s) => s.clone(),
89                Value::Array(arr) => {
90                    // For arrays (like tags), join and search
91                    arr.iter()
92                        .filter_map(|v| v.as_str().map(|s| s.to_string()))
93                        .collect::<Vec<_>>()
94                        .join(" ")
95                }
96                _ => continue,
97            };
98
99            let (score, match_type) = score_field(&text, query, *weight);
100            if score > best_score {
101                best_score = score;
102                best_match_type = match_type;
103                best_field = field.clone();
104            }
105        }
106    }
107
108    if best_score > 0 {
109        Some((best_score, best_match_type.as_str().to_string(), best_field))
110    } else {
111        None
112    }
113}
114
115/// Parse field specification - either a string "name,description" or object {"name": 10, "description": 5}
116fn parse_fields(fields_arg: &Value) -> Result<Vec<(String, i32)>, String> {
117    match fields_arg {
118        Value::String(s) => {
119            // Simple comma-separated list with default weight of 10
120            Ok(s.split(',').map(|f| (f.trim().to_string(), 10)).collect())
121        }
122        Value::Object(obj) => {
123            // Object with field weights
124            let mut fields = Vec::new();
125            for (k, v) in obj.iter() {
126                let weight = v.as_f64().map(|n| n as i32).unwrap_or(10);
127                fields.push((k.clone(), weight));
128            }
129            Ok(fields)
130        }
131        _ => Err("fields must be a string or object".to_string()),
132    }
133}
134
135// fuzzy_search(array, fields, query) -> array
136// Search an array of objects, returning matches sorted by relevance
137defn!(
138    FuzzySearchFn,
139    vec![arg!(array), arg!(any), arg!(string)],
140    None
141);
142
143impl Function for FuzzySearchFn {
144    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
145        self.signature.validate(args, ctx)?;
146
147        let array = args[0].as_array().unwrap();
148        let fields = parse_fields(&args[1]).map_err(|e| crate::functions::custom_error(ctx, &e))?;
149        let query = args[2].as_str().unwrap();
150
151        if query.is_empty() {
152            return Ok(Value::Array(vec![]));
153        }
154
155        let mut results: Vec<(i32, Value)> = Vec::new();
156
157        for item in array.iter() {
158            if let Some((score, match_type, matched_field)) = score_item(item, query, &fields) {
159                let mut result_obj = serde_json::Map::new();
160                result_obj.insert("item".to_string(), item.clone());
161                result_obj.insert("score".to_string(), Value::Number(Number::from(score)));
162                result_obj.insert("match_type".to_string(), Value::String(match_type));
163                result_obj.insert("matched_field".to_string(), Value::String(matched_field));
164
165                results.push((score, Value::Object(result_obj)));
166            }
167        }
168
169        // Sort by score descending
170        results.sort_by(|a, b| b.0.cmp(&a.0));
171
172        let result_array: Vec<Value> = results.into_iter().map(|(_, item)| item).collect();
173        Ok(Value::Array(result_array))
174    }
175}
176
177// fuzzy_match(value, query) -> object
178// Check if a single value matches a query, returning match details
179defn!(FuzzyMatchFn, vec![arg!(string), arg!(string)], None);
180
181impl Function for FuzzyMatchFn {
182    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
183        self.signature.validate(args, ctx)?;
184
185        let value = args[0].as_str().unwrap();
186        let query = args[1].as_str().unwrap();
187
188        let (score, match_type) = score_field(value, query, 10);
189
190        let mut result = serde_json::Map::new();
191        result.insert("matches".to_string(), Value::Bool(score > 0));
192        result.insert("score".to_string(), Value::Number(Number::from(score)));
193        result.insert(
194            "match_type".to_string(),
195            Value::String(match_type.as_str().to_string()),
196        );
197
198        // Add similarity score for fuzzy matches
199        if match_type == MatchType::Fuzzy || match_type == MatchType::None {
200            let similarity = strsim::jaro_winkler(&value.to_lowercase(), &query.to_lowercase());
201            result.insert("similarity".to_string(), number_value(similarity));
202        }
203
204        Ok(Value::Object(result))
205    }
206}
207
208// fuzzy_score(value, query) -> number
209// Simple scoring function that returns just the match score
210defn!(FuzzyScoreFn, vec![arg!(string), arg!(string)], None);
211
212impl Function for FuzzyScoreFn {
213    fn evaluate(&self, args: &[Value], ctx: &mut Context<'_>) -> SearchResult {
214        self.signature.validate(args, ctx)?;
215
216        let value = args[0].as_str().unwrap();
217        let query = args[1].as_str().unwrap();
218
219        let (score, _) = score_field(value, query, 10);
220
221        Ok(Value::Number(Number::from(score)))
222    }
223}
224
225/// Register discovery functions filtered by the enabled set.
226pub fn register_filtered(runtime: &mut Runtime, enabled: &HashSet<&str>) {
227    register_if_enabled(
228        runtime,
229        "fuzzy_search",
230        enabled,
231        Box::new(FuzzySearchFn::new()),
232    );
233    register_if_enabled(
234        runtime,
235        "fuzzy_match",
236        enabled,
237        Box::new(FuzzyMatchFn::new()),
238    );
239    register_if_enabled(
240        runtime,
241        "fuzzy_score",
242        enabled,
243        Box::new(FuzzyScoreFn::new()),
244    );
245}
246
#[cfg(test)]
mod tests {
    use crate::Runtime;
    use serde_json::{Value, json};

    /// Runtime with the standard functions and every extension registered.
    fn setup_runtime() -> Runtime {
        Runtime::builder()
            .with_standard()
            .with_all_extensions()
            .build()
    }

    /// Compile `expression` on a fresh runtime and evaluate it against `data`.
    fn eval(expression: &str, data: &Value) -> Value {
        let runtime = setup_runtime();
        let compiled = runtime.compile(expression).unwrap();
        compiled.search(data).unwrap()
    }

    /// Read the string stored under `key` of a JSON object, panicking if absent.
    fn str_field<'a>(object: &'a Value, key: &str) -> &'a str {
        object
            .as_object()
            .unwrap()
            .get(key)
            .unwrap()
            .as_str()
            .unwrap()
    }

    #[test]
    fn test_fuzzy_search_exact_match() {
        let data = json!([
            {"name": "get_user", "description": "Get a user by ID"},
            {"name": "create_user", "description": "Create a new user"},
            {"name": "delete_user", "description": "Delete a user"}
        ]);

        let result = eval("fuzzy_search(@, 'name,description', 'get_user')", &data);
        let hits = result.as_array().unwrap();

        assert_eq!(hits.len(), 1);
        assert_eq!(str_field(&hits[0], "match_type"), "exact");
    }

    #[test]
    fn test_fuzzy_search_prefix_match() {
        let data = json!([
            {"name": "get_user", "description": "Get a user"},
            {"name": "get_cluster", "description": "Get cluster info"},
            {"name": "create_user", "description": "Create user"}
        ]);

        let result = eval("fuzzy_search(@, 'name', 'get')", &data);
        let hits = result.as_array().unwrap();

        assert_eq!(hits.len(), 2);
        for hit in hits {
            assert_eq!(str_field(hit, "match_type"), "prefix");
        }
    }

    #[test]
    fn test_fuzzy_search_contains_match() {
        let data = json!([
            {"name": "get_user_info", "description": "Get user information"},
            {"name": "create_user", "description": "Create a user"},
            {"name": "list_items", "description": "List all items"}
        ]);

        let result = eval("fuzzy_search(@, 'name', 'user')", &data);

        assert_eq!(result.as_array().unwrap().len(), 2);
    }

    #[test]
    fn test_fuzzy_search_description_match() {
        let data = json!([
            {"name": "foo", "description": "Manage database connections"},
            {"name": "bar", "description": "Handle user requests"},
            {"name": "baz", "description": "Process data"}
        ]);

        let result = eval("fuzzy_search(@, 'name,description', 'database')", &data);
        let hits = result.as_array().unwrap();

        assert_eq!(hits.len(), 1);
        assert_eq!(str_field(&hits[0], "matched_field"), "description");
    }

    #[test]
    fn test_fuzzy_search_with_weights() {
        let data = json!([
            {"name": "user_search", "description": "Search for items"},
            {"name": "item_list", "description": "List all users"}
        ]);

        // With higher weight on name, "user_search" should rank higher
        let result = eval(
            "fuzzy_search(@, `{\"name\": 10, \"description\": 5}`, 'user')",
            &data,
        );
        let hits = result.as_array().unwrap();

        assert_eq!(hits.len(), 2);
        let top_item = hits[0].as_object().unwrap().get("item").unwrap();
        assert_eq!(str_field(top_item, "name"), "user_search");
    }

    #[test]
    fn test_fuzzy_search_no_results() {
        let data = json!([
            {"name": "foo", "description": "bar"},
            {"name": "baz", "description": "qux"}
        ]);

        let result = eval("fuzzy_search(@, 'name,description', 'nonexistent')", &data);

        assert!(result.as_array().unwrap().is_empty());
    }

    #[test]
    fn test_fuzzy_search_with_tags_array() {
        let data = json!([
            {"name": "tool1", "tags": ["database", "sql"]},
            {"name": "tool2", "tags": ["cache", "redis"]},
            {"name": "tool3", "tags": ["api", "rest"]}
        ]);

        let result = eval("fuzzy_search(@, 'name,tags', 'redis')", &data);
        let hits = result.as_array().unwrap();

        assert_eq!(hits.len(), 1);
        let only_item = hits[0].as_object().unwrap().get("item").unwrap();
        assert_eq!(str_field(only_item, "name"), "tool2");
    }

    #[test]
    fn test_fuzzy_match_exact() {
        let result = eval("fuzzy_match('hello', 'hello')", &json!(null));
        let details = result.as_object().unwrap();

        assert!(details.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(str_field(&result, "match_type"), "exact");
        assert_eq!(details.get("score").unwrap().as_f64().unwrap() as i32, 1000);
    }

    #[test]
    fn test_fuzzy_match_prefix() {
        let result = eval("fuzzy_match('hello_world', 'hello')", &json!(null));
        let details = result.as_object().unwrap();

        assert!(details.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(str_field(&result, "match_type"), "prefix");
    }

    #[test]
    fn test_fuzzy_match_no_match() {
        let result = eval("fuzzy_match('hello', 'xyz')", &json!(null));
        let details = result.as_object().unwrap();

        assert!(!details.get("matches").unwrap().as_bool().unwrap());
        assert_eq!(str_field(&result, "match_type"), "none");
    }

    #[test]
    fn test_fuzzy_score() {
        let no_input = json!(null);

        // Exact match should score highest
        let exact = eval("fuzzy_score('hello', 'hello')", &no_input);
        // Prefix should score lower
        let prefix = eval("fuzzy_score('hello_world', 'hello')", &no_input);
        // Contains should score even lower
        let contains = eval("fuzzy_score('say_hello_world', 'hello')", &no_input);

        assert!(exact.as_f64().unwrap() > prefix.as_f64().unwrap());
        assert!(prefix.as_f64().unwrap() > contains.as_f64().unwrap());
    }

    #[test]
    fn test_fuzzy_search_case_insensitive() {
        let data = json!([
            {"name": "GetUser", "description": "GET user data"},
            {"name": "createuser", "description": "create USER"}
        ]);

        let result = eval("fuzzy_search(@, 'name,description', 'USER')", &data);

        // Should find both (case-insensitive)
        assert_eq!(result.as_array().unwrap().len(), 2);
    }
}