1use crate::distance::{find_closest, Algorithm};
7use crate::error::FuzzyError;
8use crate::schema::{ObjectSchema, TaggedEnumSchema};
9use serde_json::{Map, Value};
10
11#[derive(Debug, Clone)]
13pub struct FuzzyOptions {
14 pub min_similarity: f64,
19
20 pub algorithm: Algorithm,
24}
25
26impl Default for FuzzyOptions {
27 fn default() -> Self {
28 Self {
29 min_similarity: 0.7,
30 algorithm: Algorithm::JaroWinkler,
31 }
32 }
33}
34
35impl FuzzyOptions {
36 pub fn with_min_similarity(mut self, min_similarity: f64) -> Self {
38 self.min_similarity = min_similarity;
39 self
40 }
41
42 pub fn with_algorithm(mut self, algorithm: Algorithm) -> Self {
44 self.algorithm = algorithm;
45 self
46 }
47}
48
/// Record of a single fuzzy repair applied to a JSON document.
#[derive(Debug, Clone, PartialEq)]
pub struct Correction {
    /// The text as it appeared before the repair (a key, tag or array item).
    pub original: String,
    /// The text that replaced `original`.
    pub corrected: String,
    /// Similarity score reported by the matcher for this replacement.
    pub similarity: f64,
    /// Location of the repaired item, built by the repair routines as
    /// `<path>.<key>` for object members and `<path>[<index>]` for array items.
    pub field_path: String,
}

impl Correction {
    /// Assembles a [`Correction`] from its four parts, storing each as given.
    pub fn new(original: String, corrected: String, similarity: f64, field_path: String) -> Self {
        Correction {
            field_path,
            similarity,
            corrected,
            original,
        }
    }
}
73
74#[derive(Debug, Clone)]
76pub struct RepairResult {
77 pub repaired: Value,
79 pub corrections: Vec<Correction>,
81}
82
83impl RepairResult {
84 pub fn has_corrections(&self) -> bool {
86 !self.corrections.is_empty()
87 }
88
89 pub fn correction_count(&self) -> usize {
91 self.corrections.len()
92 }
93}
94
95pub fn repair_object_fields(
103 obj: &mut Map<String, Value>,
104 schema: &ObjectSchema,
105 path: &str,
106 options: &FuzzyOptions,
107) -> Vec<Correction> {
108 repair_fields_with_list(obj, schema.valid_fields, path, options)
109}
110
111pub fn repair_fields_with_list(
115 obj: &mut Map<String, Value>,
116 valid_fields: &[&str],
117 path: &str,
118 options: &FuzzyOptions,
119) -> Vec<Correction> {
120 let mut corrections = Vec::new();
121
122 let keys_to_check: Vec<String> = obj
124 .keys()
125 .filter(|k| !valid_fields.contains(&k.as_str()))
126 .cloned()
127 .collect();
128
129 for key in keys_to_check {
131 if let Some(m) = find_closest(
132 &key,
133 valid_fields.iter().copied(),
134 options.min_similarity,
135 options.algorithm,
136 ) {
137 if !obj.contains_key(&m.candidate) {
139 if let Some(val) = obj.remove(&key) {
140 corrections.push(Correction::new(
141 key.clone(),
142 m.candidate.clone(),
143 m.similarity,
144 format!("{}.{}", path, key),
145 ));
146 obj.insert(m.candidate, val);
147 }
148 }
149 }
150 }
151
152 corrections
153}
154
155pub fn repair_tagged_enum<F>(
165 obj: &mut Map<String, Value>,
166 schema: &TaggedEnumSchema<F>,
167 path: &str,
168 options: &FuzzyOptions,
169) -> Vec<Correction>
170where
171 F: Fn(&str) -> Option<&'static [&'static str]>,
172{
173 let mut corrections = Vec::new();
174
175 let tag_value = if let Some(tag_val) = obj.get(schema.tag_field).and_then(|v| v.as_str()) {
177 if !schema.is_valid_tag(tag_val) {
178 if let Some(m) = find_closest(
180 tag_val,
181 schema.valid_tags.iter().copied(),
182 options.min_similarity,
183 options.algorithm,
184 ) {
185 corrections.push(Correction::new(
186 tag_val.to_string(),
187 m.candidate.clone(),
188 m.similarity,
189 format!("{}.{}", path, schema.tag_field),
190 ));
191 obj.insert(
192 schema.tag_field.to_string(),
193 Value::String(m.candidate.clone()),
194 );
195 m.candidate
196 } else {
197 tag_val.to_string()
198 }
199 } else {
200 tag_val.to_string()
201 }
202 } else {
203 return corrections; };
205
206 if let Some(valid_fields) = schema.get_fields(&tag_value) {
208 let keys_to_check: Vec<String> = obj
210 .keys()
211 .filter(|k| *k != schema.tag_field && !valid_fields.contains(&k.as_str()))
212 .cloned()
213 .collect();
214
215 for key in keys_to_check {
216 if let Some(m) = find_closest(
217 &key,
218 valid_fields.iter().copied(),
219 options.min_similarity,
220 options.algorithm,
221 ) {
222 if !obj.contains_key(&m.candidate) {
223 if let Some(val) = obj.remove(&key) {
224 corrections.push(Correction::new(
225 key.clone(),
226 m.candidate.clone(),
227 m.similarity,
228 format!("{}.{}", path, key),
229 ));
230 obj.insert(m.candidate, val);
231 }
232 }
233 }
234 }
235 }
236
237 for (field_name, valid_values) in &schema.enum_arrays {
239 if let Some(Value::Array(arr)) = obj.get_mut(*field_name) {
240 let field_path = format!("{}.{}", path, field_name);
241 let arr_corrections = repair_enum_array(arr, valid_values, &field_path, options);
242 corrections.extend(arr_corrections);
243 }
244 }
245
246 for (field_name, valid_fields) in &schema.nested_objects {
248 if let Some(Value::Object(nested_obj)) = obj.get_mut(*field_name) {
249 let nested_path = format!("{}.{}", path, field_name);
250 let nested_corrections =
251 repair_fields_with_list(nested_obj, valid_fields, &nested_path, options);
252 corrections.extend(nested_corrections);
253 }
254 }
255
256 corrections
257}
258
259pub fn repair_enum_array(
263 arr: &mut [Value],
264 valid_values: &[&str],
265 path: &str,
266 options: &FuzzyOptions,
267) -> Vec<Correction> {
268 let mut corrections = Vec::new();
269
270 for (i, item) in arr.iter_mut().enumerate() {
271 if let Value::String(s) = item {
272 if !valid_values.contains(&s.as_str()) {
273 if let Some(m) = find_closest(
274 s,
275 valid_values.iter().copied(),
276 options.min_similarity,
277 options.algorithm,
278 ) {
279 corrections.push(Correction::new(
280 s.clone(),
281 m.candidate.clone(),
282 m.similarity,
283 format!("{}[{}]", path, i),
284 ));
285 *item = Value::String(m.candidate);
286 }
287 }
288 }
289 }
290
291 corrections
292}
293
294pub fn repair_tagged_enum_json<F>(
296 json: &str,
297 schema: &TaggedEnumSchema<F>,
298 options: &FuzzyOptions,
299) -> Result<RepairResult, FuzzyError>
300where
301 F: Fn(&str) -> Option<&'static [&'static str]>,
302{
303 let mut value: Value = serde_json::from_str(json)?;
304
305 let corrections = if let Some(obj) = value.as_object_mut() {
306 repair_tagged_enum(obj, schema, "$", options)
307 } else {
308 return Err(FuzzyError::NotObject);
309 };
310
311 Ok(RepairResult {
312 repaired: value,
313 corrections,
314 })
315}
316
317pub fn repair_tagged_enum_array<F>(
319 arr: &mut [Value],
320 schema: &TaggedEnumSchema<F>,
321 path: &str,
322 options: &FuzzyOptions,
323) -> Vec<Correction>
324where
325 F: Fn(&str) -> Option<&'static [&'static str]>,
326{
327 let mut all_corrections = Vec::new();
328
329 for (i, item) in arr.iter_mut().enumerate() {
330 if let Some(obj) = item.as_object_mut() {
331 let item_path = format!("{}[{}]", path, i);
332 let corrections = repair_tagged_enum(obj, schema, &item_path, options);
333 all_corrections.extend(corrections);
334 }
335 }
336
337 all_corrections
338}
339
#[cfg(test)]
mod tests {
    use super::*;

    /// Schema shared by most tests: three tags, with a field list per tag.
    fn test_schema() -> TaggedEnumSchema<fn(&str) -> Option<&'static [&'static str]>> {
        TaggedEnumSchema::new(
            "type",
            &["AddDerive", "RemoveDerive", "RenameIdent"],
            |tag| match tag {
                "AddDerive" | "RemoveDerive" => Some(&["target", "derives"]),
                "RenameIdent" => Some(&["from", "to", "kind"]),
                _ => None,
            },
        )
    }

    /// Runs the JSON entry point with default options and unwraps the result.
    fn repair_with_defaults<F>(json: &str, schema: &TaggedEnumSchema<F>) -> RepairResult
    where
        F: Fn(&str) -> Option<&'static [&'static str]>,
    {
        repair_tagged_enum_json(json, schema, &FuzzyOptions::default()).unwrap()
    }

    #[test]
    fn test_repair_tagged_enum_type_typo() {
        let input = r#"{"type": "AddDeriv", "target": "User", "derives": ["Debug"]}"#;

        let outcome = repair_with_defaults(input, &test_schema());

        assert_eq!(outcome.repaired["type"], "AddDerive");
        assert_eq!(outcome.corrections.len(), 1);
        let fix = &outcome.corrections[0];
        assert_eq!(fix.original, "AddDeriv");
        assert_eq!(fix.corrected, "AddDerive");
    }

    #[test]
    fn test_repair_tagged_enum_field_typo() {
        let input = r#"{"type": "AddDerive", "taget": "User", "derives": ["Debug"]}"#;

        let outcome = repair_with_defaults(input, &test_schema());

        // The misspelled key is gone and the correct one took its place.
        assert!(outcome.repaired.get("target").is_some());
        assert!(outcome.repaired.get("taget").is_none());
        assert_eq!(outcome.corrections.len(), 1);
    }

    #[test]
    fn test_repair_tagged_enum_multiple_typos() {
        let input = r#"{"type": "RenamIdent", "form": "old", "too": "new"}"#;

        let outcome = repair_with_defaults(input, &test_schema());

        assert_eq!(outcome.repaired["type"], "RenameIdent");
        assert!(outcome.repaired.get("from").is_some());
        assert!(outcome.repaired.get("to").is_some());
        assert_eq!(outcome.corrections.len(), 3);
    }

    #[test]
    fn test_repair_object_fields() {
        let schema = ObjectSchema::new(&["name", "module", "derives"]);
        let mut fields: Map<String, Value> =
            serde_json::from_str(r#"{"nam": "Test", "modul": "foo"}"#).unwrap();

        let applied = repair_object_fields(&mut fields, &schema, "$", &FuzzyOptions::default());

        assert!(fields.contains_key("name"));
        assert!(fields.contains_key("module"));
        assert_eq!(applied.len(), 2);
    }

    #[test]
    fn test_no_correction_needed() {
        let input = r#"{"type": "AddDerive", "target": "User", "derives": ["Debug"]}"#;

        let outcome = repair_with_defaults(input, &test_schema());

        assert!(!outcome.has_corrections());
    }

    #[test]
    fn test_high_similarity_threshold() {
        let input = r#"{"type": "AddDeriv", "target": "User", "derives": ["Debug"]}"#;
        let strict = FuzzyOptions::default().with_min_similarity(0.99);

        let outcome = repair_tagged_enum_json(input, &test_schema(), &strict).unwrap();

        // With a 0.99 threshold the typo is expected to stay as written.
        assert_eq!(outcome.repaired["type"], "AddDeriv");
        assert!(!outcome.has_corrections());
    }

    #[test]
    fn test_repair_array() {
        let mut intents: Vec<Value> = serde_json::from_str(
            r#"[
                {"type": "AddDeriv", "taget": "User", "derives": ["Debug"]},
                {"type": "RenamIdent", "form": "old", "too": "new"}
            ]"#,
        )
        .unwrap();

        let applied = repair_tagged_enum_array(
            &mut intents,
            &test_schema(),
            "$.intents",
            &FuzzyOptions::default(),
        );

        assert_eq!(intents[0]["type"], "AddDerive");
        assert!(intents[0].get("target").is_some());
        assert_eq!(intents[1]["type"], "RenameIdent");
        assert!(intents[1].get("from").is_some());
        assert!(applied.len() >= 4);
    }

    #[test]
    fn test_repair_enum_array_values() {
        let schema =
            TaggedEnumSchema::new("type", &["AddDerive"], |_| Some(&["target", "derives"][..]))
                .with_enum_array("derives", &["Debug", "Clone", "Serialize", "Default"]);
        let input =
            r#"{"type": "AddDerive", "target": "User", "derives": ["Debg", "Clne", "Serializ"]}"#;

        let outcome = repair_with_defaults(input, &schema);

        let derives = &outcome.repaired["derives"];
        assert_eq!(derives[0], "Debug");
        assert_eq!(derives[1], "Clone");
        assert_eq!(derives[2], "Serialize");
        assert_eq!(outcome.corrections.len(), 3);
    }

    #[test]
    fn test_repair_nested_object_fields() {
        let schema =
            TaggedEnumSchema::new("type", &["Configure"], |_| Some(&["name", "config"][..]))
                .with_nested_object("config", &["timeout", "retries", "enabled"]);
        let input =
            r#"{"type": "Configure", "name": "test", "config": {"timout": 30, "retres": 3}}"#;

        let outcome = repair_with_defaults(input, &schema);

        let config = &outcome.repaired["config"];
        assert!(config.get("timeout").is_some());
        assert!(config.get("retries").is_some());
        assert_eq!(config["timeout"], 30);
        assert_eq!(config["retries"], 3);
        assert_eq!(outcome.corrections.len(), 2);
    }

    #[test]
    fn test_repair_combined_all_features() {
        let schema = TaggedEnumSchema::new("type", &["AddDerive"], |_| {
            Some(&["target", "derives", "config"][..])
        })
        .with_enum_array("derives", &["Debug", "Clone", "Serialize"])
        .with_nested_object("config", &["timeout", "retries"]);
        let input = r#"{
            "type": "AddDeriv",
            "taget": "User",
            "derives": ["Debg", "Clne"],
            "config": {"timout": 30}
        }"#;

        let outcome = repair_with_defaults(input, &schema);
        let repaired = &outcome.repaired;

        // Tag value.
        assert_eq!(repaired["type"], "AddDerive");
        // Top-level field name.
        assert!(repaired.get("target").is_some());
        assert_eq!(repaired["target"], "User");
        // Enum array items.
        assert_eq!(repaired["derives"][0], "Debug");
        assert_eq!(repaired["derives"][1], "Clone");
        // Nested object key.
        assert!(repaired["config"].get("timeout").is_some());
        assert_eq!(repaired["config"]["timeout"], 30);
        // One tag + one field + two array items + one nested key.
        assert_eq!(outcome.corrections.len(), 5);
    }

    #[test]
    fn test_repair_enum_array_no_correction_needed() {
        let schema =
            TaggedEnumSchema::new("type", &["AddDerive"], |_| Some(&["target", "derives"][..]))
                .with_enum_array("derives", &["Debug", "Clone"]);
        let input = r#"{"type": "AddDerive", "target": "User", "derives": ["Debug", "Clone"]}"#;

        let outcome = repair_with_defaults(input, &schema);

        assert!(!outcome.has_corrections());
    }
}