Expand description
Fuzzy JSON repair for LLM-generated DSL
This crate provides generic fuzzy matching and automatic correction for JSON that may contain typos (common when generated by LLMs).
§Design
This crate provides only generic repair APIs; the schema definitions live in the calling crate (i.e., your application defines the schema).
§Features
- JSON sanitization: Fix syntax errors (trailing commas, missing braces)
- Tagged enum repair: Fix type discriminator typos (e.g., "AddDeriv" → "AddDerive")
- Field name repair: Fix field name typos (e.g., "taget" → "target")
- Enum array repair: Fix values in enum arrays (e.g., ["Debg"] → ["Debug"])
- Nested object repair: Fix field names in nested objects
- Multiple algorithm support: Jaro-Winkler, Levenshtein, Damerau-Levenshtein
- Configurable similarity threshold
§Example
use fuzzy_parser::{
sanitize_json, repair_tagged_enum_json, TaggedEnumSchema, FuzzyOptions,
};
// Define schema with enum arrays and nested objects
let schema = TaggedEnumSchema::new(
"type", // tag field
&["AddDerive", "RemoveDerive"], // valid types
|tag| match tag {
"AddDerive" | "RemoveDerive" => Some(&["target", "derives", "config"][..]),
_ => None,
},
)
.with_enum_array("derives", &["Debug", "Clone", "Serialize", "Default"])
.with_nested_object("config", &["timeout", "retries"]);
// LLM output with syntax errors AND typos
let malformed = r#"{"type": "AddDeriv", "taget": "User", "derives": ["Debg",], "config": {"timout": 30,}}"#;
// Step 1: Sanitize (fix syntax errors)
let sanitized = sanitize_json(malformed);
// Step 2: Repair (fix typos)
let result = repair_tagged_enum_json(&sanitized, &schema, &FuzzyOptions::default()).unwrap();
assert_eq!(result.repaired["type"], "AddDerive");
assert!(result.repaired.get("target").is_some());
assert_eq!(result.repaired["derives"][0], "Debug");
assert!(result.repaired["config"].get("timeout").is_some());
Re-exports§
pub use distance::Algorithm;
pub use distance::Match;
pub use error::FuzzyError;
pub use repair::repair_enum_array;
pub use repair::repair_fields_with_list;
pub use repair::repair_object_fields;
pub use repair::repair_tagged_enum;
pub use repair::repair_tagged_enum_array;
pub use repair::repair_tagged_enum_json;
pub use repair::Correction;
pub use repair::FuzzyOptions;
pub use repair::RepairResult;
pub use sanitize::sanitize_json;
pub use schema::ObjectSchema;
pub use schema::TaggedEnumSchema;