Skip to main content

a2ui_base/validate/
payload_fixer.rs

//! LLM-JSON autofixer — ports `payload_fixer.py`.
//!
//! Parses a raw JSON string from an LLM, applying tolerant fixes:
//! 1. normalize smart (curly) quotes to straight quotes,
//! 2. on parse failure, strip trailing commas and retry,
//! 3. wrap a bare top-level object into a one-element list.
//!
//! Returns `Ok(Vec<Value>)` on success, or an `A2uiError` (`Parse` for serde
//! failures, `Validation` for a non-list/non-object top level).
10
11use regex::Regex;
12use std::sync::LazyLock;
13
14static TRAILING_COMMA: LazyLock<Regex> = LazyLock::new(|| {
15    // A comma followed by optional whitespace and a closing ] or }.
16    Regex::new(r",(\s*[\]}])").expect("TRAILING_COMMA is a compile-time-constant regex")
17});
18
19/// Parse and autofix a raw LLM JSON payload into a list of JSON values.
20pub fn parse_and_fix(payload: &str) -> Result<Vec<serde_json::Value>, crate::error::A2uiError> {
21    let normalized = normalize_smart_quotes(payload);
22
23    match parse_inner(&normalized) {
24        Ok(vals) => Ok(vals),
25        // Retry once after stripping trailing commas.
26        Err(first_err) => {
27            let fixed = TRAILING_COMMA.replace_all(&normalized, "$1").into_owned();
28            if fixed == normalized {
29                // No trailing commas found — nothing more we can do.
30                return Err(first_err);
31            }
32            parse_inner(&fixed).map_err(|_| first_err)
33        }
34    }
35}
36
37/// Parse a (already quote-normalized) payload, wrapping a bare object into a
38/// list. Maps serde errors to `A2uiError::Parse`, and a non-list/non-object top
39/// level to `A2uiError::Validation`.
40fn parse_inner(payload: &str) -> Result<Vec<serde_json::Value>, crate::error::A2uiError> {
41    let value: serde_json::Value = serde_json::from_str(payload)?;
42    match value {
43        serde_json::Value::Array(arr) => Ok(arr),
44        other => {
45            if other.is_object() {
46                Ok(vec![other])
47            } else {
48                Err(crate::error::A2uiError::Validation(
49                    "payload is not a JSON list or object".into(),
50                ))
51            }
52        }
53    }
54}
55
/// Map typographic quotation marks to their ASCII equivalents.
///
/// U+201C / U+201D (curly double quotes) become `"`, and U+2018 / U+2019
/// (curly single quotes) become `'`. Every other character is copied through
/// unchanged.
fn normalize_smart_quotes(s: &str) -> String {
    s.chars()
        .map(|ch| match ch {
            '\u{201C}' | '\u{201D}' => '"',
            '\u{2018}' | '\u{2019}' => '\'',
            other => other,
        })
        .collect()
}
63
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Well-formed JSON needs no fixing and is returned element-for-element.
    #[test]
    fn clean_json_passes_through() {
        let payload = r#"[{"id":"root","component":"Text"}]"#;
        let vals = parse_and_fix(payload).unwrap();
        assert_eq!(vals.len(), 1);
        assert_eq!(vals[0]["id"], json!("root"));
    }

    /// Curly double quotes used as string delimiters are repaired.
    #[test]
    fn smart_quotes_normalized() {
        // LLMs sometimes emit curly double quotes around keys/values.
        let payload = "[{\u{201C}id\u{201D}: \u{201C}root\u{201D}}]";
        let vals = parse_and_fix(payload).unwrap();
        assert_eq!(vals[0]["id"], json!("root"));
    }

    /// Trailing commas before `}` and `]` are dropped on retry.
    #[test]
    fn trailing_comma_removed() {
        let payload = r#"[{"id":"root",},{"id":"c1",}]"#;
        let vals = parse_and_fix(payload).unwrap();
        assert_eq!(vals.len(), 2);
        assert_eq!(vals[0]["id"], json!("root"));
        assert_eq!(vals[1]["id"], json!("c1"));
    }

    /// Both fixes can be needed at once, with whitespace between the comma
    /// and the closing bracket.
    #[test]
    fn smart_quotes_and_trailing_comma_combined() {
        let payload = "[{\u{201C}id\u{201D}: \u{201C}root\u{201D},\n}]";
        let vals = parse_and_fix(payload).unwrap();
        assert_eq!(vals.len(), 1);
        assert_eq!(vals[0]["id"], json!("root"));
    }

    /// A bare top-level object is wrapped into a one-element list.
    #[test]
    fn single_object_wrapped_in_list() {
        let payload = r#"{"id":"root","component":"Text"}"#;
        let vals = parse_and_fix(payload).unwrap();
        assert_eq!(vals.len(), 1);
        assert_eq!(vals[0]["id"], json!("root"));
    }

    /// Valid JSON whose top level is neither a list nor an object is a
    /// validation error, not a parse error.
    #[test]
    fn scalar_top_level_is_validation_error() {
        assert!(matches!(
            parse_and_fix("42"),
            Err(crate::error::A2uiError::Validation(_))
        ));
    }

    /// Input that no fix can rescue is reported as an error.
    #[test]
    fn broken_json_errors() {
        let payload = r#"not json at all {{{"#;
        assert!(parse_and_fix(payload).is_err());
    }
}