// car-ir 0.12.0
//
// Agent IR types for Common Agent Runtime
// Documentation
//! Utilities for extracting and repairing JSON from LLM output.
//!
//! LLMs often wrap JSON in markdown code fences, add preamble text, or produce
//! trailing commas. These functions handle common patterns.

/// Extract the first balanced `{...}` JSON object from text.
///
/// Tries, in order:
/// 1. Markdown ` ```json ` fenced block whose content starts with `{`
/// 2. Plain ` ``` ` fenced block (fence directly followed by a newline) whose
///    content starts with `{`
/// 3. The first fenced block of any kind, skipping a language tag or other
///    text that precedes the first `{`
/// 4. First `{` anywhere in `text` with balanced-brace matching (handles
///    nesting and string escapes)
///
/// Every fenced candidate is validated with the balanced-brace matcher. A
/// fence whose content was cut short (for example because a JSON string value
/// itself contains "```") or that carries trailing text after the object is
/// therefore never returned verbatim: the balanced object is returned when
/// there is one, otherwise the next strategy is tried.
///
/// Returns `None` if no balanced object is found.
pub fn extract_json_object(text: &str) -> Option<String> {
    // Trimmed text between the first occurrence of `opener` and the next
    // "```" after it; `None` when either marker is missing.
    fn fenced<'a>(text: &'a str, opener: &str) -> Option<&'a str> {
        let (_, rest) = text.split_once(opener)?;
        let (body, _) = rest.split_once("```")?;
        Some(body.trim())
    }

    // Strategies 1 and 2: the fence content itself must begin with the object.
    for opener in ["```json", "```\n"] {
        let object = fenced(text, opener)
            .filter(|body| body.starts_with('{'))
            .and_then(|body| balanced_braces(body, b'{', b'}'));
        if object.is_some() {
            return object;
        }
    }

    // Strategy 3: any fence; the matcher starts at the first `{`, which skips
    // a language tag line if present.
    let object = fenced(text, "```").and_then(|block| balanced_braces(block, b'{', b'}'));
    if object.is_some() {
        return object;
    }

    // Strategy 4: no usable fence, so scan the whole text. This scan is
    // string-aware, which is what recovers objects whose string values
    // contain "```".
    balanced_braces(text, b'{', b'}')
}

/// Extract the first JSON array `[...]` from text.
///
/// Tries, in order:
/// 1. Markdown ` ```json ` fenced block
/// 2. Plain ` ``` ` fenced block
/// 3. First `[` with balanced-bracket matching
pub fn extract_json_array(text: &str) -> Option<String> {
    // Try markdown code fence first
    if let Some(start) = text.find("```json") {
        let after_fence = &text[start + 7..];
        if let Some(end) = after_fence.find("```") {
            let content = after_fence[..end].trim();
            if content.starts_with('[') || content.starts_with('{') {
                return Some(content.to_string());
            }
        }
    }
    // Plain ``` fences
    if let Some(start) = text.find("```") {
        let after_fence = &text[start + 3..];
        if let Some(end) = after_fence.find("```") {
            let content = after_fence[..end].trim();
            if content.starts_with('[') {
                return Some(content.to_string());
            }
        }
    }

    // Find bare [ ... ] array
    balanced_braces(text, b'[', b']')
}

/// Repair common JSON issues from LLM output.
///
/// Currently handles:
/// - Trailing commas before `}` or `]`, including when spaces, tabs or line
///   breaks separate the comma from the closing delimiter (the usual shape of
///   pretty-printed output). Runs of several such commas are removed too.
///
/// The scan tracks JSON string literals and backslash escapes, so `,}` or
/// `,]` appearing inside a string value is left untouched. Every other
/// character, whitespace included, is copied through unchanged.
///
/// The input is assumed to start outside a string literal: an unmatched `"`
/// ahead of the JSON inverts the string tracking for the rest of the input.
pub fn repair_json(json: &str) -> String {
    let mut repaired = String::with_capacity(json.len());
    let mut in_string = false;
    let mut escaped = false;

    for ch in json.chars() {
        if in_string {
            if escaped {
                // The character after a backslash never ends the string.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
        } else if ch == '"' {
            in_string = true;
        } else if ch == '}' || ch == ']' {
            // Only whitespace can sit between a removed comma and this closer,
            // so the comma is guaranteed to be outside any string as well.
            strip_trailing_commas(&mut repaired);
        }
        repaired.push(ch);
    }
    repaired
}

/// Remove every comma that ends `out` once trailing JSON whitespace is
/// ignored, keeping that whitespace in place.
fn strip_trailing_commas(out: &mut String) {
    fn is_json_whitespace(c: char) -> bool {
        matches!(c, ' ' | '\t' | '\n' | '\r')
    }

    loop {
        let significant_len = out.trim_end_matches(is_json_whitespace).len();
        if !out[..significant_len].ends_with(',') {
            break;
        }
        // A comma is a single byte, so it sits at `significant_len - 1`.
        out.remove(significant_len - 1);
    }
}

/// Return the span that begins at the first `open` byte in `text` and ends at
/// the `close` byte that brings the nesting depth back to zero.
///
/// Delimiters inside double-quoted JSON strings are ignored, and a backslash
/// inside a string protects the byte that follows it (so `\"` does not end
/// the string). Only the first `open` byte is tried as a starting point:
/// `None` is returned when `text` has no `open` byte or when that first one
/// is never closed.
fn balanced_braces(text: &str, open: u8, close: u8) -> Option<String> {
    let first = text.bytes().position(|byte| byte == open)?;

    let mut nesting: i32 = 0;
    let mut inside_string = false;
    let mut skip_next = false;

    // `enumerate` before `skip` keeps `offset` an absolute index into `text`.
    for (offset, byte) in text.bytes().enumerate().skip(first) {
        if skip_next {
            // Byte following a backslash inside a string: taken literally.
            skip_next = false;
        } else if inside_string {
            match byte {
                b'\\' => skip_next = true,
                b'"' => inside_string = false,
                _ => {}
            }
        } else if byte == b'"' {
            inside_string = true;
        } else if byte == open {
            nesting += 1;
        } else if byte == close {
            nesting -= 1;
            if nesting == 0 {
                // Both ends are single-byte delimiters, so the slice falls on
                // character boundaries.
                return Some(text[first..=offset].to_string());
            }
        }
    }

    None
}

#[cfg(test)]
mod tests {
    use super::*;

    /// An object inside a ```json fence is returned without the surrounding prose.
    #[test]
    fn extract_object_from_markdown_json_fence() {
        let reply = "Here:\n```json\n{\"id\":\"x\"}\n```\nDone.";
        let found = extract_json_object(reply);
        assert_eq!(found.as_deref(), Some(r#"{"id":"x"}"#));
    }

    /// A fence without a language tag is handled the same way.
    #[test]
    fn extract_object_from_plain_fence() {
        let reply = "Here:\n```\n{\"id\":\"x\"}\n```\nDone.";
        let found = extract_json_object(reply);
        assert_eq!(found.as_deref(), Some(r#"{"id":"x"}"#));
    }

    /// Without fences, the outermost object is returned with its nested object intact.
    #[test]
    fn extract_nested_object() {
        let reply = r#"blah {"id":"x","inner":{"a":1}} blah"#;
        let found = extract_json_object(reply);
        assert_eq!(found.as_deref(), Some(r#"{"id":"x","inner":{"a":1}}"#));
    }

    /// Braces inside a string value must not affect brace matching.
    #[test]
    fn extract_object_with_string_braces() {
        let reply = r#"{"msg":"a { b } c"}"#;
        let found = extract_json_object(reply);
        assert_eq!(found.as_deref(), Some(reply));
    }

    /// Text without any `{` produces no object.
    #[test]
    fn extract_object_returns_none_for_no_json() {
        assert_eq!(extract_json_object("no json here"), None);
    }

    /// An array inside a ```json fence is returned as-is.
    #[test]
    fn extract_array_from_markdown() {
        let reply = "Skills:\n```json\n[{\"name\":\"a\"}]\n```";
        let found = extract_json_array(reply);
        assert_eq!(found.as_deref(), Some(r#"[{"name":"a"}]"#));
    }

    /// An unfenced array surrounded by prose is located by bracket matching.
    #[test]
    fn extract_bare_array() {
        let reply = "result: [{\"a\":1},{\"b\":2}] end";
        let found = extract_json_array(reply);
        assert_eq!(found.as_deref(), Some(r#"[{"a":1},{"b":2}]"#));
    }

    /// Trailing commas are dropped before `}` and `]`, including nested ones.
    #[test]
    fn repair_trailing_commas() {
        let cases = [
            (r#"{"a":1,}"#, r#"{"a":1}"#),
            (r#"[1,2,]"#, r#"[1,2]"#),
            (r#"{"a":[1,],}"#, r#"{"a":[1]}"#),
        ];
        for (broken, fixed) in cases.iter() {
            assert_eq!(repair_json(broken), *fixed);
        }
    }

    /// Already-valid JSON passes through unchanged.
    #[test]
    fn repair_no_change_for_valid_json() {
        let untouched = r#"{"a":1,"b":[2,3]}"#;
        assert_eq!(repair_json(untouched), untouched);
    }
}