Skip to main content

datasynth_core/llm/
json_utils.rs

1//! Utilities for extracting JSON fragments from noisy LLM output.
2
3/// Extract the first balanced JSON object (`{...}`) from text.
4pub fn extract_json_object(content: &str) -> Option<&str> {
5    extract_balanced(content, '{', '}')
6}
7
8/// Extract the first balanced JSON array (`[...]`) from text.
9pub fn extract_json_array(content: &str) -> Option<&str> {
10    extract_balanced(content, '[', ']')
11}
12
/// Extract the first balanced `open`...`close` span from `content`.
///
/// Scanning begins at the first occurrence of `open`. Nested pairs are tracked
/// with a depth counter, and the returned slice runs from that first `open`
/// through the `close` that brings the depth back to zero.
///
/// Delimiters that appear inside double-quoted string literals are ignored, and
/// a backslash inside such a literal escapes the following character (so `\"`
/// does not end the string). Without this, a payload such as `{"a": "}"}` would
/// be truncated at the brace inside the string value.
///
/// Returns `None` when `content` contains no `open`, or when the span is never
/// closed (including the case of an unterminated string literal).
fn extract_balanced(content: &str, open: char, close: char) -> Option<&str> {
    let start = content.find(open)?;
    let mut depth = 0i32;
    let mut in_string = false;
    let mut escaped = false;
    for (offset, ch) in content[start..].char_indices() {
        if in_string {
            if escaped {
                // The character after a backslash is consumed verbatim.
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                in_string = false;
            }
            continue;
        }
        if ch == '"' {
            in_string = true;
        } else if ch == open {
            depth += 1;
        } else if ch == close {
            depth -= 1;
            if depth == 0 {
                // Use the delimiter's real UTF-8 width so the slice end always
                // lands on a character boundary.
                let end = start + offset + ch.len_utf8();
                return Some(&content[start..end]);
            }
        }
    }
    None
}
29
#[cfg(test)]
mod tests {
    use super::*;

    /// The extracted object must be exactly the outer braces and their
    /// contents, with surrounding prose stripped and the nested object kept.
    #[test]
    fn test_extract_json_object() {
        let input = r#"Here: {"a": 1, "b": {"c": 2}} done"#;
        assert_eq!(
            extract_json_object(input),
            Some(r#"{"a": 1, "b": {"c": 2}}"#)
        );
    }

    /// Comparing the whole slice (rather than only its first and last
    /// characters) catches an extraction that stops at the inner `]`.
    #[test]
    fn test_extract_json_array() {
        let input = r#"Result: [{"x": 1}, {"y": [2, 3]}] end"#;
        assert_eq!(
            extract_json_array(input),
            Some(r#"[{"x": 1}, {"y": [2, 3]}]"#)
        );
    }

    /// Text without any opening delimiter yields nothing.
    #[test]
    fn test_no_match() {
        assert_eq!(extract_json_object("no json here"), None);
        assert_eq!(extract_json_array("no json here"), None);
    }

    /// An opening delimiter that is never closed yields nothing.
    #[test]
    fn test_unbalanced() {
        assert_eq!(extract_json_object("{unclosed"), None);
        assert_eq!(extract_json_array("[unclosed"), None);
    }
}