Skip to main content

agentic_tools_utils/
llm_output.rs

1//! LLM output extraction utilities.
2
3use thiserror::Error;
4
5/// Error type for LLM output extraction.
6#[derive(Debug, Error)]
7#[error("Failed to extract valid JSON from model output")]
8pub struct JsonExtractionError;
9
10/// Best-effort JSON extraction from model output.
11///
12/// Tries multiple extraction strategies in order:
13/// 1. Whole string as valid JSON
14/// 2. Fenced code blocks (```json or ```)
15/// 3. First `{` to last `}` fallback
16///
17/// # Errors
18///
19/// Returns an error if no valid JSON can be extracted from the input.
20pub fn extract_json_best_effort(text: &str) -> Result<String, JsonExtractionError> {
21    let t = text.trim();
22
23    // 1) Try whole string as JSON
24    if serde_json::from_str::<serde_json::Value>(t).is_ok() {
25        return Ok(t.to_string());
26    }
27
28    // 2) Try extracting from fenced code blocks
29    if t.contains("```") {
30        for chunk in t.split("```").skip(1).step_by(2) {
31            // Skip language identifier if present (e.g., "json\n{...")
32            let chunk = chunk.trim_start_matches(|c: char| c.is_alphabetic() || c == '\n');
33            if let (Some(a), Some(b)) = (chunk.find('{'), chunk.rfind('}')) {
34                let candidate = &chunk[a..=b];
35                if serde_json::from_str::<serde_json::Value>(candidate).is_ok() {
36                    return Ok(candidate.to_string());
37                }
38            }
39        }
40    }
41
42    // 3) Fallback: find first { to last }
43    if let (Some(a), Some(b)) = (t.find('{'), t.rfind('}')) {
44        let candidate = &t[a..=b];
45        if serde_json::from_str::<serde_json::Value>(candidate).is_ok() {
46            return Ok(candidate.to_string());
47        }
48    }
49
50    Err(JsonExtractionError)
51}
52
#[cfg(test)]
mod tests {
    use super::*;

    /// Review payload shared by the cases that only differ in surrounding text.
    const APPROVED: &str = r#"{"lens":"security","verdict":"approved","findings":[],"notes":[]}"#;

    /// Bare JSON is returned as-is.
    #[test]
    fn extracts_raw_json() {
        let extracted = extract_json_best_effort(APPROVED).unwrap();
        assert!(extracted.starts_with('{'));
        assert!(extracted.ends_with('}'));
    }

    /// JSON inside a fence tagged `json`, surrounded by prose.
    #[test]
    fn extracts_fenced_json() {
        let input = format!("Here is the review:\n```json\n{}\n```\nDone.", APPROVED);
        let extracted = extract_json_best_effort(&input).unwrap();
        assert!(extracted.contains("\"lens\":\"security\""));
    }

    /// JSON preceded by a line of prose and no fence.
    #[test]
    fn extracts_json_with_preamble() {
        let input = concat!(
            "I found the following issues:\n",
            r#"{"lens":"correctness","verdict":"needs_changes","findings":[],"notes":[]}"#,
        );
        let extracted = extract_json_best_effort(input).unwrap();
        assert!(extracted.starts_with('{'));
    }

    /// Plain prose without any JSON is an error.
    #[test]
    fn rejects_invalid_json() {
        assert!(extract_json_best_effort("This is not JSON at all").is_err());
    }

    /// JSON inside a fence that has no language tag.
    #[test]
    fn extracts_fenced_json_without_language_tag() {
        let input = format!("Preamble\n```\n{}\n```\n", APPROVED);
        let extracted = extract_json_best_effort(&input).unwrap();
        assert!(extracted.contains("\"lens\":\"security\""));
    }

    /// A non-JSON fence comes first; the JSON sits in the second fence.
    #[test]
    fn extracts_json_from_second_fence_when_first_is_not_json() {
        let input = format!("```text\nnot json\n```\n```json\n{}\n```\n", APPROVED);
        let extracted = extract_json_best_effort(&input).unwrap();
        assert!(extracted.contains("\"verdict\":\"approved\""));
    }

    /// The only fence holds no JSON; the JSON follows it as plain text.
    #[test]
    fn extracts_json_outside_fences_when_fences_contain_no_json() {
        let input = format!("```text\nhello\n```\nTrailing:\n{}\n", APPROVED);
        let extracted = extract_json_best_effort(&input).unwrap();
        assert!(extracted.contains("\"lens\":\"security\""));
    }
}