Skip to main content

sgr_agent/
flexible_parser.rs

//! Flexible JSON parser — extracts structured data from messy LLM output.
//!
//! Inspired by BAML's "jsonish" SAP (Schema-Aligned Parsing) approach.
//! Collects multiple parse candidates (AnyOf), tries to deserialize each
//! into the target type `T`, and returns the first success.
//!
//! Parse cascade:
//! 1. Direct JSON (`serde_json::from_str`)
//! 2. Markdown code blocks (````json ... ````)
//! 3. Greedy JSON extraction (every balanced `{...}` or `[...]` in the text)
//! 4. Fixing parser (close brackets, strip trailing commas, single quotes, JS comments)
//! 5. Truncation recovery (cut at commas / closing brackets, then re-close)
//! 6. Fail with all candidates listed
//!
//! Works with any model — no structured output API required.
15
16use schemars::JsonSchema;
17use serde::de::DeserializeOwned;
18
19use crate::coerce::coerce_value;
20use crate::schema::response_schema_for;
21
22/// A parse candidate with provenance info for debugging.
23#[derive(Debug, Clone)]
24pub struct Candidate {
25    /// The JSON string to try deserializing.
26    pub json: String,
27    /// How this candidate was extracted.
28    pub source: CandidateSource,
29}
30
/// Identifies which extraction strategy produced a [`Candidate`].
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum CandidateSource {
    /// The input as a whole already looked like JSON.
    Direct,
    /// Taken from the body of a fenced markdown code block.
    MarkdownBlock,
    /// A balanced `{...}` or `[...]` span located inside surrounding text.
    Grepped,
    /// Produced by repairing broken JSON (closing brackets, dropping trailing commas, ...).
    Fixed,
}
42
43/// Result of a flexible parse attempt.
44#[derive(Debug)]
45pub struct ParseResult<T> {
46    /// Successfully parsed value.
47    pub value: T,
48    /// Which candidate succeeded.
49    pub source: CandidateSource,
50    /// Total candidates tried.
51    pub candidates_tried: usize,
52}
53
54/// Parse error with all attempted candidates.
55#[derive(Debug)]
56pub struct ParseError {
57    /// All candidates that were tried.
58    pub candidates: Vec<(Candidate, String)>,
59    /// Original raw text.
60    pub raw: String,
61}
62
63impl std::fmt::Display for ParseError {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        write!(
66            f,
67            "Failed to parse into target type. {} candidates tried",
68            self.candidates.len()
69        )?;
70        for (i, (candidate, err)) in self.candidates.iter().enumerate() {
71            write!(
72                f,
73                "\n  [{i}] {:?}: {}",
74                candidate.source,
75                truncate(err, 100)
76            )?;
77        }
78        Ok(())
79    }
80}
81
82impl std::error::Error for ParseError {}
83
84/// Parse raw LLM output into type `T` using the AnyOf cascade.
85///
86/// Tries multiple extraction strategies, returns the first successful parse.
87pub fn parse_flexible<T: DeserializeOwned>(raw: &str) -> Result<ParseResult<T>, ParseError> {
88    let candidates = collect_candidates(raw);
89    let mut errors = Vec::new();
90
91    for candidate in &candidates {
92        match serde_json::from_str::<T>(&candidate.json) {
93            Ok(value) => {
94                return Ok(ParseResult {
95                    value,
96                    source: candidate.source,
97                    candidates_tried: errors.len() + 1,
98                });
99            }
100            Err(e) => {
101                errors.push((candidate.clone(), e.to_string()));
102            }
103        }
104    }
105
106    Err(ParseError {
107        candidates: errors,
108        raw: raw.to_string(),
109    })
110}
111
112/// Parse with schema-aware coercion: "42" → 42, "true" → true, "redd" → "Red".
113///
114/// First tries `parse_flexible` (strict serde). If all candidates fail,
115/// retries each candidate with coercion applied before deserialization.
116pub fn parse_flexible_coerced<T: JsonSchema + DeserializeOwned>(
117    raw: &str,
118) -> Result<ParseResult<T>, ParseError> {
119    // Try strict first — no coercion overhead if JSON is clean
120    if let Ok(result) = parse_flexible::<T>(raw) {
121        return Ok(result);
122    }
123
124    // Retry with coercion
125    let candidates = collect_candidates(raw);
126    let schema = response_schema_for::<T>();
127    let mut errors = Vec::new();
128
129    for candidate in &candidates {
130        // Parse to Value, coerce, then deserialize
131        if let Ok(mut value) = serde_json::from_str::<serde_json::Value>(&candidate.json) {
132            coerce_value(&mut value, &schema);
133            match serde_json::from_value::<T>(value) {
134                Ok(parsed) => {
135                    return Ok(ParseResult {
136                        value: parsed,
137                        source: candidate.source,
138                        candidates_tried: errors.len() + 1,
139                    });
140                }
141                Err(e) => {
142                    errors.push((candidate.clone(), format!("coerced: {}", e)));
143                }
144            }
145        } else {
146            errors.push((candidate.clone(), "invalid JSON even for Value".into()));
147        }
148    }
149
150    Err(ParseError {
151        candidates: errors,
152        raw: raw.to_string(),
153    })
154}
155
156/// Collect all parse candidates from raw text (AnyOf pattern).
157pub fn collect_candidates(raw: &str) -> Vec<Candidate> {
158    let mut candidates = Vec::new();
159
160    // 0. Unescape double-wrapped JSON string: "{ \"key\": ... }" → { "key": ... }
161    let effective = try_unescape_json_string(raw).unwrap_or_else(|| raw.to_string());
162    let raw = effective.as_str();
163
164    // 1. Direct JSON parse
165    if looks_like_json(raw) {
166        candidates.push(Candidate {
167            json: raw.to_string(),
168            source: CandidateSource::Direct,
169        });
170    }
171
172    // 2. Markdown code blocks
173    for block in extract_markdown_blocks(raw) {
174        candidates.push(Candidate {
175            json: block,
176            source: CandidateSource::MarkdownBlock,
177        });
178    }
179
180    // 3. Greedy JSON extraction
181    for json in extract_json_objects(raw) {
182        // Skip if we already have this exact string as a candidate
183        if !candidates.iter().any(|c| c.json == json) {
184            candidates.push(Candidate {
185                json,
186                source: CandidateSource::Grepped,
187            });
188        }
189    }
190
191    // 4. Try fixing each candidate that failed
192    let fixable: Vec<String> = candidates.iter().map(|c| c.json.clone()).collect();
193    for json in &fixable {
194        if let Some(fixed) = try_fix_json(json)
195            && !candidates.iter().any(|c| c.json == fixed)
196        {
197            candidates.push(Candidate {
198                json: fixed,
199                source: CandidateSource::Fixed,
200            });
201        }
202    }
203
204    // Also try fixing the raw input directly if no candidates yet
205    if (candidates.is_empty()
206        || !candidates
207            .iter()
208            .any(|c| c.source == CandidateSource::Direct))
209        && let Some(fixed) = try_fix_json(raw)
210        && !candidates.iter().any(|c| c.json == fixed)
211    {
212        candidates.push(Candidate {
213            json: fixed,
214            source: CandidateSource::Fixed,
215        });
216    }
217
218    // 5. Truncation recovery — try progressively aggressive cuts for streaming
219    // (only if no Fixed candidate parsed as valid Value with all required fields)
220    for json_source in [raw]
221        .iter()
222        .chain(fixable.iter().map(|s| s as &str).collect::<Vec<_>>().iter())
223    {
224        for recovered in truncation_recovery_candidates(json_source) {
225            if !candidates.iter().any(|c| c.json == recovered) {
226                candidates.push(Candidate {
227                    json: recovered,
228                    source: CandidateSource::Fixed,
229                });
230            }
231        }
232    }
233
234    candidates
235}
236
237// ============================================================================
238// Extraction strategies
239// ============================================================================
240
241/// Extract JSON from markdown code blocks: ```json\n...\n``` or ```\n...\n```
242fn extract_markdown_blocks(text: &str) -> Vec<String> {
243    let mut blocks = Vec::new();
244    let mut rest = text;
245
246    while let Some(start) = rest.find("```") {
247        let after_ticks = &rest[start + 3..];
248
249        // Skip optional language tag (e.g., "json", "JSON", "jsonc")
250        let content_start = if let Some(newline) = after_ticks.find('\n') {
251            newline + 1
252        } else {
253            break;
254        };
255        let content = &after_ticks[content_start..];
256
257        // Find closing ```
258        if let Some(end) = content.find("```") {
259            let block = content[..end].trim();
260            if !block.is_empty() && looks_like_json(block) {
261                blocks.push(block.to_string());
262            }
263            rest = &content[end + 3..];
264        } else {
265            // Unclosed code block — try to parse what we have
266            let block = content.trim();
267            if !block.is_empty() && looks_like_json(block) {
268                blocks.push(block.to_string());
269            }
270            break;
271        }
272    }
273
274    blocks
275}
276
277/// Find JSON objects `{...}` and arrays `[...]` in text using bracket matching.
278fn extract_json_objects(text: &str) -> Vec<String> {
279    let mut results = Vec::new();
280
281    for open in ['{', '['] {
282        let close = if open == '{' { '}' } else { ']' };
283        let mut search_from = 0;
284
285        while let Some(start) = text[search_from..].find(open) {
286            let abs_start = search_from + start;
287            if let Some(end) = find_matching_bracket(text, abs_start, open, close) {
288                let json = &text[abs_start..=end];
289                if !results.contains(&json.to_string()) {
290                    results.push(json.to_string());
291                }
292                search_from = end + 1;
293            } else {
294                // No matching bracket — try with auto-close
295                search_from = abs_start + 1;
296            }
297        }
298    }
299
300    results
301}
302
/// Return the byte index of the bracket that balances the one at `start`.
///
/// Scans the bytes of `text` from `start`, counting `open` / `close` outside of
/// double-quoted strings (backslash escapes inside strings are honoured).
/// Returns `None` when the end of the text is reached before the nesting depth
/// returns to zero.
fn find_matching_bracket(text: &str, start: usize, open: char, close: char) -> Option<usize> {
    let mut nesting = 0i32;
    let mut inside_string = false;
    let mut escaped = false;

    for (index, &byte) in text.as_bytes().iter().enumerate().skip(start) {
        // The previous byte was a backslash inside a string: skip this one.
        if escaped {
            escaped = false;
            continue;
        }

        let ch = char::from(byte);
        match ch {
            '\\' if inside_string => escaped = true,
            '"' => inside_string = !inside_string,
            _ if inside_string => {}
            _ if ch == open => nesting += 1,
            _ if ch == close => {
                nesting -= 1;
                if nesting == 0 {
                    return Some(index);
                }
            }
            _ => {}
        }
    }

    None
}
348
349// ============================================================================
350// JSON fixing
351// ============================================================================
352
353/// Try to fix common JSON errors. Returns None if unfixable.
354fn try_fix_json(raw: &str) -> Option<String> {
355    let trimmed = raw.trim();
356
357    // Already valid? No fix needed.
358    if serde_json::from_str::<serde_json::Value>(trimmed).is_ok() {
359        return None;
360    }
361
362    let mut fixed = trimmed.to_string();
363    let mut changed = false;
364
365    // Fix 1: Strip trailing commas before } or ]
366    let re_trailing = strip_trailing_commas(&fixed);
367    if re_trailing != fixed {
368        fixed = re_trailing;
369        changed = true;
370    }
371
372    // Fix 2: Close unclosed brackets/braces
373    let closed = close_brackets(&fixed);
374    if closed != fixed {
375        fixed = closed;
376        changed = true;
377    }
378
379    // Fix 3: Single quotes → double quotes (outside of double-quoted strings)
380    let quoted = fix_single_quotes(&fixed);
381    if quoted != fixed {
382        fixed = quoted;
383        changed = true;
384    }
385
386    // Fix 4: Strip JS-style comments (// and /* */)
387    let uncommented = strip_comments(&fixed);
388    if uncommented != fixed {
389        fixed = uncommented;
390        changed = true;
391    }
392
393    // Verify the fix actually produces valid JSON
394    if changed && serde_json::from_str::<serde_json::Value>(&fixed).is_ok() {
395        Some(fixed)
396    } else {
397        None
398    }
399}
400
/// Remove commas that are followed (after optional whitespace) by `}` or `]`.
///
/// `{"a": 1,}` becomes `{"a": 1}`. Double-quoted strings are copied verbatim,
/// including backslash escapes, so commas inside them are never touched.
fn strip_trailing_commas(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let mut out = String::with_capacity(s.len());
    let mut inside_string = false;
    let mut escaped = false;

    for (index, &ch) in chars.iter().enumerate() {
        if inside_string {
            out.push(ch);
            if escaped {
                escaped = false;
            } else if ch == '\\' {
                escaped = true;
            } else if ch == '"' {
                inside_string = false;
            }
            continue;
        }

        match ch {
            '"' => {
                inside_string = true;
                out.push(ch);
            }
            ',' => {
                // Peek past whitespace for the closing bracket.
                let next_significant = chars[index + 1..]
                    .iter()
                    .find(|candidate| !candidate.is_whitespace());
                let is_trailing = matches!(next_significant, Some('}') | Some(']'));
                if !is_trailing {
                    out.push(ch);
                }
            }
            _ => out.push(ch),
        }
    }

    out
}
445
/// Close an unterminated string and any unclosed brackets:
/// `{"a": [1, 2` → `{"a": [1, 2]}`.
///
/// The input is returned unchanged when it is balanced and not inside a string.
/// Nothing is removed from the input, with one exception: if the text stops
/// right after a backslash inside a string (truncation in the middle of an
/// escape sequence), that lone backslash is dropped so the closing quote that
/// is appended is not itself escaped. Incomplete elements are *not* dropped
/// here.
fn close_brackets(s: &str) -> String {
    let mut stack = Vec::new();
    let mut in_string = false;
    let mut escape_next = false;

    for ch in s.chars() {
        if escape_next {
            escape_next = false;
            continue;
        }
        if ch == '\\' && in_string {
            escape_next = true;
            continue;
        }
        if ch == '"' {
            in_string = !in_string;
            continue;
        }
        if !in_string {
            match ch {
                '{' => stack.push('}'),
                '[' => stack.push(']'),
                // A closer pops whatever is open; mismatches are not diagnosed.
                '}' | ']' => {
                    stack.pop();
                }
                _ => {}
            }
        }
    }

    // Balanced and outside any string: nothing to do.
    if stack.is_empty() && !in_string {
        return s.to_string();
    }

    let mut result = String::with_capacity(s.len() + stack.len() + 1);
    result.push_str(s);

    if in_string {
        // `escape_next` still set means the last character was a backslash
        // starting an escape that never completed.
        if escape_next {
            result.pop();
        }
        result.push('"');
    }

    // Innermost bracket first.
    result.extend(stack.into_iter().rev());

    result
}
498
499/// Truncation recovery: find cut points and generate multiple candidates.
500///
501/// For `{"a":[{"b":1},{"c":2,"d` generates:
502/// - Cut at inner comma: `{"a":[{"b":1},{"c":2}]}` (partial element)
503/// - Cut at outer comma: `{"a":[{"b":1}]}` (drop incomplete element)
504///
505/// Returns all valid JSON candidates, most aggressive cut last (so AnyOf tries
506/// the most complete version first).
507fn truncation_recovery_candidates(s: &str) -> Vec<String> {
508    // Collect all cut points: commas and closing brackets (outside strings)
509    // Use byte positions (not char indices) for correct slicing with Unicode
510    let mut cut_points = Vec::new();
511    let mut in_string = false;
512    let mut escape_next = false;
513
514    for (byte_pos, ch) in s.char_indices() {
515        if escape_next {
516            escape_next = false;
517            continue;
518        }
519        if ch == '\\' && in_string {
520            escape_next = true;
521            continue;
522        }
523        if ch == '"' {
524            in_string = !in_string;
525            continue;
526        }
527        if in_string {
528            continue;
529        }
530        match ch {
531            ',' => cut_points.push(byte_pos),
532            '}' | ']' => cut_points.push(byte_pos + 1),
533            _ => {}
534        }
535    }
536
537    // Try cuts from rightmost (most data kept) to leftmost (most data dropped)
538    let mut results = Vec::new();
539    for &cut in cut_points.iter().rev() {
540        if cut == 0 || cut >= s.len() {
541            continue;
542        }
543        if let Some(candidate) = try_close_at(s, cut)
544            && !results.contains(&candidate)
545        {
546            results.push(candidate);
547        }
548    }
549
550    results
551}
552
553/// Try cutting the string at `pos` and closing all open brackets.
554fn try_close_at(s: &str, pos: usize) -> Option<String> {
555    let mut truncated = s[..pos].trim_end().to_string();
556
557    // Strip trailing comma
558    if truncated.ends_with(',') {
559        truncated.pop();
560    }
561
562    // Close open brackets
563    let mut stack = Vec::new();
564    let mut in_str = false;
565    let mut esc = false;
566    for ch in truncated.chars() {
567        if esc {
568            esc = false;
569            continue;
570        }
571        if ch == '\\' && in_str {
572            esc = true;
573            continue;
574        }
575        if ch == '"' {
576            in_str = !in_str;
577            continue;
578        }
579        if !in_str {
580            match ch {
581                '{' => stack.push('}'),
582                '[' => stack.push(']'),
583                '}' | ']' => {
584                    stack.pop();
585                }
586                _ => {}
587            }
588        }
589    }
590    if in_str {
591        truncated.push('"');
592    }
593    while let Some(close) = stack.pop() {
594        truncated.push(close);
595    }
596
597    if serde_json::from_str::<serde_json::Value>(&truncated).is_ok() {
598        Some(truncated)
599    } else {
600        None
601    }
602}
603
/// Convert single-quoted strings to double-quoted ones.
///
/// Double-quoted strings are copied verbatim (apostrophes inside them are left
/// alone). Inside a single-quoted string the content is adjusted so the result
/// is a valid double-quoted JSON string with the same meaning:
/// - a bare `"` is escaped as `\"`;
/// - `\'` (not a legal JSON escape) becomes a plain `'`;
/// - every other backslash escape is copied unchanged.
fn fix_single_quotes(s: &str) -> String {
    #[derive(Clone, Copy)]
    enum Scan {
        Outside,
        InDouble,
        InSingle,
    }

    let mut result = String::with_capacity(s.len());
    let mut state = Scan::Outside;
    let mut chars = s.chars();

    // `while let` rather than `for`: escape handling consumes the next char.
    while let Some(ch) = chars.next() {
        match (state, ch) {
            // Escapes inside a double-quoted string are copied as a pair.
            (Scan::InDouble, '\\') => {
                result.push('\\');
                if let Some(next) = chars.next() {
                    result.push(next);
                }
            }
            (Scan::InSingle, '\\') => match chars.next() {
                Some('\'') => result.push('\''),
                Some(next) => {
                    result.push('\\');
                    result.push(next);
                }
                None => result.push('\\'),
            },
            (Scan::Outside, '"') => {
                state = Scan::InDouble;
                result.push('"');
            }
            (Scan::InDouble, '"') => {
                state = Scan::Outside;
                result.push('"');
            }
            // A double quote inside a single-quoted string must be escaped once
            // the delimiters become double quotes.
            (Scan::InSingle, '"') => result.push_str("\\\""),
            (Scan::Outside, '\'') => {
                state = Scan::InSingle;
                result.push('"');
            }
            (Scan::InSingle, '\'') => {
                state = Scan::Outside;
                result.push('"');
            }
            (_, other) => result.push(other),
        }
    }

    result
}
637
/// Remove JS-style comments (`// line` and `/* block */`) outside of strings.
///
/// A line comment is removed up to, but not including, the newline that ends
/// it. A block comment is removed including its terminator; an unterminated
/// block comment swallows the rest of the input. Double-quoted strings are
/// copied verbatim, escapes included.
fn strip_comments(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut it = s.chars().peekable();

    while let Some(ch) = it.next() {
        match ch {
            '"' => {
                out.push('"');
                // Copy the string literal through its closing quote.
                while let Some(inner) = it.next() {
                    out.push(inner);
                    match inner {
                        '\\' => {
                            if let Some(escaped) = it.next() {
                                out.push(escaped);
                            }
                        }
                        '"' => break,
                        _ => {}
                    }
                }
            }
            '/' if it.peek() == Some(&'/') => {
                // Discard everything before the newline; the newline stays.
                while it.next_if(|&c| c != '\n').is_some() {}
            }
            '/' if it.peek() == Some(&'*') => {
                it.next(); // the '*' of the opener cannot double as the closer's '*'
                let mut previous_was_star = false;
                for c in it.by_ref() {
                    if previous_was_star && c == '/' {
                        break;
                    }
                    previous_was_star = c == '*';
                }
            }
            other => out.push(other),
        }
    }

    out
}
688
689// ============================================================================
690// Helpers
691// ============================================================================
692
693/// Try to unescape a double-wrapped JSON string.
694///
695/// Some models output JSON as a string literal: `"{ \"key\": \"value\" }"`
696/// This detects and unescapes it back to `{ "key": "value" }`.
697fn try_unescape_json_string(raw: &str) -> Option<String> {
698    let trimmed = raw.trim();
699    // Must start and end with quotes
700    if !trimmed.starts_with('"') || !trimmed.ends_with('"') || trimmed.len() < 3 {
701        return None;
702    }
703    // Inner content must look like escaped JSON (contains \")
704    let inner = &trimmed[1..trimmed.len() - 1];
705    if !inner.contains("\\\"") {
706        return None;
707    }
708    // Try to parse as a JSON string, which gives us the unescaped content
709    match serde_json::from_str::<String>(trimmed) {
710        Ok(unescaped) if looks_like_json(&unescaped) => Some(unescaped),
711        _ => None,
712    }
713}
714
/// Cheap shape check used to decide whether text is worth treating as JSON.
///
/// After trimming, the text qualifies when it is wrapped in `{}` or `[]`, is
/// exactly `null`, `true` or `false`, or starts with a double quote.
fn looks_like_json(s: &str) -> bool {
    let text = s.trim();
    let wrapped_in = |open: char, close: char| text.starts_with(open) && text.ends_with(close);

    wrapped_in('{', '}')
        || wrapped_in('[', ']')
        || matches!(text, "null" | "true" | "false")
        || text.starts_with('"')
}
724
/// Shorten `s` to at most `max` bytes without splitting a UTF-8 character.
///
/// Strings that already fit are returned unchanged; otherwise the cut moves
/// left from `max` to the nearest character boundary.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }

    // `max < s.len()` here, and index 0 is always a boundary, so this terminates.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
732
733// ============================================================================
734// Tests
735// ============================================================================
736
/// Unit tests covering the parse cascade end to end (`parse_flexible`) as well
/// as the individual extraction and repair helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// Target type used by most cascade tests.
    #[derive(Debug, Deserialize, PartialEq)]
    struct Answer {
        answer: String,
        confidence: f64,
    }

    // --- Direct JSON ---

    #[test]
    fn parses_clean_json() {
        let raw = r#"{"answer": "42", "confidence": 0.95}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "42");
        assert_eq!(result.source, CandidateSource::Direct);
    }

    // --- Markdown blocks ---

    #[test]
    fn parses_from_markdown_block() {
        let raw = r#"Here's my answer:

```json
{"answer": "hello", "confidence": 0.8}
```

Hope that helps!"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "hello");
        assert_eq!(result.source, CandidateSource::MarkdownBlock);
    }

    #[test]
    fn parses_from_unlabeled_markdown_block() {
        let raw = r#"Sure:

```
{"answer": "test", "confidence": 0.5}
```"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "test");
        assert_eq!(result.source, CandidateSource::MarkdownBlock);
    }

    // --- Grepped JSON ---

    #[test]
    fn extracts_json_from_surrounding_text() {
        let raw =
            r#"I think the answer is {"answer": "yes", "confidence": 0.9} based on my analysis."#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "yes");
        assert_eq!(result.source, CandidateSource::Grepped);
    }

    #[test]
    fn extracts_json_after_chain_of_thought() {
        let raw = r#"Let me think step by step...
First, I need to consider the question carefully.
The answer seems clear.

{"answer": "deep thought", "confidence": 0.99}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "deep thought");
    }

    // --- Fixed JSON ---

    #[test]
    fn fixes_trailing_comma() {
        let raw = r#"{"answer": "fixed", "confidence": 0.7,}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "fixed");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_unclosed_brackets() {
        let raw = r#"{"answer": "partial", "confidence": 0.6"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "partial");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_single_quotes() {
        let raw = r#"{'answer': 'quoted', 'confidence': 0.5}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "quoted");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_js_comments() {
        let raw = r#"{
            // This is the answer
            "answer": "commented",
            "confidence": 0.4
        }"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "commented");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    // --- Combined scenarios ---

    #[test]
    fn prefers_direct_over_markdown() {
        // If the whole input is valid JSON, use it directly
        let raw = r#"{"answer": "direct", "confidence": 1.0}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.source, CandidateSource::Direct);
    }

    #[test]
    fn handles_multiple_json_objects_picks_matching() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Config {
            model: String,
            temperature: f64,
        }

        let raw = r#"Here are two objects:
{"answer": "wrong type", "confidence": 0.5}
{"model": "gemini", "temperature": 0.3}"#;
        let result = parse_flexible::<Config>(raw).unwrap();
        assert_eq!(result.value.model, "gemini");
    }

    #[test]
    fn error_shows_all_candidates() {
        #[derive(Debug, Deserialize)]
        #[allow(dead_code)] // the field exists only to give serde something to look for
        struct Impossible {
            xyz_field_that_wont_match: i64,
        }

        let raw = "Just some plain text with no JSON";
        let err = parse_flexible::<Impossible>(raw).unwrap_err();
        assert!(err.to_string().contains("Failed to parse"));
    }

    // --- Edge cases ---

    #[test]
    fn handles_nested_json() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Nested {
            outer: Inner,
        }
        #[derive(Debug, Deserialize, PartialEq)]
        struct Inner {
            value: String,
        }

        let raw = r#"{"outer": {"value": "deep"}}"#;
        let result = parse_flexible::<Nested>(raw).unwrap();
        assert_eq!(result.value.outer.value, "deep");
    }

    #[test]
    fn handles_array_response() {
        let raw = r#"```json
[{"answer": "one", "confidence": 0.5}, {"answer": "two", "confidence": 0.8}]
```"#;
        let result = parse_flexible::<Vec<Answer>>(raw).unwrap();
        assert_eq!(result.value.len(), 2);
        assert_eq!(result.value[1].answer, "two");
    }

    #[test]
    fn handles_empty_input() {
        let err = parse_flexible::<Answer>("").unwrap_err();
        // Empty input offers nothing to extract, repair or cut, so no candidate
        // is ever attempted and the error carries the (empty) raw text.
        assert!(err.candidates.is_empty());
        assert!(err.raw.is_empty());
    }

    #[test]
    fn handles_unclosed_markdown_block() {
        let raw = r#"```json
{"answer": "streaming", "confidence": 0.3}
"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "streaming");
    }

    // --- Fixing strategies ---

    #[test]
    fn strip_trailing_commas_works() {
        assert_eq!(strip_trailing_commas(r#"{"a": 1,}"#), r#"{"a": 1}"#);
        assert_eq!(strip_trailing_commas(r#"[1, 2,]"#), r#"[1, 2]"#);
        // Don't strip inside strings
        assert_eq!(strip_trailing_commas(r#"{"a": "b,"}"#), r#"{"a": "b,"}"#);
    }

    #[test]
    fn close_brackets_works() {
        assert_eq!(close_brackets(r#"{"a": 1"#), r#"{"a": 1}"#);
        assert_eq!(close_brackets(r#"[1, [2"#), r#"[1, [2]]"#);
        assert_eq!(close_brackets(r#"{"a": "hello"#), r#"{"a": "hello"}"#);
    }

    #[test]
    fn truncation_recovery_drops_incomplete_element() {
        // Truncated mid-field in an array element — recovery should produce candidates
        let raw = r#"{"items":[{"id":1,"name":"ok"},{"id":2,"na"#;
        let candidates = truncation_recovery_candidates(raw);
        assert!(!candidates.is_empty(), "Should produce recovery candidates");
        // At least one candidate should have the first complete element
        let has_valid = candidates.iter().any(|c| {
            if let Ok(val) = serde_json::from_str::<serde_json::Value>(c) {
                val["items"]
                    .as_array()
                    .is_some_and(|a| !a.is_empty() && a[0]["id"] == 1)
            } else {
                false
            }
        });
        assert!(
            has_valid,
            "At least one candidate should have first complete element"
        );
    }

    #[test]
    fn truncation_recovery_streaming_action() {
        // Real-world case: truncated mid-action in NextStep
        #[derive(Debug, Deserialize)]
        struct Step {
            situation: String,
            actions: Vec<serde_json::Value>,
        }
        let raw = r#"{"situation":"working","actions":[{"tool":"read","path":"a.rs"},{"tool":"edit","path":"b.rs","old"#;
        let result = parse_flexible::<Step>(raw);
        assert!(result.is_ok(), "Should recover from truncated streaming");
        let step = result.unwrap().value;
        assert_eq!(step.situation, "working");
        // First complete action should survive, truncated second dropped
        assert!(!step.actions.is_empty());
    }

    #[test]
    fn unescape_double_wrapped_json() {
        #[derive(Debug, Deserialize)]
        struct Simple {
            msg: String,
        }

        let raw = r#""{\"msg\": \"hello world\"}""#;
        let result = parse_flexible::<Simple>(raw);
        assert!(result.is_ok(), "Should unescape double-wrapped JSON");
        assert_eq!(result.unwrap().value.msg, "hello world");
    }

    #[test]
    fn unescape_ignores_normal_strings() {
        // Normal quoted string that is NOT escaped JSON — should NOT be unescaped
        let result = try_unescape_json_string("\"just a normal string\"");
        assert!(result.is_none());
    }

    #[test]
    fn fix_single_quotes_works() {
        assert_eq!(fix_single_quotes("{'a': 'b'}"), r#"{"a": "b"}"#);
        // Don't touch singles inside double quotes
        assert_eq!(
            fix_single_quotes(r#"{"it's": "fine"}"#),
            r#"{"it's": "fine"}"#
        );
    }

    #[test]
    fn strip_comments_works() {
        assert_eq!(
            strip_comments("{\n// comment\n\"a\": 1\n}"),
            "{\n\n\"a\": 1\n}"
        );
        assert_eq!(strip_comments("{/* block */\"a\": 1}"), "{\"a\": 1}");
    }

    #[test]
    fn extract_markdown_blocks_multiple() {
        let raw = r#"First:
```json
{"a": 1}
```
Second:
```json
{"b": 2}
```"#;
        let blocks = extract_markdown_blocks(raw);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn extract_json_objects_finds_multiple() {
        let raw = r#"text {"a": 1} middle {"b": 2} end"#;
        let objects = extract_json_objects(raw);
        assert_eq!(objects.len(), 2);
    }

    #[test]
    fn extract_json_objects_nested_returns_outer() {
        let raw = r#"text {"outer": {"inner": 1}} more text"#;
        let objects = extract_json_objects(raw);
        // Outer matched first; inner is inside matched range so skipped
        assert_eq!(objects.len(), 1);
        assert!(objects[0].contains("outer"));
    }

    #[test]
    fn collect_candidates_deduplicates() {
        let raw = r#"{"answer": "test", "confidence": 0.5}"#;
        let candidates = collect_candidates(raw);
        // Direct + Grepped should be deduped
        let jsons: Vec<&str> = candidates.iter().map(|c| c.json.as_str()).collect();
        let unique: std::collections::HashSet<&&str> = jsons.iter().collect();
        assert_eq!(jsons.len(), unique.len());
    }
}