Skip to main content

sgr_agent/
flexible_parser.rs

1//! Flexible JSON parser — extracts structured data from messy LLM output.
2//!
3//! Inspired by BAML's "jsonish" SAP (Schema-Aligned Parsing) approach.
4//! Collects multiple parse candidates (AnyOf), tries to deserialize each
5//! into the target type `T`, returns the first success.
6//!
7//! Parse cascade:
8//! 1. Direct JSON (`serde_json::from_str`)
9//! 2. Markdown code blocks (````json ... ````)
10//! 3. Greedy JSON extraction (first `{...}` or `[...]` in text)
11//! 4. Fixing parser (close brackets, strip trailing commas, unquoted keys)
12//! 5. Fail with all candidates listed
13//!
14//! Works with any model — no structured output API required.
15
16use schemars::JsonSchema;
17use serde::de::DeserializeOwned;
18
19use crate::coerce::coerce_value;
20use crate::schema::response_schema_for;
21
22/// A parse candidate with provenance info for debugging.
23#[derive(Debug, Clone)]
24pub struct Candidate {
25    /// The JSON string to try deserializing.
26    pub json: String,
27    /// How this candidate was extracted.
28    pub source: CandidateSource,
29}
30
/// Names the extraction strategy that produced a parse candidate.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum CandidateSource {
    /// The (possibly unescaped) input itself already looked like JSON.
    Direct,
    /// Body of a fenced markdown code block.
    MarkdownBlock,
    /// A bracket-matched `{...}` or `[...]` span found inside other text.
    Grepped,
    /// Output of a repair pass (closed brackets, stripped trailing commas, etc).
    Fixed,
}
42
43/// Result of a flexible parse attempt.
44#[derive(Debug)]
45pub struct ParseResult<T> {
46    /// Successfully parsed value.
47    pub value: T,
48    /// Which candidate succeeded.
49    pub source: CandidateSource,
50    /// Total candidates tried.
51    pub candidates_tried: usize,
52}
53
54/// Parse error with all attempted candidates.
55#[derive(Debug)]
56pub struct ParseError {
57    /// All candidates that were tried.
58    pub candidates: Vec<(Candidate, String)>,
59    /// Original raw text.
60    pub raw: String,
61}
62
63impl std::fmt::Display for ParseError {
64    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
65        write!(
66            f,
67            "Failed to parse into target type. {} candidates tried",
68            self.candidates.len()
69        )?;
70        for (i, (candidate, err)) in self.candidates.iter().enumerate() {
71            write!(
72                f,
73                "\n  [{i}] {:?}: {}",
74                candidate.source,
75                truncate(err, 100)
76            )?;
77        }
78        Ok(())
79    }
80}
81
82impl std::error::Error for ParseError {}
83
84/// Parse raw LLM output into type `T` using the AnyOf cascade.
85///
86/// Tries multiple extraction strategies, returns the first successful parse.
87pub fn parse_flexible<T: DeserializeOwned>(raw: &str) -> Result<ParseResult<T>, ParseError> {
88    let candidates = collect_candidates(raw);
89    let mut errors = Vec::new();
90
91    for candidate in &candidates {
92        match serde_json::from_str::<T>(&candidate.json) {
93            Ok(value) => {
94                return Ok(ParseResult {
95                    value,
96                    source: candidate.source,
97                    candidates_tried: errors.len() + 1,
98                });
99            }
100            Err(e) => {
101                errors.push((candidate.clone(), e.to_string()));
102            }
103        }
104    }
105
106    Err(ParseError {
107        candidates: errors,
108        raw: raw.to_string(),
109    })
110}
111
112/// Parse with schema-aware coercion: "42" → 42, "true" → true, "redd" → "Red".
113///
114/// First tries `parse_flexible` (strict serde). If all candidates fail,
115/// retries each candidate with coercion applied before deserialization.
116pub fn parse_flexible_coerced<T: JsonSchema + DeserializeOwned>(
117    raw: &str,
118) -> Result<ParseResult<T>, ParseError> {
119    // Try strict first — no coercion overhead if JSON is clean
120    if let Ok(result) = parse_flexible::<T>(raw) {
121        return Ok(result);
122    }
123
124    // Retry with coercion
125    let candidates = collect_candidates(raw);
126    let schema = response_schema_for::<T>();
127    let mut errors = Vec::new();
128
129    for candidate in &candidates {
130        // Parse to Value, coerce, then deserialize
131        if let Ok(mut value) = serde_json::from_str::<serde_json::Value>(&candidate.json) {
132            coerce_value(&mut value, &schema);
133            match serde_json::from_value::<T>(value) {
134                Ok(parsed) => {
135                    return Ok(ParseResult {
136                        value: parsed,
137                        source: candidate.source,
138                        candidates_tried: errors.len() + 1,
139                    });
140                }
141                Err(e) => {
142                    errors.push((candidate.clone(), format!("coerced: {}", e)));
143                }
144            }
145        } else {
146            errors.push((candidate.clone(), "invalid JSON even for Value".into()));
147        }
148    }
149
150    Err(ParseError {
151        candidates: errors,
152        raw: raw.to_string(),
153    })
154}
155
156/// Collect all parse candidates from raw text (AnyOf pattern).
157pub fn collect_candidates(raw: &str) -> Vec<Candidate> {
158    let mut candidates = Vec::new();
159
160    // 0. Unescape double-wrapped JSON string: "{ \"key\": ... }" → { "key": ... }
161    let effective = try_unescape_json_string(raw).unwrap_or_else(|| raw.to_string());
162    let raw = effective.as_str();
163
164    // 1. Direct JSON parse
165    if looks_like_json(raw) {
166        candidates.push(Candidate {
167            json: raw.to_string(),
168            source: CandidateSource::Direct,
169        });
170    }
171
172    // 2. Markdown code blocks
173    for block in extract_markdown_blocks(raw) {
174        candidates.push(Candidate {
175            json: block,
176            source: CandidateSource::MarkdownBlock,
177        });
178    }
179
180    // 3. Greedy JSON extraction
181    for json in extract_json_objects(raw) {
182        // Skip if we already have this exact string as a candidate
183        if !candidates.iter().any(|c| c.json == json) {
184            candidates.push(Candidate {
185                json,
186                source: CandidateSource::Grepped,
187            });
188        }
189    }
190
191    // 4. Try fixing each candidate that failed
192    let fixable: Vec<String> = candidates.iter().map(|c| c.json.clone()).collect();
193    for json in &fixable {
194        if let Some(fixed) = try_fix_json(json) {
195            if !candidates.iter().any(|c| c.json == fixed) {
196                candidates.push(Candidate {
197                    json: fixed,
198                    source: CandidateSource::Fixed,
199                });
200            }
201        }
202    }
203
204    // Also try fixing the raw input directly if no candidates yet
205    if candidates.is_empty()
206        || !candidates
207            .iter()
208            .any(|c| c.source == CandidateSource::Direct)
209    {
210        if let Some(fixed) = try_fix_json(raw) {
211            if !candidates.iter().any(|c| c.json == fixed) {
212                candidates.push(Candidate {
213                    json: fixed,
214                    source: CandidateSource::Fixed,
215                });
216            }
217        }
218    }
219
220    // 5. Truncation recovery — try progressively aggressive cuts for streaming
221    // (only if no Fixed candidate parsed as valid Value with all required fields)
222    for json_source in [raw]
223        .iter()
224        .chain(fixable.iter().map(|s| s as &str).collect::<Vec<_>>().iter())
225    {
226        for recovered in truncation_recovery_candidates(json_source) {
227            if !candidates.iter().any(|c| c.json == recovered) {
228                candidates.push(Candidate {
229                    json: recovered,
230                    source: CandidateSource::Fixed,
231                });
232            }
233        }
234    }
235
236    candidates
237}
238
239// ============================================================================
240// Extraction strategies
241// ============================================================================
242
243/// Extract JSON from markdown code blocks: ```json\n...\n``` or ```\n...\n```
244fn extract_markdown_blocks(text: &str) -> Vec<String> {
245    let mut blocks = Vec::new();
246    let mut rest = text;
247
248    while let Some(start) = rest.find("```") {
249        let after_ticks = &rest[start + 3..];
250
251        // Skip optional language tag (e.g., "json", "JSON", "jsonc")
252        let content_start = if let Some(newline) = after_ticks.find('\n') {
253            newline + 1
254        } else {
255            break;
256        };
257        let content = &after_ticks[content_start..];
258
259        // Find closing ```
260        if let Some(end) = content.find("```") {
261            let block = content[..end].trim();
262            if !block.is_empty() && looks_like_json(block) {
263                blocks.push(block.to_string());
264            }
265            rest = &content[end + 3..];
266        } else {
267            // Unclosed code block — try to parse what we have
268            let block = content.trim();
269            if !block.is_empty() && looks_like_json(block) {
270                blocks.push(block.to_string());
271            }
272            break;
273        }
274    }
275
276    blocks
277}
278
279/// Find JSON objects `{...}` and arrays `[...]` in text using bracket matching.
280fn extract_json_objects(text: &str) -> Vec<String> {
281    let mut results = Vec::new();
282
283    for open in ['{', '['] {
284        let close = if open == '{' { '}' } else { ']' };
285        let mut search_from = 0;
286
287        while let Some(start) = text[search_from..].find(open) {
288            let abs_start = search_from + start;
289            if let Some(end) = find_matching_bracket(text, abs_start, open, close) {
290                let json = &text[abs_start..=end];
291                if !results.contains(&json.to_string()) {
292                    results.push(json.to_string());
293                }
294                search_from = end + 1;
295            } else {
296                // No matching bracket — try with auto-close
297                search_from = abs_start + 1;
298            }
299        }
300    }
301
302    results
303}
304
/// Return the byte index of the `close` bracket that balances the bracket at
/// `start`, or `None` when the text ends first.
///
/// Only `open`/`close` affect the depth; brackets inside double-quoted
/// strings are ignored and backslash escapes inside strings are honoured.
/// The scan is byte-wise: the characters of interest are ASCII, and bytes of
/// multi-byte UTF-8 sequences can never compare equal to them.
fn find_matching_bracket(text: &str, start: usize, open: char, close: char) -> Option<usize> {
    let mut depth = 0i32;
    let mut inside_string = false;
    let mut skip_next = false;

    for (index, &byte) in text.as_bytes().iter().enumerate().skip(start) {
        if skip_next {
            // Character following a backslash inside a string.
            skip_next = false;
            continue;
        }

        match byte as char {
            '\\' if inside_string => skip_next = true,
            '"' => inside_string = !inside_string,
            _ if inside_string => {}
            ch if ch == open => depth += 1,
            ch if ch == close => {
                depth -= 1;
                if depth == 0 {
                    return Some(index);
                }
            }
            _ => {}
        }
    }

    None
}
350
351// ============================================================================
352// JSON fixing
353// ============================================================================
354
355/// Try to fix common JSON errors. Returns None if unfixable.
356fn try_fix_json(raw: &str) -> Option<String> {
357    let trimmed = raw.trim();
358
359    // Already valid? No fix needed.
360    if serde_json::from_str::<serde_json::Value>(trimmed).is_ok() {
361        return None;
362    }
363
364    let mut fixed = trimmed.to_string();
365    let mut changed = false;
366
367    // Fix 1: Strip trailing commas before } or ]
368    let re_trailing = strip_trailing_commas(&fixed);
369    if re_trailing != fixed {
370        fixed = re_trailing;
371        changed = true;
372    }
373
374    // Fix 2: Close unclosed brackets/braces
375    let closed = close_brackets(&fixed);
376    if closed != fixed {
377        fixed = closed;
378        changed = true;
379    }
380
381    // Fix 3: Single quotes → double quotes (outside of double-quoted strings)
382    let quoted = fix_single_quotes(&fixed);
383    if quoted != fixed {
384        fixed = quoted;
385        changed = true;
386    }
387
388    // Fix 4: Strip JS-style comments (// and /* */)
389    let uncommented = strip_comments(&fixed);
390    if uncommented != fixed {
391        fixed = uncommented;
392        changed = true;
393    }
394
395    // Verify the fix actually produces valid JSON
396    if changed && serde_json::from_str::<serde_json::Value>(&fixed).is_ok() {
397        Some(fixed)
398    } else {
399        None
400    }
401}
402
/// Remove commas that are followed (ignoring whitespace) by `}` or `]`:
/// `{a: 1,}` → `{a: 1}`.
///
/// Double-quoted string literals are copied through verbatim, so commas
/// inside them are never touched.
fn strip_trailing_commas(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let mut out = String::with_capacity(s.len());
    let mut pos = 0;

    while let Some(&current) = chars.get(pos) {
        pos += 1;
        match current {
            '"' => {
                out.push('"');
                // Copy the literal up to and including its closing quote.
                while let Some(&inner) = chars.get(pos) {
                    pos += 1;
                    out.push(inner);
                    if inner == '\\' {
                        // Keep the escaped character so an escaped quote
                        // does not end the literal.
                        if let Some(&escaped) = chars.get(pos) {
                            out.push(escaped);
                            pos += 1;
                        }
                    } else if inner == '"' {
                        break;
                    }
                }
            }
            ',' => {
                let next_significant = chars[pos..].iter().copied().find(|c| !c.is_whitespace());
                if !matches!(next_significant, Some('}' | ']')) {
                    out.push(',');
                }
            }
            other => out.push(other),
        }
    }

    out
}
447
/// Append whatever is needed to balance the input:
/// `{"a": [1, 2` → `{"a": [1, 2]}`.
///
/// An unterminated double-quoted string is closed first, then every still
/// open `{` / `[` is closed, innermost first. Nothing is removed from the
/// input; balanced input comes back unchanged.
fn close_brackets(s: &str) -> String {
    let mut pending_closers: Vec<char> = Vec::new();
    let mut inside_string = false;
    let mut skip_next = false;

    for ch in s.chars() {
        if skip_next {
            skip_next = false;
            continue;
        }
        match ch {
            '\\' if inside_string => skip_next = true,
            '"' => inside_string = !inside_string,
            _ if inside_string => {}
            '{' => pending_closers.push('}'),
            '[' => pending_closers.push(']'),
            '}' | ']' => {
                // Any closer cancels the most recent opener.
                pending_closers.pop();
            }
            _ => {}
        }
    }

    let mut out = String::with_capacity(s.len() + pending_closers.len() + 1);
    out.push_str(s);
    if inside_string {
        out.push('"');
    }
    out.extend(pending_closers.into_iter().rev());
    out
}
500
501/// Truncation recovery: find cut points and generate multiple candidates.
502///
503/// For `{"a":[{"b":1},{"c":2,"d` generates:
504/// - Cut at inner comma: `{"a":[{"b":1},{"c":2}]}` (partial element)
505/// - Cut at outer comma: `{"a":[{"b":1}]}` (drop incomplete element)
506///
507/// Returns all valid JSON candidates, most aggressive cut last (so AnyOf tries
508/// the most complete version first).
509fn truncation_recovery_candidates(s: &str) -> Vec<String> {
510    // Collect all cut points: commas and closing brackets (outside strings)
511    // Use byte positions (not char indices) for correct slicing with Unicode
512    let mut cut_points = Vec::new();
513    let mut in_string = false;
514    let mut escape_next = false;
515
516    for (byte_pos, ch) in s.char_indices() {
517        if escape_next {
518            escape_next = false;
519            continue;
520        }
521        if ch == '\\' && in_string {
522            escape_next = true;
523            continue;
524        }
525        if ch == '"' {
526            in_string = !in_string;
527            continue;
528        }
529        if in_string {
530            continue;
531        }
532        match ch {
533            ',' => cut_points.push(byte_pos),
534            '}' | ']' => cut_points.push(byte_pos + 1),
535            _ => {}
536        }
537    }
538
539    // Try cuts from rightmost (most data kept) to leftmost (most data dropped)
540    let mut results = Vec::new();
541    for &cut in cut_points.iter().rev() {
542        if cut == 0 || cut >= s.len() {
543            continue;
544        }
545        if let Some(candidate) = try_close_at(s, cut) {
546            if !results.contains(&candidate) {
547                results.push(candidate);
548            }
549        }
550    }
551
552    results
553}
554
555/// Try cutting the string at `pos` and closing all open brackets.
556fn try_close_at(s: &str, pos: usize) -> Option<String> {
557    let mut truncated = s[..pos].trim_end().to_string();
558
559    // Strip trailing comma
560    if truncated.ends_with(',') {
561        truncated.pop();
562    }
563
564    // Close open brackets
565    let mut stack = Vec::new();
566    let mut in_str = false;
567    let mut esc = false;
568    for ch in truncated.chars() {
569        if esc {
570            esc = false;
571            continue;
572        }
573        if ch == '\\' && in_str {
574            esc = true;
575            continue;
576        }
577        if ch == '"' {
578            in_str = !in_str;
579            continue;
580        }
581        if !in_str {
582            match ch {
583                '{' => stack.push('}'),
584                '[' => stack.push(']'),
585                '}' | ']' => {
586                    stack.pop();
587                }
588                _ => {}
589            }
590        }
591    }
592    if in_str {
593        truncated.push('"');
594    }
595    while let Some(close) = stack.pop() {
596        truncated.push(close);
597    }
598
599    if serde_json::from_str::<serde_json::Value>(&truncated).is_ok() {
600        Some(truncated)
601    } else {
602        None
603    }
604}
605
/// Replace every `'` that is not inside a double-quoted string with `"`.
///
/// Double-quoted strings (including their backslash escapes) pass through
/// untouched, so apostrophes inside them survive.
fn fix_single_quotes(s: &str) -> String {
    let mut out = String::with_capacity(s.len());
    let mut inside_double = false;
    let mut pending_escape = false;

    for ch in s.chars() {
        if pending_escape {
            // Character following a backslash inside a double-quoted string.
            pending_escape = false;
            out.push(ch);
            continue;
        }

        let emitted = match ch {
            '\\' => {
                // Escapes only carry meaning inside a double-quoted string.
                pending_escape = inside_double;
                '\\'
            }
            '"' => {
                inside_double = !inside_double;
                '"'
            }
            '\'' if !inside_double => '"',
            other => other,
        };
        out.push(emitted);
    }

    out
}
639
/// Remove JS-style comments: `// ...` up to (not including) the newline, and
/// `/* ... */` blocks.
///
/// Comment markers inside double-quoted strings are left alone.
fn strip_comments(s: &str) -> String {
    let chars: Vec<char> = s.chars().collect();
    let total = chars.len();
    let mut out = String::with_capacity(s.len());
    let mut inside_string = false;
    let mut pos = 0;

    while pos < total {
        let current = chars[pos];
        let following = chars.get(pos + 1).copied();

        if inside_string {
            out.push(current);
            pos += 1;
            match (current, following) {
                // Copy the escaped character so `\"` does not end the string.
                ('\\', Some(escaped)) => {
                    out.push(escaped);
                    pos += 1;
                }
                ('"', _) => inside_string = false,
                _ => {}
            }
            continue;
        }

        match (current, following) {
            ('"', _) => {
                inside_string = true;
                out.push('"');
                pos += 1;
            }
            ('/', Some('/')) => {
                // Jump to the newline, or to the end when there is none.
                pos = chars[pos..]
                    .iter()
                    .position(|&c| c == '\n')
                    .map_or(total, |offset| pos + offset);
            }
            ('/', Some('*')) => {
                pos += 2;
                while pos + 1 < total && !(chars[pos] == '*' && chars[pos + 1] == '/') {
                    pos += 1;
                }
                pos += 2; // skip */
            }
            _ => {
                out.push(current);
                pos += 1;
            }
        }
    }

    out
}
690
691// ============================================================================
692// Helpers
693// ============================================================================
694
695/// Try to unescape a double-wrapped JSON string.
696///
697/// Some models output JSON as a string literal: `"{ \"key\": \"value\" }"`
698/// This detects and unescapes it back to `{ "key": "value" }`.
699fn try_unescape_json_string(raw: &str) -> Option<String> {
700    let trimmed = raw.trim();
701    // Must start and end with quotes
702    if !trimmed.starts_with('"') || !trimmed.ends_with('"') || trimmed.len() < 3 {
703        return None;
704    }
705    // Inner content must look like escaped JSON (contains \")
706    let inner = &trimmed[1..trimmed.len() - 1];
707    if !inner.contains("\\\"") {
708        return None;
709    }
710    // Try to parse as a JSON string, which gives us the unescaped content
711    match serde_json::from_str::<String>(trimmed) {
712        Ok(unescaped) if looks_like_json(&unescaped) => Some(unescaped),
713        _ => None,
714    }
715}
716
/// Cheap shape check: is the trimmed text wrapped in `{}` or `[]`, one of
/// the literals `null` / `true` / `false`, or does it start with a double
/// quote? Nothing is parsed.
fn looks_like_json(s: &str) -> bool {
    let text = s.trim();
    let wrapped_in = |open: char, close: char| text.starts_with(open) && text.ends_with(close);

    wrapped_in('{', '}')
        || wrapped_in('[', ']')
        || matches!(text, "null" | "true" | "false")
        || text.starts_with('"')
}
726
/// Return the longest prefix of `s` that is at most `max` bytes long and
/// ends on a UTF-8 character boundary.
///
/// The boundary search uses `is_char_boundary` rather than
/// `str::floor_char_boundary`, so the function also builds on toolchains
/// that lack the latter.
fn truncate(s: &str, max: usize) -> &str {
    if s.len() <= max {
        return s;
    }

    // Offset 0 is always a boundary, so the loop terminates.
    let mut end = max;
    while !s.is_char_boundary(end) {
        end -= 1;
    }
    &s[..end]
}
734
735// ============================================================================
736// Tests
737// ============================================================================
738
#[cfg(test)]
mod tests {
    use super::*;
    use serde::Deserialize;

    /// Minimal target type shared by most tests.
    #[derive(Debug, Deserialize, PartialEq)]
    struct Answer {
        answer: String,
        confidence: f64,
    }

    // --- Direct JSON ---

    #[test]
    fn parses_clean_json() {
        let raw = r#"{"answer": "42", "confidence": 0.95}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "42");
        assert_eq!(result.source, CandidateSource::Direct);
    }

    // --- Markdown blocks ---

    #[test]
    fn parses_from_markdown_block() {
        let raw = r#"Here's my answer:

```json
{"answer": "hello", "confidence": 0.8}
```

Hope that helps!"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "hello");
        assert_eq!(result.source, CandidateSource::MarkdownBlock);
    }

    #[test]
    fn parses_from_unlabeled_markdown_block() {
        let raw = r#"Sure:

```
{"answer": "test", "confidence": 0.5}
```"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "test");
        assert_eq!(result.source, CandidateSource::MarkdownBlock);
    }

    // --- Grepped JSON ---

    #[test]
    fn extracts_json_from_surrounding_text() {
        let raw =
            r#"I think the answer is {"answer": "yes", "confidence": 0.9} based on my analysis."#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "yes");
        assert_eq!(result.source, CandidateSource::Grepped);
    }

    #[test]
    fn extracts_json_after_chain_of_thought() {
        let raw = r#"Let me think step by step...
First, I need to consider the question carefully.
The answer seems clear.

{"answer": "deep thought", "confidence": 0.99}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "deep thought");
    }

    // --- Fixed JSON ---

    #[test]
    fn fixes_trailing_comma() {
        let raw = r#"{"answer": "fixed", "confidence": 0.7,}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "fixed");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_unclosed_brackets() {
        let raw = r#"{"answer": "partial", "confidence": 0.6"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "partial");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_single_quotes() {
        let raw = r#"{'answer': 'quoted', 'confidence': 0.5}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "quoted");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    #[test]
    fn fixes_js_comments() {
        let raw = r#"{
            // This is the answer
            "answer": "commented",
            "confidence": 0.4
        }"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "commented");
        assert_eq!(result.source, CandidateSource::Fixed);
    }

    // --- Combined scenarios ---

    #[test]
    fn prefers_direct_over_markdown() {
        // If the whole input is valid JSON, use it directly
        let raw = r#"{"answer": "direct", "confidence": 1.0}"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.source, CandidateSource::Direct);
    }

    #[test]
    fn handles_multiple_json_objects_picks_matching() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Config {
            model: String,
            temperature: f64,
        }

        let raw = r#"Here are two objects:
{"answer": "wrong type", "confidence": 0.5}
{"model": "gemini", "temperature": 0.3}"#;
        let result = parse_flexible::<Config>(raw).unwrap();
        assert_eq!(result.value.model, "gemini");
    }

    #[test]
    fn error_shows_all_candidates() {
        #[derive(Debug, Deserialize)]
        #[allow(dead_code)]
        struct Impossible {
            xyz_field_that_wont_match: i64,
        }

        let raw = "Just some plain text with no JSON";
        let err = parse_flexible::<Impossible>(raw).unwrap_err();
        assert!(err.to_string().contains("Failed to parse"));
    }

    // --- Edge cases ---

    #[test]
    fn handles_nested_json() {
        #[derive(Debug, Deserialize, PartialEq)]
        struct Nested {
            outer: Inner,
        }
        #[derive(Debug, Deserialize, PartialEq)]
        struct Inner {
            value: String,
        }

        let raw = r#"{"outer": {"value": "deep"}}"#;
        let result = parse_flexible::<Nested>(raw).unwrap();
        assert_eq!(result.value.outer.value, "deep");
    }

    #[test]
    fn handles_array_response() {
        let raw = r#"```json
[{"answer": "one", "confidence": 0.5}, {"answer": "two", "confidence": 0.8}]
```"#;
        let result = parse_flexible::<Vec<Answer>>(raw).unwrap();
        assert_eq!(result.value.len(), 2);
        assert_eq!(result.value[1].answer, "two");
    }

    #[test]
    fn handles_empty_input() {
        // Empty input yields no candidates at all: the call must fail
        // cleanly (no panic) and echo the raw text back.
        let err = parse_flexible::<Answer>("").unwrap_err();
        assert!(err.candidates.is_empty());
        assert_eq!(err.raw, "");
    }

    #[test]
    fn handles_unclosed_markdown_block() {
        let raw = r#"```json
{"answer": "streaming", "confidence": 0.3}
"#;
        let result = parse_flexible::<Answer>(raw).unwrap();
        assert_eq!(result.value.answer, "streaming");
    }

    // --- Fixing strategies ---

    #[test]
    fn strip_trailing_commas_works() {
        assert_eq!(strip_trailing_commas(r#"{"a": 1,}"#), r#"{"a": 1}"#);
        assert_eq!(strip_trailing_commas(r#"[1, 2,]"#), r#"[1, 2]"#);
        // Don't strip inside strings
        assert_eq!(strip_trailing_commas(r#"{"a": "b,"}"#), r#"{"a": "b,"}"#);
    }

    #[test]
    fn close_brackets_works() {
        assert_eq!(close_brackets(r#"{"a": 1"#), r#"{"a": 1}"#);
        assert_eq!(close_brackets(r#"[1, [2"#), r#"[1, [2]]"#);
        assert_eq!(close_brackets(r#"{"a": "hello"#), r#"{"a": "hello"}"#);
    }

    #[test]
    fn truncation_recovery_drops_incomplete_element() {
        // Truncated mid-field in an array element — recovery should produce candidates
        let raw = r#"{"items":[{"id":1,"name":"ok"},{"id":2,"na"#;
        let candidates = truncation_recovery_candidates(raw);
        assert!(!candidates.is_empty(), "Should produce recovery candidates");
        // At least one candidate should have the first complete element
        let has_valid = candidates.iter().any(|c| {
            if let Ok(val) = serde_json::from_str::<serde_json::Value>(c) {
                val["items"]
                    .as_array()
                    .is_some_and(|a| !a.is_empty() && a[0]["id"] == 1)
            } else {
                false
            }
        });
        assert!(
            has_valid,
            "At least one candidate should have first complete element"
        );
    }

    #[test]
    fn truncation_recovery_streaming_action() {
        // Real-world case: truncated mid-action in NextStep
        #[derive(Debug, Deserialize)]
        struct Step {
            situation: String,
            actions: Vec<serde_json::Value>,
        }
        let raw = r#"{"situation":"working","actions":[{"tool":"read","path":"a.rs"},{"tool":"edit","path":"b.rs","old"#;
        let result = parse_flexible::<Step>(raw);
        assert!(result.is_ok(), "Should recover from truncated streaming");
        let step = result.unwrap().value;
        assert_eq!(step.situation, "working");
        // First complete action should survive, truncated second dropped
        assert!(!step.actions.is_empty());
    }

    #[test]
    fn unescape_double_wrapped_json() {
        #[derive(Debug, Deserialize)]
        struct Simple {
            msg: String,
        }

        let raw = r#""{\"msg\": \"hello world\"}""#;
        let result = parse_flexible::<Simple>(raw);
        assert!(result.is_ok(), "Should unescape double-wrapped JSON");
        assert_eq!(result.unwrap().value.msg, "hello world");
    }

    #[test]
    fn unescape_ignores_normal_strings() {
        // Normal quoted string that is NOT escaped JSON — should NOT be unescaped
        let result = try_unescape_json_string("\"just a normal string\"");
        assert!(result.is_none());
    }

    #[test]
    fn fix_single_quotes_works() {
        assert_eq!(fix_single_quotes("{'a': 'b'}"), r#"{"a": "b"}"#);
        // Don't touch singles inside double quotes
        assert_eq!(
            fix_single_quotes(r#"{"it's": "fine"}"#),
            r#"{"it's": "fine"}"#
        );
    }

    #[test]
    fn strip_comments_works() {
        assert_eq!(
            strip_comments("{\n// comment\n\"a\": 1\n}"),
            "{\n\n\"a\": 1\n}"
        );
        assert_eq!(strip_comments("{/* block */\"a\": 1}"), "{\"a\": 1}");
    }

    #[test]
    fn extract_markdown_blocks_multiple() {
        let raw = r#"First:
```json
{"a": 1}
```
Second:
```json
{"b": 2}
```"#;
        let blocks = extract_markdown_blocks(raw);
        assert_eq!(blocks.len(), 2);
    }

    #[test]
    fn extract_json_objects_finds_multiple() {
        let raw = r#"text {"a": 1} middle {"b": 2} end"#;
        let objects = extract_json_objects(raw);
        assert_eq!(objects.len(), 2);
    }

    #[test]
    fn extract_json_objects_nested_returns_outer() {
        let raw = r#"text {"outer": {"inner": 1}} more text"#;
        let objects = extract_json_objects(raw);
        // Outer matched first; inner is inside matched range so skipped
        assert_eq!(objects.len(), 1);
        assert!(objects[0].contains("outer"));
    }

    #[test]
    fn collect_candidates_deduplicates() {
        let raw = r#"{"answer": "test", "confidence": 0.5}"#;
        let candidates = collect_candidates(raw);
        // Direct + Grepped should be deduped
        let jsons: Vec<&str> = candidates.iter().map(|c| c.json.as_str()).collect();
        let unique: std::collections::HashSet<&&str> = jsons.iter().collect();
        assert_eq!(jsons.len(), unique.len());
    }
}
1059        let jsons: Vec<&str> = candidates.iter().map(|c| c.json.as_str()).collect();
1060        let unique: std::collections::HashSet<&&str> = jsons.iter().collect();
1061        assert_eq!(jsons.len(), unique.len());
1062    }
1063}