chill_json/
lib.rs

1use serde_json::Value;
2use std::fmt::Debug;
3use thiserror::Error;
4
5#[derive(Error, Debug)]
6pub enum FuzzyJsonError {
7    #[error("Invalid JSON at position {pos}: {msg}")]
8    ParseError { pos: usize, msg: String },
9    #[error("Repair failed: {0}")]
10    RepairFailed(String),
11    #[error("JSON error: {0}")]
12    JsonError(#[from] serde_json::Error),
13}
14
/// Syntactic position recorded on the parser's context stack.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonContext {
    /// Bottom entry of every context stack; [`ParseState::pop_context`] never removes it.
    Root,
    /// Inside an object.
    Object,
    /// Inside an array.
    Array,
    /// NOTE(review): presumably "reading a property name" — confirm against the handlers.
    Property,
    /// NOTE(review): presumably "reading a value" — confirm against the handlers.
    Value,
    /// NOTE(review): presumably "a colon was seen, value pending" — confirm against the handlers.
    Colon,
}

/// Cursor over the input text together with the repaired output built so far.
///
/// `position` is measured in `char`s (not bytes) from the start of `input`.
#[derive(Debug, Clone)]
pub struct ParseState {
    /// Text being parsed.
    pub input: String,
    /// Index of the current character, counted in `char`s.
    pub position: usize,
    /// Context stack; created with a single [`JsonContext::Root`] entry.
    pub stack: Vec<JsonContext>,
    /// Repaired JSON accumulated so far.
    pub output: String,
}

impl ParseState {
    /// Creates a state positioned at the first character of `input`, with an empty
    /// output and a context stack holding only [`JsonContext::Root`].
    pub fn new(input: String) -> Self {
        Self {
            input,
            position: 0,
            stack: vec![JsonContext::Root],
            output: String::new(),
        }
    }

    /// Returns the character under the cursor, or `None` at the end of the input.
    pub fn current_char(&self) -> Option<char> {
        self.remaining().chars().next()
    }

    /// Returns up to `count` characters starting at the cursor without consuming them.
    pub fn peek_chars(&self, count: usize) -> String {
        self.remaining().chars().take(count).collect()
    }

    /// Consumes up to `count` characters and returns them.
    ///
    /// The cursor moves by the number of characters actually consumed, so it never
    /// ends up beyond the end of the input (the previous implementation added `count`
    /// unconditionally, which could overshoot and even overflow).
    pub fn advance(&mut self, count: usize) -> String {
        let consumed: String = self.remaining().chars().take(count).collect();
        self.position += consumed.chars().count();
        consumed
    }

    /// Returns the unconsumed tail of the input, or `""` once the cursor is at the end.
    pub fn remaining(&self) -> &str {
        // `position` is a char index; translate it to a byte offset before slicing.
        self.input
            .char_indices()
            .nth(self.position)
            .map_or("", |(start_byte, _)| &self.input[start_byte..])
    }

    /// Returns `true` when no input is left to consume.
    pub fn is_finished(&self) -> bool {
        self.remaining().is_empty()
    }

    /// Returns the innermost context, falling back to [`JsonContext::Root`] if the
    /// stack has been emptied from outside.
    pub fn current_context(&self) -> &JsonContext {
        self.stack.last().unwrap_or(&JsonContext::Root)
    }

    /// Pushes `context` as the new innermost context.
    pub fn push_context(&mut self, context: JsonContext) {
        self.stack.push(context);
    }

    /// Pops the innermost context, refusing to remove the last remaining entry.
    ///
    /// Returns `None` when only one entry is left on the stack.
    pub fn pop_context(&mut self) -> Option<JsonContext> {
        if self.stack.len() > 1 {
            self.stack.pop()
        } else {
            None
        }
    }
}
89
90pub trait RepairStrategy: Send + Sync + Debug {
91    fn name(&self) -> &'static str;
92    fn can_repair(&self, state: &ParseState, error: &str) -> bool;
93    fn repair(&self, state: &mut ParseState, error: &str) -> Result<(), FuzzyJsonError>;
94    fn priority(&self) -> u8; // Higher priority strategies are tried first
95}
96
97pub trait StateHandler: Send + Sync + Debug {
98    fn can_handle(&self, state: &ParseState) -> bool;
99    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError>; // Returns true if parsing should continue
100}
101
102#[derive(Default, Debug)]
103pub struct FuzzyJsonParser {
104    repair_strategies: Vec<Box<dyn RepairStrategy>>,
105    state_handlers: Vec<Box<dyn StateHandler>>,
106    options: ParserOptions,
107}
108
/// Behaviour switches for `FuzzyJsonParser`.
#[derive(Debug, Clone)]
pub struct ParserOptions {
    /// When `false`, a strict-parse failure is reported instead of attempting repair.
    pub auto_repair: bool,
    /// NOTE(review): not read in the visible part of this file — confirm where it is enforced.
    pub allow_trailing_commas: bool,
    /// NOTE(review): not read in the visible part of this file — confirm where it is enforced.
    pub allow_comments: bool,
    /// NOTE(review): not read in the visible part of this file — confirm where it is enforced.
    pub allow_single_quotes: bool,
    /// NOTE(review): not read in the visible part of this file — confirm where it is enforced.
    pub allow_unquoted_keys: bool,
    /// Upper bound on iterations of the repair loop.
    pub max_repair_attempts: usize,
    /// NOTE(review): not read in the visible part of this file — confirm where it is enforced.
    pub strict_mode: bool,
    /// Enables scope auto-closing for truncated input (e.g. cut-off LLM responses).
    pub aggressive_truncation_repair: bool,
}

impl Default for ParserOptions {
    /// Lenient defaults: repair enabled, everything tolerated except unquoted keys,
    /// and up to 1500 repair iterations.
    fn default() -> Self {
        Self {
            auto_repair: true,
            allow_trailing_commas: true,
            allow_comments: true,
            allow_single_quotes: true,
            allow_unquoted_keys: false,
            max_repair_attempts: 1500,
            strict_mode: false,
            // On by default because truncated LLM responses are the main use case.
            aggressive_truncation_repair: true,
        }
    }
}
135
136impl FuzzyJsonParser {
137    pub fn new() -> Self {
138        let mut parser = Self::default();
139        parser.register_default_strategies();
140        parser.register_default_handlers();
141        parser
142    }
143
144    /*
145    /// Parse with aggressive scope closing for truncated LLM responses
146    pub fn parse_with_auto_close<T>(&self, json_str: &str) -> Result<T, FuzzyJsonError>
147    where
148        T: serde::de::DeserializeOwned,
149    {
150        // First try normal parsing
151        match self.parse_value(json_str) {
152            Ok(value) => return serde_json::from_value(value).map_err(FuzzyJsonError::JsonError),
153            Err(_) => {
154                // Try with aggressive scope closing
155                // println!("Try with aggressive scope closing");
156                let closed_json = self.aggressively_close_scopes(json_str)?;
157                let value = self.parse_value(&closed_json)?;
158                serde_json::from_value(value).map_err(FuzzyJsonError::JsonError)
159            }
160        }
161    }*/
162
163    /// Aggressively close all unclosed scopes in potentially truncated JSON
164    pub fn aggressively_close_scopes(&self, json_str: &str) -> Result<String, FuzzyJsonError> {
165        if !self.options.aggressive_truncation_repair {
166            return Ok(json_str.to_string());
167        }
168        let mut state = ParseState::new(json_str.trim().to_string());
169        let mut in_string = false;
170        let mut string_quote_char = '"';
171        let mut escape_next = false;
172
173        // Track unclosed scopes with their positions for better error reporting
174        let mut scope_stack: Vec<(JsonContext, usize)> = vec![(JsonContext::Root, 0)];
175
176        while !state.is_finished() {
177            let ch = match state.current_char() {
178                Some(c) => c,
179                None => break,
180            };
181
182            if escape_next {
183                state.output.push(ch);
184                state.advance(1);
185                escape_next = false;
186                continue;
187            }
188
189            match ch {
190                '\\' if in_string => {
191                    state.output.push(ch);
192                    state.advance(1);
193                    escape_next = true;
194                }
195                '"' | '\'' if !in_string => {
196                    in_string = true;
197                    string_quote_char = ch;
198                    state.output.push(ch);
199                    state.advance(1);
200                }
201                c if in_string && c == string_quote_char => {
202                    in_string = false;
203                    state.output.push(ch);
204                    state.advance(1);
205                }
206                '{' if !in_string => {
207                    scope_stack.push((JsonContext::Object, state.position));
208                    state.output.push(ch);
209                    state.advance(1);
210                }
211                '[' if !in_string => {
212                    scope_stack.push((JsonContext::Array, state.position));
213                    state.output.push(ch);
214                    state.advance(1);
215                }
216                '}' if !in_string => {
217                    if let Some((JsonContext::Object, _)) = scope_stack.last() {
218                        scope_stack.pop();
219                    }
220                    state.output.push(ch);
221                    state.advance(1);
222                }
223                ']' if !in_string => {
224                    if let Some((JsonContext::Array, _)) = scope_stack.last() {
225                        scope_stack.pop();
226                    }
227                    state.output.push(ch);
228                    state.advance(1);
229                }
230                _ => {
231                    state.output.push(ch);
232                    state.advance(1);
233                }
234            }
235        }
236
237        // Now aggressively close all unclosed scopes
238        self.close_remaining_scopes(&mut state, in_string, string_quote_char, scope_stack)?;
239
240        Ok(state.output)
241    }
242
243    fn close_remaining_scopes(
244        &self,
245        state: &mut ParseState,
246        in_string: bool,
247        string_quote_char: char,
248        mut scope_stack: Vec<(JsonContext, usize)>,
249    ) -> Result<(), FuzzyJsonError> {
250        // First, close any unclosed string
251        if in_string {
252            state.output.push(string_quote_char);
253            // in_string = false;
254        }
255
256        // Remove any trailing comma that might cause issues
257        let trimmed_output = state.output.trim_end();
258        if trimmed_output.ends_with(',') {
259            state.output = trimmed_output[..trimmed_output.len() - 1].to_string();
260        }
261
262        // Close scopes in reverse order (LIFO)
263        while let Some((context, _pos)) = scope_stack.pop() {
264            match context {
265                JsonContext::Object => {
266                    state.output.push('}');
267                }
268                JsonContext::Array => {
269                    state.output.push(']');
270                }
271                JsonContext::Root => {
272                    // Don't close root context
273                    break;
274                }
275                _ => {} // Other contexts don't need explicit closing
276            }
277        }
278
279        Ok(())
280    }
281
282    pub fn with_options(options: ParserOptions) -> Self {
283        let mut parser = Self {
284            options,
285            ..Default::default()
286        };
287        parser.register_default_strategies();
288        parser.register_default_handlers();
289        parser
290    }
291
292    pub fn register_strategy(&mut self, strategy: Box<dyn RepairStrategy>) {
293        self.repair_strategies.push(strategy);
294        // Sort by priority (highest first)
295        self.repair_strategies
296            .sort_by(|a, b| b.priority().cmp(&a.priority()));
297    }
298
299    pub fn register_handler(&mut self, handler: Box<dyn StateHandler>) {
300        self.state_handlers.push(handler);
301    }
302
303    pub fn parse<T>(&self, json_str: &str) -> Result<T, FuzzyJsonError>
304    where
305        T: serde::de::DeserializeOwned,
306    {
307        let value = self.parse_value(json_str)?;
308        serde_json::from_value(value).map_err(FuzzyJsonError::JsonError)
309    }
310
311    pub fn parse_value(&self, json_str: &str) -> Result<Value, FuzzyJsonError> {
312        // First try standard parsing
313        match serde_json::from_str(json_str) {
314            Ok(value) => Ok(value),
315            Err(e) => {
316                if !self.options.auto_repair {
317                    return Err(FuzzyJsonError::RepairFailed(
318                        "Auto-repair disabled".to_string(),
319                    ));
320                }
321
322                // Try fuzzy parsing with repair
323                let repaired = self.repair_json(json_str, e)?;
324                serde_json::from_str(&repaired).map_err(FuzzyJsonError::JsonError)
325            }
326        }
327    }
328
329    pub fn repair_json(
330        &self,
331        json_str: &str,
332        e: serde_json::error::Error,
333    ) -> Result<String, FuzzyJsonError> {
334        let mut state = ParseState::new(json_str.trim().to_string());
335        let mut attempts = 0;
336
337        self.try_repair_strategies(&mut state, &e.to_string())?;
338        // try repairing once
339        /*
340        println!(
341            "Repair response : {:?} | State afterwards: {:?}",
342            repair_successful, state
343        );*/
344        // .context("Failed to repair json using available repair strategies")?;
345
346        while !state.is_finished() && attempts < self.options.max_repair_attempts {
347            let mut handled = false;
348
349            // Try state handlers first
350            for handler in &self.state_handlers {
351                if handler.can_handle(&state) {
352                    /*
353                    #[cfg(debug_assertions)]
354                    println!(
355                        "State: {:?} | {:?} : {:?} | {:?} | Handler: {:?} | Context: {:?}",
356                        state.position,
357                        state.current_char(),
358                        state.remaining().chars().nth(0),
359                        state.output,
360                        handler,
361                        state.current_context()
362                    );*/
363                    match handler.handle(&mut state) {
364                        Ok(should_continue) => {
365                            handled = true;
366                            if !should_continue {
367                                return Ok(state.output);
368                            }
369                            break;
370                        }
371                        Err(e) => {
372                            // println!("State(e): {:?}", e);
373                            // Try repair strategies
374                            if self.try_repair_strategies(&mut state, &e.to_string())? {
375                                handled = true;
376                                break;
377                            }
378                        }
379                    }
380                } else {
381                    /*
382                    #[cfg(debug_assertions)]
383                    println!(
384                        "Can't handle |  handler: {:?} | State(e): {:?} | Remaining First Char: {:?} |  Current Char: {:?}",
385                        handler,
386                        state.position,
387                        state.remaining().chars().nth(0),
388                        state.current_char()
389                    );*/
390                }
391            }
392            if !handled {
393                /* println!(
394                    "Not handled |  output: {:?} | State(e): {:?} | Current Char: {:?}",
395                    state.output,
396                    state.position,
397                    state.current_char()
398                );*/
399                if self.try_repair_strategies(&mut state, &e.to_string())? {
400                    handled = true;
401                }
402            }
403
404            if !handled {
405                return Err(FuzzyJsonError::ParseError {
406                    pos: state.position,
407                    msg: format!(
408                        "No handler for current state: {:?} | {:?}",
409                        state.current_context(),
410                        state.current_char()
411                    ),
412                });
413            }
414
415            attempts += 1;
416        }
417        /*
418        println!(
419            " Repaired so far: {:?} | End Context: {:?} | Current Char: {:?}",
420            state.output,
421            state.current_context(),
422            state.current_char()
423        );*/
424        if state.current_context() != &JsonContext::Root {
425            /*
426            #[cfg(debug_assertions)]
427            println!(
428                "Repairing the case of incomplete json | Repaired so far: {:?} | End Context: {:?}",
429                state.output,
430                state.current_context()
431            );*/
432            self.try_repair_strategies(&mut state, &e.to_string())?;
433        }
434
435        if attempts >= self.options.max_repair_attempts {
436            return Err(FuzzyJsonError::RepairFailed(
437                "Too many repair attempts".to_string(),
438            ));
439        }
440
441        // #[cfg(debug_assertions)]
442        // println!("Output: {:?}", state.output);
443        Ok(state.output)
444    }
445
446    fn try_repair_strategies(
447        &self,
448        state: &mut ParseState,
449        error: &str,
450    ) -> Result<bool, FuzzyJsonError> {
451        for strategy in &self.repair_strategies {
452            if strategy.can_repair(state, error) {
453                // #[cfg(debug_assertions)]
454                // println!("Repaired using {:?}", strategy);
455                strategy.repair(state, error)?;
456                return Ok(true);
457            }
458        }
459        Ok(false)
460    }
461
462    fn register_default_strategies(&mut self) {
463        self.register_strategy(Box::new(TruncationRepairStrategy));
464        self.register_strategy(Box::new(SingleQuotesStrategy));
465        self.register_strategy(Box::new(CodeBlockMarkersStrategy));
466        self.register_strategy(Box::new(IncompletePropertyStrategy));
467        self.register_strategy(Box::new(IncompleteArrayStrategy));
468        self.register_strategy(Box::new(TrailingCommaStrategy));
469        self.register_strategy(Box::new(MissingQuotesStrategy));
470        self.register_strategy(Box::new(MissingBracketsStrategy));
471        self.register_strategy(Box::new(TrimStrayContentInBeginningStrategy));
472        self.register_strategy(Box::new(TrimStrayContentInEndStrategy));
473    }
474
475    fn register_default_handlers(&mut self) {
476        self.register_handler(Box::new(WhitespaceHandler));
477        self.register_handler(Box::new(LiteralHandler));
478        self.register_handler(Box::new(ColonHandler));
479        self.register_handler(Box::new(CommaHandler));
480        self.register_handler(Box::new(StringHandler));
481        self.register_handler(Box::new(NumberHandler));
482        self.register_handler(Box::new(ObjectHandler));
483        self.register_handler(Box::new(ArrayHandler));
484    }
485}
486
487// Repair Strategies
488#[derive(Debug)]
489pub struct TrailingCommaStrategy;
490
491impl RepairStrategy for TrailingCommaStrategy {
492    fn name(&self) -> &'static str {
493        "trailing_comma"
494    }
495    fn priority(&self) -> u8 {
496        80
497    }
498
499    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
500        if let Some(ch) = state.current_char() {
501            ch == ','
502                && state
503                    .peek_chars(2)
504                    .chars()
505                    .nth(1)
506                    .map_or(false, |next| next == '}' || next == ']')
507        } else {
508            false
509        }
510    }
511
512    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
513        // Skip the trailing comma
514        state.advance(1);
515        Ok(())
516    }
517}
518
519#[derive(Debug)]
520pub struct MissingQuotesStrategy;
521
522impl RepairStrategy for MissingQuotesStrategy {
523    fn name(&self) -> &'static str {
524        "missing_quotes"
525    }
526    fn priority(&self) -> u8 {
527        70
528    }
529
530    fn can_repair(&self, state: &ParseState, error: &str) -> bool {
531        error.contains("expected") && error.contains("quote")
532            || (state.current_context() == &JsonContext::Property
533                && state.current_char().map_or(false, |c| c.is_alphabetic()))
534    }
535
536    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
537        state.output.push('"');
538
539        // Collect until we hit a delimiter
540        while let Some(ch) = state.current_char() {
541            if ch.is_whitespace() || ch == ':' || ch == ',' || ch == '}' || ch == ']' {
542                break;
543            }
544            state.output.push(ch);
545            state.advance(1);
546        }
547
548        state.output.push('"');
549        Ok(())
550    }
551}
552
553#[derive(Debug)]
554pub struct MissingBracketsStrategy;
555
556impl RepairStrategy for MissingBracketsStrategy {
557    fn name(&self) -> &'static str {
558        "missing_brackets"
559    }
560    fn priority(&self) -> u8 {
561        60
562    }
563
564    fn can_repair(&self, _state: &ParseState, error: &str) -> bool {
565        error.contains("missing") && (error.contains("}") || error.contains("]"))
566    }
567
568    fn repair(&self, state: &mut ParseState, error: &str) -> Result<(), FuzzyJsonError> {
569        if error.contains("}") {
570            state.output.push('}');
571            state.pop_context();
572        } else if error.contains("]") {
573            state.output.push(']');
574            state.pop_context();
575        }
576        Ok(())
577    }
578}
579
580#[derive(Debug)]
581pub struct CodeBlockMarkersStrategy;
582
583impl RepairStrategy for CodeBlockMarkersStrategy {
584    fn name(&self) -> &'static str {
585        "code_block_markers"
586    }
587    fn priority(&self) -> u8 {
588        90
589    }
590
591    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
592        state.remaining().starts_with("```") || state.remaining().starts_with("json```")
593    }
594
595    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
596        if state.remaining().starts_with("json```") {
597            state.advance(7);
598        } else if state.remaining().starts_with("```json") {
599            state.advance(7);
600        } else if state.remaining().starts_with("```") {
601            state.advance(3);
602        }
603        Ok(())
604    }
605}
606
607#[derive(Debug)]
608pub struct TrimStrayContentInBeginningStrategy;
609
610impl RepairStrategy for TrimStrayContentInBeginningStrategy {
611    fn name(&self) -> &'static str {
612        "trim_stray_characters_in_end_markers"
613    }
614    fn priority(&self) -> u8 {
615        70
616    }
617
618    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
619        state.current_context() == &JsonContext::Root
620            && (state.current_char() != Some('{') || state.current_char() != Some('['))
621    }
622
623    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
624        while state.current_char().is_some()
625            && state.current_char() != Some('{')
626            && state.current_char() != Some('[')
627        {
628            // this normally works for stray chars in end as well
629            // but there could be stray `{` in the end as well // those will be captured/corrected
630            // by the in the end strategy
631            state.advance(1);
632        }
633        Ok(())
634    }
635}
636
637#[derive(Debug)]
638pub struct TrimStrayContentInEndStrategy;
639
640impl RepairStrategy for TrimStrayContentInEndStrategy {
641    fn name(&self) -> &'static str {
642        "trim_stray_characters_in_end_markers"
643    }
644    fn priority(&self) -> u8 {
645        70
646    }
647
648    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
649        state.current_context() == &JsonContext::Root
650        //  && (state.current_char() != Some(']') || state.current_char() != Some('}'))
651    }
652
653    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
654        println!("Char: {:?}", state.current_char());
655        while state.current_char() != None {
656            state.advance(1);
657            println!("Char: {:?}", state.current_char());
658        }
659        Ok(())
660    }
661}
662
663#[derive(Debug)]
664pub struct SingleQuotesStrategy;
665
666impl RepairStrategy for SingleQuotesStrategy {
667    fn name(&self) -> &'static str {
668        "single_quotes"
669    }
670    fn priority(&self) -> u8 {
671        85 // higher than incomplete property strategy basically
672    }
673
674    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
675        state.current_char() == Some('\'')
676    }
677
678    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
679        state.output.push('"');
680        state.advance(1); // Skip the single quote
681
682        while let Some(ch) = state.current_char() {
683            if ch == '\'' {
684                state.advance(1);
685                break;
686            }
687            if ch == '"' {
688                state.output.push('\\');
689            }
690            state.output.push(ch);
691            state.advance(1);
692        }
693        if state.current_context() == &JsonContext::Colon {
694            state.pop_context(); // for the cases when property was defined correctly and colon was
695            // there but value
696            // happened to be in single quotes
697            // we are still missing the case where there's '' quote right
698            // after property without any colons
699        }
700
701        state.output.push('"');
702        Ok(())
703    }
704}
705
706// High-priority strategy for handling LLM truncation
707#[derive(Debug)]
708pub struct TruncationRepairStrategy;
709
710impl RepairStrategy for TruncationRepairStrategy {
711    fn name(&self) -> &'static str {
712        "truncation_repair"
713    }
714    fn priority(&self) -> u8 {
715        95
716    } // Highest priority
717
718    fn can_repair(&self, state: &ParseState, error: &str) -> bool {
719        // Detect if we're at the end of input with unclosed scopes
720        state.is_finished()
721            || error.contains("unexpected end")
722            || error.contains("unclosed")
723            || (state.remaining().trim().is_empty() && !state.stack.is_empty())
724    }
725
726    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
727        // Close all remaining scopes aggressively
728        self.close_all_scopes(state)
729    }
730}
731
732impl TruncationRepairStrategy {
733    fn close_all_scopes(&self, state: &mut ParseState) -> Result<(), FuzzyJsonError> {
734        // Track what we need to close
735        let mut needs_closing = Vec::new();
736
737        // Analyze the current state to determine what needs closing
738        for context in state.stack.iter().rev() {
739            match context {
740                JsonContext::Object => needs_closing.push('}'),
741                JsonContext::Array => needs_closing.push(']'),
742                JsonContext::Property => {
743                    // We might be in the middle of a property name or value
744                    //
745                    if state.output.chars().last() != Some('"')
746                        && state.output.matches('"').count() % 2 != 0
747                    {
748                        needs_closing.push('"'); // Close unclosed string
749                    }
750                    // needs_closing.push('"'); // Close any unclosed string
751                    needs_closing.push(':'); // set 0/empty
752                    needs_closing.push('0'); // set 0/empty
753                    // needs_closing.push('}'); // Close the object
754                }
755                JsonContext::Colon => {
756                    needs_closing.push('0'); // set 0/empty
757                }
758                JsonContext::Value => {
759                    // We might be in the middle of a value
760                    if state.output.chars().last() == Some('"')
761                        && state.output.matches('"').count() % 2 != 0
762                    {
763                        needs_closing.push('"'); // Close unclosed string
764                    }
765                }
766                _ => {} // Root context doesn't need closing
767            }
768        }
769
770        // Special case: if we're in the middle of a string
771        if self.is_in_unclosed_string(&state.output) {
772            // we can make this one redudant [todo:]
773            needs_closing.insert(0, '"');
774        }
775
776        // Remove trailing comma if present
777        if state.output.trim_end().ends_with(',') {
778            let trimmed = state.output.trim_end();
779            state.output = trimmed[..trimmed.len() - 1].to_string();
780        }
781
782        // Apply all closings
783        for &closing_char in &needs_closing {
784            state.output.push(closing_char);
785        }
786
787        Ok(())
788    }
789
790    fn is_in_unclosed_string(&self, output: &str) -> bool {
791        let mut in_string = false;
792        let mut escape_next = false;
793        let mut quote_char = '"';
794
795        for ch in output.chars() {
796            if escape_next {
797                escape_next = false;
798                continue;
799            }
800
801            match ch {
802                '\\' if in_string => escape_next = true,
803                '"' | '\'' if !in_string => {
804                    in_string = true;
805                    quote_char = ch;
806                }
807                c if in_string && c == quote_char => in_string = false,
808                _ => {}
809            }
810        }
811
812        in_string
813    }
814}
815
816// Strategy for detecting and fixing incomplete property assignments
817#[derive(Debug)]
818pub struct IncompletePropertyStrategy;
819
820impl RepairStrategy for IncompletePropertyStrategy {
821    fn name(&self) -> &'static str {
822        "incomplete_property"
823    }
824    fn priority(&self) -> u8 {
825        85
826    }
827
828    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
829        let output = state.output.trim_end();
830        // Detect patterns like: "key": or "key":
831        output.ends_with(':')
832            || (output.ends_with('"') && state.remaining().trim().starts_with(':'))
833    }
834
835    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
836        let output = state.output.trim_end();
837
838        if output.ends_with(':') {
839            // Add a null value for incomplete property
840            state.output.push_str(" null");
841        } else if output.ends_with('"') && state.remaining().trim().starts_with(':') {
842            // Complete the property assignment
843            state.output.push_str(": null");
844            // Skip the colon in remaining input
845            while let Some(ch) = state.current_char() {
846                if ch == ':' {
847                    state.advance(1);
848                    break;
849                }
850                if !ch.is_whitespace() {
851                    break;
852                }
853                state.advance(1);
854            }
855        }
856
857        Ok(())
858    }
859}
860
861// Strategy for handling incomplete array elements
862#[derive(Debug)]
863pub struct IncompleteArrayStrategy;
864
865impl RepairStrategy for IncompleteArrayStrategy {
866    fn name(&self) -> &'static str {
867        "incomplete_array"
868    }
869    fn priority(&self) -> u8 {
870        80
871    }
872
873    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
874        state.current_context() == &JsonContext::Array
875            && state.output.trim_end().ends_with(',')
876            && state.remaining().trim().is_empty()
877    }
878
879    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
880        // Remove trailing comma and close array
881        let trimmed = state.output.trim_end();
882        if trimmed.ends_with(',') {
883            state.output = trimmed[..trimmed.len() - 1].to_string();
884        }
885        state.output.push(']');
886        Ok(())
887    }
888}
889
890// State Handlers
891#[derive(Debug)]
892pub struct WhitespaceHandler;
893
894impl StateHandler for WhitespaceHandler {
895    fn can_handle(&self, state: &ParseState) -> bool {
896        state.current_char().map_or(false, |c| c.is_whitespace())
897            || state.remaining().starts_with("\\n")
898    }
899
900    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
901        while state.current_char().map_or(false, |a| a.is_whitespace())
902            || state.remaining().starts_with("\\n")
903        {
904            // state.output.push(ch);
905            if state.remaining().starts_with("\\n") {
906                state.advance(2);
907            } else {
908                state.advance(1);
909            }
910        }
911        Ok(true)
912    }
913}
914#[derive(Debug)]
915pub struct CommaHandler;
916
917impl StateHandler for CommaHandler {
918    fn can_handle(&self, state: &ParseState) -> bool {
919        let remaining = state.remaining();
920        /*
921        println!(
922            "start with [comma handler] | Current Char: {:?}: {:?}",
923            state.current_char(),
924            remaining.chars().nth(0)
925        );*/
926        remaining.starts_with(",")
927    }
928
929    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
930        let remaining = state.remaining();
931
932        if remaining.starts_with(",") {
933            state.advance(1);
934
935            let mut remaining = state.remaining();
936
937            while remaining.starts_with("\\n")
938                || state.current_char().map(|a| a.is_whitespace()) == Some(true)
939            {
940                if remaining.starts_with("\\n") {
941                    state.advance(2);
942                } else {
943                    state.advance(1);
944                }
945                remaining = state.remaining();
946            }
947            // not an idiomatic way, should have returned at this point
948            // so that object handler could have taken over
949            // this introduced redundant code
950            // [todo]:
951            if state.current_char() == Some('}') {
952                state.output.push('}');
953                state.advance(1);
954                state.pop_context();
955                return Ok(true);
956            }
957            state.output.push_str(",");
958        }
959
960        Ok(true)
961    }
962}
963#[derive(Debug)]
964pub struct ColonHandler;
965
966impl StateHandler for ColonHandler {
967    fn can_handle(&self, state: &ParseState) -> bool {
968        let remaining = state.remaining();
969        remaining.starts_with(":")
970    }
971
972    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
973        // there should be a colon state as well
974        // for the cases when json stopped at colon itself
975
976        if state.current_context() == &JsonContext::Property {
977            state.pop_context();
978            state.push_context(JsonContext::Colon);
979        }
980
981        let remaining = state.remaining();
982        if remaining.starts_with(":") {
983            state.output.push_str(":");
984            state.advance(1);
985        }
986        while state.current_char().map_or(false, |a| a.is_whitespace())
987            || state.remaining().starts_with("\\n")
988        {
989            if state.remaining().starts_with("\\n") {
990                state.advance(2);
991            } else {
992                state.advance(1);
993            }
994        }
995        // not a right approach to add repair code in json handler
996        // should be moved to repair strategies
997        if state.current_char() == Some('}') {
998            state.output.push_str("null");
999            // state.advance(1);
1000            state.pop_context(); // colon context popped
1001        }
1002
1003        Ok(true)
1004    }
1005}
1006
1007#[derive(Debug)]
1008pub struct LiteralHandler;
1009
1010impl StateHandler for LiteralHandler {
1011    fn can_handle(&self, state: &ParseState) -> bool {
1012        let remaining = state.remaining();
1013        (state.current_context() == &JsonContext::Array
1014            || state.current_context() == &JsonContext::Colon
1015            || state.current_context() == &JsonContext::Property)
1016            && (remaining.starts_with("true")
1017                || remaining.starts_with("false")
1018                || remaining.starts_with("null"))
1019    }
1020
1021    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1022        let remaining = state.remaining();
1023
1024        if remaining.starts_with("true") {
1025            state.output.push_str("true");
1026            state.advance(4);
1027        } else if remaining.starts_with("false") {
1028            state.output.push_str("false");
1029            state.advance(5);
1030        } else if remaining.starts_with("null") {
1031            state.output.push_str("null");
1032            state.advance(4);
1033        }
1034        if state.current_context() != &JsonContext::Array {
1035            state.pop_context(); // if not array it would be a property or colon // what about
1036            // cases where literal appeared right after object
1037        }
1038
1039        Ok(true)
1040    }
1041}
1042
1043#[derive(Debug)]
1044pub struct StringHandler;
1045
1046impl StateHandler for StringHandler {
1047    fn can_handle(&self, state: &ParseState) -> bool {
1048        state.current_char() == Some('"')
1049    }
1050
1051    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1052        state.output.push('"');
1053        state.advance(1);
1054
1055        if state.current_context() == &JsonContext::Colon {
1056            state.pop_context();
1057            state.push_context(JsonContext::Value);
1058        } else if state.current_context() == &JsonContext::Property {
1059            state.output.push(':');
1060            state.pop_context();
1061            state.push_context(JsonContext::Value);
1062        } else if state.current_context() == &JsonContext::Array {
1063            state.push_context(JsonContext::Value);
1064        } else {
1065            state.push_context(JsonContext::Property);
1066        }
1067
1068        while let Some(ch) = state.current_char() {
1069            if ch == '"' {
1070                state.output.push('"');
1071                state.advance(1);
1072                /*
1073                println!(
1074                    "stopped string-handler at {:?} | Remaning: {:?} | Current: {:?}",
1075                    state.position,
1076                    state.remaining().chars().nth(0),
1077                    state.current_char()
1078                );*/
1079                if state.current_context() == &JsonContext::Value {
1080                    state.pop_context();
1081                }
1082                break;
1083            }
1084
1085            if ch == '\\' {
1086                state.output.push('\\');
1087                state.advance(1);
1088                if let Some(escaped) = state.current_char() {
1089                    state.output.push(escaped);
1090                    state.advance(1);
1091                }
1092            } else {
1093                state.output.push(ch);
1094                state.advance(1);
1095            }
1096        }
1097
1098        Ok(true)
1099    }
1100}
1101
1102#[derive(Debug)]
1103pub struct NumberHandler;
1104
1105impl StateHandler for NumberHandler {
1106    fn can_handle(&self, state: &ParseState) -> bool {
1107        state
1108            .current_char()
1109            .map_or(false, |c| c.is_ascii_digit() || c == '-')
1110    }
1111
1112    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1113        if state.current_context() == &JsonContext::Colon {
1114            state.pop_context();
1115            state.push_context(JsonContext::Value);
1116        } else if state.current_context() == &JsonContext::Property {
1117            state.pop_context();
1118            state.push_context(JsonContext::Value);
1119            state.output.push(':');
1120        } else if state.current_context() == &JsonContext::Array {
1121            state.push_context(JsonContext::Value);
1122        } else {
1123            state.push_context(JsonContext::Property);
1124            state.output.push('"');
1125        }
1126
1127        while let Some(ch) = state.current_char() {
1128            if ch.is_ascii_digit() || ch == '-' || ch == '+' || ch == '.' || ch == 'e' || ch == 'E'
1129            {
1130                state.output.push(ch);
1131                state.advance(1);
1132            } else {
1133                break;
1134            }
1135        }
1136
1137        if state.current_context() == &JsonContext::Value {
1138            state.pop_context();
1139        } else if state.current_context() == &JsonContext::Property
1140            && state
1141                .current_char()
1142                .map_or(true, |c| c.is_whitespace() || c == ':' || c == '}')
1143        {
1144            state.output.push('"');
1145        }
1146        Ok(true)
1147    }
1148}
1149
1150#[derive(Debug)]
1151pub struct ObjectHandler;
1152
1153impl StateHandler for ObjectHandler {
1154    fn can_handle(&self, state: &ParseState) -> bool {
1155        state.current_char() == Some('{')
1156            || (state.current_context() != &JsonContext::Root && state.current_char() == Some('}'))
1157    }
1158
1159    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1160        if state.current_context() == &JsonContext::Colon {
1161            state.pop_context();
1162        }
1163        if let Some(ch) = state.current_char() {
1164            if ch == '{' {
1165                state.output.push('{');
1166                state.push_context(JsonContext::Object);
1167                state.advance(1);
1168            } else if ch == '}' {
1169                state.output.push('}');
1170                state.pop_context();
1171                state.advance(1);
1172            }
1173        }
1174        Ok(true)
1175    }
1176}
1177
1178#[derive(Debug)]
1179pub struct ArrayHandler;
1180
1181impl StateHandler for ArrayHandler {
1182    fn can_handle(&self, state: &ParseState) -> bool {
1183        state.current_char() == Some('[') || state.current_char() == Some(']')
1184    }
1185
1186    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1187        if state.current_context() == &JsonContext::Colon {
1188            state.pop_context();
1189        }
1190        if let Some(ch) = state.current_char() {
1191            if ch == '[' {
1192                state.output.push('[');
1193                state.push_context(JsonContext::Array);
1194                state.advance(1);
1195            } else if ch == ']' {
1196                state.output.push(']');
1197                state.pop_context();
1198                state.advance(1);
1199            }
1200        }
1201        Ok(true)
1202    }
1203}
1204
1205// Builder pattern for easy configuration
1206pub struct FuzzyJsonParserBuilder {
1207    options: ParserOptions,
1208    custom_strategies: Vec<Box<dyn RepairStrategy>>,
1209    custom_handlers: Vec<Box<dyn StateHandler>>,
1210}
1211
1212impl FuzzyJsonParserBuilder {
1213    pub fn new() -> Self {
1214        Self {
1215            options: ParserOptions::default(),
1216            custom_strategies: Vec::new(),
1217            custom_handlers: Vec::new(),
1218        }
1219    }
1220
1221    pub fn with_trailing_commas(mut self, allow: bool) -> Self {
1222        self.options.allow_trailing_commas = allow;
1223        self
1224    }
1225
1226    pub fn with_single_quotes(mut self, allow: bool) -> Self {
1227        self.options.allow_single_quotes = allow;
1228        self
1229    }
1230
1231    pub fn with_comments(mut self, allow: bool) -> Self {
1232        self.options.allow_comments = allow;
1233        self
1234    }
1235
1236    pub fn with_unquoted_keys(mut self, allow: bool) -> Self {
1237        self.options.allow_unquoted_keys = allow;
1238        self
1239    }
1240
1241    pub fn strict_mode(mut self, strict: bool) -> Self {
1242        self.options.strict_mode = strict;
1243        self
1244    }
1245
1246    pub fn max_repair_attempts(mut self, max: usize) -> Self {
1247        self.options.max_repair_attempts = max;
1248        self
1249    }
1250
1251    pub fn aggressive_truncation_repair(mut self, enable: bool) -> Self {
1252        self.options.aggressive_truncation_repair = enable;
1253        self
1254    }
1255
1256    pub fn add_strategy(mut self, strategy: Box<dyn RepairStrategy>) -> Self {
1257        self.custom_strategies.push(strategy);
1258        self
1259    }
1260
1261    pub fn add_handler(mut self, handler: Box<dyn StateHandler>) -> Self {
1262        self.custom_handlers.push(handler);
1263        self
1264    }
1265
1266    pub fn build(self) -> FuzzyJsonParser {
1267        let mut parser = FuzzyJsonParser::with_options(self.options);
1268
1269        for strategy in self.custom_strategies {
1270            parser.register_strategy(strategy);
1271        }
1272
1273        for handler in self.custom_handlers {
1274            parser.register_handler(handler);
1275        }
1276
1277        parser
1278    }
1279}
1280
1281impl Default for FuzzyJsonParserBuilder {
1282    fn default() -> Self {
1283        Self::new()
1284    }
1285}