chill_json/
lib.rs

1use serde_json::Value;
2use std::fmt::Debug;
3use thiserror::Error;
4
/// Errors reported by the fuzzy JSON parser.
///
/// The `#[error(...)]` attributes supply the `Display` text for each variant.
#[derive(Error, Debug)]
pub enum FuzzyJsonError {
    /// The input could not be processed at `pos`; `msg` describes the problem.
    /// In the repair loop `pos` is the character index of the cursor.
    #[error("Invalid JSON at position {pos}: {msg}")]
    ParseError { pos: usize, msg: String },
    /// Repair was refused or abandoned; the payload explains why.
    #[error("Repair failed: {0}")]
    RepairFailed(String),
    /// An error raised by `serde_json`, convertible with `?` through `#[from]`.
    #[error("JSON error: {0}")]
    JsonError(#[from] serde_json::Error),
}
14
/// Syntactic context tracked on the parser's context stack.
#[derive(Debug, Clone, PartialEq)]
pub enum JsonContext {
    /// Outside of any object or array.
    Root,
    /// Inside `{ ... }`.
    Object,
    /// Inside `[ ... ]`.
    Array,
    /// A property name delimited by double quotes.
    DoubleQuoteProperty,
    /// A property name delimited by single quotes.
    SingleQuoteProperty,
    /// A string value delimited by double quotes.
    DoubleQuoteValue,
    /// A string value delimited by single quotes.
    SingleQuoteValue,
    /// Around the `:` separator of a property.
    Colon,
}

impl JsonContext {
    /// Returns `true` for the two string-value contexts.
    pub fn is_value(&self) -> bool {
        matches!(self, Self::DoubleQuoteValue | Self::SingleQuoteValue)
    }

    /// Returns `true` for the two property-name contexts.
    pub fn is_key(&self) -> bool {
        matches!(self, Self::DoubleQuoteProperty | Self::SingleQuoteProperty)
    }
}
36
37#[derive(Debug, Clone)]
38pub struct ParseState {
39    pub input: String,
40    pub position: usize,
41    pub stack: Vec<JsonContext>,
42    pub output: String,
43}
44
45impl ParseState {
46    pub fn new(input: String) -> Self {
47        Self {
48            input,
49            position: 0,
50            stack: vec![JsonContext::Root],
51            output: String::new(),
52        }
53    }
54
55    pub fn current_char(&self) -> Option<char> {
56        self.input.chars().nth(self.position)
57    }
58
59    pub fn peek_chars(&self, count: usize) -> String {
60        self.input.chars().skip(self.position).take(count).collect()
61    }
62
63    pub fn advance(&mut self, count: usize) -> String {
64        let chars: String = self.input.chars().skip(self.position).take(count).collect();
65        self.position += count;
66        chars
67    }
68
69    pub fn remaining(&self) -> &str {
70        match self
71            .input
72            .char_indices()
73            .nth(self.position)
74            .map(|(idx, _)| idx)
75        {
76            Some(start_byte) => &self.input[start_byte..],
77            None => "",
78        }
79    }
80
81    pub fn is_sq_key_or_value(&self) -> bool {
82        let cc = self.current_context();
83
84        cc == &JsonContext::SingleQuoteValue || cc == &JsonContext::SingleQuoteProperty
85    }
86    pub fn is_key_or_value(&self) -> bool {
87        let cc = self.current_context();
88
89        cc == &JsonContext::SingleQuoteValue
90            || cc == &JsonContext::DoubleQuoteValue
91            || cc == &JsonContext::SingleQuoteProperty
92            || cc == &JsonContext::DoubleQuoteProperty
93    }
94
95    pub fn is_dq_key_or_value(&self) -> bool {
96        let cc = self.current_context();
97
98        cc == &JsonContext::DoubleQuoteValue || cc == &JsonContext::DoubleQuoteProperty
99    }
100
101    pub fn is_value(&self) -> bool {
102        let cc = self.current_context();
103        cc == &JsonContext::SingleQuoteValue || cc == &JsonContext::DoubleQuoteValue
104    }
105
106    pub fn is_prop(&self) -> bool {
107        let cc = self.current_context();
108        cc == &JsonContext::SingleQuoteProperty || cc == &JsonContext::DoubleQuoteProperty
109    }
110
111    pub fn is_finished(&self) -> bool {
112        self.position >= self.input.chars().count()
113    }
114
115    pub fn current_context(&self) -> &JsonContext {
116        self.stack.last().unwrap_or(&JsonContext::Root)
117    }
118
119    pub fn push_context(&mut self, context: JsonContext) {
120        self.stack.push(context);
121    }
122
123    pub fn pop_context(&mut self) -> Option<JsonContext> {
124        if self.stack.len() > 1 {
125            self.stack.pop()
126        } else {
127            None
128        }
129    }
130}
131
/// A pluggable fix for one class of malformed input.
///
/// The parser asks each registered strategy, in descending `priority` order,
/// whether it `can_repair` the current state and runs the first that accepts.
pub trait RepairStrategy: Send + Sync + Debug {
    /// Identifier for this strategy.
    fn name(&self) -> &'static str;
    /// Returns `true` if this strategy applies to `state`; `error` is the text
    /// of the error that triggered the repair attempt.
    fn can_repair(&self, state: &ParseState, error: &str) -> bool;
    /// Applies the fix by mutating `state` (cursor, context stack and/or output).
    fn repair(&self, state: &mut ParseState, error: &str) -> Result<(), FuzzyJsonError>;
    /// Ordering key: strategies with a higher priority are tried first.
    fn priority(&self) -> u8; // Higher priority strategies are tried first
}
138
/// A step of the repair loop that consumes input for one kind of token.
///
/// Handlers are consulted in registration order; the first one whose
/// `can_handle` returns `true` gets to `handle` the state.
pub trait StateHandler: Send + Sync + Debug {
    /// Returns `true` if this handler wants to process the current state.
    fn can_handle(&self, state: &ParseState) -> bool;
    /// Processes the current state. `Ok(true)` keeps the repair loop running;
    /// `Ok(false)` makes `repair_json` return the output built so far.
    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError>; // Returns true if parsing should continue
}
143
/// Lenient JSON parser that falls back to a repair pass when strict parsing fails.
///
/// The derived `Default` yields a parser with default options and NO strategies
/// or handlers; `new` and `with_options` register the built-in ones.
#[derive(Default, Debug)]
pub struct FuzzyJsonParser {
    // Kept sorted by descending priority (see `register_strategy`).
    repair_strategies: Vec<Box<dyn RepairStrategy>>,
    // Consulted in registration order by the repair loop.
    state_handlers: Vec<Box<dyn StateHandler>>,
    options: ParserOptions,
}
150
/// Tunables for `FuzzyJsonParser`.
///
/// NOTE(review): only `auto_repair`, `max_repair_attempts` and
/// `aggressive_truncation_repair` are read in the part of the file visible here;
/// confirm where the remaining flags are consulted.
#[derive(Debug, Clone)]
pub struct ParserOptions {
    /// When `false`, `parse_value` fails instead of attempting a repair.
    pub auto_repair: bool,
    pub allow_trailing_commas: bool,
    pub allow_comments: bool,
    pub allow_single_quotes: bool,
    pub allow_unquoted_keys: bool,
    /// Upper bound on iterations of the repair loop in `repair_json`.
    pub max_repair_attempts: usize,
    pub strict_mode: bool,
    /// When `false`, `aggressively_close_scopes` returns its input unchanged.
    pub aggressive_truncation_repair: bool, // New option for LLM truncation handling
}

impl Default for ParserOptions {
    /// Lenient defaults: repair enabled, unquoted keys and strict mode disabled.
    fn default() -> Self {
        Self {
            auto_repair: true,
            allow_trailing_commas: true,
            allow_comments: true,
            allow_single_quotes: true,
            allow_unquoted_keys: false,
            max_repair_attempts: 1500,
            strict_mode: false,
            aggressive_truncation_repair: true, // Enable by default for LLM responses
        }
    }
}
177
178impl FuzzyJsonParser {
179    pub fn new() -> Self {
180        let mut parser = Self::default();
181        parser.register_default_strategies();
182        parser.register_default_handlers();
183        parser
184    }
185
186    /*
187    /// Parse with aggressive scope closing for truncated LLM responses
188    pub fn parse_with_auto_close<T>(&self, json_str: &str) -> Result<T, FuzzyJsonError>
189    where
190        T: serde::de::DeserializeOwned,
191    {
192        // First try normal parsing
193        match self.parse_value(json_str) {
194            Ok(value) => return serde_json::from_value(value).map_err(FuzzyJsonError::JsonError),
195            Err(_) => {
196                // Try with aggressive scope closing
197                // println!("Try with aggressive scope closing");
198                let closed_json = self.aggressively_close_scopes(json_str)?;
199                let value = self.parse_value(&closed_json)?;
200                serde_json::from_value(value).map_err(FuzzyJsonError::JsonError)
201            }
202        }
203    }*/
204
205    /// Aggressively close all unclosed scopes in potentially truncated JSON
206    pub fn aggressively_close_scopes(&self, json_str: &str) -> Result<String, FuzzyJsonError> {
207        if !self.options.aggressive_truncation_repair {
208            return Ok(json_str.to_string());
209        }
210        let mut state = ParseState::new(json_str.trim().to_string());
211        let mut in_string = false;
212        let mut string_quote_char = '"';
213        let mut escape_next = false;
214
215        // Track unclosed scopes with their positions for better error reporting
216        let mut scope_stack: Vec<(JsonContext, usize)> = vec![(JsonContext::Root, 0)];
217
218        while !state.is_finished() {
219            let ch = match state.current_char() {
220                Some(c) => c,
221                None => break,
222            };
223
224            if escape_next {
225                state.output.push(ch);
226                state.advance(1);
227                escape_next = false;
228                continue;
229            }
230
231            match ch {
232                '\\' if in_string => {
233                    state.output.push(ch);
234                    state.advance(1);
235                    escape_next = true;
236                }
237                '"' | '\'' if !in_string => {
238                    in_string = true;
239                    string_quote_char = ch;
240                    state.output.push(ch);
241                    state.advance(1);
242                }
243                c if in_string && c == string_quote_char => {
244                    in_string = false;
245                    state.output.push(ch);
246                    state.advance(1);
247                }
248                '{' if !in_string => {
249                    scope_stack.push((JsonContext::Object, state.position));
250                    state.output.push(ch);
251                    state.advance(1);
252                }
253                '[' if !in_string => {
254                    scope_stack.push((JsonContext::Array, state.position));
255                    state.output.push(ch);
256                    state.advance(1);
257                }
258                '}' if !in_string => {
259                    if let Some((JsonContext::Object, _)) = scope_stack.last() {
260                        scope_stack.pop();
261                    }
262                    state.output.push(ch);
263                    state.advance(1);
264                }
265                ']' if !in_string => {
266                    if let Some((JsonContext::Array, _)) = scope_stack.last() {
267                        scope_stack.pop();
268                    }
269                    state.output.push(ch);
270                    state.advance(1);
271                }
272                _ => {
273                    state.output.push(ch);
274                    state.advance(1);
275                }
276            }
277        }
278
279        // Now aggressively close all unclosed scopes
280        self.close_remaining_scopes(&mut state, in_string, string_quote_char, scope_stack)?;
281
282        Ok(state.output)
283    }
284
285    fn close_remaining_scopes(
286        &self,
287        state: &mut ParseState,
288        in_string: bool,
289        string_quote_char: char,
290        mut scope_stack: Vec<(JsonContext, usize)>,
291    ) -> Result<(), FuzzyJsonError> {
292        // First, close any unclosed string
293        if in_string {
294            state.output.push(string_quote_char);
295            // in_string = false;
296        }
297
298        // Remove any trailing comma that might cause issues
299        let trimmed_output = state.output.trim_end();
300        if trimmed_output.ends_with(',') {
301            state.output = trimmed_output[..trimmed_output.len() - 1].to_string();
302        }
303
304        // Close scopes in reverse order (LIFO)
305        while let Some((context, _pos)) = scope_stack.pop() {
306            match context {
307                JsonContext::Object => {
308                    state.output.push('}');
309                }
310                JsonContext::Array => {
311                    state.output.push(']');
312                }
313                JsonContext::Root => {
314                    // Don't close root context
315                    break;
316                }
317                _ => {} // Other contexts don't need explicit closing
318            }
319        }
320
321        Ok(())
322    }
323
324    pub fn with_options(options: ParserOptions) -> Self {
325        let mut parser = Self {
326            options,
327            ..Default::default()
328        };
329        parser.register_default_strategies();
330        parser.register_default_handlers();
331        parser
332    }
333
334    pub fn register_strategy(&mut self, strategy: Box<dyn RepairStrategy>) {
335        self.repair_strategies.push(strategy);
336        // Sort by priority (highest first)
337        self.repair_strategies
338            .sort_by(|a, b| b.priority().cmp(&a.priority()));
339    }
340
    /// Appends `handler` to the list consulted by the repair loop.
    ///
    /// Handlers are tried in registration order; the list is never sorted.
    pub fn register_handler(&mut self, handler: Box<dyn StateHandler>) {
        self.state_handlers.push(handler);
    }
344
345    pub fn parse<T>(&self, json_str: &str) -> Result<T, FuzzyJsonError>
346    where
347        T: serde::de::DeserializeOwned,
348    {
349        let value = self.parse_value(json_str)?;
350        serde_json::from_value(value).map_err(FuzzyJsonError::JsonError)
351    }
352
353    pub fn parse_value(&self, json_str: &str) -> Result<Value, FuzzyJsonError> {
354        // First try standard parsing
355        match serde_json::from_str(json_str) {
356            Ok(value) => Ok(value),
357            Err(e) => {
358                if !self.options.auto_repair {
359                    return Err(FuzzyJsonError::RepairFailed(
360                        "Auto-repair disabled".to_string(),
361                    ));
362                }
363
364                // Try fuzzy parsing with repair
365                let repaired = self.repair_json(json_str, e)?;
366                serde_json::from_str(&repaired).map_err(FuzzyJsonError::JsonError)
367            }
368        }
369    }
370
371    pub fn repair_json(
372        &self,
373        json_str: &str,
374        e: serde_json::error::Error,
375    ) -> Result<String, FuzzyJsonError> {
376        let mut state = ParseState::new(json_str.trim().to_string());
377        let mut attempts = 0;
378
379        self.try_repair_strategies(&mut state, &e.to_string())?;
380        // try repairing once
381        /*
382        println!(
383            "Repair response : {:?} | State afterwards: {:?}",
384            repair_successful, state
385        );*/
386        // .context("Failed to repair json using available repair strategies")?;
387
388        while !state.is_finished() && attempts < self.options.max_repair_attempts {
389            let mut handled = false;
390
391            // Try state handlers first
392            for handler in &self.state_handlers {
393                if handler.can_handle(&state) {
394                    /*
395                    #[cfg(debug_assertions)]
396                    println!(
397                        "State: {:?} | {:?} : {:?} | {:?} | Handler: {:?} | Context: {:?}",
398                        state.position,
399                        state.current_char(),
400                        state.remaining().chars().nth(0),
401                        state.output,
402                        handler,
403                        state.current_context()
404                    );*/
405                    match handler.handle(&mut state) {
406                        Ok(should_continue) => {
407                            handled = true;
408                            if !should_continue {
409                                return Ok(state.output);
410                            }
411                            break;
412                        }
413                        Err(e) => {
414                            // println!("State(e): {:?}", e);
415                            // Try repair strategies
416                            if self.try_repair_strategies(&mut state, &e.to_string())? {
417                                handled = true;
418                                break;
419                            }
420                        }
421                    }
422                } else {
423                    /*
424                    #[cfg(debug_assertions)]
425                    println!(
426                        "Can't handle |  handler: {:?} | State(e): {:?} | Remaining First Char: {:?} |  Current Char: {:?}",
427                        handler,
428                        state.position,
429                        state.remaining().chars().nth(0),
430                        state.current_char()
431                    );*/
432                }
433            }
434            if !handled {
435                /* println!(
436                    "Not handled |  output: {:?} | State(e): {:?} | Current Char: {:?}",
437                    state.output,
438                    state.position,
439                    state.current_char()
440                );*/
441                if self.try_repair_strategies(&mut state, &e.to_string())? {
442                    handled = true;
443                }
444            }
445
446            if !handled {
447                return Err(FuzzyJsonError::ParseError {
448                    pos: state.position,
449                    msg: format!(
450                        "No handler for current state: {:?} | {:?}",
451                        state.current_context(),
452                        state.current_char()
453                    ),
454                });
455            }
456
457            attempts += 1;
458        }
459        /*
460        println!(
461            " Repaired so far: {:?} | End Context: {:?} | Current Char: {:?}",
462            state.output,
463            state.current_context(),
464            state.current_char()
465        );*/
466        if state.current_context() != &JsonContext::Root {
467            /*
468            #[cfg(debug_assertions)]
469            println!(
470                "Repairing the case of incomplete json | Repaired so far: {:?} | End Context: {:?}",
471                state.output,
472                state.current_context()
473            );*/
474            self.try_repair_strategies(&mut state, &e.to_string())?;
475        }
476
477        if attempts >= self.options.max_repair_attempts {
478            return Err(FuzzyJsonError::RepairFailed(
479                "Too many repair attempts".to_string(),
480            ));
481        }
482
483        // #[cfg(debug_assertions)]
484        // println!("Output: {:?}", state.output);
485        Ok(state.output)
486    }
487
488    fn try_repair_strategies(
489        &self,
490        state: &mut ParseState,
491        error: &str,
492    ) -> Result<bool, FuzzyJsonError> {
493        // println!("COntext: {:?} | Is key: {:?}", state.current_context(), state.is_prop());
494        for strategy in &self.repair_strategies {
495            if strategy.can_repair(state, error) {
496                // #[cfg(debug_assertions)]
497                // println!("Repaired using {:?} | output: {}", strategy, state.output);
498                strategy.repair(state, error)?;
499                return Ok(true);
500            }
501        }
502        Ok(false)
503    }
504
505    fn register_default_strategies(&mut self) {
506        self.register_strategy(Box::new(TruncationRepairStrategy));
507        self.register_strategy(Box::new(SingleQuotesStrategy));
508        self.register_strategy(Box::new(CodeBlockMarkersStrategy));
509        self.register_strategy(Box::new(IncompletePropertyStrategy));
510        self.register_strategy(Box::new(IncompleteArrayStrategy));
511        self.register_strategy(Box::new(TrailingCommaStrategy));
512        self.register_strategy(Box::new(MissingQuotesStrategy));
513        self.register_strategy(Box::new(MissingBracketsStrategy));
514        self.register_strategy(Box::new(TrimStrayContentInBeginningStrategy));
515        self.register_strategy(Box::new(TrimStrayContentInEndStrategy));
516    }
517
518    fn register_default_handlers(&mut self) {
519        self.register_handler(Box::new(WhitespaceHandler));
520        self.register_handler(Box::new(LiteralHandler));
521        self.register_handler(Box::new(ColonHandler));
522        self.register_handler(Box::new(CommaHandler));
523        self.register_handler(Box::new(StringHandler));
524        self.register_handler(Box::new(NumberHandler));
525        self.register_handler(Box::new(ObjectHandler));
526        self.register_handler(Box::new(ArrayHandler));
527        self.register_handler(Box::new(NoQuotesKeyHandler));
528    }
529}
530
531// Repair Strategies
532#[derive(Debug)]
533pub struct TrailingCommaStrategy;
534
535impl RepairStrategy for TrailingCommaStrategy {
536    fn name(&self) -> &'static str {
537        "trailing_comma"
538    }
539    fn priority(&self) -> u8 {
540        80
541    }
542
543    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
544        if let Some(ch) = state.current_char() {
545            ch == ','
546                && state
547                    .peek_chars(2)
548                    .chars()
549                    .nth(1)
550                    .map_or(false, |next| next == '}' || next == ']')
551        } else {
552            false
553        }
554    }
555
556    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
557        // Skip the trailing comma
558        state.advance(1);
559        Ok(())
560    }
561}
562
563#[derive(Debug)]
564pub struct MissingQuotesStrategy;
565
566// it only works for property keys and not for values
567impl RepairStrategy for MissingQuotesStrategy {
568    fn name(&self) -> &'static str {
569        "missing_quotes"
570    }
571    fn priority(&self) -> u8 {
572        70
573    }
574
575    fn can_repair(&self, state: &ParseState, error: &str) -> bool {
576        error.contains("expected") && error.contains("quote")
577            || (state.current_context() == &JsonContext::DoubleQuoteProperty
578                && state.current_char().map_or(false, |c| c.is_alphabetic()))
579    }
580
581    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
582        println!("Repairing missing quotes");
583        state.output.push(
584            if state.current_context() == &JsonContext::SingleQuoteProperty {
585                '\''
586            } else {
587                '"'
588            },
589        );
590
591        // Collect until we hit a delimiter
592        while let Some(ch) = state.current_char() {
593            if ch.is_whitespace() || ch == ':' || ch == ',' || ch == '}' || ch == ']' {
594                break;
595            }
596            state.output.push(ch);
597            state.advance(1);
598        }
599
600        state.output.push(
601            if state.current_context() == &JsonContext::SingleQuoteProperty {
602                '\''
603            } else {
604                '"'
605            },
606        );
607        Ok(())
608    }
609}
610
611#[derive(Debug)]
612pub struct MissingBracketsStrategy;
613
614impl RepairStrategy for MissingBracketsStrategy {
615    fn name(&self) -> &'static str {
616        "missing_brackets"
617    }
618    fn priority(&self) -> u8 {
619        60
620    }
621
622    fn can_repair(&self, _state: &ParseState, error: &str) -> bool {
623        error.contains("missing") && (error.contains("}") || error.contains("]"))
624    }
625
626    fn repair(&self, state: &mut ParseState, error: &str) -> Result<(), FuzzyJsonError> {
627        if error.contains("}") {
628            state.output.push('}');
629            state.pop_context();
630        } else if error.contains("]") {
631            state.output.push(']');
632            state.pop_context();
633        }
634        Ok(())
635    }
636}
637
638#[derive(Debug)]
639pub struct CodeBlockMarkersStrategy;
640
641impl RepairStrategy for CodeBlockMarkersStrategy {
642    fn name(&self) -> &'static str {
643        "code_block_markers"
644    }
645    fn priority(&self) -> u8 {
646        90
647    }
648
649    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
650        state.remaining().starts_with("```") || state.remaining().starts_with("json```")
651    }
652
653    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
654        if state.remaining().starts_with("json```") {
655            state.advance(7);
656        } else if state.remaining().starts_with("```json") {
657            state.advance(7);
658        } else if state.remaining().starts_with("```") {
659            state.advance(3);
660        }
661        Ok(())
662    }
663}
664
665#[derive(Debug)]
666pub struct TrimStrayContentInBeginningStrategy;
667
668impl RepairStrategy for TrimStrayContentInBeginningStrategy {
669    fn name(&self) -> &'static str {
670        "trim_stray_characters_in_end_markers"
671    }
672    fn priority(&self) -> u8 {
673        70
674    }
675
676    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
677        state.current_context() == &JsonContext::Root
678            && (state.current_char() != Some('{') || state.current_char() != Some('['))
679    }
680
681    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
682        while state.current_char().is_some()
683            && state.current_char() != Some('{')
684            && state.current_char() != Some('[')
685        {
686            // this normally works for stray chars in end as well
687            // but there could be stray `{` in the end as well // those will be captured/corrected
688            // by the in the end strategy
689            state.advance(1);
690        }
691        Ok(())
692    }
693}
694
695#[derive(Debug)]
696pub struct TrimStrayContentInEndStrategy;
697
698impl RepairStrategy for TrimStrayContentInEndStrategy {
699    fn name(&self) -> &'static str {
700        "trim_stray_characters_in_end_markers"
701    }
702    fn priority(&self) -> u8 {
703        70
704    }
705
706    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
707        state.current_context() == &JsonContext::Root
708        //  && (state.current_char() != Some(']') || state.current_char() != Some('}'))
709    }
710
711    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
712        println!("Char: {:?}", state.current_char());
713        while state.current_char() != None {
714            state.advance(1);
715            println!("Char: {:?}", state.current_char());
716        }
717        Ok(())
718    }
719}
720
721#[derive(Debug)]
722pub struct SingleQuotesStrategy;
723
724impl RepairStrategy for SingleQuotesStrategy {
725    fn name(&self) -> &'static str {
726        "single_quotes"
727    }
728    fn priority(&self) -> u8 {
729        85 // higher than incomplete property strategy basically
730    }
731
732    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
733        state.current_char() == Some('\'')
734    }
735
736    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
737        state.output.push('"');
738        state.advance(1); // Skip the single quote
739
740        while let Some(ch) = state.current_char() {
741            if ch == '\'' {
742                state.advance(1);
743                break;
744            }
745            if ch == '"' {
746                state.output.push('\\');
747            }
748            state.output.push(ch);
749            state.advance(1);
750        }
751        if state.current_context() == &JsonContext::Colon {
752            state.pop_context(); // for the cases when property was defined correctly and colon was
753            // there but value
754            // happened to be in single quotes
755            // we are still missing the case where there's '' quote right
756            // after property without any colons
757        }
758
759        state.output.push('"');
760        Ok(())
761    }
762}
763
764// High-priority strategy for handling LLM truncation
765#[derive(Debug)]
766pub struct TruncationRepairStrategy;
767
768impl RepairStrategy for TruncationRepairStrategy {
769    fn name(&self) -> &'static str {
770        "truncation_repair"
771    }
772    fn priority(&self) -> u8 {
773        95
774    } // Highest priority
775
776    fn can_repair(&self, state: &ParseState, error: &str) -> bool {
777        // Detect if we're at the end of input with unclosed scopes
778        state.is_finished()
779            || error.contains("unexpected end")
780            || error.contains("unclosed")
781            || (state.remaining().trim().is_empty() && !state.stack.is_empty())
782    }
783
784    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
785        // Close all remaining scopes aggressively
786        self.close_all_scopes(state)
787    }
788}
789
790impl TruncationRepairStrategy {
791    fn close_all_scopes(&self, state: &mut ParseState) -> Result<(), FuzzyJsonError> {
792        // Track what we need to close
793        let mut needs_closing = Vec::new();
794
795        // Analyze the current state to determine what needs closing
796        for context in state.stack.iter().rev() {
797            match context {
798                JsonContext::Object => needs_closing.push('}'),
799                JsonContext::Array => needs_closing.push(']'),
800                JsonContext::DoubleQuoteProperty |JsonContext::SingleQuoteProperty => {
801                    // We might be in the middle of a property name or value
802                    //
803                    if state.output.chars().last() != Some('"')
804                        && state.output.matches('"').count() % 2 != 0
805                    {
806                        println!("maybe the root cause @ 805");
807                        needs_closing.push('"'); // Close unclosed string
808                    }
809                    // needs_closing.push('"'); // Close any unclosed string
810                    needs_closing.push(':'); // set 0/empty
811                    needs_closing.push('0'); // set 0/empty
812                    // needs_closing.push('}'); // Close the object
813                }
814                JsonContext::Colon => {
815                    needs_closing.push('0'); // set 0/empty
816                }
817                JsonContext::DoubleQuoteValue => {
818                    // We might be in the middle of a value
819                    if state.output.chars().last() == Some('"')
820                        && state.output.matches('"').count() % 2 != 0
821                    {
822                        needs_closing.push('"'); // Close unclosed string
823                    }
824                }
825                _ => {} // Root context doesn't need closing
826            }
827        }
828
829        // Special case: if we're in the middle of a string
830        if self.is_in_unclosed_string(&state.output) {
831            // we can make this one redudant [todo:]
832            needs_closing.insert(0, '"');
833        }
834
835        // Remove trailing comma if present
836        if state.output.trim_end().ends_with(',') {
837            let trimmed = state.output.trim_end();
838            state.output = trimmed[..trimmed.len() - 1].to_string();
839        }
840
841        // Apply all closings
842        for &closing_char in &needs_closing {
843            state.output.push(closing_char);
844        }
845
846        Ok(())
847    }
848
849    fn is_in_unclosed_string(&self, output: &str) -> bool {
850        let mut in_string = false;
851        let mut escape_next = false;
852        let mut quote_char = '"';
853
854        for ch in output.chars() {
855            if escape_next {
856                escape_next = false;
857                continue;
858            }
859
860            match ch {
861                '\\' if in_string => escape_next = true,
862                '"' | '\'' if !in_string => {
863                    in_string = true;
864                    quote_char = ch;
865                }
866                c if in_string && c == quote_char => in_string = false,
867                _ => {}
868            }
869        }
870
871        in_string
872    }
873}
874
875// Strategy for detecting and fixing incomplete property assignments
876#[derive(Debug)]
877pub struct IncompletePropertyStrategy;
878
879impl RepairStrategy for IncompletePropertyStrategy {
880    fn name(&self) -> &'static str {
881        "incomplete_property"
882    }
883    fn priority(&self) -> u8 {
884        85
885    }
886
887    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
888        let output = state.output.trim_end();
889        // Detect patterns like: "key": or "key":
890        output.ends_with(':')
891            || (output.ends_with('"') && state.remaining().trim().starts_with(':'))
892    }
893
894    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
895        let output = state.output.trim_end();
896
897        if output.ends_with(':') {
898            // Add a null value for incomplete property
899            state.output.push_str(" null");
900        } else if output.ends_with('"') && state.remaining().trim().starts_with(':') {
901            // Complete the property assignment
902            state.output.push_str(": null");
903            // Skip the colon in remaining input
904            while let Some(ch) = state.current_char() {
905                if ch == ':' {
906                    state.advance(1);
907                    break;
908                }
909                if !ch.is_whitespace() {
910                    break;
911                }
912                state.advance(1);
913            }
914        }
915
916        Ok(())
917    }
918}
919
920// Strategy for handling incomplete array elements
921#[derive(Debug)]
922pub struct IncompleteArrayStrategy;
923
924impl RepairStrategy for IncompleteArrayStrategy {
925    fn name(&self) -> &'static str {
926        "incomplete_array"
927    }
928    fn priority(&self) -> u8 {
929        80
930    }
931
932    fn can_repair(&self, state: &ParseState, _error: &str) -> bool {
933        state.current_context() == &JsonContext::Array
934            && state.output.trim_end().ends_with(',')
935            && state.remaining().trim().is_empty()
936    }
937
938    fn repair(&self, state: &mut ParseState, _error: &str) -> Result<(), FuzzyJsonError> {
939        // Remove trailing comma and close array
940        let trimmed = state.output.trim_end();
941        if trimmed.ends_with(',') {
942            state.output = trimmed[..trimmed.len() - 1].to_string();
943        }
944        state.output.push(']');
945        Ok(())
946    }
947}
948
949// State Handlers
950#[derive(Debug)]
951pub struct WhitespaceHandler;
952
953impl StateHandler for WhitespaceHandler {
954    fn can_handle(&self, state: &ParseState) -> bool {
955        state.current_char().map_or(false, |c| c.is_whitespace())
956            || state.remaining().starts_with("\\n")
957    }
958
959    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
960        while state.current_char().map_or(false, |a| a.is_whitespace())
961            || state.remaining().starts_with("\\n")
962        {
963            // state.output.push(ch);
964            if state.remaining().starts_with("\\n") {
965                state.advance(2);
966            } else {
967                state.advance(1);
968            }
969        }
970        Ok(true)
971    }
972}
973#[derive(Debug)]
974pub struct CommaHandler;
975
976impl StateHandler for CommaHandler {
977    fn can_handle(&self, state: &ParseState) -> bool {
978        let remaining = state.remaining();
979        /*
980        println!(
981            "start with [comma handler] | Current Char: {:?}: {:?}",
982            state.current_char(),
983            remaining.chars().nth(0)
984        );*/
985        remaining.starts_with(",")
986    }
987
988    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
989        let remaining = state.remaining();
990
991        if remaining.starts_with(",") {
992            state.advance(1);
993
994            let mut remaining = state.remaining();
995
996            while remaining.starts_with("\\n")
997                || state.current_char().map(|a| a.is_whitespace()) == Some(true)
998            {
999                if remaining.starts_with("\\n") {
1000                    state.advance(2);
1001                } else {
1002                    state.advance(1);
1003                }
1004                remaining = state.remaining();
1005            }
1006            
1007            // not an idiomatic way from first look, ideally it should have returned at this point
1008            // so that object handler could have taken over
1009            // but this is to handle a space case where comma is followed by closing curly brace,
1010            // as per json the stray comma is a syntax error
1011            if state.current_char() == Some('}') {
1012                state.output.push('}');
1013                state.advance(1);
1014                state.pop_context();
1015                return Ok(true);
1016            }
1017            state.output.push_str(",");
1018        }
1019
1020        Ok(true)
1021    }
1022}
1023#[derive(Debug)]
1024pub struct ColonHandler;
1025
1026impl StateHandler for ColonHandler {
1027    fn can_handle(&self, state: &ParseState) -> bool {
1028        let remaining = state.remaining();
1029        remaining.starts_with(":")
1030    }
1031
1032    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1033        // there should be a colon state as well
1034        // for the cases when json stopped at colon itself
1035
1036
1037        // println!("\n COLON: \n Remaining at colon handler check: {} | Context: {:?}", state.remaining(), state.current_context());
1038        if state.is_prop() {
1039            state.pop_context();
1040            state.push_context(JsonContext::Colon);
1041        }
1042
1043        let remaining = state.remaining();
1044        if remaining.starts_with(":") {
1045            state.output.push(':');
1046            state.advance(1);
1047        }
1048        while state.current_char().map_or(false, |a| a.is_whitespace())
1049            || state.remaining().starts_with("\\n")
1050        {
1051            if state.remaining().starts_with("\\n") {
1052                state.advance(2);
1053            } else {
1054                state.advance(1);
1055            }
1056        }
1057        /*
1058        // not a right approach to add repair code in json handler
1059        // should be moved to repair strategies
1060        if state.current_char() == Some('}') {
1061            state.output.push_str("null");
1062            // state.advance(1);
1063            state.pop_context(); // colon context popped
1064        }*/
1065
1066        Ok(true)
1067    }
1068}
1069
1070#[derive(Debug)]
1071pub struct LiteralHandler;
1072
1073impl StateHandler for LiteralHandler {
1074    fn can_handle(&self, state: &ParseState) -> bool {
1075        let remaining = state.remaining().trim();
1076        // println!("\n \n Remaining at literal handler check: {} | Context: {:?}", remaining, state.current_context());
1077        (state.current_context() == &JsonContext::Array
1078            || state.current_context() == &JsonContext::Colon
1079            || state.current_context().is_key())
1080            && (remaining.starts_with("true")
1081                || remaining.starts_with("false")
1082                || remaining.starts_with("null")
1083                || remaining.starts_with("undefined"))
1084    }
1085
1086    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1087        let remaining = state.remaining();
1088
1089        if remaining.starts_with("true") {
1090            state.output.push_str("true");
1091            state.advance(4);
1092        } else if remaining.starts_with("false") {
1093            state.output.push_str("false");
1094            state.advance(5);
1095        } else if remaining.starts_with("null") {
1096            state.output.push_str("null");
1097            state.advance(4);
1098        }
1099        else if remaining.starts_with("undefined") {
1100            state.output.push_str("null");
1101            state.advance(9);
1102        }
1103        if state.current_context() != &JsonContext::Array {
1104            state.pop_context(); // if not array it would be a property or colon // what about
1105            // cases where literal appeared right after object
1106        }
1107
1108        Ok(true)
1109    }
1110}
1111
1112const VALID_KEY_FIRST_CHARS: [char; 27] = [
1113    'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
1114    't', 'u', 'v', 'w', 'x', 'y', 'z', '_',
1115];
1116const VALID_KEY_REST_OF_CHARS: [char; 10] = ['1', '2', '3', '4', '5', '6', '7', '8', '9', '0'];
1117
1118#[derive(Debug)]
1119pub struct NoQuotesKeyHandler;
1120
1121impl StateHandler for NoQuotesKeyHandler {
1122    fn can_handle(&self, state: &ParseState) -> bool {
1123        (state.current_context() == &JsonContext::Object)
1124            && (state
1125                .current_char()
1126                .map(|c| VALID_KEY_FIRST_CHARS.contains(&c.to_ascii_lowercase()))
1127                == Some(true))
1128    }
1129
1130    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1131        /*
1132        println!(
1133            "The mother fucker no quote inttervened at: {:?}  \n| {}",
1134            state.output,
1135            state.remaining()
1136        );*/
1137        state.push_context(JsonContext::DoubleQuoteProperty);
1138        state.output.push('"');
1139
1140        while let Some(ch) = state.current_char() {
1141            if VALID_KEY_FIRST_CHARS.contains(&ch.to_ascii_lowercase())
1142                || VALID_KEY_REST_OF_CHARS.contains(&ch)
1143            {
1144                state.output.push(ch);
1145                state.advance(1);
1146            } else {
1147                state.output.push('"');
1148                break;
1149            }
1150        }
1151
1152        Ok(true)
1153    }
1154}
1155
1156#[derive(Debug)]
1157pub struct StringHandler;
1158
1159impl StateHandler for StringHandler {
1160    fn can_handle(&self, state: &ParseState) -> bool {
1161        (state.is_sq_key_or_value() && state.current_char() == Some('\''))
1162            || (state.is_dq_key_or_value() && state.current_char() == Some('"'))
1163            || (!state.is_key_or_value()
1164                && (state.current_char() == Some('"') || state.current_char() == Some('\'')))
1165    }
1166
1167    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1168        /*
1169        println!(
1170
1171            "{:?} | At the beginning of string handler: {}  | Output so far: {}",
1172            state.current_context(),
1173            state.remaining(),
1174            state.output
1175        );*/
1176        let boundary_char = state.current_char().unwrap(); // because this would be
1177        // called only if there
1178        // exists a current char
1179
1180        state.output.push('"');
1181        state.advance(1);
1182
1183        if state.current_context() == &JsonContext::Colon {
1184            state.pop_context();
1185            state.push_context(if boundary_char == '"' {
1186                JsonContext::DoubleQuoteValue
1187            } else {
1188                JsonContext::SingleQuoteValue
1189            });
1190        } else if state.is_prop() {
1191            // what if the colon is already there in json and it could be the next char itself
1192            //
1193            /*
1194            println!(
1195                "the fuck is going on here with this much remaining(something definitely seems wrong here): {}  | Output so far: {}",
1196                state.remaining(),
1197                state.output
1198            );*/
1199            // state.output.push(':');
1200            /*
1201            state.pop_context();
1202            state.push_context(if boundary_char == '"' {
1203                JsonContext::DoubleQuoteValue
1204            } else {
1205                JsonContext::SingleQuoteValue
1206            });*/
1207        } else if state.current_context() == &JsonContext::Array {
1208            state.push_context(if boundary_char == '"' {
1209                JsonContext::DoubleQuoteValue
1210            } else {
1211                JsonContext::SingleQuoteValue
1212            });
1213        } else {
1214            state.push_context(if boundary_char == '"' {
1215                JsonContext::DoubleQuoteProperty
1216            } else {
1217                JsonContext::SingleQuoteProperty
1218            });
1219        }
1220
1221        while let Some(ch) = state.current_char() {
1222            if ch == boundary_char {
1223                state.output.push('"');
1224                state.advance(1);
1225                /*
1226                println!(
1227                    "stopped string-handler at {:?} | Remaning: {:?} | Current: {:?}",
1228                    state.position,
1229                    state.remaining().chars().nth(0),
1230                    state.current_char()
1231                );*/
1232                if state.is_value() {
1233                    state.pop_context();
1234                }
1235                break;
1236            }
1237
1238            if ch == '\\' {
1239                state.output.push('\\');
1240                state.advance(1);
1241                if let Some(escaped) = state.current_char() {
1242                    state.output.push(escaped);
1243                    state.advance(1);
1244                }
1245            } else {
1246                state.output.push(ch);
1247                state.advance(1);
1248            }
1249        }
1250
1251        Ok(true)
1252    }
1253}
1254
1255#[derive(Debug)]
1256pub struct NumberHandler;
1257
1258impl StateHandler for NumberHandler {
1259    fn can_handle(&self, state: &ParseState) -> bool {
1260        state
1261            .current_char()
1262            .map_or(false, |c| c.is_ascii_digit() || c == '-')
1263    }
1264
1265    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1266        if state.current_context() == &JsonContext::Colon {
1267            state.pop_context();
1268            state.push_context(JsonContext::DoubleQuoteValue);
1269        } else if state.current_context() == &JsonContext::DoubleQuoteProperty {
1270            state.pop_context();
1271            state.push_context(JsonContext::DoubleQuoteValue);
1272            state.output.push(':');
1273        } else if state.current_context() == &JsonContext::Array {
1274            state.push_context(JsonContext::DoubleQuoteValue);
1275        } else {
1276            state.push_context(JsonContext::DoubleQuoteProperty);
1277            state.output.push('"');
1278        }
1279
1280        while let Some(ch) = state.current_char() {
1281            if ch.is_ascii_digit() || ch == '-' || ch == '+' || ch == '.' || ch == 'e' || ch == 'E'
1282            {
1283                state.output.push(ch);
1284                state.advance(1);
1285            } else {
1286                break;
1287            }
1288        }
1289
1290        if state.current_context() == &JsonContext::DoubleQuoteValue {
1291            state.pop_context();
1292        } else if state.current_context() == &JsonContext::DoubleQuoteProperty
1293            && state
1294                .current_char()
1295                .map_or(true, |c| c.is_whitespace() || c == ':' || c == '}')
1296        {
1297            state.output.push('"');
1298        }
1299        Ok(true)
1300    }
1301}
1302
1303#[derive(Debug)]
1304pub struct ObjectHandler;
1305
1306impl StateHandler for ObjectHandler {
1307    fn can_handle(&self, state: &ParseState) -> bool {
1308        state.current_char() == Some('{')
1309            || (state.current_context() != &JsonContext::Root && state.current_char() == Some('}'))
1310    }
1311
1312    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1313        if state.current_context() == &JsonContext::Colon {
1314            state.pop_context();
1315        }
1316        if let Some(ch) = state.current_char() {
1317            if ch == '{' {
1318                state.output.push('{');
1319                state.push_context(JsonContext::Object);
1320                state.advance(1);
1321            } else if ch == '}' {
1322                state.output.push('}');
1323                state.pop_context();
1324                state.advance(1);
1325            }
1326        }
1327        Ok(true)
1328    }
1329}
1330
1331#[derive(Debug)]
1332pub struct ArrayHandler;
1333
1334impl StateHandler for ArrayHandler {
1335    fn can_handle(&self, state: &ParseState) -> bool {
1336        state.current_char() == Some('[') || state.current_char() == Some(']')
1337    }
1338
1339    fn handle(&self, state: &mut ParseState) -> Result<bool, FuzzyJsonError> {
1340        if state.current_context() == &JsonContext::Colon {
1341            state.pop_context();
1342        }
1343        if let Some(ch) = state.current_char() {
1344            if ch == '[' {
1345                state.output.push('[');
1346                state.push_context(JsonContext::Array);
1347                state.advance(1);
1348            } else if ch == ']' {
1349                state.output.push(']');
1350                state.pop_context();
1351                state.advance(1);
1352            }
1353        }
1354        Ok(true)
1355    }
1356}
1357
1358// Builder pattern for easy configuration
1359pub struct FuzzyJsonParserBuilder {
1360    options: ParserOptions,
1361    custom_strategies: Vec<Box<dyn RepairStrategy>>,
1362    custom_handlers: Vec<Box<dyn StateHandler>>,
1363}
1364
1365impl FuzzyJsonParserBuilder {
1366    pub fn new() -> Self {
1367        Self {
1368            options: ParserOptions::default(),
1369            custom_strategies: Vec::new(),
1370            custom_handlers: Vec::new(),
1371        }
1372    }
1373
1374    pub fn with_trailing_commas(mut self, allow: bool) -> Self {
1375        self.options.allow_trailing_commas = allow;
1376        self
1377    }
1378
1379    pub fn with_single_quotes(mut self, allow: bool) -> Self {
1380        self.options.allow_single_quotes = allow;
1381        self
1382    }
1383
1384    pub fn with_comments(mut self, allow: bool) -> Self {
1385        self.options.allow_comments = allow;
1386        self
1387    }
1388
1389    pub fn with_unquoted_keys(mut self, allow: bool) -> Self {
1390        self.options.allow_unquoted_keys = allow;
1391        self
1392    }
1393
1394    pub fn strict_mode(mut self, strict: bool) -> Self {
1395        self.options.strict_mode = strict;
1396        self
1397    }
1398
1399    pub fn max_repair_attempts(mut self, max: usize) -> Self {
1400        self.options.max_repair_attempts = max;
1401        self
1402    }
1403
1404    pub fn aggressive_truncation_repair(mut self, enable: bool) -> Self {
1405        self.options.aggressive_truncation_repair = enable;
1406        self
1407    }
1408
1409    pub fn add_strategy(mut self, strategy: Box<dyn RepairStrategy>) -> Self {
1410        self.custom_strategies.push(strategy);
1411        self
1412    }
1413
1414    pub fn add_handler(mut self, handler: Box<dyn StateHandler>) -> Self {
1415        self.custom_handlers.push(handler);
1416        self
1417    }
1418
1419    pub fn build(self) -> FuzzyJsonParser {
1420        let mut parser = FuzzyJsonParser::with_options(self.options);
1421
1422        for strategy in self.custom_strategies {
1423            parser.register_strategy(strategy);
1424        }
1425
1426        for handler in self.custom_handlers {
1427            parser.register_handler(handler);
1428        }
1429
1430        parser
1431    }
1432}
1433
1434impl Default for FuzzyJsonParserBuilder {
1435    fn default() -> Self {
1436        Self::new()
1437    }
1438}