Skip to main content

tool_parser/
partial_json.rs

1use serde_json::{Map, Value};
2
3use crate::{
4    errors::{ParserError, ParserResult},
5    parsers::helpers::is_complete_json,
6    traits::PartialJsonParser,
7};
8
/// Parser for incomplete JSON
///
/// Holds configuration only; every call to `parse_value` builds a fresh
/// internal `Parser` over the supplied input.
pub struct PartialJson {
    /// Maximum depth for nested structures; the internal parser reports
    /// `ParserError::DepthExceeded` when nesting goes past it
    max_depth: usize,
    /// Whether to allow incomplete values (truncated input is turned into a
    /// best-effort value instead of a parse error)
    allow_incomplete: bool,
}
16
17impl PartialJson {
18    /// Create a new partial JSON parser
19    pub fn new(max_depth: usize, allow_incomplete: bool) -> Self {
20        Self {
21            max_depth,
22            allow_incomplete,
23        }
24    }
25
26    /// Parse potentially incomplete JSON, returning parsed value and consumed bytes
27    ///
28    /// # Arguments
29    /// * `input` - The JSON string to parse
30    /// * `allow_partial_strings` - When false, incomplete strings cause parsing to stop
31    ///   (matches Python's Allow.ALL & ~Allow.STR behavior)
32    pub fn parse_value(
33        &self,
34        input: &str,
35        allow_partial_strings: bool,
36    ) -> ParserResult<(Value, usize)> {
37        let mut parser = Parser::new(
38            input,
39            self.max_depth,
40            self.allow_incomplete,
41            allow_partial_strings,
42        );
43        let value = parser.parse_value(0)?;
44        Ok((value, parser.position))
45    }
46}
47
48impl Default for PartialJson {
49    fn default() -> Self {
50        Self::new(32, true)
51    }
52}
53
54impl PartialJsonParser for PartialJson {
55    fn parse(&self, input: &str) -> ParserResult<(Value, usize)> {
56        // Default to allowing partial strings
57        self.parse_value(input, true)
58    }
59
60    fn is_complete(&self, input: &str) -> bool {
61        is_complete_json(input)
62    }
63
64    fn max_depth(&self) -> usize {
65        self.max_depth
66    }
67}
68
/// Internal parser state
///
/// A single-pass cursor over the input characters plus the leniency
/// settings in effect for this parse.
struct Parser<'a> {
    /// Character stream with one character of lookahead
    chars: std::iter::Peekable<std::str::Chars<'a>>,
    /// Running offset into the input, bumped by `advance`; handed back to the
    /// caller as the amount of input consumed
    position: usize,
    /// Nesting limit checked by `parse_value`, `parse_object` and `parse_array`
    max_depth: usize,
    /// When true, truncated or malformed input is turned into a best-effort
    /// value instead of being reported as an error
    allow_incomplete: bool,
    /// When false, an unterminated string is an error even if
    /// `allow_incomplete` is set
    allow_partial_strings: bool,
}
77
78impl<'a> Parser<'a> {
79    fn new(
80        input: &'a str,
81        max_depth: usize,
82        allow_incomplete: bool,
83        allow_partial_strings: bool,
84    ) -> Self {
85        Self {
86            chars: input.chars().peekable(),
87            position: 0,
88            max_depth,
89            allow_incomplete,
90            allow_partial_strings,
91        }
92    }
93
94    fn peek(&mut self) -> Option<char> {
95        self.chars.peek().copied()
96    }
97
98    fn advance(&mut self) {
99        if self.chars.next().is_some() {
100            self.position += 1;
101        }
102    }
103
104    fn skip_whitespace(&mut self) {
105        while let Some(ch) = self.peek() {
106            if ch.is_whitespace() {
107                self.advance();
108            } else {
109                break;
110            }
111        }
112    }
113
114    fn parse_value(&mut self, depth: usize) -> ParserResult<Value> {
115        if depth > self.max_depth {
116            return Err(ParserError::DepthExceeded(self.max_depth));
117        }
118
119        self.skip_whitespace();
120
121        match self.peek() {
122            Some('{') => self.parse_object(depth + 1),
123            Some('[') => self.parse_array(depth + 1),
124            Some('"') => self.parse_string(),
125            Some('t') | Some('f') => self.parse_bool(),
126            Some('n') => self.parse_null(),
127            Some(c) if c == '-' || c.is_ascii_digit() => self.parse_number(),
128            _ => {
129                if self.allow_incomplete {
130                    Ok(Value::Null)
131                } else {
132                    Err(ParserError::ParsingFailed("Unexpected character".into()))
133                }
134            }
135        }
136    }
137
138    fn parse_object(&mut self, depth: usize) -> ParserResult<Value> {
139        if depth > self.max_depth {
140            return Err(ParserError::DepthExceeded(self.max_depth));
141        }
142
143        let mut object = Map::new();
144
145        // Consume '{'
146        self.advance();
147        self.skip_whitespace();
148
149        // Check for empty object
150        if self.peek() == Some('}') {
151            self.advance();
152            return Ok(Value::Object(object));
153        }
154
155        loop {
156            // Parse key
157            let key = match self.parse_string() {
158                Ok(Value::String(s)) => s,
159                Err(_) if self.allow_incomplete => {
160                    // Incomplete object
161                    return Ok(Value::Object(object));
162                }
163                Err(e) => return Err(e),
164                _ => return Err(ParserError::ParsingFailed("Expected string key".into())),
165            };
166
167            self.skip_whitespace();
168
169            // Expect ':'
170            if self.peek() != Some(':') {
171                if self.allow_incomplete {
172                    // Add null value for incomplete pair
173                    object.insert(key, Value::Null);
174                    return Ok(Value::Object(object));
175                }
176                return Err(ParserError::ParsingFailed("Expected ':'".into()));
177            }
178            self.advance();
179            self.skip_whitespace();
180
181            // Parse value (keep same depth - we already incremented in parse_object)
182            let value = match self.parse_value(depth) {
183                Ok(v) => v,
184                Err(_) if self.allow_incomplete => {
185                    // When allow_partial_strings is false, don't add the key with Null
186                    // Just return the object without this incomplete key-value pair
187                    // This matches Python's behavior: Allow.ALL & ~Allow.STR
188                    if self.allow_partial_strings {
189                        // Add null for incomplete value
190                        object.insert(key, Value::Null);
191                    }
192                    return Ok(Value::Object(object));
193                }
194                Err(e) => return Err(e),
195            };
196
197            object.insert(key, value);
198            self.skip_whitespace();
199
200            match self.peek() {
201                Some(',') => {
202                    self.advance();
203                    self.skip_whitespace();
204                    // Check for trailing comma
205                    if self.peek() == Some('}') {
206                        self.advance();
207                        return Ok(Value::Object(object));
208                    }
209                }
210                Some('}') => {
211                    self.advance();
212                    return Ok(Value::Object(object));
213                }
214                None if self.allow_incomplete => {
215                    return Ok(Value::Object(object));
216                }
217                _ => {
218                    if self.allow_incomplete {
219                        return Ok(Value::Object(object));
220                    }
221                    return Err(ParserError::ParsingFailed("Expected ',' or '}'".into()));
222                }
223            }
224        }
225    }
226
227    fn parse_array(&mut self, depth: usize) -> ParserResult<Value> {
228        if depth > self.max_depth {
229            return Err(ParserError::DepthExceeded(self.max_depth));
230        }
231
232        let mut array = Vec::new();
233
234        // Consume '['
235        self.advance();
236        self.skip_whitespace();
237
238        // Check for empty array
239        if self.peek() == Some(']') {
240            self.advance();
241            return Ok(Value::Array(array));
242        }
243
244        loop {
245            // Parse value (keep same depth - we already incremented in parse_object)
246            let value = match self.parse_value(depth) {
247                Ok(v) => v,
248                Err(_) if self.allow_incomplete => {
249                    return Ok(Value::Array(array));
250                }
251                Err(e) => return Err(e),
252            };
253
254            array.push(value);
255            self.skip_whitespace();
256
257            match self.peek() {
258                Some(',') => {
259                    self.advance();
260                    self.skip_whitespace();
261                    // Check for trailing comma
262                    if self.peek() == Some(']') {
263                        self.advance();
264                        return Ok(Value::Array(array));
265                    }
266                }
267                Some(']') => {
268                    self.advance();
269                    return Ok(Value::Array(array));
270                }
271                None if self.allow_incomplete => {
272                    return Ok(Value::Array(array));
273                }
274                _ => {
275                    if self.allow_incomplete {
276                        return Ok(Value::Array(array));
277                    }
278                    return Err(ParserError::ParsingFailed("Expected ',' or ']'".into()));
279                }
280            }
281        }
282    }
283
284    fn parse_string(&mut self) -> ParserResult<Value> {
285        if self.peek() != Some('"') {
286            return Err(ParserError::ParsingFailed("Expected '\"'".into()));
287        }
288
289        // Consume opening quote
290        self.advance();
291
292        let mut string = String::new();
293        let mut escaped = false;
294
295        while let Some(ch) = self.peek() {
296            if escaped {
297                // Handle escape sequences
298                let escaped_char = match ch {
299                    '"' | '\\' | '/' => ch,
300                    'b' => '\u{0008}',
301                    'f' => '\u{000C}',
302                    'n' => '\n',
303                    'r' => '\r',
304                    't' => '\t',
305                    'u' => {
306                        // Unicode escape
307                        self.advance();
308                        let hex = self.parse_unicode_escape()?;
309                        string.push(hex);
310                        escaped = false;
311                        continue;
312                    }
313                    _ => ch, // Invalid escape, but be lenient
314                };
315                string.push(escaped_char);
316                escaped = false;
317            } else if ch == '\\' {
318                escaped = true;
319            } else if ch == '"' {
320                // End of string
321                self.advance();
322                return Ok(Value::String(string));
323            } else {
324                string.push(ch);
325            }
326            self.advance();
327        }
328
329        // Incomplete string
330        if self.allow_incomplete && self.allow_partial_strings {
331            Ok(Value::String(string))
332        } else {
333            Err(ParserError::ParsingFailed("Unterminated string".into()))
334        }
335    }
336
337    fn parse_unicode_escape(&mut self) -> ParserResult<char> {
338        let mut hex = String::new();
339        for _ in 0..4 {
340            if let Some(ch) = self.peek() {
341                if ch.is_ascii_hexdigit() {
342                    hex.push(ch);
343                    self.advance();
344                } else {
345                    break;
346                }
347            } else {
348                break;
349            }
350        }
351
352        if hex.len() == 4 {
353            u32::from_str_radix(&hex, 16)
354                .ok()
355                .and_then(char::from_u32)
356                .ok_or_else(|| ParserError::ParsingFailed("Invalid unicode escape".into()))
357        } else if self.allow_incomplete {
358            Ok('\u{FFFD}') // Replacement character
359        } else {
360            Err(ParserError::ParsingFailed(
361                "Incomplete unicode escape".into(),
362            ))
363        }
364    }
365
366    fn parse_number(&mut self) -> ParserResult<Value> {
367        let mut number = String::new();
368
369        // Handle negative sign
370        if self.peek() == Some('-') {
371            number.push('-');
372            self.advance();
373        }
374
375        // Parse integer part
376        if self.peek() == Some('0') {
377            number.push('0');
378            self.advance();
379        } else {
380            while let Some(ch) = self.peek() {
381                if ch.is_ascii_digit() {
382                    number.push(ch);
383                    self.advance();
384                } else {
385                    break;
386                }
387            }
388        }
389
390        // Parse decimal part
391        if self.peek() == Some('.') {
392            number.push('.');
393            self.advance();
394
395            while let Some(ch) = self.peek() {
396                if ch.is_ascii_digit() {
397                    number.push(ch);
398                    self.advance();
399                } else {
400                    break;
401                }
402            }
403        }
404
405        // Parse exponent
406        if let Some(ch) = self.peek() {
407            if ch == 'e' || ch == 'E' {
408                number.push(ch);
409                self.advance();
410
411                if let Some(sign) = self.peek() {
412                    if sign == '+' || sign == '-' {
413                        number.push(sign);
414                        self.advance();
415                    }
416                }
417
418                while let Some(ch) = self.peek() {
419                    if ch.is_ascii_digit() {
420                        number.push(ch);
421                        self.advance();
422                    } else {
423                        break;
424                    }
425                }
426            }
427        }
428
429        // Try to parse as integer first, then as float
430        if let Ok(n) = number.parse::<i64>() {
431            Ok(Value::Number(serde_json::Number::from(n)))
432        } else if let Ok(n) = number.parse::<f64>() {
433            Ok(Value::Number(
434                serde_json::Number::from_f64(n).unwrap_or_else(|| serde_json::Number::from(0)),
435            ))
436        } else if self.allow_incomplete {
437            Ok(Value::Number(serde_json::Number::from(0)))
438        } else {
439            Err(ParserError::ParsingFailed("Invalid number".into()))
440        }
441    }
442
443    fn parse_bool(&mut self) -> ParserResult<Value> {
444        let mut word = String::new();
445
446        // Peek at upcoming characters to validate it looks like a boolean
447        let mut temp_chars = self.chars.clone();
448        while let Some(&ch) = temp_chars.peek() {
449            if ch.is_alphabetic() && word.len() < 5 {
450                // "false" is 5 chars
451                word.push(ch);
452                temp_chars.next();
453            } else {
454                break;
455            }
456        }
457
458        // Check if it's a valid boolean prefix
459        let is_valid = word == "true"
460            || word == "false"
461            || (self.allow_incomplete && ("true".starts_with(&word) || "false".starts_with(&word)));
462
463        if !is_valid {
464            return Err(ParserError::ParsingFailed("Invalid boolean".into()));
465        }
466
467        // Now actually consume the characters
468        word.clear();
469        while let Some(ch) = self.peek() {
470            if ch.is_alphabetic() {
471                word.push(ch);
472                self.advance();
473            } else {
474                break;
475            }
476        }
477
478        match word.as_str() {
479            "true" => Ok(Value::Bool(true)),
480            "false" => Ok(Value::Bool(false)),
481            partial if self.allow_incomplete => {
482                if "true".starts_with(partial) {
483                    Ok(Value::Bool(true))
484                } else if "false".starts_with(partial) {
485                    Ok(Value::Bool(false))
486                } else {
487                    Err(ParserError::ParsingFailed("Invalid boolean".into()))
488                }
489            }
490            _ => Err(ParserError::ParsingFailed("Invalid boolean".into())),
491        }
492    }
493
494    fn parse_null(&mut self) -> ParserResult<Value> {
495        let mut word = String::new();
496
497        // Peek at upcoming characters to validate it looks like "null"
498        let mut temp_chars = self.chars.clone();
499        while let Some(&ch) = temp_chars.peek() {
500            if ch.is_alphabetic() && word.len() < 4 {
501                // "null" is 4 chars
502                word.push(ch);
503                temp_chars.next();
504            } else {
505                break;
506            }
507        }
508
509        // Check if it's a valid null prefix
510        let is_valid = word == "null" || (self.allow_incomplete && "null".starts_with(&word));
511
512        if !is_valid {
513            return Err(ParserError::ParsingFailed("Invalid null".into()));
514        }
515
516        // Now actually consume the characters
517        word.clear();
518        while let Some(ch) = self.peek() {
519            if ch.is_alphabetic() {
520                word.push(ch);
521                self.advance();
522            } else {
523                break;
524            }
525        }
526
527        if word == "null" || (self.allow_incomplete && "null".starts_with(&word)) {
528            Ok(Value::Null)
529        } else {
530            Err(ParserError::ParsingFailed("Invalid null".into()))
531        }
532    }
533}