// toy_json_formatter/parser.rs

1use std::{borrow::Cow, collections::HashMap};
2
3use generic_tokenizer::Location;
4
5use crate::{
6    tokenizer::{JsonParseErr, JsonToken, JsonTokenKind, JsonTokenizer, Span},
7    JsonParseState,
8};
9
/// Fallback key text.
///
/// NOTE(review): not referenced in the visible part of this file; presumably
/// consumed by `JsonString::default()` — confirm before changing the value.
static DEFAULT_KEY: &str = "unknown_key";
/// [`DEFAULT_KEY`] wrapped in a borrowed `Cow`, so using it never allocates.
static DEFAULT_KEY_COW: Cow<'static, str> = Cow::Borrowed(DEFAULT_KEY);
12
13pub(crate) struct JsonParser<'json> {
14    tokenizer: JsonTokenizer<'json>,
15    json: &'json str,
16    lookahead: Option<JsonToken>,
17    states: Vec<JsonParseState>,
18    values_being_built: Vec<ValueInProgress<'json>>,
19    errs: Vec<JsonParseErr>,
20}
21
22impl<'json> JsonParser<'json> {
23    fn new(json: &'json str) -> Self {
24        Self {
25            tokenizer: JsonTokenizer::new(json),
26            json,
27            lookahead: None,
28            states: vec![JsonParseState::Value],
29            values_being_built: Vec::new(),
30            errs: Vec::new(),
31        }
32    }
33
34    pub(crate) fn parse(json: &'json str) -> (Value<'json>, Vec<JsonParseErr>) {
35        Self::new(json).parse_internal()
36    }
37
38    fn parse_internal(mut self) -> (Value<'json>, Vec<JsonParseErr>) {
39        loop {
40            match self.states.pop() {
41                None => {
42                    self.match_token(JsonTokenKind::Comma);
43                    if self.states.is_empty() {
44                        match self.next_token() {
45                            None => {
46                                return (
47                                    self.unwind_full_value_stack()
48                                        .unwrap_or_else(|| Value::Null),
49                                    self.errs,
50                                )
51                            }
52                            Some(token) => {
53                                self.lookahead = Some(token);
54                                self.states.push(JsonParseState::Value);
55                            }
56                        }
57                    }
58                }
59                Some(state) => {
60                    match state {
61                        JsonParseState::Value => {
62                            match self.next_token() {
63                                None => {
64                                    return (
65                                        self.unwind_full_value_stack()
66                                            .unwrap_or_else(|| Value::Null),
67                                        self.errs,
68                                    );
69                                }
70                                Some(token) => {
71                                    if !self.states.is_empty() {
72                                        self.states.push(JsonParseState::AfterValue);
73                                    }
74                                    match &token.kind {
75                                        JsonTokenKind::ObjectStart => {
76                                            self.values_being_built.push(ValueInProgress::Object(
77                                                ObjectInProgress::new(),
78                                            ));
79
80                                            self.states.push(JsonParseState::Object);
81                                        }
82                                        JsonTokenKind::ArrayStart => {
83                                            self.values_being_built
84                                                .push(ValueInProgress::Array(Vec::new()));
85                                            self.states.push(JsonParseState::Array);
86                                        }
87                                        JsonTokenKind::String => {
88                                            self.values_being_built.push(ValueInProgress::String(
89                                                JsonString::new(self.json, token.span),
90                                            ));
91                                            // We just pushed a value on, so this should never fail
92                                            assert!(self.unwind_value_stack_once(), "BUG: Expected value to be on the stack since one was just added.");
93                                        }
94                                        JsonTokenKind::Number => {
95                                            self.values_being_built.push(ValueInProgress::Number(
96                                                JsonNumber::new(&self.json[token.span.as_range()]),
97                                            ));
98                                            assert!(self.unwind_value_stack_once(), "BUG: Expected value to be on the stack since one was just added.");
99                                        }
100                                        JsonTokenKind::True => {
101                                            self.values_being_built
102                                                .push(ValueInProgress::Bool(true));
103                                            assert!(self.unwind_value_stack_once(),"BUG: Expected value to be on the stack since one was just added.");
104                                        }
105                                        JsonTokenKind::False => {
106                                            self.values_being_built
107                                                .push(ValueInProgress::Bool(false));
108                                            assert!(self.unwind_value_stack_once(), "BUG: Expected value to be on the stack since one was just added.");
109                                        }
110                                        JsonTokenKind::Null => {
111                                            self.values_being_built.push(ValueInProgress::Null);
112                                            assert!(self.unwind_value_stack_once(), "BUG: Expected value to be on the stack since one was just added.");
113                                        }
114                                        // Explicitly specifying all kinds to make refactoring
115                                        // easier later.
116                                        JsonTokenKind::ArrayEnd
117                                        | JsonTokenKind::ObjectEnd
118                                        | JsonTokenKind::Colon
119                                        | JsonTokenKind::Comma => {
120                                            self.lookahead = Some(token);
121                                            self.states.push(JsonParseState::Value);
122                                            self.recover_in_panic_mode();
123                                        }
124                                    }
125                                }
126                            }
127                        }
128                        JsonParseState::Object => {
129                            if self.match_token(JsonTokenKind::ObjectEnd).is_some() {
130                                self.pop_object();
131                                continue;
132                            }
133
134                            self.states.push(JsonParseState::Object);
135                            self.states.push(JsonParseState::Value);
136                            self.states.push(JsonParseState::KeyValuePairColon);
137                            self.states.push(JsonParseState::KeyValuePairKey);
138                            continue;
139                        }
140                        JsonParseState::KeyValuePairColon => {
141                            if self.match_token(JsonTokenKind::Colon).is_some() {
142                                continue;
143                            } else {
144                                self.recover_in_panic_mode();
145                            }
146                        }
147                        JsonParseState::KeyValuePairKey => {
148                            if let Some(str_token) = self.match_token(JsonTokenKind::String) {
149                                match self.values_being_built.pop() {
150                                    Some(ValueInProgress::Object(mut obj)) => {
151                                        obj.active_key =
152                                            Some(JsonString::new(self.json, str_token.span));
153                                        self.values_being_built.push(ValueInProgress::Object(obj));
154                                    }
155                                    None
156                                    | Some(ValueInProgress::Null)
157                                    | Some(ValueInProgress::Bool(_))
158                                    | Some(ValueInProgress::Number(_))
159                                    | Some(ValueInProgress::String(_))
160                                    | Some(ValueInProgress::Array(_)) => {
161                                        self.recover_in_panic_mode();
162                                    }
163                                }
164                            }
165                        }
166                        JsonParseState::AfterValue => {
167                            if self.match_token(JsonTokenKind::Comma).is_none() {
168                                continue;
169                            }
170
171                            match self
172                                .states
173                                .last()
174                                .expect("BUG: States should include at least 1 value")
175                            {
176                                JsonParseState::Object => {
177                                    self.states.push(JsonParseState::Value);
178                                    self.states.push(JsonParseState::KeyValuePairColon);
179                                    self.states.push(JsonParseState::KeyValuePairKey);
180                                }
181                                JsonParseState::Array => {
182                                    self.states.push(JsonParseState::Value);
183                                }
184                                JsonParseState::KeyValuePairColon
185                                | JsonParseState::KeyValuePairKey
186                                | JsonParseState::Value
187                                | JsonParseState::AfterValue => {
188                                    self.recover_in_panic_mode();
189                                }
190                            }
191                        }
192                        JsonParseState::Array => {
193                            if let Some(_) = self.match_token(JsonTokenKind::ArrayEnd) {
194                                self.pop_array();
195                                continue;
196                            }
197
198                            self.states.push(JsonParseState::Array);
199                            self.states.push(JsonParseState::Value);
200                        }
201                    }
202                }
203            }
204        }
205    }
206
207    fn pop_object(&mut self) {
208        if !self
209            .values_being_built
210            .iter()
211            .rev()
212            .any(|val| matches!(val, ValueInProgress::Object(_)))
213        {
214            return;
215        }
216
217        loop {
218            match self.values_being_built.pop() {
219                None => break,
220                Some(ValueInProgress::Object(obj)) => {
221                    self.values_being_built.push(ValueInProgress::Object(obj));
222                    self.unwind_value_stack_once();
223                    break;
224                }
225                Some(other_val) => {
226                    self.values_being_built.push(other_val);
227                    self.unwind_value_stack_once();
228                }
229            }
230        }
231    }
232
233    fn pop_array(&mut self) {
234        if !self
235            .values_being_built
236            .iter()
237            .rev()
238            .any(|val| matches!(val, ValueInProgress::Array(_)))
239        {
240            return;
241        }
242
243        loop {
244            match self.values_being_built.pop() {
245                None => break,
246                Some(ValueInProgress::Array(vec)) => {
247                    self.values_being_built.push(ValueInProgress::Array(vec));
248                    self.unwind_value_stack_once();
249                    break;
250                }
251                Some(other) => {
252                    self.values_being_built.push(other);
253                    self.unwind_value_stack_once();
254                }
255            }
256        }
257    }
258
259    fn unwind_full_value_stack(&mut self) -> Option<Value<'json>> {
260        loop {
261            self.unwind_value_stack_once();
262            if self.values_being_built.len() <= 1 {
263                break;
264            }
265        }
266
267        match self.values_being_built.pop() {
268            None => None,
269            Some(val) => Some(val.into()),
270        }
271    }
272
273    /// Unwinds one value from the value stack.
274    ///
275    /// Returns false if no value was able to be
276    /// unwound, true otherwise
277    fn unwind_value_stack_once(&mut self) -> bool {
278        if let Some(top) = self.values_being_built.pop() {
279            match self.values_being_built.pop() {
280                None => {
281                    self.values_being_built.push(top);
282                    return true;
283                }
284                Some(new_top) => {
285                    match new_top {
286                        ValueInProgress::Array(mut vec) => {
287                            vec.push(top.into());
288                            self.values_being_built.push(ValueInProgress::Array(vec));
289                            return true;
290                        }
291                        ValueInProgress::Object(mut obj) => {
292                            let key = match std::mem::take(&mut obj.active_key) {
293                                None => JsonString::default(),
294                                Some(obj_key) => obj_key,
295                            };
296
297                            let mut key_for_map = key;
298                            if obj.map.contains_key(&key_for_map) {
299                                if key_for_map.span.is_some() {
300                                    self.errs.push(JsonParseErr::DuplicateObjectKeys(
301                                        match obj
302                                            .map
303                                            .get_key_value(&key_for_map)
304                                            .unwrap()
305                                            .0
306                                            .span
307                                            .as_ref()
308                                        {
309                                            None => Location::default(),
310                                            Some(span) => span.start.clone(),
311                                        },
312                                        match key_for_map.span.as_ref() {
313                                            None => Location::default(),
314                                            Some(span) => span.start.clone(),
315                                        },
316                                    ));
317                                }
318
319                                let mut counter: u64 = 0;
320                                // most of the time this should take < 10 iterations,
321                                // so only allocate space for 1 additional ascii digit.
322                                let mut sanitized =
323                                    String::with_capacity(key_for_map.parsed.as_ref().len() + 1);
324                                sanitized.push_str(&key_for_map.sanitized);
325                                let mut parsed =
326                                    String::with_capacity(key_for_map.parsed.len() + 1);
327                                parsed.push_str(&key_for_map.parsed);
328                                key_for_map = JsonString {
329                                    span: None,
330                                    sanitized: Cow::Owned(sanitized),
331                                    parsed: Cow::Owned(parsed),
332                                };
333
334                                loop {
335                                    let counter_str = counter.to_string();
336                                    for cow in [&mut key_for_map.sanitized, &mut key_for_map.parsed]
337                                    {
338                                        if let Cow::Owned(string) = cow {
339                                            string.push_str(&counter_str);
340                                        } else {
341                                            unreachable!();
342                                        }
343                                    }
344
345                                    if !obj.map.contains_key(&key_for_map) {
346                                        break;
347                                    }
348
349                                    for cow in [&mut key_for_map.parsed, &mut key_for_map.sanitized]
350                                    {
351                                        if let Cow::Owned(string) = cow {
352                                            for _ in counter_str.chars() {
353                                                string.pop();
354                                            }
355                                        } else {
356                                            unreachable!();
357                                        }
358                                    }
359                                    counter += 1;
360                                }
361                            }
362
363                            obj.insert(key_for_map, top.into());
364                            self.values_being_built.push(ValueInProgress::Object(obj));
365                            return true;
366                        }
367                        ValueInProgress::Null
368                        | ValueInProgress::Bool(_)
369                        | ValueInProgress::String(_)
370                        | ValueInProgress::Number(_) => {
371                            self.values_being_built
372                                .push(ValueInProgress::Array(vec![new_top.into(), top.into()]));
373                            return true;
374                        }
375                    }
376                }
377            }
378        } else {
379            return false;
380        }
381    }
382
383    fn recover_in_panic_mode(&mut self) {
384        loop {
385            match self.next_token() {
386                None => break,
387                Some(token) => match token.kind {
388                    JsonTokenKind::ArrayStart | JsonTokenKind::ObjectStart => {
389                        self.lookahead = Some(token);
390                        break;
391                    }
392                    JsonTokenKind::ArrayEnd => {
393                        if !self
394                            .states
395                            .iter()
396                            .rev()
397                            .any(|state| *state == JsonParseState::Array)
398                        {
399                            continue;
400                        }
401
402                        while let Some(state) = self.states.pop() {
403                            if let JsonParseState::Array = state {
404                                break;
405                            }
406                        }
407
408                        self.lookahead = Some(token);
409                        break;
410                    }
411                    JsonTokenKind::ObjectEnd => {
412                        if !self
413                            .states
414                            .iter()
415                            .rev()
416                            .any(|state| *state == JsonParseState::Array)
417                        {
418                            continue;
419                        }
420
421                        while let Some(state) = self.states.pop() {
422                            if let JsonParseState::Object = state {
423                                break;
424                            }
425                        }
426
427                        self.lookahead = Some(token);
428                        break;
429                    }
430                    JsonTokenKind::Comma => {
431                        if !self.states.iter().any(|state| {
432                            matches!(state, JsonParseState::Array | JsonParseState::Object)
433                        }) {
434                            continue;
435                        }
436
437                        while let Some(state) = self.states.pop() {
438                            match state {
439                                JsonParseState::Array => {
440                                    self.states.push(JsonParseState::Array);
441                                    break;
442                                }
443                                JsonParseState::Object => {
444                                    self.states.push(JsonParseState::Object);
445                                    self.states.push(JsonParseState::Value);
446                                    self.states.push(JsonParseState::KeyValuePairColon);
447                                    self.states.push(JsonParseState::KeyValuePairKey);
448                                    break;
449                                }
450                                _ => {}
451                            }
452                        }
453                    }
454                    JsonTokenKind::Null => {
455                        self.values_being_built.push(ValueInProgress::Null);
456                        self.unwind_value_stack_once();
457                        break;
458                    }
459                    JsonTokenKind::Number => {
460                        self.values_being_built
461                            .push(ValueInProgress::Number(JsonNumber::new(
462                                &self.json[token.span.as_range()],
463                            )));
464                        self.unwind_value_stack_once();
465                        break;
466                    }
467                    JsonTokenKind::String => {
468                        self.values_being_built
469                            .push(ValueInProgress::String(JsonString::new(
470                                self.json, token.span,
471                            )));
472                        self.unwind_value_stack_once();
473                        break;
474                    }
475                    JsonTokenKind::True => {
476                        self.values_being_built.push(ValueInProgress::Bool(true));
477                        self.unwind_value_stack_once();
478                        break;
479                    }
480                    JsonTokenKind::False => {
481                        self.values_being_built.push(ValueInProgress::Bool(false));
482                        self.unwind_value_stack_once();
483                        break;
484                    }
485                    JsonTokenKind::Colon => {}
486                },
487            }
488        }
489    }
490
491    fn match_token(&mut self, kind: JsonTokenKind) -> Option<JsonToken> {
492        match self.next_token() {
493            None => return None,
494            Some(token) => {
495                if token.kind == kind {
496                    return Some(token);
497                }
498                self.lookahead = Some(token);
499                return None;
500            }
501        }
502    }
503
504    fn next_token(&mut self) -> Option<JsonToken> {
505        if let Some(token) = std::mem::take(&mut self.lookahead) {
506            return Some(token);
507        }
508
509        loop {
510            match self.tokenizer.next() {
511                None => return None,
512                Some(result) => match result {
513                    Ok(token) => return Some(token),
514                    Err(err) => match &err {
515                        // if there are more tokens we want them, so continue
516                        JsonParseErr::UnexpectedEOF
517                        // not meaningful to parser, so skip
518                        | JsonParseErr::TrailingComma(_)
519                        // defer to string parser to handle
520                        | JsonParseErr::UnclosedString(_)
521                        // defer to string parser to handle
522                        | JsonParseErr::InvalidUnicodeEscapeSequence(_)
523                        // defer to number parser to handle
524                        | JsonParseErr::IllegalLeading0(_)
525                        | JsonParseErr::DuplicateObjectKeys(_, _) => {
526                            self.errs.push(err);
527                        }
528                        // We want these sequences to show up in the output,
529                        // so label them as strings.
530                        JsonParseErr::UnexpectedCharacters(span) => {
531                            let span = span.clone();
532                            self.errs.push(err);
533                            if span.end.byte_index() != span.start.byte_index() {
534                                return Some(JsonToken {
535                                    kind: JsonTokenKind::String,
536                                    span,
537                                });
538                            }
539                        }
540                    },
541                },
542            }
543        }
544    }
545}
546
547enum ValueInProgress<'json> {
548    Null,
549    Bool(bool),
550    Number(JsonNumber<'json>),
551    String(JsonString<'json>),
552    Array(Vec<Value<'json>>),
553    Object(ObjectInProgress<'json>),
554}
555
556impl<'json> Into<Value<'json>> for ValueInProgress<'json> {
557    fn into(self) -> Value<'json> {
558        match self {
559            ValueInProgress::Null => Value::Null,
560            ValueInProgress::Bool(bool) => Value::Bool(bool),
561            ValueInProgress::Number(num) => Value::Number(num),
562            ValueInProgress::String(str) => Value::String(str),
563            ValueInProgress::Array(arr) => Value::Array(arr),
564            ValueInProgress::Object(obj) => Value::Object(obj.into()),
565        }
566    }
567}
568
569struct ObjectInProgress<'json> {
570    active_key: Option<JsonString<'json>>,
571    keys_in_found_order: Vec<Cow<'json, str>>,
572    map: HashMap<JsonString<'json>, Value<'json>>,
573}
574
575impl<'json> Into<Object<'json>> for ObjectInProgress<'json> {
576    fn into(mut self) -> Object<'json> {
577        if let Some(active_key) = std::mem::take(&mut self.active_key) {
578            self.insert(active_key, Value::Null);
579        }
580
581        Object {
582            map: self.map,
583            keys_in_found_order: self.keys_in_found_order,
584        }
585    }
586}
587
588impl<'json> ObjectInProgress<'json> {
589    fn new() -> Self {
590        Self {
591            active_key: None,
592            keys_in_found_order: Vec::new(),
593            map: HashMap::new(),
594        }
595    }
596
597    fn insert(&mut self, key: JsonString<'json>, value: Value<'json>) {
598        self.keys_in_found_order.push(key.parsed.to_owned());
599        if self.map.contains_key(&key) {
600            let mut new_keys = Vec::with_capacity(self.keys_in_found_order.capacity());
601            for found in self.keys_in_found_order.iter() {
602                if found.as_ref() == key.parsed.as_ref() {
603                    continue;
604                }
605                new_keys.push(found);
606            }
607        }
608        self.map.insert(key, value);
609    }
610}
611
612#[derive(Debug)]
613pub enum Value<'json> {
614    Null,
615    Bool(bool),
616    Number(JsonNumber<'json>),
617    String(JsonString<'json>),
618    Array(Vec<Value<'json>>),
619    Object(Object<'json>),
620}
621
622impl<'json> Value<'json> {
623    /// Retrieves all values from within this value recursively.
624    pub fn get_all_leaves_iter_mut(&mut self) -> impl Iterator<Item = &mut Value<'json>> {
625        let mut result = Vec::new();
626        match self {
627            Value::Null | Value::Bool(_) | Value::Number(_) | Value::String(_) => result.push(self),
628            Value::Array(arr) => {
629                for item in arr.iter_mut() {
630                    for sub_value in item.get_all_leaves_iter_mut() {
631                        result.push(sub_value);
632                    }
633                }
634            }
635            Value::Object(obj) => {
636                for kvp in obj.map.iter_mut() {
637                    for sub_value in kvp.1.get_all_leaves_iter_mut() {
638                        result.push(sub_value);
639                    }
640                }
641            }
642        }
643        result.into_iter()
644    }
645}
646
647#[derive(Debug)]
648pub struct Object<'json> {
649    map: HashMap<JsonString<'json>, Value<'json>>,
650    keys_in_found_order: Vec<Cow<'json, str>>,
651}
652
653impl<'json> Value<'json> {
654    pub fn to_string(&self) -> String {
655        let mut result = String::new();
656        self.to_string_helper(&mut result, false, 0, "  ");
657        result
658    }
659
660    pub fn to_string_pretty(&self) -> String {
661        let mut result = String::new();
662        self.to_string_helper(&mut result, true, 0, "  ");
663        result
664    }
665
666    pub fn to_string_pretty_with_indent_str(&self, indent_str: &str) -> String {
667        let mut result = String::new();
668        self.to_string_helper(&mut result, true, 0, indent_str);
669        result
670    }
671
672    fn to_string_helper(
673        &self,
674        buf: &mut String,
675        pretty: bool,
676        indent_level: usize,
677        indent_str: &str,
678    ) {
679        match self {
680            Self::Null => {
681                buf.push_str("null");
682            }
683            Self::Bool(bool) => {
684                if *bool {
685                    buf.push_str("true");
686                } else {
687                    buf.push_str("false");
688                }
689            }
690            Self::Number(num) => {
691                buf.push_str(num.sanitized);
692            }
693            Self::String(str) => {
694                Self::to_string_for_string(str, buf);
695            }
696            Self::Array(vec) => {
697                buf.push('[');
698
699                for (i, item) in vec.iter().enumerate() {
700                    if i != 0 {
701                        buf.push(',');
702                    }
703                    if pretty {
704                        buf.push('\n');
705                        for _ in 0..indent_level + 1 {
706                            buf.push_str(indent_str);
707                        }
708                    }
709                    item.to_string_helper(buf, pretty, indent_level + 1, indent_str);
710                }
711
712                if pretty && vec.len() > 0 {
713                    buf.push('\n');
714                    for _ in 0..indent_level {
715                        buf.push_str(indent_str);
716                    }
717                }
718
719                buf.push(']');
720            }
721            Self::Object(obj) => {
722                buf.push('{');
723
724                for (i, cow) in obj.keys_in_found_order.iter().enumerate() {
725                    if i != 0 {
726                        buf.push(',');
727                    }
728                    if pretty {
729                        buf.push('\n');
730                        for _ in 0..indent_level + 1 {
731                            buf.push_str(indent_str);
732                        }
733                    }
734                    let key = obj
735                        .map
736                        .get_key_value(&JsonString {
737                            span: None,
738                            parsed: cow.clone(),
739                            sanitized: Cow::Borrowed(""),
740                        })
741                        .expect("BUG: key to be in the object")
742                        .0;
743                    Self::to_string_for_string(key, buf);
744                    buf.push(':');
745                    if pretty {
746                        buf.push(' ');
747                    }
748
749                    let value = obj.map.get(&JsonString {
750                        span: None,
751                        parsed: cow.clone(), // hash is only determined by the parsed value
752                        sanitized: Cow::Borrowed(""),
753                    }).expect("BUG: values in the keys in found order vec should always be in the object hashmap as well.");
754                    value.to_string_helper(buf, pretty, indent_level + 1, indent_str);
755                }
756
757                if pretty && obj.keys_in_found_order.len() > 0 {
758                    buf.push('\n');
759                    for _ in 0..indent_level {
760                        buf.push_str(indent_str);
761                    }
762                }
763
764                buf.push('}')
765            }
766        }
767    }
768
769    fn to_string_for_string(str: &JsonString, buf: &mut String) {
770        buf.push('"');
771        buf.push_str(&str.sanitized);
772        buf.push('"');
773    }
774}
775
/// A JSON number, kept as slices of the text it was created from.
#[derive(Clone, Debug)]
pub struct JsonNumber<'json> {
    /// The number text exactly as passed to [`JsonNumber::new`].
    #[allow(dead_code)]
    source: &'json str,
    /// `source` with redundant leading zeroes removed (see [`JsonNumber::sanitize`]).
    sanitized: &'json str,
}

impl<'json> JsonNumber<'json> {
    /// Wraps `source`, computing its sanitized form up front.
    pub(crate) fn new(source: &'json str) -> Self {
        Self {
            source,
            sanitized: Self::sanitize(source),
        }
    }

    /// Trims leading zeroes that JSON does not allow, returning a sub-slice of `source`.
    ///
    /// * `"007"` becomes `"7"`: zeroes in front of another digit are dropped.
    /// * `"00.5"` / `"00e3"` become `"0.5"` / `"0e3"`: exactly one zero is kept in front of
    ///   a fraction or exponent.
    /// * `"00"` becomes `"0"`: a run of nothing but zeroes collapses to a single zero.
    ///
    /// Text that does not start with `'0'` is returned unchanged. That includes negative
    /// numbers, because the sign has to stay and a borrowed slice cannot skip the zeroes
    /// behind it. Zeroes followed by any other character are left unchanged as well.
    fn sanitize(source: &str) -> &str {
        let rest = source.trim_start_matches('0');
        // '0' is a single byte, so the byte difference is the number of zeroes removed.
        let zero_count = source.len() - rest.len();
        if zero_count == 0 {
            return source;
        }

        match rest.chars().next() {
            // Keep the last zero so the result still has an integer part.
            None | Some('.' | 'e' | 'E') => &source[zero_count - 1..],
            Some('1'..='9') => rest,
            Some(_) => source,
        }
    }
}
816
817#[derive(Clone, Debug)]
818pub struct JsonString<'json> {
819    pub(crate) span: Option<Span>,
820    pub(crate) parsed: Cow<'json, str>,
821    pub(crate) sanitized: Cow<'json, str>,
822}
823
824impl<'json> JsonString<'json> {
825    pub(crate) fn new(original_json: &'json str, span: Span) -> Self {
826        let range = span.as_range();
827        Self {
828            span: Some(span),
829            parsed: Self::unescape(&original_json[range.clone()]),
830            sanitized: Self::sanitize(&original_json[range]),
831        }
832    }
833
834    pub fn unescape(source: &str) -> Cow<'_, str> {
835        let result = Self::parse(source, true);
836        #[cfg(debug_assertions)]
837        println!("\n\n\nUnescaped: {source}\n\n\nResult: {result}\n\n\n");
838        result
839    }
840
841    fn sanitize(source: &str) -> Cow<'_, str> {
842        let result = Self::parse(source, false);
843        #[cfg(debug_assertions)]
844        println!("\n\n\nUnescaped: {source}\n\n\nSanitized: {result}\n\n\n");
845        result
846    }
847
848    fn parse(mut source: &str, replace_escape_chars: bool) -> Cow<'_, str> {
849        // Remove the quote at the beginning (if there is one)
850        if let Some('"') = source.chars().next() {
851            source = &source[1..];
852        }
853
854        let mut chars = source.char_indices().peekable();
855        let mut cow = Cow::Borrowed(source);
856
857        loop {
858            let ch = chars.next();
859            match ch {
860                None => break,
861                Some((i, ch)) => {
862                    match ch {
863                        '\\' => {
864                            let mut string = match cow {
865                                Cow::Borrowed(_) => source[..i].to_string(),
866                                Cow::Owned(string) => string,
867                            };
868
869                            if !replace_escape_chars {
870                                string.push('\\');
871                            }
872
873                            match chars.next() {
874                                None => {
875                                    string.push('\\');
876                                    cow = Cow::Owned(string);
877                                    break;
878                                }
879                                Some((_, next_ch)) => {
880                                    let ch_to_add = match next_ch {
881                                        '"' => '"',
882                                        '\\' => '\\',
883                                        '/' => '/',
884                                        'b' => {
885                                            if replace_escape_chars {
886                                                '\u{0008}'
887                                            } else {
888                                                'b'
889                                            }
890                                        }
891                                        'f' => {
892                                            if replace_escape_chars {
893                                                '\u{000c}'
894                                            } else {
895                                                'f'
896                                            }
897                                        }
898                                        'n' => {
899                                            if replace_escape_chars {
900                                                '\n'
901                                            } else {
902                                                'n'
903                                            }
904                                        }
905                                        'r' => {
906                                            if replace_escape_chars {
907                                                '\r'
908                                            } else {
909                                                'r'
910                                            }
911                                        }
912                                        't' => {
913                                            if replace_escape_chars {
914                                                '\t'
915                                            } else {
916                                                't'
917                                            }
918                                        }
919                                        'u' => {
920                                            let mut code = String::with_capacity(4);
921                                            let mut is_valid_unicode_escape = true;
922
923                                            for _ in 0..4 {
924                                                match chars.peek() {
925                                                    Some((_, ch)) => {
926                                                        if ch.is_ascii_hexdigit() {
927                                                            code.push(chars.next().unwrap().1);
928                                                        } else {
929                                                            is_valid_unicode_escape = false;
930                                                            string.push_str("\\u");
931                                                            string.push_str(&code);
932                                                            break;
933                                                        }
934                                                    }
935                                                    _ => {
936                                                        is_valid_unicode_escape = false;
937                                                        string.push_str("\\u");
938                                                        string.push_str(&code);
939                                                        break;
940                                                    }
941                                                }
942                                            }
943
944                                            if is_valid_unicode_escape {
945                                                match u32::from_str_radix(&code, 16) {
946                                                    Ok(parsed) => match char::from_u32(parsed) {
947                                                        Some(ch) => {
948                                                            if replace_escape_chars {
949                                                                string.push(ch);
950                                                            } else {
951                                                                string.push('u');
952                                                                string.push_str(&code);
953                                                            }
954                                                        }
955                                                        None => {
956                                                            string.push_str("\\u");
957                                                            string.push_str(&code);
958                                                        }
959                                                    },
960                                                    Err(_) => {
961                                                        string.push_str("\\u");
962                                                        string.push_str(&code);
963                                                    }
964                                                }
965                                            }
966
967                                            // We're doing custom additions to the string, so no
968                                            // need to pass a character to the outer loop
969                                            cow = Cow::Owned(string);
970                                            continue;
971                                        }
972                                        ch => {
973                                            // a '\\' gets added if we're not replacing escape chars for
974                                            // all the other code paths, so we need to account for that.
975                                            if replace_escape_chars {
976                                                string.push('\\');
977                                            }
978                                            string.push('\\');
979                                            string.push(ch);
980                                            cow = Cow::Owned(string);
981                                            continue;
982                                        }
983                                    };
984
985                                    string.push(ch_to_add);
986                                    cow = Cow::Owned(string)
987                                }
988                            }
989                        }
990                        '"' => {
991                            // don't escape the ending quote. Quotes are ascii, so -1 is safe
992                            if i == source.len() - 1 {
993                                cow = match cow {
994                                    Cow::Owned(string) => Cow::Owned(string),
995                                    Cow::Borrowed(str) => Cow::Borrowed(&str[..str.len() - 1]),
996                                };
997                                continue;
998                            }
999
1000                            let mut string = match cow {
1001                                Cow::Borrowed(_) => source[..i].to_string(),
1002                                Cow::Owned(string) => string,
1003                            };
1004
1005                            string.push_str("\\\"");
1006                            cow = Cow::Owned(string);
1007                        }
1008                        '\n' | '\u{0008}' | '\u{000c}' | '\r' | '\t' => {
1009                            if replace_escape_chars {
1010                                cow = match cow {
1011                                    Cow::Borrowed(_) => Cow::Borrowed(&source[..i]),
1012                                    Cow::Owned(mut string) => {
1013                                        string.push(ch);
1014                                        Cow::Owned(string)
1015                                    }
1016                                };
1017                                continue;
1018                            }
1019
1020                            let mut string = match cow {
1021                                Cow::Borrowed(_) => source[..i].to_string(),
1022                                Cow::Owned(string) => string,
1023                            };
1024
1025                            string.push('\\');
1026                            string.push(match ch {
1027                                '\u{0008}' => 'b',
1028                                '\u{000c}' => 'f',
1029                                '\n' => 'n',
1030                                '\r' => 'r',
1031                                '\t' => 't',
1032                                _ => unreachable!(),
1033                            });
1034                            cow = Cow::Owned(string);
1035                        }
1036                        ch => {
1037                            if ch.is_control() {
1038                                if replace_escape_chars {
1039                                    cow = match cow {
1040                                        Cow::Borrowed(_) => Cow::Owned(source[..i].to_string()),
1041                                        Cow::Owned(mut string) => {
1042                                            string.push(ch);
1043                                            Cow::Owned(string)
1044                                        }
1045                                    };
1046                                    continue;
1047                                }
1048
1049                                cow = match cow {
1050                                    Cow::Owned(mut string) => {
1051                                        string.push_str("\\u");
1052                                        let num_string = (ch as u32).to_string();
1053                                        for _ in 0..(4 - num_string.len()) {
1054                                            string.push('0');
1055                                        }
1056                                        string.push_str(&num_string);
1057                                        Cow::Owned(string)
1058                                    }
1059                                    Cow::Borrowed(str) => {
1060                                        let mut string = str.to_string();
1061                                        string.push_str("\\u");
1062                                        let num_string = (ch as u32).to_string();
1063                                        for _ in 0..(4 - num_string.len()) {
1064                                            string.push('0');
1065                                        }
1066                                        string.push_str(&num_string);
1067                                        Cow::Owned(string)
1068                                    }
1069                                };
1070                                continue;
1071                            }
1072
1073                            match cow {
1074                                Cow::Borrowed(_) => {}
1075                                Cow::Owned(mut string) => {
1076                                    string.push(ch);
1077                                    cow = Cow::Owned(string);
1078                                }
1079                            }
1080                        }
1081                    }
1082                }
1083            }
1084        }
1085
1086        cow
1087    }
1088}
1089
1090impl<'json> std::hash::Hash for JsonString<'json> {
1091    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
1092        match &self.parsed {
1093            Cow::Borrowed(str) => str.hash(state),
1094            Cow::Owned(string) => string.hash(state),
1095        }
1096    }
1097}
1098
1099impl<'json> PartialEq for JsonString<'json> {
1100    fn eq(&self, other: &Self) -> bool {
1101        self.parsed.eq(&other.parsed)
1102    }
1103}
1104
// Marker impl: promotes the `PartialEq` comparison above to `Eq`.
impl<'json> Eq for JsonString<'json> {}
1106
1107impl<'json> Default for JsonString<'json> {
1108    fn default() -> Self {
1109        Self {
1110            span: None,
1111            parsed: DEFAULT_KEY_COW.clone(),
1112            sanitized: DEFAULT_KEY_COW.clone(),
1113        }
1114    }
1115}
1116
#[cfg(test)]
mod tests {
    /// Smoke test: parses an object whose key mixes valid escapes with an invalid one
    /// (`\a`) and whose text ends with a stray quote, then prints both renderings. It
    /// only checks that parsing and formatting complete.
    #[test]
    fn obj_keys_with_escapes_work() {
        let input = r#"{"key\\ \t\b\n\a":10"}"#;
        let (output, _) = crate::parse(input);

        let compact = output.to_string();
        println!("{}", compact);

        let pretty = output.to_string_pretty();
        println!("{}", pretty);
    }
}