json_sourcemap/
lib.rs

1#![doc = include_str!("../README.md")]
2
3use std::collections::HashMap;
4use std::sync::LazyLock;
5
6use serde_json::{Number, Value};
7
/// Maps the character that follows a backslash inside a JSON string to the
/// text that escape decodes to.
///
/// `\uXXXX` escapes are not listed here; the string parser handles them
/// separately.
static ESCAPED_CHARS: LazyLock<HashMap<char, &'static str>> = LazyLock::new(|| {
    HashMap::from([
        // Rust has no `\b` / `\f` escapes, so the code points are spelled out.
        // (A raw string such as r"\b" would yield a backslash plus a letter.)
        ('b', "\u{0008}"),
        ('f', "\u{000C}"),
        ('n', "\n"),
        ('r', "\r"),
        ('t', "\t"),
        ('"', "\""),
        ('/', "/"),
        ('\\', "\\"),
    ])
});
20
/// The json-source-map error type
///
/// Positions carried by the variants are indices into the source counted in
/// characters, as tracked by the parser.
#[derive(Debug, thiserror::Error, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Error {
    /// The input ended while the parser still needed another character.
    #[error("Unexpected end of JSON input")]
    UnexpectedEof,
    /// A character that is not allowed at this point of the document.
    /// Carries the character and the parser position reported with it.
    #[error("Unexpected token: {0} in JSON at position {1}")]
    UnexpectedToken(char, usize),
    /// The hexadecimal digits of a `\u` escape could not be converted to an integer.
    #[error("Convert to unicode codepoint failed")]
    Int,
    /// A `\u` escape named a value that is not a valid `char`.
    /// Carries the value and the parser position reported with it.
    #[error("Invalid unicode codepoint: {0} at position {1}")]
    InvalidUnicodeCodePoint(u32, usize),
}
34
/// A position in the parsed source text.
///
/// All three fields start at 0 for the first character of the input.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Location {
    /// Line number; the parser advances it for each `\n` it skips as whitespace.
    pub line: usize,
    /// Column within the line. Reset to 0 after `\n` or `\r`; a tab skipped as
    /// whitespace advances it by 4, every other character by 1.
    pub column: usize,
    /// Index into the source counted in characters (not bytes).
    pub pos: usize,
}
42
/// The parse options
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct Options {
    /// Whether to allow big integers
    // NOTE(review): the parser in this file stores the options but never reads
    // this flag, so it currently appears to have no effect — confirm.
    pub bigint: bool,
}
50
/// Identifies which boundary of a JSON pointer's entry a [`Location`] describes.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum Prop {
    /// Start of an object member's key (recorded before its opening quote is read).
    Key,
    /// End of an object member's key (recorded after its closing quote is read).
    KeyEnd,
    /// Start of the value (recorded after leading whitespace is skipped).
    Value,
    /// End of the value (recorded right after its last character is read).
    ValueEnd,
}
59
/// Parser state: the input, the current cursor and the locations collected so far.
struct Parser {
    /// The source text split into characters; `pos` indexes into this vector.
    chars: Vec<char>,
    // Stored at construction but not read anywhere in this file, hence the allow.
    #[allow(dead_code)]
    options: Options,

    /// Current line, starting at 0.
    line: usize,
    /// Current column, starting at 0.
    column: usize,
    /// Index of the next character to read from `chars`.
    pos: usize,

    /// key is the json pointer, value is the start and end location
    pointers: HashMap<String, LocationMap>,
}
72
/// The outcome of a successful parse: the decoded value plus its source map.
#[derive(Debug, Clone)]
pub struct ParseResult {
    /// The parsed JSON document.
    pub value: Value,
    /// Locations keyed by JSON pointer. The root value is stored under `""`,
    /// object members under `<parent>/<escaped key>` and array items under
    /// `<parent>/<index>`.
    pub pointers: HashMap<String, LocationMap>,
}
78
impl ParseResult {
    /// Get the location of the json pointer
    ///
    /// Returns `None` when no locations were recorded under `ptr`. The lookup
    /// is an exact string match against the keys of `pointers`.
    pub fn get_location(&self, ptr: &str) -> Option<&LocationMap> {
        self.pointers.get(ptr)
    }
}
85
/// The location information of the json pointer
///
/// Wraps a map from [`Prop`] to the [`Location`] recorded for that boundary.
/// Which entries are present depends on what the parser recorded for the pointer.
#[derive(Debug, Clone)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct LocationMap(HashMap<Prop, Location>);
90
91impl LocationMap {
92    /// Get the location of the property
93    pub fn get(&self, prop: Prop) -> Option<Location> {
94        self.0.get(&prop).cloned()
95    }
96
97    fn insert(&mut self, prop: Prop, loc: Location) {
98        self.0.insert(prop, loc);
99    }
100
101    /// Get the start location of the json pointer's value
102    pub fn value(&self) -> Location {
103        self.get(Prop::Value).unwrap()
104    }
105
106    /// Get the start location of the json pointer's key
107    pub fn key(&self) -> Location {
108        self.get(Prop::Key).unwrap()
109    }
110
111    /// Get the end location of the json pointer's value
112    pub fn value_end(&self) -> Location {
113        self.get(Prop::ValueEnd).unwrap()
114    }
115
116    /// Get the end location of the json pointer's key
117    pub fn key_end(&self) -> Location {
118        self.get(Prop::KeyEnd).unwrap()
119    }
120}
121
122impl Parser {
123    fn new(source: &str, options: Options) -> Self {
124        let chars = source.chars().collect();
125        Parser {
126            chars,
127            options,
128            line: 0,
129            column: 0,
130            pos: 0,
131            pointers: HashMap::new(),
132        }
133    }
134
135    fn parse(&mut self, ptr: &str, top_level: bool) -> Result<Value, Error> {
136        self.whitespace();
137        self.map(ptr, Prop::Value);
138        let c = self.get_char()?;
139        let data = match c {
140            't' => {
141                self.expect("rue")?;
142                Value::Bool(true)
143            }
144            'f' => {
145                self.expect("alse")?;
146                Value::Bool(false)
147            }
148            'n' => {
149                self.expect("ull")?;
150                Value::Null
151            }
152            '"' => Value::String(self.parse_string()?),
153            '[' => Value::Array(self.parse_array(ptr)?),
154            '{' => self.parse_object(ptr)?,
155            '-' | '0'..='9' => Value::Number(self.parse_number()?),
156            _ => return Err(Error::UnexpectedToken(c, self.pos)),
157        };
158        self.map(ptr, Prop::ValueEnd);
159        // dbg!("?");
160        self.whitespace();
161        // dbg!("? ?", top_level, self.pos, self.len());
162        if top_level && self.pos < self.len() {
163            return Err(self.unexpected_token());
164        }
165
166        Ok(data)
167    }
168
    /// Number of characters in the input (characters, not bytes).
    #[inline]
    fn len(&self) -> usize {
        self.chars.len()
    }
173
174    fn whitespace(&mut self) {
175        'outer: {
176            while self.pos < self.len() {
177                match self.chars.get(self.pos) {
178                    Some(' ') => self.column += 1,
179                    Some('\t') => self.column += 4,
180                    Some('\r') => self.column = 0,
181                    Some('\n') => {
182                        self.line += 1;
183                        self.column = 0;
184                    }
185                    _ => break 'outer,
186                }
187                self.pos += 1;
188            }
189            // dbg!(1);
190        }
191    }
192
193    fn parse_string(&mut self) -> Result<String, Error> {
194        let mut s = String::new();
195        loop {
196            match self.get_char()? {
197                '"' => break,
198                '\\' => {
199                    let c = self.get_char()?;
200                    if let Some(escaped) = ESCAPED_CHARS.get(&c) {
201                        s.push_str(escaped);
202                    } else if c == 'u' {
203                        s.push(self.get_char_code()?);
204                    } else {
205                        return Err(self.was_unexpected_token());
206                    }
207                }
208                c => {
209                    s.push(c);
210                }
211            }
212            // dbg!(2);
213        }
214        Ok(s)
215    }
216
217    fn parse_number(&mut self) -> Result<serde_json::value::Number, Error> {
218        self.back_char();
219
220        let mut num_str = String::new();
221        // let mut is_integer = true;
222        if self.next() == '-' {
223            num_str.push(self.get_char()?);
224        }
225
226        let next = if self.next() == '0' {
227            self.get_char()?.to_string()
228        } else {
229            self.get_digits()?
230        };
231        num_str = num_str + &next;
232
233        if self.next() == '.' {
234            // is_integer = false;
235            num_str.push(self.get_char()?);
236            num_str = num_str + &self.get_digits()?;
237        }
238
239        if self.next() == 'e' || self.next() == 'E' {
240            // is_integer = false;
241            num_str.push(self.get_char()?);
242            if self.next() == '-' || self.next() == '+' {
243                num_str.push(self.get_char()?);
244            }
245            num_str = num_str + &self.get_digits()?;
246        }
247
248        // let res = num_str.parse::<f64>().unwrap();
249
250        // let n = if is_integer {
251        //     serde_json::number::N::PosInt(res)
252        // } else {
253        //     res
254        // };
255
256        Ok(Number::from_string_unchecked(num_str))
257    }
258
259    fn parse_array(&mut self, ptr: &str) -> Result<Vec<Value>, Error> {
260        self.whitespace();
261        let mut array = Vec::new();
262        let c = self.get_char()?; // [
263        if c == ']' {
264            return Ok(array);
265        }
266        self.back_char();
267
268        loop {
269            let item_ptr = format!("{}/{}", ptr, array.len());
270            array.push(self.parse(&item_ptr, false)?);
271            self.whitespace();
272            let c = self.get_char()?;
273            if c == ']' {
274                break;
275            } else if c != ',' {
276                return Err(self.unexpected_token());
277            }
278            self.whitespace();
279            // dbg!(3);
280        }
281
282        Ok(array)
283    }
284
285    fn parse_object(&mut self, ptr: &str) -> Result<Value, Error> {
286        self.whitespace();
287        let mut object = serde_json::Map::new();
288        if self.get_char()? == '}' {
289            return Ok(object.into());
290        }
291
292        self.back_char();
293
294        loop {
295            let loc = self.get_location();
296            if self.get_char()? != '"' {
297                return Err(self.was_unexpected_token());
298            }
299            let key = self.parse_string()?;
300            let prop_ptr = format!("{}/{}", ptr, Self::escape_json_pointer(&key));
301            self.map_location(&prop_ptr, Prop::Key, loc);
302            self.map(&prop_ptr, Prop::KeyEnd);
303            self.whitespace();
304            if self.get_char()? != ':' {
305                return Err(self.was_unexpected_token());
306            }
307            self.whitespace();
308            let value = self.parse(&prop_ptr, false)?;
309            object.insert(key, value);
310            self.whitespace();
311
312            match self.get_char()? {
313                '}' => break,
314                ',' => {}
315                _ => return Err(self.was_unexpected_token()),
316            }
317
318            self.whitespace();
319        }
320        Ok(object.into())
321    }
322
323    fn expect(&mut self, s: &str) -> Result<(), Error> {
324        for c in s.chars() {
325            if self.get_char()? != c {
326                return Err(self.was_unexpected_token());
327            }
328        }
329        Ok(())
330    }
331
332    #[inline]
333    fn get_char(&mut self) -> Result<char, Error> {
334        self.check_unexpected_eof()?;
335        let c = self.next();
336        self.pos += 1;
337        self.column += 1;
338        Ok(c)
339    }
340
341    #[inline]
342    fn next(&self) -> char {
343        *self
344            .chars
345            .get(self.pos)
346            .unwrap_or_else(|| panic!("Unexpected EOF, pos: {}", self.pos))
347    }
348
    /// Backs up the parser one character.
    ///
    /// Only `pos` and `column` are decremented; `line` is left untouched. The
    /// callers in this file use it to un-read a character that `get_char` has
    /// just consumed.
    fn back_char(&mut self) {
        // Both subtractions assume a character was consumed beforehand: with
        // `pos` or `column` at 0 they would underflow.
        self.pos -= 1;
        self.column -= 1;
    }
354
355    fn get_char_code(&mut self) -> Result<char, Error> {
356        let count = 4;
357        let mut code = String::new();
358        for _ in 0..count {
359            let c = self.get_char()?;
360            if !c.is_ascii_hexdigit() {
361                return Err(Error::UnexpectedToken(c, self.pos));
362            }
363            code.push(c);
364        }
365
366        let unicode = u32::from_str_radix(&code, 16).map_err(|_| Error::Int)?;
367        char::from_u32(unicode).ok_or(Error::InvalidUnicodeCodePoint(unicode, self.pos))
368    }
369
370    fn get_digits(&mut self) -> Result<String, Error> {
371        let mut digits = String::new();
372        loop {
373            let c = self.next();
374            if c.is_ascii_digit() {
375                digits.push(self.get_char()?);
376            } else {
377                break;
378            }
379            // dbg!(5);
380        }
381        Ok(digits)
382    }
383
384    fn map(&mut self, ptr: impl ToString, pro: Prop) {
385        self.map_location(ptr, pro, self.get_location());
386    }
387
388    fn map_location(&mut self, ptr: impl ToString, prop: Prop, loc: Location) {
389        self.pointers
390            .entry(ptr.to_string())
391            .or_insert_with(|| LocationMap(HashMap::new()))
392            .insert(prop, loc);
393    }
394
395    fn get_location(&self) -> Location {
396        Location {
397            line: self.line,
398            column: self.column,
399            pos: self.pos,
400        }
401    }
402
403    fn unexpected_token(&self) -> Error {
404        Error::UnexpectedToken(self.next(), self.pos)
405    }
406
    /// Builds the unexpected-token error for the character that was just
    /// consumed: steps the cursor back by one, then reports the character there.
    ///
    /// Must only be called after a character has been consumed, because it
    /// relies on `back_char`.
    fn was_unexpected_token(&mut self) -> Error {
        self.back_char();
        self.unexpected_token()
    }
411
412    fn check_unexpected_eof(&self) -> Result<(), Error> {
413        if self.pos >= self.len() {
414            return Err(Error::UnexpectedEof);
415        }
416
417        Ok(())
418    }
419
420    fn escape_json_pointer(s: &str) -> String {
421        s.replace("~", "~0").replace("/", "~1")
422    }
423}
424
425pub fn parse(source: &str, options: Options) -> Result<ParseResult, Error> {
426    let mut parser = Parser::new(source, options);
427    let value = parser.parse("", true)?;
428    Ok(ParseResult {
429        value,
430        pointers: parser.pointers,
431    })
432}
433
#[cfg(test)]
mod tests {
    use super::*;

    /// Shorthand for the expected [`Location`].
    fn at(line: usize, column: usize, pos: usize) -> Location {
        Location { line, column, pos }
    }

    /// Asserts that the parsed value equals what `serde_json` produces for `source`.
    fn assert_same_as_serde(res: &ParseResult, source: &str) {
        assert_eq!(
            res.value,
            serde_json::from_str::<serde_json::Value>(source).unwrap()
        );
    }

    #[test]
    fn test_parse() {
        // Multi-line object with a nested array: key boundaries.
        let source = r#"{
            "name": "John",
            "age": 30,
            "cars": [
                "Ford",
                "BMW",
                "Fiat"
            ]
        }"#;
        let res = parse(source, Options::default()).unwrap();
        assert!(res.value.is_object());
        let name = &res.pointers["/name"];
        assert_eq!(name.key(), at(1, 12, 14));
        assert_eq!(name.key_end(), at(1, 18, 20));
        assert_same_as_serde(&res, source);

        // Small object: root boundaries plus all four boundaries of a member.
        let source = r#"{
  "foo": "bar"
}"#;
        let res = parse(source, Options::default()).unwrap();
        assert!(res.value.is_object());
        let root = &res.pointers[""];
        assert_eq!(root.value(), at(0, 0, 0));
        assert_eq!(root.value_end(), at(2, 1, 18));
        let foo = &res.pointers["/foo"];
        assert_eq!(foo.key(), at(1, 2, 4));
        assert_eq!(foo.key_end(), at(1, 7, 9));
        assert_eq!(foo.value(), at(1, 9, 11));
        assert_eq!(foo.value_end(), at(1, 14, 16));
        assert_same_as_serde(&res, source);

        // Fractional number on the third line.
        let source = r#"{
            "name": "John",
            "age": 30.0
        }"#;
        let res = parse(source, Options::default()).unwrap();
        assert!(res.value.is_object());
        let age = &res.pointers["/age"];
        assert_eq!(age.value(), at(2, 19, 49));
        assert_eq!(age.value_end(), at(2, 23, 53));
        assert_same_as_serde(&res, source);

        // Single-line documents: (source, pointer, value start, value end,
        // whether the value is also compared against serde_json).
        let single_line_cases = [
            (r#"{"number":1.23e+10000}"#, "/number", at(0, 10, 10), at(0, 21, 21), false),
            (r#"{"number":-1.23e-10000}"#, "/number", at(0, 10, 10), at(0, 22, 22), false),
            (r#"{"number":-0.0}"#, "/number", at(0, 10, 10), at(0, 14, 14), true),
            (r#"{"code":"\u0020"}"#, "/code", at(0, 8, 8), at(0, 16, 16), true),
            (r#"{"chinese":"你好"}"#, "/chinese", at(0, 11, 11), at(0, 15, 15), false),
        ];
        for (source, ptr, start, end, compare_with_serde) in single_line_cases {
            let res = parse(source, Options::default()).unwrap();
            assert!(res.value.is_object());
            let entry = &res.pointers[ptr];
            assert_eq!(entry.value(), start);
            assert_eq!(entry.value_end(), end);
            if compare_with_serde {
                assert_same_as_serde(&res, source);
            }
        }
    }
}