eson_tokenizer/
lib.rs

1extern crate core;
2
3use nom::branch::alt;
4use nom::error::VerboseError;
5use nom::IResult;
6use nom_locate::LocatedSpan;
7
8pub use token::{ReadTokenRange, Token, TokenDec, TokenKey, TokenRange};
9
10use crate::decorator::parse_decorators;
11use crate::dict::parse_dict;
12use crate::list::parse_lst;
13use crate::prim::{parse_boolean, parse_null, parse_number, parse_string};
14use crate::sp::soc0;
15
16pub type Span<'a> = LocatedSpan<&'a str>;
17
18mod comments;
19mod decorator;
20mod dict;
21mod expr;
22mod fmt_string;
23mod fn_call;
24mod id;
25mod list;
26mod prim;
27mod reference_var;
28mod sp;
29mod token;
30mod var;
31
32pub fn parse_prim(i: Span) -> nom::IResult<Span, Token, VerboseError<Span>> {
33    alt((parse_boolean, parse_null, parse_number, parse_string))(i)
34}
35
36pub fn parse_base(i: Span) -> IResult<Span, Token, VerboseError<Span>> {
37    let (remaining, decorators) = parse_decorators(i)?;
38    let (remaining, _) = soc0(remaining)?;
39    let (remaining, root) = alt((parse_dict, parse_lst))(remaining)?;
40    Ok((
41        remaining,
42        Token::TokenFrameRoot(
43            decorators.into_iter().map(|d| d).collect(),
44            Box::from(root),
45            TokenRange::from((i, remaining)),
46        ),
47    ))
48}
49
50// trait SpanExt {
51//     // character_count(Span::from("hello \u{1F601}")) => 7
52//     fn character_count(&self) -> usize;
53//
54//     // byte_count(Span::from("hello \u{1F601}")) => 10
55//     fn byte_count(&self) -> usize;
56// }
57//
58// impl SpanExt for Span<'_> {
59//     fn character_count(&self) -> usize {
60//         self.fragment().chars().count()
61//     }
62//
63//     fn byte_count(&self) -> usize {
64//         self.fragment().len()
65//     }
66// }
67
/// Unit tests for the crate-level entry points `parse_prim` and `parse_base`.
///
/// Expected `TokenRange`s are written as `(source, start, end)` offsets into the
/// input span used by the individual test.
#[cfg(test)]
mod tests {
    use nom::character::complete::{digit1, one_of};
    use nom::combinator::rest;
    use nom::sequence::preceded;
    use nom::Slice;

    use crate::token::{TokenDec, TokenId, TokenKey, TokenRange};
    use crate::Token::{TokenFrameDict, TokenFrameRoot, TokenPrimNumberInt};

    use super::*;

    /// A block comment and a line comment ahead of an empty dict are consumed,
    /// and the dict range starts after them (offset 41).
    #[test]
    fn test_comments() {
        let s = Span::from(
            r##"/* hello world */
// this is a test file
{}"##,
        );
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(43..),
                TokenFrameRoot(
                    vec![],
                    Box::from(TokenFrameDict(vec![], TokenRange::from((s, 41, 43)))),
                    TokenRange::from((s, 0, 43))
                )
            ))
        );
    }

    // #[test]
    // fn test_span_ext() {
    //     let s = Span::from("hello \n");
    //     assert_eq!(s.character_count(), 7);
    //     assert_eq!(s.byte_count(), 7);
    //
    //     let s = Span::from("hello \r\n");
    //     assert_eq!(s.character_count(), 8);
    //     assert_eq!(s.byte_count(), 8);
    //
    //     let s = Span::from("hello \u{08}");
    //     assert_eq!(s.to_string(), "hello \u{08}");
    //     assert_eq!(s.character_count(), 7);
    //     assert_eq!(s.byte_count(), 7);
    //
    //     let s = Span::from("hello \u{FE0F}");
    //     assert_eq!(s.to_string(), "hello \u{FE0F}");
    //     assert_eq!(s.character_count(), 7);
    //     assert_eq!(s.byte_count(), 9);
    //
    //     let s = Span::from("hello \u{1F601}");
    //     assert_eq!(s.to_string(), "hello 😁");
    //     assert_eq!(s.character_count(), 7);
    //     assert_eq!(s.byte_count(), 10);
    // }

    /// Each primitive kind (boolean, null, int, float, string) is recognised by
    /// `parse_prim` and consumes the whole literal.
    #[test]
    fn test_parse_prim() {
        let s = Span::from(r#"true"#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(4..),
                Token::TokenPrimBoolean(true, TokenRange::from((s, 0, 4)))
            ))
        );

        let s = Span::from(r#"false"#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(5..),
                Token::TokenPrimBoolean(false, TokenRange::from((s, 0, 5)))
            ))
        );

        let s = Span::from(r#"null"#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(4..),
                Token::TokenPrimNull(TokenRange::from((s, 0, 4)))
            ))
        );

        let s = Span::from(r#"1"#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(1..),
                Token::TokenPrimNumberInt(1, TokenRange::from((s, 0, 1)))
            ))
        );

        let s = Span::from(r#"1.0"#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(3..),
                Token::TokenPrimNumberFloat(1.0, TokenRange::from((s, 0, 3)))
            ))
        );

        // The string token's range covers the surrounding quotes (0..5).
        let s = Span::from(r#""foo""#);
        assert_eq!(
            parse_prim(s),
            Ok((
                s.slice(5..),
                Token::TokenPrimString("foo".to_string(), TokenRange::from((s, 0, 5)))
            ))
        );
    }

    /// A dict root with a quoted key, then a dict root with unquoted keys, a
    /// trailing comma and a leading line comment.
    #[test]
    fn test_simple_root() {
        let s = Span::from(r#"{ "a": 1 }"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(10..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameDict(
                        vec![(
                            TokenKey::String("a".to_string(), TokenRange::from((s, 2, 5))),
                            vec![],
                            Token::TokenPrimNumberInt(1, TokenRange::from((s, 7, 8)))
                        )],
                        TokenRange::from((s, 0, 10))
                    )),
                    TokenRange::from((s, 0, 10))
                )
            ))
        );

        let s = Span::from("// comment \n {foo: 1, bar: 2,}");
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(30..),
                TokenFrameRoot(
                    vec![],
                    Box::from(TokenFrameDict(
                        vec![
                            (
                                TokenKey::String("foo".to_string(), TokenRange::from((s, 14, 17))),
                                vec![],
                                TokenPrimNumberInt(1, TokenRange::from((s, 19, 20)))
                            ),
                            (
                                TokenKey::String("bar".to_string(), TokenRange::from((s, 22, 25))),
                                vec![],
                                TokenPrimNumberInt(2, TokenRange::from((s, 27, 28)))
                            ),
                        ],
                        TokenRange::from((s, 13, 30))
                    )),
                    TokenRange::from((s, 0, 30))
                )
            ))
        );
    }

    /// A binary expression as a dict value becomes a `TokenExprSequence` of
    /// operand, operator, operand.
    #[test]
    fn test_expr_eson() {
        let s = Span::from(r#"{ "a": 1 != 2 }"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(15..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameDict(
                        vec![(
                            TokenKey::String("a".to_string(), TokenRange::from((s, 2, 5))),
                            vec![],
                            Token::TokenExprSequence(
                                vec![
                                    Token::TokenPrimNumberInt(1, TokenRange::from((s, 7, 8))),
                                    Token::TokenOpNe(TokenRange::from((s, 9, 11))),
                                    Token::TokenPrimNumberInt(2, TokenRange::from((s, 12, 13))),
                                ],
                                TokenRange::from((s, 7, 13))
                            )
                        )],
                        TokenRange::from((s, 0, 15))
                    )),
                    TokenRange::from((s, 0, 15))
                )
            ))
        );
    }

    /// A root decorator preceded by whitespace and a line comment is attached to
    /// the root frame. Offsets depend on the exact indentation inside the raw
    /// string, so do not reformat it.
    #[test]
    fn test_comment_decorator_eson() {
        let s = Span::from(
            r###"
                // foo decorator
                @foo
                { "a": 1 }"###,
        );
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(81..),
                Token::TokenFrameRoot(
                    vec![TokenDec(
                        TokenId("foo".to_string(), TokenRange::from((s, 51, 54))),
                        vec![],
                        TokenRange::from((s, 50, 54))
                    )],
                    Box::from(Token::TokenFrameDict(
                        vec![(
                            TokenKey::String("a".to_string(), TokenRange::from((s, 73, 76))),
                            vec![],
                            Token::TokenPrimNumberInt(1, TokenRange::from((s, 78, 79)))
                        )],
                        TokenRange::from((s, 71, 81))
                    )),
                    TokenRange::from((s, 0, 81))
                )
            ))
        );
    }

    /// A list root; each element is keyed by `TokenKey::DummySn(index)`.
    #[test]
    fn test_list_eson() {
        let s = Span::from(r#"[1, 2, 3]"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(9..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameList(
                        vec![
                            (
                                TokenKey::DummySn(0),
                                vec![],
                                Token::TokenPrimNumberInt(1, TokenRange::from((s, 1, 2)))
                            ),
                            (
                                TokenKey::DummySn(1),
                                vec![],
                                Token::TokenPrimNumberInt(2, TokenRange::from((s, 4, 5)))
                            ),
                            (
                                TokenKey::DummySn(2),
                                vec![],
                                Token::TokenPrimNumberInt(3, TokenRange::from((s, 7, 8)))
                            ),
                        ],
                        TokenRange::from((s, 0, 9))
                    )),
                    TokenRange::from((s, 0, 9))
                )
            ))
        );
    }

    /// `&sibling.b` inside a dict becomes `TokenRefVarSibling` with a string key path.
    #[test]
    fn test_ref_dict() {
        let s = Span::from(r#"{ "a": &sibling.b, "b": 2 }"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(27..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameDict(
                        vec![
                            (
                                TokenKey::String("a".to_string(), TokenRange::from((s, 2, 5))),
                                vec![],
                                Token::TokenRefVarSibling(
                                    vec![TokenKey::String(
                                        "b".to_string(),
                                        TokenRange::from((s, 16, 17))
                                    )],
                                    TokenRange::from((s, 7, 17))
                                )
                            ),
                            (
                                TokenKey::String("b".to_string(), TokenRange::from((s, 19, 22))),
                                vec![],
                                Token::TokenPrimNumberInt(2, TokenRange::from((s, 24, 25)))
                            ),
                        ],
                        TokenRange::from((s, 0, 27))
                    )),
                    TokenRange::from((s, 0, 27))
                )
            ))
        );
    }

    /// `&sibling[1]` inside a list becomes `TokenRefVarSibling` with an index key (`Sn`).
    #[test]
    fn test_ref_list() {
        let s = Span::from(r#"[&sibling[1], 2]"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(16..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameList(
                        vec![
                            (
                                TokenKey::DummySn(0),
                                vec![],
                                Token::TokenRefVarSibling(
                                    vec![TokenKey::Sn(1, TokenRange::from((s, 10, 11)))],
                                    TokenRange::from((s, 1, 12))
                                )
                            ),
                            (
                                TokenKey::DummySn(1),
                                vec![],
                                Token::TokenPrimNumberInt(2, TokenRange::from((s, 14, 15)))
                            ),
                        ],
                        TokenRange::from((s, 0, 16))
                    )),
                    TokenRange::from((s, 0, 16))
                )
            ))
        );
    }

    /// A bare identifier as a list element becomes `TokenVar`.
    #[test]
    fn test_var_list() {
        let s = Span::from(r#"[a, 2]"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(6..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameList(
                        vec![
                            (
                                TokenKey::DummySn(0),
                                vec![],
                                Token::TokenVar(
                                    vec![TokenKey::String(
                                        "a".to_string(),
                                        TokenRange::from((s, 1, 2))
                                    )],
                                    TokenRange::from((s, 1, 2))
                                )
                            ),
                            (
                                TokenKey::DummySn(1),
                                vec![],
                                Token::TokenPrimNumberInt(2, TokenRange::from((s, 4, 5)))
                            ),
                        ],
                        TokenRange::from((s, 0, 6))
                    )),
                    TokenRange::from((s, 0, 6))
                )
            ))
        );
    }

    /// A function call with a dict argument as a list element becomes `TokenFnCall`.
    #[test]
    fn test_fn_call_list() {
        let s = Span::from(r#"[f({a: 1}), 2]"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(14..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameList(
                        vec![
                            (
                                TokenKey::DummySn(0),
                                vec![],
                                Token::TokenFnCall(
                                    TokenId("f".to_string(), TokenRange::from((s, 1, 2))),
                                    vec![Token::TokenFrameDict(
                                        vec![(
                                            TokenKey::String(
                                                "a".to_string(),
                                                TokenRange::from((s, 4, 5))
                                            ),
                                            vec![],
                                            Token::TokenPrimNumberInt(
                                                1,
                                                TokenRange::from((s, 7, 8))
                                            )
                                        )],
                                        TokenRange::from((s, 3, 9))
                                    )],
                                    TokenRange::from((s, 1, 10))
                                )
                            ),
                            (
                                TokenKey::DummySn(1),
                                vec![],
                                Token::TokenPrimNumberInt(2, TokenRange::from((s, 12, 13)))
                            ),
                        ],
                        TokenRange::from((s, 0, 14))
                    )),
                    TokenRange::from((s, 0, 14))
                )
            ))
        );
    }

    /// A format string with an interpolated sibling reference becomes a
    /// `TokenExprSequence` joined by `TokenDummyOpConcat`.
    #[test]
    fn test_fmt_string_list() {
        let s = Span::from(r#"[f"a ${ &sibling[1] }", "b"]"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(28..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameList(
                        vec![
                            (
                                TokenKey::DummySn(0),
                                vec![],
                                Token::TokenExprSequence(
                                    vec![
                                        Token::TokenPrimString(
                                            "a ".to_string(),
                                            TokenRange::from((s, 3, 5))
                                        ),
                                        Token::TokenDummyOpConcat,
                                        Token::TokenRefVarSibling(
                                            vec![TokenKey::Sn(1, TokenRange::from((s, 17, 18)))],
                                            TokenRange::from((s, 8, 19))
                                        ),
                                    ],
                                    TokenRange::from((s, 1, 22))
                                )
                            ),
                            (
                                TokenKey::DummySn(1),
                                vec![],
                                Token::TokenPrimString(
                                    "b".to_string(),
                                    TokenRange::from((s, 24, 27))
                                )
                            ),
                        ],
                        TokenRange::from((s, 0, 28))
                    )),
                    TokenRange::from((s, 0, 28))
                )
            ))
        );
    }

    /// A format string in a dict value that interpolates a `&root` reference
    /// with a mixed key path (string key followed by an index).
    #[test]
    fn test_root_ref_in_fmt_string_dict() {
        let s = Span::from(r#"{ "a": f"a ${ &root.b[0] }", "b": [0, 1] }"#);
        assert_eq!(
            parse_base(s),
            Ok((
                s.slice(42..),
                Token::TokenFrameRoot(
                    vec![],
                    Box::from(Token::TokenFrameDict(
                        vec![
                            (
                                TokenKey::String("a".to_string(), TokenRange::from((s, 2, 5))),
                                vec![],
                                Token::TokenExprSequence(
                                    vec![
                                        Token::TokenPrimString(
                                            "a ".to_string(),
                                            TokenRange::from((s, 9, 11))
                                        ),
                                        Token::TokenDummyOpConcat,
                                        Token::TokenRefVarRoot(
                                            vec![
                                                TokenKey::String(
                                                    "b".to_string(),
                                                    TokenRange::from((s, 20, 21))
                                                ),
                                                TokenKey::Sn(0, TokenRange::from((s, 22, 23))),
                                            ],
                                            TokenRange::from((s, 14, 24))
                                        ),
                                    ],
                                    TokenRange::from((s, 7, 27))
                                )
                            ),
                            (
                                TokenKey::String("b".to_string(), TokenRange::from((s, 29, 32))),
                                vec![],
                                Token::TokenFrameList(
                                    vec![
                                        (
                                            TokenKey::DummySn(0),
                                            vec![],
                                            Token::TokenPrimNumberInt(
                                                0,
                                                TokenRange::from((s, 35, 36))
                                            )
                                        ),
                                        (
                                            TokenKey::DummySn(1),
                                            vec![],
                                            Token::TokenPrimNumberInt(
                                                1,
                                                TokenRange::from((s, 38, 39))
                                            )
                                        ),
                                    ],
                                    TokenRange::from((s, 34, 40))
                                )
                            ),
                        ],
                        TokenRange::from((s, 0, 42))
                    )),
                    TokenRange::from((s, 0, 42))
                )
            ))
        );
    }

    /// Sanity check of nom's `alt` behaviour with and without `cut`; it does not
    /// exercise any parser of this crate.
    #[test]
    fn test() {
        use nom::combinator::cut;

        // Without `cut`, a failed first branch lets `alt` fall back to `rest`.
        fn parser(input: &str) -> IResult<&str, &str> {
            alt((preceded(one_of("+-"), digit1), rest))(input)
        }

        assert_eq!(parser("+10 ab"), Ok((" ab", "10")));
        assert_eq!(parser("ab"), Ok(("", "ab")));
        assert_eq!(parser("+"), Ok(("", "+")));

        // With `cut`, a missing digit after the sign is no longer recoverable.
        fn parser2(input: &str) -> IResult<&str, &str> {
            alt((preceded(one_of("+-"), cut(digit1)), rest))(input)
        }

        assert_eq!(parser2("+10 ab"), Ok((" ab", "10")));
        assert_eq!(parser2("ab"), Ok(("", "ab")));
        // This is the case that distinguishes `parser2` from `parser`: the sign
        // is consumed, `digit1` fails, and `cut` makes `alt` stop instead of
        // trying `rest`.
        assert!(matches!(parser2("+"), Err(nom::Err::Failure(_))));
    }
}