toml_parse/tkn_tree/
parse_tkns.rs

1use muncher::Muncher;
2use rowan::SmolStr;
3
4use super::err::{ParseTomlError, TomlErrorKind, TomlResult};
5use super::kinds::TomlKind::{self, *};
6
7use chrono::{NaiveDate, NaiveTime};
8
9use super::common::{
10    cmp_tokens, BOOL_END, DATE_CHAR, DATE_END, DATE_LIKE, DATE_TIME, EOL, IDENT_END, INT_END,
11    KEY_END, NUM_END, SEG_END, TIME_CHAR, WHITESPACE,
12};
13use super::kinds::{Element, TomlNode, TomlToken};
14use super::syntax::Parser;
15
16impl Into<(TomlKind, SmolStr)> for Element {
17    fn into(self) -> (TomlKind, SmolStr) {
18        match self {
19            Element::Node(n) => (n.kind, n.text),
20            Element::Token(tkn) => (tkn.kind, tkn.text),
21        }
22    }
23}
24
/// Returns `true` when every character of `s` is allowed in a key: an ASCII
/// letter or digit, or one of `_`, `.`, `-`, `'`, `"`.
///
/// An empty string has no offending character and is therefore reported as valid.
fn is_valid_key(s: &str) -> bool {
    const PUNCTUATION: &str = "_.-'\"";

    for ch in s.chars() {
        let allowed = ch.is_ascii_alphanumeric() || PUNCTUATION.contains(ch);
        if !allowed {
            return false;
        }
    }
    true
}
29
30fn is_valid_datetime(s: &str) -> TomlResult<bool> {
31    let dt = s.split(DATE_TIME).collect::<Vec<_>>();
32    if dt.len() == 1 {
33        if dt[0].contains(':') {
34            let time = dt[0].split(':').collect::<Vec<_>>();
35            if time[2].contains('.') {
36                let (sec, milli) = {
37                    let fractional = time[2].split('.').collect::<Vec<_>>();
38                    (fractional[0].parse()?, fractional[1].parse()?)
39                };
40                NaiveTime::from_hms_milli(time[0].parse()?, time[1].parse()?, sec, milli);
41            } else {
42                NaiveTime::from_hms(time[0].parse()?, time[1].parse()?, time[2].parse()?);
43            };
44            Ok(true)
45        } else {
46            let date = dt[0].split('-').collect::<Vec<_>>();
47
48            assert_eq!(date.len(), 3);
49
50            let _ = NaiveDate::from_ymd(date[0].parse()?, date[1].parse()?, date[2].parse()?);
51            Ok(true)
52        }
53    } else {
54        let date = dt[0].split(DATE_CHAR).collect::<Vec<_>>();
55        let time = dt[1].split(TIME_CHAR).collect::<Vec<_>>();
56        let _ =
57            if time.len() > 3 {
58                if s.contains('+') {
59                    // TODO dont include offset for now
60                    NaiveDate::from_ymd(date[0].parse()?, date[1].parse()?, date[2].parse()?)
61                        .and_hms(time[0].parse()?, time[1].parse()?, time[2].parse()?)
62                } else {
63                    NaiveDate::from_ymd(date[0].parse()?, date[1].parse()?, date[2].parse()?)
64                        .and_hms_milli(
65                            time[0].parse()?,
66                            time[1].parse()?,
67                            time[2].parse()?,
68                            time[3].parse()?,
69                        )
70                }
71            } else {
72                NaiveDate::from_ymd(date[0].parse()?, date[1].parse()?, date[2].parse()?).and_hms(
73                    time[0].parse()?,
74                    time[1].parse()?,
75                    time[2].parse()?,
76                )
77            };
78        Ok(true)
79    }
80}
81
82impl TomlToken {
83    // fn whitespace(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
84    //     let (s, e) = muncher.eat_until_count(|c| !cmp_tokens(c, WHITESPACE));
85    //     // TODO is this more efficient than eat_until to String??
86    //     let text = SmolStr::new(&muncher.text()[s..e]);
87    //     parser.builder.token(Whitespace.into(), text);
88    //     Ok(())
89    // }
90
91    /// Returns Element if whitespace was found.
92    fn maybe_whitespace(muncher: &mut Muncher) -> Option<Element> {
93        let (s, e) = muncher.eat_until_count(|c| !cmp_tokens(c, WHITESPACE));
94        // TODO is this more efficient than eat_until to String??
95        let text = SmolStr::new(&muncher.text()[s..e]);
96        if e > s {
97            Some(Element::Token(Self {
98                kind: Whitespace,
99                text,
100            }))
101        } else {
102            None
103        }
104    }
105
106    fn hash(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
107        assert!(muncher.eat_hash());
108        parser.builder.token(Hash.into(), SmolStr::new("#"));
109        Ok(())
110    }
111
112    // fn plus(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
113    //     assert!(muncher.eat_plus());
114    //     parser.builder.token(Plus.into(), SmolStr::new("+"));
115    //     Ok(())
116    // }
117
118    // fn minus(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
119    //     assert!(muncher.eat_minus());
120    //     parser.builder.token(Minus.into(), SmolStr::new("-"));
121    //     Ok(())
122    // }
123
124    fn equal(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
125        assert!(muncher.eat_eq());
126        parser.builder.token(Equal.into(), SmolStr::new("="));
127        Ok(())
128    }
129
130    // fn comma(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
131    //     assert!(muncher.eat_comma());
132    //     parser.builder.token(Comma.into(), SmolStr::new(","));
133    //     Ok(())
134    // }
135
136    /// Returns Element if comma was found. The last item
137    /// in an array may or may not have a comma.
138    fn maybe_comma(muncher: &mut Muncher) -> Option<Element> {
139        if muncher.eat_comma() {
140            Some(Element::Token(Self {
141                kind: Comma,
142                text: SmolStr::new(","),
143            }))
144        } else {
145            None
146        }
147    }
148
149    fn dot(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
150        assert!(muncher.eat_dot());
151        parser.builder.token(Dot.into(), SmolStr::new("."));
152        Ok(())
153    }
154
155    fn maybe_dot(muncher: &mut Muncher) -> Option<Element> {
156        if muncher.eat_dot() {
157            Some(Element::Token(Self {
158                kind: Dot,
159                text: SmolStr::new("."),
160            }))
161        } else {
162            None
163        }
164    }
165
166    fn double_quote(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
167        assert!(muncher.eat_double_quote());
168        parser.builder.token(DoubleQuote.into(), SmolStr::new("\""));
169        Ok(())
170    }
171
172    fn triple_quote(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
173        assert!(muncher.eat_double_quote());
174        assert!(muncher.eat_double_quote());
175        assert!(muncher.eat_double_quote());
176        parser
177            .builder
178            .token(TripleQuote.into(), SmolStr::new("\"\"\""));
179        Ok(())
180    }
181
182    fn single_quote(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
183        assert!(muncher.eat_single_quote());
184        parser.builder.token(SingleQuote.into(), SmolStr::new("\'"));
185        Ok(())
186    }
187
188    fn ident(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
189        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, IDENT_END));
190        // TODO is this more efficient than eat_until to String??
191        let text = SmolStr::new(&muncher.text()[s..e]);
192        parser.builder.token(Ident.into(), text);
193        Ok(())
194    }
195
196    fn ident_seg(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
197        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, SEG_END));
198        let text = SmolStr::new(&muncher.text()[s..e]);
199        parser.builder.token(Ident.into(), text);
200        Ok(())
201    }
202
203    fn ident_double_str(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
204        let (s, e) = muncher.eat_until_count(|c| c == &'"');
205        // TODO is this more efficient than eat_until to String??
206        let text = SmolStr::new(&muncher.text()[s..e]);
207        parser.builder.token(Ident.into(), text);
208        Ok(())
209    }
210
211    fn ident_triple_str(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
212        let (s, e) = muncher.eat_range_of("\"\"\"");
213        let text = SmolStr::new(&muncher.text()[s..e]);
214        parser.builder.token(Ident.into(), text);
215        Ok(())
216    }
217
218    fn ident_single_str(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
219        let (s, e) = muncher.eat_until_count(|c| c == &'\'');
220        let text = SmolStr::new(&muncher.text()[s..e]);
221        parser.builder.token(Ident.into(), text);
222        Ok(())
223    }
224
225    fn comment_text(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
226        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, EOL));
227        let text = SmolStr::new(&muncher.text()[s..e]);
228        parser.builder.token(CommentText.into(), text);
229        Ok(())
230    }
231
232    fn open_brace(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
233        assert!(muncher.eat_open_brc());
234        parser.builder.token(OpenBrace.into(), SmolStr::new("["));
235        Ok(())
236    }
237    fn close_brace(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
238        assert!(muncher.eat_close_brc());
239        parser.builder.token(CloseBrace.into(), SmolStr::new("]"));
240        Ok(())
241    }
242    fn open_curly(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
243        assert!(muncher.eat_open_curly());
244        parser.builder.token(OpenCurly.into(), SmolStr::new("{"));
245        Ok(())
246    }
247    fn close_curly(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
248        assert!(muncher.eat_close_curly());
249        parser.builder.token(CloseCurly.into(), SmolStr::new("}"));
250        Ok(())
251    }
252
253    fn boolean(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
254        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, BOOL_END));
255        let boolean = &muncher.text()[s..e];
256
257        let text = SmolStr::new(boolean);
258        if boolean == "true" || boolean == "false" {
259            parser.builder.token(Bool.into(), text);
260            Ok(())
261        } else {
262            let (col, ln) = muncher.cursor_position();
263            let msg = "invalid integer".into();
264            Err(ParseTomlError::new(
265                msg,
266                TomlErrorKind::UnexpectedToken {
267                    tkn: boolean.into(),
268                    ln,
269                    col,
270                },
271            ))
272        }
273    }
274
275    fn integer(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
276        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, INT_END));
277        let int = &muncher.text()[s..e];
278
279        if int.chars().all(|c| c.is_numeric()) {
280            let text = SmolStr::new(int);
281            parser.builder.token(Integer.into(), text);
282            Ok(())
283        } else {
284            let (col, ln) = muncher.cursor_position();
285            let msg = "invalid integer".into();
286            Err(ParseTomlError::new(
287                msg,
288                TomlErrorKind::UnexpectedToken {
289                    tkn: int.into(),
290                    ln,
291                    col,
292                },
293            ))
294        }
295    }
296}
297
/// Most `TomlNode` parsers begin by emitting a `Whitespace` token for any
/// whitespace left over after the previous token.
300impl TomlNode {
301    /// Builds `Whitespace` and `Hash` token and adds them as
302    /// children.
303    fn comment(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
304        parser.builder.start_node(Comment.into());
305
306        TomlToken::hash(muncher, parser)?;
307        TomlToken::comment_text(muncher, parser)?;
308
309        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
310            let (kind, text) = ws.into();
311            parser.builder.token(kind.into(), text)
312        }
313
314        parser.builder.finish_node();
315        Ok(())
316    }
317
318    /// Builds `Whitespace`, `Integer`, `Dot` and `Integer token and adds them as
319    /// children.
320    fn float(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
321        parser.builder.start_node(Float.into());
322
323        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
324            let (kind, text) = ws.into();
325            parser.builder.token(kind.into(), text)
326        }
327        TomlToken::integer(muncher, parser)?;
328        TomlToken::dot(muncher, parser)?;
329        TomlToken::integer(muncher, parser)?;
330        parser.builder.finish_node();
331        Ok(())
332    }
333
334    /// Builds `Date` node from `Whitespace` and `Date` token and if valid adds them as
335    /// children.
336    fn date_time(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
337        parser.builder.start_node(Date.into());
338        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
339            let (kind, text) = ws.into();
340            parser.builder.token(kind.into(), text)
341        }
342        let (s, e) = muncher.eat_until_count(|c| cmp_tokens(c, DATE_END));
343
344        let text = SmolStr::new(&muncher.text()[s..e]);
345
346        if is_valid_datetime(&text) != Ok(true) {
347            let (col, ln) = muncher.cursor_position();
348            let msg = "invalid integer".into();
349            Err(ParseTomlError::new(
350                msg,
351                TomlErrorKind::UnexpectedToken {
352                    tkn: text.into(),
353                    ln,
354                    col,
355                },
356            ))
357        } else {
358            parser.builder.token(Ident.into(), text);
359            parser.builder.finish_node();
360            Ok(())
361        }
362    }
363
364    /// Builds `Str` node from `Whitespace`, `SingleQuote` and `Ident` token and adds them as
365    /// children.
366    fn single_str(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
367        parser.builder.start_node(Str.into());
368        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
369            let (kind, text) = ws.into();
370            parser.builder.token(kind.into(), text)
371        }
372
373        TomlToken::single_quote(muncher, parser)?;
374        TomlToken::ident_single_str(muncher, parser)?;
375        TomlToken::single_quote(muncher, parser)?;
376
377        parser.builder.finish_node();
378        Ok(())
379    }
380
381    /// Builds `Str` node from `Whitespace`, `DoubleQuote` and `Ident` token and adds them as
382    /// children.
383    fn double_str(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
384        parser.builder.start_node(Str.into());
385        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
386            let (kind, text) = ws.into();
387            parser.builder.token(kind.into(), text)
388        }
389
390        TomlToken::double_quote(muncher, parser)?;
391        TomlToken::ident_double_str(muncher, parser)?;
392        TomlToken::double_quote(muncher, parser)?;
393
394        parser.builder.finish_node();
395        Ok(())
396    }
397
398    /// Builds `Str` node from `Whitespace`, `DoubleQuote` and `Ident` token and adds them as
399    /// children.
400    fn string(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
401        parser.builder.start_node(Str.into());
402        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
403            let (kind, text) = ws.into();
404            parser.builder.token(kind.into(), text)
405        }
406
407        if muncher.seek(2).map(|s| s == "\"\"") == Some(true) {
408            TomlToken::triple_quote(muncher, parser)?;
409            TomlToken::ident_triple_str(muncher, parser)?;
410            TomlToken::triple_quote(muncher, parser)?;
411        } else {
412            TomlToken::double_quote(muncher, parser)?;
413            TomlToken::ident_double_str(muncher, parser)?;
414            TomlToken::double_quote(muncher, parser)?;
415        }
416
417        parser.builder.finish_node();
418        Ok(())
419    }
420
421    /// Builds `Key` node from `Whitespace` and `Ident` token and adds them as
422    /// children.
423    fn key(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
424        parser.builder.start_node(Key.into());
425
426        // if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
427        //     let (kind, text) = ws.into();
428        //     parser.builder.token(kind.into(), text)
429        // }
430        let (s, e) = muncher.peek_until_count(|c| cmp_tokens(c, KEY_END));
431        // muncher.reset_peek();
432        match muncher.peek() {
433            Some(&'"') => TomlNode::double_str(muncher, parser),
434            Some(&'\'') => TomlNode::single_str(muncher, parser),
435            Some(ch) if ch.is_ascii() => TomlToken::ident(muncher, parser),
436            Some(tkn) => {
437                let (col, ln) = muncher.cursor_position();
438                let msg = "invalid token in key".into();
439                let tkn = format!("{}", tkn);
440                return Err(ParseTomlError::new(
441                    msg,
442                    TomlErrorKind::UnexpectedToken { tkn, ln, col },
443                ));
444            }
445            None => unreachable!("NONE in key"),
446        }?;
447
448        let text = SmolStr::new(&muncher.text()[s..e]);
449        if is_valid_key(&text) {
450            parser.builder.finish_node();
451            Ok(())
452        } else {
453            let (col, ln) = muncher.cursor_position();
454            let msg = "invalid token in key".into();
455            let tkn = format!("{}", text);
456            Err(ParseTomlError::new(
457                msg,
458                TomlErrorKind::UnexpectedToken { tkn, ln, col },
459            ))
460        }
461    }
462
463    /// Builds `Value` node from `Whitespace` and whatever value node is present
464    /// and adds them as children. this is called for top level key value pairs
465    /// and tables.
466    fn value(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
467        parser.builder.start_node(Value.into());
468        match muncher.peek() {
469            Some('"') => TomlNode::string(muncher, parser),
470            Some('\'') => TomlNode::single_str(muncher, parser),
471            Some('t') | Some('f') => TomlToken::boolean(muncher, parser),
472            Some('[') => TomlNode::array(muncher, parser),
473            Some('{') => TomlNode::inline_table(muncher, parser),
474            Some(digi) if digi.is_numeric() => {
475                muncher.reset_peek();
476                let raw = muncher
477                    .peek_until(|c| cmp_tokens(c, NUM_END))
478                    .collect::<String>();
479                if raw.contains(DATE_LIKE) {
480                    TomlNode::date_time(muncher, parser)
481                } else if raw.contains('.') {
482                    TomlNode::float(muncher, parser)
483                } else {
484                    TomlToken::integer(muncher, parser)
485                }
486            }
487            None => unimplemented!("found EOF in value"),
488            _ => {
489                let msg = "invalid token in value";
490                let tkn = if let Some(peek) = muncher.peek() {
491                    format!("{:?}", peek)
492                } else {
493                    "no token".into()
494                };
495                let (col, ln) = muncher.cursor_position();
496                return Err(ParseTomlError::new(
497                    msg.into(),
498                    TomlErrorKind::UnexpectedToken { tkn, ln, col },
499                ));
500            }
501        }?;
502        parser.builder.finish_node();
503        Ok(())
504    }
505
506    /// Builds `KeyValue` node from `Whitespace`, `Key` and whatever value node is present
507    /// and adds them as children.
508    fn key_value(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
509        if muncher.is_done() {
510            unreachable!("BUG tokenizer should never hit DONE in key value")
511        }
512
513        if muncher.peek() == Some(&'#') {
514            TomlNode::comment(muncher, parser)?;
515            return Ok(());
516        }
517
518        parser.builder.start_node(KeyValue.into());
519
520        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
521            let (kind, text) = ws.into();
522            parser.builder.token(kind.into(), text)
523        }
524
525        TomlNode::key(muncher, parser)?;
526
527        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
528            let (kind, text) = ws.into();
529            parser.builder.token(kind.into(), text)
530        }
531
532        TomlToken::equal(muncher, parser)?;
533
534        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
535            let (kind, text) = ws.into();
536            parser.builder.token(kind.into(), text)
537        }
538
539        TomlNode::value(muncher, parser)?;
540
541        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
542            let (kind, text) = ws.into();
543            parser.builder.token(kind.into(), text)
544        }
545
546        parser.builder.finish_node();
547        Ok(())
548    }
549
550    /// Builds `KeyValue` node from `Whitespace`, `Key` and whatever value node is present
551    /// and adds them as children. This is only for `InlineTable`s.
552    fn inline_key_value(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
553        if muncher.is_done() {
554            unreachable!("BUG tokenizer should never hit DONE in inline key value")
555        }
556        parser.builder.start_node(KeyValue.into());
557
558        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
559            let (kind, text) = ws.into();
560            parser.builder.token(kind.into(), text)
561        }
562
563        TomlNode::key(muncher, parser)?;
564        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
565            let (kind, text) = ws.into();
566            parser.builder.token(kind.into(), text)
567        }
568
569        TomlToken::equal(muncher, parser)?;
570        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
571            let (kind, text) = ws.into();
572            parser.builder.token(kind.into(), text)
573        }
574        TomlNode::inline_value(muncher, parser)?;
575        parser.builder.finish_node();
576        Ok(())
577    }
578
579    /// Builds `ArrayItem` node from `Whitespace` and whatever `Value` node is present
580    /// and adds them as children.
581    fn array_item(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<Option<()>> {
582        if muncher.peek() == Some(&']') {
583            return Ok(None);
584        }
585
586        parser.builder.start_node(ArrayItem.into());
587        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
588            let (kind, text) = ws.into();
589            parser.builder.token(kind.into(), text)
590        }
591
592        TomlNode::value(muncher, parser)?;
593
594        if let Some(comma) = TomlToken::maybe_comma(muncher) {
595            let (kind, text) = comma.into();
596            parser.builder.token(kind.into(), text);
597        }
598        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
599            let (kind, text) = ws.into();
600            parser.builder.token(kind.into(), text);
601        }
602
603        parser.builder.finish_node();
604        Ok(Some(()))
605    }
606
607    /// Builds `Array` node from `Whitespace` and whatever `ArrayItem` nodes are present
608    /// and adds them as children.
609    fn array(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
610        parser.builder.start_node(Array.into());
611        TomlToken::open_brace(muncher, parser)?;
612        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
613            let (kind, text) = ws.into();
614            parser.builder.token(kind.into(), text)
615        }
616
617        while TomlNode::array_item(muncher, parser)?.is_some() { /* loop to array end */ }
618
619        TomlToken::close_brace(muncher, parser)?;
620        parser.builder.finish_node();
621        Ok(())
622    }
623
624    /// Builds `Value` node from `Whitespace` and whatever value node is present
625    /// and adds them as children. This is called for inline tables only.
626    fn inline_value(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
627        parser.builder.start_node(Value.into());
628
629        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
630            let (kind, text) = ws.into();
631            parser.builder.token(kind.into(), text)
632        }
633        match muncher.peek() {
634            Some('"') => TomlNode::string(muncher, parser),
635            Some('\'') => TomlNode::single_str(muncher, parser),
636            Some('t') | Some('f') => TomlToken::boolean(muncher, parser),
637            Some('[') => TomlNode::array(muncher, parser),
638            Some('{') => TomlNode::inline_table(muncher, parser),
639            Some(digi) if digi.is_numeric() => {
640                muncher.reset_peek();
641                let raw = muncher
642                    .peek_until(|c| cmp_tokens(c, NUM_END))
643                    .collect::<String>();
644                if raw.contains(DATE_LIKE) {
645                    TomlNode::date_time(muncher, parser)
646                } else if raw.contains('.') {
647                    TomlNode::float(muncher, parser)
648                } else {
649                    TomlToken::integer(muncher, parser)
650                }
651            }
652            None => unimplemented!("value found EOF"),
653            _ => {
654                let msg = "invalid token in key value pairs";
655                let tkn = if let Some(peek) = muncher.peek() {
656                    format!("{:#?}", peek)
657                } else {
658                    "no token".into()
659                };
660                let (col, ln) = muncher.cursor_position();
661                return Err(ParseTomlError::new(
662                    msg.into(),
663                    TomlErrorKind::UnexpectedToken { tkn, ln, col },
664                ));
665            }
666        }?;
667        parser.builder.finish_node();
668        Ok(())
669    }
670
671    /// Builds `InlineTable` node from `Whitespace` and whatever `KeyValue` nodes are present
672    /// and adds them as children.
673    fn inline_table(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
674        parser.builder.start_node(InlineTable.into());
675        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
676            let (kind, text) = ws.into();
677            parser.builder.token(kind.into(), text)
678        }
679
680        TomlToken::open_curly(muncher, parser)?;
681        loop {
682            // TODO this is weak make some sort of stack machine.
683            if muncher.peek() == Some(&'}') {
684                break;
685            }
686            TomlNode::inline_key_value(muncher, parser)?;
687            // an inline table and an array are the only two node types that
688            // have comma's optionally eat comma and any following whitespace.
689            if let Some(comma) = TomlToken::maybe_comma(muncher) {
690                let (kind, text) = comma.into();
691                parser.builder.token(kind.into(), text)
692            }
693            if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
694                let (kind, text) = ws.into();
695                parser.builder.token(kind.into(), text)
696            }
697        }
698        TomlToken::close_curly(muncher, parser)?;
699        parser.builder.finish_node();
700        Ok(())
701    }
702
    /// Emits the identifier part of a heading, based on the text peeked up to
    /// the next `]`.
    ///
    /// * If the text contains a `"`, a `SegIdent` node is built: with a `.`
    ///   present the segments are emitted one by one (quoted segments through
    ///   `TomlNode::double_str`, bare ones through `TomlToken::ident_seg`, with
    ///   `Dot` tokens in between); without a `.` a single `double_str` is emitted.
    /// * Otherwise, if the text contains a `.`, a `SegIdent` node is built from
    ///   `ident_seg` tokens separated by `Dot` tokens and optional `Whitespace`.
    /// * Otherwise the whole peeked text is added as a plain `Ident` token; this
    ///   branch only peeks, it does not consume the text from `muncher`.
    ///
    /// Errors from the nested token/node parsers are propagated.
    fn ident_heading(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
        // Peek (do not consume) everything up to the closing bracket.
        let (s, e) = muncher.peek_until_count(|c| c == &']');
        // TODO is this more efficient than eat_until to String??
        let text = SmolStr::new(&muncher.text()[s..e]);

        if text.contains('"') {
            parser.builder.start_node(SegIdent.into());
            if text.contains('.') {
                // `txt` is the not-yet-processed remainder of the heading text.
                let mut txt = text;
                loop {
                    // The closure below is rebuilt on every iteration because
                    // `position` takes it by value.
                    // TODO if not in loop `value moved here, in previous iteration of loop` error BOOO
                    let mut in_str = false;
                    // Matches the first `.` that is not inside a double-quoted
                    // segment; `in_str` toggles on every `"` seen.
                    let dot_index = |ch: char| -> bool {
                        if ch == '"' && !in_str {
                            in_str = true;
                        } else if ch == '"' && in_str {
                            in_str = false;
                        };

                        ch == '.' && !in_str
                    };
                    if txt.starts_with('"') {
                        // Quoted segment, then the dot after it if there is one.
                        TomlNode::double_str(muncher, parser)?;
                        if let Some(dot) = TomlToken::maybe_dot(muncher) {
                            let (kind, text) = dot.into();
                            parser.builder.token(kind.into(), text)
                        }
                        // NOTE(review): this branch keeps the `.` at the front of
                        // `txt` (`split_at(idx)`), while the bare-segment branch
                        // below drops it (`idx + 1`) - confirm this is intended.
                        // `position` yields a char index but `split_at` takes a
                        // byte index; they only agree for ASCII text.
                        if let Some(idx) = txt.chars().position(dot_index) {
                            txt = SmolStr::from(txt.split_at(idx).1);
                        } else {
                            break;
                        }
                    } else {
                        // Bare segment, then the dot after it if there is one.
                        TomlToken::ident_seg(muncher, parser)?;
                        if let Some(dot) = TomlToken::maybe_dot(muncher) {
                            let (kind, text) = dot.into();
                            parser.builder.token(kind.into(), text)
                        }
                        if let Some(idx) = txt.chars().position(dot_index) {
                            txt = SmolStr::from(txt.split_at(idx + 1).1);
                        } else {
                            break;
                        }
                    }
                }
            } else {
                // A single quoted name with no segments.
                TomlNode::double_str(muncher, parser)?;
            }
            parser.builder.finish_node();
            Ok(())
        } else if text.contains('.') {
            parser.builder.start_node(SegIdent.into());

            // The first two segments and the dot between them are mandatory here.
            TomlToken::ident_seg(muncher, parser)?;
            if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
                let (kind, text) = ws.into();
                parser.builder.token(kind.into(), text)
            }
            TomlToken::dot(muncher, parser)?;
            if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
                let (kind, text) = ws.into();
                parser.builder.token(kind.into(), text)
            }
            TomlToken::ident_seg(muncher, parser)?;

            // One iteration per remaining segment (third and later): optional
            // whitespace, then a dot followed by the segment.
            for _ in 2..text.split('.').count() {
                if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
                    let (kind, text) = ws.into();
                    parser.builder.token(kind.into(), text)
                }
                if let Some(dot) = TomlToken::maybe_dot(muncher) {
                    let (kind, text) = dot.into();
                    parser.builder.token(kind.into(), text);
                    TomlToken::ident_seg(muncher, parser)?;
                }
            }
            parser.builder.finish_node();
            Ok(())
        } else {
            // Plain heading: the peeked text becomes the `Ident` token as-is.
            parser.builder.token(Ident.into(), text);
            Ok(())
        }
    }
789
790    /// Builds `Heading` node from `Whitespace` and either `Ident` token or
791    /// `SegIdent` node and adds them as children.
792    fn heading(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
793        parser.builder.start_node(Heading.into());
794
795        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
796            let (kind, text) = ws.into();
797            parser.builder.token(kind.into(), text)
798        }
799
800        if muncher.seek(2).map(|s| s.starts_with("[[")) == Some(true) {
801            parser.builder.start_node(TomlKind::ArrayHeading.into());
802            TomlToken::open_brace(muncher, parser)?;
803            TomlToken::open_brace(muncher, parser)?;
804
805            match muncher.peek() {
806                Some(ch) if ch.is_ascii() => TomlNode::ident_heading(muncher, parser)?,
807                Some(tkn) => {
808                    let (col, ln) = muncher.cursor_position();
809                    let msg = "invalid heading token".into();
810                    let tkn = format!("{}", tkn);
811                    return Err(ParseTomlError::new(
812                        msg,
813                        TomlErrorKind::UnexpectedToken { tkn, ln, col },
814                    ));
815                }
816                None => unreachable!("empty toml heading"),
817            };
818            // although this is an iterator it advances the cursor which is what we want
819            let _eaten = muncher.eat_until(|c| c == &']');
820
821            TomlToken::close_brace(muncher, parser)?;
822            TomlToken::close_brace(muncher, parser)?;
823
824            if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
825                let (kind, text) = ws.into();
826                parser.builder.token(kind.into(), text)
827            }
828            // finishes ArrayHeading
829            parser.builder.finish_node();
830            // finishes Heading
831            parser.builder.finish_node();
832            return Ok(());
833        };
834
835        TomlToken::open_brace(muncher, parser)?;
836        match muncher.peek() {
837            Some(ch) if ch.is_ascii() => TomlNode::ident_heading(muncher, parser)?,
838            Some(tkn) => {
839                let (col, ln) = muncher.cursor_position();
840                let msg = "invalid heading token".into();
841                let tkn = format!("{}", tkn);
842                return Err(ParseTomlError::new(
843                    msg,
844                    TomlErrorKind::UnexpectedToken { tkn, ln, col },
845                ));
846            }
847            None => unreachable!("empty toml heading"),
848        };
849        // although this is an iterator it advances the cursor which is what we want
850        let _eaten = muncher.eat_until(|c| c == &']');
851
852        TomlToken::close_brace(muncher, parser)?;
853
854        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
855            let (kind, text) = ws.into();
856            parser.builder.token(kind.into(), text)
857        }
858
859        parser.builder.finish_node();
860        Ok(())
861    }
862
863    /// Builds `Table` node from `Whitespace` and whatever `KeyValue` nodes are present
864    /// and adds them as children.
865    fn table(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
866        parser.builder.start_node(Table.into());
867
868        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
869            let (kind, text) = ws.into();
870            parser.builder.token(kind.into(), text)
871        }
872
873        TomlNode::heading(muncher, parser)?;
874        loop {
875            if muncher.seek(5).map(|s| s.contains('[')) == Some(true) {
876                break;
877            }
878            muncher.reset_peek();
879            // TODO this is weak.
880            if muncher.is_done() {
881                break;
882            }
883            TomlNode::key_value(muncher, parser)?;
884        }
885
886        if let Some(ws) = TomlToken::maybe_whitespace(muncher) {
887            let (kind, text) = ws.into();
888            parser.builder.token(kind.into(), text)
889        }
890
891        parser.builder.finish_node();
892        Ok(())
893    }
894}
895
/// Field-less namespace type for the TOML tokenizing entry points; use
/// `Tokenizer::parse` to turn input text into a populated `Parser`.
pub struct Tokenizer;
897
898impl Tokenizer {
899    /// Returns a wrapper around a `rowan::GreenNodeBuilder` called `Parser`.
900    /// The can be turned into a walk-able `SyntaxNode`.
901    ///
902    /// # Examples
903    /// ```
904    /// # use toml_parse::{Tokenizer, Parser};
905    /// # use rowan::GreenNodeBuilder;
906    /// let toml = "";
907    /// let parse_builder = Parser::new();
908    /// let parsed = Tokenizer::parse(toml, parse_builder).expect("parse failed");
909    /// let green_node = parsed.parse().expect("parse failed");
910    /// let root_node = green_node.syntax();
911    /// ```
912    pub fn parse(input: &str, mut p: Parser) -> TomlResult<Parser> {
913        let mut muncher = Muncher::new(input);
914        Tokenizer::parse_file(&mut muncher, &mut p)?;
915        Ok(p)
916    }
917
918    /// It seems the only three top level Kinds are `KeyValue` pairs,
919    /// `Table`s and `Comments`.
920    fn parse_file(muncher: &mut Muncher, parser: &mut Parser) -> TomlResult<()> {
921        parser.builder.start_node(Root.into());
922        loop {
923            if muncher.is_done() {
924                parser.builder.token(EoF.into(), SmolStr::default());
925                break;
926            }
927
928            match muncher.peek() {
929                Some('#') => {
930                    TomlNode::comment(muncher, parser)?;
931                }
932                Some('[') => {
933                    TomlNode::table(muncher, parser)?;
934                }
935                Some(ch) if ch.is_ascii() => {
936                    TomlNode::key_value(muncher, parser)?;
937                }
938                Some(tkn) => {
939                    let msg = "toml file must be key values or tables".into();
940                    let tkn = format!("{}", tkn);
941                    let (col, ln) = muncher.cursor_position();
942                    return Err(ParseTomlError::new(
943                        msg,
944                        TomlErrorKind::UnexpectedToken { tkn, ln, col },
945                    ));
946                }
947                None => {
948                    parser.builder.token(EoF.into(), SmolStr::default());
949                    break;
950                }
951            }
952        }
953        parser.builder.finish_node();
954        Ok(())
955    }
956}