micron_parser/parsing/
parser.rs

1use std::mem::{self};
2
3use itertools::Itertools;
4use logos::Logos;
5
6use crate::{
7    events::{self, Event, token_to_event},
8    lexer::Token,
9    model::EmptyLine,
10    parsing::model::{
11        Alignment, Block, Document, Field, InlineNode, Line, Link, Ruler, Section, Style, StyleSet,
12    },
13};
14
/// Error produced when a fragment of micron markup cannot be turned into a block.
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`] so it can be propagated with
/// `?`, boxed as `Box<dyn Error>`, or wrapped by error-reporting crates.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParseError {
    /// Text carried by the error; presumably the offending slice of the input
    /// (nothing in this file constructs the error, so confirm against its producer).
    invalid_slice: String,
}

impl std::fmt::Display for ParseError {
    /// Renders the error as `invalid micron markup: "<slice>"`, with the slice debug-quoted
    /// so that whitespace and control characters stay visible.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(f, "invalid micron markup: {:?}", self.invalid_slice)
    }
}

impl std::error::Error for ParseError {}
19
20// The code/logic duplication between Parser and inside a Section is unfortunate.
21// Perhaps it would be better to represent a Document as a series of Sections/Blocks,
22// and just treat items outside any section as being in a header-less "section 0".
23// Or maybe something else entirely.
/// Event-driven builder that turns a stream of micron events into a list of blocks.
///
/// The parser keeps at most one block "open" at a time; finishing it moves it into
/// `blocks`. Style and alignment are tracked as running state and copied into the
/// nodes and blocks created while they are active.
#[derive(Debug)]
pub struct Parser {
    /// Blocks that have been finished so far, in document order.
    blocks: Vec<Result<Block, ParseError>>,
    /// The block currently under construction, if any.
    current_block: Option<Block>,
    /// Style cloned into every inline node, ruler and empty line created from now on.
    current_style: StyleSet,
    /// Alignment given to newly started lines, sections and empty lines.
    current_alignment: Alignment,
    /// `true` from the start of a section until the newline that ends its header.
    in_header: bool,
    /// Style that was active before the header was entered; restored when the header ends.
    saved_style: StyleSet,
}
33
34impl Default for Parser {
35    fn default() -> Self {
36        Parser::new()
37    }
38}
39
40impl Parser {
41    pub fn new() -> Self {
42        Self {
43            blocks: vec![],
44            current_block: None,
45            current_style: StyleSet::default(),
46            current_alignment: Alignment::default(),
47            in_header: false,
48            saved_style: StyleSet::default(),
49        }
50    }
51
52    fn finish_block(&mut self) {
53        if let Some(mut block) = self.current_block.take() {
54            if let Block::Section(section) = &mut block {
55                section.finish_child();
56            }
57            self.blocks.push(Ok(block));
58        }
59    }
60
61    fn set_style(&mut self, style: Style) {
62        self.current_style.set(style);
63    }
64
65    fn unset_style(&mut self, style: Style) {
66        self.current_style.unset(&style);
67    }
68
69    /// Set a new alignment for the current header/line, as alignments apply to entire lines.
70    fn set_alignment(&mut self, alignment: Alignment) {
71        self.current_alignment = alignment;
72        if let Some(block) = self.current_block.as_mut() {
73            match block {
74                Block::Line(paragraph) => paragraph.alignment = alignment,
75                Block::EmptyLine(_) => (),
76                Block::Section(section) => {
77                    if self.in_header {
78                        section.set_header_alignment(alignment);
79                    } else {
80                        section.set_alignment(alignment);
81                    }
82                }
83                Block::Ruler(_) => (),
84            }
85        }
86    }
87
88    fn enter_header(&mut self) {
89        if !self.in_header {
90            // When we enter a header, a new style scope is created
91            self.in_header = true;
92            self.saved_style = mem::take(&mut self.current_style);
93        }
94    }
95
96    fn exit_header(&mut self) {
97        if self.in_header {
98            // we just exited a header; restore style scope
99            self.in_header = false;
100            self.current_style = mem::take(&mut self.saved_style);
101        }
102    }
103
104    fn start_section(&mut self, level: u8) {
105        self.finish_block();
106        self.enter_header();
107        self.current_block = Some(Block::Section(Section {
108            level,
109            current_alignment: self.current_alignment,
110            ..Default::default()
111        }))
112    }
113
114    fn end_section(&mut self) {
115        self.finish_block();
116    }
117
118    fn start_paragraph(&mut self) {
119        self.finish_block();
120        self.current_block = Some(Block::Line(Line {
121            alignment: self.current_alignment,
122            ..Default::default()
123        }));
124    }
125
126    fn add_inline_node(&mut self, node: InlineNode) {
127        if let Some(block) = self.current_block.as_mut() {
128            match block {
129                Block::Line(paragraph) => {
130                    paragraph.add_node(node);
131                }
132                Block::EmptyLine(_) => {
133                    self.start_paragraph();
134                    self.add_inline_node(node);
135                }
136                Block::Section(section) => {
137                    if self.in_header {
138                        section.add_to_header(node);
139                    } else {
140                        section.add_inline_node(node);
141                    }
142                }
143                Block::Ruler(_) => {
144                    self.start_paragraph();
145                    self.add_inline_node(node);
146                }
147            }
148        } else {
149            self.start_paragraph();
150            self.add_inline_node(node);
151        }
152    }
153
154    fn add_text(&mut self, text: String) {
155        let node = InlineNode::Text {
156            style: self.current_style.clone(),
157            text,
158        };
159        self.add_inline_node(node);
160    }
161
162    fn add_emptyline(&mut self) {
163        let empty_line = EmptyLine {
164            style: self.current_style.clone(),
165            alignment: self.current_alignment,
166        };
167        if let Some(Block::Section(section)) = self.current_block.as_mut() {
168            section.add_empty_line(empty_line);
169        } else {
170            self.finish_block();
171            self.current_block = Some(Block::EmptyLine(empty_line));
172        }
173    }
174
175    /// Adds a newline. This means finishing any in-progress blocks,
176    fn add_newline(&mut self) {
177        if self.in_header {
178            self.exit_header();
179        } else {
180            self.add_inline_node(InlineNode::Newline {
181                style: self.current_style.clone(),
182            });
183            match self.current_block.as_mut() {
184                Some(Block::Section(section)) => {
185                    if section.current_child.is_some() {
186                        section.finish_child();
187                    }
188                }
189                Some(_) => self.finish_block(),
190                _ => (),
191            }
192        }
193    }
194
195    fn add_link(&mut self, link: Link) {
196        let node = InlineNode::Link {
197            style: self.current_style.clone(),
198            link,
199        };
200        self.add_inline_node(node);
201    }
202
203    fn add_field(&mut self, field: Field) {
204        let node = InlineNode::Field {
205            style: self.current_style.clone(),
206            field,
207        };
208        self.add_inline_node(node);
209    }
210
211    fn add_ruler(&mut self, symbol: Option<String>) {
212        let line = Ruler {
213            symbol,
214            style: self.current_style.clone(),
215        };
216        if let Some(Block::Section(section)) = self.current_block.as_mut() {
217            section.add_ruler(line);
218        } else {
219            self.finish_block();
220            self.current_block = Some(Block::Ruler(line));
221        }
222    }
223
224    /// Parse the provided micron markup
225    pub fn parse(mut self, micron_markup: &str) -> Result<Document, Vec<ParseError>> {
226        let lexer = Token::lexer(micron_markup);
227        let events = token_to_event(lexer);
228        let mut previous_event: Option<Event>;
229        let mut current_event: Option<Event> = None;
230        for e in events {
231            previous_event = current_event.clone();
232            current_event = Some(e.clone());
233            match e {
234                Event::ControlHeader(_) => (),
235                Event::BoldStart => self.set_style(Style::Bold),
236                Event::BoldEnd => self.unset_style(Style::Bold),
237                Event::UnderlineStart => self.set_style(Style::Underline),
238                Event::UnderlineEnd => self.unset_style(Style::Underline),
239                Event::ItalicStart => self.set_style(Style::Italic),
240                Event::ItalicEnd => self.unset_style(Style::Italic),
241                Event::Align(alignment) => match alignment {
242                    events::Alignment::Default => self.set_alignment(Alignment::Default),
243                    events::Alignment::Left => self.set_alignment(Alignment::Left),
244                    events::Alignment::Center => self.set_alignment(Alignment::Center),
245                    events::Alignment::Right => self.set_alignment(Alignment::Right),
246                },
247                Event::LiteralStart => (),
248                Event::LiteralEnd => (),
249                Event::ForegroundColorSet(c) => self.set_style(Style::ForegroundColor(c)),
250                Event::ForegroundColorUnset => {
251                    self.unset_style(Style::ForegroundColor("".to_string()))
252                }
253                Event::BackgroundColorSet(c) => self.set_style(Style::BackgroundColor(c)),
254                Event::BackgroundColorUnset => {
255                    self.unset_style(Style::BackgroundColor("".to_string()))
256                }
257                Event::Link(link) => self.add_link(link),
258                Event::Field(field) => self.add_field(field),
259                Event::SectionStart { level } => self.start_section(level),
260                Event::SectionReset => self.end_section(),
261                Event::Line(c) => self.add_ruler(c),
262                Event::Newline => {
263                    if previous_event == Some(Event::Newline) {
264                        self.add_emptyline();
265                    } else {
266                        self.add_newline();
267                    }
268                }
269                Event::Text(s) => self.add_text(s),
270                Event::Comment(_) => (),
271                Event::UnknownTag(_) => (),
272            }
273        }
274        self.finish_block();
275        let results = self.blocks;
276        let (blocks, errors): (Vec<Block>, Vec<ParseError>) =
277            results.into_iter().partition_result();
278        if errors.is_empty() {
279            Ok(Document { blocks })
280        } else {
281            Err(errors)
282        }
283    }
284}
285
286#[cfg(test)]
287mod tests {
288    use claims::assert_ok;
289
290    use crate::model::ChildBlock;
291
292    use super::*;
293
294    #[test]
295    fn can_parse_bold_italic_underline2() {
296        let mu = include_str!("../../tests/cases/bold_italic_underline2.mu");
297        let doc = assert_ok!(Parser::new().parse(mu));
298
299        let mut iter = dbg!(&doc).blocks.iter();
300
301        assert_eq!(
302            iter.next(),
303            Some(&Block::Line(Line {
304                nodes: vec![
305                    InlineNode::Text {
306                        style: StyleSet::default(),
307                        text: "This is a ".to_string()
308                    },
309                    InlineNode::Text {
310                        style: StyleSet(vec![Style::Bold, Style::Italic, Style::Underline,]),
311                        text: "bold, italicized, and underlined".to_string()
312                    },
313                    InlineNode::Text {
314                        style: StyleSet::default(),
315                        text: " example with a different formatting end.".to_string()
316                    },
317                    InlineNode::Newline {
318                        style: StyleSet::default(),
319                    },
320                ],
321                ..Default::default()
322            }))
323        );
324
325        assert_eq!(iter.next(), None,);
326        assert_eq!(1, doc.blocks.len());
327    }
328
329    #[test]
330    fn can_parse_escape() {
331        let mu = include_str!("../../tests/cases/escape.mu");
332        let doc = assert_ok!(Parser::new().parse(mu));
333
334        let mut iter = dbg!(&doc).blocks.iter();
335
336        assert_eq!(
337            iter.next(),
338            Some(&Block::Line(Line {
339                nodes: vec![
340                    InlineNode::Text {
341                        style: StyleSet::default(),
342                        text: "This is an escaped `!bold text`!".to_string()
343                    },
344                    InlineNode::Newline {
345                        style: StyleSet::default(),
346                    },
347                ],
348                ..Default::default()
349            }))
350        );
351        assert_eq!(
352            iter.next(),
353            Some(&Block::Line(Line {
354                nodes: vec![
355                    InlineNode::Text {
356                        style: StyleSet::default(),
357                        text: r#"Escaped escape: \"#.to_string()
358                    },
359                    InlineNode::Newline {
360                        style: StyleSet::default(),
361                    },
362                ],
363                ..Default::default()
364            }))
365        );
366
367        assert_eq!(iter.next(), None,);
368        assert_eq!(2, doc.blocks.len());
369    }
370
371    #[test]
372    fn can_parse_cursed_style_reset_style() {
373        let mu = include_str!("../../tests/cases/cursed/style_reset_style.mu");
374        let doc = assert_ok!(Parser::new().parse(mu));
375
376        let mut iter = dbg!(&doc).blocks.iter();
377
378        assert_eq!(
379            iter.next(),
380            Some(&Block::Line(Line {
381                nodes: vec![
382                    InlineNode::Text {
383                        style: StyleSet(vec![Style::Bold]),
384                        text: "Bold".to_string()
385                    },
386                    InlineNode::Text {
387                        style: StyleSet::default(),
388                        text: "NotBold".to_string()
389                    },
390                    InlineNode::Text {
391                        style: StyleSet(vec![Style::Bold]),
392                        text: "Bold".to_string()
393                    },
394                    InlineNode::Newline {
395                        style: StyleSet::default(),
396                    },
397                ],
398                ..Default::default()
399            }))
400        );
401
402        assert_eq!(iter.next(), None,);
403        assert_eq!(1, doc.blocks.len());
404    }
405
406    #[test]
407    fn can_parse_background() {
408        let mu = include_str!("../../tests/cases/background.mu");
409        let doc = assert_ok!(Parser::new().parse(mu));
410
411        let mut iter = dbg!(&doc).blocks.iter();
412
413        assert_eq!(
414            iter.next(),
415            Some(&Block::Line(Line {
416                nodes: vec![
417                    InlineNode::Text {
418                        style: StyleSet::default(),
419                        text: "We'll set ".to_string()
420                    },
421                    InlineNode::Text {
422                        style: StyleSet(vec![Style::BackgroundColor("bbb".to_string()),]),
423                        text: "background here, unset ".to_string()
424                    },
425                    InlineNode::Text {
426                        style: StyleSet::default(),
427                        text: " here.".to_string()
428                    },
429                    InlineNode::Newline {
430                        style: StyleSet::default()
431                    }
432                ],
433                ..Default::default()
434            }))
435        );
436        assert_eq!(
437            iter.next(),
438            Some(&Block::Line(Line {
439                nodes: vec![
440                    InlineNode::Text {
441                        style: StyleSet(vec![Style::BackgroundColor("e2e".to_string()),]),
442                        text: "This is a full width background.".to_string()
443                    },
444                    InlineNode::Newline {
445                        style: StyleSet(vec![Style::BackgroundColor("e2e".to_string()),]),
446                    }
447                ],
448                ..Default::default()
449            }))
450        );
451
452        assert_eq!(iter.next(), None,);
453        assert_eq!(2, doc.blocks.len());
454    }
455
456    #[test]
457    fn can_parse_section() {
458        let mu = include_str!("../../tests/cases/section.mu");
459        let doc = assert_ok!(Parser::new().parse(mu));
460
461        let mut iter = dbg!(&doc).blocks.iter();
462
463        assert_eq!(
464            iter.next(),
465            Some(&Block::Section(Section {
466                level: 1,
467                header: Some(Line {
468                    nodes: vec![InlineNode::Text {
469                        style: StyleSet::default(),
470                        text: "This is a section header".to_string()
471                    }],
472                    alignment: Alignment::default(),
473                }),
474                children: vec![
475                    ChildBlock::Line(Line {
476                        nodes: vec![
477                            InlineNode::Text {
478                                style: StyleSet::default(),
479                                text: "and this is some section text".to_string()
480                            },
481                            InlineNode::Newline {
482                                style: StyleSet::default(),
483                            },
484                        ],
485                        alignment: Alignment::default(),
486                    }),
487                    ChildBlock::Line(Line {
488                        nodes: vec![
489                            InlineNode::Text {
490                                style: StyleSet::default(),
491                                text: "this is a > mid sentence section tag, ignored".to_string()
492                            },
493                            InlineNode::Newline {
494                                style: StyleSet::default(),
495                            },
496                        ],
497                        alignment: Alignment::default(),
498                    }),
499                    ChildBlock::EmptyLine(EmptyLine::default()),
500                ],
501                current_child: None,
502                current_alignment: Alignment::default(),
503            }))
504        );
505        assert_eq!(
506            iter.next(),
507            Some(&Block::Line(Line {
508                nodes: vec![InlineNode::Newline {
509                    style: StyleSet::default(),
510                }],
511                alignment: Alignment::default(),
512            }))
513        );
514        assert_eq!(
515            iter.next(),
516            Some(&Block::Line(Line {
517                nodes: vec![
518                    InlineNode::Text {
519                        style: StyleSet::default(),
520                        text: "and this is some non sectioned text".to_string(),
521                    },
522                    InlineNode::Newline {
523                        style: StyleSet::default(),
524                    },
525                ],
526                alignment: Alignment::default(),
527            }))
528        );
529        assert_eq!(
530            iter.next(),
531            Some(&Block::Line(Line {
532                nodes: vec![
533                    InlineNode::Text {
534                        style: StyleSet::default(),
535                        text: "this is a < mid sentence section end tag, ignored".to_string(),
536                    },
537                    InlineNode::Newline {
538                        style: StyleSet::default(),
539                    },
540                ],
541                alignment: Alignment::default(),
542            }))
543        );
544        assert_eq!(
545            iter.next(),
546            Some(&Block::Line(Line {
547                nodes: vec![
548                    InlineNode::Text {
549                        style: StyleSet::default(),
550                        text: " but this end section tag is honored".to_string(),
551                    },
552                    InlineNode::Newline {
553                        style: StyleSet::default(),
554                    },
555                ],
556                alignment: Alignment::default(),
557            }))
558        );
559
560        assert_eq!(iter.next(), None,);
561        assert_eq!(5, doc.blocks.len());
562    }
563
564    #[test]
565    fn can_parse_alignment() {
566        let mu = include_str!("../../tests/cases/alignment.mu");
567        let doc = assert_ok!(Parser::new().parse(mu));
568
569        let mut iter = dbg!(&doc).blocks.iter();
570
571        assert_eq!(
572            iter.next(),
573            Some(&Block::Line(Line {
574                nodes: vec![
575                    InlineNode::Text {
576                        style: StyleSet::default(),
577                        text: "Centered".to_string()
578                    },
579                    InlineNode::Newline {
580                        style: StyleSet::default()
581                    }
582                ],
583                alignment: Alignment::Center,
584            }))
585        );
586        assert_eq!(
587            iter.next(),
588            Some(&Block::Line(Line {
589                nodes: vec![
590                    InlineNode::Text {
591                        style: StyleSet::default(),
592                        text: "Also Centered".to_string()
593                    },
594                    InlineNode::Newline {
595                        style: StyleSet::default()
596                    }
597                ],
598                alignment: Alignment::Center,
599            }))
600        );
601        assert_eq!(
602            iter.next(),
603            Some(&Block::EmptyLine(EmptyLine {
604                style: StyleSet::default(),
605                alignment: Alignment::Center
606            }))
607        );
608        assert_eq!(
609            iter.next(),
610            Some(&Block::Line(Line {
611                nodes: vec![
612                    InlineNode::Text {
613                        style: StyleSet::default(),
614                        text: "Default".to_string()
615                    },
616                    InlineNode::Newline {
617                        style: StyleSet::default()
618                    }
619                ],
620                alignment: Alignment::Default,
621            }))
622        );
623        assert_eq!(
624            iter.next(),
625            Some(&Block::EmptyLine(EmptyLine {
626                style: StyleSet::default(),
627                alignment: Alignment::Default
628            }))
629        );
630        assert_eq!(
631            iter.next(),
632            Some(&Block::Line(Line {
633                nodes: vec![
634                    InlineNode::Text {
635                        style: StyleSet::default(),
636                        text: "Right".to_string()
637                    },
638                    InlineNode::Newline {
639                        style: StyleSet::default()
640                    }
641                ],
642                alignment: Alignment::Right,
643            }))
644        );
645        assert_eq!(
646            iter.next(),
647            Some(&Block::Line(Line {
648                nodes: vec![
649                    InlineNode::Text {
650                        style: StyleSet::default(),
651                        text: "Default".to_string()
652                    },
653                    InlineNode::Newline {
654                        style: StyleSet::default()
655                    }
656                ],
657                alignment: Alignment::Right,
658            }))
659        );
660        assert_eq!(
661            iter.next(),
662            Some(&Block::EmptyLine(EmptyLine {
663                style: StyleSet::default(),
664                alignment: Alignment::Right,
665            }))
666        );
667        assert_eq!(
668            iter.next(),
669            Some(&Block::Line(Line {
670                nodes: vec![
671                    InlineNode::Text {
672                        style: StyleSet::default(),
673                        text: "Centered".to_string()
674                    },
675                    InlineNode::Newline {
676                        style: StyleSet::default()
677                    }
678                ],
679                alignment: Alignment::Center,
680            }))
681        );
682        assert_eq!(
683            iter.next(),
684            Some(&Block::Line(Line {
685                nodes: vec![
686                    InlineNode::Text {
687                        style: StyleSet::default(),
688                        text: "Default".to_string()
689                    },
690                    InlineNode::Newline {
691                        style: StyleSet::default()
692                    }
693                ],
694                alignment: Alignment::Center,
695            }))
696        );
697
698        assert_eq!(iter.next(), None,);
699        assert_eq!(10, doc.blocks().len());
700    }
701
702    #[test]
703    fn can_parse_cursed_align_reset() {
704        let mu = include_str!("../../tests/cases/cursed/align_reset.mu");
705        let doc = assert_ok!(Parser::new().parse(mu));
706
707        let mut iter = dbg!(&doc).blocks.iter();
708
709        assert_eq!(
710            iter.next(),
711            Some(&Block::Line(Line {
712                nodes: vec![
713                    InlineNode::Text {
714                        style: StyleSet::default(),
715                        text: "Centered".to_string()
716                    },
717                    InlineNode::Newline {
718                        style: StyleSet::default(),
719                    },
720                ],
721                alignment: Alignment::Center,
722            }))
723        );
724        assert_eq!(
725            iter.next(),
726            Some(&Block::Line(Line {
727                nodes: vec![
728                    InlineNode::Text {
729                        style: StyleSet::default(),
730                        text: "Default".to_string()
731                    },
732                    InlineNode::Text {
733                        style: StyleSet::default(),
734                        text: " Align".to_string()
735                    },
736                    InlineNode::Newline {
737                        style: StyleSet::default(),
738                    },
739                ],
740                alignment: Alignment::Default,
741            }))
742        );
743
744        assert_eq!(iter.next(), None,);
745        assert_eq!(2, doc.blocks().len());
746    }
747
748    #[test]
749    fn can_parse_cursed_eol_color_unset() {
750        let mu = include_str!("../../tests/cases/cursed/eol_color_unset.mu");
751        let doc = assert_ok!(Parser::new().parse(mu));
752
753        let mut iter = dbg!(&doc).blocks.iter();
754
755        assert_eq!(
756            iter.next(),
757            Some(&Block::Line(Line {
758                nodes: vec![
759                    InlineNode::Text {
760                        style: StyleSet(vec![Style::BackgroundColor("00f".to_string()),]),
761                        text: "red".to_string()
762                    },
763                    InlineNode::Newline {
764                        style: StyleSet::default(),
765                    }
766                ],
767                alignment: Alignment::default(),
768            }))
769        );
770        assert_eq!(
771            iter.next(),
772            Some(&Block::Line(Line {
773                nodes: vec![
774                    InlineNode::Text {
775                        style: StyleSet::default(),
776                        text: "not red".to_string()
777                    },
778                    InlineNode::Newline {
779                        style: StyleSet::default(),
780                    }
781                ],
782                alignment: Alignment::default(),
783            }))
784        );
785
786        assert_eq!(iter.next(), None,);
787        assert_eq!(2, doc.blocks().len());
788    }
789
790    #[test]
791    fn can_parse_nomad_guide() {
792        let mu = include_str!("../../tests/cases/nomad_net_guide.mu");
793        let doc = assert_ok!(Parser::new().parse(mu));
794
795        let _iter = dbg!(&doc).blocks.iter();
796
797        assert_eq!(41, doc.blocks().len());
798    }
799}