note_mark/layer/
parser.rs

1//! Parser of tokens.
2//!
3//! This module provides a parser of tokens. The parser is implemented as a
4//! recursive descent parser.
5
6use crate::model::{token::*, tree::*};
7use config::*;
8
9/// Parser of tokens.
10///
11/// This struct contains configurations for parsing. These configurations are
12/// for supporting various markdown syntax.
13///
14/// # Example
15///
16/// ```
17/// use note_mark::prelude::*;
18///
19/// let parser = Parser::default().headline_ending(HeadlineEnding::SoftBreak);
20///
21/// let markdown = Markdown::default().parser(parser);
22///
23/// let html = markdown.execute("# Hello, world!\nThis is a new line.");
24///
25/// assert_eq!(html, "<h1>Hello, world!</h1><p>This is a new line.</p>");
26///
27/// let parser = Parser::default().headline_ending(HeadlineEnding::HardBreak);
28///
29/// let markdown = Markdown::default().parser(parser);
30///
31/// let html = markdown.execute("# Hello, world!\nThis is a new line.");
32///
33/// assert_eq!(html, "<h1>Hello, world!<br>This is a new line.</h1>");
34/// ```
#[derive(Debug, Clone)]
pub struct Parser {
    /// The end of a paragraph is decided by at least two consecutive line breaks.
    /// This determines whether to treat the previous sentence as a paragraph if
    /// the next line is another block element.
    pub paragraph_ending: ParagraphEnding,
    /// This determines whether to allow a line break in a headline.
    pub headline_ending: HeadlineEnding,
    /// This determines whether to make the indent rule of list strict or loose.
    ///
    /// Unlike the other fields this one is not `pub`; within this file it is
    /// only written by `Default::default` and the private `list_indent_rule`
    /// setter.
    list_indent_rule: IndentRule,
    /// This determines whether to make the indent style of list space, tab, or
    /// both.
    pub list_indent_style: IndentStyle,
}
49
pub mod config {
    //! Configurations for parsing.
    //!
    //! This module provides configurations for parsing. The configurations are
    //! used in [Parser](super::Parser).

    /// Ending of paragraph.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum ParagraphEnding {
        /// The paragraph ends at two consecutive line breaks, or at a single
        /// line break when the following tokens look like another block item
        /// (headline, list, or block quote).
        AllowSoftBreak,
        /// The paragraph ends only at two consecutive line breaks (or at the
        /// end of the input).
        HardBreak,
    }

    /// Ending of headline.
    // The shared `Break` suffix is intentional, hence the lint is silenced.
    #[allow(clippy::enum_variant_names)]
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum HeadlineEnding {
        /// The headline ends at the first line break.
        SoftBreak,
        /// The headline ends at two consecutive line breaks, or at a single
        /// line break when the following tokens look like another block item.
        AllowSoftBreak,
        /// The headline ends only at two consecutive line breaks (or at the
        /// end of the input).
        HardBreak,
    }

    /// Indent rule of list.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum IndentRule {
        /// Tokens are matched as they are; no leading indent is skipped.
        Strict,
        /// Leading indent tokens that do not add up to a full indent level
        /// are skipped before matching.
        Loose,
    }

    /// Indent style of list.
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    pub enum IndentStyle {
        /// One indent level is the given number of leading spaces.
        Space(u8),
        /// One indent level is one leading tab.
        Tab,
        /// Spaces and tabs may be mixed: a space counts as one unit, a tab as
        /// two, and two units make one indent level.
        Both,
    }
}
87
88impl Default for Parser {
89    fn default() -> Self {
90        Self {
91            paragraph_ending: ParagraphEnding::HardBreak,
92            headline_ending: HeadlineEnding::HardBreak,
93            list_indent_rule: IndentRule::Strict,
94            list_indent_style: IndentStyle::Space(2),
95        }
96    }
97}
98
99impl Parser {
100    /// Create a new parser.
101    pub fn new() -> Self {
102        Self::default()
103    }
104
105    /// Set ending of paragraph.
106    ///
107    /// # Example
108    ///
109    /// ```
110    /// use note_mark::prelude::*;
111    ///
112    /// let parser = Parser::default().paragraph_ending(ParagraphEnding::AllowSoftBreak);
113    ///
114    /// let markdown = Markdown::default().parser(parser);
115    ///
116    /// let html = markdown.execute("Hello, world!\n# This is a new headline.");
117    ///
118    /// assert_eq!(html, "<p>Hello, world!</p><h1>This is a new headline.</h1>");
119    ///
120    /// let parser = Parser::default().paragraph_ending(ParagraphEnding::HardBreak);
121    ///
122    /// let markdown = Markdown::default().parser(parser);
123    ///
124    /// let html = markdown.execute("Hello, world!\n# This is a new headline.");
125    ///
126    /// assert_eq!(html, "<p>Hello, world!<br># This is a new headline.</p>");
127    /// ```
128    pub fn paragraph_ending(mut self, ending: ParagraphEnding) -> Self {
129        self.paragraph_ending = ending;
130
131        self
132    }
133
134    /// Set ending of headline.
135    ///
136    /// # Example
137    ///
138    /// ```
139    /// use note_mark::prelude::*;
140    ///
141    /// let parser = Parser::default().headline_ending(HeadlineEnding::SoftBreak);
142    ///
143    /// let markdown = Markdown::default().parser(parser);
144    ///
145    /// let html = markdown.execute("# Hello, world!\nThis is a new line.");
146    ///
147    /// assert_eq!(html, "<h1>Hello, world!</h1><p>This is a new line.</p>");
148    ///
149    /// let parser = Parser::default().headline_ending(HeadlineEnding::HardBreak);
150    ///
151    /// let markdown = Markdown::default().parser(parser);
152    ///
153    /// let html = markdown.execute("# Hello, world!\nThis is a new line.");
154    ///
155    /// assert_eq!(html, "<h1>Hello, world!<br>This is a new line.</h1>");
156    /// ```
157    pub fn headline_ending(mut self, ending: HeadlineEnding) -> Self {
158        self.headline_ending = ending;
159
160        self
161    }
162
163    /// Set indent rule of list.
164    ///
165    /// **This config did not work correctly.**
166    #[allow(dead_code)]
167    fn list_indent_rule(mut self, rule: IndentRule) -> Self {
168        self.list_indent_rule = rule;
169
170        self
171    }
172
173    /// Set indent style of list.
174    ///
175    /// # Example
176    ///
177    /// ```
178    /// use note_mark::prelude::*;
179    ///
180    /// let parser = Parser::default().list_indent_style(IndentStyle::Space(2));
181    ///
182    /// let markdown = Markdown::default().parser(parser);
183    ///
184    /// let html = markdown.execute("- Hello, world!\n  - This is a new line.");
185    ///
186    /// assert_eq!(html, "<ul><li>Hello, world!<ul><li>This is a new line.</li></ul></li></ul>");
187    ///
188    /// let parser = Parser::default().list_indent_style(IndentStyle::Tab);
189    ///
190    /// let markdown = Markdown::default().parser(parser);
191    ///
192    /// let html = markdown.execute("- Hello, world!\n\t- This is a new line.");
193    ///
194    /// assert_eq!(html, "<ul><li>Hello, world!<ul><li>This is a new line.</li></ul></li></ul>");
195    /// ```
196    pub fn list_indent_style(mut self, style: IndentStyle) -> Self {
197        self.list_indent_style = style;
198
199        self
200    }
201
202    /// Set all indent style.
203    ///
204    /// Currently, this setting is only for list.
205    #[allow(dead_code)]
206    fn indent_style(mut self, style: IndentStyle) -> Self {
207        self.list_indent_style = style;
208
209        self
210    }
211
212    /// Parse tokens to markdown tree.
213    pub fn parse<'a>(
214        &self,
215        input: &'a str,
216        tokens: impl Iterator<Item = Token>,
217    ) -> MarkdownTree<'a> {
218        Executor::with_config(input, self.clone()).parse(tokens.collect::<Vec<Token>>())
219    }
220}
221
/// Executor of parser.
///
/// Pairs the source text with the parser configuration used for one parse.
struct Executor<'a> {
    /// Source text; token ranges are used to slice text out of it.
    input: &'a str,
    /// Configuration consulted by the block-level parsing functions.
    config: Parser,
}
227
228/// # Functions for constructing Executor and parsing tokens.
229impl<'a> Executor<'a> {
230    /// Create a new executor.
231    #[allow(dead_code)]
232    fn new(input: &'a str) -> Self {
233        Self {
234            input,
235            config: Parser::new(),
236        }
237    }
238
239    /// Create a new executor with config.
240    fn with_config(input: &'a str, config: Parser) -> Self {
241        Self { input, config }
242    }
243
244    /// Parse tokens to markdown tree.
245    fn parse(&self, tokens: Vec<Token>) -> MarkdownTree<'a> {
246        self.markdown_tree(&tokens)
247    }
248}
249
250/// # Utility functions for parsing.
251#[allow(dead_code)]
252impl<'a, 'b> Executor<'a> {
253    /// Trim tokens from start.
254    fn trim_start(tokens: &'b [Token], kind: TokenKind) -> &'b [Token] {
255        let mut temp = tokens;
256
257        loop {
258            if temp.is_empty() {
259                break;
260            }
261
262            if temp[0].kind == kind {
263                temp = &temp[1..];
264            } else {
265                break;
266            }
267        }
268
269        temp
270    }
271
272    /// Trim tokens from end.
273    fn trim_end(tokens: &'b [Token], kind: TokenKind) -> &'b [Token] {
274        let mut temp = tokens;
275
276        loop {
277            if temp.is_empty() {
278                break;
279            }
280
281            if temp[temp.len() - 1].kind == kind {
282                temp = &temp[..temp.len() - 1];
283            } else {
284                break;
285            }
286        }
287
288        temp
289    }
290
291    /// Trim tokens from start and end.
292    fn trim(tokens: &'b [Token], kind: TokenKind) -> &'b [Token] {
293        Self::trim_start(Self::trim_end(tokens, kind), kind)
294    }
295
296    /// Trim white spaces from start.
297    fn trim_white_spaces(tokens: &'b [Token]) -> &'b [Token] {
298        let mut rest = tokens;
299
300        loop {
301            let mut new_rest = Self::trim_start(rest, TokenKind::Space);
302            new_rest = Self::trim_start(new_rest, TokenKind::Tab);
303
304            if new_rest.len() == rest.len() {
305                break rest;
306            }
307
308            rest = new_rest;
309        }
310    }
311
312    /// Get a line of tokens.
313    ///
314    /// # Arguments
315    ///
316    /// * `trim` - Trim white spaces of rest tokens from start.
317    fn get_line(tokens: &'b [Token], trim: bool) -> (&'b [Token], &'b [Token]) {
318        if let Some(index) = tokens
319            .iter()
320            .position(|token| token.kind == TokenKind::Break)
321        {
322            if trim {
323                (
324                    &tokens[..index],
325                    Self::trim_start(&tokens[index + 1..], TokenKind::Break),
326                )
327            } else {
328                (&tokens[..index], &tokens[index + 1..])
329            }
330        } else {
331            (Self::trim_end(tokens, TokenKind::Break), &[])
332        }
333    }
334
335    /// Get a paragraph of tokens.
336    fn get_paragraph(tokens: &'b [Token]) -> (&'b [Token], &'b [Token]) {
337        if let Some(index) = tokens.windows(2).position(|tokens| {
338            tokens[0].kind == TokenKind::Break && tokens[1].kind == TokenKind::Break
339        }) {
340            (
341                &tokens[..index],
342                Self::trim_start(&tokens[index + 2..], TokenKind::Break),
343            )
344        } else {
345            (Self::trim_end(tokens, TokenKind::Break), &[])
346        }
347    }
348
349    /// Count indent level.
350    ///
351    /// # Returns
352    ///
353    /// (level: u32, remainder: u32)
354    fn indent_level(tokens: &[Token], style: IndentStyle) -> (u32, u32) {
355        match style {
356            IndentStyle::Space(n) => {
357                let mut level = 0;
358
359                for token in tokens {
360                    match token.kind {
361                        TokenKind::Space => level += 1,
362                        _ => break,
363                    }
364                }
365
366                ((level / n) as u32, (level % n) as u32)
367            }
368            IndentStyle::Tab => {
369                let mut level = 0;
370
371                for token in tokens {
372                    match token.kind {
373                        TokenKind::Tab => level += 1,
374                        _ => break,
375                    }
376                }
377
378                (level, 0)
379            }
380            IndentStyle::Both => {
381                let mut level = 0;
382
383                for token in tokens {
384                    match token.kind {
385                        TokenKind::Space => level += 1,
386                        TokenKind::Tab => level += 2,
387                        _ => break,
388                    }
389                }
390
391                (level / 2, level % 2)
392            }
393        }
394    }
395
396    /// Reduce indent level.
397    ///
398    /// # Arguments
399    ///
400    /// * `format` - If true, remove remainder.
401    fn reduce_indent(tokens: &[Token], style: IndentStyle, format: bool) -> Vec<Token> {
402        let mut output = vec![];
403
404        let mut rest = tokens;
405
406        loop {
407            let (line, new_rest) = Self::get_line(rest, false);
408
409            if rest.is_empty() {
410                break;
411            }
412
413            let (level, remainder) = Self::indent_level(line, style);
414
415            if level == 0 {
416                output.extend_from_slice(line);
417            } else {
418                match style {
419                    IndentStyle::Space(n) => {
420                        if format {
421                            output.extend_from_slice(&line[(n as usize) + (remainder as usize)..]);
422                        } else {
423                            output.extend_from_slice(&line[n as usize..]);
424                        }
425                    }
426                    IndentStyle::Tab => {
427                        output.extend_from_slice(&line[1..]);
428                    }
429                    IndentStyle::Both => {
430                        if line[0].kind == TokenKind::Space {
431                            if format {
432                                output.extend_from_slice(&line[2 + (remainder as usize)..]);
433                            } else {
434                                output.extend_from_slice(&line[2..]);
435                            }
436                        } else {
437                            output.extend_from_slice(&line[1..]);
438                        }
439                    }
440                }
441            }
442
443            if let Some(break_token) = rest.get(line.len()) {
444                output.push(*break_token);
445            }
446
447            rest = new_rest;
448        }
449
450        output
451    }
452
453    fn align_indent(tokens: &'b [Token], style: IndentStyle, rule: IndentRule) -> &'b [Token] {
454        match rule {
455            IndentRule::Strict => tokens,
456            IndentRule::Loose => {
457                let (_, remainder) = Self::indent_level(tokens, style);
458
459                &tokens[remainder as usize..]
460            }
461        }
462    }
463}
464
/// # Functions for building block tree.
466impl<'a, 'b> Executor<'a> {
467    /// Parse tokens to markdown tree.
468    fn markdown_tree(&self, tokens: &'b [Token]) -> MarkdownTree<'a> {
469        MarkdownTree {
470            root: self.block_tree(tokens),
471        }
472    }
473
474    /// Parse tokens to block tree.
475    fn block_tree(&self, tokens: &'b [Token]) -> BlockTree<'a> {
476        let mut tree = BlockTree { root: vec![] };
477
478        let mut rest = tokens;
479
480        'root: while !rest.is_empty() {
481            for f in [Self::not_paragraph, Self::paragraph] {
482                if let Some((item, new_rest)) = f(self, rest) {
483                    tree.root.push(item);
484                    rest = new_rest;
485                    continue 'root;
486                }
487            }
488        }
489
490        tree
491    }
492
493    /// Parse tokens to paragraph item.
494    fn paragraph(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
495        match self.config.paragraph_ending {
496            ParagraphEnding::HardBreak => {
497                let (input, rest) = Self::get_paragraph(tokens);
498
499                Some((BlockItem::Paragraph(self.inline_tree(input)), rest))
500            }
501            ParagraphEnding::AllowSoftBreak => {
502                let (input, rest) = self.get_until_maybe_block_item(tokens);
503
504                Some((BlockItem::Paragraph(self.inline_tree(input)), rest))
505            }
506        }
507    }
508
509    /// Parse tokens to not paragraph item.
510    fn not_paragraph(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
511        for f in [
512            Self::headline,
513            Self::bullet_list,
514            Self::ordered_list,
515            Self::blockquote,
516        ] {
517            if let Some((item, rest)) = f(self, tokens) {
518                return Some((item, rest));
519            }
520        }
521
522        None
523    }
524
525    /// Parse tokens to headline item.
526    fn headline(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
527        let tokens = Self::trim_white_spaces(tokens);
528
529        let mut level = 0;
530
531        for i in 0..7 {
532            if let Some(token) = tokens.get(i) {
533                match token.kind {
534                    TokenKind::Pound => continue,
535                    TokenKind::Space => {
536                        level = i;
537                        break;
538                    }
539                    _ => return None,
540                }
541            }
542        }
543
544        if level == 0 {
545            return None;
546        }
547
548        let content = Self::trim_start(&tokens[level..], TokenKind::Space);
549
550        match self.config.headline_ending {
551            HeadlineEnding::SoftBreak => {
552                let (input, rest) = Self::get_line(content, true);
553
554                Some((
555                    BlockItem::Headline(level as u8, self.inline_tree(input)),
556                    rest,
557                ))
558            }
559            HeadlineEnding::AllowSoftBreak => {
560                let (input, rest) = self.get_until_maybe_block_item(content);
561
562                Some((
563                    BlockItem::Headline(level as u8, self.inline_tree(input)),
564                    rest,
565                ))
566            }
567            HeadlineEnding::HardBreak => {
568                let (input, rest) = Self::get_paragraph(content);
569
570                Some((
571                    BlockItem::Headline(level as u8, self.inline_tree(input)),
572                    rest,
573                ))
574            }
575        }
576    }
577
578    /// Parse tokens to bullet list item.
579    fn bullet_list(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
580        let mut tree = ListTree { root: vec![] };
581
582        let mut rest = tokens;
583
584        let input2 = Self::align_indent(
585            tokens,
586            self.config.list_indent_style,
587            self.config.list_indent_rule,
588        );
589
590        if input2.get(0)?.kind != TokenKind::Hyphen || input2.get(1)?.kind != TokenKind::Space {
591            return None;
592        }
593
594        while !rest.is_empty() {
595            let input3 = Self::align_indent(
596                rest,
597                self.config.list_indent_style,
598                self.config.list_indent_rule,
599            );
600
601            if input3.get(0)?.kind != TokenKind::Hyphen || input3.get(1)?.kind != TokenKind::Space {
602                break;
603            }
604
605            let (input, new_rest) = self.get_until_maybe_block_item(&rest[2..]);
606
607            if input.is_empty() {
608                break;
609            }
610
611            tree.root.push(self.list_item(input));
612
613            rest = new_rest;
614        }
615
616        Some((BlockItem::BulletList(tree), rest))
617    }
618
619    fn ordered_list(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
620        let mut tree = ListTree { root: vec![] };
621
622        let mut rest = tokens;
623
624        let input2 = Self::align_indent(
625            tokens,
626            self.config.list_indent_style,
627            self.config.list_indent_rule,
628        );
629
630        if input2.get(0)?.kind != TokenKind::Text
631            || input2.get(1)?.kind != TokenKind::Dot
632            || input2.get(2)?.kind != TokenKind::Space
633        {
634            return None;
635        }
636
637        if !self.input[tokens[0].range()]
638            .chars()
639            .all(|c| c.is_ascii_digit())
640        {
641            return None;
642        }
643
644        while !rest.is_empty() {
645            let input3 = Self::align_indent(
646                rest,
647                self.config.list_indent_style,
648                self.config.list_indent_rule,
649            );
650
651            if input3.get(0)?.kind != TokenKind::Text
652                || input3.get(1)?.kind != TokenKind::Dot
653                || input3.get(2)?.kind != TokenKind::Space
654            {
655                break;
656            }
657
658            if !self.input[input3[0].range()]
659                .chars()
660                .all(|c| c.is_ascii_digit())
661            {
662                break;
663            }
664
665            let (input, new_rest) = self.get_until_maybe_block_item(&rest[3..]);
666
667            if input.is_empty() {
668                break;
669            }
670
671            tree.root.push(self.list_item(input));
672
673            rest = new_rest;
674        }
675
676        Some((BlockItem::OrderedList(tree), rest))
677    }
678
679    fn list_item(&self, tokens: &'b [Token]) -> ListItem<'a> {
680        let (name, children_rest) = {
681            let mut this_rest = tokens;
682
683            let mut name = InlineTree { root: vec![] };
684
685            while !this_rest.is_empty() {
686                let (input, rest) = Self::get_line(this_rest, false);
687
688                if input.is_empty() {
689                    break;
690                }
691
692                if Self::indent_level(input, self.config.list_indent_style).0 != 0 {
693                    break;
694                }
695
696                name.root.append(&mut self.inline_tree(input).root);
697
698                name.root.push(InlineItem::Break);
699
700                this_rest = rest;
701            }
702
703            name.root.pop();
704
705            (name, this_rest)
706        };
707
708        let tokens = Self::reduce_indent(children_rest, self.config.list_indent_style, true);
709
710        ListItem {
711            name,
712            children: self.block_tree(&tokens).root,
713        }
714    }
715
716    fn blockquote(&self, tokens: &'b [Token]) -> Option<(BlockItem<'a>, &'b [Token])> {
717        if tokens.get(0)?.kind != TokenKind::Gt {
718            return None;
719        }
720
721        let mut rest = tokens;
722
723        let mut indented_tokens = vec![];
724
725        while !rest.is_empty() {
726            if rest.get(0)?.kind != TokenKind::Gt {
727                break;
728            }
729
730            let (input, new_rest) = Self::get_line(&rest[1..], false);
731
732            let input2 = if self.maybe_block_item(input, true) {
733                Self::align_indent(input, IndentStyle::Space(2), IndentRule::Loose)
734            } else {
735                Self::trim_start(input, TokenKind::Space)
736            };
737
738            indented_tokens.extend_from_slice(input2);
739
740            if let Some(token) = rest.get(1 + input.len()) {
741                indented_tokens.push(*token);
742            }
743
744            rest = new_rest;
745        }
746
747        let tree = self.block_tree(&indented_tokens);
748
749        Some((BlockItem::BlockQuote(tree), rest))
750    }
751
752    /// Judge if tokens is maybe block item.
753    fn maybe_block_item(&self, tokens: &[Token], trim: bool) -> bool {
754        let tokens = if trim {
755            Self::trim_white_spaces(tokens)
756        } else {
757            tokens
758        };
759
760        if self.headline(tokens).is_some() {
761            return true;
762        }
763
764        if tokens.is_empty() {
765            return false;
766        }
767
768        if tokens[0].kind == TokenKind::Gt {
769            return true;
770        }
771
772        if tokens.len() < 2 {
773            return false;
774        }
775
776        if tokens[0].kind == TokenKind::Hyphen && tokens[1].kind == TokenKind::Space {
777            return true;
778        }
779
780        if tokens.len() < 3 {
781            return false;
782        }
783
784        if (tokens[0].kind == TokenKind::Text
785            && tokens[1].kind == TokenKind::Dot
786            && tokens[2].kind == TokenKind::Space)
787            && self.input[tokens[0].range()]
788                .chars()
789                .all(|c| c.is_ascii_digit())
790        {
791            return true;
792        }
793
794        false
795    }
796
797    /// Get tokens until maybe block item.
798    fn get_until_maybe_block_item(&self, tokens: &'b [Token]) -> (&'b [Token], &'b [Token]) {
799        let mut iter = Self::trim_end(tokens, TokenKind::Break).iter().enumerate();
800
801        let (front, back) = loop {
802            if let Some((index, _)) = iter.find(|(_, token)| token.kind == TokenKind::Break) {
803                if self.maybe_block_item(&tokens[index + 1..], false) {
804                    break (&tokens[..index], &tokens[index + 1..]);
805                } else if tokens[index].kind == TokenKind::Break
806                    && tokens[index + 1].kind == TokenKind::Break
807                {
808                    break (&tokens[..index], &tokens[index + 2..]);
809                }
810            } else {
811                break (tokens, &[]);
812            }
813        };
814
815        (
816            Self::trim_end(front, TokenKind::Break),
817            Self::trim_start(back, TokenKind::Break),
818        )
819    }
820}
821
822/// # Functions for building inline tree.
823impl<'a, 'b> Executor<'a> {
824    /// Parse tokens to inline tree.
825    ///
826    /// This function parses all tokens to inline tree.
827    /// So confirm that tokens does not include block items.
828    fn inline_tree(&self, tokens: &[Token]) -> InlineTree<'a> {
829        let mut tree = InlineTree { root: vec![] };
830
831        let mut rest = tokens;
832
833        'root: while !rest.is_empty() {
834            for f in &[Self::strong, Self::italic, Self::r#break] {
835                if let Some((item, new_rest)) = f(self, rest) {
836                    tree.root.push(item);
837                    rest = new_rest;
838                    continue 'root;
839                }
840            }
841
842            if let Some(InlineItem::Text(text)) = tree.root.last_mut() {
843                *text += &self.input[rest[0].range()];
844                rest = &rest[1..];
845                continue;
846            } else {
847                tree.root
848                    .push(InlineItem::Text(self.input[rest[0].range()].into()));
849                rest = &rest[1..];
850                continue;
851            }
852        }
853
854        tree
855    }
856
857    /// Parse tokens to italic item.
858    fn italic(&self, tokens: &'b [Token]) -> Option<(InlineItem<'a>, &'b [Token])> {
859        if tokens[0].kind != TokenKind::Star {
860            return None;
861        }
862
863        let (index, _) = tokens
864            .iter()
865            .enumerate()
866            .skip(1)
867            .find(|(_, token)| token.kind == TokenKind::Star)?;
868
869        let tree = self.inline_tree(&tokens[1..index]);
870
871        Some((InlineItem::Italic(tree), &tokens[index + 1..]))
872    }
873
874    /// Parse tokens to strong item.
875    fn strong(&self, tokens: &'b [Token]) -> Option<(InlineItem<'a>, &'b [Token])> {
876        if tokens[0].kind != TokenKind::Star || tokens.get(1)?.kind != TokenKind::Star {
877            return None;
878        }
879
880        let (index, _) = tokens
881            .windows(2)
882            .enumerate()
883            .skip(1)
884            .find(|(_, t)| t[0].kind == TokenKind::Star && t[1].kind == TokenKind::Star)?;
885
886        let tree = self.inline_tree(&tokens[2..index]);
887
888        Some((InlineItem::Strong(tree), &tokens[index + 2..]))
889    }
890
891    /// Parse tokens to break item.
892    fn r#break(&self, tokens: &'b [Token]) -> Option<(InlineItem<'a>, &'b [Token])> {
893        if tokens[0].kind != TokenKind::Break {
894            return None;
895        }
896
897        Some((InlineItem::Break, &tokens[1..]))
898    }
899}
900
901#[cfg(test)]
902mod tests {
903    use super::*;
904    use crate::layer::lexer::lex;
905
906    fn lex_to_vec(input: &str) -> Vec<Token> {
907        lex(input).collect()
908    }
909
910    #[test]
911    fn test_parse() {
912        let input = "# Hello *World*!\n\nparagraph\n\n";
913
914        let tokens = lex(input);
915
916        let tree = Parser::new().parse(input, tokens);
917
918        assert_eq!(
919            tree,
920            MarkdownTree {
921                root: BlockTree {
922                    root: vec![
923                        BlockItem::Headline(
924                            1,
925                            InlineTree {
926                                root: vec![
927                                    InlineItem::Text("Hello ".into()),
928                                    InlineItem::Italic(InlineTree {
929                                        root: vec![InlineItem::Text("World".into())]
930                                    }),
931                                    InlineItem::Text("!".into()),
932                                ]
933                            }
934                        ),
935                        BlockItem::Paragraph(InlineTree {
936                            root: vec![InlineItem::Text("paragraph".into())]
937                        }),
938                    ]
939                }
940            }
941        );
942    }
943
944    #[test]
945    fn test_reduce_indent() {
946        let input = "  # Hello *World*!\n\nparagraph\n\n";
947        let tokens = lex_to_vec(input);
948
949        let result = Executor::reduce_indent(&tokens, IndentStyle::Space(2), true)
950            .into_iter()
951            .map(|token| token.kind)
952            .collect::<Vec<_>>();
953
954        let expected = "# Hello *World*!\n\nparagraph\n\n";
955        let expected_tokens = lex(expected)
956            .into_iter()
957            .map(|token| token.kind)
958            .collect::<Vec<_>>();
959
960        assert_eq!(result, expected_tokens);
961    }
962
963    #[test]
964    fn test_block_tree() {
965        let input = "# Hello *World*!\n\nparagraph\n\n";
966        let tokens = lex_to_vec(input);
967        let parser = Executor::new(input);
968
969        let tree = parser.block_tree(&tokens);
970
971        assert_eq!(
972            tree,
973            BlockTree {
974                root: vec![
975                    BlockItem::Headline(
976                        1,
977                        InlineTree {
978                            root: vec![
979                                InlineItem::Text("Hello ".into()),
980                                InlineItem::Italic(InlineTree {
981                                    root: vec![InlineItem::Text("World".into())]
982                                }),
983                                InlineItem::Text("!".into()),
984                            ]
985                        }
986                    ),
987                    BlockItem::Paragraph(InlineTree {
988                        root: vec![InlineItem::Text("paragraph".into())]
989                    }),
990                ]
991            }
992        );
993    }
994
995    #[test]
996    fn test_paragraph() {
997        let input = "Hello *World*!\n\n";
998        let tokens = lex_to_vec(input);
999        let parser = Executor::new(input);
1000
1001        let (item, rest) = parser.paragraph(&tokens).unwrap();
1002
1003        assert_eq!(
1004            item,
1005            BlockItem::Paragraph(InlineTree {
1006                root: vec![
1007                    InlineItem::Text("Hello ".into()),
1008                    InlineItem::Italic(InlineTree {
1009                        root: vec![InlineItem::Text("World".into())]
1010                    }),
1011                    InlineItem::Text("!".into()),
1012                ]
1013            })
1014        );
1015        assert_eq!(rest.len(), 0);
1016
1017        let input = "Hello\n";
1018
1019        let tokens = lex_to_vec(input);
1020        let parser = Executor::new(input);
1021
1022        let (item, rest) = parser.paragraph(&tokens).unwrap();
1023
1024        assert_eq!(
1025            item,
1026            BlockItem::Paragraph(InlineTree {
1027                root: vec![InlineItem::Text("Hello".into())]
1028            })
1029        );
1030
1031        assert_eq!(rest.len(), 0);
1032    }
1033
1034    #[test]
1035    fn test_paragraph_before_not_paragraph() {
1036        let input = "Hello *World*!\n# Hello\n";
1037        let tokens = lex_to_vec(input);
1038        let parser = Executor::with_config(
1039            input,
1040            Parser::new().paragraph_ending(ParagraphEnding::AllowSoftBreak),
1041        );
1042
1043        let (item, rest) = parser.paragraph(&tokens).unwrap();
1044
1045        assert_eq!(
1046            item,
1047            BlockItem::Paragraph(InlineTree {
1048                root: vec![
1049                    InlineItem::Text("Hello ".into()),
1050                    InlineItem::Italic(InlineTree {
1051                        root: vec![InlineItem::Text("World".into())]
1052                    }),
1053                    InlineItem::Text("!".into()),
1054                ]
1055            })
1056        );
1057
1058        assert_eq!(rest.len(), 4);
1059    }
1060
1061    #[test]
1062    fn test_headline() {
1063        let input = "###  Hello *World*!\n\n";
1064        let tokens = lex_to_vec(input);
1065        let parser = Executor::new(input);
1066
1067        let (item, rest) = parser.headline(&tokens).unwrap();
1068
1069        assert_eq!(
1070            item,
1071            BlockItem::Headline(
1072                3,
1073                InlineTree {
1074                    root: vec![
1075                        InlineItem::Text("Hello ".into()),
1076                        InlineItem::Italic(InlineTree {
1077                            root: vec![InlineItem::Text("World".into())]
1078                        }),
1079                        InlineItem::Text("!".into()),
1080                    ]
1081                }
1082            )
1083        );
1084
1085        assert_eq!(rest.len(), 0);
1086    }
1087
1088    #[test]
1089    fn test_headline2() {
1090        let input = "# Hello World!\n# Goodbye\n";
1091        let tokens = lex_to_vec(input);
1092        let parser = Executor::new(input);
1093
1094        let (item, _) = parser.headline(&tokens).unwrap();
1095
1096        assert_ne!(
1097            item,
1098            BlockItem::Headline(
1099                1,
1100                InlineTree {
1101                    root: vec![InlineItem::Text("Hello World!".into())]
1102                }
1103            )
1104        );
1105
1106        let parser = Executor::with_config(
1107            input,
1108            Parser::default().headline_ending(HeadlineEnding::AllowSoftBreak),
1109        );
1110
1111        let (item, _) = parser.headline(&tokens).unwrap();
1112
1113        assert_eq!(
1114            item,
1115            BlockItem::Headline(
1116                1,
1117                InlineTree {
1118                    root: vec![InlineItem::Text("Hello World!".into())]
1119                }
1120            )
1121        );
1122    }
1123
1124    #[test]
1125    fn test_bullet_list() {
1126        let input = "- Hello *World*!\n- Hello *World*!\n\n";
1127        let tokens = lex_to_vec(input);
1128        let parser = Executor::new(input);
1129
1130        let (item, rest) = parser.bullet_list(&tokens).unwrap();
1131
1132        assert_eq!(
1133            item,
1134            BlockItem::BulletList(ListTree {
1135                root: vec![
1136                    ListItem {
1137                        name: InlineTree {
1138                            root: vec![
1139                                InlineItem::Text("Hello ".into()),
1140                                InlineItem::Italic(InlineTree {
1141                                    root: vec![InlineItem::Text("World".into())]
1142                                }),
1143                                InlineItem::Text("!".into()),
1144                            ]
1145                        },
1146                        children: vec![]
1147                    },
1148                    ListItem {
1149                        name: InlineTree {
1150                            root: vec![
1151                                InlineItem::Text("Hello ".into()),
1152                                InlineItem::Italic(InlineTree {
1153                                    root: vec![InlineItem::Text("World".into())]
1154                                }),
1155                                InlineItem::Text("!".into()),
1156                            ]
1157                        },
1158                        children: vec![]
1159                    },
1160                ]
1161            }),
1162        );
1163
1164        assert_eq!(rest.len(), 0);
1165
1166        let input = "- Hello!\n  - Fooo!\nHappy\n  - hogee!\n- Good\njobs\n# End\n";
1167        let tokens = lex_to_vec(input);
1168        let parser = Executor::new(input);
1169
1170        let (item, rest) = parser.bullet_list(&tokens).unwrap();
1171
1172        assert_eq!(
1173            item,
1174            BlockItem::BulletList(ListTree {
1175                root: vec![
1176                    ListItem {
1177                        name: InlineTree {
1178                            root: vec![InlineItem::Text("Hello!".into())]
1179                        },
1180                        children: vec![BlockItem::BulletList(ListTree {
1181                            root: vec![
1182                                ListItem {
1183                                    name: InlineTree {
1184                                        root: vec![
1185                                            InlineItem::Text("Fooo!".into()),
1186                                            InlineItem::Break,
1187                                            InlineItem::Text("Happy".into())
1188                                        ]
1189                                    },
1190                                    children: vec![]
1191                                },
1192                                ListItem {
1193                                    name: InlineTree {
1194                                        root: vec![InlineItem::Text("hogee!".into())]
1195                                    },
1196                                    children: vec![]
1197                                }
1198                            ]
1199                        }),]
1200                    },
1201                    ListItem {
1202                        name: InlineTree {
1203                            root: vec![
1204                                InlineItem::Text("Good".into()),
1205                                InlineItem::Break,
1206                                InlineItem::Text("jobs".into())
1207                            ]
1208                        },
1209                        children: vec![]
1210                    },
1211                ]
1212            }),
1213        );
1214
1215        assert_eq!(rest.len(), 4);
1216    }
1217
1218    #[test]
1219    fn ordered_list() {
1220        let input = "1. Hello!\n  1. Fooo!\nHappy\n  1. hogee!\n1. Good\njobs\n# End\n";
1221        let tokens = lex_to_vec(input);
1222        let parser = Executor::new(input);
1223
1224        let (item, rest) = parser.ordered_list(&tokens).unwrap();
1225
1226        assert_eq!(
1227            item,
1228            BlockItem::OrderedList(ListTree {
1229                root: vec![
1230                    ListItem {
1231                        name: InlineTree {
1232                            root: vec![InlineItem::Text("Hello!".into())]
1233                        },
1234                        children: vec![BlockItem::OrderedList(ListTree {
1235                            root: vec![
1236                                ListItem {
1237                                    name: InlineTree {
1238                                        root: vec![
1239                                            InlineItem::Text("Fooo!".into()),
1240                                            InlineItem::Break,
1241                                            InlineItem::Text("Happy".into())
1242                                        ]
1243                                    },
1244                                    children: vec![]
1245                                },
1246                                ListItem {
1247                                    name: InlineTree {
1248                                        root: vec![InlineItem::Text("hogee!".into())]
1249                                    },
1250                                    children: vec![]
1251                                }
1252                            ]
1253                        }),]
1254                    },
1255                    ListItem {
1256                        name: InlineTree {
1257                            root: vec![
1258                                InlineItem::Text("Good".into()),
1259                                InlineItem::Break,
1260                                InlineItem::Text("jobs".into())
1261                            ]
1262                        },
1263                        children: vec![]
1264                    },
1265                ]
1266            }),
1267        );
1268
1269        assert_eq!(rest.len(), 4);
1270    }
1271
1272    #[test]
1273    fn test_blockquote() {
1274        let input = ">Hello\n>\n>>Yeah\nHappy";
1275        let tokens = lex_to_vec(input);
1276        let parser = Executor::new(input);
1277
1278        let (item, rest) = parser.blockquote(&tokens).unwrap();
1279
1280        assert_eq!(
1281            item,
1282            BlockItem::BlockQuote(BlockTree {
1283                root: vec![
1284                    BlockItem::Paragraph(InlineTree {
1285                        root: vec![InlineItem::Text("Hello".into())]
1286                    }),
1287                    BlockItem::BlockQuote(BlockTree {
1288                        root: vec![BlockItem::Paragraph(InlineTree {
1289                            root: vec![InlineItem::Text("Yeah".into())]
1290                        }),]
1291                    }),
1292                ]
1293            })
1294        );
1295
1296        assert_eq!(rest.len(), 1);
1297    }
1298
1299    #[test]
1300    fn test_inline_tree() {
1301        let input = "Hello *World*!\n";
1302        let tokens = lex_to_vec(input);
1303        let parser = Executor::new(input);
1304
1305        let tree = parser.inline_tree(&tokens);
1306
1307        assert_eq!(
1308            tree,
1309            InlineTree {
1310                root: vec![
1311                    InlineItem::Text("Hello ".into()),
1312                    InlineItem::Italic(InlineTree {
1313                        root: vec![InlineItem::Text("World".into())]
1314                    }),
1315                    InlineItem::Text("!".into()),
1316                    InlineItem::Break,
1317                ]
1318            }
1319        );
1320    }
1321
1322    #[test]
1323    fn test_italic() {
1324        let input = r"*Hello*";
1325        let tokens = lex_to_vec(input);
1326        let parser = Executor::new(input);
1327
1328        let (item, rest) = parser.italic(&tokens).unwrap();
1329
1330        assert_eq!(
1331            item,
1332            InlineItem::Italic(InlineTree {
1333                root: vec![InlineItem::Text("Hello".into())]
1334            })
1335        );
1336        assert_eq!(rest.len(), 0);
1337
1338        let input = "*";
1339        let tokens = lex_to_vec(input);
1340        let parser = Executor::new(input);
1341
1342        assert_eq!(parser.italic(&tokens), None);
1343    }
1344
1345    #[test]
1346    fn test_strong() {
1347        let input = r"**Hello**";
1348        let tokens = lex_to_vec(input);
1349        let parser = Executor::new(input);
1350
1351        let (item, rest) = parser.strong(&tokens).unwrap();
1352
1353        assert_eq!(
1354            item,
1355            InlineItem::Strong(InlineTree {
1356                root: vec![InlineItem::Text("Hello".into())]
1357            })
1358        );
1359        assert_eq!(rest.len(), 0);
1360    }
1361
1362    #[test]
1363    fn test_break() {
1364        let input = "\r\nHello";
1365        let tokens = lex_to_vec(input);
1366        let parser = Executor::new(input);
1367
1368        let (item, rest) = parser.r#break(&tokens).unwrap();
1369
1370        assert_eq!(item, InlineItem::Break);
1371        assert_eq!(rest.len(), 1);
1372    }
1373}