flav_md_engine/lexer/
lexer.rs

1pub mod lexer {
2    use crate::lexer::builder::builder::MdNode;
3    use crate::lexer::pattern::code_block::code_block::{is_code_block_start, parse_code_block};
4    use crate::lexer::pattern::inline::inline::inline_parse;
5    use crate::lexer::pattern::list::list::ListPattern::SimpleList;
6    use crate::lexer::pattern::list::list::{
7        is_number_list, is_simple_list, parse_list, ListPattern,
8    };
9    use crate::lexer::pattern::quote::quote::{enclose_quote, is_quote_block};
10    use crate::lexer::pattern::table::table::{is_table_block_start, parse_table};
11
12    #[derive(Debug, PartialEq)]
13    pub enum Token {
14        H1,
15        H2,
16        H3,
17        H4,
18        H5,
19        H6,
20        P,
21        Ul,
22        Ol,
23        Li,
24        Blockquote,
25        Code,
26        Table,
27    }
28
29    impl Token {
30        pub fn value(&self) -> String {
31            match *self {
32                Token::H1 => "h1",
33                Token::H2 => "h2",
34                Token::H3 => "h3",
35                Token::H4 => "h4",
36                Token::H5 => "h5",
37                Token::H6 => "h6",
38                Token::P => "p",
39                Token::Ul => "ul",
40                Token::Ol => "ol",
41                Token::Li => "li",
42                Token::Blockquote => "Blockquote",
43                Token::Code => "code",
44                Token::Table => "table",
45            }
46            .to_string()
47        }
48
49        pub fn is_head(&self) -> bool {
50            match *self {
51                Token::H1 | Token::H2 | Token::H3 | Token::H4 | Token::H5 | Token::H6 => true,
52                _ => false,
53            }
54        }
55
56        pub fn value_of(number: i8) -> Self {
57            match number {
58                1 => Token::H1,
59                2 => Token::H2,
60                3 => Token::H3,
61                4 => Token::H4,
62                5 => Token::H5,
63                6 => Token::H6,
64                _ => Token::P,
65            }
66        }
67    }
68
69    #[derive(Debug, PartialEq)]
70    pub struct Table {
71        pub head: Vec<TableHead>,
72        pub body: Vec<Vec<String>>,
73    }
74
75    impl Table {
76        pub fn new(head: Vec<TableHead>, body: Vec<Vec<String>>) -> Self {
77            Table { head, body }
78        }
79    }
80
81    #[derive(Debug, PartialEq)]
82    pub struct TableHead {
83        pub cell: String,
84        align: Align,
85    }
86
87    impl TableHead {
88        pub fn new(cell: String, align: Align) -> Self {
89            TableHead { cell, align }
90        }
91
92        pub fn get_align(&self) -> String {
93            self.align.value()
94        }
95    }
96
97    #[derive(Debug, Copy, Clone, PartialEq)]
98    pub enum Align {
99        Center,
100        Left,
101        Right,
102    }
103
104    impl Align {
105        pub fn value(&self) -> String {
106            match *self {
107                Align::Center => "center",
108                Align::Left => "left",
109                Align::Right => "right",
110            }
111            .to_string()
112        }
113    }
114
115    #[derive(Debug, PartialEq)]
116    pub enum Content {
117        PlainText { value: String },
118        ElementNode { value: ElementNode },
119        ElementNodes { value: Vec<ElementNode> },
120        Table { value: Table },
121    }
122
123    #[derive(Debug, PartialEq)]
124    pub enum ElementNode {
125        Exist {
126            tag: Token,
127            content: Box<Content>,
128            children: Box<ElementNode>,
129        },
130        Nil,
131    }
132
133    impl ElementNode {
134        pub fn new(tag: Token, content: Content, children: Box<ElementNode>) -> Self {
135            ElementNode::Exist {
136                tag,
137                content: Box::new(content),
138                children,
139            }
140        }
141    }
142
143    #[macro_export]
144    macro_rules! table {
145        (head: $head:expr, body: $body:expr $(,)? ) => {
146            Table {
147                head: $head,
148                body: $body,
149            }
150        };
151    }
152
153    #[macro_export]
154    macro_rules! element_node {
155        (tag: $tag:expr, content: $content:expr, children: $children:expr $(,)? ) => {
156            ElementNode::Exist {
157                tag: $tag,
158                content: Box::new($content),
159                children: Box::new($children),
160            }
161        };
162        (tag: $tag:expr, content: $content:expr $(,)? ) => {
163            ElementNode::Exist {
164                tag: $tag,
165                content: Box::new($content),
166                children: Box::new(element_node!()),
167            }
168        };
169        () => {
170            ElementNode::Nil
171        };
172    }
173
174    #[macro_export]
175    macro_rules! content_element_nodes {
176        ($($x : expr), + $(,) ? ) => {
177            Content::ElementNodes { value: vec![$($x), +] }
178        };
179    }
180
181    #[macro_export]
182    macro_rules! content_plain_text {
183        ($value:expr $(,)? ) => {
184            Content::PlainText { value: $value }
185        };
186    }
187
188    fn parse_line(input: &String) -> ElementNode {
189        let mut sharp_count: i8 = 0;
190        for char in input.as_str().chars() {
191            if char == '#' {
192                sharp_count += 1;
193            } else if char == ' ' {
194                break;
195            }
196        }
197        let content = if sharp_count == 0 {
198            input
199        } else {
200            &input[(sharp_count as usize) + 1..]
201        }
202        .to_string();
203        element_node! {
204            tag: Token::value_of(sharp_count),
205            content: content_plain_text!(inline_parse(&content)),
206        }
207    }
208
209    fn parse(input: &Vec<String>) -> Vec<ElementNode> {
210        let mut element_nodes: Vec<ElementNode> = vec![];
211        let mut i: usize = 0;
212        while i < input.len() {
213            let list_index = i;
214            if is_simple_list(input.get(i).unwrap()) {
215                while is_simple_list(input.get(i).unwrap()) {
216                    i += 1;
217                }
218                if list_index != i {
219                    let parse_result =
220                        parse_list(input[list_index..i].to_vec(), ListPattern::SimpleList, 0);
221                    element_nodes.push(parse_result);
222                    continue;
223                }
224            } else if is_number_list(input.get(i).unwrap()) {
225                while is_number_list(input.get(i).unwrap()) {
226                    i += 1;
227                }
228                if list_index != i {
229                    let parse_result =
230                        parse_list(input[list_index..i].to_vec(), ListPattern::NumberList, 0);
231                    element_nodes.push(parse_result);
232                    continue;
233                }
234            } else if is_quote_block(input.get(i).unwrap()) {
235                let quote_start = i;
236                while i < input.len() && input.get(i).unwrap() != "" {
237                    i += 1;
238                }
239                let parse_result = parse(&enclose_quote(input[quote_start..i].to_vec()));
240                element_nodes.push(element_node! {
241                    tag: Token::Blockquote,
242                    content: Content::ElementNodes { value: parse_result },
243                });
244                i += 1;
245                continue;
246            } else if is_code_block_start(input.get(i).unwrap()) {
247                i += 1;
248                let code_block_start = i;
249                while !is_code_block_start(input.get(i).unwrap()) {
250                    i += 1;
251                }
252                element_nodes.push(element_node! {
253                    tag: Token::Code,
254                    content: Content::PlainText{
255                        value: parse_code_block(input[code_block_start..i].to_vec()).join("<br />"),
256                    },
257                });
258                i += 1;
259                continue;
260            } else if is_table_block_start(input.get(i).unwrap()) {
261                let (table, skip) = parse_table(input[i..].to_vec());
262                i += skip;
263                element_nodes.push(element_node! {
264                    tag: Token::Table,
265                    content: Content::Table {
266                        value: table
267                    },
268                });
269            }
270            element_nodes.push(parse_line(input.get(i).unwrap()));
271            i += 1;
272        }
273        element_nodes
274    }
275
276    pub struct Lexer {
277        text: Vec<String>,
278    }
279
280    impl Lexer {
281        pub fn new(text: Vec<String>) -> Self {
282            Lexer { text }
283        }
284
285        pub fn parse(&self) -> MdNode {
286            let result_str = parse(&self.text);
287            MdNode::new(result_str)
288        }
289    }
290
    #[cfg(test)]
    mod test_lexer {
        use super::*;
        use crate::vec_string;
        use pretty_assertions::assert_eq;

        /// End-to-end check: lexes one document containing headings, nested
        /// bullet and numbered lists, links, an image, nested quotes, a fenced
        /// code block, inline code/emphasis and a table, and compares the result
        /// with a hand-written expected node tree.
        #[test]
        fn test_parse() {
            let input = vec_string![
                // Headings.
                "# hello",
                "## world",
                // Nested bullet list.
                "* hogehoge",
                "* hogehoge1",
                "  * this is [Google先生](https://example.com)",
                "  * hogehoge3",
                "    * hoge 4",
                "* hogehoge4",
                // Nested numbered list.
                "1. hoge1",
                "2. hoge2",
                "  1. aaa",
                "  2. ccc",
                "    1. ddd",
                // Paragraphs with a link and an image.
                "this is [Google先生](https://example.com)",
                "画像 ![エビフライトライアングル](https://example.com)",
                // Quote block with a nested quote, ended by the empty line.
                "> aaa",
                "bbb",
                ">> ccc",
                "ddd",
                "",
                "## world",
                // Fenced code block.
                "```html",
                r#"<script src="hoge.js"></script>"#,
                r#"<script src="hoge.js"></script>"#,
                "```",
                // Inline code and emphasis.
                "this is `hoge` and `fuga`",
                "this is *hoge*",
                "this is **hoge**",
                "this is *hoge **fuga***",
                // Table followed by a plain paragraph.
                "|  head1  | head2 | head3|",
                "|:----:|-----:|:----- |",
                "|  aaa1  | bbb1 | ccc1|",
                "|  aaa2 | bbb2 | ccc2|",
                "aaa"
            ];
            let expected = MdNode::new(vec![
                element_node! {
                    tag: Token::H1,
                    content: content_plain_text!("hello".to_string()),
                },
                element_node! {
                    tag: Token::H2,
                    content: content_plain_text!("world".to_string()),
                },
                // Bullet list: nested levels hang off the `children` of the preceding item.
                element_node! {
                    tag: Token::Ul,
                    content: content_element_nodes![
                        element_node! {
                            tag: Token::Li,
                            content: content_plain_text!("hogehoge".to_string()),
                        },
                        element_node! {
                            tag: Token::Li,
                            content: content_plain_text!("hogehoge1".to_string()),
                            children: element_node! {
                                tag: Token::Ul,
                                content: content_element_nodes![
                                    element_node! {
                                        tag: Token::Li,
                                        content: content_plain_text!(r#"this is <a class="flav-md-a" href="https://example.com" alt="Google先生">Google先生</a>"#.to_string()),
                                    },
                                    element_node! {
                                        tag: Token::Li,
                                        content: content_plain_text!("hogehoge3".to_string()),
                                        children: element_node! {
                                            tag: Token::Ul,
                                            content: content_element_nodes![
                                                element_node! {
                                                    tag: Token::Li,
                                                    content: content_plain_text!("hoge 4".to_string()),
                                                }
                                            ]
                                        }
                                    },
                                ],
                            }
                        },
                        element_node! {
                            tag: Token::Li,
                            content: content_plain_text!("hogehoge4".to_string()),
                        },
                    ]
                },
                // Numbered list, same nesting scheme as the bullet list.
                element_node! {
                    tag: Token::Ol,
                    content: content_element_nodes![
                        element_node! {
                            tag: Token::Li,
                            content: content_plain_text!("hoge1".to_string()),
                        },
                        element_node! {
                            tag: Token::Li,
                            content: content_plain_text!("hoge2".to_string()),
                            children: element_node! {
                                tag: Token::Ol,
                                content: content_element_nodes![
                                    element_node! {
                                        tag: Token::Li,
                                        content: content_plain_text!("aaa".to_string()),
                                    },
                                    element_node! {
                                        tag: Token::Li,
                                        content: content_plain_text!("ccc".to_string()),
                                        children: element_node! {
                                            tag: Token::Ol,
                                            content: content_element_nodes![
                                                element_node! {
                                                    tag: Token::Li,
                                                    content: content_plain_text!("ddd".to_string()),
                                                }
                                            ]
                                        }
                                    },
                                ],
                            }
                        },
                    ]
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"this is <a class="flav-md-a" href="https://example.com" alt="Google先生">Google先生</a>"#.to_string()),
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"画像 <img class="flav-md-img" src="https://example.com" alt="エビフライトライアングル">"#.to_string()),
                },
                // Quote block: the `>>` lines become a nested blockquote.
                element_node! {
                    tag: Token::Blockquote,
                    content: content_element_nodes![
                        element_node! {
                            tag: Token::P,
                            content: content_plain_text!("aaa".to_string()),
                        },
                        element_node! {
                            tag: Token::P,
                            content: content_plain_text!("bbb".to_string()),
                        },
                        element_node! {
                            tag: Token::Blockquote,
                            content: content_element_nodes![
                                element_node! {
                                    tag: Token::P,
                                    content: content_plain_text!("ccc".to_string()),
                                },
                                element_node! {
                                    tag: Token::P,
                                    content: content_plain_text!("ddd".to_string()),
                                },
                            ],
                        },
                    ],
                },
                element_node! {
                    tag: Token::H2,
                    content: content_plain_text!("world".to_string()),
                },
                // Code block: HTML-escaped lines joined with `<br />`.
                element_node! {
                    tag: Token::Code,
                    content: content_plain_text!("&lt;script src=&quot;hoge.js&quot;&gt;&lt;/script&gt;<br />&lt;script src=&quot;hoge.js&quot;&gt;&lt;/script&gt;".to_string()),
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"this is <code class="flav-md-code-inline">hoge</code> and <code class="flav-md-code-inline">fuga</code>"#.to_string()),
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"this is <em class="flav-md-em">hoge</em>"#.to_string()),
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"this is <strong class="flav-md-strong">hoge</strong>"#.to_string()),
                },
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!(r#"this is <em class="flav-md-em">hoge <strong class="flav-md-strong">fuga</strong></em>"#.to_string()),
                },
                // Table: alignment per header cell, trimmed cell text in the body rows.
                element_node! {
                    tag: Token::Table,
                    content: Content::Table {
                        value: table! {
                            head: vec![
                                TableHead::new("head1".to_string(), Align::Center),
                                TableHead::new("head2".to_string(), Align::Right),
                                TableHead::new("head3".to_string(), Align::Left),
                            ],
                            body: vec![
                                vec_string!["aaa1", "bbb1", "ccc1"],
                                vec_string!["aaa2", "bbb2", "ccc2"],
                            ],
                        },
                    }
                },
                // The line right after the table is lexed as an ordinary paragraph.
                element_node! {
                    tag: Token::P,
                    content: content_plain_text!("aaa".to_string()),
                },
            ]);
            let lex = Lexer::new(input);
            assert_eq!(lex.parse(), expected);
        }
    }
501}