1pub mod lexer {
2 use crate::lexer::builder::builder::MdNode;
3 use crate::lexer::pattern::code_block::code_block::{is_code_block_start, parse_code_block};
4 use crate::lexer::pattern::inline::inline::inline_parse;
5 use crate::lexer::pattern::list::list::ListPattern::SimpleList;
6 use crate::lexer::pattern::list::list::{
7 is_number_list, is_simple_list, parse_list, ListPattern,
8 };
9 use crate::lexer::pattern::quote::quote::{enclose_quote, is_quote_block};
10 use crate::lexer::pattern::table::table::{is_table_block_start, parse_table};
11
/// Kinds of block element the lexer attaches to a node.
///
/// Each variant has a textual tag name, available through [`Token::value`].
#[derive(Debug, PartialEq)]
pub enum Token {
    H1,
    H2,
    H3,
    H4,
    H5,
    H6,
    P,
    Ul,
    Ol,
    Li,
    Blockquote,
    Code,
    Table,
}

impl Token {
    /// Returns the tag name for this token as an owned `String`.
    pub fn value(&self) -> String {
        let name: &str = match self {
            Self::H1 => "h1",
            Self::H2 => "h2",
            Self::H3 => "h3",
            Self::H4 => "h4",
            Self::H5 => "h5",
            Self::H6 => "h6",
            Self::P => "p",
            Self::Ul => "ul",
            Self::Ol => "ol",
            Self::Li => "li",
            // NOTE(review): the only capitalised name in this table; kept as-is because
            // consumers of this string are not visible here — confirm it is intended.
            Self::Blockquote => "Blockquote",
            Self::Code => "code",
            Self::Table => "table",
        };
        String::from(name)
    }

    /// Reports whether this token is one of the six heading variants (`H1`..=`H6`).
    pub fn is_head(&self) -> bool {
        matches!(
            self,
            Self::H1 | Self::H2 | Self::H3 | Self::H4 | Self::H5 | Self::H6
        )
    }

    /// Maps a heading level to its token.
    ///
    /// Levels `1` through `6` yield `H1` through `H6`; every other number
    /// (zero, negatives, seven and above) yields `P`.
    pub fn value_of(number: i8) -> Self {
        match number {
            1 => Self::H1,
            2 => Self::H2,
            3 => Self::H3,
            4 => Self::H4,
            5 => Self::H5,
            6 => Self::H6,
            _ => Self::P,
        }
    }
}
68
69 #[derive(Debug, PartialEq)]
70 pub struct Table {
71 pub head: Vec<TableHead>,
72 pub body: Vec<Vec<String>>,
73 }
74
75 impl Table {
76 pub fn new(head: Vec<TableHead>, body: Vec<Vec<String>>) -> Self {
77 Table { head, body }
78 }
79 }
80
81 #[derive(Debug, PartialEq)]
82 pub struct TableHead {
83 pub cell: String,
84 align: Align,
85 }
86
87 impl TableHead {
88 pub fn new(cell: String, align: Align) -> Self {
89 TableHead { cell, align }
90 }
91
92 pub fn get_align(&self) -> String {
93 self.align.value()
94 }
95 }
96
/// Alignment choices that can be stored on a table header cell.
#[derive(Debug, Copy, Clone, PartialEq)]
pub enum Align {
    Center,
    Left,
    Right,
}

impl Align {
    /// Returns the lowercase name of the alignment (`"center"`, `"left"` or `"right"`).
    pub fn value(&self) -> String {
        let name: &str = match self {
            Self::Center => "center",
            Self::Left => "left",
            Self::Right => "right",
        };
        String::from(name)
    }
}
114
/// Payload stored in the `content` field of an [`ElementNode::Exist`] node.
#[derive(Debug, PartialEq)]
pub enum Content {
    /// A single string value.
    PlainText { value: String },
    /// Exactly one nested node.
    ElementNode { value: ElementNode },
    /// An ordered list of nested nodes.
    ElementNodes { value: Vec<ElementNode> },
    /// A [`Table`] value.
    Table { value: Table },
}
122
123 #[derive(Debug, PartialEq)]
124 pub enum ElementNode {
125 Exist {
126 tag: Token,
127 content: Box<Content>,
128 children: Box<ElementNode>,
129 },
130 Nil,
131 }
132
133 impl ElementNode {
134 pub fn new(tag: Token, content: Content, children: Box<ElementNode>) -> Self {
135 ElementNode::Exist {
136 tag,
137 content: Box::new(content),
138 children,
139 }
140 }
141 }
142
/// Builds a `Table` literal from `head:` and `body:` expressions.
///
/// A trailing comma after the `body:` expression is accepted. `Table` must be in
/// scope where the macro is invoked.
#[macro_export]
macro_rules! table {
    (head: $columns:expr, body: $rows:expr $(,)?) => {
        Table {
            head: $columns,
            body: $rows,
        }
    };
}
152
/// Builds an `ElementNode`.
///
/// - `element_node!()` expands to `ElementNode::Nil`.
/// - `element_node! { tag: .., content: .. }` expands to an `Exist` node whose
///   `children` is a boxed `ElementNode::Nil`.
/// - `element_node! { tag: .., content: .., children: .. }` boxes the given
///   `children` expression as well.
///
/// The content expression is boxed by the macro. A trailing comma is accepted in
/// the two non-empty forms. `ElementNode` must be in scope at the call site.
#[macro_export]
macro_rules! element_node {
    () => {
        ElementNode::Nil
    };
    (tag: $tag:expr, content: $content:expr $(,)?) => {
        ElementNode::Exist {
            tag: $tag,
            content: Box::new($content),
            children: Box::new(ElementNode::Nil),
        }
    };
    (tag: $tag:expr, content: $content:expr, children: $children:expr $(,)?) => {
        ElementNode::Exist {
            tag: $tag,
            content: Box::new($content),
            children: Box::new($children),
        }
    };
}
173
/// Wraps one or more comma-separated expressions in `Content::ElementNodes`.
///
/// The expressions are collected with `vec!` in the order written; at least one
/// is required and a trailing comma is accepted. `Content` must be in scope at
/// the call site.
#[macro_export]
macro_rules! content_element_nodes {
    ($($node:expr),+ $(,)?) => {
        Content::ElementNodes { value: vec![$($node),+] }
    };
}
180
/// Wraps a single expression in `Content::PlainText`.
///
/// A trailing comma is accepted. `Content` must be in scope at the call site.
#[macro_export]
macro_rules! content_plain_text {
    ($text:expr $(,)?) => {
        Content::PlainText { value: $text }
    };
}
187
188 fn parse_line(input: &String) -> ElementNode {
189 let mut sharp_count: i8 = 0;
190 for char in input.as_str().chars() {
191 if char == '#' {
192 sharp_count += 1;
193 } else if char == ' ' {
194 break;
195 }
196 }
197 let content = if sharp_count == 0 {
198 input
199 } else {
200 &input[(sharp_count as usize) + 1..]
201 }
202 .to_string();
203 element_node! {
204 tag: Token::value_of(sharp_count),
205 content: content_plain_text!(inline_parse(&content)),
206 }
207 }
208
209 fn parse(input: &Vec<String>) -> Vec<ElementNode> {
210 let mut element_nodes: Vec<ElementNode> = vec![];
211 let mut i: usize = 0;
212 while i < input.len() {
213 let list_index = i;
214 if is_simple_list(input.get(i).unwrap()) {
215 while is_simple_list(input.get(i).unwrap()) {
216 i += 1;
217 }
218 if list_index != i {
219 let parse_result =
220 parse_list(input[list_index..i].to_vec(), ListPattern::SimpleList, 0);
221 element_nodes.push(parse_result);
222 continue;
223 }
224 } else if is_number_list(input.get(i).unwrap()) {
225 while is_number_list(input.get(i).unwrap()) {
226 i += 1;
227 }
228 if list_index != i {
229 let parse_result =
230 parse_list(input[list_index..i].to_vec(), ListPattern::NumberList, 0);
231 element_nodes.push(parse_result);
232 continue;
233 }
234 } else if is_quote_block(input.get(i).unwrap()) {
235 let quote_start = i;
236 while i < input.len() && input.get(i).unwrap() != "" {
237 i += 1;
238 }
239 let parse_result = parse(&enclose_quote(input[quote_start..i].to_vec()));
240 element_nodes.push(element_node! {
241 tag: Token::Blockquote,
242 content: Content::ElementNodes { value: parse_result },
243 });
244 i += 1;
245 continue;
246 } else if is_code_block_start(input.get(i).unwrap()) {
247 i += 1;
248 let code_block_start = i;
249 while !is_code_block_start(input.get(i).unwrap()) {
250 i += 1;
251 }
252 element_nodes.push(element_node! {
253 tag: Token::Code,
254 content: Content::PlainText{
255 value: parse_code_block(input[code_block_start..i].to_vec()).join("<br />"),
256 },
257 });
258 i += 1;
259 continue;
260 } else if is_table_block_start(input.get(i).unwrap()) {
261 let (table, skip) = parse_table(input[i..].to_vec());
262 i += skip;
263 element_nodes.push(element_node! {
264 tag: Token::Table,
265 content: Content::Table {
266 value: table
267 },
268 });
269 }
270 element_nodes.push(parse_line(input.get(i).unwrap()));
271 i += 1;
272 }
273 element_nodes
274 }
275
276 pub struct Lexer {
277 text: Vec<String>,
278 }
279
280 impl Lexer {
281 pub fn new(text: Vec<String>) -> Self {
282 Lexer { text }
283 }
284
285 pub fn parse(&self) -> MdNode {
286 let result_str = parse(&self.text);
287 MdNode::new(result_str)
288 }
289 }
290
// End-to-end test for the lexer: a markdown sample is fed to `Lexer::parse` and the
// result is compared with a hand-built `MdNode`.
#[cfg(test)]
mod test_lexer {
    use super::*;
    use crate::vec_string;
    use pretty_assertions::assert_eq;

    // Builds one sample document and asserts that `Lexer::parse` returns the expected
    // tree of headings, lists, paragraphs, block quotes, a code block and a table.
    //
    // NOTE(review): the fixture below appears to have lost text before reaching review:
    // the `input` literal opens as `vec_string",` with no `![`, the first list lines that
    // the expected tree refers to are absent, the image line stops at `"画像 "`, and the
    // expected code-block string contains unescaped inner quotes. Leading whitespace
    // inside the nested-list strings may also have been collapsed. The code is left
    // untouched here — restore it from the canonical source rather than by guesswork.
    #[test]
    fn test_parse() {
        let input = vec_string",
            " * hogehoge3",
            " * hoge 4",
            "* hogehoge4",
            "1. hoge1",
            "2. hoge2",
            " 1. aaa",
            " 2. ccc",
            " 1. ddd",
            "this is [Google先生](https://example.com)",
            "画像 ",
            "> aaa",
            "bbb",
            ">> ccc",
            "ddd",
            "",
            "## world",
            "```html",
            r#"<script src="hoge.js"></script>"#,
            r#"<script src="hoge.js"></script>"#,
            "```",
            "this is `hoge` and `fuga`",
            "this is *hoge*",
            "this is **hoge**",
            "this is *hoge **fuga***",
            "| head1 | head2 | head3|",
            "|:----:|-----:|:----- |",
            "| aaa1 | bbb1 | ccc1|",
            "| aaa2 | bbb2 | ccc2|",
            "aaa"
        ];
        let expected = MdNode::new(vec![
            element_node! {
                tag: Token::H1,
                content: content_plain_text!("hello".to_string()),
            },
            element_node! {
                tag: Token::H2,
                content: content_plain_text!("world".to_string()),
            },
            element_node! {
                tag: Token::Ul,
                content: content_element_nodes![
                    element_node! {
                        tag: Token::Li,
                        content: content_plain_text!("hogehoge".to_string()),
                    },
                    element_node! {
                        tag: Token::Li,
                        content: content_plain_text!("hogehoge1".to_string()),
                        children: element_node! {
                            tag: Token::Ul,
                            content: content_element_nodes![
                                element_node! {
                                    tag: Token::Li,
                                    content: content_plain_text!(r#"this is <a class="flav-md-a" href="https://example.com" alt="Google先生">Google先生</a>"#.to_string()),
                                },
                                element_node! {
                                    tag: Token::Li,
                                    content: content_plain_text!("hogehoge3".to_string()),
                                    children: element_node! {
                                        tag: Token::Ul,
                                        content: content_element_nodes![
                                            element_node! {
                                                tag: Token::Li,
                                                content: content_plain_text!("hoge 4".to_string()),
                                            }
                                        ]
                                    }
                                },
                            ],
                        }
                    },
                    element_node! {
                        tag: Token::Li,
                        content: content_plain_text!("hogehoge4".to_string()),
                    },
                ]
            },
            element_node! {
                tag: Token::Ol,
                content: content_element_nodes![
                    element_node! {
                        tag: Token::Li,
                        content: content_plain_text!("hoge1".to_string()),
                    },
                    element_node! {
                        tag: Token::Li,
                        content: content_plain_text!("hoge2".to_string()),
                        children: element_node! {
                            tag: Token::Ol,
                            content: content_element_nodes![
                                element_node! {
                                    tag: Token::Li,
                                    content: content_plain_text!("aaa".to_string()),
                                },
                                element_node! {
                                    tag: Token::Li,
                                    content: content_plain_text!("ccc".to_string()),
                                    children: element_node! {
                                        tag: Token::Ol,
                                        content: content_element_nodes![
                                            element_node! {
                                                tag: Token::Li,
                                                content: content_plain_text!("ddd".to_string()),
                                            }
                                        ]
                                    }
                                },
                            ],
                        }
                    },
                ]
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"this is <a class="flav-md-a" href="https://example.com" alt="Google先生">Google先生</a>"#.to_string()),
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"画像 <img class="flav-md-img" src="https://example.com" alt="エビフライトライアングル">"#.to_string()),
            },
            element_node! {
                tag: Token::Blockquote,
                content: content_element_nodes![
                    element_node! {
                        tag: Token::P,
                        content: content_plain_text!("aaa".to_string()),
                    },
                    element_node! {
                        tag: Token::P,
                        content: content_plain_text!("bbb".to_string()),
                    },
                    element_node! {
                        tag: Token::Blockquote,
                        content: content_element_nodes![
                            element_node! {
                                tag: Token::P,
                                content: content_plain_text!("ccc".to_string()),
                            },
                            element_node! {
                                tag: Token::P,
                                content: content_plain_text!("ddd".to_string()),
                            },
                        ],
                    },
                ],
            },
            element_node! {
                tag: Token::H2,
                content: content_plain_text!("world".to_string()),
            },
            element_node! {
                tag: Token::Code,
                content: content_plain_text!("<script src="hoge.js"></script><br /><script src="hoge.js"></script>".to_string()),
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"this is <code class="flav-md-code-inline">hoge</code> and <code class="flav-md-code-inline">fuga</code>"#.to_string()),
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"this is <em class="flav-md-em">hoge</em>"#.to_string()),
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"this is <strong class="flav-md-strong">hoge</strong>"#.to_string()),
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!(r#"this is <em class="flav-md-em">hoge <strong class="flav-md-strong">fuga</strong></em>"#.to_string()),
            },
            element_node! {
                tag: Token::Table,
                content: Content::Table {
                    value: table! {
                        head: vec![
                            TableHead::new("head1".to_string(), Align::Center),
                            TableHead::new("head2".to_string(), Align::Right),
                            TableHead::new("head3".to_string(), Align::Left),
                        ],
                        body: vec![
                            vec_string!["aaa1", "bbb1", "ccc1"],
                            vec_string!["aaa2", "bbb2", "ccc2"],
                        ],
                    },
                }
            },
            element_node! {
                tag: Token::P,
                content: content_plain_text!("aaa".to_string()),
            },
        ]);
        let lex = Lexer::new(input);
        assert_eq!(lex.parse(), expected);
    }
}
501}