Skip to main content

md_tui/
parser.rs

1use image::ImageReader;
2use itertools::Itertools;
3use pest::{
4    Parser,
5    iterators::{Pair, Pairs},
6};
7use pest_derive::Parser;
8use ratatui::style::Color;
9
10use crate::nodes::{
11    image::ImageComponent,
12    root::{Component, ComponentRoot},
13    textcomponent::{TextComponent, TextNode},
14    word::{MetaData, Word, WordType},
15};
16
/// Markdown parser whose implementation is generated by `pest_derive` from the
/// `md.pest` grammar file. Deriving it also brings the `Rule` enum used
/// throughout this module into scope.
#[derive(Parser)]
#[grammar = "md.pest"]
pub struct MdParser;
20
21pub fn parse_markdown(name: Option<&str>, content: &str, width: u16) -> ComponentRoot {
22    let root: Pairs<'_, Rule> = if let Ok(file) = MdParser::parse(Rule::txt, content) {
23        file
24    } else {
25        return ComponentRoot::new(name.map(str::to_string), Vec::new());
26    };
27
28    let root_pair = root.into_iter().next().unwrap();
29
30    let children = parse_text(root_pair)
31        .children_owned()
32        .into_iter()
33        .dedup()
34        .collect();
35
36    let parse_root = ParseRoot::new(name.map(str::to_string), children);
37
38    let mut root = node_to_component(parse_root).add_missing_components();
39
40    root.transform(width);
41    root
42}
43
44fn parse_text(pair: Pair<'_, Rule>) -> ParseNode {
45    let content = if pair.as_rule() == Rule::code_line {
46        pair.as_str().replace('\t', "    ").replace('\r', "")
47    } else {
48        pair.as_str().replace('\n', " ")
49    };
50    let mut component = ParseNode::new(pair.as_rule().into(), content);
51    let children = parse_node_children(pair.into_inner());
52    component.add_children(children);
53    component
54}
55
56fn parse_node_children(pair: Pairs<'_, Rule>) -> Vec<ParseNode> {
57    let mut children = Vec::new();
58    for inner_pair in pair {
59        children.push(parse_text(inner_pair));
60    }
61    children
62}
63
64fn node_to_component(root: ParseRoot) -> ComponentRoot {
65    let mut children = Vec::new();
66    let name = root.file_name().clone();
67    for component in root.children_owned() {
68        let comp = parse_component(component);
69        children.push(comp);
70    }
71
72    ComponentRoot::new(name, children)
73}
74
/// Returns `true` when `url` begins with a lowercase `http://` or `https://`
/// scheme prefix; anything else is treated as a non-URL (e.g. a file path).
fn is_url(url: &str) -> bool {
    ["http://", "https://"]
        .iter()
        .any(|scheme| url.starts_with(*scheme))
}
78
79fn parse_component(parse_node: ParseNode) -> Component {
80    match parse_node.kind() {
81        MdParseEnum::Image => {
82            let leaf_nodes = get_leaf_nodes(parse_node);
83            let mut alt_text = String::new();
84            let mut image = None;
85            for node in leaf_nodes {
86                if node.kind() == MdParseEnum::AltText {
87                    node.content().clone_into(&mut alt_text);
88                } else if is_url(node.content()) {
89                    #[cfg(feature = "network")]
90                    {
91                        let mut buf = Vec::new();
92                        image = ureq::get(node.content()).call().ok().and_then(|b| {
93                            let noe = b.into_body().read_to_vec();
94                            noe.ok().and_then(|b| {
95                                buf = b;
96                                image::load_from_memory(&buf).ok()
97                            })
98                        });
99                    }
100                    #[cfg(not(feature = "network"))]
101                    {
102                        image = None;
103                    }
104                } else {
105                    image = ImageReader::open(node.content())
106                        .ok()
107                        .and_then(|r| r.decode().ok());
108                }
109            }
110
111            if let Some(img) = image.as_ref() {
112                let height = img.height();
113
114                let comp = ImageComponent::new(img.to_owned(), height, alt_text.clone());
115
116                if let Some(comp) = comp {
117                    Component::Image(comp)
118                } else {
119                    let word = [Word::new(format!("[{alt_text}]"), WordType::Normal)];
120
121                    let comp = TextComponent::new(TextNode::Paragraph, word.into());
122                    Component::TextComponent(comp)
123                }
124            } else {
125                let word = [
126                    Word::new("Image".to_string(), WordType::Normal),
127                    Word::new(" ".to_owned(), WordType::Normal),
128                    Word::new("not".to_owned(), WordType::Normal),
129                    Word::new(" ".to_owned(), WordType::Normal),
130                    Word::new("found".to_owned(), WordType::Normal),
131                    Word::new("/".to_owned(), WordType::Normal),
132                    Word::new("fetched".to_owned(), WordType::Normal),
133                    Word::new(" ".to_owned(), WordType::Normal),
134                    Word::new(format!("[{alt_text}]"), WordType::Normal),
135                ];
136
137                let comp = TextComponent::new(TextNode::Paragraph, word.into());
138                Component::TextComponent(comp)
139            }
140        }
141
142        MdParseEnum::Task => {
143            let leaf_nodes = get_leaf_nodes(parse_node);
144            let mut words = Vec::new();
145            for node in leaf_nodes {
146                let word_type = WordType::from(node.kind());
147
148                let mut content: String = node
149                    .content()
150                    .chars()
151                    .dedup_by(|x, y| *x == ' ' && *y == ' ')
152                    .collect();
153
154                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
155                    let comp = Word::new(content.clone(), WordType::LinkData);
156                    words.push(comp);
157                }
158
159                if content.starts_with(' ') {
160                    content.remove(0);
161                    let comp = Word::new(" ".to_owned(), word_type);
162                    words.push(comp);
163                }
164                words.push(Word::new(content, word_type));
165            }
166            Component::TextComponent(TextComponent::new(TextNode::Task, words))
167        }
168
169        MdParseEnum::Quote => {
170            let leaf_nodes = get_leaf_nodes(parse_node);
171            let mut words = Vec::new();
172            for node in leaf_nodes {
173                let word_type = WordType::from(node.kind());
174                let mut content = node.content().to_owned();
175
176                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
177                    let comp = Word::new(content.clone(), WordType::LinkData);
178                    words.push(comp);
179                }
180                if content.starts_with(' ') {
181                    content.remove(0);
182                    let comp = Word::new(" ".to_owned(), word_type);
183                    words.push(comp);
184                }
185                words.push(Word::new(content, word_type));
186            }
187            if let Some(w) = words.first_mut() {
188                w.set_content(w.content().trim_start().to_owned());
189            }
190            Component::TextComponent(TextComponent::new(TextNode::Quote, words))
191        }
192
193        MdParseEnum::Heading => {
194            let indent = parse_node
195                .content()
196                .chars()
197                .take_while(|c| *c == '#')
198                .count();
199            let leaf_nodes = get_leaf_nodes(parse_node);
200            let mut words = Vec::new();
201
202            words.push(Word::new(
203                String::new(),
204                WordType::MetaInfo(MetaData::HeadingLevel(indent as u8)),
205            ));
206
207            if indent > 1 {
208                words.push(Word::new(
209                    format!("{} ", "#".repeat(indent)),
210                    WordType::Normal,
211                ));
212            }
213
214            for node in leaf_nodes {
215                let word_type = WordType::from(node.kind());
216                let mut content = node.content().to_owned();
217
218                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
219                    let comp = Word::new(content.clone(), WordType::LinkData);
220                    words.push(comp);
221                }
222
223                if content.starts_with(' ') {
224                    content.remove(0);
225                    let comp = Word::new(" ".to_owned(), word_type);
226                    words.push(comp);
227                }
228                words.push(Word::new(content, word_type));
229            }
230            if let Some(w) = words.first_mut() {
231                w.set_content(w.content().trim_start().to_owned());
232            }
233            Component::TextComponent(TextComponent::new(TextNode::Heading, words))
234        }
235
236        MdParseEnum::Paragraph => {
237            let leaf_nodes = get_leaf_nodes(parse_node);
238            let mut words = Vec::new();
239            for node in leaf_nodes {
240                let word_type = WordType::from(node.kind());
241                let mut content = node.content().to_owned();
242
243                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
244                    let comp = Word::new(content.clone(), WordType::LinkData);
245                    words.push(comp);
246                }
247
248                if content.starts_with(' ') {
249                    content.remove(0);
250                    let comp = Word::new(" ".to_owned(), word_type);
251                    words.push(comp);
252                }
253                words.push(Word::new(content, word_type));
254            }
255            if let Some(w) = words.first_mut() {
256                w.set_content(w.content().trim_start().to_owned());
257            }
258            Component::TextComponent(TextComponent::new(TextNode::Paragraph, words))
259        }
260
261        MdParseEnum::CodeBlock => {
262            let leaf_nodes = get_leaf_nodes(parse_node);
263            let mut words = Vec::new();
264
265            let mut space_indented = false;
266
267            for node in leaf_nodes {
268                if node.kind() == MdParseEnum::CodeBlockStrSpaceIndented {
269                    space_indented = true;
270                }
271                let word_type = WordType::from(node.kind());
272                let content = node.content().to_owned();
273                words.push(vec![Word::new(content, word_type)]);
274            }
275
276            if space_indented {
277                words.push(vec![Word::new(
278                    " ".to_owned(),
279                    WordType::CodeBlock(Color::Reset),
280                )]);
281            }
282
283            Component::TextComponent(TextComponent::new_formatted(TextNode::CodeBlock, words))
284        }
285
286        MdParseEnum::ListContainer => {
287            let mut words = Vec::new();
288            for child in parse_node.children_owned() {
289                let kind = child.kind();
290                let leaf_nodes = get_leaf_nodes(child);
291                let mut inner_words = Vec::new();
292                for node in leaf_nodes {
293                    let word_type = WordType::from(node.kind());
294
295                    let mut content = match node.kind() {
296                        MdParseEnum::Indent => node.content().to_owned(),
297                        _ => node
298                            .content()
299                            .chars()
300                            .dedup_by(|x, y| *x == ' ' && *y == ' ')
301                            .collect(),
302                    };
303
304                    if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
305                        let comp = Word::new(content.clone(), WordType::LinkData);
306                        inner_words.push(comp);
307                    }
308                    if content.starts_with(' ') && node.kind() != MdParseEnum::Indent {
309                        content.remove(0);
310                        let comp = Word::new(" ".to_owned(), word_type);
311                        inner_words.push(comp);
312                    }
313
314                    inner_words.push(Word::new(content, word_type));
315                }
316                if kind == MdParseEnum::UnorderedList {
317                    inner_words.push(Word::new(
318                        "X".to_owned(),
319                        WordType::MetaInfo(MetaData::UList),
320                    ));
321                    let list_symbol = Word::new("• ".to_owned(), WordType::ListMarker);
322                    inner_words.insert(1, list_symbol);
323                } else if kind == MdParseEnum::OrderedList {
324                    inner_words.push(Word::new(
325                        "X".to_owned(),
326                        WordType::MetaInfo(MetaData::OList),
327                    ));
328                }
329                words.push(inner_words);
330            }
331            Component::TextComponent(TextComponent::new_formatted(TextNode::List, words))
332        }
333
334        MdParseEnum::Table => {
335            let mut words = Vec::new();
336            let mut meta_info = Vec::new();
337            for cell in parse_node.children_owned() {
338                if cell.kind() == MdParseEnum::TableSeparator {
339                    meta_info.push(Word::new(
340                        cell.content().to_owned(),
341                        WordType::MetaInfo(MetaData::ColumnsCount),
342                    ));
343                    continue;
344                }
345                let mut inner_words = Vec::new();
346
347                if cell.children().is_empty() {
348                    words.push(inner_words);
349                    continue;
350                }
351
352                for word in get_leaf_nodes(cell) {
353                    let word_type = WordType::from(word.kind());
354                    let mut content = word.content().to_owned();
355
356                    if matches!(word.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
357                        let comp = Word::new(content.clone(), WordType::LinkData);
358                        inner_words.push(comp);
359                    }
360
361                    if content.starts_with(' ') {
362                        content.remove(0);
363                        let comp = Word::new(" ".to_owned(), word_type);
364                        inner_words.push(comp);
365                    }
366
367                    inner_words.push(Word::new(content, word_type));
368                }
369                words.push(inner_words);
370            }
371            Component::TextComponent(TextComponent::new_formatted_with_meta(
372                TextNode::Table(vec![], vec![]),
373                words,
374                meta_info,
375            ))
376        }
377
378        MdParseEnum::BlockSeparator => {
379            Component::TextComponent(TextComponent::new(TextNode::LineBreak, Vec::new()))
380        }
381        MdParseEnum::HorizontalSeparator => Component::TextComponent(TextComponent::new(
382            TextNode::HorizontalSeparator,
383            Vec::new(),
384        )),
385        MdParseEnum::Footnote => {
386            let mut words = Vec::new();
387            let foot_ref = parse_node.children().first().unwrap().to_owned();
388            words.push(Word::new(foot_ref.content, WordType::FootnoteData));
389            let _rest = parse_node
390                .children_owned()
391                .into_iter()
392                .skip(1)
393                .map(|e| e.content)
394                .collect::<String>();
395            words.push(Word::new(_rest, WordType::Footnote));
396            Component::TextComponent(TextComponent::new(TextNode::Footnote, words))
397        }
398        _ => todo!("Not implemented for {:?}", parse_node.kind()),
399    }
400}
401
402fn get_leaf_nodes(node: ParseNode) -> Vec<ParseNode> {
403    let mut leaf_nodes = Vec::new();
404
405    // Insert separator information between links
406    if node.kind() == MdParseEnum::Link {
407        let comp = if node.content().starts_with(' ') {
408            ParseNode::new(MdParseEnum::Word, " ".to_owned())
409        } else {
410            ParseNode::new(MdParseEnum::Word, String::new())
411        };
412        leaf_nodes.push(comp);
413    }
414
415    if matches!(
416        node.kind(),
417        MdParseEnum::CodeStr
418            | MdParseEnum::ItalicStr
419            | MdParseEnum::BoldStr
420            | MdParseEnum::BoldItalicStr
421            | MdParseEnum::StrikethroughStr
422    ) && node.content().starts_with(' ')
423    {
424        let comp = ParseNode::new(MdParseEnum::Word, " ".to_owned());
425        leaf_nodes.push(comp);
426    }
427
428    if node.children().is_empty() {
429        leaf_nodes.push(node);
430    } else {
431        for child in node.children_owned() {
432            leaf_nodes.append(&mut get_leaf_nodes(child));
433        }
434    }
435    leaf_nodes
436}
437
438pub fn print_from_root(root: &ComponentRoot) {
439    for child in root.components() {
440        print_component(child, 0);
441    }
442}
443
444fn print_component(component: &TextComponent, _depth: usize) {
445    println!(
446        "Component: {:?}, height: {}, y_offset: {}",
447        component.kind(),
448        component.height(),
449        component.y_offset()
450    );
451    component.meta_info().iter().for_each(|w| {
452        println!("Meta: {}, kind: {:?}", w.content(), w.kind());
453    });
454    component.content().iter().for_each(|w| {
455        w.iter().for_each(|w| {
456            println!("Content:{}, kind: {:?}", w.content(), w.kind());
457        });
458    });
459}
460
461#[derive(Debug, Clone)]
462pub struct ParseRoot {
463    file_name: Option<String>,
464    children: Vec<ParseNode>,
465}
466
467impl ParseRoot {
468    #[must_use]
469    pub fn new(file_name: Option<String>, children: Vec<ParseNode>) -> Self {
470        Self {
471            file_name,
472            children,
473        }
474    }
475
476    #[must_use]
477    pub fn children(&self) -> &Vec<ParseNode> {
478        &self.children
479    }
480
481    #[must_use]
482    pub fn children_owned(self) -> Vec<ParseNode> {
483        self.children
484    }
485
486    #[must_use]
487    pub fn file_name(&self) -> Option<String> {
488        self.file_name.clone()
489    }
490}
491
492#[derive(Debug, Clone, PartialEq, Eq)]
493pub struct ParseNode {
494    kind: MdParseEnum,
495    content: String,
496    children: Vec<ParseNode>,
497}
498
499impl ParseNode {
500    #[must_use]
501    pub fn new(kind: MdParseEnum, content: String) -> Self {
502        Self {
503            kind,
504            content,
505            children: Vec::new(),
506        }
507    }
508
509    #[must_use]
510    pub fn kind(&self) -> MdParseEnum {
511        self.kind
512    }
513
514    #[must_use]
515    pub fn content(&self) -> &str {
516        &self.content
517    }
518
519    pub fn add_children(&mut self, children: Vec<ParseNode>) {
520        self.children.extend(children);
521    }
522
523    #[must_use]
524    pub fn children(&self) -> &Vec<ParseNode> {
525        &self.children
526    }
527
528    #[must_use]
529    pub fn children_owned(self) -> Vec<ParseNode> {
530        self.children
531    }
532}
533
/// Kind of a [`ParseNode`].
///
/// Values are produced from grammar rules by the `From<Rule>` implementation
/// in this file; the per-variant notes below name the rules that map to each
/// variant according to that implementation.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdParseEnum {
    /// Image alternative text (`alt_word`, `alt_text`).
    AltText,
    /// Separator between blocks (`block_sep`).
    BlockSeparator,
    /// A bold word (`bold_word`).
    Bold,
    /// A bold-italic word (`bold_italic_word`).
    BoldItalic,
    /// A bold-italic span (`bold_italic`).
    BoldItalicStr,
    /// A bold span (`bold`).
    BoldStr,
    /// `caution` rule.
    Caution,
    /// An inline-code word (`code_word`).
    Code,
    /// A code block (`code_block`, `indented_code_block`).
    CodeBlock,
    /// One line of a code block (`code_line`).
    CodeBlockStr,
    /// One line of a space-indented code block (`indented_code_line`,
    /// `indented_code_newline`).
    CodeBlockStrSpaceIndented,
    /// An inline-code span (`code`).
    CodeStr,
    /// A digit or ordered-list counter (`digit`, `o_list_counter`).
    Digit,
    /// A footnote reference (`footnote_ref`).
    FootnoteRef,
    /// A footnote (`footnote`).
    Footnote,
    /// A heading of any level (`h1`..`h6`, `heading`).
    Heading,
    /// A horizontal separator (`horizontal_sep`).
    HorizontalSeparator,
    /// An image (`image`).
    Image,
    /// `important` rule. The variant name is misspelled ("Imortant"); it is
    /// public, so it is left as is.
    Imortant,
    /// Indentation (`indent`).
    Indent,
    /// An inline link (`inline_link`, `inline_link_wrapper`).
    InlineLink,
    /// An italic word (`italic_word_var_1`, `italic_word_var_2`).
    Italic,
    /// An italic span (`italic_var_1`, `italic_var_2`).
    ItalicStr,
    /// A link or link word (`link`, `link_line`, `link_word`,
    /// `wiki_link_word`).
    Link,
    /// Link target data (`link_data`, `wiki_link_data`).
    LinkData,
    /// Container holding list items (`list_container`).
    ListContainer,
    /// `note` rule.
    Note,
    /// An ordered list item (`o_list`).
    OrderedList,
    /// Programming language tag (`programming_language`).
    PLanguage,
    /// A paragraph (`paragraph`); also the fallback kind for several
    /// character-level and prefix rules.
    Paragraph,
    /// A quote (`quote`).
    Quote,
    /// A sentence (`sentence`, `t_sentence`, `footnote_sentence`).
    Sentence,
    /// A strikethrough word (`strikethrough_word`).
    Strikethrough,
    /// A strikethrough span (`strikethrough`).
    StrikethroughStr,
    /// A table (`table`).
    Table,
    /// A table cell (`table_cell`).
    TableCell,
    /// A table separator row (`table_separator`).
    TableSeparator,
    /// A task item (`task`).
    Task,
    /// A completed task marker (`task_complete`).
    TaskClosed,
    /// An open task marker (`task_open`).
    TaskOpen,
    /// `tip` rule.
    Tip,
    /// An unordered list item (`u_list`).
    UnorderedList,
    /// `warning` rule.
    Warning,
    /// A standalone wiki link (`wiki_link_alone`).
    WikiLink,
    /// A plain word (`word`, `h_word`, `latex_word`, `t_word`).
    Word,
}
582
583impl From<Rule> for MdParseEnum {
584    fn from(value: Rule) -> Self {
585        match value {
586            Rule::word | Rule::h_word | Rule::latex_word | Rule::t_word => Self::Word,
587            Rule::indent => Self::Indent,
588            Rule::italic_word_var_1 | Rule::italic_word_var_2 => Self::Italic,
589            Rule::italic_var_1 | Rule::italic_var_2 => Self::ItalicStr,
590            Rule::bold_word => Self::Bold,
591            Rule::bold => Self::BoldStr,
592            Rule::bold_italic_word => Self::BoldItalic,
593            Rule::bold_italic => Self::BoldItalicStr,
594            Rule::strikethrough_word => Self::Strikethrough,
595            Rule::strikethrough => Self::StrikethroughStr,
596            Rule::code_word => Self::Code,
597            Rule::code => Self::CodeStr,
598            Rule::programming_language => Self::PLanguage,
599            Rule::link_word | Rule::link_line | Rule::link | Rule::wiki_link_word => Self::Link,
600            Rule::wiki_link_alone => Self::WikiLink,
601            Rule::inline_link | Rule::inline_link_wrapper => Self::InlineLink,
602            Rule::o_list_counter | Rule::digit => Self::Digit,
603            Rule::task_open => Self::TaskOpen,
604            Rule::task_complete => Self::TaskClosed,
605            Rule::code_line => Self::CodeBlockStr,
606            Rule::indented_code_line | Rule::indented_code_newline => {
607                Self::CodeBlockStrSpaceIndented
608            }
609            Rule::sentence | Rule::t_sentence | Rule::footnote_sentence => Self::Sentence,
610            Rule::table_cell => Self::TableCell,
611            Rule::table_separator => Self::TableSeparator,
612            Rule::u_list => Self::UnorderedList,
613            Rule::o_list => Self::OrderedList,
614            Rule::h1 | Rule::h2 | Rule::h3 | Rule::h4 | Rule::h5 | Rule::h6 | Rule::heading => {
615                Self::Heading
616            }
617            Rule::list_container => Self::ListContainer,
618            Rule::paragraph => Self::Paragraph,
619            Rule::code_block | Rule::indented_code_block => Self::CodeBlock,
620            Rule::table => Self::Table,
621            Rule::quote => Self::Quote,
622            Rule::task => Self::Task,
623            Rule::block_sep => Self::BlockSeparator,
624            Rule::horizontal_sep => Self::HorizontalSeparator,
625            Rule::link_data | Rule::wiki_link_data => Self::LinkData,
626            Rule::warning => Self::Warning,
627            Rule::note => Self::Note,
628            Rule::tip => Self::Tip,
629            Rule::important => Self::Imortant,
630            Rule::caution => Self::Caution,
631            Rule::p_char
632            | Rule::t_char
633            | Rule::link_char
634            | Rule::wiki_link_char
635            | Rule::normal
636            | Rule::t_normal
637            | Rule::latex
638            | Rule::comment
639            | Rule::txt
640            | Rule::task_prefix
641            | Rule::quote_prefix
642            | Rule::code_block_prefix
643            | Rule::table_prefix
644            | Rule::list_prefix
645            | Rule::forbidden_sentence_prefix => Self::Paragraph,
646            Rule::image => Self::Image,
647            Rule::alt_word | Rule::alt_text => Self::AltText,
648            Rule::footnote_ref => Self::FootnoteRef,
649            Rule::footnote => Self::Footnote,
650            Rule::heading_prefix
651            | Rule::alt_char
652            | Rule::b_char
653            | Rule::c_char
654            | Rule::c_line_char
655            | Rule::comment_char
656            | Rule::i_char_var_1
657            | Rule::i_char_var_2
658            | Rule::latex_char
659            | Rule::quote_marking
660            | Rule::inline_link_char
661            | Rule::s_char
662            | Rule::WHITESPACE_S
663            | Rule::wiki_link
664            | Rule::footnote_ref_container => todo!(),
665        }
666    }
667}