md_tui/
parser.rs

1use image::ImageReader;
2use itertools::Itertools;
3use pest::{
4    Parser,
5    iterators::{Pair, Pairs},
6};
7use pest_derive::Parser;
8use ratatui::style::Color;
9
10use crate::nodes::{
11    image::ImageComponent,
12    root::{Component, ComponentRoot},
13    textcomponent::{TextComponent, TextNode},
14    word::{MetaData, Word, WordType},
15};
16
/// Markdown parser whose implementation is derived by `pest_derive` from the
/// grammar file `md.pest`.
#[derive(Parser)]
#[grammar = "md.pest"]
pub struct MdParser;
20
21pub fn parse_markdown(name: Option<&str>, content: &str, width: u16) -> ComponentRoot {
22    let root: Pairs<'_, Rule> = if let Ok(file) = MdParser::parse(Rule::txt, content) {
23        file
24    } else {
25        return ComponentRoot::new(name.map(str::to_string), Vec::new());
26    };
27
28    let root_pair = root.into_iter().next().unwrap();
29
30    let children = parse_text(root_pair)
31        .children_owned()
32        .into_iter()
33        .dedup()
34        .collect();
35
36    let parse_root = ParseRoot::new(name.map(str::to_string), children);
37
38    let mut root = node_to_component(parse_root).add_missing_components();
39
40    root.transform(width);
41    root
42}
43
44fn parse_text(pair: Pair<'_, Rule>) -> ParseNode {
45    let content = if pair.as_rule() == Rule::code_line {
46        pair.as_str().replace('\t', "    ").replace('\r', "")
47    } else {
48        pair.as_str().replace('\n', " ")
49    };
50    let mut component = ParseNode::new(pair.as_rule().into(), content);
51    let children = parse_node_children(pair.into_inner());
52    component.add_children(children);
53    component
54}
55
56fn parse_node_children(pair: Pairs<'_, Rule>) -> Vec<ParseNode> {
57    let mut children = Vec::new();
58    for inner_pair in pair {
59        children.push(parse_text(inner_pair));
60    }
61    children
62}
63
64fn node_to_component(root: ParseRoot) -> ComponentRoot {
65    let mut children = Vec::new();
66    let name = root.file_name().clone();
67    for component in root.children_owned() {
68        let comp = parse_component(component);
69        children.push(comp);
70    }
71
72    ComponentRoot::new(name, children)
73}
74
/// Returns `true` when `url` begins with the literal prefix `http://` or
/// `https://` (the comparison is case-sensitive).
fn is_url(url: &str) -> bool {
    const WEB_PREFIXES: [&str; 2] = ["http://", "https://"];

    WEB_PREFIXES.iter().any(|prefix| url.starts_with(*prefix))
}
78
79fn parse_component(parse_node: ParseNode) -> Component {
80    match parse_node.kind() {
81        MdParseEnum::Image => {
82            let leaf_nodes = get_leaf_nodes(parse_node);
83            let mut alt_text = String::new();
84            let mut image = None;
85            for node in leaf_nodes {
86                if node.kind() == MdParseEnum::AltText {
87                    node.content().clone_into(&mut alt_text);
88                } else if is_url(node.content()) {
89                    #[cfg(feature = "network")]
90                    {
91                        let mut buf = Vec::new();
92                        image = ureq::get(node.content()).call().ok().and_then(|b| {
93                            let noe = b.into_body().read_to_vec();
94                            noe.ok().and_then(|b| {
95                                buf = b;
96                                image::load_from_memory(&buf).ok()
97                            })
98                        });
99                    }
100                    #[cfg(not(feature = "network"))]
101                    {
102                        image = None;
103                    }
104                } else {
105                    image = ImageReader::open(node.content())
106                        .ok()
107                        .and_then(|r| r.decode().ok());
108                }
109            }
110
111            if let Some(img) = image.as_ref() {
112                let height = img.height();
113
114                let comp = ImageComponent::new(img.to_owned(), height, alt_text.clone());
115
116                if let Some(comp) = comp {
117                    Component::Image(comp)
118                } else {
119                    let word = [Word::new(format!("[{alt_text}]"), WordType::Normal)];
120
121                    let comp = TextComponent::new(TextNode::Paragraph, word.into());
122                    Component::TextComponent(comp)
123                }
124            } else {
125                let word = [
126                    Word::new("Image".to_string(), WordType::Normal),
127                    Word::new(" ".to_owned(), WordType::Normal),
128                    Word::new("not".to_owned(), WordType::Normal),
129                    Word::new(" ".to_owned(), WordType::Normal),
130                    Word::new("found".to_owned(), WordType::Normal),
131                    Word::new("/".to_owned(), WordType::Normal),
132                    Word::new("fetched".to_owned(), WordType::Normal),
133                    Word::new(" ".to_owned(), WordType::Normal),
134                    Word::new(format!("[{alt_text}]"), WordType::Normal),
135                ];
136
137                let comp = TextComponent::new(TextNode::Paragraph, word.into());
138                Component::TextComponent(comp)
139            }
140        }
141
142        MdParseEnum::Task => {
143            let leaf_nodes = get_leaf_nodes(parse_node);
144            let mut words = Vec::new();
145            for node in leaf_nodes {
146                let word_type = WordType::from(node.kind());
147
148                let mut content: String = node
149                    .content()
150                    .chars()
151                    .dedup_by(|x, y| *x == ' ' && *y == ' ')
152                    .collect();
153
154                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
155                    let comp = Word::new(content.clone(), WordType::LinkData);
156                    words.push(comp);
157                }
158
159                if content.starts_with(' ') {
160                    content.remove(0);
161                    let comp = Word::new(" ".to_owned(), word_type);
162                    words.push(comp);
163                }
164                words.push(Word::new(content, word_type));
165            }
166            Component::TextComponent(TextComponent::new(TextNode::Task, words))
167        }
168
169        MdParseEnum::Quote => {
170            let leaf_nodes = get_leaf_nodes(parse_node);
171            let mut words = Vec::new();
172            for node in leaf_nodes {
173                let word_type = WordType::from(node.kind());
174                let mut content = node.content().to_owned();
175
176                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
177                    let comp = Word::new(content.clone(), WordType::LinkData);
178                    words.push(comp);
179                }
180                if content.starts_with(' ') {
181                    content.remove(0);
182                    let comp = Word::new(" ".to_owned(), word_type);
183                    words.push(comp);
184                }
185                words.push(Word::new(content, word_type));
186            }
187            if let Some(w) = words.first_mut() {
188                w.set_content(w.content().trim_start().to_owned());
189            }
190            Component::TextComponent(TextComponent::new(TextNode::Quote, words))
191        }
192
193        MdParseEnum::Heading => {
194            let indent = parse_node
195                .content()
196                .chars()
197                .take_while(|c| *c == '#')
198                .count();
199            let leaf_nodes = get_leaf_nodes(parse_node);
200            let mut words = Vec::new();
201
202            words.push(Word::new(
203                String::new(),
204                WordType::MetaInfo(MetaData::HeadingLevel(indent as u8)),
205            ));
206
207            if indent > 1 {
208                words.push(Word::new(
209                    format!("{} ", "#".repeat(indent)),
210                    WordType::Normal,
211                ));
212            }
213
214            for node in leaf_nodes {
215                let word_type = WordType::from(node.kind());
216                let mut content = node.content().to_owned();
217
218                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
219                    let comp = Word::new(content.clone(), WordType::LinkData);
220                    words.push(comp);
221                }
222
223                if content.starts_with(' ') {
224                    content.remove(0);
225                    let comp = Word::new(" ".to_owned(), word_type);
226                    words.push(comp);
227                }
228                words.push(Word::new(content, word_type));
229            }
230            if let Some(w) = words.first_mut() {
231                w.set_content(w.content().trim_start().to_owned());
232            }
233            Component::TextComponent(TextComponent::new(TextNode::Heading, words))
234        }
235
236        MdParseEnum::Paragraph => {
237            let leaf_nodes = get_leaf_nodes(parse_node);
238            let mut words = Vec::new();
239            for node in leaf_nodes {
240                let word_type = WordType::from(node.kind());
241                let mut content = node.content().to_owned();
242
243                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
244                    let comp = Word::new(content.clone(), WordType::LinkData);
245                    words.push(comp);
246                }
247
248                if content.starts_with(' ') {
249                    content.remove(0);
250                    let comp = Word::new(" ".to_owned(), word_type);
251                    words.push(comp);
252                }
253                words.push(Word::new(content, word_type));
254            }
255            if let Some(w) = words.first_mut() {
256                w.set_content(w.content().trim_start().to_owned());
257            }
258            Component::TextComponent(TextComponent::new(TextNode::Paragraph, words))
259        }
260
261        MdParseEnum::CodeBlock => {
262            let leaf_nodes = get_leaf_nodes(parse_node);
263            let mut words = Vec::new();
264
265            let mut space_indented = false;
266
267            for node in leaf_nodes {
268                if node.kind() == MdParseEnum::CodeBlockStrSpaceIndented {
269                    space_indented = true;
270                }
271                let word_type = WordType::from(node.kind());
272                let content = node.content().to_owned();
273                words.push(vec![Word::new(content, word_type)]);
274            }
275
276            if space_indented {
277                words.push(vec![Word::new(
278                    " ".to_owned(),
279                    WordType::CodeBlock(Color::Reset),
280                )]);
281            }
282
283            Component::TextComponent(TextComponent::new_formatted(TextNode::CodeBlock, words))
284        }
285
286        MdParseEnum::ListContainer => {
287            let mut words = Vec::new();
288            for child in parse_node.children_owned() {
289                let kind = child.kind();
290                let leaf_nodes = get_leaf_nodes(child);
291                let mut inner_words = Vec::new();
292                for node in leaf_nodes {
293                    let word_type = WordType::from(node.kind());
294
295                    let mut content = match node.kind() {
296                        MdParseEnum::Indent => node.content().to_owned(),
297                        _ => node
298                            .content()
299                            .chars()
300                            .dedup_by(|x, y| *x == ' ' && *y == ' ')
301                            .collect(),
302                    };
303
304                    if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
305                        let comp = Word::new(content.clone(), WordType::LinkData);
306                        inner_words.push(comp);
307                    }
308                    if content.starts_with(' ') && node.kind() != MdParseEnum::Indent {
309                        content.remove(0);
310                        let comp = Word::new(" ".to_owned(), word_type);
311                        inner_words.push(comp);
312                    }
313
314                    inner_words.push(Word::new(content, word_type));
315                }
316                if kind == MdParseEnum::UnorderedList {
317                    inner_words.push(Word::new(
318                        "X".to_owned(),
319                        WordType::MetaInfo(MetaData::UList),
320                    ));
321                    let list_symbol = Word::new("• ".to_owned(), WordType::ListMarker);
322                    inner_words.insert(1, list_symbol);
323                } else if kind == MdParseEnum::OrderedList {
324                    inner_words.push(Word::new(
325                        "X".to_owned(),
326                        WordType::MetaInfo(MetaData::OList),
327                    ));
328                }
329                words.push(inner_words);
330            }
331            Component::TextComponent(TextComponent::new_formatted(TextNode::List, words))
332        }
333
334        MdParseEnum::Table => {
335            let mut words = Vec::new();
336            for cell in parse_node.children_owned() {
337                if cell.kind() == MdParseEnum::TableSeparator {
338                    words.push(vec![Word::new(
339                        cell.content().to_owned(),
340                        WordType::MetaInfo(MetaData::ColumnsCount),
341                    )]);
342                    continue;
343                }
344                let mut inner_words = Vec::new();
345
346                if cell.children().is_empty() {
347                    words.push(inner_words);
348                    continue;
349                }
350
351                for word in get_leaf_nodes(cell) {
352                    let word_type = WordType::from(word.kind());
353                    let mut content = word.content().to_owned();
354
355                    if matches!(word.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
356                        let comp = Word::new(content.clone(), WordType::LinkData);
357                        inner_words.push(comp);
358                    }
359
360                    if content.starts_with(' ') {
361                        content.remove(0);
362                        let comp = Word::new(" ".to_owned(), word_type);
363                        inner_words.push(comp);
364                    }
365
366                    inner_words.push(Word::new(content, word_type));
367                }
368                words.push(inner_words);
369            }
370            Component::TextComponent(TextComponent::new_formatted(
371                TextNode::Table(vec![], vec![]),
372                words,
373            ))
374        }
375
376        MdParseEnum::BlockSeparator => {
377            Component::TextComponent(TextComponent::new(TextNode::LineBreak, Vec::new()))
378        }
379        MdParseEnum::HorizontalSeparator => Component::TextComponent(TextComponent::new(
380            TextNode::HorizontalSeparator,
381            Vec::new(),
382        )),
383        MdParseEnum::Footnote => {
384            let mut words = Vec::new();
385            let foot_ref = parse_node.children().first().unwrap().to_owned();
386            words.push(Word::new(foot_ref.content, WordType::FootnoteData));
387            let _rest = parse_node
388                .children_owned()
389                .into_iter()
390                .skip(1)
391                .map(|e| e.content)
392                .collect::<String>();
393            words.push(Word::new(_rest, WordType::Footnote));
394            Component::TextComponent(TextComponent::new(TextNode::Footnote, words))
395        }
396        _ => todo!("Not implemented for {:?}", parse_node.kind()),
397    }
398}
399
400fn get_leaf_nodes(node: ParseNode) -> Vec<ParseNode> {
401    let mut leaf_nodes = Vec::new();
402
403    // Insert separator information between links
404    if node.kind() == MdParseEnum::Link {
405        let comp = if node.content().starts_with(' ') {
406            ParseNode::new(MdParseEnum::Word, " ".to_owned())
407        } else {
408            ParseNode::new(MdParseEnum::Word, String::new())
409        };
410        leaf_nodes.push(comp);
411    }
412
413    if matches!(
414        node.kind(),
415        MdParseEnum::CodeStr
416            | MdParseEnum::ItalicStr
417            | MdParseEnum::BoldStr
418            | MdParseEnum::BoldItalicStr
419            | MdParseEnum::StrikethroughStr
420    ) && node.content().starts_with(' ')
421    {
422        let comp = ParseNode::new(MdParseEnum::Word, " ".to_owned());
423        leaf_nodes.push(comp);
424    }
425
426    if node.children().is_empty() {
427        leaf_nodes.push(node);
428    } else {
429        for child in node.children_owned() {
430            leaf_nodes.append(&mut get_leaf_nodes(child));
431        }
432    }
433    leaf_nodes
434}
435
436pub fn print_from_root(root: &ComponentRoot) {
437    for child in root.components() {
438        print_component(child, 0);
439    }
440}
441
442fn print_component(component: &TextComponent, _depth: usize) {
443    println!(
444        "Component: {:?}, height: {}, y_offset: {}",
445        component.kind(),
446        component.height(),
447        component.y_offset()
448    );
449    component.meta_info().iter().for_each(|w| {
450        println!("Meta: {}, kind: {:?}", w.content(), w.kind());
451    });
452    component.content().iter().for_each(|w| {
453        w.iter().for_each(|w| {
454            println!("Content:{}, kind: {:?}", w.content(), w.kind());
455        });
456    });
457}
458
459#[derive(Debug, Clone)]
460pub struct ParseRoot {
461    file_name: Option<String>,
462    children: Vec<ParseNode>,
463}
464
465impl ParseRoot {
466    #[must_use]
467    pub fn new(file_name: Option<String>, children: Vec<ParseNode>) -> Self {
468        Self {
469            file_name,
470            children,
471        }
472    }
473
474    #[must_use]
475    pub fn children(&self) -> &Vec<ParseNode> {
476        &self.children
477    }
478
479    #[must_use]
480    pub fn children_owned(self) -> Vec<ParseNode> {
481        self.children
482    }
483
484    #[must_use]
485    pub fn file_name(&self) -> Option<String> {
486        self.file_name.clone()
487    }
488}
489
490#[derive(Debug, Clone, PartialEq, Eq)]
491pub struct ParseNode {
492    kind: MdParseEnum,
493    content: String,
494    children: Vec<ParseNode>,
495}
496
497impl ParseNode {
498    #[must_use]
499    pub fn new(kind: MdParseEnum, content: String) -> Self {
500        Self {
501            kind,
502            content,
503            children: Vec::new(),
504        }
505    }
506
507    #[must_use]
508    pub fn kind(&self) -> MdParseEnum {
509        self.kind
510    }
511
512    #[must_use]
513    pub fn content(&self) -> &str {
514        &self.content
515    }
516
517    pub fn add_children(&mut self, children: Vec<ParseNode>) {
518        self.children.extend(children);
519    }
520
521    #[must_use]
522    pub fn children(&self) -> &Vec<ParseNode> {
523        &self.children
524    }
525
526    #[must_use]
527    pub fn children_owned(self) -> Vec<ParseNode> {
528        self.children
529    }
530}
531
/// Kind of a [`ParseNode`].
///
/// Values are produced from pest grammar rules through the
/// `From<Rule>` implementation; several rules can map to the same kind.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdParseEnum {
    AltText,
    BlockSeparator,
    Bold,
    BoldItalic,
    BoldItalicStr,
    BoldStr,
    Caution,
    Code,
    CodeBlock,
    CodeBlockStr,
    CodeBlockStrSpaceIndented,
    CodeStr,
    Digit,
    FootnoteRef,
    Footnote,
    Heading,
    HorizontalSeparator,
    Image,
    // (sic) Produced for `Rule::important`; the misspelt name is part of the
    // public enum.
    Imortant,
    Indent,
    InlineLink,
    Italic,
    ItalicStr,
    Link,
    LinkData,
    ListContainer,
    Note,
    OrderedList,
    PLanguage,
    Paragraph,
    Quote,
    Sentence,
    Strikethrough,
    StrikethroughStr,
    Table,
    TableCell,
    TableSeparator,
    Task,
    TaskClosed,
    TaskOpen,
    Tip,
    UnorderedList,
    Warning,
    WikiLink,
    Word,
}
580
/// Maps a grammar [`Rule`] to the node kind used by the parse tree.
impl From<Rule> for MdParseEnum {
    /// Returns the node kind for `value`.
    ///
    /// # Panics
    ///
    /// Panics via `todo!()` for the rules listed in the final arm, which have
    /// no mapping.
    fn from(value: Rule) -> Self {
        match value {
            Rule::word | Rule::h_word | Rule::latex_word | Rule::t_word => Self::Word,
            Rule::indent => Self::Indent,
            // For inline styles the `*_word` rule maps to the plain kind and
            // the enclosing rule maps to the matching `*Str` kind.
            Rule::italic_word => Self::Italic,
            Rule::italic => Self::ItalicStr,
            Rule::bold_word => Self::Bold,
            Rule::bold => Self::BoldStr,
            Rule::bold_italic_word => Self::BoldItalic,
            Rule::bold_italic => Self::BoldItalicStr,
            Rule::strikethrough_word => Self::Strikethrough,
            Rule::strikethrough => Self::StrikethroughStr,
            Rule::code_word => Self::Code,
            Rule::code => Self::CodeStr,
            Rule::programming_language => Self::PLanguage,
            Rule::link_word | Rule::link_line | Rule::link | Rule::wiki_link_word => Self::Link,
            Rule::wiki_link_alone => Self::WikiLink,
            Rule::inline_link | Rule::inline_link_wrapper => Self::InlineLink,
            Rule::o_list_counter | Rule::digit => Self::Digit,
            Rule::task_open => Self::TaskOpen,
            Rule::task_complete => Self::TaskClosed,
            Rule::code_line => Self::CodeBlockStr,
            Rule::indented_code_line | Rule::indented_code_newline => {
                Self::CodeBlockStrSpaceIndented
            }
            Rule::sentence | Rule::t_sentence | Rule::footnote_sentence => Self::Sentence,
            Rule::table_cell => Self::TableCell,
            Rule::table_separator => Self::TableSeparator,
            Rule::u_list => Self::UnorderedList,
            Rule::o_list => Self::OrderedList,
            Rule::h1 | Rule::h2 | Rule::h3 | Rule::h4 | Rule::h5 | Rule::h6 | Rule::heading => {
                Self::Heading
            }
            Rule::list_container => Self::ListContainer,
            Rule::paragraph => Self::Paragraph,
            Rule::code_block | Rule::indented_code_block => Self::CodeBlock,
            Rule::table => Self::Table,
            Rule::quote => Self::Quote,
            Rule::task => Self::Task,
            Rule::block_sep => Self::BlockSeparator,
            Rule::horizontal_sep => Self::HorizontalSeparator,
            Rule::link_data | Rule::wiki_link_data => Self::LinkData,
            Rule::warning => Self::Warning,
            Rule::note => Self::Note,
            Rule::tip => Self::Tip,
            Rule::important => Self::Imortant,
            Rule::caution => Self::Caution,
            // Character-level, prefix and container rules (including the
            // top-level `txt` rule) all map to `Paragraph`.
            Rule::p_char
            | Rule::t_char
            | Rule::link_char
            | Rule::wiki_link_char
            | Rule::normal
            | Rule::t_normal
            | Rule::latex
            | Rule::comment
            | Rule::txt
            | Rule::task_prefix
            | Rule::quote_prefix
            | Rule::code_block_prefix
            | Rule::table_prefix
            | Rule::list_prefix
            | Rule::forbidden_sentence_prefix => Self::Paragraph,
            Rule::image => Self::Image,
            Rule::alt_word | Rule::alt_text => Self::AltText,
            Rule::footnote_ref => Self::FootnoteRef,
            Rule::footnote => Self::Footnote,
            // No mapping exists for these rules; converting one panics.
            // NOTE(review): presumably these never surface as pairs (e.g.
            // silent rules) — confirm against `md.pest`.
            Rule::heading_prefix
            | Rule::alt_char
            | Rule::b_char
            | Rule::c_char
            | Rule::c_line_char
            | Rule::comment_char
            | Rule::i_char
            | Rule::latex_char
            | Rule::quote_marking
            | Rule::inline_link_char
            | Rule::s_char
            | Rule::WHITESPACE_S
            | Rule::wiki_link
            | Rule::footnote_ref_container => todo!(),
        }
    }
}