1use image::ImageReader;
2use itertools::Itertools;
3use pest::{
4 Parser,
5 iterators::{Pair, Pairs},
6};
7use pest_derive::Parser;
8use ratatui::style::Color;
9
10use crate::nodes::{
11 image::ImageComponent,
12 root::{Component, ComponentRoot},
13 textcomponent::{TextComponent, TextNode},
14 word::{MetaData, Word, WordType},
15};
16
/// Markdown parser whose implementation is derived by `pest_derive` from the
/// `md.pest` grammar file.
///
/// The grammar's rules are referred to as `Rule::…` throughout this module
/// (for example `Rule::txt`, the entry rule used by `parse_markdown`).
#[derive(Parser)]
#[grammar = "md.pest"]
pub struct MdParser;
20
21pub fn parse_markdown(name: Option<&str>, content: &str, width: u16) -> ComponentRoot {
22 let root: Pairs<'_, Rule> = if let Ok(file) = MdParser::parse(Rule::txt, content) {
23 file
24 } else {
25 return ComponentRoot::new(name.map(str::to_string), Vec::new());
26 };
27
28 let root_pair = root.into_iter().next().unwrap();
29
30 let children = parse_text(root_pair)
31 .children_owned()
32 .into_iter()
33 .dedup()
34 .collect();
35
36 let parse_root = ParseRoot::new(name.map(str::to_string), children);
37
38 let mut root = node_to_component(parse_root).add_missing_components();
39
40 root.transform(width);
41 root
42}
43
44fn parse_text(pair: Pair<'_, Rule>) -> ParseNode {
45 let content = if pair.as_rule() == Rule::code_line {
46 pair.as_str().replace('\t', " ").replace('\r', "")
47 } else {
48 pair.as_str().replace('\n', " ")
49 };
50 let mut component = ParseNode::new(pair.as_rule().into(), content);
51 let children = parse_node_children(pair.into_inner());
52 component.add_children(children);
53 component
54}
55
56fn parse_node_children(pair: Pairs<'_, Rule>) -> Vec<ParseNode> {
57 let mut children = Vec::new();
58 for inner_pair in pair {
59 children.push(parse_text(inner_pair));
60 }
61 children
62}
63
64fn node_to_component(root: ParseRoot) -> ComponentRoot {
65 let mut children = Vec::new();
66 let name = root.file_name().clone();
67 for component in root.children_owned() {
68 let comp = parse_component(component);
69 children.push(comp);
70 }
71
72 ComponentRoot::new(name, children)
73}
74
/// Returns `true` when `url` begins with `http://` or `https://`.
///
/// The comparison is case-sensitive.
fn is_url(url: &str) -> bool {
    const SCHEMES: [&str; 2] = ["http://", "https://"];
    SCHEMES.iter().any(|scheme| url.starts_with(*scheme))
}
78
79fn parse_component(parse_node: ParseNode) -> Component {
80 match parse_node.kind() {
81 MdParseEnum::Image => {
82 let leaf_nodes = get_leaf_nodes(parse_node);
83 let mut alt_text = String::new();
84 let mut image = None;
85 for node in leaf_nodes {
86 if node.kind() == MdParseEnum::AltText {
87 node.content().clone_into(&mut alt_text);
88 } else if is_url(node.content()) {
89 #[cfg(feature = "network")]
90 {
91 let mut buf = Vec::new();
92 image = ureq::get(node.content()).call().ok().and_then(|b| {
93 let noe = b.into_body().read_to_vec();
94 noe.ok().and_then(|b| {
95 buf = b;
96 image::load_from_memory(&buf).ok()
97 })
98 });
99 }
100 #[cfg(not(feature = "network"))]
101 {
102 image = None;
103 }
104 } else {
105 image = ImageReader::open(node.content())
106 .ok()
107 .and_then(|r| r.decode().ok());
108 }
109 }
110
111 if let Some(img) = image.as_ref() {
112 let height = img.height();
113
114 let comp = ImageComponent::new(img.to_owned(), height, alt_text.clone());
115
116 if let Some(comp) = comp {
117 Component::Image(comp)
118 } else {
119 let word = [Word::new(format!("[{alt_text}]"), WordType::Normal)];
120
121 let comp = TextComponent::new(TextNode::Paragraph, word.into());
122 Component::TextComponent(comp)
123 }
124 } else {
125 let word = [
126 Word::new("Image".to_string(), WordType::Normal),
127 Word::new(" ".to_owned(), WordType::Normal),
128 Word::new("not".to_owned(), WordType::Normal),
129 Word::new(" ".to_owned(), WordType::Normal),
130 Word::new("found".to_owned(), WordType::Normal),
131 Word::new("/".to_owned(), WordType::Normal),
132 Word::new("fetched".to_owned(), WordType::Normal),
133 Word::new(" ".to_owned(), WordType::Normal),
134 Word::new(format!("[{alt_text}]"), WordType::Normal),
135 ];
136
137 let comp = TextComponent::new(TextNode::Paragraph, word.into());
138 Component::TextComponent(comp)
139 }
140 }
141
142 MdParseEnum::Task => {
143 let leaf_nodes = get_leaf_nodes(parse_node);
144 let mut words = Vec::new();
145 for node in leaf_nodes {
146 let word_type = WordType::from(node.kind());
147
148 let mut content: String = node
149 .content()
150 .chars()
151 .dedup_by(|x, y| *x == ' ' && *y == ' ')
152 .collect();
153
154 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
155 let comp = Word::new(content.clone(), WordType::LinkData);
156 words.push(comp);
157 }
158
159 if content.starts_with(' ') {
160 content.remove(0);
161 let comp = Word::new(" ".to_owned(), word_type);
162 words.push(comp);
163 }
164 words.push(Word::new(content, word_type));
165 }
166 Component::TextComponent(TextComponent::new(TextNode::Task, words))
167 }
168
169 MdParseEnum::Quote => {
170 let leaf_nodes = get_leaf_nodes(parse_node);
171 let mut words = Vec::new();
172 for node in leaf_nodes {
173 let word_type = WordType::from(node.kind());
174 let mut content = node.content().to_owned();
175
176 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
177 let comp = Word::new(content.clone(), WordType::LinkData);
178 words.push(comp);
179 }
180 if content.starts_with(' ') {
181 content.remove(0);
182 let comp = Word::new(" ".to_owned(), word_type);
183 words.push(comp);
184 }
185 words.push(Word::new(content, word_type));
186 }
187 if let Some(w) = words.first_mut() {
188 w.set_content(w.content().trim_start().to_owned());
189 }
190 Component::TextComponent(TextComponent::new(TextNode::Quote, words))
191 }
192
193 MdParseEnum::Heading => {
194 let indent = parse_node
195 .content()
196 .chars()
197 .take_while(|c| *c == '#')
198 .count();
199 let leaf_nodes = get_leaf_nodes(parse_node);
200 let mut words = Vec::new();
201
202 words.push(Word::new(
203 String::new(),
204 WordType::MetaInfo(MetaData::HeadingLevel(indent as u8)),
205 ));
206
207 if indent > 1 {
208 words.push(Word::new(
209 format!("{} ", "#".repeat(indent)),
210 WordType::Normal,
211 ));
212 }
213
214 for node in leaf_nodes {
215 let word_type = WordType::from(node.kind());
216 let mut content = node.content().to_owned();
217
218 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
219 let comp = Word::new(content.clone(), WordType::LinkData);
220 words.push(comp);
221 }
222
223 if content.starts_with(' ') {
224 content.remove(0);
225 let comp = Word::new(" ".to_owned(), word_type);
226 words.push(comp);
227 }
228 words.push(Word::new(content, word_type));
229 }
230 if let Some(w) = words.first_mut() {
231 w.set_content(w.content().trim_start().to_owned());
232 }
233 Component::TextComponent(TextComponent::new(TextNode::Heading, words))
234 }
235
236 MdParseEnum::Paragraph => {
237 let leaf_nodes = get_leaf_nodes(parse_node);
238 let mut words = Vec::new();
239 for node in leaf_nodes {
240 let word_type = WordType::from(node.kind());
241 let mut content = node.content().to_owned();
242
243 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
244 let comp = Word::new(content.clone(), WordType::LinkData);
245 words.push(comp);
246 }
247
248 if content.starts_with(' ') {
249 content.remove(0);
250 let comp = Word::new(" ".to_owned(), word_type);
251 words.push(comp);
252 }
253 words.push(Word::new(content, word_type));
254 }
255 if let Some(w) = words.first_mut() {
256 w.set_content(w.content().trim_start().to_owned());
257 }
258 Component::TextComponent(TextComponent::new(TextNode::Paragraph, words))
259 }
260
261 MdParseEnum::CodeBlock => {
262 let leaf_nodes = get_leaf_nodes(parse_node);
263 let mut words = Vec::new();
264
265 let mut space_indented = false;
266
267 for node in leaf_nodes {
268 if node.kind() == MdParseEnum::CodeBlockStrSpaceIndented {
269 space_indented = true;
270 }
271 let word_type = WordType::from(node.kind());
272 let content = node.content().to_owned();
273 words.push(vec![Word::new(content, word_type)]);
274 }
275
276 if space_indented {
277 words.push(vec![Word::new(
278 " ".to_owned(),
279 WordType::CodeBlock(Color::Reset),
280 )]);
281 }
282
283 Component::TextComponent(TextComponent::new_formatted(TextNode::CodeBlock, words))
284 }
285
286 MdParseEnum::ListContainer => {
287 let mut words = Vec::new();
288 for child in parse_node.children_owned() {
289 let kind = child.kind();
290 let leaf_nodes = get_leaf_nodes(child);
291 let mut inner_words = Vec::new();
292 for node in leaf_nodes {
293 let word_type = WordType::from(node.kind());
294
295 let mut content = match node.kind() {
296 MdParseEnum::Indent => node.content().to_owned(),
297 _ => node
298 .content()
299 .chars()
300 .dedup_by(|x, y| *x == ' ' && *y == ' ')
301 .collect(),
302 };
303
304 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
305 let comp = Word::new(content.clone(), WordType::LinkData);
306 inner_words.push(comp);
307 }
308 if content.starts_with(' ') && node.kind() != MdParseEnum::Indent {
309 content.remove(0);
310 let comp = Word::new(" ".to_owned(), word_type);
311 inner_words.push(comp);
312 }
313
314 inner_words.push(Word::new(content, word_type));
315 }
316 if kind == MdParseEnum::UnorderedList {
317 inner_words.push(Word::new(
318 "X".to_owned(),
319 WordType::MetaInfo(MetaData::UList),
320 ));
321 let list_symbol = Word::new("• ".to_owned(), WordType::ListMarker);
322 inner_words.insert(1, list_symbol);
323 } else if kind == MdParseEnum::OrderedList {
324 inner_words.push(Word::new(
325 "X".to_owned(),
326 WordType::MetaInfo(MetaData::OList),
327 ));
328 }
329 words.push(inner_words);
330 }
331 Component::TextComponent(TextComponent::new_formatted(TextNode::List, words))
332 }
333
334 MdParseEnum::Table => {
335 let mut words = Vec::new();
336 let mut meta_info = Vec::new();
337 for cell in parse_node.children_owned() {
338 if cell.kind() == MdParseEnum::TableSeparator {
339 meta_info.push(Word::new(
340 cell.content().to_owned(),
341 WordType::MetaInfo(MetaData::ColumnsCount),
342 ));
343 continue;
344 }
345 let mut inner_words = Vec::new();
346
347 if cell.children().is_empty() {
348 words.push(inner_words);
349 continue;
350 }
351
352 for word in get_leaf_nodes(cell) {
353 let word_type = WordType::from(word.kind());
354 let mut content = word.content().to_owned();
355
356 if matches!(word.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
357 let comp = Word::new(content.clone(), WordType::LinkData);
358 inner_words.push(comp);
359 }
360
361 if content.starts_with(' ') {
362 content.remove(0);
363 let comp = Word::new(" ".to_owned(), word_type);
364 inner_words.push(comp);
365 }
366
367 inner_words.push(Word::new(content, word_type));
368 }
369 words.push(inner_words);
370 }
371 Component::TextComponent(TextComponent::new_formatted_with_meta(
372 TextNode::Table(vec![], vec![]),
373 words,
374 meta_info,
375 ))
376 }
377
378 MdParseEnum::BlockSeparator => {
379 Component::TextComponent(TextComponent::new(TextNode::LineBreak, Vec::new()))
380 }
381 MdParseEnum::HorizontalSeparator => Component::TextComponent(TextComponent::new(
382 TextNode::HorizontalSeparator,
383 Vec::new(),
384 )),
385 MdParseEnum::Footnote => {
386 let mut words = Vec::new();
387 let foot_ref = parse_node.children().first().unwrap().to_owned();
388 words.push(Word::new(foot_ref.content, WordType::FootnoteData));
389 let _rest = parse_node
390 .children_owned()
391 .into_iter()
392 .skip(1)
393 .map(|e| e.content)
394 .collect::<String>();
395 words.push(Word::new(_rest, WordType::Footnote));
396 Component::TextComponent(TextComponent::new(TextNode::Footnote, words))
397 }
398 _ => todo!("Not implemented for {:?}", parse_node.kind()),
399 }
400}
401
402fn get_leaf_nodes(node: ParseNode) -> Vec<ParseNode> {
403 let mut leaf_nodes = Vec::new();
404
405 if node.kind() == MdParseEnum::Link {
407 let comp = if node.content().starts_with(' ') {
408 ParseNode::new(MdParseEnum::Word, " ".to_owned())
409 } else {
410 ParseNode::new(MdParseEnum::Word, String::new())
411 };
412 leaf_nodes.push(comp);
413 }
414
415 if matches!(
416 node.kind(),
417 MdParseEnum::CodeStr
418 | MdParseEnum::ItalicStr
419 | MdParseEnum::BoldStr
420 | MdParseEnum::BoldItalicStr
421 | MdParseEnum::StrikethroughStr
422 ) && node.content().starts_with(' ')
423 {
424 let comp = ParseNode::new(MdParseEnum::Word, " ".to_owned());
425 leaf_nodes.push(comp);
426 }
427
428 if node.children().is_empty() {
429 leaf_nodes.push(node);
430 } else {
431 for child in node.children_owned() {
432 leaf_nodes.append(&mut get_leaf_nodes(child));
433 }
434 }
435 leaf_nodes
436}
437
438pub fn print_from_root(root: &ComponentRoot) {
439 for child in root.components() {
440 print_component(child, 0);
441 }
442}
443
444fn print_component(component: &TextComponent, _depth: usize) {
445 println!(
446 "Component: {:?}, height: {}, y_offset: {}",
447 component.kind(),
448 component.height(),
449 component.y_offset()
450 );
451 component.meta_info().iter().for_each(|w| {
452 println!("Meta: {}, kind: {:?}", w.content(), w.kind());
453 });
454 component.content().iter().for_each(|w| {
455 w.iter().for_each(|w| {
456 println!("Content:{}, kind: {:?}", w.content(), w.kind());
457 });
458 });
459}
460
461#[derive(Debug, Clone)]
462pub struct ParseRoot {
463 file_name: Option<String>,
464 children: Vec<ParseNode>,
465}
466
467impl ParseRoot {
468 #[must_use]
469 pub fn new(file_name: Option<String>, children: Vec<ParseNode>) -> Self {
470 Self {
471 file_name,
472 children,
473 }
474 }
475
476 #[must_use]
477 pub fn children(&self) -> &Vec<ParseNode> {
478 &self.children
479 }
480
481 #[must_use]
482 pub fn children_owned(self) -> Vec<ParseNode> {
483 self.children
484 }
485
486 #[must_use]
487 pub fn file_name(&self) -> Option<String> {
488 self.file_name.clone()
489 }
490}
491
492#[derive(Debug, Clone, PartialEq, Eq)]
493pub struct ParseNode {
494 kind: MdParseEnum,
495 content: String,
496 children: Vec<ParseNode>,
497}
498
499impl ParseNode {
500 #[must_use]
501 pub fn new(kind: MdParseEnum, content: String) -> Self {
502 Self {
503 kind,
504 content,
505 children: Vec::new(),
506 }
507 }
508
509 #[must_use]
510 pub fn kind(&self) -> MdParseEnum {
511 self.kind
512 }
513
514 #[must_use]
515 pub fn content(&self) -> &str {
516 &self.content
517 }
518
519 pub fn add_children(&mut self, children: Vec<ParseNode>) {
520 self.children.extend(children);
521 }
522
523 #[must_use]
524 pub fn children(&self) -> &Vec<ParseNode> {
525 &self.children
526 }
527
528 #[must_use]
529 pub fn children_owned(self) -> Vec<ParseNode> {
530 self.children
531 }
532}
533
/// Kind of a [`ParseNode`].
///
/// Values are produced from grammar rules by the `From<Rule>` implementation
/// in this module; the per-variant notes below name the rules that map to
/// each variant there.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdParseEnum {
    /// From `alt_word` / `alt_text`; read as the alt text of an image.
    AltText,
    /// From `block_sep`.
    BlockSeparator,
    /// From `bold_word`.
    Bold,
    /// From `bold_italic_word`.
    BoldItalic,
    /// From `bold_italic`.
    BoldItalicStr,
    /// From `bold`.
    BoldStr,
    /// From `caution`.
    Caution,
    /// From `code_word`.
    Code,
    /// From `code_block` / `indented_code_block`.
    CodeBlock,
    /// From `code_line`.
    CodeBlockStr,
    /// From `indented_code_line` / `indented_code_newline`.
    CodeBlockStrSpaceIndented,
    /// From `code`.
    CodeStr,
    /// From `o_list_counter` / `digit`.
    Digit,
    /// From `footnote_ref`.
    FootnoteRef,
    /// From `footnote`.
    Footnote,
    /// From `heading` and `h1` through `h6`.
    Heading,
    /// From `horizontal_sep`.
    HorizontalSeparator,
    /// From `image`.
    Image,
    /// From `important`. (The identifier is spelled `Imortant`.)
    Imortant,
    /// From `indent`.
    Indent,
    /// From `inline_link` / `inline_link_wrapper`.
    InlineLink,
    /// From `italic_word_var_1` / `italic_word_var_2`.
    Italic,
    /// From `italic_var_1` / `italic_var_2`.
    ItalicStr,
    /// From `link_word`, `link_line`, `link` and `wiki_link_word`.
    Link,
    /// From `link_data` / `wiki_link_data`.
    LinkData,
    /// From `list_container`.
    ListContainer,
    /// From `note`.
    Note,
    /// From `o_list`.
    OrderedList,
    /// From `programming_language`.
    PLanguage,
    /// From `paragraph`; also the kind assigned to several character-level,
    /// prefix and miscellaneous rules (e.g. `txt`, `normal`, `latex`, `comment`).
    Paragraph,
    /// From `quote`.
    Quote,
    /// From `sentence`, `t_sentence` and `footnote_sentence`.
    Sentence,
    /// From `strikethrough_word`.
    Strikethrough,
    /// From `strikethrough`.
    StrikethroughStr,
    /// From `table`.
    Table,
    /// From `table_cell`.
    TableCell,
    /// From `table_separator`.
    TableSeparator,
    /// From `task`.
    Task,
    /// From `task_complete`.
    TaskClosed,
    /// From `task_open`.
    TaskOpen,
    /// From `tip`.
    Tip,
    /// From `u_list`.
    UnorderedList,
    /// From `warning`.
    Warning,
    /// From `wiki_link_alone`.
    WikiLink,
    /// From `word`, `h_word`, `latex_word` and `t_word`; also used for the
    /// synthetic spacing nodes created by `get_leaf_nodes`.
    Word,
}
582
583impl From<Rule> for MdParseEnum {
584 fn from(value: Rule) -> Self {
585 match value {
586 Rule::word | Rule::h_word | Rule::latex_word | Rule::t_word => Self::Word,
587 Rule::indent => Self::Indent,
588 Rule::italic_word_var_1 | Rule::italic_word_var_2 => Self::Italic,
589 Rule::italic_var_1 | Rule::italic_var_2 => Self::ItalicStr,
590 Rule::bold_word => Self::Bold,
591 Rule::bold => Self::BoldStr,
592 Rule::bold_italic_word => Self::BoldItalic,
593 Rule::bold_italic => Self::BoldItalicStr,
594 Rule::strikethrough_word => Self::Strikethrough,
595 Rule::strikethrough => Self::StrikethroughStr,
596 Rule::code_word => Self::Code,
597 Rule::code => Self::CodeStr,
598 Rule::programming_language => Self::PLanguage,
599 Rule::link_word | Rule::link_line | Rule::link | Rule::wiki_link_word => Self::Link,
600 Rule::wiki_link_alone => Self::WikiLink,
601 Rule::inline_link | Rule::inline_link_wrapper => Self::InlineLink,
602 Rule::o_list_counter | Rule::digit => Self::Digit,
603 Rule::task_open => Self::TaskOpen,
604 Rule::task_complete => Self::TaskClosed,
605 Rule::code_line => Self::CodeBlockStr,
606 Rule::indented_code_line | Rule::indented_code_newline => {
607 Self::CodeBlockStrSpaceIndented
608 }
609 Rule::sentence | Rule::t_sentence | Rule::footnote_sentence => Self::Sentence,
610 Rule::table_cell => Self::TableCell,
611 Rule::table_separator => Self::TableSeparator,
612 Rule::u_list => Self::UnorderedList,
613 Rule::o_list => Self::OrderedList,
614 Rule::h1 | Rule::h2 | Rule::h3 | Rule::h4 | Rule::h5 | Rule::h6 | Rule::heading => {
615 Self::Heading
616 }
617 Rule::list_container => Self::ListContainer,
618 Rule::paragraph => Self::Paragraph,
619 Rule::code_block | Rule::indented_code_block => Self::CodeBlock,
620 Rule::table => Self::Table,
621 Rule::quote => Self::Quote,
622 Rule::task => Self::Task,
623 Rule::block_sep => Self::BlockSeparator,
624 Rule::horizontal_sep => Self::HorizontalSeparator,
625 Rule::link_data | Rule::wiki_link_data => Self::LinkData,
626 Rule::warning => Self::Warning,
627 Rule::note => Self::Note,
628 Rule::tip => Self::Tip,
629 Rule::important => Self::Imortant,
630 Rule::caution => Self::Caution,
631 Rule::p_char
632 | Rule::t_char
633 | Rule::link_char
634 | Rule::wiki_link_char
635 | Rule::normal
636 | Rule::t_normal
637 | Rule::latex
638 | Rule::comment
639 | Rule::txt
640 | Rule::task_prefix
641 | Rule::quote_prefix
642 | Rule::code_block_prefix
643 | Rule::table_prefix
644 | Rule::list_prefix
645 | Rule::forbidden_sentence_prefix => Self::Paragraph,
646 Rule::image => Self::Image,
647 Rule::alt_word | Rule::alt_text => Self::AltText,
648 Rule::footnote_ref => Self::FootnoteRef,
649 Rule::footnote => Self::Footnote,
650 Rule::heading_prefix
651 | Rule::alt_char
652 | Rule::b_char
653 | Rule::c_char
654 | Rule::c_line_char
655 | Rule::comment_char
656 | Rule::i_char_var_1
657 | Rule::i_char_var_2
658 | Rule::latex_char
659 | Rule::quote_marking
660 | Rule::inline_link_char
661 | Rule::s_char
662 | Rule::WHITESPACE_S
663 | Rule::wiki_link
664 | Rule::footnote_ref_container => todo!(),
665 }
666 }
667}