1use image::ImageReader;
2use itertools::Itertools;
3use pest::{
4 Parser,
5 iterators::{Pair, Pairs},
6};
7use pest_derive::Parser;
8use ratatui::style::Color;
9
10use crate::nodes::{
11 image::ImageComponent,
12 root::{Component, ComponentRoot},
13 textcomponent::{TextComponent, TextNode},
14 word::{MetaData, Word, WordType},
15};
16
/// Markdown parser generated by `pest_derive` from the `md.pest` grammar file.
///
/// The derive also provides the `Rule` enum that the rest of this module matches on.
#[derive(Parser)]
#[grammar = "md.pest"]
pub struct MdParser;
20
21pub fn parse_markdown(name: Option<&str>, content: &str, width: u16) -> ComponentRoot {
22 let root: Pairs<'_, Rule> = if let Ok(file) = MdParser::parse(Rule::txt, content) {
23 file
24 } else {
25 return ComponentRoot::new(name.map(str::to_string), Vec::new());
26 };
27
28 let root_pair = root.into_iter().next().unwrap();
29
30 let children = parse_text(root_pair)
31 .children_owned()
32 .into_iter()
33 .dedup()
34 .collect();
35
36 let parse_root = ParseRoot::new(name.map(str::to_string), children);
37
38 let mut root = node_to_component(parse_root).add_missing_components();
39
40 root.transform(width);
41 root
42}
43
44fn parse_text(pair: Pair<'_, Rule>) -> ParseNode {
45 let content = if pair.as_rule() == Rule::code_line {
46 pair.as_str().replace('\t', " ").replace('\r', "")
47 } else {
48 pair.as_str().replace('\n', " ")
49 };
50 let mut component = ParseNode::new(pair.as_rule().into(), content);
51 let children = parse_node_children(pair.into_inner());
52 component.add_children(children);
53 component
54}
55
56fn parse_node_children(pair: Pairs<'_, Rule>) -> Vec<ParseNode> {
57 let mut children = Vec::new();
58 for inner_pair in pair {
59 children.push(parse_text(inner_pair));
60 }
61 children
62}
63
64fn node_to_component(root: ParseRoot) -> ComponentRoot {
65 let mut children = Vec::new();
66 let name = root.file_name().clone();
67 for component in root.children_owned() {
68 let comp = parse_component(component);
69 children.push(comp);
70 }
71
72 ComponentRoot::new(name, children)
73}
74
/// Returns `true` when `url` begins with the `http://` or `https://` prefix
/// (case-sensitive).
fn is_url(url: &str) -> bool {
    ["http://", "https://"]
        .iter()
        .any(|prefix| url.starts_with(prefix))
}
78
79fn parse_component(parse_node: ParseNode) -> Component {
80 match parse_node.kind() {
81 MdParseEnum::Image => {
82 let leaf_nodes = get_leaf_nodes(parse_node);
83 let mut alt_text = String::new();
84 let mut image = None;
85 for node in leaf_nodes {
86 if node.kind() == MdParseEnum::AltText {
87 node.content().clone_into(&mut alt_text);
88 } else if is_url(node.content()) {
89 #[cfg(feature = "network")]
90 {
91 let mut buf = Vec::new();
92 image = ureq::get(node.content()).call().ok().and_then(|b| {
93 let noe = b.into_body().read_to_vec();
94 noe.ok().and_then(|b| {
95 buf = b;
96 image::load_from_memory(&buf).ok()
97 })
98 });
99 }
100 #[cfg(not(feature = "network"))]
101 {
102 image = None;
103 }
104 } else {
105 image = ImageReader::open(node.content())
106 .ok()
107 .and_then(|r| r.decode().ok());
108 }
109 }
110
111 if let Some(img) = image.as_ref() {
112 let height = img.height();
113
114 let comp = ImageComponent::new(img.to_owned(), height, alt_text.clone());
115
116 if let Some(comp) = comp {
117 Component::Image(comp)
118 } else {
119 let word = [Word::new(format!("[{alt_text}]"), WordType::Normal)];
120
121 let comp = TextComponent::new(TextNode::Paragraph, word.into());
122 Component::TextComponent(comp)
123 }
124 } else {
125 let word = [
126 Word::new("Image".to_string(), WordType::Normal),
127 Word::new(" ".to_owned(), WordType::Normal),
128 Word::new("not".to_owned(), WordType::Normal),
129 Word::new(" ".to_owned(), WordType::Normal),
130 Word::new("found".to_owned(), WordType::Normal),
131 Word::new("/".to_owned(), WordType::Normal),
132 Word::new("fetched".to_owned(), WordType::Normal),
133 Word::new(" ".to_owned(), WordType::Normal),
134 Word::new(format!("[{alt_text}]"), WordType::Normal),
135 ];
136
137 let comp = TextComponent::new(TextNode::Paragraph, word.into());
138 Component::TextComponent(comp)
139 }
140 }
141
142 MdParseEnum::Task => {
143 let leaf_nodes = get_leaf_nodes(parse_node);
144 let mut words = Vec::new();
145 for node in leaf_nodes {
146 let word_type = WordType::from(node.kind());
147
148 let mut content: String = node
149 .content()
150 .chars()
151 .dedup_by(|x, y| *x == ' ' && *y == ' ')
152 .collect();
153
154 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
155 let comp = Word::new(content.clone(), WordType::LinkData);
156 words.push(comp);
157 }
158
159 if content.starts_with(' ') {
160 content.remove(0);
161 let comp = Word::new(" ".to_owned(), word_type);
162 words.push(comp);
163 }
164 words.push(Word::new(content, word_type));
165 }
166 Component::TextComponent(TextComponent::new(TextNode::Task, words))
167 }
168
169 MdParseEnum::Quote => {
170 let leaf_nodes = get_leaf_nodes(parse_node);
171 let mut words = Vec::new();
172 for node in leaf_nodes {
173 let word_type = WordType::from(node.kind());
174 let mut content = node.content().to_owned();
175
176 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
177 let comp = Word::new(content.clone(), WordType::LinkData);
178 words.push(comp);
179 }
180 if content.starts_with(' ') {
181 content.remove(0);
182 let comp = Word::new(" ".to_owned(), word_type);
183 words.push(comp);
184 }
185 words.push(Word::new(content, word_type));
186 }
187 if let Some(w) = words.first_mut() {
188 w.set_content(w.content().trim_start().to_owned());
189 }
190 Component::TextComponent(TextComponent::new(TextNode::Quote, words))
191 }
192
193 MdParseEnum::Heading => {
194 let indent = parse_node
195 .content()
196 .chars()
197 .take_while(|c| *c == '#')
198 .count();
199 let leaf_nodes = get_leaf_nodes(parse_node);
200 let mut words = Vec::new();
201
202 words.push(Word::new(
203 String::new(),
204 WordType::MetaInfo(MetaData::HeadingLevel(indent as u8)),
205 ));
206
207 if indent > 1 {
208 words.push(Word::new(
209 format!("{} ", "#".repeat(indent)),
210 WordType::Normal,
211 ));
212 }
213
214 for node in leaf_nodes {
215 let word_type = WordType::from(node.kind());
216 let mut content = node.content().to_owned();
217
218 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
219 let comp = Word::new(content.clone(), WordType::LinkData);
220 words.push(comp);
221 }
222
223 if content.starts_with(' ') {
224 content.remove(0);
225 let comp = Word::new(" ".to_owned(), word_type);
226 words.push(comp);
227 }
228 words.push(Word::new(content, word_type));
229 }
230 if let Some(w) = words.first_mut() {
231 w.set_content(w.content().trim_start().to_owned());
232 }
233 Component::TextComponent(TextComponent::new(TextNode::Heading, words))
234 }
235
236 MdParseEnum::Paragraph => {
237 let leaf_nodes = get_leaf_nodes(parse_node);
238 let mut words = Vec::new();
239 for node in leaf_nodes {
240 let word_type = WordType::from(node.kind());
241 let mut content = node.content().to_owned();
242
243 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
244 let comp = Word::new(content.clone(), WordType::LinkData);
245 words.push(comp);
246 }
247
248 if content.starts_with(' ') {
249 content.remove(0);
250 let comp = Word::new(" ".to_owned(), word_type);
251 words.push(comp);
252 }
253 words.push(Word::new(content, word_type));
254 }
255 if let Some(w) = words.first_mut() {
256 w.set_content(w.content().trim_start().to_owned());
257 }
258 Component::TextComponent(TextComponent::new(TextNode::Paragraph, words))
259 }
260
261 MdParseEnum::CodeBlock => {
262 let leaf_nodes = get_leaf_nodes(parse_node);
263 let mut words = Vec::new();
264
265 let mut space_indented = false;
266
267 for node in leaf_nodes {
268 if node.kind() == MdParseEnum::CodeBlockStrSpaceIndented {
269 space_indented = true;
270 }
271 let word_type = WordType::from(node.kind());
272 let content = node.content().to_owned();
273 words.push(vec![Word::new(content, word_type)]);
274 }
275
276 if space_indented {
277 words.push(vec![Word::new(
278 " ".to_owned(),
279 WordType::CodeBlock(Color::Reset),
280 )]);
281 }
282
283 Component::TextComponent(TextComponent::new_formatted(TextNode::CodeBlock, words))
284 }
285
286 MdParseEnum::ListContainer => {
287 let mut words = Vec::new();
288 for child in parse_node.children_owned() {
289 let kind = child.kind();
290 let leaf_nodes = get_leaf_nodes(child);
291 let mut inner_words = Vec::new();
292 for node in leaf_nodes {
293 let word_type = WordType::from(node.kind());
294
295 let mut content = match node.kind() {
296 MdParseEnum::Indent => node.content().to_owned(),
297 _ => node
298 .content()
299 .chars()
300 .dedup_by(|x, y| *x == ' ' && *y == ' ')
301 .collect(),
302 };
303
304 if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
305 let comp = Word::new(content.clone(), WordType::LinkData);
306 inner_words.push(comp);
307 }
308 if content.starts_with(' ') && node.kind() != MdParseEnum::Indent {
309 content.remove(0);
310 let comp = Word::new(" ".to_owned(), word_type);
311 inner_words.push(comp);
312 }
313
314 inner_words.push(Word::new(content, word_type));
315 }
316 if kind == MdParseEnum::UnorderedList {
317 inner_words.push(Word::new(
318 "X".to_owned(),
319 WordType::MetaInfo(MetaData::UList),
320 ));
321 let list_symbol = Word::new("• ".to_owned(), WordType::ListMarker);
322 inner_words.insert(1, list_symbol);
323 } else if kind == MdParseEnum::OrderedList {
324 inner_words.push(Word::new(
325 "X".to_owned(),
326 WordType::MetaInfo(MetaData::OList),
327 ));
328 }
329 words.push(inner_words);
330 }
331 Component::TextComponent(TextComponent::new_formatted(TextNode::List, words))
332 }
333
334 MdParseEnum::Table => {
335 let mut words = Vec::new();
336 for cell in parse_node.children_owned() {
337 if cell.kind() == MdParseEnum::TableSeparator {
338 words.push(vec![Word::new(
339 cell.content().to_owned(),
340 WordType::MetaInfo(MetaData::ColumnsCount),
341 )]);
342 continue;
343 }
344 let mut inner_words = Vec::new();
345
346 if cell.children().is_empty() {
347 words.push(inner_words);
348 continue;
349 }
350
351 for word in get_leaf_nodes(cell) {
352 let word_type = WordType::from(word.kind());
353 let mut content = word.content().to_owned();
354
355 if matches!(word.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
356 let comp = Word::new(content.clone(), WordType::LinkData);
357 inner_words.push(comp);
358 }
359
360 if content.starts_with(' ') {
361 content.remove(0);
362 let comp = Word::new(" ".to_owned(), word_type);
363 inner_words.push(comp);
364 }
365
366 inner_words.push(Word::new(content, word_type));
367 }
368 words.push(inner_words);
369 }
370 Component::TextComponent(TextComponent::new_formatted(
371 TextNode::Table(vec![], vec![]),
372 words,
373 ))
374 }
375
376 MdParseEnum::BlockSeparator => {
377 Component::TextComponent(TextComponent::new(TextNode::LineBreak, Vec::new()))
378 }
379 MdParseEnum::HorizontalSeparator => Component::TextComponent(TextComponent::new(
380 TextNode::HorizontalSeparator,
381 Vec::new(),
382 )),
383 MdParseEnum::Footnote => {
384 let mut words = Vec::new();
385 let foot_ref = parse_node.children().first().unwrap().to_owned();
386 words.push(Word::new(foot_ref.content, WordType::FootnoteData));
387 let _rest = parse_node
388 .children_owned()
389 .into_iter()
390 .skip(1)
391 .map(|e| e.content)
392 .collect::<String>();
393 words.push(Word::new(_rest, WordType::Footnote));
394 Component::TextComponent(TextComponent::new(TextNode::Footnote, words))
395 }
396 _ => todo!("Not implemented for {:?}", parse_node.kind()),
397 }
398}
399
400fn get_leaf_nodes(node: ParseNode) -> Vec<ParseNode> {
401 let mut leaf_nodes = Vec::new();
402
403 if node.kind() == MdParseEnum::Link {
405 let comp = if node.content().starts_with(' ') {
406 ParseNode::new(MdParseEnum::Word, " ".to_owned())
407 } else {
408 ParseNode::new(MdParseEnum::Word, String::new())
409 };
410 leaf_nodes.push(comp);
411 }
412
413 if matches!(
414 node.kind(),
415 MdParseEnum::CodeStr
416 | MdParseEnum::ItalicStr
417 | MdParseEnum::BoldStr
418 | MdParseEnum::BoldItalicStr
419 | MdParseEnum::StrikethroughStr
420 ) && node.content().starts_with(' ')
421 {
422 let comp = ParseNode::new(MdParseEnum::Word, " ".to_owned());
423 leaf_nodes.push(comp);
424 }
425
426 if node.children().is_empty() {
427 leaf_nodes.push(node);
428 } else {
429 for child in node.children_owned() {
430 leaf_nodes.append(&mut get_leaf_nodes(child));
431 }
432 }
433 leaf_nodes
434}
435
436pub fn print_from_root(root: &ComponentRoot) {
437 for child in root.components() {
438 print_component(child, 0);
439 }
440}
441
442fn print_component(component: &TextComponent, _depth: usize) {
443 println!(
444 "Component: {:?}, height: {}, y_offset: {}",
445 component.kind(),
446 component.height(),
447 component.y_offset()
448 );
449 component.meta_info().iter().for_each(|w| {
450 println!("Meta: {}, kind: {:?}", w.content(), w.kind());
451 });
452 component.content().iter().for_each(|w| {
453 w.iter().for_each(|w| {
454 println!("Content:{}, kind: {:?}", w.content(), w.kind());
455 });
456 });
457}
458
459#[derive(Debug, Clone)]
460pub struct ParseRoot {
461 file_name: Option<String>,
462 children: Vec<ParseNode>,
463}
464
465impl ParseRoot {
466 #[must_use]
467 pub fn new(file_name: Option<String>, children: Vec<ParseNode>) -> Self {
468 Self {
469 file_name,
470 children,
471 }
472 }
473
474 #[must_use]
475 pub fn children(&self) -> &Vec<ParseNode> {
476 &self.children
477 }
478
479 #[must_use]
480 pub fn children_owned(self) -> Vec<ParseNode> {
481 self.children
482 }
483
484 #[must_use]
485 pub fn file_name(&self) -> Option<String> {
486 self.file_name.clone()
487 }
488}
489
490#[derive(Debug, Clone, PartialEq, Eq)]
491pub struct ParseNode {
492 kind: MdParseEnum,
493 content: String,
494 children: Vec<ParseNode>,
495}
496
497impl ParseNode {
498 #[must_use]
499 pub fn new(kind: MdParseEnum, content: String) -> Self {
500 Self {
501 kind,
502 content,
503 children: Vec::new(),
504 }
505 }
506
507 #[must_use]
508 pub fn kind(&self) -> MdParseEnum {
509 self.kind
510 }
511
512 #[must_use]
513 pub fn content(&self) -> &str {
514 &self.content
515 }
516
517 pub fn add_children(&mut self, children: Vec<ParseNode>) {
518 self.children.extend(children);
519 }
520
521 #[must_use]
522 pub fn children(&self) -> &Vec<ParseNode> {
523 &self.children
524 }
525
526 #[must_use]
527 pub fn children_owned(self) -> Vec<ParseNode> {
528 self.children
529 }
530}
531
/// Kind tag carried by every [`ParseNode`].
///
/// Grammar rules are mapped onto these variants by the `From<Rule>`
/// implementation in this file; several rules can share one variant.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdParseEnum {
    /// Produced from the `alt_word` and `alt_text` rules.
    AltText,
    /// Produced from the `block_sep` rule.
    BlockSeparator,
    /// Produced from the `bold_word` rule.
    Bold,
    /// Produced from the `bold_italic_word` rule.
    BoldItalic,
    /// Produced from the `bold_italic` rule.
    BoldItalicStr,
    /// Produced from the `bold` rule.
    BoldStr,
    /// Produced from the `caution` rule.
    Caution,
    /// Produced from the `code_word` rule.
    Code,
    /// Produced from the `code_block` and `indented_code_block` rules.
    CodeBlock,
    /// Produced from the `code_line` rule.
    CodeBlockStr,
    /// Produced from the `indented_code_line` and `indented_code_newline` rules.
    CodeBlockStrSpaceIndented,
    /// Produced from the `code` rule.
    CodeStr,
    /// Produced from the `o_list_counter` and `digit` rules.
    Digit,
    /// Produced from the `footnote_ref` rule.
    FootnoteRef,
    /// Produced from the `footnote` rule.
    Footnote,
    /// Produced from the `h1`..`h6` and `heading` rules.
    Heading,
    /// Produced from the `horizontal_sep` rule.
    HorizontalSeparator,
    /// Produced from the `image` rule.
    Image,
    /// Produced from the `important` rule.
    // NOTE(review): spelled `Imortant` (sic). Renaming would break users of this
    // public enum, so the name is left as is.
    Imortant,
    /// Produced from the `indent` rule.
    Indent,
    /// Produced from the `inline_link` and `inline_link_wrapper` rules.
    InlineLink,
    /// Produced from the `italic_word` rule.
    Italic,
    /// Produced from the `italic` rule.
    ItalicStr,
    /// Produced from the `link_word`, `link_line`, `link` and `wiki_link_word` rules.
    Link,
    /// Produced from the `link_data` and `wiki_link_data` rules.
    LinkData,
    /// Produced from the `list_container` rule.
    ListContainer,
    /// Produced from the `note` rule.
    Note,
    /// Produced from the `o_list` rule.
    OrderedList,
    /// Produced from the `programming_language` rule.
    PLanguage,
    /// Produced from the `paragraph` rule and from a number of character/prefix
    /// rules that have no dedicated variant.
    Paragraph,
    /// Produced from the `quote` rule.
    Quote,
    /// Produced from the `sentence`, `t_sentence` and `footnote_sentence` rules.
    Sentence,
    /// Produced from the `strikethrough_word` rule.
    Strikethrough,
    /// Produced from the `strikethrough` rule.
    StrikethroughStr,
    /// Produced from the `table` rule.
    Table,
    /// Produced from the `table_cell` rule.
    TableCell,
    /// Produced from the `table_separator` rule.
    TableSeparator,
    /// Produced from the `task` rule.
    Task,
    /// Produced from the `task_complete` rule.
    TaskClosed,
    /// Produced from the `task_open` rule.
    TaskOpen,
    /// Produced from the `tip` rule.
    Tip,
    /// Produced from the `u_list` rule.
    UnorderedList,
    /// Produced from the `warning` rule.
    Warning,
    /// Produced from the `wiki_link_alone` rule.
    WikiLink,
    /// Produced from the `word`, `h_word`, `latex_word` and `t_word` rules.
    Word,
}
580
581impl From<Rule> for MdParseEnum {
582 fn from(value: Rule) -> Self {
583 match value {
584 Rule::word | Rule::h_word | Rule::latex_word | Rule::t_word => Self::Word,
585 Rule::indent => Self::Indent,
586 Rule::italic_word => Self::Italic,
587 Rule::italic => Self::ItalicStr,
588 Rule::bold_word => Self::Bold,
589 Rule::bold => Self::BoldStr,
590 Rule::bold_italic_word => Self::BoldItalic,
591 Rule::bold_italic => Self::BoldItalicStr,
592 Rule::strikethrough_word => Self::Strikethrough,
593 Rule::strikethrough => Self::StrikethroughStr,
594 Rule::code_word => Self::Code,
595 Rule::code => Self::CodeStr,
596 Rule::programming_language => Self::PLanguage,
597 Rule::link_word | Rule::link_line | Rule::link | Rule::wiki_link_word => Self::Link,
598 Rule::wiki_link_alone => Self::WikiLink,
599 Rule::inline_link | Rule::inline_link_wrapper => Self::InlineLink,
600 Rule::o_list_counter | Rule::digit => Self::Digit,
601 Rule::task_open => Self::TaskOpen,
602 Rule::task_complete => Self::TaskClosed,
603 Rule::code_line => Self::CodeBlockStr,
604 Rule::indented_code_line | Rule::indented_code_newline => {
605 Self::CodeBlockStrSpaceIndented
606 }
607 Rule::sentence | Rule::t_sentence | Rule::footnote_sentence => Self::Sentence,
608 Rule::table_cell => Self::TableCell,
609 Rule::table_separator => Self::TableSeparator,
610 Rule::u_list => Self::UnorderedList,
611 Rule::o_list => Self::OrderedList,
612 Rule::h1 | Rule::h2 | Rule::h3 | Rule::h4 | Rule::h5 | Rule::h6 | Rule::heading => {
613 Self::Heading
614 }
615 Rule::list_container => Self::ListContainer,
616 Rule::paragraph => Self::Paragraph,
617 Rule::code_block | Rule::indented_code_block => Self::CodeBlock,
618 Rule::table => Self::Table,
619 Rule::quote => Self::Quote,
620 Rule::task => Self::Task,
621 Rule::block_sep => Self::BlockSeparator,
622 Rule::horizontal_sep => Self::HorizontalSeparator,
623 Rule::link_data | Rule::wiki_link_data => Self::LinkData,
624 Rule::warning => Self::Warning,
625 Rule::note => Self::Note,
626 Rule::tip => Self::Tip,
627 Rule::important => Self::Imortant,
628 Rule::caution => Self::Caution,
629 Rule::p_char
630 | Rule::t_char
631 | Rule::link_char
632 | Rule::wiki_link_char
633 | Rule::normal
634 | Rule::t_normal
635 | Rule::latex
636 | Rule::comment
637 | Rule::txt
638 | Rule::task_prefix
639 | Rule::quote_prefix
640 | Rule::code_block_prefix
641 | Rule::table_prefix
642 | Rule::list_prefix
643 | Rule::forbidden_sentence_prefix => Self::Paragraph,
644 Rule::image => Self::Image,
645 Rule::alt_word | Rule::alt_text => Self::AltText,
646 Rule::footnote_ref => Self::FootnoteRef,
647 Rule::footnote => Self::Footnote,
648 Rule::heading_prefix
649 | Rule::alt_char
650 | Rule::b_char
651 | Rule::c_char
652 | Rule::c_line_char
653 | Rule::comment_char
654 | Rule::i_char
655 | Rule::latex_char
656 | Rule::quote_marking
657 | Rule::inline_link_char
658 | Rule::s_char
659 | Rule::WHITESPACE_S
660 | Rule::wiki_link
661 | Rule::footnote_ref_container => todo!(),
662 }
663 }
664}