Skip to main content

md_tui/
parser.rs

1use std::sync::atomic::{AtomicU32, Ordering};
2
3use image::ImageReader;
4use itertools::Itertools;
5use pest::{
6    Parser,
7    iterators::{Pair, Pairs},
8};
9use pest_derive::Parser;
10use ratatui::style::Color;
11
12use crate::nodes::{
13    image::ImageComponent,
14    root::{Component, ComponentRoot},
15    textcomponent::{TextComponent, TextNode},
16    word::{MetaData, Word, WordType},
17};
18
/// Global source of `<details>` identifiers, shared by every parse in the
/// process. It only ever counts upwards, so each parsed details summary gets
/// an ID of its own that the runtime fold-toggle and selector can address
/// regardless of where the block sits in the document.
static DETAILS_ID_COUNTER: AtomicU32 = AtomicU32::new(0);

/// Reserves and returns the next `<details>` ID.
///
/// The returned value is the counter as it was *before* the increment, so
/// the first call in a process yields `0`.
fn next_details_id() -> u32 {
    AtomicU32::fetch_add(&DETAILS_ID_COUNTER, 1, Ordering::Relaxed)
}
28
29/// Prepend `id` to the owning-details chain of every text component in
30/// `components`. Used after parsing a `<details>` body so that nested
31/// children (which may already carry inner IDs) correctly record the
32/// outer-to-inner containment order.
33fn tag_owning_details(components: &mut [Component], id: u32) {
34    for c in components.iter_mut() {
35        if let Component::TextComponent(tc) = c {
36            tc.prepend_owning_details_id(id);
37        }
38    }
39}
40
/// Markdown parser generated by `pest_derive` from the grammar file
/// `md.pest`. The same derive provides the `Rule` enum that the rest of this
/// module matches on; parsing starts from `Rule::txt`.
#[derive(Parser)]
#[grammar = "md.pest"]
pub struct MdParser;
44
45pub fn parse_markdown(name: Option<&str>, content: &str, width: u16) -> ComponentRoot {
46    let root: Pairs<'_, Rule> = if let Ok(file) = MdParser::parse(Rule::txt, content) {
47        file
48    } else {
49        return ComponentRoot::new(name.map(str::to_string), Vec::new());
50    };
51
52    let root_pair = root.into_iter().next().unwrap();
53
54    let children = parse_text(root_pair)
55        .children_owned()
56        .into_iter()
57        .dedup_by(|x, y| {
58            x.kind() == MdParseEnum::BlockSeparator && y.kind == MdParseEnum::BlockSeparator
59        })
60        .collect();
61
62    let parse_root = ParseRoot::new(name.map(str::to_string), children);
63
64    let mut root = node_to_component(parse_root).add_missing_components();
65
66    root.transform(width);
67    root.recompute_visibility();
68    root
69}
70
71fn parse_text(pair: Pair<'_, Rule>) -> ParseNode {
72    let content = if pair.as_rule() == Rule::code_line {
73        pair.as_str().replace('\t', "    ").replace('\r', "")
74    } else {
75        pair.as_str().replace('\n', " ")
76    };
77    let mut component = ParseNode::new(pair.as_rule().into(), content);
78    let children = parse_node_children(pair.into_inner());
79    component.add_children(children);
80    component
81}
82
83fn parse_node_children(pair: Pairs<'_, Rule>) -> Vec<ParseNode> {
84    let mut children = Vec::new();
85    for inner_pair in pair {
86        children.push(parse_text(inner_pair));
87    }
88    children
89}
90
91fn node_to_component(root: ParseRoot) -> ComponentRoot {
92    let mut children = Vec::new();
93    let name = root.file_name().clone();
94    for component in root.children_owned() {
95        children.extend(parse_components(component));
96    }
97
98    ComponentRoot::new(name, children)
99}
100
101fn parse_components(parse_node: ParseNode) -> Vec<Component> {
102    if parse_node.kind() == MdParseEnum::Details {
103        return parse_details(parse_node);
104    }
105    vec![parse_component(parse_node)]
106}
107
108fn parse_details(parse_node: ParseNode) -> Vec<Component> {
109    let mut header_text = String::from("Details");
110    let mut body_components: Vec<Component> = Vec::new();
111    let mut open_attr_present = false;
112
113    for child in parse_node.children_owned() {
114        match child.kind() {
115            MdParseEnum::DetailsOpenAttr => {
116                open_attr_present = true;
117            }
118            MdParseEnum::DetailsSummary => {
119                let text: String = get_leaf_nodes(child)
120                    .into_iter()
121                    .map(|n| n.content().to_string())
122                    .collect::<Vec<_>>()
123                    .join("");
124                let trimmed = text.trim().to_string();
125                if !trimmed.is_empty() {
126                    header_text = trimmed;
127                }
128            }
129            MdParseEnum::DetailsBody => {
130                for body_child in child.children_owned() {
131                    body_components.extend(parse_components(body_child));
132                }
133            }
134            _ => {
135                body_components.extend(parse_components(child));
136            }
137        }
138    }
139
140    let id = next_details_id();
141    tag_owning_details(&mut body_components, id);
142
143    let body_len = body_components.len();
144    let folded = !open_attr_present;
145
146    let mut out = Vec::with_capacity(1 + body_len);
147    out.push(Component::TextComponent(TextComponent::new(
148        TextNode::DetailsSummary {
149            id,
150            folded,
151            body_len,
152        },
153        vec![Word::new(header_text, WordType::Normal)],
154    )));
155    out.extend(body_components);
156    out
157}
158
/// Returns `true` when `url` starts with an `http://` or `https://` scheme.
///
/// URI schemes are case-insensitive, so `HTTPS://…` and `Http://…` are
/// accepted as well. Inputs that are too short, or whose prefix would end in
/// the middle of a multi-byte character, simply yield `false`.
fn is_url(url: &str) -> bool {
    ["http://", "https://"].iter().any(|scheme| {
        url.get(..scheme.len())
            .is_some_and(|prefix| prefix.eq_ignore_ascii_case(scheme))
    })
}
162
163fn parse_component(parse_node: ParseNode) -> Component {
164    match parse_node.kind() {
165        MdParseEnum::Image => {
166            let leaf_nodes = get_leaf_nodes(parse_node);
167            let mut alt_text = String::new();
168            let mut image = None;
169            for node in leaf_nodes {
170                if node.kind() == MdParseEnum::AltText {
171                    node.content().clone_into(&mut alt_text);
172                } else if is_url(node.content()) {
173                    #[cfg(feature = "network")]
174                    {
175                        let mut buf = Vec::new();
176                        image = ureq::get(node.content()).call().ok().and_then(|b| {
177                            let noe = b.into_body().read_to_vec();
178                            noe.ok().and_then(|b| {
179                                buf = b;
180                                image::load_from_memory(&buf).ok()
181                            })
182                        });
183                    }
184                    #[cfg(not(feature = "network"))]
185                    {
186                        image = None;
187                    }
188                } else {
189                    image = ImageReader::open(node.content())
190                        .ok()
191                        .and_then(|r| r.decode().ok());
192                }
193            }
194
195            if let Some(img) = image.as_ref() {
196                let height = img.height();
197
198                let comp = ImageComponent::new(img.to_owned(), height, alt_text.clone());
199
200                if let Some(comp) = comp {
201                    Component::Image(comp)
202                } else {
203                    let word = [Word::new(format!("[{alt_text}]"), WordType::Normal)];
204
205                    let comp = TextComponent::new(TextNode::Paragraph, word.into());
206                    Component::TextComponent(comp)
207                }
208            } else {
209                let word = [
210                    Word::new("Image".to_string(), WordType::Normal),
211                    Word::new(" ".to_owned(), WordType::Normal),
212                    Word::new("not".to_owned(), WordType::Normal),
213                    Word::new(" ".to_owned(), WordType::Normal),
214                    Word::new("found".to_owned(), WordType::Normal),
215                    Word::new("/".to_owned(), WordType::Normal),
216                    Word::new("fetched".to_owned(), WordType::Normal),
217                    Word::new(" ".to_owned(), WordType::Normal),
218                    Word::new(format!("[{alt_text}]"), WordType::Normal),
219                ];
220
221                let comp = TextComponent::new(TextNode::Paragraph, word.into());
222                Component::TextComponent(comp)
223            }
224        }
225
226        MdParseEnum::Task => {
227            let leaf_nodes = get_leaf_nodes(parse_node);
228            let mut words = Vec::new();
229            for node in leaf_nodes {
230                let word_type = WordType::from(node.kind());
231
232                let mut content: String = node
233                    .content()
234                    .chars()
235                    .dedup_by(|x, y| *x == ' ' && *y == ' ')
236                    .collect();
237
238                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
239                    let comp = Word::new(content.clone(), WordType::LinkData);
240                    words.push(comp);
241                }
242
243                if content.starts_with(' ') {
244                    content.remove(0);
245                    let comp = Word::new(" ".to_owned(), word_type);
246                    words.push(comp);
247                }
248                words.push(Word::new(content, word_type));
249            }
250            Component::TextComponent(TextComponent::new(TextNode::Task, words))
251        }
252
253        MdParseEnum::Quote => {
254            let leaf_nodes = get_leaf_nodes(parse_node);
255            let mut words = Vec::new();
256            for node in leaf_nodes {
257                let word_type = WordType::from(node.kind());
258                let mut content = node.content().to_owned();
259
260                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
261                    let comp = Word::new(content.clone(), WordType::LinkData);
262                    words.push(comp);
263                }
264                if content.starts_with(' ') {
265                    content.remove(0);
266                    let comp = Word::new(" ".to_owned(), word_type);
267                    words.push(comp);
268                }
269                words.push(Word::new(content, word_type));
270            }
271            if let Some(w) = words.first_mut() {
272                w.set_content(w.content().trim_start().to_owned());
273            }
274            Component::TextComponent(TextComponent::new(TextNode::Quote, words))
275        }
276
277        MdParseEnum::Heading => {
278            let indent = parse_node
279                .content()
280                .chars()
281                .take_while(|c| *c == '#')
282                .count();
283            let leaf_nodes = get_leaf_nodes(parse_node);
284            let mut words = Vec::new();
285
286            words.push(Word::new(
287                String::new(),
288                WordType::MetaInfo(MetaData::HeadingLevel(indent as u8)),
289            ));
290
291            if indent > 1 {
292                words.push(Word::new(
293                    format!("{} ", "#".repeat(indent)),
294                    WordType::Normal,
295                ));
296            }
297
298            for node in leaf_nodes {
299                let word_type = WordType::from(node.kind());
300                let mut content = node
301                    .content()
302                    .to_owned()
303                    .chars()
304                    .dedup_by(|x, y| *x == ' ' && *y == ' ')
305                    .collect::<String>();
306
307                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
308                    let comp = Word::new(content.clone(), WordType::LinkData);
309                    words.push(comp);
310                }
311
312                if content.starts_with(' ') {
313                    content.remove(0);
314                    let comp = Word::new(" ".to_owned(), word_type);
315                    words.push(comp);
316                }
317                words.push(Word::new(content, word_type));
318            }
319
320            if let Some(w) = words
321                .iter_mut()
322                .filter(|f| f.kind() == WordType::Normal)
323                .nth(1)
324                && indent > 1
325            {
326                w.set_content(w.content().trim_start().to_owned());
327            }
328
329            Component::TextComponent(TextComponent::new(TextNode::Heading, words))
330        }
331
332        MdParseEnum::Paragraph => {
333            let leaf_nodes = get_leaf_nodes(parse_node);
334            let mut words = Vec::new();
335            for node in leaf_nodes {
336                let word_type = WordType::from(node.kind());
337                let mut content = node.content().to_owned();
338
339                if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
340                    let comp = Word::new(content.clone(), WordType::LinkData);
341                    words.push(comp);
342                }
343
344                if content.starts_with(' ') {
345                    content.remove(0);
346                    let comp = Word::new(" ".to_owned(), word_type);
347                    words.push(comp);
348                }
349                words.push(Word::new(content, word_type));
350            }
351            if let Some(w) = words.first_mut() {
352                w.set_content(w.content().trim_start().to_owned());
353            }
354            Component::TextComponent(TextComponent::new(TextNode::Paragraph, words))
355        }
356
357        MdParseEnum::CodeBlock => {
358            let leaf_nodes = get_leaf_nodes(parse_node);
359            let mut words = Vec::new();
360
361            let mut space_indented = false;
362
363            for node in leaf_nodes {
364                if node.kind() == MdParseEnum::CodeBlockStrSpaceIndented {
365                    space_indented = true;
366                }
367                let word_type = WordType::from(node.kind());
368                let content = node.content().to_owned();
369                words.push(vec![Word::new(content, word_type)]);
370            }
371
372            if space_indented {
373                words.push(vec![Word::new(
374                    " ".to_owned(),
375                    WordType::CodeBlock(Color::Reset),
376                )]);
377            }
378
379            Component::TextComponent(TextComponent::new_formatted(TextNode::CodeBlock, words))
380        }
381
382        MdParseEnum::ListContainer => {
383            let mut words = Vec::new();
384            for child in parse_node.children_owned() {
385                let kind = child.kind();
386                let leaf_nodes = get_leaf_nodes(child);
387                let mut inner_words = Vec::new();
388                for node in leaf_nodes {
389                    let word_type = WordType::from(node.kind());
390
391                    let mut content = match node.kind() {
392                        MdParseEnum::Indent => node.content().to_owned(),
393                        _ => node
394                            .content()
395                            .chars()
396                            .dedup_by(|x, y| *x == ' ' && *y == ' ')
397                            .collect(),
398                    };
399
400                    if matches!(node.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
401                        let comp = Word::new(content.clone(), WordType::LinkData);
402                        inner_words.push(comp);
403                    }
404                    if content.starts_with(' ') && node.kind() != MdParseEnum::Indent {
405                        content.remove(0);
406                        let comp = Word::new(" ".to_owned(), word_type);
407                        inner_words.push(comp);
408                    }
409
410                    inner_words.push(Word::new(content, word_type));
411                }
412                if kind == MdParseEnum::UnorderedList {
413                    inner_words.push(Word::new(
414                        "X".to_owned(),
415                        WordType::MetaInfo(MetaData::UList),
416                    ));
417                    let list_symbol = Word::new("• ".to_owned(), WordType::ListMarker);
418                    inner_words.insert(1, list_symbol);
419                } else if kind == MdParseEnum::OrderedList {
420                    inner_words.push(Word::new(
421                        "X".to_owned(),
422                        WordType::MetaInfo(MetaData::OList),
423                    ));
424                }
425                words.push(inner_words);
426            }
427            Component::TextComponent(TextComponent::new_formatted(TextNode::List, words))
428        }
429
430        MdParseEnum::Table => {
431            let mut words = Vec::new();
432            let mut meta_info = Vec::new();
433            for cell in parse_node.children_owned() {
434                if cell.kind() == MdParseEnum::TableSeparator {
435                    meta_info.push(Word::new(
436                        cell.content().to_owned(),
437                        WordType::MetaInfo(MetaData::ColumnsCount),
438                    ));
439                    continue;
440                }
441                let mut inner_words = Vec::new();
442
443                if cell.children().is_empty() {
444                    words.push(inner_words);
445                    continue;
446                }
447
448                for word in get_leaf_nodes(cell) {
449                    let word_type = WordType::from(word.kind());
450                    let mut content = word.content().to_owned();
451
452                    if matches!(word.kind(), MdParseEnum::WikiLink | MdParseEnum::InlineLink) {
453                        let comp = Word::new(content.clone(), WordType::LinkData);
454                        inner_words.push(comp);
455                    }
456
457                    if content.starts_with(' ') {
458                        content.remove(0);
459                        let comp = Word::new(" ".to_owned(), word_type);
460                        inner_words.push(comp);
461                    }
462
463                    inner_words.push(Word::new(content, word_type));
464                }
465                words.push(inner_words);
466            }
467            Component::TextComponent(TextComponent::new_formatted_with_meta(
468                TextNode::Table(vec![], vec![]),
469                words,
470                meta_info,
471            ))
472        }
473
474        MdParseEnum::BlockSeparator => {
475            Component::TextComponent(TextComponent::new(TextNode::LineBreak, Vec::new()))
476        }
477        MdParseEnum::HorizontalSeparator => Component::TextComponent(TextComponent::new(
478            TextNode::HorizontalSeparator,
479            Vec::new(),
480        )),
481        MdParseEnum::Footnote => {
482            let mut words = Vec::new();
483            let foot_ref = parse_node.children().first().unwrap().to_owned();
484            words.push(Word::new(foot_ref.content, WordType::FootnoteData));
485            let _rest = parse_node
486                .children_owned()
487                .into_iter()
488                .skip(1)
489                .map(|e| e.content)
490                .collect::<String>();
491            words.push(Word::new(_rest, WordType::Footnote));
492            Component::TextComponent(TextComponent::new(TextNode::Footnote, words))
493        }
494        _ => todo!("Not implemented for {:?}", parse_node.kind()),
495    }
496}
497
498fn get_leaf_nodes(node: ParseNode) -> Vec<ParseNode> {
499    let mut leaf_nodes = Vec::new();
500
501    // Insert separator information between links
502    if node.kind() == MdParseEnum::Link {
503        let comp = if node.content().starts_with(' ') {
504            ParseNode::new(MdParseEnum::Word, " ".to_owned())
505        } else {
506            ParseNode::new(MdParseEnum::Word, String::new())
507        };
508        leaf_nodes.push(comp);
509    }
510
511    if matches!(
512        node.kind(),
513        MdParseEnum::CodeStr
514            | MdParseEnum::ItalicStr
515            | MdParseEnum::BoldStr
516            | MdParseEnum::BoldItalicStr
517            | MdParseEnum::StrikethroughStr
518    ) && node.content().starts_with(' ')
519    {
520        let comp = ParseNode::new(MdParseEnum::Word, " ".to_owned());
521        leaf_nodes.push(comp);
522    }
523
524    if node.children().is_empty() {
525        // Formatting containers (italic/bold/code/strikethrough) with no named
526        // children arise when the grammar matches only silent content — e.g.
527        // bare newlines between asterisks in CRLF documents.  Nothing to
528        // render; skip so WordType::from is never called on a container type.
529        if !matches!(
530            node.kind(),
531            MdParseEnum::ItalicStr
532                | MdParseEnum::BoldStr
533                | MdParseEnum::BoldItalicStr
534                | MdParseEnum::StrikethroughStr
535                | MdParseEnum::CodeStr
536        ) {
537            leaf_nodes.push(node);
538        }
539    } else {
540        for child in node.children_owned() {
541            leaf_nodes.append(&mut get_leaf_nodes(child));
542        }
543    }
544    leaf_nodes
545}
546
547pub fn print_from_root(root: &ComponentRoot) {
548    for child in root.components() {
549        print_component(child, 0);
550    }
551}
552
553fn print_component(component: &TextComponent, _depth: usize) {
554    println!(
555        "Component: {:?}, height: {}, y_offset: {}",
556        component.kind(),
557        component.height(),
558        component.y_offset()
559    );
560    component.meta_info().iter().for_each(|w| {
561        println!("Meta: {}, kind: {:?}", w.content(), w.kind());
562    });
563    component.content().iter().for_each(|w| {
564        w.iter().for_each(|w| {
565            println!("Content:{}, kind: {:?}", w.content(), w.kind());
566        });
567    });
568}
569
570#[derive(Debug, Clone)]
571pub struct ParseRoot {
572    file_name: Option<String>,
573    children: Vec<ParseNode>,
574}
575
576impl ParseRoot {
577    #[must_use]
578    pub fn new(file_name: Option<String>, children: Vec<ParseNode>) -> Self {
579        Self {
580            file_name,
581            children,
582        }
583    }
584
585    #[must_use]
586    pub fn children(&self) -> &Vec<ParseNode> {
587        &self.children
588    }
589
590    #[must_use]
591    pub fn children_owned(self) -> Vec<ParseNode> {
592        self.children
593    }
594
595    #[must_use]
596    pub fn file_name(&self) -> Option<String> {
597        self.file_name.clone()
598    }
599}
600
601#[derive(Debug, Clone, PartialEq, Eq)]
602pub struct ParseNode {
603    kind: MdParseEnum,
604    content: String,
605    children: Vec<ParseNode>,
606}
607
608impl ParseNode {
609    #[must_use]
610    pub fn new(kind: MdParseEnum, content: String) -> Self {
611        Self {
612            kind,
613            content,
614            children: Vec::new(),
615        }
616    }
617
618    #[must_use]
619    pub fn kind(&self) -> MdParseEnum {
620        self.kind
621    }
622
623    #[must_use]
624    pub fn content(&self) -> &str {
625        &self.content
626    }
627
628    pub fn add_children(&mut self, children: Vec<ParseNode>) {
629        self.children.extend(children);
630    }
631
632    #[must_use]
633    pub fn children(&self) -> &Vec<ParseNode> {
634        &self.children
635    }
636
637    #[must_use]
638    pub fn children_owned(self) -> Vec<ParseNode> {
639        self.children
640    }
641}
642
/// Kind tag carried by every [`ParseNode`]. Values are obtained from grammar
/// rules through `From<Rule>`; several rules can share one kind.
///
/// Variants ending in `Str` (e.g. `BoldStr`, `ItalicStr`, `CodeStr`) are the
/// inline formatting containers, while the un-suffixed counterparts (`Bold`,
/// `Italic`, `Code`, ...) tag the words inside them.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MdParseEnum {
    AltText,
    BlockSeparator,
    Bold,
    BoldItalic,
    BoldItalicStr,
    BoldStr,
    Caution,
    Code,
    CodeBlock,
    CodeBlockStr,
    CodeBlockStrSpaceIndented,
    CodeStr,
    Details,
    DetailsBody,
    DetailsOpenAttr,
    DetailsSummary,
    Digit,
    FootnoteRef,
    Footnote,
    Heading,
    HorizontalSeparator,
    Image,
    // NOTE(review): misspelling of "Important"; left as is because the
    // variant name is part of the public interface.
    Imortant,
    Indent,
    InlineLink,
    Italic,
    ItalicStr,
    Link,
    LinkData,
    ListContainer,
    Note,
    OrderedList,
    PLanguage,
    Paragraph,
    Quote,
    Sentence,
    Strikethrough,
    StrikethroughStr,
    Table,
    TableCell,
    TableSeparator,
    Task,
    TaskClosed,
    TaskOpen,
    Tip,
    UnorderedList,
    Warning,
    WikiLink,
    Word,
}
695
/// Maps every grammar [`Rule`] to the [`MdParseEnum`] kind used in the parse
/// tree. The match is exhaustive on purpose (no wildcard arm), so adding a
/// rule to the grammar forces a decision here.
impl From<Rule> for MdParseEnum {
    /// Converts `value` into its parse-tree kind.
    ///
    /// # Panics
    ///
    /// Panics (via `todo!()`) for the rules listed in the final arm.
    fn from(value: Rule) -> Self {
        match value {
            // Inline words and inline formatting.
            Rule::word | Rule::h_word | Rule::latex_word | Rule::t_word => Self::Word,
            Rule::indent => Self::Indent,
            Rule::italic_word_var_1 | Rule::italic_word_var_2 => Self::Italic,
            Rule::italic_var_1 | Rule::italic_var_2 => Self::ItalicStr,
            Rule::bold_word => Self::Bold,
            Rule::bold => Self::BoldStr,
            Rule::bold_italic_word => Self::BoldItalic,
            Rule::bold_italic => Self::BoldItalicStr,
            Rule::strikethrough_word => Self::Strikethrough,
            Rule::strikethrough => Self::StrikethroughStr,
            Rule::code_word => Self::Code,
            Rule::code => Self::CodeStr,
            Rule::programming_language => Self::PLanguage,
            // Links.
            Rule::link_word | Rule::link_line | Rule::link | Rule::wiki_link_word => Self::Link,
            Rule::wiki_link_alone => Self::WikiLink,
            Rule::inline_link | Rule::inline_link_wrapper => Self::InlineLink,
            // List counters and task markers.
            Rule::o_list_counter | Rule::digit => Self::Digit,
            Rule::task_open => Self::TaskOpen,
            Rule::task_complete => Self::TaskClosed,
            // Code block lines.
            Rule::code_line => Self::CodeBlockStr,
            Rule::indented_code_line | Rule::indented_code_newline => {
                Self::CodeBlockStrSpaceIndented
            }
            Rule::sentence | Rule::t_sentence | Rule::footnote_sentence => Self::Sentence,
            // Tables and lists.
            Rule::table_cell => Self::TableCell,
            Rule::table_separator => Self::TableSeparator,
            Rule::u_list => Self::UnorderedList,
            Rule::o_list => Self::OrderedList,
            // Block-level constructs.
            Rule::h1 | Rule::h2 | Rule::h3 | Rule::h4 | Rule::h5 | Rule::h6 | Rule::heading => {
                Self::Heading
            }
            Rule::list_container => Self::ListContainer,
            Rule::paragraph => Self::Paragraph,
            Rule::code_block | Rule::indented_code_block => Self::CodeBlock,
            Rule::table => Self::Table,
            Rule::quote => Self::Quote,
            Rule::task => Self::Task,
            Rule::block_sep => Self::BlockSeparator,
            Rule::horizontal_sep => Self::HorizontalSeparator,
            Rule::link_data | Rule::wiki_link_data => Self::LinkData,
            // `<details>` blocks.
            Rule::details => Self::Details,
            Rule::details_body => Self::DetailsBody,
            Rule::details_open_attr => Self::DetailsOpenAttr,
            Rule::summary | Rule::summary_text => Self::DetailsSummary,
            // Admonition markers.
            Rule::warning => Self::Warning,
            Rule::note => Self::Note,
            Rule::tip => Self::Tip,
            Rule::important => Self::Imortant,
            Rule::caution => Self::Caution,
            // Character-level, prefix and top-level rules are all folded
            // into `Paragraph`.
            Rule::p_char
            | Rule::t_char
            | Rule::link_char
            | Rule::wiki_link_char
            | Rule::normal
            | Rule::t_normal
            | Rule::latex
            | Rule::comment
            | Rule::txt
            | Rule::task_prefix
            | Rule::quote_prefix
            | Rule::code_block_prefix
            | Rule::table_prefix
            | Rule::list_prefix
            | Rule::forbidden_sentence_prefix => Self::Paragraph,
            // Images and footnotes.
            Rule::image => Self::Image,
            Rule::alt_word | Rule::alt_text => Self::AltText,
            Rule::footnote_ref => Self::FootnoteRef,
            Rule::footnote => Self::Footnote,
            // No kind is assigned to these rules; converting one panics.
            // NOTE(review): presumably they never surface as pairs (silent
            // rules) - confirm against `md.pest`.
            Rule::heading_prefix
            | Rule::alt_char
            | Rule::b_char
            | Rule::c_char
            | Rule::c_line_char
            | Rule::comment_char
            | Rule::i_char_var_1
            | Rule::i_char_var_2
            | Rule::latex_char
            | Rule::quote_marking
            | Rule::inline_link_char
            | Rule::s_char
            | Rule::WHITESPACE_S
            | Rule::wiki_link
            | Rule::footnote_ref_container
            | Rule::details_open_tag
            | Rule::details_close_tag
            | Rule::summary_open_tag
            | Rule::summary_close_tag => todo!(),
        }
    }
}
789
790#[cfg(test)]
791mod tests {
792    use super::*;
793    use crate::nodes::textcomponent::TextNode;
794
795    fn component_kinds(md: &str) -> Vec<TextNode> {
796        parse_markdown(None, md, 80)
797            .components()
798            .iter()
799            .map(|c| c.kind())
800            .collect()
801    }
802
803    #[test]
804    fn italic_with_trailing_space_followed_by_italic_crlf() {
805        // italic_var_2 can match " *\r\n\r\n*" (space + asterisk + blank line +
806        // asterisk) with only silent NEWLINE iterations in the body, producing an
807        // ItalicStr node with no named children.  That must not panic.
808        let md = "*Section A*\r\n\r\n*Item with trailing space *\r\n\r\n*Section B*\r\n";
809        let kinds = component_kinds(md);
810        assert!(!kinds.is_empty());
811    }
812
813    fn has_details_summary(kinds: &[TextNode]) -> bool {
814        kinds
815            .iter()
816            .any(|k| matches!(k, TextNode::DetailsSummary { .. }))
817    }
818
819    #[test]
820    fn parses_details_with_summary() {
821        let md = "<details>\n<summary>Title</summary>\n\nBody paragraph.\n\n</details>\n";
822        let kinds = component_kinds(md);
823        assert!(
824            has_details_summary(&kinds),
825            "expected DetailsSummary header, got {kinds:?}"
826        );
827        assert!(
828            kinds.iter().any(|k| matches!(k, TextNode::Paragraph)),
829            "expected body paragraph, got {kinds:?}"
830        );
831    }
832
833    #[test]
834    fn parses_details_open_attribute_starts_unfolded() {
835        // `<details open>` honors HTML semantics: initial state expanded.
836        let md = "<details open>\n<summary>S</summary>\n\nbody\n\n</details>\n";
837        let kinds = component_kinds(md);
838        let folded = kinds.iter().find_map(|k| match k {
839            TextNode::DetailsSummary { folded, .. } => Some(*folded),
840            _ => None,
841        });
842        assert_eq!(
843            folded,
844            Some(false),
845            "<details open> should start unfolded, got {kinds:?}"
846        );
847    }
848
849    #[test]
850    fn parses_details_without_open_starts_folded() {
851        // Plain `<details>` (no `open` attribute) starts collapsed.
852        let md = "<details>\n<summary>S</summary>\n\nbody\n\n</details>\n";
853        let kinds = component_kinds(md);
854        let folded = kinds.iter().find_map(|k| match k {
855            TextNode::DetailsSummary { folded, .. } => Some(*folded),
856            _ => None,
857        });
858        assert_eq!(
859            folded,
860            Some(true),
861            "<details> without `open` should start folded, got {kinds:?}"
862        );
863    }
864
865    #[test]
866    fn parses_details_without_summary() {
867        let md = "<details>\n\nplain body\n\n</details>\n";
868        let kinds = component_kinds(md);
869        assert!(has_details_summary(&kinds));
870    }
871
872    #[test]
873    fn parses_uppercase_details() {
874        let md = "<DETAILS>\n<SUMMARY>Caps</SUMMARY>\n\nbody\n\n</DETAILS>\n";
875        let kinds = component_kinds(md);
876        assert!(
877            has_details_summary(&kinds),
878            "case-insensitive matching failed, got {kinds:?}"
879        );
880    }
881
882    #[test]
883    fn malformed_details_does_not_panic() {
884        let md = "<details>\n<summary>S</summary>\n\nbody never closes\n";
885        let _ = parse_markdown(None, md, 80);
886    }
887
888    #[test]
889    fn nested_details_produces_two_summary_headers() {
890        let md = "<details>\n<summary>Outer</summary>\n\n<details>\n<summary>Inner</summary>\n\ninner body\n\n</details>\n\n</details>\n";
891        let kinds = component_kinds(md);
892        let summary_count = kinds
893            .iter()
894            .filter(|k| matches!(k, TextNode::DetailsSummary { .. }))
895            .count();
896        assert_eq!(summary_count, 2, "expected 2 DetailsSummary, got {kinds:?}");
897    }
898
899    #[test]
900    fn html_close_tag_not_autolink() {
901        let md = "</details>";
902        let kinds = component_kinds(md);
903        assert!(
904            kinds
905                .iter()
906                .all(|k| !matches!(k, TextNode::DetailsSummary { .. })),
907            "stray close tag shouldn't produce DetailsSummary"
908        );
909    }
910
911    #[test]
912    fn issue_169_example_parses() {
913        // The exact example from issue #169 — two <details> blocks with tables.
914        let md = "# Dependencies\n\n\
915            <details>\n<summary>Explicit dependencies</summary>\n\n\
916            |Dependency|Before|After|\n|-|-|-|\n|bpy|0.10.1|2.10.1|\n\n\
917            </details>\n\n\
918            <details open>\n<summary>Implicit dependencies</summary>\n\n\
919            |Dependency|Before|After|\n|-|-|-|\n|python|0.10.0|0.10.1|\n\n\
920            </details>\n";
921        let kinds = component_kinds(md);
922        let summary_count = kinds
923            .iter()
924            .filter(|k| matches!(k, TextNode::DetailsSummary { .. }))
925            .count();
926        assert_eq!(
927            summary_count, 2,
928            "expected 2 summary headers, got {kinds:?}"
929        );
930        let table_count = kinds
931            .iter()
932            .filter(|k| matches!(k, TextNode::Table(_, _)))
933            .count();
934        assert_eq!(
935            table_count, 2,
936            "expected 2 tables inside details, got {kinds:?}"
937        );
938    }
939
940    #[test]
941    fn plain_paragraph_unaffected() {
942        let md = "Just a paragraph.\n";
943        let kinds = component_kinds(md);
944        assert!(!has_details_summary(&kinds));
945    }
946
947    #[test]
948    fn nested_details_tags_inner_components_with_both_ids() {
949        let md = "<details>\n<summary>Outer</summary>\n\n<details>\n<summary>Inner</summary>\n\ninner body\n\n</details>\n\n</details>\n";
950        let root = parse_markdown(None, md, 80);
951        let comps = root.components();
952        // Find the inner summary's owning chain — it should have exactly
953        // one id (the outer's). The inner body should have two (outer,
954        // inner — outermost-first ordering).
955        let summaries: Vec<&[u32]> = comps
956            .iter()
957            .filter(|c| matches!(c.kind(), TextNode::DetailsSummary { .. }))
958            .map(|c| c.owning_details_ids())
959            .collect();
960        assert_eq!(summaries.len(), 2, "expected 2 summaries");
961        // First (outer) summary has no owning details. Second (inner)
962        // summary has one: the outer.
963        assert_eq!(summaries[0].len(), 0, "outer summary has no owners");
964        assert_eq!(
965            summaries[1].len(),
966            1,
967            "inner summary belongs to one outer details body"
968        );
969
970        // The inner body paragraph belongs to both outer + inner.
971        let inner_para = comps
972            .iter()
973            .find(|c| matches!(c.kind(), TextNode::Paragraph) && c.owning_details_ids().len() == 2)
974            .expect("inner body paragraph with two owning details ids");
975        assert_eq!(
976            inner_para.owning_details_ids().len(),
977            2,
978            "inner body paragraph belongs to outer and inner"
979        );
980    }
981
982    #[test]
983    fn default_collapsed_hides_body_components() {
984        // A plain (collapsed-by-default) <details> hides its body
985        // components, so they contribute zero height to the layout.
986        let md = "<details>\n<summary>S</summary>\n\nhidden body\n\n</details>\n";
987        let root = parse_markdown(None, md, 80);
988        let comps = root.components();
989        let body_para = comps
990            .iter()
991            .find(|c| matches!(c.kind(), TextNode::Paragraph))
992            .expect("expected body paragraph component");
993        assert!(body_para.is_hidden(), "collapsed body should be hidden");
994        assert_eq!(
995            body_para.height(),
996            0,
997            "hidden component height must be 0 so set_scroll positions correctly"
998        );
999    }
1000
1001    #[test]
1002    fn open_attribute_keeps_body_visible() {
1003        let md = "<details open>\n<summary>S</summary>\n\nvisible body\n\n</details>\n";
1004        let root = parse_markdown(None, md, 80);
1005        let comps = root.components();
1006        let body_para = comps
1007            .iter()
1008            .find(|c| matches!(c.kind(), TextNode::Paragraph))
1009            .expect("expected body paragraph component");
1010        assert!(!body_para.is_hidden(), "open body should be visible");
1011    }
1012
1013    #[test]
1014    fn toggle_fold_hides_and_reveals_body() {
1015        let md = "<details open>\n<summary>S</summary>\n\nbody text\n\n</details>\n";
1016        let mut root = parse_markdown(None, md, 80);
1017        let initial_height = root.height();
1018        // Select the only details summary, then toggle it folded.
1019        root.select_details(0).expect("select_details");
1020        root.toggle_selected_details().expect("toggle");
1021        let folded_height = root.height();
1022        assert!(
1023            folded_height < initial_height,
1024            "folding should reduce total height ({folded_height} < {initial_height})"
1025        );
1026        // Toggle again to re-expand.
1027        root.toggle_selected_details().expect("untoggle");
1028        let unfolded_height = root.height();
1029        assert_eq!(
1030            unfolded_height, initial_height,
1031            "unfolding restores original height"
1032        );
1033    }
1034
1035    #[test]
1036    fn outer_fold_hides_inner_summary() {
1037        let md = "<details open>\n<summary>Outer</summary>\n\n<details open>\n<summary>Inner</summary>\n\ninner body\n\n</details>\n\n</details>\n";
1038        let mut root = parse_markdown(None, md, 80);
1039        // Fold the outer details — the inner summary header AND its body
1040        // should both become hidden.
1041        root.select_details(0).expect("select outer");
1042        root.toggle_selected_details().expect("fold outer");
1043
1044        let mut inner_summary_hidden = false;
1045        let mut inner_body_hidden = false;
1046        for c in root.components() {
1047            if matches!(c.kind(), TextNode::DetailsSummary { .. })
1048                && c.owning_details_ids().len() == 1
1049                && c.is_hidden()
1050            {
1051                inner_summary_hidden = true;
1052            }
1053            if matches!(c.kind(), TextNode::Paragraph)
1054                && c.owning_details_ids().len() == 2
1055                && c.is_hidden()
1056            {
1057                inner_body_hidden = true;
1058            }
1059        }
1060        assert!(
1061            inner_summary_hidden,
1062            "inner summary should be hidden when outer is folded"
1063        );
1064        assert!(
1065            inner_body_hidden,
1066            "inner body should be hidden when outer is folded"
1067        );
1068
1069        // num_details reports only currently-visible summaries — the
1070        // inner one disappears from the selector cycle while outer is
1071        // folded.
1072        assert_eq!(
1073            root.num_details(),
1074            1,
1075            "only the outer summary is visible when outer is folded"
1076        );
1077    }
1078
1079    #[test]
1080    fn linebreak_inherits_shared_owning_ids() {
1081        // The block-separator-inserted LineBreaks should inherit the
1082        // owning-details chain that is shared between their neighbors,
1083        // so a LineBreak between two body components is hidden together
1084        // with them when the surrounding details folds.
1085        let md = "<details>\n<summary>S</summary>\n\nfirst body\n\nsecond body\n\n</details>\n";
1086        let root = parse_markdown(None, md, 80);
1087        let comps = root.components();
1088        let interior_linebreak = comps.iter().find(|c| {
1089            matches!(c.kind(), TextNode::LineBreak) && !c.owning_details_ids().is_empty()
1090        });
1091        assert!(
1092            interior_linebreak.is_some(),
1093            "expected a LineBreak inside the details body to inherit its owners"
1094        );
1095        let lb = interior_linebreak.unwrap();
1096        assert!(
1097            lb.is_hidden(),
1098            "LineBreak inside a folded details body should be hidden"
1099        );
1100    }
1101
1102    #[test]
1103    fn inline_code_in_heading_parses_as_single_heading() {
1104        // Backtick code spans inside headings must not be split into a separate
1105        // Paragraph component — the whole line should be one Heading node.
1106        let md = "## Title — `Code in title`\n";
1107        let kinds = component_kinds(md);
1108        let heading_count = kinds.iter().filter(|k| **k == TextNode::Heading).count();
1109        let paragraph_count = kinds.iter().filter(|k| **k == TextNode::Paragraph).count();
1110        assert_eq!(
1111            heading_count, 1,
1112            "expected exactly one Heading, got {kinds:?}"
1113        );
1114        assert_eq!(
1115            paragraph_count, 0,
1116            "inline code must not spill into a Paragraph, got {kinds:?}"
1117        );
1118    }
1119
1120    #[test]
1121    fn inline_code_in_heading_contains_code_word() {
1122        let md = "## Service — `MyService`\n";
1123        let root = parse_markdown(None, md, 80);
1124        let comps = root.components();
1125        let heading = comps
1126            .iter()
1127            .find(|c| c.kind() == TextNode::Heading)
1128            .expect("no Heading found");
1129        let has_code_word = heading
1130            .content()
1131            .iter()
1132            .flatten()
1133            .any(|w| w.kind() == WordType::Code);
1134        assert!(
1135            has_code_word,
1136            "heading content should contain a Code word, got {:?}",
1137            heading.content()
1138        );
1139    }
1140
1141    #[test]
1142    fn inline_code_on_line_after_heading_stays_separate() {
1143        // A heading must not swallow a following line that begins with an inline
1144        // code span: `code`'s leading optional NEWLINE must not leak across the
1145        // heading boundary.
1146        let md = "# Heading\n`foo` bar\n";
1147        let kinds = component_kinds(md);
1148        let heading_count = kinds.iter().filter(|k| **k == TextNode::Heading).count();
1149        let paragraph_count = kinds.iter().filter(|k| **k == TextNode::Paragraph).count();
1150        assert_eq!(
1151            heading_count, 1,
1152            "expected exactly one Heading, got {kinds:?}"
1153        );
1154        assert_eq!(
1155            paragraph_count, 1,
1156            "code line after heading must be its own Paragraph, got {kinds:?}"
1157        );
1158    }
1159    fn heading_text(md: &str) -> String {
1160        let root = parse_markdown(None, md, 80);
1161        let comps = root.components();
1162        comps
1163            .iter()
1164            .find(|c| c.kind() == TextNode::Heading)
1165            .expect("no Heading found")
1166            .content()
1167            .iter()
1168            .flatten()
1169            .filter(|w| !matches!(w.kind(), WordType::MetaInfo(_)))
1170            .map(|w| w.content())
1171            .collect()
1172    }
1173
1174    #[test]
1175    fn inline_code_in_heading_keeps_surrounding_spaces() {
1176        // A code span in a heading must keep the separating space to following
1177        // text instead of gluing onto it.
1178        assert_eq!(heading_text("## Title `Code` more\n"), "## Title Code more");
1179        // Leading code span keeps the space to following text.
1180        assert_eq!(heading_text("# `Lead` rest\n"), "Lead rest");
1181        // Multiple code spans all stay spaced.
1182        assert_eq!(heading_text("## a `b` c `d` e\n"), "## a b c d e");
1183        // Code-only heading is unaffected.
1184        assert_eq!(heading_text("# `OnlyCode`\n"), "OnlyCode");
1185        // No source space between code and text stays glued.
1186        assert_eq!(heading_text("## word `code`tight\n"), "## word codetight");
1187    }
1188
1189    #[test]
1190    fn heading_plain_text_unchanged() {
1191        assert_eq!(heading_text("# Plain heading\n"), "Plain heading");
1192        assert_eq!(
1193            heading_text("##    Many   spaces   here\n"),
1194            "## Many spaces here"
1195        );
1196    }
1197}