weaver_lib/
document_toc.rs

1use crate::{document::Heading, slugify::slugify};
2use markdown::{ParseOptions, mdast::Node};
3
4// Helper function to recursively extract text from inline nodes
5// This is needed to get the raw text content of a heading or other inline structures
6fn extract_text_from_mdast_inline(node: &Node) -> String {
7    let mut text = String::new();
8    match &node {
9        Node::Text(text_node) => text.push_str(&text_node.value),
10        Node::Code(code_node) => text.push_str(&code_node.value),
11        // Add other inline node types you want to include text from (e.g., Strong, Emphasis, Link)
12        // These nodes typically have children, so we need to recurse
13        Node::Emphasis(_) | Node::Strong(_) | Node::Link(_) => {
14            if let Some(children) = node.children() {
15                for child in children.iter() {
16                    text.push_str(&extract_text_from_mdast_inline(child)); // Recurse
17                }
18            }
19        }
20        _ => {
21            // For other node types, if they have children, recurse into them
22            if let Some(children) = node.children() {
23                for child in children.iter() {
24                    text.push_str(&extract_text_from_mdast_inline(child));
25                }
26            }
27        }
28    }
29    text
30}
31
32fn collect_mdast_headings_to_map(node: &Node, headings_map: &mut Vec<Heading>) {
33    // Check if the current node is a Heading
34    if let Node::Heading(heading) = &node {
35        let heading_text = if let Some(children) = node.children() {
36            let mut text = String::new();
37            for child in children.iter() {
38                text.push_str(&extract_text_from_mdast_inline(child));
39            }
40            text
41        } else {
42            String::new()
43        };
44        let slug = slugify(&heading_text);
45        if !slug.is_empty() {
46            headings_map.push(Heading {
47                slug,
48                text: heading_text,
49                depth: heading.depth,
50            });
51        }
52    }
53
54    // Recursively visit children of the current node.
55    // Headings can appear as children of Root, BlockQuote, List, ListItem, etc.
56    if let Some(children) = node.children() {
57        for child in children.iter() {
58            collect_mdast_headings_to_map(child, headings_map);
59        }
60    }
61}
62
63pub fn toc_from_document(markdown: &str) -> Vec<Heading> {
64    let mut toc_map = vec![];
65    let ast = markdown::to_mdast(markdown, &ParseOptions::gfm()).unwrap();
66    collect_mdast_headings_to_map(&ast, &mut toc_map);
67    toc_map
68}
69
#[cfg(test)]
mod test {
    use std::sync::Arc;

    use tokio::sync::Mutex;

    use crate::document::Document;

    use super::*;

    /// Checks that the `with_headings.md` fixture yields one TOC entry per
    /// heading level, 1 through 6, each with the matching text and slug.
    #[tokio::test]
    async fn test_markdown_toc_generation() {
        // Fixture paths are resolved relative to the current working directory.
        let working_dir = std::env::current_dir().unwrap().display().to_string();
        let fixtures_dir = format!("{}/test_fixtures/markdown", working_dir);
        let fixture_file = format!("{}/with_headings.md", fixtures_dir);
        let shared_doc = Arc::new(Mutex::new(Document::new_from_path(
            fixtures_dir.into(),
            fixture_file.into(),
        )));

        // Expected entries: "heading N" / "heading-N" for N in 1..=6.
        let expected: Vec<Heading> = (1..=6)
            .map(|depth| Heading {
                depth,
                text: format!("heading {}", depth),
                slug: format!("heading-{}", depth),
            })
            .collect();

        let guard = shared_doc.lock().await;
        assert_eq!(expected, toc_from_document(guard.markdown.as_str()));
    }
}