weaver_lib/
document_toc.rs

1use markdown::{ParseOptions, mdast::Node};
2use slug::slugify;
3
4use crate::document::Heading;
5
6// Helper function to recursively extract text from inline nodes
7// This is needed to get the raw text content of a heading or other inline structures
8fn extract_text_from_mdast_inline(node: &Node) -> String {
9    let mut text = String::new();
10    match &node {
11        Node::Text(text_node) => text.push_str(&text_node.value),
12        Node::Code(code_node) => text.push_str(&code_node.value),
13        // Add other inline node types you want to include text from (e.g., Strong, Emphasis, Link)
14        // These nodes typically have children, so we need to recurse
15        Node::Emphasis(_) | Node::Strong(_) | Node::Link(_) => {
16            if let Some(children) = node.children() {
17                for child in children.iter() {
18                    text.push_str(&extract_text_from_mdast_inline(child)); // Recurse
19                }
20            }
21        }
22        _ => {
23            // For other node types, if they have children, recurse into them
24            if let Some(children) = node.children() {
25                for child in children.iter() {
26                    text.push_str(&extract_text_from_mdast_inline(child));
27                }
28            }
29        }
30    }
31    text
32}
33
34fn collect_mdast_headings_to_map(node: &Node, headings_map: &mut Vec<Heading>) {
35    // Check if the current node is a Heading
36    if let Node::Heading(heading) = &node {
37        let heading_text = if let Some(children) = node.children() {
38            let mut text = String::new();
39            for child in children.iter() {
40                text.push_str(&extract_text_from_mdast_inline(child));
41            }
42            text
43        } else {
44            String::new()
45        };
46        let slug = slugify(&heading_text);
47        if !slug.is_empty() {
48            headings_map.push(Heading {
49                slug,
50                text: heading_text,
51                depth: heading.depth,
52            });
53        }
54    }
55
56    // Recursively visit children of the current node.
57    // Headings can appear as children of Root, BlockQuote, List, ListItem, etc.
58    if let Some(children) = node.children() {
59        for child in children.iter() {
60            collect_mdast_headings_to_map(child, headings_map);
61        }
62    }
63}
64
65pub fn toc_from_document(markdown: &str) -> Vec<Heading> {
66    let mut toc_map = vec![];
67    let ast = markdown::to_mdast(markdown, &ParseOptions::gfm()).unwrap();
68    collect_mdast_headings_to_map(&ast, &mut toc_map);
69    toc_map
70}
71
#[cfg(test)]
mod test {
    use crate::document::Document;

    use super::*;

    /// Checks that the `with_headings.md` fixture produces one TOC entry for
    /// each heading depth from 1 to 6, with matching text and slug.
    #[tokio::test]
    async fn test_markdown_toc_generation() {
        let working_dir = std::env::current_dir().unwrap().display().to_string();
        let fixture_path = format!("{}/test_fixtures/markdown/with_headings.md", working_dir);
        // The document is owned by this test and only read, so it needs no
        // `Arc`/`Mutex` wrapper.
        let document = Document::new_from_path(fixture_path.into());

        let expected: Vec<Heading> = (1..=6)
            .map(|depth| Heading {
                depth,
                text: format!("heading {}", depth),
                slug: format!("heading-{}", depth),
            })
            .collect();

        assert_eq!(expected, toc_from_document(document.markdown.as_str()));
    }
}