Skip to main content

aptu_coder_core/languages/
markdown.rs

// SPDX-FileCopyrightText: 2026 aptu-coder contributors
// SPDX-License-Identifier: Apache-2.0
//! Markdown language handler for tree-sitter-md.
//!
//! Extracts ATX headings (`# Heading`) and setext headings (underlined with `===` or `---`)
//! as function-equivalent elements. Fenced code block contents are not extracted.
7
/// Tree-sitter query that captures Markdown headings as elements.
///
/// Two patterns are defined: one for ATX headings (`# Title`) and one for setext
/// headings (text underlined with `=` or `-`). In each pattern the whole heading node
/// is captured as `@function` and the node in its `heading_content` field is captured
/// as `@func_name`.
///
/// The `heading_content: (_)` field syntax is required because `heading_content` is a
/// field name on the heading nodes, not a standalone node type.
pub const ELEMENT_QUERY: &str = r"
(atx_heading heading_content: (_) @func_name) @function
(setext_heading heading_content: (_) @func_name) @function
";
17
/// Tree-sitter call query for Markdown.
///
/// Intentionally empty: Markdown has no call sites to capture.
pub const CALL_QUERY: &str = "";
20
#[cfg(all(test, feature = "lang-markdown"))]
mod tests {
    use super::ELEMENT_QUERY;
    use tree_sitter::{Language, Parser, Query, QueryCursor, StreamingIterator};

    /// Parses `src` with the tree-sitter-md grammar, runs `query_str` over the tree, and
    /// returns the trimmed source text of every `func_name` capture, in match order.
    ///
    /// # Panics
    ///
    /// Panics (failing the calling test) if the language cannot be set, parsing yields
    /// no tree, `query_str` is not a valid query, or the query has no `func_name` capture.
    fn parse_and_query(src: &str, query_str: &str) -> Vec<String> {
        // Convert the grammar handle once and reuse it for both parser and query.
        let language: Language = tree_sitter_md::LANGUAGE.into();

        let mut parser = Parser::new();
        parser
            .set_language(&language)
            .expect("failed to set language");
        let tree = parser.parse(src, None).expect("parse failed");

        let query = Query::new(&language, query_str).expect("invalid query");
        let name_capture = query
            .capture_index_for_name("func_name")
            .expect("no func_name capture");

        let mut cursor = QueryCursor::new();
        let mut matches = cursor.matches(&query, tree.root_node(), src.as_bytes());

        let mut names = Vec::new();
        // `matches` is advanced through the `StreamingIterator` trait, so it is driven
        // with `while let` rather than a `for` loop.
        while let Some(found) = matches.next() {
            names.extend(
                found
                    .captures
                    .iter()
                    .filter(|cap| cap.index == name_capture)
                    .map(|cap| {
                        let (start, end) = (cap.node.start_byte(), cap.node.end_byte());
                        // Trim so surrounding whitespace/newlines in the captured node
                        // do not leak into the reported heading text.
                        src[start..end].trim().to_owned()
                    }),
            );
        }
        names
    }

    /// ATX headings are extracted with the correct heading text.
    #[test]
    fn test_atx_headings_extracted() {
        let names = parse_and_query(
            "# Introduction\n\n## Installation\n\n### Details\n",
            ELEMENT_QUERY,
        );
        assert_eq!(names, ["Introduction", "Installation", "Details"]);
    }

    /// Setext headings (underlined with `=` or `-`) are extracted as functions.
    #[test]
    fn test_setext_heading_extracted() {
        let names = parse_and_query("Overview\n========\n\nSetup\n-----\n", ELEMENT_QUERY);
        assert_eq!(names, ["Overview", "Setup"]);
    }

    /// A file with no headings returns zero functions and no error.
    #[test]
    fn test_no_headings_returns_empty() {
        let names = parse_and_query("Just some prose.\n\nNo headings here.\n", ELEMENT_QUERY);
        assert!(names.is_empty(), "unexpected headings: {names:?}");
    }

    /// A `#` inside a fenced code block is NOT extracted as a heading.
    #[test]
    fn test_code_fence_not_extracted() {
        let names = parse_and_query(
            "```python\n# not a heading\nprint('hello')\n```\n",
            ELEMENT_QUERY,
        );
        assert!(names.is_empty(), "unexpected headings: {names:?}");
    }
}