Skip to main content

mdbook_treesitter/
lib.rs

1//! `mdbook-treesitter` — an mdBook preprocessor that replaces
2//! `{{ #treesitter <path>#<query>?<params> }}` directives with the code
3//! extracted from the referenced source file using tree-sitter queries.
4//!
5//! # Directive syntax
6//!
7//! ```markdown
8//! {{ #treesitter path/to/file.rs }}
9//! {{ #treesitter path/to/file.rs#query_name }}
10//! {{ #treesitter path/to/file.rs#query_name?param1=val1&param2=val2 }}
11//! ```
12//!
13//! The space around the braces is optional:
14//!
15//! ```markdown
16//! {{#treesitter path/to/file.rs#doc_comment?name=Foo}}
17//! ```
18
19pub mod config;
20pub mod language;
21pub mod query;
22
23use std::collections::HashMap;
24use std::path::Path;
25
26use anyhow::{Context, Result};
27use mdbook_preprocessor::book::{Book, BookItem};
28use mdbook_preprocessor::{Preprocessor, PreprocessorContext};
29use regex::Regex;
30
31use config::Config;
32use language::LanguageEntry;
33
34// ─── Regex ────────────────────────────────────────────────────────────────────
35
36/// Matches an optional leading `\` then `{{ #treesitter ... }}`.
37/// Group 1: the backslash (present → escaped, do not expand).
38/// Group 2: the directive inner text.
39fn directive_regex() -> Regex {
40    Regex::new(r"(\\)?\{\{[\s]*#treesitter\s+([^\}]+?)\s*\}\}").unwrap()
41}
42
43// ─── Parsed directive ─────────────────────────────────────────────────────────
44
45/// A parsed `#treesitter` directive.
46#[derive(Debug, PartialEq, Eq)]
47pub struct Directive {
48    /// Path to the source file, relative to the chapter source file.
49    pub file_path: String,
50    /// Optional query name (the part after `#`).
51    pub query_name: Option<String>,
52    /// Query parameters (the part after `?`).
53    pub params: HashMap<String, String>,
54}
55
56impl Directive {
57    /// Parse a directive from its inner text, e.g.
58    /// `../../foo.rs#doc_comment?name=Foo`.
59    pub fn parse(inner: &str) -> Result<Self> {
60        // Split on `#` first to separate the file path from query+params.
61        let (file_part, rest) = if let Some(pos) = inner.find('#') {
62            (&inner[..pos], Some(&inner[pos + 1..]))
63        } else {
64            (inner, None)
65        };
66
67        let file_path = file_part.trim().to_string();
68
69        let (query_name, params) = match rest {
70            None => (None, HashMap::new()),
71            Some(rest) => {
72                let (qname, params_str) = if let Some(pos) = rest.find('?') {
73                    (&rest[..pos], Some(&rest[pos + 1..]))
74                } else {
75                    (rest, None)
76                };
77
78                let params = match params_str {
79                    None => HashMap::new(),
80                    Some(ps) => ps
81                        .split('&')
82                        .filter(|s| !s.is_empty())
83                        .filter_map(|kv| {
84                            let mut parts = kv.splitn(2, '=');
85                            let k = parts.next()?.to_string();
86                            let v = parts.next().unwrap_or("").to_string();
87                            Some((k, v))
88                        })
89                        .collect(),
90                };
91
92                let qname = qname.trim();
93                (
94                    if qname.is_empty() {
95                        None
96                    } else {
97                        Some(qname.to_string())
98                    },
99                    params,
100                )
101            }
102        };
103
104        Ok(Directive {
105            file_path,
106            query_name,
107            params,
108        })
109    }
110}
111
112// ─── Preprocessor ─────────────────────────────────────────────────────────────
113
114/// The `mdbook-treesitter` preprocessor.
115pub struct TreesitterPreprocessor;
116
117impl Preprocessor for TreesitterPreprocessor {
118    fn name(&self) -> &str {
119        "treesitter"
120    }
121
122    fn run(
123        &self,
124        ctx: &PreprocessorContext,
125        mut book: Book,
126    ) -> mdbook_preprocessor::errors::Result<Book> {
127        let cfg = load_config(ctx)?;
128        let book_root = ctx.root.clone();
129        let src_dir = book_root.join(&ctx.config.book.src);
130
131        let registry = language::build_registry(&cfg.languages, &book_root)
132            .context("building language registry")?;
133
134        let mut errors: Vec<String> = Vec::new();
135
136        book.for_each_mut(|item| {
137            if let BookItem::Chapter(chapter) = item {
138                let chapter_dir = chapter
139                    .path
140                    .as_deref()
141                    .and_then(|p| p.parent().map(|parent| src_dir.join(parent)))
142                    .unwrap_or_else(|| src_dir.clone());
143
144                match process_chapter(&chapter.content, &chapter_dir, &registry) {
145                    Ok(new_content) => chapter.content = new_content,
146                    Err(e) => errors.push(format!(
147                        "chapter {:?}: {e:#}",
148                        chapter.path.as_deref().unwrap_or(Path::new("<unknown>"))
149                    )),
150                }
151            }
152        });
153
154        if !errors.is_empty() {
155            return Err(anyhow::anyhow!(
156                "mdbook-treesitter encountered errors:\n{}",
157                errors.join("\n")
158            ));
159        }
160
161        Ok(book)
162    }
163
164    fn supports_renderer(&self, renderer: &str) -> mdbook_preprocessor::errors::Result<bool> {
165        // This preprocessor works with any renderer.
166        Ok(renderer != "not-supported")
167    }
168}
169
170// ─── Config loading ───────────────────────────────────────────────────────────
171
172fn load_config(ctx: &PreprocessorContext) -> Result<Config> {
173    match ctx.config.get::<Config>("preprocessor.treesitter") {
174        Ok(Some(cfg)) => Ok(cfg),
175        Ok(None) => Ok(Config::default()),
176        Err(e) => Err(anyhow::anyhow!(
177            "invalid [preprocessor.treesitter] config: {e}"
178        )),
179    }
180}
181
182// ─── Chapter processing ───────────────────────────────────────────────────────
183
184/// Replace all `{{ #treesitter ... }}` directives in `content` with the
185/// extracted code.  Directives that fail to resolve are reported as errors.
186pub fn process_chapter(
187    content: &str,
188    chapter_dir: &Path,
189    registry: &HashMap<String, LanguageEntry>,
190) -> Result<String> {
191    let re = directive_regex();
192    let mut result = String::with_capacity(content.len());
193    let mut last_end = 0;
194    let mut first_error: Option<anyhow::Error> = None;
195
196    for cap in re.captures_iter(content) {
197        let full_match = cap.get(0).unwrap();
198        let escaped = cap.get(1).is_some(); // leading backslash present
199        let inner = cap.get(2).unwrap().as_str();
200
201        result.push_str(&content[last_end..full_match.start()]);
202
203        if escaped {
204            // Strip the backslash; emit the directive literally.
205            result.push_str("{{ #treesitter ");
206            result.push_str(inner);
207            result.push_str(" }}");
208        } else {
209            match resolve_directive(inner, chapter_dir, registry) {
210                Ok(replacement) => result.push_str(&replacement),
211                Err(e) => {
212                    result.push_str(&format!("<!-- mdbook-treesitter error: {e} -->"));
213                    if first_error.is_none() {
214                        first_error = Some(e);
215                    }
216                }
217            }
218        }
219
220        last_end = full_match.end();
221    }
222
223    result.push_str(&content[last_end..]);
224
225    if let Some(e) = first_error {
226        return Err(e);
227    }
228
229    Ok(result)
230}
231
232/// Resolve a single directive inner text (e.g. `../../foo.rs#doc_comment?name=Foo`)
233/// and return the markdown replacement (a fenced code block).
234fn resolve_directive(
235    inner: &str,
236    chapter_dir: &Path,
237    registry: &HashMap<String, LanguageEntry>,
238) -> Result<String> {
239    let directive =
240        Directive::parse(inner).with_context(|| format!("parsing directive `{inner}`"))?;
241
242    let file_path = chapter_dir.join(&directive.file_path);
243    let source = std::fs::read_to_string(&file_path)
244        .with_context(|| format!("reading `{}`", file_path.display()))?;
245
246    let ext = file_path.extension().and_then(|e| e.to_str()).unwrap_or("");
247
248    let lang_entry = registry
249        .get(ext)
250        .with_context(|| format!("no language registered for extension `.{ext}`"))?;
251
252    let code = match &directive.query_name {
253        None => {
254            // No query — return the whole file.
255            source.clone()
256        }
257        Some(qname) => {
258            let query_cfg = lang_entry
259                .queries
260                .get(qname)
261                .with_context(|| format!("no query `{qname}` registered for language `.{ext}`"))?;
262
263            query::run_query(&lang_entry.language, &source, query_cfg, &directive.params)
264                .with_context(|| format!("running query `{qname}` on `{}`", file_path.display()))?
265        }
266    };
267
268    Ok(code)
269}