Skip to main content

mdbook_treesitter/
config.rs

1//! Configuration types for the mdbook-treesitter preprocessor.
2//!
3//! Loaded from `book.toml` under `[preprocessor.treesitter]`.
4
5use serde::{Deserialize, Serialize};
6use std::collections::HashMap;
7
8/// Top-level preprocessor configuration.
9///
10/// ```toml
11/// [preprocessor.treesitter]
12/// command = "cargo run --manifest-path ../Cargo.toml"  # optional, for dev
13/// ```
14#[derive(Debug, Default, Clone, Serialize, Deserialize)]
15pub struct Config {
16    /// The command mdBook uses to invoke the preprocessor.
17    /// Declared here so serde doesn't try to parse it as a language config.
18    #[serde(default)]
19    pub command: Option<String>,
20    /// Per-language configuration, keyed by language name (e.g. "rust", "toml").
21    #[serde(flatten)]
22    pub languages: HashMap<String, LanguageConfig>,
23}
24
25/// Configuration for a single language.
26///
27/// ```toml
28/// [preprocessor.treesitter.python]
29/// parser = "/path/to/parser.so"
30///
31/// [preprocessor.treesitter.python.queries]
32/// my_query = "(function_definition name: (identifier) @name) @func"
33/// ```
34#[derive(Debug, Default, Clone, Serialize, Deserialize)]
35pub struct LanguageConfig {
36    /// Path to a custom `.so` parser. Supports absolute and relative paths
37    /// (resolved relative to `book.toml`).
38    pub parser: Option<String>,
39    /// Named queries for this language.
40    #[serde(default)]
41    pub queries: HashMap<String, QueryConfig>,
42}
43
44/// A named query — either a raw tree-sitter S-expression string, or a table
45/// with an explicit format and optional post-processing.
46///
47/// ```toml
48/// # Simple tree-sitter query (string form — no post-processing)
49/// [preprocessor.treesitter.rust.queries]
50/// struct = "(struct_item name: (type_identifier) @name) @struct"
51///
52/// # Tree-sitter query with a strip regex (table form, format defaults to treesitter)
53/// [preprocessor.treesitter.rust.queries.comment_text]
54/// query = "((line_comment)+ @doc_comment ...)"
55/// strip = "^///? ?"
56///
57/// # jq query (table form with explicit format)
58/// [preprocessor.treesitter.rust.queries.doc_comment_jq]
59/// format = "jq"
60/// query = ".children[] | select(.type == \"struct_item\") | ..."
61/// ```
62#[derive(Debug, Clone, Serialize, Deserialize)]
63#[serde(untagged)]
64pub enum QueryConfig {
65    /// A plain tree-sitter S-expression query string (no strip).
66    TreeSitter(String),
67    /// A structured query: format defaults to `treesitter` when omitted.
68    ///
69    /// The optional `strip` field is a regex applied to every output line —
70    /// matches are removed, which lets you strip comment delimiters, braces,
71    /// leading whitespace, etc.
72    Structured {
73        #[serde(default)]
74        format: QueryFormat,
75        query: String,
76        /// Regex whose matches are deleted from each output line.
77        #[serde(default)]
78        strip: Option<String>,
79        /// Output template applied once per query match.
80        ///
81        /// Use `{capture_name}` placeholders to interpolate captured text.
82        /// When multiple nodes are captured under the same name (e.g. `(line_comment)+`),
83        /// they are joined with `\n` before substitution.  The `strip` regex,
84        /// if present, is applied to each capture's text before substitution.
85        ///
86        /// ```toml
87        /// [preprocessor.treesitter.rust.queries.enum_variants]
88        /// query = "((line_comment)+ @doc_comment . (enum_variant name: (identifier) @name))"
89        /// strip = "^///? ?"
90        /// template = "- {name}: {doc_comment}"
91        /// ```
92        #[serde(default)]
93        template: Option<String>,
94    },
95}
96
97/// The query language format.
98#[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize)]
99#[serde(rename_all = "lowercase")]
100pub enum QueryFormat {
101    /// Tree-sitter S-expression query (default).
102    #[default]
103    TreeSitter,
104    /// jq filter applied to the tree-sitter AST converted to JSON.
105    Jq,
106}
107
108impl QueryConfig {
109    pub fn format(&self) -> QueryFormat {
110        match self {
111            QueryConfig::TreeSitter(_) => QueryFormat::TreeSitter,
112            QueryConfig::Structured { format, .. } => format.clone(),
113        }
114    }
115
116    pub fn query_str(&self) -> &str {
117        match self {
118            QueryConfig::TreeSitter(s) => s.as_str(),
119            QueryConfig::Structured { query, .. } => query.as_str(),
120        }
121    }
122
123    /// Returns the strip regex, if any.
124    pub fn strip(&self) -> Option<&str> {
125        match self {
126            QueryConfig::TreeSitter(_) => None,
127            QueryConfig::Structured { strip, .. } => strip.as_deref(),
128        }
129    }
130
131    /// Returns the output template, if any.
132    pub fn template(&self) -> Option<&str> {
133        match self {
134            QueryConfig::TreeSitter(_) => None,
135            QueryConfig::Structured { template, .. } => template.as_deref(),
136        }
137    }
138}