Skip to main content

editor_core_treesitter/
registry.rs

1use serde::Deserialize;
2use std::collections::BTreeMap;
3use std::path::{Path, PathBuf};
4
/// Lookup table keyed by file extension (no leading `.`) whose values are Tree-sitter
/// `language_id`s.
///
/// For instance, the entry `"rs" -> "rust"` associates `*.rs` files with the `rust` language.
pub type TreeSitterExtensionMap = BTreeMap<String, String>;
9
10/// Mapping from Tree-sitter `language_id` (e.g. `"rust"`) to its on-disk configuration.
11pub type TreeSitterConfigMap = BTreeMap<String, TreeSitterConfig>;
12
/// On-disk Tree-sitter configuration for a single language.
///
/// The type is deliberately UI-agnostic: it only records *where* the editor can find the
/// Tree-sitter WASM grammar and its accompanying query files.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct TreeSitterConfig {
    /// Path to the WASM grammar module (`language.wasm`).
    pub wasm_path: PathBuf,
    /// Path to the syntax highlighting query (`highlights.scm`).
    pub highlights_path: PathBuf,
    /// Path to the folding query (`folds.scm`), if any.
    pub folds_path: Option<PathBuf>,
    /// Path to the indentation query (`indents.scm`), if any.
    pub indents_path: Option<PathBuf>,
    /// Path to the tags query (`tags.scm`), if any.
    pub tags_path: Option<PathBuf>,
    /// Path to the injections query (`injections.scm`), if any.
    pub injections_path: Option<PathBuf>,
}

impl TreeSitterConfig {
    /// Build a config from the two mandatory paths; every optional query path starts as `None`.
    pub fn new(wasm_path: PathBuf, highlights_path: PathBuf) -> Self {
        Self {
            wasm_path,
            highlights_path,
            folds_path: None,
            indents_path: None,
            tags_path: None,
            injections_path: None,
        }
    }

    /// Build a config by probing a `<language_id>/` directory for the conventional filenames.
    ///
    /// Returns `None` unless both required files exist as regular files:
    /// - `language.wasm`
    /// - `highlights.scm`
    ///
    /// Each of the following is recorded only when it exists as a regular file:
    /// - `folds.scm`
    /// - `indents.scm`
    /// - `tags.scm`
    /// - `injections.scm`
    pub fn from_language_dir(dir: &Path) -> Option<Self> {
        let wasm_path = dir.join("language.wasm");
        let highlights_path = dir.join("highlights.scm");
        if !(wasm_path.is_file() && highlights_path.is_file()) {
            return None;
        }

        // Probe one optional query file; yields its path only when it is a regular file.
        let optional_query = |file_name: &str| -> Option<PathBuf> {
            let candidate = dir.join(file_name);
            if candidate.is_file() {
                Some(candidate)
            } else {
                None
            }
        };

        Some(Self {
            wasm_path,
            highlights_path,
            folds_path: optional_query("folds.scm"),
            indents_path: optional_query("indents.scm"),
            tags_path: optional_query("tags.scm"),
            injections_path: optional_query("injections.scm"),
        })
    }
}
91
92/// Combined Tree-sitter registry used by the UI layer and FFI boundary.
93#[derive(Debug, Default, Clone, PartialEq, Eq)]
94pub struct TreeSitterRegistry {
95    /// Mapping from extension → language id.
96    pub extension_map: TreeSitterExtensionMap,
97    /// Mapping from language id → file-based config.
98    pub languages: TreeSitterConfigMap,
99}
100
/// Errors produced when parsing/validating a Tree-sitter registry.
///
/// Every variant carries its detail as plain data (a message string or the offending version
/// number), so the type is `Clone` and comparable. That lets callers and tests compare whole
/// `Result<_, TreeSitterRegistryError>` values directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum TreeSitterRegistryError {
    /// JSON parsing or schema validation failed.
    Json(String),
    /// Unsupported registry schema version.
    UnsupportedSchemaVersion(u32),
    /// A required value was missing or invalid.
    InvalidValue(String),
    /// I/O error while scanning a directory.
    Io(String),
}

impl std::fmt::Display for TreeSitterRegistryError {
    /// Render a human-readable message prefixed with `tree-sitter registry` and the error kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::Json(msg) => write!(f, "tree-sitter registry json error: {msg}"),
            Self::UnsupportedSchemaVersion(v) => {
                write!(f, "tree-sitter registry unsupported schema_version: {v}")
            }
            Self::InvalidValue(msg) => write!(f, "tree-sitter registry invalid value: {msg}"),
            Self::Io(msg) => write!(f, "tree-sitter registry io error: {msg}"),
        }
    }
}

impl std::error::Error for TreeSitterRegistryError {}
128
129#[derive(Debug, Deserialize)]
130struct RegistryJson {
131    schema_version: u32,
132    #[serde(default)]
133    root_dir: Option<String>,
134    #[serde(default)]
135    extension_map: TreeSitterExtensionMap,
136    #[serde(default)]
137    languages: BTreeMap<String, LanguageJson>,
138}
139
140#[derive(Debug, Deserialize)]
141struct LanguageJson {
142    wasm: String,
143    highlights: String,
144    #[serde(default)]
145    folds: Option<String>,
146    #[serde(default)]
147    indents: Option<String>,
148    #[serde(default)]
149    tags: Option<String>,
150    #[serde(default)]
151    injections: Option<String>,
152}
153
154impl TreeSitterRegistry {
155    /// Parse a schema-versioned Tree-sitter registry JSON string.
156    ///
157    /// The registry supports:
158    /// - absolute paths
159    /// - or relative paths resolved against `root_dir`
160    pub fn from_json_str(json: &str) -> Result<Self, TreeSitterRegistryError> {
161        let parsed: RegistryJson =
162            serde_json::from_str(json).map_err(|e| TreeSitterRegistryError::Json(e.to_string()))?;
163        if parsed.schema_version != 1 {
164            return Err(TreeSitterRegistryError::UnsupportedSchemaVersion(
165                parsed.schema_version,
166            ));
167        }
168
169        let root_dir = parsed.root_dir.as_deref().map(PathBuf::from);
170        let root_dir = root_dir.as_deref();
171
172        let mut languages = TreeSitterConfigMap::new();
173        for (language_id, lang) in parsed.languages {
174            if language_id.trim().is_empty() {
175                return Err(TreeSitterRegistryError::InvalidValue(
176                    "language_id must not be empty".to_string(),
177                ));
178            }
179
180            let wasm_path = resolve_path(root_dir, &lang.wasm)
181                .map_err(TreeSitterRegistryError::InvalidValue)?;
182            let highlights_path = resolve_path(root_dir, &lang.highlights)
183                .map_err(TreeSitterRegistryError::InvalidValue)?;
184            let folds_path = match lang.folds.as_deref() {
185                Some(p) if !p.trim().is_empty() => {
186                    Some(resolve_path(root_dir, p).map_err(TreeSitterRegistryError::InvalidValue)?)
187                }
188                _ => None,
189            };
190            let indents_path = match lang.indents.as_deref() {
191                Some(p) if !p.trim().is_empty() => {
192                    Some(resolve_path(root_dir, p).map_err(TreeSitterRegistryError::InvalidValue)?)
193                }
194                _ => None,
195            };
196            let tags_path = match lang.tags.as_deref() {
197                Some(p) if !p.trim().is_empty() => {
198                    Some(resolve_path(root_dir, p).map_err(TreeSitterRegistryError::InvalidValue)?)
199                }
200                _ => None,
201            };
202            let injections_path = match lang.injections.as_deref() {
203                Some(p) if !p.trim().is_empty() => {
204                    Some(resolve_path(root_dir, p).map_err(TreeSitterRegistryError::InvalidValue)?)
205                }
206                _ => None,
207            };
208
209            languages.insert(
210                language_id,
211                TreeSitterConfig {
212                    wasm_path,
213                    highlights_path,
214                    folds_path,
215                    indents_path,
216                    tags_path,
217                    injections_path,
218                },
219            );
220        }
221
222        Ok(Self {
223            extension_map: parsed.extension_map,
224            languages,
225        })
226    }
227
228    /// Return the configured language id for a file path, based on its extension.
229    pub fn language_id_for_path<'a>(&'a self, path: &Path) -> Option<&'a str> {
230        let ext = normalized_extension_for_path(path)?;
231        self.extension_map.get(&ext).map(|s| s.as_str())
232    }
233
234    /// Scan a `treesitter/` root directory and return a `language_id -> config` map.
235    ///
236    /// This is useful for hosts that follow the conventional on-disk layout:
237    ///
238    /// ```text
239    /// treesitter/
240    ///   rust/
241    ///     language.wasm
242    ///     highlights.scm
243    /// ```
244    pub fn scan_language_configs(
245        root_dir: &Path,
246    ) -> Result<TreeSitterConfigMap, TreeSitterRegistryError> {
247        let mut out = TreeSitterConfigMap::new();
248        let entries =
249            std::fs::read_dir(root_dir).map_err(|e| TreeSitterRegistryError::Io(e.to_string()))?;
250        for entry in entries {
251            let entry = entry.map_err(|e| TreeSitterRegistryError::Io(e.to_string()))?;
252            let path = entry.path();
253            if !path.is_dir() {
254                continue;
255            }
256
257            let Some(language_id) = entry.file_name().to_str().map(|s| s.to_string()) else {
258                continue;
259            };
260            if language_id.starts_with('.') {
261                continue;
262            }
263
264            let Some(cfg) = TreeSitterConfig::from_language_dir(&path) else {
265                continue;
266            };
267            out.insert(language_id, cfg);
268        }
269        Ok(out)
270    }
271}
272
/// Turn a raw registry path string into a usable path.
///
/// Surrounding whitespace is ignored. Absolute paths are returned unchanged; relative paths
/// are joined onto `root_dir`.
///
/// Returns an error message when the trimmed input is empty, or when the path is relative and
/// no `root_dir` is available.
fn resolve_path(root_dir: Option<&Path>, raw: &str) -> Result<PathBuf, String> {
    let trimmed = raw.trim();
    if trimmed.is_empty() {
        return Err(String::from("path must not be empty"));
    }

    let candidate = Path::new(trimmed);
    match (candidate.is_absolute(), root_dir) {
        (true, _) => Ok(candidate.to_path_buf()),
        (false, Some(base)) => Ok(base.join(candidate)),
        (false, None) => Err(format!("relative path without root_dir: {trimmed}")),
    }
}
287
/// Extract the lookup form of a path's extension.
///
/// The extension is trimmed, stripped of any leading `.`, and ASCII-lowercased. Returns `None`
/// when the path has no extension, the extension is not valid UTF-8, or nothing remains after
/// trimming.
fn normalized_extension_for_path(path: &Path) -> Option<String> {
    path.extension()
        .and_then(|os_ext| os_ext.to_str())
        .map(|text| text.trim().trim_start_matches('.'))
        .filter(|cleaned| !cleaned.is_empty())
        .map(str::to_ascii_lowercase)
}