Skip to main content

graphy_parser/
dynamic_loader.rs

1//! Dynamic tree-sitter grammar loading.
2//!
//! Loads compiled tree-sitter grammars (.so/.dylib/.dll) from
//! `<config dir>/graphy/grammars/<lang>/` at runtime (for example
//! `~/.config/graphy/grammars/<lang>/` on Linux), similar
//! to how Neovim loads grammars via `:TSInstall`.
6
7use std::borrow::Cow;
8use std::path::PathBuf;
9
10use graphy_core::Language;
11use tracing::{debug, warn};
12
13use crate::tags_registry::TagsLanguageConfig;
14
/// File extension (without the leading dot) of shared libraries on the
/// target platform: `"so"` on Linux, `"dylib"` on macOS, `"dll"` on Windows.
///
/// Taken from the standard library instead of a hand-written `cfg` ladder so
/// that the constant is also defined on targets the ladder did not list
/// (e.g. the BSDs), where the crate previously failed to compile.
pub const LIB_EXT: &str = std::env::consts::DLL_EXTENSION;
21
22/// Metadata for a known tree-sitter grammar.
23pub struct GrammarInfo {
24    pub name: &'static str,
25    pub language: Language,
26    pub repo_url: &'static str,
27    pub ts_func_symbol: &'static str,
28    pub extensions: &'static [&'static str],
29    pub src_dir: &'static str,
30    pub has_cpp_scanner: bool,
31    /// Git ref (tag/commit) compatible with tree-sitter ABI 14 (tree-sitter 0.24).
32    /// Use `None` to clone HEAD (risky if the grammar moves to a newer ABI).
33    pub compatible_ref: Option<&'static str>,
34}
35
/// Table of every grammar this module knows how to look up and load.
///
/// Entries are pinned to commits compatible with tree-sitter 0.24 (ABI 14)
/// wherever `compatible_ref` is `Some`; a `None` ref means "clone HEAD".
/// The lookup helpers return the first entry that matches, so names and
/// languages are expected to be unique within the table.
pub const KNOWN_GRAMMARS: &[GrammarInfo] = &[
    GrammarInfo {
        name: "go",
        language: Language::Go,
        repo_url: "https://github.com/tree-sitter/tree-sitter-go",
        ts_func_symbol: "tree_sitter_go",
        extensions: &["go"],
        src_dir: "src",
        has_cpp_scanner: false,
        compatible_ref: Some("3c3775faa968158a8b4ac190a7fda867fd5fb748"),
    },
    GrammarInfo {
        name: "java",
        language: Language::Java,
        repo_url: "https://github.com/tree-sitter/tree-sitter-java",
        ts_func_symbol: "tree_sitter_java",
        extensions: &["java"],
        src_dir: "src",
        has_cpp_scanner: false,
        compatible_ref: Some("94703d5a6bed02b98e438d7cad1136c01a60ba2c"),
    },
    GrammarInfo {
        name: "php",
        language: Language::Php,
        repo_url: "https://github.com/tree-sitter/tree-sitter-php",
        ts_func_symbol: "tree_sitter_php",
        extensions: &["php"],
        // The PHP repository keeps its parser sources in a nested directory.
        src_dir: "php/src",
        has_cpp_scanner: false,
        compatible_ref: Some("43aad2b9a98aa8e603ea0cf5bb630728a5591ad8"),
    },
    GrammarInfo {
        name: "c",
        language: Language::C,
        repo_url: "https://github.com/tree-sitter/tree-sitter-c",
        ts_func_symbol: "tree_sitter_c",
        extensions: &["c", "h"],
        src_dir: "src",
        has_cpp_scanner: false,
        compatible_ref: Some("362a8a41b265056592a0c3771664a21d23a71392"),
    },
    GrammarInfo {
        name: "cpp",
        language: Language::Cpp,
        repo_url: "https://github.com/tree-sitter/tree-sitter-cpp",
        ts_func_symbol: "tree_sitter_cpp",
        extensions: &["cpp", "cc", "cxx", "hpp"],
        src_dir: "src",
        has_cpp_scanner: true,
        compatible_ref: Some("f41e1a044c8a84ea9fa8577fdd2eab92ec96de02"),
    },
    GrammarInfo {
        // The directory name uses a hyphen while the exported symbol uses an
        // underscore, so the symbol cannot be derived from `name`.
        name: "c-sharp",
        language: Language::CSharp,
        repo_url: "https://github.com/tree-sitter/tree-sitter-c-sharp",
        ts_func_symbol: "tree_sitter_c_sharp",
        extensions: &["cs"],
        src_dir: "src",
        has_cpp_scanner: true,
        // NOTE(review): this SHA is identical to the one pinned for `c` above.
        // The same commit hash cannot exist in two unrelated repositories, so
        // one of the two entries is presumably a copy-paste error — verify
        // both against their upstream repositories and correct the wrong one.
        compatible_ref: Some("362a8a41b265056592a0c3771664a21d23a71392"),
    },
    GrammarInfo {
        name: "ruby",
        language: Language::Ruby,
        repo_url: "https://github.com/tree-sitter/tree-sitter-ruby",
        ts_func_symbol: "tree_sitter_ruby",
        extensions: &["rb"],
        src_dir: "src",
        has_cpp_scanner: true,
        compatible_ref: Some("71bd32fb7607035768799732addba884a37a6210"),
    },
    GrammarInfo {
        name: "kotlin",
        language: Language::Kotlin,
        repo_url: "https://github.com/fwcd/tree-sitter-kotlin",
        ts_func_symbol: "tree_sitter_kotlin",
        extensions: &["kt", "kts"],
        src_dir: "src",
        has_cpp_scanner: true,
        // Unpinned: the only entry that tracks HEAD (see `compatible_ref` docs).
        compatible_ref: None,
    },
];
119
120/// Base directory for dynamic grammars: `~/.config/graphy/grammars/`
121pub fn grammars_dir() -> PathBuf {
122    dirs::config_dir()
123        .unwrap_or_else(|| {
124            let home = std::env::var("HOME").unwrap_or_else(|_| ".".to_string());
125            PathBuf::from(home).join(".config")
126        })
127        .join("graphy")
128        .join("grammars")
129}
130
131/// Directory for a specific language grammar.
132pub fn grammar_dir_for(name: &str) -> PathBuf {
133    grammars_dir().join(name)
134}
135
136/// Check if a dynamic grammar is installed.
137pub fn is_installed(name: &str) -> bool {
138    grammar_dir_for(name)
139        .join(format!("parser.{LIB_EXT}"))
140        .exists()
141}
142
143/// List all installed dynamic grammars.
144pub fn list_installed() -> Vec<String> {
145    let dir = grammars_dir();
146    if !dir.is_dir() {
147        return vec![];
148    }
149    let mut installed = Vec::new();
150    if let Ok(entries) = std::fs::read_dir(&dir) {
151        for entry in entries.flatten() {
152            let path = entry.path();
153            if path.is_dir() && path.join(format!("parser.{LIB_EXT}")).exists() {
154                if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
155                    installed.push(name.to_string());
156                }
157            }
158        }
159    }
160    installed.sort();
161    installed
162}
163
164/// Look up a known grammar by Language enum.
165pub fn grammar_info_for_language(lang: Language) -> Option<&'static GrammarInfo> {
166    KNOWN_GRAMMARS.iter().find(|g| g.language == lang)
167}
168
169/// Look up a known grammar by name.
170pub fn grammar_info_by_name(name: &str) -> Option<&'static GrammarInfo> {
171    KNOWN_GRAMMARS.iter().find(|g| g.name == name)
172}
173
174/// Load a dynamic grammar at runtime.
175///
176/// Looks for `~/.config/graphy/grammars/<name>/parser.{so,dylib}` and
177/// loads it via `libloading`. The shared library must export a
178/// `tree_sitter_<name>()` function returning a tree_sitter Language.
179pub fn load_dynamic_grammar(lang: Language) -> Option<TagsLanguageConfig> {
180    let info = grammar_info_for_language(lang)?;
181    let dir = grammar_dir_for(info.name);
182    let lib_path = dir.join(format!("parser.{LIB_EXT}"));
183
184    if !lib_path.exists() {
185        debug!("No dynamic grammar for {:?} at {}", lang, lib_path.display());
186        return None;
187    }
188
189    // Load the shared library.
190    // SAFETY: We trust the .so was compiled from a tree-sitter grammar repo.
191    let lib = match unsafe { libloading::Library::new(&lib_path) } {
192        Ok(lib) => lib,
193        Err(e) => {
194            warn!("Failed to load grammar library {}: {e}", lib_path.display());
195            return None;
196        }
197    };
198
199    let ts_language = unsafe {
200        let func: libloading::Symbol<unsafe extern "C" fn() -> tree_sitter::Language> =
201            match lib.get(info.ts_func_symbol.as_bytes()) {
202                Ok(f) => f,
203                Err(e) => {
204                    warn!(
205                        "Symbol '{}' not found in {}: {e}",
206                        info.ts_func_symbol,
207                        lib_path.display()
208                    );
209                    return None;
210                }
211            };
212        func()
213    };
214
215    // Leak the library handle so it stays loaded for the process lifetime.
216    // Dropping it would invalidate the Language's function pointers.
217    std::mem::forget(lib);
218
219    let tags_query = load_tags_query(info.name, &dir);
220
221    Some(TagsLanguageConfig {
222        ts_language,
223        tags_query: Cow::Owned(tags_query),
224        language: lang,
225    })
226}
227
228/// Load tags.scm: prefer user's file in grammar dir, fall back to bundled.
229fn load_tags_query(name: &str, grammar_dir: &std::path::Path) -> String {
230    let custom_path = grammar_dir.join("tags.scm");
231    if custom_path.exists() {
232        if let Ok(content) = std::fs::read_to_string(&custom_path) {
233            debug!("Using custom tags.scm for {name}");
234            return content;
235        }
236    }
237    bundled_tags_query(name)
238        .unwrap_or("")
239        .to_string()
240}
241
242/// Bundled tags.scm queries (embedded at compile time as fallback).
243pub fn bundled_tags_query(name: &str) -> Option<&'static str> {
244    match name {
245        "go" => Some(include_str!("../tags/go.scm")),
246        "java" => Some(include_str!("../tags/java.scm")),
247        "php" => Some(include_str!("../tags/php.scm")),
248        "c" => Some(include_str!("../tags/c.scm")),
249        "cpp" => Some(include_str!("../tags/cpp.scm")),
250        "c-sharp" => Some(include_str!("../tags/csharp.scm")),
251        "ruby" => Some(include_str!("../tags/ruby.scm")),
252        "kotlin" => Some(include_str!("../tags/kotlin.scm")),
253        _ => None,
254    }
255}
256
#[cfg(test)]
mod tests {
    use super::*;

    /// The table must not accidentally shrink below its historical size.
    #[test]
    fn known_grammars_has_entries() {
        assert!(KNOWN_GRAMMARS.len() >= 7);
    }

    #[test]
    fn grammar_info_by_name_works() {
        let go = grammar_info_by_name("go").unwrap();
        assert_eq!(go.language, Language::Go);
        assert_eq!(go.ts_func_symbol, "tree_sitter_go");
    }

    #[test]
    fn grammar_info_for_language_works() {
        let java = grammar_info_for_language(Language::Java).unwrap();
        assert_eq!(java.name, "java");
    }

    #[test]
    fn grammars_dir_is_valid() {
        let dir = grammars_dir();
        assert!(dir.to_string_lossy().contains("graphy"));
        assert!(dir.to_string_lossy().contains("grammars"));
    }

    /// Every known grammar must ship a non-empty bundled tags query.
    ///
    /// Driven by `KNOWN_GRAMMARS` rather than a hand-written list so that a
    /// newly added grammar cannot be forgotten here.
    #[test]
    fn bundled_tags_queries_exist() {
        for info in KNOWN_GRAMMARS {
            let query = bundled_tags_query(info.name);
            assert!(query.is_some(), "Missing bundled tags.scm for {}", info.name);
            assert!(!query.unwrap().is_empty(), "Empty tags.scm for {}", info.name);
        }
    }

    #[test]
    fn unknown_grammar_returns_none() {
        assert!(grammar_info_by_name("brainfuck").is_none());
        assert!(bundled_tags_query("brainfuck").is_none());
    }

    #[test]
    fn all_known_grammars_have_consistent_metadata() {
        // Every entry in KNOWN_GRAMMARS should have non-empty required fields,
        // unique names, and extensions that don't overlap across grammars.
        let mut seen_names = std::collections::HashSet::new();
        let mut seen_extensions = std::collections::HashMap::new();

        for info in KNOWN_GRAMMARS {
            // Name must be non-empty and unique
            assert!(!info.name.is_empty(), "Grammar name is empty");
            assert!(seen_names.insert(info.name), "Duplicate grammar name: {}", info.name);

            // Must have at least one file extension
            assert!(!info.extensions.is_empty(), "Grammar {} has no extensions", info.name);

            // Extensions should not overlap with other grammars
            for ext in info.extensions {
                if let Some(prev) = seen_extensions.insert(*ext, info.name) {
                    panic!("Extension '.{ext}' claimed by both '{prev}' and '{}'", info.name);
                }
            }

            // ts_func_symbol should start with "tree_sitter_"
            assert!(
                info.ts_func_symbol.starts_with("tree_sitter_"),
                "Grammar {} has unexpected symbol: {}",
                info.name, info.ts_func_symbol
            );

            // repo_url must be a valid-looking URL
            assert!(
                info.repo_url.starts_with("https://"),
                "Grammar {} has invalid repo URL: {}",
                info.name, info.repo_url
            );

            // Lookup by name and by language should be consistent
            let by_name = grammar_info_by_name(info.name).unwrap();
            assert_eq!(by_name.language, info.language);
            let by_lang = grammar_info_for_language(info.language).unwrap();
            assert_eq!(by_lang.name, info.name);
        }
    }
}