1#[cfg(feature = "static-grammar-libs")]
7include!(concat!(env!("OUT_DIR"), "/generated_grammar.rs"));
8
9#[cfg(feature = "static-grammar-libs")]
10use lazy_static::lazy_static;
11
#[cfg(feature = "static-grammar-libs")]
lazy_static! {
    /// Sorted names of every grammar present in the `LANGUAGES` table.
    ///
    /// Only exists when the `static-grammar-libs` feature is enabled.
    /// NOTE(review): `LANGUAGES` is not defined in this file; presumably it comes from the
    /// generated grammar source included at the top of the file.
    pub static ref SUPPORTED_LANGUAGES: Vec<&'static str> = {
        let mut language_names = LANGUAGES.keys().copied().collect::<Vec<&'static str>>();
        // Sort so that consumers see a stable, alphabetical listing.
        language_names.sort_unstable();
        language_names
    };
}
23
24#[cfg(not(feature = "static-grammar-libs"))]
25use phf::phf_map;
26
27#[cfg(not(feature = "static-grammar-libs"))]
28use tree_sitter::Language;
29
30use log::{debug, error, info};
31use logging_timer::time;
32use serde::{Deserialize, Serialize};
33use std::{
34 collections::HashMap,
35 fs, io,
36 path::{Path, PathBuf},
37};
38use thiserror::Error;
39use tree_sitter::{Parser, Tree, LANGUAGE_VERSION, MIN_COMPATIBLE_LANGUAGE_VERSION};
40
41static FILE_EXTS: phf::Map<&'static str, &'static str> = phf_map! {
45 "hs" => "haskell",
46 "rs" => "rust",
47 "go" => "go",
48 "c" => "c",
49 "cc" => "cpp",
50 "cpp" => "cpp",
51 "cs" => "c_sharp",
52 "java" => "java",
53 "py" => "python",
54 "css" => "css",
55 "sh" => "bash",
56 "bash" => "bash",
57 "jl" => "julia",
58 "ml" => "ocaml",
59 "rb" => "ruby",
60 "scala" => "scala",
61 "sc" => "scala",
62 "swift" => "swift",
63 "php" => "php",
64 "json" => "json",
65 "hcl" => "hcl",
66 "ts" => "typescript",
67 "tsx" => "tsx",
68 "js" => "typescript",
69 "jsx" => "tsx",
70 "hpp" => "cpp",
71 "tpp" => "tpp",
72 "h" => "c",
73 "tf" => "hcl",
74 "md" => "markdown",
75};
76
77#[derive(Error, Debug)]
79pub enum LoadingError {
80 #[cfg(feature = "static-grammar-libs")]
81 #[error("The program was not compiled with support for {0}")]
82 StaticNotCompiled(String),
83
84 #[error("This program was not compiled with support for any grammars")]
85 NoGrammars,
86
87 #[error("Unsupported extension: {0}")]
88 UnsupportedExt(String),
89
90 #[error("Did not find a valid file extension from filename {0}")]
91 NoFileExt(String),
92
93 #[error("tree-sitter had an error")]
94 LanguageError(#[from] tree_sitter::LanguageError),
95
96 #[error("could not parse {0} with tree-sitter")]
97 TSParseFailure(PathBuf),
98
99 #[error("Some IO error was encountered")]
100 IoError(#[from] io::Error),
101
102 #[error("Unable to dynamically load grammar")]
103 LibloadingError(#[from] libloading::Error),
104
105 #[error("Attempted to load a tree-sitter grammar with incompatible language ABI version: {0} (supported range: {1} - {2})")]
106 AbiOutOfRange(usize, usize, usize),
107}
108
109type StringMap = HashMap<String, String>;
110
111#[derive(Debug, Eq, PartialEq, Serialize, Deserialize, Clone, Default)]
113#[serde(rename_all = "kebab-case")]
114pub struct GrammarConfig {
115 pub dylib_overrides: Option<StringMap>,
120
121 pub file_associations: Option<StringMap>,
128}
129
/// Look up `lang` in the statically compiled `LANGUAGES` table and build its grammar.
///
/// # Errors
///
/// Returns [`LoadingError::StaticNotCompiled`] when the table has no entry for `lang`.
#[cfg(feature = "static-grammar-libs")]
fn generate_language_static(lang: &str) -> Result<Language, LoadingError> {
    info!("Using tree-sitter parser for language {}", lang);
    LANGUAGES
        .get(lang)
        // Calling the table entry is `unsafe`, so the call needs an `unsafe` block.
        .map(|grammar_ctor| unsafe { grammar_ctor() })
        .ok_or_else(|| LoadingError::StaticNotCompiled(lang.to_string()))
}
141
/// Build the name of the constructor symbol for a grammar.
///
/// The result is `tree_sitter_` followed by `lang` with every `-` replaced by `_`, e.g.
/// `"c-sharp"` becomes `"tree_sitter_c_sharp"`.
#[must_use]
pub fn tree_sitter_constructor_symbol_name(lang: &str) -> String {
    const PREFIX: &str = "tree_sitter_";

    let mut symbol_name = String::with_capacity(PREFIX.len() + lang.len());
    symbol_name.push_str(PREFIX);
    // Dashes are swapped for underscores so the name is a valid symbol identifier.
    symbol_name.extend(lang.chars().map(|ch| if ch == '-' { '_' } else { ch }));
    symbol_name
}
157
158#[cfg(feature = "dynamic-grammar-libs")]
/// Build the default shared-library file name for a grammar.
///
/// The name has the form `libtree-sitter-<lang>.<ext>`, where underscores in `lang` are
/// replaced by dashes and `<ext>` is the shared-library extension of the target platform
/// (`so`, `dylib`, `dll`, ...).
///
/// The extension comes from the standard library rather than a hand-written OS list, so
/// targets that support shared libraries but are not macOS, Linux, NetBSD or Windows
/// (e.g. FreeBSD, OpenBSD) get a usable name instead of a panic.
fn lib_name_from_lang(lang: &str) -> String {
    format!(
        "libtree-sitter-{}.{}",
        lang.replace('_', "-"),
        std::env::consts::DLL_EXTENSION
    )
}
176
177pub fn construct_ts_lang_from_shared_lib(
204 language_name: &str,
205 parser_path: &Path,
206) -> Result<Language, LoadingError> {
207 info!(
208 "Loading dynamic library for language '{}' path '{}'",
209 language_name,
210 parser_path.to_string_lossy(),
211 );
212 let constructor_symbol_name = tree_sitter_constructor_symbol_name(language_name);
213 debug!(
214 "Using '{}' as symbol name for parser constructor method",
215 constructor_symbol_name
216 );
217 let grammar = unsafe {
219 let shared_library = Box::new(libloading::Library::new(parser_path.as_os_str())?);
223 let static_shared_library = Box::leak(shared_library);
224 let constructor = static_shared_library.get::<libloading::Symbol<
225 unsafe extern "C" fn() -> Language,
226 >>(constructor_symbol_name.as_bytes())?;
227 constructor()
228 };
229 Ok(grammar)
230}
231
/// Load the grammar for `lang` from a shared library.
///
/// If `overrides` contains an entry for `lang`, its value is used as the library path.
/// Otherwise the default library file name from `lib_name_from_lang` is used.
///
/// The default name is only computed when it is needed, so an explicit override never
/// depends on the default naming logic, and the "overriding" log line is only emitted when
/// an override for this language actually exists.
///
/// # Errors
///
/// Propagates any error from [`construct_ts_lang_from_shared_lib`].
#[cfg(feature = "dynamic-grammar-libs")]
fn generate_language_dynamic(
    lang: &str,
    overrides: Option<&StringMap>,
) -> Result<Language, LoadingError> {
    let language_path = match overrides.and_then(|user_map| user_map.get(lang)) {
        Some(user_lib_name) => {
            debug!("Overriding dynamic library name because of user config");
            PathBuf::from(user_lib_name)
        }
        None => PathBuf::from(lib_name_from_lang(lang)),
    };
    construct_ts_lang_from_shared_lib(lang, &language_path)
}
249
250#[allow(clippy::vec_init_then_push)]
256#[allow(unused)]
258pub fn generate_language(lang: &str, config: &GrammarConfig) -> Result<Language, LoadingError> {
259 let mut grammar_candidates = Vec::new();
261
262 #[cfg(feature = "dynamic-grammar-libs")]
264 if config.dylib_overrides.is_some() {
265 grammar_candidates.push(generate_language_dynamic(
266 lang,
267 config.dylib_overrides.as_ref(),
268 ));
269 }
270
271 #[cfg(feature = "static-grammar-libs")]
274 grammar_candidates.push(generate_language_static(lang));
275
276 #[cfg(feature = "dynamic-grammar-libs")]
277 if config.dylib_overrides.is_none() {
278 grammar_candidates.push(generate_language_dynamic(
279 lang,
280 config.dylib_overrides.as_ref(),
281 ));
282 }
283
284 let last_cand_idx = grammar_candidates.len() - 1;
286
287 for (i, candidate_result) in grammar_candidates.into_iter().enumerate() {
288 let is_last_cand = i == last_cand_idx;
289
290 match candidate_result {
291 Ok(grammar) => {
292 info!("Succeeded loading grammar for {}", lang);
293 ts_language_abi_checked(&grammar)?;
294 return Ok(grammar);
295 }
296 Err(e) => {
297 debug!("Failed to load candidate grammar for {}: {}", lang, &e);
298 if is_last_cand {
301 error!("Failed to load all candidate grammars for {}", lang);
302 return Err(e);
303 }
304 }
305 };
306 }
307 error!("No grammars were loaded at all");
308 Err(LoadingError::NoGrammars)
309}
310
311#[must_use]
317pub fn resolve_language_str<'a>(
318 ext: &str,
319 overrides: Option<&'a HashMap<String, String>>,
320) -> Option<&'a str> {
321 let lang_from_override = {
322 if let Some(overrides) = overrides {
323 overrides.get(ext)
324 } else {
325 None
326 }
327 };
328 let lang_from_defaults = FILE_EXTS.get(ext);
329
330 if let Some(lang) = lang_from_override {
331 info!(
332 "Deduced language \"{}\" from extension \"{}\" provided from user mappings",
333 lang, ext
334 );
335 Some(lang)
336 } else if let Some(lang) = lang_from_defaults {
337 info!(
338 "Deduced language \"{}\" from extension \"{}\" from default mappings",
339 lang, ext
340 );
341 Some(lang)
342 } else {
343 error!(
344 "Was not able to find a language string for extension {}",
345 ext
346 );
347 None
348 }
349}
350
351#[deprecated(
355 since = "0.8.1",
356 note = "You should use lang_name_from_file_ext instead."
357)]
358pub fn language_from_ext(
359 ext: &str,
360 grammar_config: &GrammarConfig,
361) -> Result<Language, LoadingError> {
362 let language_str_cand = resolve_language_str(ext, grammar_config.file_associations.as_ref());
363
364 if let Some(language_str) = language_str_cand {
365 generate_language(language_str, grammar_config)
366 } else {
367 Err(LoadingError::UnsupportedExt(ext.to_string()))
368 }
369}
370
371pub fn lang_name_from_file_ext<'cfg>(
400 ext: &str,
401 grammar_config: &'cfg GrammarConfig,
402) -> Result<&'cfg str, LoadingError> {
403 let language_str_cand = resolve_language_str(ext, grammar_config.file_associations.as_ref());
404 match language_str_cand {
405 Some(s) => Ok(s),
406 None => Err(LoadingError::UnsupportedExt(ext.to_string())),
407 }
408}
409
410pub fn ts_language_abi_checked(ts_language: &Language) -> Result<(), LoadingError> {
418 let loaded_ts_version = ts_language.abi_version();
419 let is_abi_compatible =
420 (MIN_COMPATIBLE_LANGUAGE_VERSION..=LANGUAGE_VERSION).contains(&loaded_ts_version);
421 if !is_abi_compatible {
422 return Err(LoadingError::AbiOutOfRange(
423 loaded_ts_version,
424 MIN_COMPATIBLE_LANGUAGE_VERSION,
425 LANGUAGE_VERSION,
426 ));
427 }
428 Ok(())
429}
430
431#[time("info", "parse::{}")]
436pub fn parse_file(
437 p: &Path,
438 language: Option<&str>,
439 config: &GrammarConfig,
440) -> Result<Tree, LoadingError> {
441 let resolved_language = match language {
444 Some(lang) => Ok(lang),
445 None => {
446 if let Some(ext) = p.extension() {
447 lang_name_from_file_ext(&ext.to_string_lossy(), config)
448 } else {
449 Err(LoadingError::NoFileExt(p.to_string_lossy().to_string()))
450 }
451 }
452 }?;
453 let mut parser = Parser::new();
454 let ts_lang = generate_language(resolved_language, config)?;
455 parser.set_language(&ts_lang)?;
456 let text = fs::read_to_string(p)?;
457 match parser.parse(&text, None) {
458 Some(ast) => {
459 debug!("Parsed AST");
460 Ok(ast)
461 }
462 None => Err(LoadingError::TSParseFailure(p.to_owned())),
463 }
464}
465
#[cfg(test)]
mod tests {
    use super::*;

    /// Every statically compiled grammar must be accepted by a fresh tree-sitter parser.
    #[cfg(feature = "static-grammar-libs")]
    #[test]
    fn static_load_parsers() {
        // Collect all failures rather than stopping at the first, so one run reports every
        // broken grammar.
        let failures: Vec<_> = LANGUAGES
            .entries()
            .filter_map(|(&name, grammar_ctor)| {
                let mut parser = tree_sitter::Parser::new();
                let ts_lang = unsafe { grammar_ctor() };
                parser.set_language(&ts_lang).err().map(|e| (name, e))
            })
            .collect();

        assert!(failures.is_empty(), "{failures:#?}");
    }

    /// Each listed grammar must be loadable from its default shared library name.
    ///
    /// Ignored by default.
    #[cfg(feature = "dynamic-grammar-libs")]
    #[test]
    #[ignore]
    fn dynamic_load_parsers() {
        let language_names = [
            "rust", "cpp", "python", "bash", "ocaml", "go", "ruby", "java", "c_sharp", "css",
            "php", "json", "tsx", "hcl",
        ];

        let failures: Vec<&str> = language_names
            .iter()
            .copied()
            .filter(|name| generate_language_dynamic(name, None).is_err())
            .collect();

        assert!(failures.is_empty(), "{:#?}", failures);
    }

    /// Every statically compiled grammar must fall inside the supported ABI range.
    #[cfg(feature = "static-grammar-libs")]
    #[test]
    fn test_static_grammar_tree_sitter_abi_compatibility() -> Result<(), LoadingError> {
        // Stops at, and returns, the first ABI error encountered.
        LANGUAGES.values().try_for_each(|language_ctor| {
            let language = unsafe { language_ctor() };
            ts_language_abi_checked(&language)
        })
    }
}