tree_sitter_stack_graphs/
loader.rs

1// -*- coding: utf-8 -*-
2// ------------------------------------------------------------------------------------------------
3// Copyright © 2021, stack-graphs authors.
4// Licensed under either of Apache License, Version 2.0, or MIT license, at your option.
5// Please see the LICENSE-APACHE or LICENSE-MIT files in this distribution for license details.
6// ------------------------------------------------------------------------------------------------
7
8//! Defines file loader for stack graph languages
9
10use ini::Ini;
11use itertools::Itertools;
12use once_cell::sync::Lazy;
13use regex::Regex;
14use stack_graphs::graph::StackGraph;
15use std::borrow::Cow;
16use std::collections::HashMap;
17use std::ffi::OsStr;
18use std::path::Path;
19use std::path::PathBuf;
20use std::sync::Arc;
21use thiserror::Error;
22use tree_sitter::Language;
23use tree_sitter_graph::ast::File as TsgFile;
24use tree_sitter_graph::Variables;
25use tree_sitter_loader::Config as TsConfig;
26use tree_sitter_loader::LanguageConfiguration as TSLanguageConfiguration;
27use tree_sitter_loader::Loader as TsLoader;
28
29use crate::CancellationFlag;
30use crate::FileAnalyzer;
31use crate::StackGraphLanguage;
32use crate::FILE_PATH_VAR;
33
/// File name under which builtins are registered in the builtins stack graph; it is also
/// the value bound to the `FILE_PATH_VAR` global while builtins are being built.
const BUILTINS_FILENAME: &str = "<builtins>";

/// Default load paths for stack graph (TSG) definitions: `queries/stack-graphs`, resolved
/// relative to the grammar directory.
pub static DEFAULT_TSG_PATHS: Lazy<Vec<LoadPath>> =
    Lazy::new(|| vec![LoadPath::Grammar("queries/stack-graphs".into())]);
/// Default load paths for builtins: `queries/builtins`, resolved relative to the grammar
/// directory.
pub static DEFAULT_BUILTINS_PATHS: Lazy<Vec<LoadPath>> =
    Lazy::new(|| vec![LoadPath::Grammar("queries/builtins".into())]);
40
/// Data type that holds all information to recognize and analyze files for a language
pub struct LanguageConfiguration {
    /// The tree-sitter language used to parse files.
    pub language: Language,
    /// Optional scope of the language; loaders can be restricted to a single scope.
    pub scope: Option<String>,
    /// Optional regular expression matched against file content; the length of its first
    /// match is used as the score when several languages claim the same file.
    pub content_regex: Option<Regex>,
    /// File extensions (compared with the path's extension) handled by this language.
    pub file_types: Vec<String>,
    /// The stack graph language used to build stack graphs for files of this language.
    pub sgl: StackGraphLanguage,
    /// Stack graph holding the language's builtins (empty if no builtins were provided).
    pub builtins: StackGraph,
    /// Analyzers for special files, looked up by file name.
    pub special_files: FileAnalyzers,
    /// Can be set to true if the stack graph rules ensure that there can be no similar
    /// paths in a file, in which case it is safe to turn off similar path detection. If
    /// incorrectly set to true, performance of path finding suffers from exponential
    /// blow up.
    pub no_similar_paths_in_file: bool,
}
56
57impl LanguageConfiguration {
58    /// Build a language configuration from tsg and builtins sources. The tsg path
59    /// is kept for informational use only, see [`StackGraphLanguage::from_source`][].
60    pub fn from_sources<'a>(
61        language: Language,
62        scope: Option<String>,
63        content_regex: Option<Regex>,
64        file_types: Vec<String>,
65        tsg_path: PathBuf,
66        tsg_source: &'a str,
67        builtins_source: Option<(PathBuf, &'a str)>,
68        builtins_config: Option<&str>,
69        cancellation_flag: &dyn CancellationFlag,
70    ) -> Result<Self, LoadError<'a>> {
71        let sgl = StackGraphLanguage::from_source(language.clone(), tsg_path.clone(), tsg_source)
72            .map_err(|err| LoadError::SglParse {
73            inner: err,
74            tsg_path,
75            tsg: Cow::from(tsg_source),
76        })?;
77        let mut builtins = StackGraph::new();
78        if let Some((builtins_path, builtins_source)) = builtins_source {
79            let mut builtins_globals = Variables::new();
80
81            if let Some(builtins_config) = builtins_config {
82                Loader::load_globals_from_config_str(builtins_config, &mut builtins_globals)?;
83            }
84
85            builtins_globals
86                .add(FILE_PATH_VAR.into(), BUILTINS_FILENAME.into())
87                .unwrap_or_default();
88
89            let file = builtins.add_file(BUILTINS_FILENAME).unwrap();
90            sgl.build_stack_graph_into(
91                &mut builtins,
92                file,
93                builtins_source,
94                &builtins_globals,
95                cancellation_flag,
96            )
97            .map_err(|err| LoadError::Builtins {
98                inner: err,
99                source_path: builtins_path,
100                source: Cow::from(builtins_source),
101                tsg_path: sgl.tsg_path.clone(),
102                tsg: Cow::from(tsg_source),
103            })?;
104        }
105        Ok(LanguageConfiguration {
106            language,
107            scope,
108            content_regex,
109            file_types,
110            sgl,
111            builtins,
112            special_files: FileAnalyzers::new(),
113            no_similar_paths_in_file: false,
114        })
115    }
116
117    // Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
118    fn best_for_file<'a>(
119        languages: &'a Vec<LanguageConfiguration>,
120        path: &Path,
121        content: &mut dyn ContentProvider,
122    ) -> std::io::Result<Option<&'a LanguageConfiguration>> {
123        let mut best_score = -1isize;
124        let mut best = None;
125        for language in languages {
126            if let Some(score) =
127                matches_file(&language.file_types, &language.content_regex, path, content)?
128            {
129                if score > best_score {
130                    best_score = score;
131                    best = Some(language);
132                }
133            }
134        }
135        Ok(best)
136    }
137
138    pub fn matches_file(
139        &self,
140        path: &Path,
141        content: &mut dyn ContentProvider,
142    ) -> std::io::Result<bool> {
143        matches_file(&self.file_types, &self.content_regex, path, content).map(|l| l.is_some())
144    }
145}
146
/// Collection of file analyzers, keyed by file name.
#[derive(Clone, Default)]
pub struct FileAnalyzers {
    /// Maps a file name to the analyzer registered for it.
    file_analyzers: HashMap<String, Arc<dyn FileAnalyzer + Send + Sync>>,
}
151
152impl FileAnalyzers {
153    pub fn new() -> Self {
154        FileAnalyzers {
155            file_analyzers: HashMap::new(),
156        }
157    }
158
159    pub fn with(
160        mut self,
161        file_name: String,
162        analyzer: impl FileAnalyzer + Send + Sync + 'static,
163    ) -> Self {
164        self.file_analyzers.insert(file_name, Arc::new(analyzer));
165        self
166    }
167
168    pub fn add(
169        &mut self,
170        file_name: String,
171        analyzer: impl FileAnalyzer + Send + Sync + 'static,
172    ) -> &mut Self {
173        self.file_analyzers.insert(file_name, Arc::new(analyzer));
174        self
175    }
176
177    pub fn get(&self, file_name: &str) -> Option<Arc<dyn FileAnalyzer + Send + Sync>> {
178        self.file_analyzers.get(file_name).cloned()
179    }
180}
181
/// A load path specifies a file to load from, either as a regular path or relative to the grammar location.
#[derive(Clone, Debug)]
pub enum LoadPath {
    /// A path that is used as is.
    Regular(PathBuf),
    /// A path that is joined onto the grammar directory.
    Grammar(PathBuf),
}
188
189impl LoadPath {
190    fn get_for_grammar(&self, grammar_path: &Path) -> PathBuf {
191        match self {
192            Self::Regular(path) => path.clone(),
193            Self::Grammar(path) => grammar_path.join(path),
194        }
195    }
196}
197
/// The loader is created from either a tree-sitter configuration or a list of search paths, and an
/// optional scope and search paths for stack graphs definitions and builtins; or a list of language
/// configurations.
///
/// The loader is called with a file path and optional file content and tries to find the language for
/// that file. The loader will search for tree-sitter languages in the given search paths, or in the
/// current directory and the paths defined in the tree-sitter configuration. If a scope is provided, it
/// will be used to restrict the discovered languages to those with a matching scope. If no languages
/// were found at all, an error is raised. Otherwise, a language matching the file path and content is
/// returned, if it exists among the discovered languages.
///
/// The paths for stack graphs definitions and builtins can be regular or relative to the grammar directory.
/// Paths may omit file extensions, in which case any supported file extension will be tried. The first path
/// that exists will be selected. It is considered an error if no stack graphs definition is found. Builtins
/// are always optional.
///
/// Previously loaded languages are cached in the loader, so subsequent loads are fast.
pub struct Loader(LoaderImpl);
216
/// The two strategies behind [`Loader`].
enum LoaderImpl {
    /// Languages are discovered on the file system from search paths.
    Paths(PathLoader),
    /// Languages are taken from a list of ready-made language configurations.
    Provided(LanguageConfigurationsLoader),
}
221
222impl Loader {
223    pub fn from_paths(
224        paths: Vec<PathBuf>,
225        scope: Option<String>,
226        tsg_paths: Vec<LoadPath>,
227        builtins_paths: Vec<LoadPath>,
228    ) -> Result<Self, LoadError<'static>> {
229        Ok(Self(LoaderImpl::Paths(PathLoader {
230            loader: SupplementedTsLoader::new()?,
231            paths,
232            scope,
233            tsg_paths,
234            builtins_paths,
235            cache: Vec::new(),
236        })))
237    }
238
239    pub fn from_tree_sitter_configuration(
240        config: &TsConfig,
241        scope: Option<String>,
242        tsg_paths: Vec<LoadPath>,
243        builtins_paths: Vec<LoadPath>,
244    ) -> Result<Self, LoadError<'static>> {
245        Ok(Self(LoaderImpl::Paths(PathLoader {
246            loader: SupplementedTsLoader::new()?,
247            paths: PathLoader::config_paths(config)?,
248            scope,
249            tsg_paths,
250            builtins_paths,
251            cache: Vec::new(),
252        })))
253    }
254
255    pub fn from_language_configurations(
256        configurations: Vec<LanguageConfiguration>,
257        scope: Option<String>,
258    ) -> Result<Self, LoadError<'static>> {
259        let configurations = configurations
260            .into_iter()
261            .filter(|lc| scope.is_none() || lc.scope == scope)
262            .collect();
263        Ok(Self(LoaderImpl::Provided(LanguageConfigurationsLoader {
264            configurations,
265        })))
266    }
267
268    /// Load a Tree-sitter language for the given file. Loading is based on the loader configuration and the given file path.
269    /// Most users should use [`Self::load_for_file`], but this method can be useful if only the underlying Tree-sitter language
270    /// is necessary, as it will not attempt to load the TSG file.
271    pub fn load_tree_sitter_language_for_file(
272        &mut self,
273        path: &Path,
274        content: &mut dyn ContentProvider,
275    ) -> Result<Option<&tree_sitter::Language>, LoadError<'static>> {
276        match &mut self.0 {
277            LoaderImpl::Paths(loader) => loader.load_tree_sitter_language_for_file(path, content),
278            LoaderImpl::Provided(loader) => {
279                loader.load_tree_sitter_language_for_file(path, content)
280            }
281        }
282    }
283
284    /// Load a stack graph language for the given file. Loading is based on the loader configuration and the given file path.
285    pub fn load_for_file<'a>(
286        &'a mut self,
287        path: &Path,
288        content: &mut dyn ContentProvider,
289        cancellation_flag: &dyn CancellationFlag,
290    ) -> Result<FileLanguageConfigurations<'a>, LoadError<'static>> {
291        match &mut self.0 {
292            LoaderImpl::Paths(loader) => loader.load_for_file(path, content, cancellation_flag),
293            LoaderImpl::Provided(loader) => loader.load_for_file(path, content),
294        }
295    }
296
297    pub fn load_globals_from_config_path(
298        path: &Path,
299        globals: &mut Variables,
300    ) -> Result<(), LoadError<'static>> {
301        let conf = Ini::load_from_file(path)?;
302        Self::load_globals_from_config(&conf, globals)
303    }
304
305    pub fn load_globals_from_config_str(
306        config: &str,
307        globals: &mut Variables,
308    ) -> Result<(), LoadError<'static>> {
309        if config.is_empty() {
310            return Ok(());
311        }
312        let conf = Ini::load_from_str(config).map_err(ini::Error::Parse)?;
313        Self::load_globals_from_config(&conf, globals)
314    }
315
316    fn load_tsg<'a>(
317        language: Language,
318        tsg_source: Cow<'a, str>,
319    ) -> Result<TsgFile, LoadError<'a>> {
320        let tsg = TsgFile::from_str(language, &tsg_source).map_err(|err| LoadError::TsgParse {
321            inner: err,
322            tsg_path: PathBuf::from("<unknown tsg path>"),
323            tsg: Cow::from(tsg_source),
324        })?;
325        Ok(tsg)
326    }
327
328    fn load_builtins_into<'a>(
329        sgl: &StackGraphLanguage,
330        path: &Path,
331        source: Cow<'a, str>,
332        config: &str,
333        graph: &mut StackGraph,
334        cancellation_flag: &dyn CancellationFlag,
335    ) -> Result<(), LoadError<'a>> {
336        let file_name = path.to_string_lossy();
337        let file = graph.add_file(&file_name).unwrap();
338        let mut globals = Variables::new();
339
340        Self::load_globals_from_config_str(&config, &mut globals)?;
341
342        globals
343            .add(FILE_PATH_VAR.into(), BUILTINS_FILENAME.into())
344            .unwrap_or_default();
345
346        sgl.build_stack_graph_into(graph, file, &source, &globals, cancellation_flag)
347            .map_err(|err| LoadError::Builtins {
348                inner: err,
349                source_path: path.to_path_buf(),
350                source,
351                tsg_path: sgl.tsg_path.to_path_buf(),
352                tsg: sgl.tsg_source.clone(),
353            })?;
354        return Ok(());
355    }
356
357    fn load_globals_from_config(
358        conf: &Ini,
359        globals: &mut Variables,
360    ) -> Result<(), LoadError<'static>> {
361        if let Some(globals_section) = conf.section(Some("globals")) {
362            for (name, value) in globals_section.iter() {
363                globals.add(name.into(), value.into()).map_err(|_| {
364                    LoadError::Reader(
365                        format!("Duplicate global variable {} in config", name).into(),
366                    )
367                })?;
368            }
369        }
370        Ok(())
371    }
372}
373
/// Struct holding the language configurations for a file.
///
/// The default value has no primary language and no secondary languages.
#[derive(Default)]
pub struct FileLanguageConfigurations<'a> {
    /// The file's primary language. The language configuration's `StackGraphLanguage` should be used to process the file.
    pub primary: Option<&'a LanguageConfiguration>,
    /// Any secondary languages, which have special file analyzers for the file.
    pub secondary: Vec<(
        &'a LanguageConfiguration,
        Arc<dyn FileAnalyzer + Send + Sync>,
    )>,
}
385
386impl FileLanguageConfigurations<'_> {
387    pub fn has_some(&self) -> bool {
388        self.primary.is_some() || !self.secondary.is_empty()
389    }
390
391    pub fn no_similar_paths_in_file(&self) -> bool {
392        let mut no_similar_paths_in_file = true;
393        if let Some(lc) = &self.primary {
394            no_similar_paths_in_file &= lc.no_similar_paths_in_file;
395        }
396        for (lc, _) in &self.secondary {
397            no_similar_paths_in_file &= lc.no_similar_paths_in_file;
398        }
399        return no_similar_paths_in_file;
400    }
401}
402
/// Errors that can occur while loading languages, stack graph definitions, or builtins.
///
/// The lifetime parameter is that of the TSG and builtins sources carried by some variants.
#[derive(Debug, Error)]
pub enum LoadError<'a> {
    /// Loading was cancelled.
    // NOTE(review): not constructed in this file; presumably raised via the cancellation flag.
    #[error("{0}")]
    Cancelled(&'static str),
    /// An INI configuration could not be read or parsed.
    #[error(transparent)]
    Config(#[from] ini::Error),
    /// An I/O operation failed.
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// The stack graph language could not be created from its TSG source.
    #[error("{inner}")]
    SglParse {
        #[source]
        inner: crate::LanguageError,
        tsg_path: PathBuf,
        tsg: Cow<'a, str>,
    },
    /// No languages were found; the message describes where the loader looked.
    #[error("No languages found {0}")]
    NoLanguagesFound(String),
    /// None of the TSG load paths exists for the selected language.
    #[error("No TSG file found")]
    NoTsgFound,
    /// Used in this file for duplicate global variables found in a configuration.
    #[error(transparent)]
    Reader(Box<dyn std::error::Error + Send + Sync>),
    /// Building the builtins stack graph failed.
    #[error("{inner}")]
    Builtins {
        #[source]
        inner: crate::BuildError,
        source_path: PathBuf,
        source: Cow<'a, str>,
        tsg_path: PathBuf,
        tsg: Cow<'a, str>,
    },
    /// The TSG source could not be parsed.
    #[error("{inner}")]
    TsgParse {
        inner: tree_sitter_graph::ParseError,
        tsg_path: PathBuf,
        tsg: Cow<'a, str>,
    },
    /// An error reported by the tree-sitter loader.
    #[error(transparent)]
    TreeSitter(anyhow::Error),
}
442
impl LoadError<'_> {
    /// Returns a value whose `Display` output is a pretty rendering of this error. For the
    /// variants that carry sources and paths, the rendering is delegated to the inner
    /// error's own `display_pretty`.
    pub fn display_pretty<'a>(&'a self) -> impl std::fmt::Display + 'a {
        DisplayLoadErrorPretty { error: self }
    }
}
448
/// Display adapter returned by [`LoadError::display_pretty`].
struct DisplayLoadErrorPretty<'a> {
    /// The error to render.
    error: &'a LoadError<'a>,
}
452
453impl std::fmt::Display for DisplayLoadErrorPretty<'_> {
454    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
455        match self.error {
456            LoadError::Builtins {
457                inner,
458                source_path,
459                source,
460                tsg_path,
461                tsg,
462            } => write!(
463                f,
464                "{}",
465                inner.display_pretty(source_path, source, tsg_path, tsg)
466            )?,
467            LoadError::SglParse {
468                inner,
469                tsg_path,
470                tsg,
471            } => write!(f, "{}", inner.display_pretty(tsg_path, tsg))?,
472            LoadError::TsgParse {
473                inner,
474                tsg_path,
475                tsg,
476            } => write!(f, "{}", inner.display_pretty(tsg_path, tsg))?,
477            err => writeln!(f, "{}", err)?,
478        }
479        Ok(())
480    }
481}
482
483// ------------------------------------------------------------------------------------------------
484// provided languages loader
485
/// Loader backed by a fixed list of language configurations.
struct LanguageConfigurationsLoader {
    /// The configurations to choose from.
    configurations: Vec<LanguageConfiguration>,
}
489
490impl LanguageConfigurationsLoader {
491    /// Load a Tree-sitter language for the given file. Loading is based on the loader configuration and the given file path.
492    /// Most users should use [`Self::load_for_file`], but this method can be useful if only the underlying Tree-sitter language
493    /// is necessary, as it will not attempt to load the TSG file.
494    pub fn load_tree_sitter_language_for_file(
495        &mut self,
496        path: &Path,
497        content: &mut dyn ContentProvider,
498    ) -> Result<Option<&tree_sitter::Language>, LoadError<'static>> {
499        for configuration in self.configurations.iter() {
500            if configuration.matches_file(path, content)? {
501                return Ok(Some(&configuration.language));
502            }
503        }
504        Ok(None)
505    }
506
507    /// Load a stack graph language for the given file. Loading is based on the loader configuration and the given file path.
508    pub fn load_for_file<'a>(
509        &'a mut self,
510        path: &Path,
511        content: &mut dyn ContentProvider,
512    ) -> Result<FileLanguageConfigurations<'a>, LoadError<'static>> {
513        let primary = LanguageConfiguration::best_for_file(&self.configurations, path, content)?;
514        let mut secondary = Vec::new();
515        for language in self.configurations.iter() {
516            if let Some(fa) = path
517                .file_name()
518                .and_then(|file_name| language.special_files.get(&file_name.to_string_lossy()))
519            {
520                secondary.push((language, fa));
521            }
522        }
523        Ok(FileLanguageConfigurations { primary, secondary })
524    }
525}
526
527// ------------------------------------------------------------------------------------------------
528// path based loader
529
/// Loader that discovers languages on the file system.
struct PathLoader {
    /// Wrapper around the tree-sitter loader used to discover languages.
    loader: SupplementedTsLoader,
    /// Paths that are searched for languages, in order.
    paths: Vec<PathBuf>,
    /// If set, only languages with this scope are considered.
    scope: Option<String>,
    /// Candidate locations of the TSG file; the first one that exists is used.
    tsg_paths: Vec<LoadPath>,
    /// Candidate locations of the builtins file; the first one that exists is used.
    builtins_paths: Vec<LoadPath>,
    /// Language configurations that were already loaded, keyed by their tree-sitter language.
    cache: Vec<(Language, LanguageConfiguration)>,
}
538
539impl PathLoader {
540    // Adopted from tree_sitter_loader::Loader::load
541    fn config_paths(config: &TsConfig) -> Result<Vec<PathBuf>, LoadError<'static>> {
542        if config.parser_directories.is_empty() {
543            eprintln!("Warning: You have not configured any parser directories!");
544            eprintln!("Please run `tree-sitter init-config` and edit the resulting");
545            eprintln!("configuration file to indicate where we should look for");
546            eprintln!("language grammars.");
547            eprintln!("");
548        }
549        let mut paths = Vec::new();
550        for parser_container_dir in &config.parser_directories {
551            if let Ok(entries) = std::fs::read_dir(parser_container_dir) {
552                for entry in entries {
553                    let entry = entry?;
554                    if let Some(parser_dir_name) = entry.file_name().to_str() {
555                        if parser_dir_name.starts_with("tree-sitter-") {
556                            paths.push(parser_container_dir.join(parser_dir_name));
557                        }
558                    }
559                }
560            }
561        }
562        Ok(paths)
563    }
564
565    pub fn load_tree_sitter_language_for_file(
566        &mut self,
567        path: &Path,
568        content: &mut dyn ContentProvider,
569    ) -> Result<Option<&tree_sitter::Language>, LoadError<'static>> {
570        if let Some(selected_language) = self.select_language_for_file(path, content)? {
571            return Ok(Some(&selected_language.language));
572        }
573        Ok(None)
574    }
575
576    pub fn load_for_file<'a>(
577        &'a mut self,
578        path: &Path,
579        content: &mut dyn ContentProvider,
580        cancellation_flag: &dyn CancellationFlag,
581    ) -> Result<FileLanguageConfigurations<'a>, LoadError<'static>> {
582        let selected_language = self.select_language_for_file(path, content)?;
583        let language = match selected_language {
584            Some(selected_language) => selected_language.clone(),
585            None => return Ok(FileLanguageConfigurations::default()),
586        };
587        // the borrow checker is a hard master...
588        let index = self.cache.iter().position(|e| &e.0 == &language.language);
589        let index = match index {
590            Some(index) => index,
591            None => {
592                let tsg = self.load_tsg_from_paths(&language)?;
593                let sgl = StackGraphLanguage::new(language.language.clone(), tsg);
594
595                let mut builtins = StackGraph::new();
596                self.load_builtins_from_paths_into(
597                    &language,
598                    &sgl,
599                    &mut builtins,
600                    cancellation_flag,
601                )?;
602
603                let lc = LanguageConfiguration {
604                    language: language.language.clone(),
605                    scope: language.scope,
606                    content_regex: language.content_regex,
607                    file_types: language.file_types,
608                    sgl,
609                    builtins,
610                    special_files: FileAnalyzers::new(),
611                    // always detect similar paths, we don't know the language configuration when loading from the file system
612                    no_similar_paths_in_file: false,
613                };
614                self.cache.push((language.language, lc));
615
616                self.cache.len() - 1
617            }
618        };
619        let lc = &self.cache[index].1;
620        Ok(FileLanguageConfigurations {
621            primary: Some(lc),
622            secondary: Vec::default(),
623        })
624    }
625
    /// Select language for the given file, considering paths and scope fields.
    ///
    /// The search paths are tried in order and the first language matching the file is
    /// returned. `Ok(None)` means languages were found in at least one path, but none of
    /// them matches the file.
    ///
    /// # Errors
    ///
    /// Returns [`LoadError::NoLanguagesFound`] if none of the search paths yielded any
    /// languages; any other error from a search path is returned immediately.
    fn select_language_for_file(
        &mut self,
        file_path: &Path,
        file_content: &mut dyn ContentProvider,
    ) -> Result<Option<&SupplementedLanguage>, LoadError<'static>> {
        // The borrow checker is not smart enough to realize that the early returns
        // ensure any references from the self.select_* call (which require a mutable
        // borrow) do not outlive the match. Therefore, we use a raw self_ptr and unsafe
        // dereferencing to make those calls.
        //
        // NOTE(review): the `&mut *self_ptr` below is created while `self.paths` is
        // borrowed by the loop; confirm this aliasing is acceptable or restructure.
        let self_ptr = self as *mut Self;
        let mut found_languages = false;
        for path in &self.paths {
            found_languages |= match unsafe { &mut *self_ptr }.select_language_for_file_from_path(
                &path,
                file_path,
                file_content,
            ) {
                // A match ends the search.
                Ok(Some(language)) => return Ok(Some(language)),
                // The path has languages, but none matches the file.
                Ok(None) => true,
                // No languages at this path; keep looking in the remaining paths.
                Err(LoadError::NoLanguagesFound(_)) => false,
                Err(err) => return Err(err),
            };
        }
        if !found_languages {
            return Err(LoadError::NoLanguagesFound(format!(
                "in {}{}",
                self.paths.iter().map(|p| p.display()).format(":"),
                self.scope
                    .as_ref()
                    .map_or(String::default(), |s| format!(" for scope {}", s)),
            )));
        }
        Ok(None)
    }
661
662    // Select language from the given path for the given file, considering scope field
663    fn select_language_for_file_from_path(
664        &mut self,
665        language_path: &Path,
666        file_path: &Path,
667        file_content: &mut dyn ContentProvider,
668    ) -> Result<Option<&SupplementedLanguage>, LoadError> {
669        let scope = self.scope.as_deref();
670        let languages = self.loader.languages_at_path(language_path, scope)?;
671        if languages.is_empty() {
672            return Err(LoadError::NoLanguagesFound(format!(
673                "at {}{}",
674                language_path.display(),
675                scope.map_or(String::default(), |s| format!(" for scope {}", s)),
676            )));
677        }
678        if let Some(language) =
679            SupplementedLanguage::best_for_file(languages, file_path, file_content)?
680        {
681            return Ok(Some(language));
682        };
683        Ok(None)
684    }
685
686    // Load the TSG file for the given language and path
687    fn load_tsg_from_paths(
688        &self,
689        language: &SupplementedLanguage,
690    ) -> Result<TsgFile, LoadError<'static>> {
691        for tsg_path in &self.tsg_paths {
692            let mut tsg_path = tsg_path.get_for_grammar(&language.root_path);
693            if tsg_path.extension().is_none() {
694                tsg_path.set_extension("tsg");
695            }
696            if tsg_path.exists() {
697                let tsg_source = std::fs::read_to_string(tsg_path)?;
698                return Loader::load_tsg(language.language.clone(), Cow::from(tsg_source));
699            }
700        }
701        return Err(LoadError::NoTsgFound);
702    }
703
704    // Builtins are loaded from queries/builtins.EXT and an optional queries/builtins.cfg configuration.
705    // In the future, we may extend this to support builtins spread over multiple files queries/builtins/NAME.EXT
706    // and optional corresponding configuration files queries/builtins/NAME.cfg.
707    fn load_builtins_from_paths_into(
708        &self,
709        language: &SupplementedLanguage,
710        sgl: &StackGraphLanguage,
711        graph: &mut StackGraph,
712        cancellation_flag: &dyn CancellationFlag,
713    ) -> Result<(), LoadError<'static>> {
714        for builtins_path in &self.builtins_paths {
715            let mut builtins_path = builtins_path.get_for_grammar(&language.root_path);
716            if builtins_path.exists() && !builtins_path.is_dir() {
717                return Self::load_builtins_from_path_into(
718                    sgl,
719                    &builtins_path,
720                    graph,
721                    cancellation_flag,
722                );
723            }
724            for extension in &language.file_types {
725                builtins_path.set_extension(extension);
726                if builtins_path.exists() && !builtins_path.is_dir() {
727                    return Self::load_builtins_from_path_into(
728                        sgl,
729                        &builtins_path,
730                        graph,
731                        cancellation_flag,
732                    );
733                }
734            }
735        }
736        Ok(())
737    }
738
739    fn load_builtins_from_path_into(
740        sgl: &StackGraphLanguage,
741        builtins_path: &Path,
742        graph: &mut StackGraph,
743        cancellation_flag: &dyn CancellationFlag,
744    ) -> Result<(), LoadError<'static>> {
745        let source = std::fs::read_to_string(builtins_path)?;
746        let mut config_path = builtins_path.to_path_buf();
747        config_path.set_extension("cfg");
748        let config = if config_path.exists() {
749            std::fs::read_to_string(builtins_path)?
750        } else {
751            "".into()
752        };
753        Loader::load_builtins_into(
754            sgl,
755            builtins_path,
756            Cow::from(source),
757            &config,
758            graph,
759            cancellation_flag,
760        )
761    }
762}
763
764// ------------------------------------------------------------------------------------------------
765// tree_sitter_loader supplements
766
/// Wraps a tree_sitter_loader::Loader.
///
/// The second field caches the supplemented languages found per path.
struct SupplementedTsLoader(TsLoader, HashMap<PathBuf, Vec<SupplementedLanguage>>);
769
770impl SupplementedTsLoader {
771    pub fn new() -> Result<Self, LoadError<'static>> {
772        let loader = TsLoader::new().map_err(LoadError::TreeSitter)?;
773        Ok(Self(loader, HashMap::new()))
774    }
775
776    pub fn languages_at_path(
777        &mut self,
778        path: &Path,
779        scope: Option<&str>,
780    ) -> Result<Vec<&SupplementedLanguage>, LoadError> {
781        if !self.1.contains_key(path) {
782            let languages = self
783                .0
784                .languages_at_path(&path)
785                .map_err(LoadError::TreeSitter)?;
786            let configurations = self
787                .0
788                .find_language_configurations_at_path(&path, true)
789                .map_err(LoadError::TreeSitter)?;
790            let languages = languages
791                .into_iter()
792                .map(|(l, _)| l)
793                .zip(configurations.into_iter())
794                .map(SupplementedLanguage::from)
795                .filter(|language| scope.map_or(true, |scope| language.matches_scope(scope)))
796                .collect::<Vec<_>>();
797            self.1.insert(path.to_path_buf(), languages);
798        }
799        Ok(self.1[path].iter().map(|l| l).collect())
800    }
801}
802
/// A tree-sitter language together with the parts of its tree-sitter language
/// configuration that are needed to select it for a file.
#[derive(Clone, Debug)]
struct SupplementedLanguage {
    /// The tree-sitter language.
    pub language: Language,
    /// Scope of the language, if it has one.
    pub scope: Option<String>,
    /// Regular expression matched against file content, if any.
    pub content_regex: Option<Regex>,
    /// File extensions handled by the language.
    pub file_types: Vec<String>,
    /// Root path of the language configuration; grammar-relative load paths are resolved
    /// against it.
    pub root_path: PathBuf,
}
811
812impl SupplementedLanguage {
813    pub fn matches_scope(&self, scope: &str) -> bool {
814        self.scope.as_ref().map_or(false, |s| s == scope)
815    }
816
817    // Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
818    pub fn matches_file(
819        &self,
820        path: &Path,
821        content: &mut dyn ContentProvider,
822    ) -> std::io::Result<Option<isize>> {
823        matches_file(&self.file_types, &self.content_regex, path, content)
824    }
825
826    // Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
827    pub fn best_for_file<'a>(
828        languages: Vec<&'a SupplementedLanguage>,
829        path: &Path,
830        content: &mut dyn ContentProvider,
831    ) -> std::io::Result<Option<&'a SupplementedLanguage>> {
832        let mut best_score = -1isize;
833        let mut best = None;
834        for language in languages {
835            if let Some(score) = language.matches_file(path, content)? {
836                if score > best_score {
837                    best_score = score;
838                    best = Some(language);
839                }
840            }
841        }
842        Ok(best)
843    }
844}
845
846impl From<(Language, &TSLanguageConfiguration<'_>)> for SupplementedLanguage {
847    fn from((language, config): (Language, &TSLanguageConfiguration)) -> Self {
848        Self {
849            scope: config.scope.clone(),
850            content_regex: config.content_regex.clone(),
851            file_types: config.file_types.clone(),
852            root_path: config.root_path.clone(),
853            language,
854        }
855    }
856}
857
858// Extracted from tree_sitter_loader::Loader::language_configuration_for_file_name
859pub fn matches_file(
860    file_types: &Vec<String>,
861    content_regex: &Option<Regex>,
862    path: &Path,
863    content: &mut dyn ContentProvider,
864) -> std::io::Result<Option<isize>> {
865    // Check path extension
866    if !path
867        .extension()
868        .and_then(OsStr::to_str)
869        .map_or(false, |ext| file_types.iter().any(|ft| ft == ext))
870    {
871        return Ok(None);
872    }
873
874    // Apply content regex
875    let content = content.get(path)?;
876    if let (Some(file_content), Some(content_regex)) = (content, &content_regex) {
877        // If the language configuration has a content regex, assign
878        // a score based on the length of the first match.
879        if let Some(mat) = content_regex.find(&file_content) {
880            let score = (mat.end() - mat.start()) as isize;
881            return Ok(Some(score));
882        } else {
883            return Ok(None);
884        }
885    }
886
887    Ok(Some(0isize))
888}
889
/// Source of file content that is consulted when matching languages against a file.
pub trait ContentProvider {
    /// Returns the content of the file at `path`, or `None` if the provider has no content,
    /// in which case content based matching is skipped.
    fn get(&mut self, path: &Path) -> std::io::Result<Option<&str>>;
}
893
/// FileReader reads files from the filesystem and caches the most recently read file.
#[derive(Debug, Default)]
pub struct FileReader {
    /// Path and content of the most recently read file, if any.
    cache: Option<(PathBuf, String)>,
}

impl FileReader {
    /// Creates a file reader with an empty cache.
    pub fn new() -> Self {
        Self::default()
    }

    /// Returns the content of the file at `path`.
    ///
    /// The file is only read if `path` differs from the most recently read path, so changes
    /// made to the file after it was cached are not observed.
    ///
    /// # Errors
    ///
    /// Fails if the file cannot be read as a UTF-8 string; the cache is left unchanged
    /// in that case.
    pub fn get(&mut self, path: &Path) -> std::io::Result<&str> {
        let is_cached = matches!(&self.cache, Some((cached, _)) if cached.as_path() == path);
        if !is_cached {
            let content = std::fs::read_to_string(path)?;
            self.cache = Some((path.to_path_buf(), content));
        }
        match &self.cache {
            Some((_, content)) => Ok(content.as_str()),
            None => unreachable!("the cache is filled above"),
        }
    }
}
912
913impl ContentProvider for FileReader {
914    fn get(&mut self, path: &Path) -> std::io::Result<Option<&str>> {
915        self.get(path).map(Some)
916    }
917}
918
919impl ContentProvider for Option<&str> {
920    fn get(&mut self, _path: &Path) -> std::io::Result<Option<&str>> {
921        Ok(self.clone())
922    }
923}