topiary_config/
language.rs

1//! This module contains the `Language` struct, which represents a language configuration, and
2//! associated methods.
3
4#[cfg(not(target_arch = "wasm32"))]
5use anyhow::anyhow;
6#[cfg(not(target_arch = "wasm32"))]
7use gix::{
8    interrupt::IS_INTERRUPTED,
9    progress::Discard,
10    remote::{self, fetch, fetch::refmap, Direction},
11    worktree::state::checkout,
12    ObjectId,
13};
14use std::collections::HashSet;
15#[cfg(not(target_arch = "wasm32"))]
16use std::num::NonZero;
17#[cfg(not(target_arch = "wasm32"))]
18use std::path::PathBuf;
19
20use crate::error::TopiaryConfigResult;
21#[cfg(not(target_arch = "wasm32"))]
22use crate::error::{TopiaryConfigError, TopiaryConfigFetchingError};
23
/// Language definitions, as far as the CLI and configuration are concerned, contain everything
/// needed to configure formatting for that language.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct Language {
    /// The name of the language, used as a key when looking up information in the deserialised
    /// configuration and to convert to the respective Tree-sitter grammar. It is also used to
    /// derive the query file name (`<name>.scm`) and, when no explicit symbol is configured,
    /// the grammar symbol (`tree_sitter_<name>`, with `-` replaced by `_`).
    pub name: String,

    /// The configuration of the language, includes all properties that Topiary
    /// needs to properly format the language
    pub config: LanguageConfiguration,
}
36
/// The per-language settings: associated file extensions, indentation string and the
/// Tree-sitter grammar to use.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct LanguageConfiguration {
    /// A set of the filetype extensions associated with this language. This enables Topiary to
    /// switch to the right language based on the input filename.
    pub extensions: HashSet<String>,

    /// The indentation string used for this language; defaults to "  " (i.e., two spaces). Any
    /// string can be provided, but in most instances it will be some whitespace (e.g., "    ",
    /// "\t", etc.)
    // NOTE(review): the default is not applied in this module; presumably consumers
    // substitute it when this is `None` -- confirm.
    pub indent: Option<String>,

    /// The Tree-sitter source of the language; contains all that is needed to pull and compile
    /// the Tree-sitter grammar.
    pub grammar: Grammar,
}
51
/// Describes where a language's Tree-sitter grammar comes from and which symbol to load from it.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct Grammar {
    /// Where the grammar is obtained from (a Git repository or a filesystem path). Not
    /// available on `wasm32` targets.
    #[cfg(not(target_arch = "wasm32"))]
    pub source: GrammarSource,
    /// The symbol of the language in the compiled grammar. Usually this is
    /// `tree_sitter_<LANGUAGE_NAME>`, but in rare cases it differs. For
    /// instance our "tree-sitter-query" language, where the symbol is:
    /// `tree_sitter_query` instead of `tree_sitter_tree_sitter_query`.
    pub symbol: Option<String>,
}
62
/// The origin of a Tree-sitter grammar. Serialised as `git` or `path`.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
#[cfg(not(target_arch = "wasm32"))]
pub enum GrammarSource {
    /// A Git repository from which the grammar is fetched and compiled when the compiled
    /// library is not yet present.
    #[serde(rename = "git")]
    Git(GitSource),
    /// A filesystem path that is used directly as the grammar library path; nothing is
    /// fetched or compiled for this variant.
    #[serde(rename = "path")]
    Path(PathBuf),
}
71
/// A Tree-sitter grammar that lives in a Git repository.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
#[cfg(not(target_arch = "wasm32"))]
pub struct GitSource {
    /// The URL of the git repository that contains the tree-sitter grammar.
    pub git: String,
    /// The revision of the git repository to use. It is passed as the fetch refspec and is
    /// also used to name the compiled library in the cache.
    pub rev: String,
    /// The sub-directory within the repository where the grammar is located. Defaults to the
    /// root of the repository.
    pub subdir: Option<String>,
}
82
83impl Language {
84    pub fn new(name: String, config: LanguageConfiguration) -> Self {
85        Self { name, config }
86    }
87
88    #[cfg(not(target_arch = "wasm32"))]
89    pub fn find_query_file(&self) -> TopiaryConfigResult<PathBuf> {
90        let basename = PathBuf::from(self.name.as_str()).with_extension("scm");
91
92        #[rustfmt::skip]
93        let potentials: [Option<PathBuf>; 4] = [
94            std::env::var("TOPIARY_LANGUAGE_DIR").map(PathBuf::from).ok(),
95            option_env!("TOPIARY_LANGUAGE_DIR").map(PathBuf::from),
96            Some(PathBuf::from("./topiary-queries/queries")),
97            Some(PathBuf::from("../topiary-queries/queries")),
98        ];
99
100        potentials
101            .into_iter()
102            .flatten()
103            .map(|path| path.join(&basename))
104            .find(|path| path.exists())
105            .ok_or_else(|| TopiaryConfigError::QueryFileNotFound(basename))
106    }
107
108    #[cfg(not(target_arch = "wasm32"))]
109    // Returns the library path, and ensures the parent directories exist.
110    pub fn library_path(&self) -> std::io::Result<PathBuf> {
111        match &self.config.grammar.source {
112            GrammarSource::Git(git_source) => {
113                let mut library_path = crate::project_dirs().cache_dir().to_path_buf();
114                library_path.push(self.name.clone());
115                std::fs::create_dir_all(&library_path)?;
116
117                // Set the output path as the revision of the grammar,
118                // with a platform-appropriate extension
119                library_path.push(git_source.rev.clone());
120                library_path.set_extension(std::env::consts::DLL_EXTENSION);
121
122                Ok(library_path)
123            }
124
125            GrammarSource::Path(path) => Ok(path.to_path_buf()),
126        }
127    }
128
129    #[cfg(not(target_arch = "wasm32"))]
130    // NOTE: Much of the following code is heavily inspired by the `helix-loader` crate with license MPL-2.0.
131    // To be safe, assume any and all of the following code is MLP-2.0 and copyrighted to the Helix project.
132    pub fn grammar(
133        &self,
134    ) -> Result<topiary_tree_sitter_facade::Language, TopiaryConfigFetchingError> {
135        let library_path = self.library_path()?;
136
137        // Ensure the comile exists
138        if !library_path.is_file() {
139            match &self.config.grammar.source {
140                GrammarSource::Git(git_source) => {
141                    git_source.fetch_and_compile(&self.name, library_path.clone())?
142                }
143                GrammarSource::Path(_) => {
144                    return Err(TopiaryConfigFetchingError::GrammarFileNotFound(
145                        library_path,
146                    ))
147                }
148            }
149        }
150
151        assert!(library_path.is_file());
152        log::debug!("Loading grammar from {}", library_path.to_string_lossy());
153
154        use libloading::{Library, Symbol};
155
156        let library = unsafe { Library::new(&library_path) }?;
157        let language_fn_name = if let Some(symbol_name) = self.config.grammar.symbol.clone() {
158            symbol_name
159        } else {
160            format!("tree_sitter_{}", self.name.replace('-', "_"))
161        };
162
163        let language = unsafe {
164            let language_fn: Symbol<unsafe extern "C" fn() -> *const ()> =
165                library.get(language_fn_name.as_bytes())?;
166            tree_sitter_language::LanguageFn::from_raw(*language_fn)
167        };
168        std::mem::forget(library);
169        Ok(topiary_tree_sitter_facade::Language::from(language))
170    }
171
172    #[cfg(target_arch = "wasm32")]
173    pub async fn grammar(&self) -> TopiaryConfigResult<topiary_tree_sitter_facade::Language> {
174        let language_name = self.name.as_str();
175
176        let grammar_path = if language_name == "tree_sitter_query" {
177            "/playground/scripts/tree-sitter-query.wasm".to_string()
178        } else {
179            format!("/playground/scripts/tree-sitter-{language_name}.wasm")
180        };
181
182        Ok(
183            topiary_web_tree_sitter_sys::Language::load_path(&grammar_path)
184                .await
185                .map_err(|e| {
186                    let error: topiary_tree_sitter_facade::LanguageError = e.into();
187                    error
188                })?
189                .into(),
190        )
191    }
192}
193
194type Result<T, E = TopiaryConfigFetchingError> = std::result::Result<T, E>;
195
196trait GitResult<T> {
197    fn wrap_err(self) -> Result<T>;
198}
199
200impl<T, E: Into<anyhow::Error>> GitResult<T> for Result<T, E> {
201    fn wrap_err(self) -> Result<T> {
202        self.map_err(|e| TopiaryConfigFetchingError::Git(e.into()))
203    }
204}
205
206#[cfg(not(target_arch = "wasm32"))]
207impl GitSource {
208    fn fetch_and_compile(
209        &self,
210        name: &str,
211        library_path: PathBuf,
212    ) -> Result<(), TopiaryConfigFetchingError> {
213        log::info!(
214            "{}: Language Grammar not found, attempting to fetch and compile it",
215            name
216        );
217        // Create a temporary directory to clone the repository to. We could
218        // cached the repositories, but the additional disk space is probably
219        // not worth the benefits gained by caching. The tempdir is deleted
220        // when dropped
221        let tmp_dir = tempfile::tempdir()?;
222
223        self.fetch_and_compile_with_dir(name, library_path, false, tmp_dir.keep())
224    }
225
226    /// This function is heavily inspired by the one used in Nickel:
227    /// https://github.com/tweag/nickel/blob/master/git/src/lib.rs
228    pub fn fetch_and_compile_with_dir(
229        &self,
230        name: &str,
231        library_path: PathBuf,
232        force: bool,
233        tmp_dir: PathBuf,
234    ) -> Result<(), TopiaryConfigFetchingError> {
235        if !force && library_path.is_file() {
236            log::info!("{}: Built grammar already exists; nothing to do", name);
237            return Ok(());
238        }
239        let tmp_dir = tmp_dir.join(name);
240        std::fs::create_dir_all(&tmp_dir)?;
241
242        // Fetch the git directory somewhere temporary.
243        let git_tempdir = tempfile::tempdir().wrap_err()?;
244        let repo = gix::init(git_tempdir.path()).wrap_err()?;
245
246        let remote = repo
247            .remote_at(self.git.as_str())
248            .wrap_err()?
249            .with_fetch_tags(fetch::Tags::None)
250            .with_refspecs(Some(self.rev.as_str()), Direction::Fetch)
251            .wrap_err()?;
252
253        // This does similar credentials stuff to the git CLI (e.g. it looks for ssh
254        // keys if it's a fetch over ssh, or it tries to run `askpass` if it needs
255        // credentials for https). Maybe we want to have explicit credentials
256        // configuration instead of or in addition to the default?
257        let connection = remote.connect(Direction::Fetch).wrap_err()?;
258        let outcome = connection
259            .prepare_fetch(&mut Discard, remote::ref_map::Options::default())
260            .wrap_err()?
261            // For now, we always fetch shallow. Maybe for the index it's more efficient to
262            // keep a single repo around and update it? But that might be in another method.
263            .with_shallow(fetch::Shallow::DepthAtRemote(NonZero::new(1).unwrap()))
264            .receive(&mut Discard, &IS_INTERRUPTED)
265            .wrap_err()?;
266
267        if outcome.ref_map.mappings.len() > 1 {
268            return Err(anyhow!("we only asked for 1 ref; why did we get more?")).wrap_err();
269        }
270        if outcome.ref_map.mappings.is_empty() {
271            return Err(anyhow!("Ref not found: {:?} {:?}", self.git, self.rev,)).wrap_err();
272        }
273
274        let object_id = source_object_id(&outcome.ref_map.mappings[0].remote)?;
275        let object = repo.find_object(object_id).wrap_err()?;
276        let tree_id = object.peel_to_tree().wrap_err()?.id();
277        let mut index = repo.index_from_tree(&tree_id).wrap_err()?;
278
279        log::info!("{}: Checking out {} {}", name, self.git, self.rev);
280        checkout(
281            &mut index,
282            &tmp_dir,
283            repo.objects.clone(),
284            &Discard,
285            &Discard,
286            &IS_INTERRUPTED,
287            checkout::Options {
288                overwrite_existing: true,
289                ..Default::default()
290            },
291        )
292        .wrap_err()?;
293        index.write(Default::default()).wrap_err()?;
294
295        // Update the build path for grammars that are not defined at the repo root
296        let grammar_path = match self.subdir.clone() {
297            // Some grammars are in a subdirectory, go there
298            Some(subdir) => tmp_dir.join(subdir),
299            None => tmp_dir,
300        };
301
302        // Build grammar
303        log::info!("{name}: Building grammar");
304        let mut loader =
305            tree_sitter_loader::Loader::new().map_err(TopiaryConfigFetchingError::Build)?;
306        loader.debug_build(false);
307        loader.force_rebuild(true);
308        loader
309            .compile_parser_at_path(&grammar_path, library_path, &[])
310            .map_err(TopiaryConfigFetchingError::Build)?;
311
312        log::info!("{name}: Grammar successfully compiled");
313        Ok(())
314    }
315}
316
317fn source_object_id(source: &refmap::Source) -> Result<ObjectId> {
318    match source {
319        refmap::Source::ObjectId(id) => Ok(*id),
320        refmap::Source::Ref(r) => {
321            let (_name, id, peeled) = r.unpack();
322
323            Ok(peeled
324                .or(id)
325                .ok_or_else(|| anyhow!("unborn reference"))
326                .wrap_err()?
327                .to_owned())
328        }
329    }
330}