topiary_config/
language.rs

1//! This module contains the `Language` struct, which represents a language configuration, and
2//! associated methods.
3
4#[cfg(not(target_arch = "wasm32"))]
5use anyhow::anyhow;
6#[cfg(not(target_arch = "wasm32"))]
7use gix::{
8    ObjectId,
9    interrupt::IS_INTERRUPTED,
10    progress::Discard,
11    remote::{self, Direction, fetch, fetch::refmap},
12    worktree::state::checkout,
13};
14use std::collections::HashSet;
15#[cfg(not(target_arch = "wasm32"))]
16use std::num::NonZero;
17#[cfg(not(target_arch = "wasm32"))]
18use std::path::PathBuf;
19
20use crate::error::TopiaryConfigResult;
21#[cfg(not(target_arch = "wasm32"))]
22use crate::error::{TopiaryConfigError, TopiaryConfigFetchingError};
23
/// Language definitions, as far as the CLI and configuration are concerned, contain everything
/// needed to configure formatting for that language.
///
/// A `Language` pairs the language's name with its [`LanguageConfiguration`].
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct Language {
    /// The name of the language, used as a key when looking up information in the deserialised
    /// configuration and to derive the name of the respective Tree-sitter grammar.
    pub name: String,

    /// The configuration of the language; includes all properties that Topiary
    /// needs to properly format the language.
    pub config: LanguageConfiguration,
}
36
/// The per-language settings: the associated file extensions, the optional indentation string
/// and the description of the Tree-sitter grammar.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct LanguageConfiguration {
    /// A set of the filetype extensions associated with this language. This enables Topiary to
    /// switch to the right language based on the input filename.
    pub extensions: HashSet<String>,

    /// The indentation string used for this language; defaults to "  " (i.e., two spaces). Any
    /// string can be provided, but in most instances it will be some whitespace (e.g., "    ",
    /// "\t", etc.)
    pub indent: Option<String>,

    /// The Tree-sitter source of the language; contains all that is needed to pull and compile
    /// the Tree-sitter grammar.
    pub grammar: Grammar,
}
51
/// Describes the Tree-sitter grammar of a language: where it comes from (on targets other than
/// `wasm32`) and, optionally, the symbol under which the compiled grammar exposes it.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
pub struct Grammar {
    /// Where the grammar is obtained from. This field does not exist on `wasm32` targets.
    #[cfg(not(target_arch = "wasm32"))]
    pub source: GrammarSource,
    /// The symbol of the language in the compiled grammar. Usually this is
    /// `tree_sitter_<LANGUAGE_NAME>`, but in rare cases it differs. For
    /// instance our "tree-sitter-query" language, where the symbol is:
    /// `tree_sitter_query` instead of `tree_sitter_tree_sitter_query`.
    pub symbol: Option<String>,
}
62
/// The origin of a grammar. Only available on targets other than `wasm32`.
///
/// The variants are (de)serialised under the names `git` and `path`.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
#[cfg(not(target_arch = "wasm32"))]
pub enum GrammarSource {
    /// The grammar is described by a Git repository (see [`GitSource`]).
    #[serde(rename = "git")]
    Git(GitSource),
    /// The grammar is referred to by a filesystem path.
    #[serde(rename = "path")]
    Path(PathBuf),
}
71
/// A grammar that lives in a Git repository, identified by its URL, a revision and an optional
/// sub-directory. Only available on targets other than `wasm32`.
#[derive(Debug, serde::Deserialize, PartialEq, serde::Serialize, Clone)]
#[cfg(not(target_arch = "wasm32"))]
pub struct GitSource {
    /// The URL of the git repository that contains the tree-sitter grammar.
    pub git: String,
    /// The revision of the git repository to use.
    pub rev: String,
    /// The sub-directory within the repository where the grammar is located. Defaults to the
    /// root of the repository.
    pub subdir: Option<String>,
}
82
83impl Language {
84    pub fn new(name: String, config: LanguageConfiguration) -> Self {
85        Self { name, config }
86    }
87
88    pub fn indent(&self) -> Option<String> {
89        self.config.indent.clone()
90    }
91
92    #[cfg(not(target_arch = "wasm32"))]
93    #[allow(clippy::result_large_err)]
94    pub fn find_query_file(&self) -> TopiaryConfigResult<PathBuf> {
95        use crate::source::Source;
96
97        let basename = PathBuf::from(self.name.as_str()).with_extension("scm");
98
99        #[rustfmt::skip]
100        let potentials: [Option<PathBuf>; 5] = [
101            std::env::var("TOPIARY_LANGUAGE_DIR").map(PathBuf::from).ok(),
102            option_env!("TOPIARY_LANGUAGE_DIR").map(PathBuf::from),
103            Source::fetch_one(&None).queries_dir(),
104            Some(PathBuf::from("./topiary-queries/queries")),
105            Some(PathBuf::from("../topiary-queries/queries")),
106        ];
107
108        potentials
109            .into_iter()
110            .flatten()
111            .map(|path| path.join(&basename))
112            .find(|path| path.exists())
113            .ok_or_else(|| TopiaryConfigError::QueryFileNotFound(basename))
114    }
115
116    #[cfg(not(target_arch = "wasm32"))]
117    // Returns the library path, and ensures the parent directories exist.
118    pub fn library_path(&self) -> std::io::Result<PathBuf> {
119        match &self.config.grammar.source {
120            GrammarSource::Git(git_source) => {
121                let mut library_path = crate::project_dirs().cache_dir().to_path_buf();
122                library_path.push(self.name.clone());
123                std::fs::create_dir_all(&library_path)?;
124
125                // Set the output path as the revision of the grammar,
126                // with a platform-appropriate extension
127                library_path.push(git_source.rev.clone());
128                library_path.set_extension(std::env::consts::DLL_EXTENSION);
129
130                Ok(library_path)
131            }
132
133            GrammarSource::Path(path) => Ok(path.to_path_buf()),
134        }
135    }
136
137    #[cfg(not(target_arch = "wasm32"))]
138    // NOTE: Much of the following code is heavily inspired by the `helix-loader` crate with license MPL-2.0.
139    // To be safe, assume any and all of the following code is MLP-2.0 and copyrighted to the Helix project.
140    pub fn grammar(
141        &self,
142    ) -> Result<topiary_tree_sitter_facade::Language, TopiaryConfigFetchingError> {
143        let library_path = self.library_path()?;
144
145        // Ensure the compile exists
146        if !library_path.is_file() {
147            match &self.config.grammar.source {
148                GrammarSource::Git(git_source) => {
149                    git_source.fetch_and_compile(&self.name, library_path.clone())?
150                }
151                GrammarSource::Path(_) => {
152                    return Err(TopiaryConfigFetchingError::GrammarFileNotFound(
153                        library_path,
154                    ));
155                }
156            }
157        }
158
159        assert!(library_path.is_file());
160        log::debug!("Loading grammar from {}", library_path.display());
161
162        use libloading::{Library, Symbol};
163
164        let library = unsafe { Library::new(&library_path) }?;
165        let language_fn_name = if let Some(symbol_name) = self.config.grammar.symbol.clone() {
166            symbol_name
167        } else {
168            format!("tree_sitter_{}", self.name.replace('-', "_"))
169        };
170
171        let language = unsafe {
172            let language_fn: Symbol<unsafe extern "C" fn() -> *const ()> =
173                library.get(language_fn_name.as_bytes())?;
174            tree_sitter_language::LanguageFn::from_raw(*language_fn)
175        };
176        std::mem::forget(library);
177        Ok(topiary_tree_sitter_facade::Language::from(language))
178    }
179
180    #[cfg(target_arch = "wasm32")]
181    #[allow(clippy::result_large_err)]
182    pub async fn grammar(&self) -> TopiaryConfigResult<topiary_tree_sitter_facade::Language> {
183        let language_name = self.name.as_str();
184
185        let grammar_path = if language_name == "tree_sitter_query" {
186            "/playground/scripts/tree-sitter-query.wasm".to_string()
187        } else {
188            format!("/playground/scripts/tree-sitter-{language_name}.wasm")
189        };
190
191        Ok(
192            topiary_web_tree_sitter_sys::Language::load_path(&grammar_path)
193                .await
194                .map_err(|e| {
195                    let error: topiary_tree_sitter_facade::LanguageError = e.into();
196                    error
197                })?
198                .into(),
199        )
200    }
201}
202
203type Result<T, E = TopiaryConfigFetchingError> = std::result::Result<T, E>;
204
205trait GitResult<T> {
206    fn wrap_err(self) -> Result<T>;
207}
208
209impl<T, E: Into<anyhow::Error>> GitResult<T> for Result<T, E> {
210    fn wrap_err(self) -> Result<T> {
211        self.map_err(|e| TopiaryConfigFetchingError::Git(e.into()))
212    }
213}
214
215#[cfg(not(target_arch = "wasm32"))]
216impl GitSource {
217    fn fetch_and_compile(
218        &self,
219        name: &str,
220        library_path: PathBuf,
221    ) -> Result<(), TopiaryConfigFetchingError> {
222        log::info!("{name}: Language Grammar not found, attempting to fetch and compile it");
223        // Create a temporary directory to clone the repository to. We could
224        // cached the repositories, but the additional disk space is probably
225        // not worth the benefits gained by caching. The tempdir is deleted
226        // when dropped
227        let tmp_dir = tempfile::tempdir()?;
228
229        self.fetch_and_compile_with_dir(name, library_path, false, tmp_dir.keep())
230    }
231
232    /// This function is heavily inspired by the one used in Nickel:
233    /// https://github.com/tweag/nickel/blob/master/git/src/lib.rs
234    pub fn fetch_and_compile_with_dir(
235        &self,
236        name: &str,
237        library_path: PathBuf,
238        force: bool,
239        tmp_dir: PathBuf,
240    ) -> Result<(), TopiaryConfigFetchingError> {
241        if !force && library_path.is_file() {
242            log::info!("{name}: Built grammar already exists; nothing to do");
243            return Ok(());
244        }
245        let tmp_dir = tmp_dir.join(name);
246        std::fs::create_dir_all(&tmp_dir)?;
247
248        // Fetch the git directory somewhere temporary.
249        let git_tempdir = tempfile::tempdir().wrap_err()?;
250        let repo = gix::init(git_tempdir.path()).wrap_err()?;
251
252        let remote = repo
253            .remote_at(self.git.as_str())
254            .wrap_err()?
255            .with_fetch_tags(fetch::Tags::None)
256            .with_refspecs(Some(self.rev.as_str()), Direction::Fetch)
257            .wrap_err()?;
258
259        // This does similar credentials stuff to the git CLI (e.g. it looks for ssh
260        // keys if it's a fetch over ssh, or it tries to run `askpass` if it needs
261        // credentials for https). Maybe we want to have explicit credentials
262        // configuration instead of or in addition to the default?
263        let connection = remote.connect(Direction::Fetch).wrap_err()?;
264        let outcome = connection
265            .prepare_fetch(&mut Discard, remote::ref_map::Options::default())
266            .wrap_err()?
267            // For now, we always fetch shallow. Maybe for the index it's more efficient to
268            // keep a single repo around and update it? But that might be in another method.
269            .with_shallow(fetch::Shallow::DepthAtRemote(NonZero::new(1).unwrap()))
270            .receive(&mut Discard, &IS_INTERRUPTED)
271            .wrap_err()?;
272
273        if outcome.ref_map.mappings.len() > 1 {
274            return Err(anyhow!("we only asked for 1 ref; why did we get more?")).wrap_err();
275        }
276        if outcome.ref_map.mappings.is_empty() {
277            return Err(anyhow!("Ref not found: {:?} {:?}", self.git, self.rev,)).wrap_err();
278        }
279
280        let object_id = source_object_id(&outcome.ref_map.mappings[0].remote)?;
281        let object = repo.find_object(object_id).wrap_err()?;
282        let tree_id = object.peel_to_tree().wrap_err()?.id();
283        let mut index = repo.index_from_tree(&tree_id).wrap_err()?;
284
285        log::info!("{}: Checking out {} {}", name, self.git, self.rev);
286        checkout(
287            &mut index,
288            &tmp_dir,
289            repo.objects.clone(),
290            &Discard,
291            &Discard,
292            &IS_INTERRUPTED,
293            checkout::Options {
294                overwrite_existing: true,
295                ..Default::default()
296            },
297        )
298        .wrap_err()?;
299        index.write(Default::default()).wrap_err()?;
300
301        // Update the build path for grammars that are not defined at the repo root
302        let grammar_path = match self.subdir.clone() {
303            // Some grammars are in a subdirectory, go there
304            Some(subdir) => tmp_dir.join(subdir),
305            None => tmp_dir,
306        };
307
308        // Build grammar
309        log::info!("{name}: Building grammar");
310        let mut loader =
311            tree_sitter_loader::Loader::new().map_err(TopiaryConfigFetchingError::Build)?;
312        loader.debug_build(false);
313        loader.force_rebuild(true);
314        loader
315            .compile_parser_at_path(&grammar_path, library_path, &[])
316            .map_err(TopiaryConfigFetchingError::Build)?;
317
318        log::info!("{name}: Grammar successfully compiled");
319        Ok(())
320    }
321}
322
323fn source_object_id(source: &refmap::Source) -> Result<ObjectId> {
324    match source {
325        refmap::Source::ObjectId(id) => Ok(*id),
326        refmap::Source::Ref(r) => {
327            let (_name, id, peeled) = r.unpack();
328
329            Ok(peeled
330                .or(id)
331                .ok_or_else(|| anyhow!("unborn reference"))
332                .wrap_err()?
333                .to_owned())
334        }
335    }
336}