syntect_assets/
assets.rs

1use std::ffi::OsStr;
2use std::fs;
3use std::path::Path;
4
5use once_cell::unsync::OnceCell;
6
7use syntect::highlighting::Theme;
8use syntect::parsing::{SyntaxReference, SyntaxSet};
9
10use crate::syntax_mapping::ignored_suffixes::IgnoredSuffixes;
11use crate::syntax_mapping::{MappingTarget, SyntaxMapping};
12
13use lazy_theme_set::LazyThemeSet;
14
15use serialized_syntax_set::*;
16
17use crate::error::*;
18
19pub(crate) mod assets_metadata;
20mod lazy_theme_set;
21mod serialized_syntax_set;
22
/// Bundle of syntax definitions and themes used for highlighting.
///
/// The syntax set is kept in serialized form and only deserialized the first
/// time it is requested (see `get_syntax_set`).
#[derive(Debug)]
pub struct HighlightingAssets {
    /// Cache for the deserialized syntax set; filled on first access.
    syntax_set_cell: OnceCell<SyntaxSet>,
    /// Serialized source from which `syntax_set_cell` is populated.
    serialized_syntax_set: SerializedSyntaxSet,

    /// The set of themes that can be looked up by name.
    theme_set: LazyThemeSet,
    /// Theme name used by `get_theme` when the requested theme is not found.
    /// When `None`, `default_theme()` is used instead.
    fallback_theme: Option<&'static str>,
}
31
/// A syntax definition paired with the syntax set it was looked up in.
#[derive(Debug)]
pub struct SyntaxReferenceInSet<'a> {
    /// The syntax definition that was found.
    pub syntax: &'a SyntaxReference,
    /// The syntax set that `syntax` was taken from.
    pub syntax_set: &'a SyntaxSet,
}
37
/// Lazy-loaded syntaxes are already compressed, and we don't want to compress
/// already compressed data.
pub(crate) const COMPRESS_SYNTAXES: bool = false;

/// We don't want to compress our [LazyThemeSet] since the lazy-loaded themes
/// within it are already compressed, and compressing another time just makes
/// performance suffer.
pub(crate) const COMPRESS_THEMES: bool = false;

/// Compress for size of ~40 kB instead of ~200 kB without much difference in
/// performance due to lazy-loading.
pub(crate) const COMPRESS_LAZY_THEMES: bool = true;

/// Compress for size of ~10 kB instead of ~120 kB.
pub(crate) const COMPRESS_ACKNOWLEDGEMENTS: bool = true;
53
54impl HighlightingAssets {
55    fn new(serialized_syntax_set: SerializedSyntaxSet, theme_set: LazyThemeSet) -> Self {
56        HighlightingAssets {
57            syntax_set_cell: OnceCell::new(),
58            serialized_syntax_set,
59            theme_set,
60            fallback_theme: None,
61        }
62    }
63
64    /// The default theme.
65    ///
66    /// ### Windows and Linux
67    ///
68    /// Windows and most Linux distributions has a dark terminal theme by
69    /// default. On these platforms, this function always returns a theme that
70    /// looks good on a dark background.
71    ///
72    /// ### macOS
73    ///
74    /// On macOS the default terminal background is light, but it is common that
75    /// Dark Mode is active, which makes the terminal background dark. On this
76    /// platform, the default theme depends on
77    /// ```bash
78    /// defaults read -globalDomain AppleInterfaceStyle
79    /// ```
80    /// To avoid the overhead of the check on macOS, simply specify a theme
81    /// explicitly via `--theme`, `BAT_THEME`, or `~/.config/syntect-assets`.
82    ///
83    /// See <https://github.com/sharkdp/bat/issues/1746> and
84    /// <https://github.com/sharkdp/bat/issues/1928> for more context.
85    pub fn default_theme() -> &'static str {
86        {
87            Self::default_dark_theme()
88        }
89    }
90
91    /**
92     * The default theme that looks good on a dark background.
93     */
94    fn default_dark_theme() -> &'static str {
95        "Monokai Extended"
96    }
97
98    /**
99     * The default theme that looks good on a light background.
100     */
101    #[cfg(target_os = "macos")]
102    fn default_light_theme() -> &'static str {
103        "Monokai Extended Light"
104    }
105
106    pub fn from_cache(cache_path: &Path) -> Result<Self> {
107        Ok(HighlightingAssets::new(
108            SerializedSyntaxSet::FromFile(cache_path.join("syntaxes.bin")),
109            asset_from_cache(&cache_path.join("themes.bin"), "theme set", COMPRESS_THEMES)?,
110        ))
111    }
112
113    pub fn from_binary() -> Self {
114        HighlightingAssets::new(
115            SerializedSyntaxSet::FromBinary(get_serialized_integrated_syntaxset()),
116            get_integrated_themeset(),
117        )
118    }
119
120    pub fn set_fallback_theme(&mut self, theme: &'static str) {
121        self.fallback_theme = Some(theme);
122    }
123
124    /// Return the collection of syntect syntax definitions.
125    pub fn get_syntax_set(&self) -> Result<&SyntaxSet> {
126        self.syntax_set_cell
127            .get_or_try_init(|| self.serialized_syntax_set.deserialize())
128    }
129
130    pub fn get_syntaxes(&self) -> Result<&[SyntaxReference]> {
131        Ok(self.get_syntax_set()?.syntaxes())
132    }
133
134    fn get_theme_set(&self) -> &LazyThemeSet {
135        &self.theme_set
136    }
137
138    pub fn themes(&self) -> impl Iterator<Item = &str> {
139        self.get_theme_set().themes()
140    }
141
142    /// Detect the syntax based on, in order:
143    ///  1. Syntax mappings with [MappingTarget::MapTo] and [MappingTarget::MapToUnknown]
144    ///     (e.g. `/etc/profile` -> `Bourne Again Shell (bash)`)
145    ///  2. The file name (e.g. `Dockerfile`)
146    ///  3. Syntax mappings with [MappingTarget::MapExtensionToUnknown]
147    ///     (e.g. `*.conf`)
148    ///  4. The file name extension (e.g. `.rs`)
149    ///
150    /// When detecting syntax based on syntax mappings, the full path is taken
151    /// into account. When detecting syntax based on file name, no regard is
152    /// taken to the path of the file. Only the file name itself matters. When
153    /// detecting syntax based on file name extension, only the file name
154    /// extension itself matters.
155    ///
156    /// Returns [Error::UndetectedSyntax] if it was not possible detect syntax
157    /// based on path/file name/extension (or if the path was mapped to
158    /// [MappingTarget::MapToUnknown] or [MappingTarget::MapExtensionToUnknown]).
159    /// In this case it is appropriate to fall back to other methods to detect
160    /// syntax. Such as using the contents of the first line of the file.
161    ///
162    /// Returns [Error::UnknownSyntax] if a syntax mapping exist, but the mapped
163    /// syntax does not exist.
164    pub fn get_syntax_for_path(
165        &self,
166        path: impl AsRef<Path>,
167        mapping: &SyntaxMapping,
168    ) -> Result<SyntaxReferenceInSet> {
169        let path = path.as_ref();
170
171        let syntax_match = mapping.get_syntax_for(path);
172
173        if let Some(MappingTarget::MapToUnknown) = syntax_match {
174            return Err(crate::error::Error::UndetectedSyntax(path.to_string_lossy().into()));
175        }
176
177        if let Some(MappingTarget::MapTo(syntax_name)) = syntax_match {
178            return self
179                .find_syntax_by_name(syntax_name)?
180                .ok_or_else(|| crate::error::Error::UnknownSyntax(syntax_name.to_owned()));
181        }
182
183        let file_name = path.file_name().unwrap_or_default();
184
185        match (
186            self.get_syntax_for_file_name(file_name, &mapping.ignored_suffixes)?,
187            syntax_match,
188        ) {
189            (Some(syntax), _) => Ok(syntax),
190
191            (_, Some(MappingTarget::MapExtensionToUnknown)) => {
192                Err(crate::error::Error::UndetectedSyntax(path.to_string_lossy().into()))
193            }
194
195            _ => self
196                .get_syntax_for_file_extension(file_name, &mapping.ignored_suffixes)?
197                .ok_or_else(|| crate::error::Error::UndetectedSyntax(path.to_string_lossy().into())),
198        }
199    }
200
201    /// Look up a syntect theme by name.
202    pub fn get_theme(&self, theme: &str) -> &Theme {
203        match self.get_theme_set().get(theme) {
204            Some(theme) => theme,
205            None => {
206                if theme == "ansi-light" || theme == "ansi-dark" {
207                    log::warn!("Theme '{}' is deprecated, using 'ansi' instead.", theme);
208                    return self.get_theme("ansi");
209                }
210                if !theme.is_empty() {
211                    log::warn!("Unknown theme '{}', using default.", theme)
212                }
213                self.get_theme_set()
214                    .get(self.fallback_theme.unwrap_or_else(Self::default_theme))
215                    .expect("something is very wrong if the default theme is missing")
216            }
217        }
218    }
219
220
221    pub(crate) fn find_syntax_by_name(
222        &self,
223        syntax_name: &str,
224    ) -> Result<Option<SyntaxReferenceInSet>> {
225        let syntax_set = self.get_syntax_set()?;
226        Ok(syntax_set
227            .find_syntax_by_name(syntax_name)
228            .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
229    }
230
231    fn find_syntax_by_extension(&self, e: Option<&OsStr>) -> Result<Option<SyntaxReferenceInSet>> {
232        let syntax_set = self.get_syntax_set()?;
233        let extension = e.and_then(|x| x.to_str()).unwrap_or_default();
234        Ok(syntax_set
235            .find_syntax_by_extension(extension)
236            .map(|syntax| SyntaxReferenceInSet { syntax, syntax_set }))
237    }
238
239    fn get_syntax_for_file_name(
240        &self,
241        file_name: &OsStr,
242        ignored_suffixes: &IgnoredSuffixes,
243    ) -> Result<Option<SyntaxReferenceInSet>> {
244        let mut syntax = self.find_syntax_by_extension(Some(file_name))?;
245        if syntax.is_none() {
246            syntax =
247                ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| {
248                    // Note: recursion
249                    self.get_syntax_for_file_name(stripped_file_name, ignored_suffixes)
250                })?;
251        }
252        Ok(syntax)
253    }
254
255    fn get_syntax_for_file_extension(
256        &self,
257        file_name: &OsStr,
258        ignored_suffixes: &IgnoredSuffixes,
259    ) -> Result<Option<SyntaxReferenceInSet>> {
260        let mut syntax = self.find_syntax_by_extension(Path::new(file_name).extension())?;
261        if syntax.is_none() {
262            syntax =
263                ignored_suffixes.try_with_stripped_suffix(file_name, |stripped_file_name| {
264                    // Note: recursion
265                    self.get_syntax_for_file_extension(stripped_file_name, ignored_suffixes)
266                })?;
267        }
268        Ok(syntax)
269    }
270
271}
272
273pub(crate) fn get_serialized_integrated_syntaxset() -> &'static [u8] {
274    include_bytes!("../assets/syntaxes.bin")
275}
276
277pub(crate) fn get_integrated_themeset() -> LazyThemeSet {
278    from_binary(include_bytes!("../assets/themes.bin"), COMPRESS_THEMES)
279}
280
281pub fn get_acknowledgements() -> String {
282    from_binary(
283        include_bytes!("../assets/acknowledgements.bin"),
284        COMPRESS_ACKNOWLEDGEMENTS,
285    )
286}
287
288pub(crate) fn from_binary<T: serde::de::DeserializeOwned>(v: &[u8], compressed: bool) -> T {
289    asset_from_contents(v, "n/a", compressed)
290        .expect("data integrated in binary is never faulty, but make sure `compressed` is in sync!")
291}
292
293fn asset_from_contents<T: serde::de::DeserializeOwned>(
294    contents: &[u8],
295    description: &str,
296    compressed: bool,
297) -> Result<T> {
298    if compressed {
299        bincode::deserialize_from(flate2::read::ZlibDecoder::new(contents))
300    } else {
301        bincode::deserialize_from(contents)
302    }
303    .map_err(|_| format!("Could not parse {}", description).into())
304}
305
306fn asset_from_cache<T: serde::de::DeserializeOwned>(
307    path: &Path,
308    description: &str,
309    compressed: bool,
310) -> Result<T> {
311    let contents = fs::read(path).map_err(|_| {
312        format!(
313            "Could not load cached {} '{}'",
314            description,
315            path.to_string_lossy()
316        )
317    })?;
318    asset_from_contents(&contents[..], description, compressed)
319        .map_err(|_| format!("Could not parse cached {}", description).into())
320}