syntect_no_panic/parsing/
syntax_set.rs

1use super::scope::*;
2use super::syntax_definition::*;
3use super::ParsingError;
4
5#[cfg(feature = "metadata")]
6use super::metadata::{LoadMetadata, Metadata, RawMetadataEntry};
7
8#[cfg(feature = "yaml-load")]
9use super::super::LoadingError;
10
11use std::collections::{BTreeSet, HashMap, HashSet};
12use std::fs::File;
13use std::io::{BufRead, BufReader};
14use std::mem;
15use std::path::Path;
16
17use super::regex::Regex;
18use crate::parsing::syntax_definition::ContextId;
19use once_cell::sync::OnceCell;
20use serde_derive::{Deserialize, Serialize};
21
/// A syntax set holds multiple syntaxes that have been linked together.
///
/// Use a [`SyntaxSetBuilder`] to load syntax definitions and build a syntax set.
///
/// After building, the syntax set is immutable and can no longer be modified, but you can convert
/// it back into a builder by using the [`into_builder`] method.
///
/// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
/// [`into_builder`]: #method.into_builder
#[derive(Debug, Serialize, Deserialize)]
pub struct SyntaxSet {
    /// The linked syntaxes. `ContextId::syntax_index` values index into this list.
    syntaxes: Vec<SyntaxReference>,
    /// Stores the syntax index for every path that was loaded
    path_syntaxes: Vec<(String, usize)>,

    /// Cache used for first-line matching, built on first use. It is never (de)serialized
    /// and is reset to an empty cell when the set is cloned.
    #[serde(skip_serializing, skip_deserializing, default = "OnceCell::new")]
    first_line_cache: OnceCell<FirstLineCache>,
    /// Metadata, e.g. indent and commenting information.
    ///
    /// NOTE: if serializing, you should handle metadata manually; that is, you should serialize and
    /// deserialize it separately. See `examples/gendata.rs` for an example.
    #[cfg(feature = "metadata")]
    #[serde(skip, default)]
    pub(crate) metadata: Metadata,
}
47
/// A linked version of a [`SyntaxDefinition`] that is only useful as part of the
/// [`SyntaxSet`] that contains it. See docs for [`SyntaxSetBuilder::build`] for
/// more info.
#[derive(Clone, Debug, Serialize, Deserialize)]
pub struct SyntaxReference {
    /// Name of the syntax, as matched by `SyntaxSet::find_syntax_by_name`.
    pub name: String,
    /// File extensions for this syntax; `SyntaxSet::find_syntax_by_extension` compares
    /// against these ignoring ASCII case.
    pub file_extensions: Vec<String>,
    /// Default scope of the syntax, as matched by `SyntaxSet::find_syntax_by_scope`.
    pub scope: Scope,
    /// Copied from the syntax definition; presumably the regex source used for first-line
    /// matching (the cache that consumes it is defined elsewhere -- TODO confirm).
    pub first_line_match: Option<String>,
    /// Copied from the syntax definition; not interpreted in this part of the file.
    pub hidden: bool,
    /// Copied from the syntax definition; serialized through `ordered_map`.
    #[serde(serialize_with = "ordered_map")]
    pub variables: HashMap<String, String>,
    /// Contexts of this syntax, decoded from `serialized_lazy_contexts` on first access.
    /// Never (de)serialized itself.
    #[serde(skip)]
    pub(crate) lazy_contexts: OnceCell<LazyContexts>,
    /// Binary dump of the `LazyContexts` for this syntax, written by
    /// `SyntaxSetBuilder::build`.
    pub(crate) serialized_lazy_contexts: Vec<u8>,
}
64
/// The lazy-loaded parts of a [`SyntaxReference`].
#[derive(Clone, Debug, Serialize, Deserialize)]
pub(crate) struct LazyContexts {
    /// Maps each context name to the id of that context.
    #[serde(serialize_with = "ordered_map")]
    pub(crate) context_ids: HashMap<String, ContextId>,
    /// The contexts of the syntax; `ContextId::context_index` values index into this list.
    pub(crate) contexts: Vec<Context>,
}
72
/// A syntax set builder is used for loading syntax definitions from the file
/// system or by adding [`SyntaxDefinition`] objects.
///
/// Once all the syntaxes have been added, call [`build`] to turn the builder into
/// a [`SyntaxSet`] that can be used for parsing or highlighting.
///
/// [`SyntaxDefinition`]: syntax_definition/struct.SyntaxDefinition.html
/// [`build`]: #method.build
/// [`SyntaxSet`]: struct.SyntaxSet.html
#[derive(Clone, Default)]
pub struct SyntaxSetBuilder {
    /// The (not yet linked) syntax definitions added so far.
    syntaxes: Vec<SyntaxDefinition>,
    /// For every syntax loaded from disk: its slash-separated path and its index in
    /// `syntaxes`.
    path_syntaxes: Vec<(String, usize)>,
    /// Raw metadata entries collected from `.tmPreferences` files while loading folders.
    #[cfg(feature = "metadata")]
    raw_metadata: LoadMetadata,

    /// If this `SyntaxSetBuilder` is created with `SyntaxSet::into_builder`
    /// from a `SyntaxSet` that already had metadata, we keep that metadata,
    /// merging it with newly loaded metadata.
    #[cfg(feature = "metadata")]
    existing_metadata: Option<Metadata>,
}
95
/// Reads the file at `p` and parses it as a syntax definition.
///
/// The file stem of `p` (when it is valid UTF-8) is handed to the parser as its third
/// argument. I/O failures are converted with `?`; parse failures are wrapped in
/// `LoadingError::ParseSyntax` together with the displayed path.
#[cfg(feature = "yaml-load")]
fn load_syntax_file(
    p: &Path,
    lines_include_newline: bool,
) -> Result<SyntaxDefinition, LoadingError> {
    let contents = std::fs::read_to_string(p)?;
    let stem = p.file_stem().and_then(|stem| stem.to_str());

    SyntaxDefinition::load_from_str(&contents, lines_include_newline, stem)
        .map_err(|err| LoadingError::ParseSyntax(err, p.display().to_string()))
}
110
111impl Clone for SyntaxSet {
112    fn clone(&self) -> SyntaxSet {
113        SyntaxSet {
114            syntaxes: self.syntaxes.clone(),
115            path_syntaxes: self.path_syntaxes.clone(),
116            // Will need to be re-initialized
117            first_line_cache: OnceCell::new(),
118            #[cfg(feature = "metadata")]
119            metadata: self.metadata.clone(),
120        }
121    }
122}
123
124impl Default for SyntaxSet {
125    fn default() -> Self {
126        SyntaxSet {
127            syntaxes: Vec::new(),
128            path_syntaxes: Vec::new(),
129            first_line_cache: OnceCell::new(),
130            #[cfg(feature = "metadata")]
131            metadata: Metadata::default(),
132        }
133    }
134}
135
136impl SyntaxSet {
137    pub fn new() -> SyntaxSet {
138        SyntaxSet::default()
139    }
140
141    /// Convenience constructor for creating a builder, then loading syntax
142    /// definitions from a folder and then building the syntax set.
143    ///
144    /// Note that this uses `lines_include_newline` set to `false`, see the
145    /// [`add_from_folder`] method docs on [`SyntaxSetBuilder`] for an explanation
146    /// as to why this might not be the best.
147    ///
148    /// [`add_from_folder`]: struct.SyntaxSetBuilder.html#method.add_from_folder
149    /// [`SyntaxSetBuilder`]: struct.SyntaxSetBuilder.html
150    #[cfg(feature = "yaml-load")]
151    pub fn load_from_folder<P: AsRef<Path>>(folder: P) -> Result<SyntaxSet, LoadingError> {
152        let mut builder = SyntaxSetBuilder::new();
153        builder.add_from_folder(folder, false)?;
154        Ok(builder.build())
155    }
156
157    /// The list of syntaxes in the set
158    pub fn syntaxes(&self) -> &[SyntaxReference] {
159        &self.syntaxes[..]
160    }
161
    /// Replaces the metadata stored in this set with `metadata`.
    #[cfg(feature = "metadata")]
    pub fn set_metadata(&mut self, metadata: Metadata) {
        self.metadata = metadata;
    }
166
    /// The loaded metadata for this set.
    ///
    /// Returns a borrow of the stored value; nothing is loaded or computed by this call.
    #[cfg(feature = "metadata")]
    pub fn metadata(&self) -> &Metadata {
        &self.metadata
    }
172
173    /// Finds a syntax by its default scope, for example `source.regexp` finds the regex syntax.
174    ///
175    /// This and all similar methods below do a linear search of syntaxes, this should be fast
176    /// because there aren't many syntaxes, but don't think you can call it a bajillion times per
177    /// second.
178    pub fn find_syntax_by_scope(&self, scope: Scope) -> Option<&SyntaxReference> {
179        self.syntaxes.iter().rev().find(|&s| s.scope == scope)
180    }
181
182    pub fn find_syntax_by_name<'a>(&'a self, name: &str) -> Option<&'a SyntaxReference> {
183        self.syntaxes.iter().rev().find(|&s| name == s.name)
184    }
185
186    pub fn find_syntax_by_extension<'a>(&'a self, extension: &str) -> Option<&'a SyntaxReference> {
187        self.syntaxes.iter().rev().find(|&s| {
188            s.file_extensions
189                .iter()
190                .any(|e| e.eq_ignore_ascii_case(extension))
191        })
192    }
193
194    /// Searches for a syntax first by extension and then by case-insensitive name
195    ///
196    /// This is useful for things like Github-flavoured-markdown code block highlighting where all
197    /// you have to go on is a short token given by the user
198    pub fn find_syntax_by_token<'a>(&'a self, s: &str) -> Option<&'a SyntaxReference> {
199        {
200            let ext_res = self.find_syntax_by_extension(s);
201            if ext_res.is_some() {
202                return ext_res;
203            }
204        }
205        self.syntaxes
206            .iter()
207            .rev()
208            .find(|&syntax| syntax.name.eq_ignore_ascii_case(s))
209    }
210
211    /// Try to find the syntax for a file based on its first line
212    ///
213    /// This uses regexes that come with some sublime syntax grammars for matching things like
214    /// shebangs and mode lines like `-*- Mode: C -*-`
215    pub fn find_syntax_by_first_line<'a>(
216        &'a self,
217        s: &str,
218    ) -> Result<Option<&'a SyntaxReference>, LoadingError> {
219        let s = s.strip_prefix("\u{feff}").unwrap_or(s); // Strip UTF-8 BOM
220        let cache = self.first_line_cache();
221        for &(ref reg, i) in cache.regexes.iter().rev() {
222            let found = reg
223                .search(s, 0, s.len(), None, false /*ignore errors*/)
224                .map_err(|e| LoadingError::ParseSyntax(e, "regex parsing".to_string()))?;
225            if found {
226                return Ok(Some(&self.syntaxes[i]));
227            }
228        }
229        Ok(None)
230    }
231
232    /// Searches for a syntax by it's original file path when it was first loaded from disk
233    ///
234    /// This is primarily useful for syntax tests. Some may specify a
235    /// `Packages/PackageName/SyntaxName.sublime-syntax` path, and others may just have
236    /// `SyntaxName.sublime-syntax`. This caters for these by matching the end of the path of the
237    /// loaded syntax definition files
238    // however, if a syntax name is provided without a folder, make sure we don't accidentally match the end of a different syntax definition's name - by checking a / comes before it or it is the full path
239    pub fn find_syntax_by_path<'a>(&'a self, path: &str) -> Option<&'a SyntaxReference> {
240        let mut slash_path = "/".to_string();
241        slash_path.push_str(path);
242        self.path_syntaxes
243            .iter()
244            .rev()
245            .find(|t| t.0.ends_with(&slash_path) || t.0 == path)
246            .map(|&(_, i)| &self.syntaxes[i])
247    }
248
249    /// Convenience method that tries to find the syntax for a file path, first by extension/name
250    /// and then by first line of the file if that doesn't work.
251    ///
252    /// May IO Error because it sometimes tries to read the first line of the file.
253    ///
254    /// # Examples
255    ///
256    /// When determining how to highlight a file, use this in combination with a fallback to plain
257    /// text:
258    ///
259    /// ```
260    /// use syntect::parsing::SyntaxSet;
261    /// let ss = SyntaxSet::load_defaults_newlines();
262    /// let syntax = ss.find_syntax_for_file("testdata/highlight_test.erb")
263    ///     .unwrap() // for IO errors, you may want to use try!() or another plain text fallback
264    ///     .unwrap_or_else(|| ss.find_syntax_plain_text());
265    /// assert_eq!(syntax.name, "HTML (Rails)");
266    /// ```
267    pub fn find_syntax_for_file<P: AsRef<Path>>(
268        &self,
269        path_obj: P,
270    ) -> Result<Option<&SyntaxReference>, LoadingError> {
271        let path: &Path = path_obj.as_ref();
272        let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
273        let extension = path.extension().and_then(|x| x.to_str()).unwrap_or("");
274        let ext_syntax = self
275            .find_syntax_by_extension(file_name)
276            .or_else(|| self.find_syntax_by_extension(extension));
277        let line_syntax = if ext_syntax.is_none() {
278            let mut line = String::new();
279            let f = File::open(path)?;
280            let mut line_reader = BufReader::new(&f);
281            line_reader.read_line(&mut line)?;
282            self.find_syntax_by_first_line(&line)?
283        } else {
284            None
285        };
286        let syntax = ext_syntax.or(line_syntax);
287        Ok(syntax)
288    }
289
290    /// Finds a syntax for plain text, which usually has no highlighting rules.
291    ///
292    /// This is good as a fallback when you can't find another syntax but you still want to use the
293    /// same highlighting pipeline code.
294    ///
295    /// This syntax should always be present, if not this method will panic. If the way you load
296    /// syntaxes doesn't create one, use [`add_plain_text_syntax`].
297    ///
298    /// # Examples
299    /// ```
300    /// use syntect::parsing::SyntaxSetBuilder;
301    /// let mut builder = SyntaxSetBuilder::new();
302    /// builder.add_plain_text_syntax();
303    /// let ss = builder.build();
304    /// let syntax = ss.find_syntax_by_token("rs").unwrap_or_else(|| ss.find_syntax_plain_text());
305    /// assert_eq!(syntax.name, "Plain Text");
306    /// ```
307    ///
308    /// [`add_plain_text_syntax`]: struct.SyntaxSetBuilder.html#method.add_plain_text_syntax
309    pub fn find_syntax_plain_text(&self) -> &SyntaxReference {
310        self.find_syntax_by_name("Plain Text")
311            .expect("All syntax sets ought to have a plain text syntax")
312    }
313
314    /// Converts this syntax set into a builder so that more syntaxes can be
315    /// added to it.
316    ///
317    /// Note that newly added syntaxes can have references to existing syntaxes
318    /// in the set, but not the other way around.
319    pub fn into_builder(self) -> SyntaxSetBuilder {
320        #[cfg(feature = "metadata")]
321        let SyntaxSet {
322            syntaxes,
323            path_syntaxes,
324            metadata,
325            ..
326        } = self;
327        #[cfg(not(feature = "metadata"))]
328        let SyntaxSet {
329            syntaxes,
330            path_syntaxes,
331            ..
332        } = self;
333
334        let mut context_map = HashMap::new();
335        for (syntax_index, syntax) in syntaxes.iter().enumerate() {
336            for (context_index, context) in syntax.contexts().iter().enumerate() {
337                context_map.insert(
338                    ContextId {
339                        syntax_index,
340                        context_index,
341                    },
342                    context.clone(),
343                );
344            }
345        }
346
347        let mut builder_syntaxes = Vec::with_capacity(syntaxes.len());
348
349        for syntax in syntaxes {
350            let SyntaxReference {
351                name,
352                file_extensions,
353                scope,
354                first_line_match,
355                hidden,
356                variables,
357                serialized_lazy_contexts,
358                ..
359            } = syntax;
360
361            let lazy_contexts = LazyContexts::deserialize(&serialized_lazy_contexts[..]);
362            let mut builder_contexts = HashMap::with_capacity(lazy_contexts.context_ids.len());
363            for (name, context_id) in lazy_contexts.context_ids {
364                if let Some(context) = context_map.remove(&context_id) {
365                    builder_contexts.insert(name, context);
366                }
367            }
368
369            let syntax_definition = SyntaxDefinition {
370                name,
371                file_extensions,
372                scope,
373                first_line_match,
374                hidden,
375                variables,
376                contexts: builder_contexts,
377            };
378            builder_syntaxes.push(syntax_definition);
379        }
380
381        SyntaxSetBuilder {
382            syntaxes: builder_syntaxes,
383            path_syntaxes,
384            #[cfg(feature = "metadata")]
385            existing_metadata: Some(metadata),
386            #[cfg(feature = "metadata")]
387            raw_metadata: LoadMetadata::default(),
388        }
389    }
390
391    #[inline(always)]
392    pub(crate) fn get_context(&self, context_id: &ContextId) -> Result<&Context, ParsingError> {
393        let syntax = &self
394            .syntaxes
395            .get(context_id.syntax_index)
396            .ok_or(ParsingError::MissingContext(*context_id))?;
397        syntax
398            .contexts()
399            .get(context_id.context_index)
400            .ok_or(ParsingError::MissingContext(*context_id))
401    }
402
403    fn first_line_cache(&self) -> &FirstLineCache {
404        self.first_line_cache
405            .get_or_init(|| FirstLineCache::new(self.syntaxes()))
406    }
407
408    pub fn find_unlinked_contexts(&self) -> BTreeSet<String> {
409        let SyntaxSet { syntaxes, .. } = self;
410
411        let mut unlinked_contexts = BTreeSet::new();
412
413        for syntax in syntaxes {
414            let SyntaxReference { name, scope, .. } = syntax;
415
416            for context in syntax.contexts() {
417                Self::find_unlinked_contexts_in_context(
418                    name,
419                    scope,
420                    context,
421                    &mut unlinked_contexts,
422                );
423            }
424        }
425        unlinked_contexts
426    }
427
428    fn find_unlinked_contexts_in_context(
429        name: &str,
430        scope: &Scope,
431        context: &Context,
432        unlinked_contexts: &mut BTreeSet<String>,
433    ) {
434        for pattern in context.patterns.iter() {
435            let maybe_refs_to_check = match pattern {
436                Pattern::Match(match_pat) => match &match_pat.operation {
437                    MatchOperation::Push(context_refs) => Some(context_refs),
438                    MatchOperation::Set(context_refs) => Some(context_refs),
439                    _ => None,
440                },
441                _ => None,
442            };
443            for context_ref in maybe_refs_to_check.into_iter().flatten() {
444                match context_ref {
445                    ContextReference::Direct(_) => {}
446                    _ => {
447                        unlinked_contexts.insert(format!(
448                            "Syntax '{}' with scope '{}' has unresolved context reference {:?}",
449                            name, scope, &context_ref
450                        ));
451                    }
452                }
453            }
454        }
455    }
456}
457
458impl SyntaxReference {
459    pub(crate) fn context_ids(&self) -> &HashMap<String, ContextId> {
460        &self.lazy_contexts().context_ids
461    }
462
463    fn contexts(&self) -> &[Context] {
464        &self.lazy_contexts().contexts
465    }
466
467    fn lazy_contexts(&self) -> &LazyContexts {
468        self.lazy_contexts
469            .get_or_init(|| LazyContexts::deserialize(&self.serialized_lazy_contexts[..]))
470    }
471}
472
impl LazyContexts {
    /// Decodes a `LazyContexts` value from `data` using `crate::dumps::from_reader`.
    ///
    /// # Panics
    ///
    /// Panics if decoding fails, i.e. if `data` is corrupt or out of sync with the code.
    fn deserialize(data: &[u8]) -> LazyContexts {
        crate::dumps::from_reader(data).expect("data is not corrupt or out of sync with the code")
    }
}
478
479impl SyntaxSetBuilder {
480    pub fn new() -> SyntaxSetBuilder {
481        SyntaxSetBuilder::default()
482    }
483
    /// Add a syntax to the set.
    ///
    /// The definition is appended after all previously added syntaxes; no path is
    /// recorded for it.
    pub fn add(&mut self, syntax: SyntaxDefinition) {
        self.syntaxes.push(syntax);
    }
488
489    /// The list of syntaxes added so far.
490    pub fn syntaxes(&self) -> &[SyntaxDefinition] {
491        &self.syntaxes[..]
492    }
493
494    /// A rarely useful method that loads in a syntax with no highlighting rules for plain text
495    ///
496    /// Exists mainly for adding the plain text syntax to syntax set dumps, because for some reason
497    /// the default Sublime plain text syntax is still in `.tmLanguage` format.
498    #[cfg(feature = "yaml-load")]
499    pub fn add_plain_text_syntax(&mut self) {
500        let s = "---\nname: Plain Text\nfile_extensions: [txt]\nscope: text.plain\ncontexts: \
501                 {main: []}";
502        let syn = SyntaxDefinition::load_from_str(s, false, None).unwrap();
503        self.syntaxes.push(syn);
504    }
505
506    /// Loads all the `.sublime-syntax` files in a folder into this builder.
507    ///
508    /// The `lines_include_newline` parameter is used to work around the fact that Sublime Text
509    /// normally passes line strings including newline characters (`\n`) to its regex engine. This
510    /// results in many syntaxes having regexes matching `\n`, which doesn't work if you don't pass
511    /// in newlines. It is recommended that if you can you pass in lines with newlines if you can
512    /// and pass `true` for this parameter. If that is inconvenient pass `false` and the loader
513    /// will do some hacky find and replaces on the match regexes that seem to work for the default
514    /// syntax set, but may not work for any other syntaxes.
515    ///
516    /// In the future I might include a "slow mode" that copies the lines passed in and appends a
517    /// newline if there isn't one, but in the interest of performance currently this hacky fix will
518    /// have to do.
519    #[cfg(feature = "yaml-load")]
520    pub fn add_from_folder<P: AsRef<Path>>(
521        &mut self,
522        folder: P,
523        lines_include_newline: bool,
524    ) -> Result<(), LoadingError> {
525        for entry in crate::utils::walk_dir(folder).sort_by(|a, b| a.file_name().cmp(b.file_name()))
526        {
527            let entry = entry.map_err(LoadingError::WalkDir)?;
528            if entry
529                .path()
530                .extension()
531                .map_or(false, |e| e == "sublime-syntax")
532            {
533                let syntax = load_syntax_file(entry.path(), lines_include_newline)?;
534                if let Some(path_str) = entry.path().to_str() {
535                    // Split the path up and rejoin with slashes so that syntaxes loaded on Windows
536                    // can still be loaded the same way.
537                    let path = Path::new(path_str);
538                    let path_parts: Vec<_> = path.iter().map(|c| c.to_str().unwrap()).collect();
539                    self.path_syntaxes
540                        .push((path_parts.join("/").to_string(), self.syntaxes.len()));
541                }
542                self.syntaxes.push(syntax);
543            }
544
545            #[cfg(feature = "metadata")]
546            {
547                if entry.path().extension() == Some("tmPreferences".as_ref()) {
548                    match RawMetadataEntry::load(entry.path()) {
549                        Ok(meta) => self.raw_metadata.add_raw(meta),
550                        Err(_err) => (),
551                    }
552                }
553            }
554        }
555
556        Ok(())
557    }
558
559    /// Build a [`SyntaxSet`] from the syntaxes that have been added to this
560    /// builder.
561    ///
562    /// ### Linking
563    ///
564    /// The contexts in syntaxes can reference other contexts in the same syntax
565    /// or even other syntaxes. For example, a HTML syntax can reference a CSS
566    /// syntax so that CSS blocks in HTML work as expected.
567    ///
568    /// Those references work in various ways and involve one or two lookups.
569    /// To avoid having to do these lookups during parsing/highlighting, the
570    /// references are changed to directly reference contexts via index. That's
571    /// called linking.
572    ///
573    /// Linking is done in this build step. So in order to get the best
574    /// performance, you should try to avoid calling this too much. Ideally,
575    /// create a [`SyntaxSet`] once and then use it many times. If you can,
576    /// serialize a [`SyntaxSet`] for your program and when you run the program,
577    /// directly load the [`SyntaxSet`].
578    ///
579    /// [`SyntaxSet`]: struct.SyntaxSet.html
580    pub fn build(self) -> SyntaxSet {
581        #[cfg(not(feature = "metadata"))]
582        let SyntaxSetBuilder {
583            syntaxes: syntax_definitions,
584            path_syntaxes,
585        } = self;
586        #[cfg(feature = "metadata")]
587        let SyntaxSetBuilder {
588            syntaxes: syntax_definitions,
589            path_syntaxes,
590            raw_metadata,
591            existing_metadata,
592        } = self;
593
594        let mut syntaxes = Vec::with_capacity(syntax_definitions.len());
595        let mut all_context_ids = Vec::new();
596        let mut all_contexts = vec![Vec::new(); syntax_definitions.len()];
597
598        for (syntax_index, syntax_definition) in syntax_definitions.into_iter().enumerate() {
599            let SyntaxDefinition {
600                name,
601                file_extensions,
602                scope,
603                first_line_match,
604                hidden,
605                variables,
606                contexts,
607            } = syntax_definition;
608
609            let mut context_ids = HashMap::new();
610
611            let mut contexts: Vec<(String, Context)> = contexts.into_iter().collect();
612            // Sort the values of the HashMap so that the contexts in the
613            // resulting SyntaxSet have a deterministic order for serializing.
614            // Because we're sorting by the keys which are unique, we can use
615            // an unstable sort.
616            contexts.sort_unstable_by(|(name_a, _), (name_b, _)| name_a.cmp(name_b));
617            for (name, context) in contexts {
618                let context_index = all_contexts[syntax_index].len();
619                context_ids.insert(
620                    name,
621                    ContextId {
622                        syntax_index,
623                        context_index,
624                    },
625                );
626                all_contexts[syntax_index].push(context);
627            }
628
629            let syntax = SyntaxReference {
630                name,
631                file_extensions,
632                scope,
633                first_line_match,
634                hidden,
635                variables,
636                lazy_contexts: OnceCell::new(),
637                serialized_lazy_contexts: Vec::new(), // initialized in the last step
638            };
639            syntaxes.push(syntax);
640            all_context_ids.push(context_ids);
641        }
642
643        let mut found_more_backref_includes = true;
644        for (syntax_index, _syntax) in syntaxes.iter().enumerate() {
645            let mut no_prototype = HashSet::new();
646            let prototype = all_context_ids[syntax_index].get("prototype");
647            if let Some(prototype_id) = prototype {
648                // TODO: We could do this after parsing YAML, instead of here?
649                Self::recursively_mark_no_prototype(
650                    prototype_id,
651                    &all_context_ids[syntax_index],
652                    &all_contexts,
653                    &mut no_prototype,
654                );
655            }
656
657            for context_id in all_context_ids[syntax_index].values() {
658                let context = &mut all_contexts[context_id.syntax_index][context_id.context_index];
659                if let Some(prototype_id) = prototype {
660                    if context.meta_include_prototype && !no_prototype.contains(context_id) {
661                        context.prototype = Some(*prototype_id);
662                    }
663                }
664                Self::link_context(context, syntax_index, &all_context_ids, &syntaxes);
665
666                if context.uses_backrefs {
667                    found_more_backref_includes = true;
668                }
669            }
670        }
671
672        // We need to recursively mark contexts that include contexts which
673        // use backreferences as using backreferences. In theory we could use
674        // a more efficient method here like doing a toposort or constructing
675        // a representation with reversed edges and then tracing in the
676        // opposite direction, but I benchmarked this and it adds <2% to link
677        // time on the default syntax set, and linking doesn't even happen
678        // when loading from a binary dump.
679        while found_more_backref_includes {
680            found_more_backref_includes = false;
681            // find any contexts which include a context which uses backrefs
682            // and mark those as using backrefs - to support nested includes
683            for syntax_index in 0..syntaxes.len() {
684                for context_index in 0..all_contexts[syntax_index].len() {
685                    let context = &all_contexts[syntax_index][context_index];
686                    if !context.uses_backrefs && context.patterns.iter().any(|pattern| {
687                        matches!(pattern, Pattern::Include(ContextReference::Direct(id)) if all_contexts[id.syntax_index][id.context_index].uses_backrefs)
688                    }) {
689                        let context = &mut all_contexts[syntax_index][context_index];
690                        context.uses_backrefs = true;
691                        // look for contexts including this context
692                        found_more_backref_includes = true;
693                    }
694                }
695            }
696        }
697
698        #[cfg(feature = "metadata")]
699        let metadata = match existing_metadata {
700            Some(existing) => existing.merged_with_raw(raw_metadata),
701            None => raw_metadata.into(),
702        };
703
704        // The combination of
705        //  * the algorithms above
706        //  * the borrow checker
707        // makes it necessary to set these up as the last step.
708        for syntax in &mut syntaxes {
709            let lazy_contexts = LazyContexts {
710                context_ids: all_context_ids.remove(0),
711                contexts: all_contexts.remove(0),
712            };
713
714            syntax.serialized_lazy_contexts = crate::dumps::dump_binary(&lazy_contexts);
715        }
716
717        SyntaxSet {
718            syntaxes,
719            path_syntaxes,
720            first_line_cache: OnceCell::new(),
721            #[cfg(feature = "metadata")]
722            metadata,
723        }
724    }
725
726    /// Anything recursively included by the prototype shouldn't include the prototype.
727    /// This marks them as such.
728    fn recursively_mark_no_prototype(
729        context_id: &ContextId,
730        syntax_context_ids: &HashMap<String, ContextId>,
731        all_contexts: &[Vec<Context>],
732        no_prototype: &mut HashSet<ContextId>,
733    ) {
734        let first_time = no_prototype.insert(*context_id);
735        if !first_time {
736            return;
737        }
738
739        for pattern in &all_contexts[context_id.syntax_index][context_id.context_index].patterns {
740            match *pattern {
741                // Apparently inline blocks also don't include the prototype when within the prototype.
742                // This is really weird, but necessary to run the YAML syntax.
743                Pattern::Match(ref match_pat) => {
744                    let maybe_context_refs = match match_pat.operation {
745                        MatchOperation::Push(ref context_refs)
746                        | MatchOperation::Set(ref context_refs) => Some(context_refs),
747                        MatchOperation::Pop | MatchOperation::None => None,
748                    };
749                    if let Some(context_refs) = maybe_context_refs {
750                        for context_ref in context_refs.iter() {
751                            match context_ref {
752                                ContextReference::Inline(ref s)
753                                | ContextReference::Named(ref s) => {
754                                    if let Some(i) = syntax_context_ids.get(s) {
755                                        Self::recursively_mark_no_prototype(
756                                            i,
757                                            syntax_context_ids,
758                                            all_contexts,
759                                            no_prototype,
760                                        );
761                                    }
762                                }
763                                ContextReference::Direct(ref id) => {
764                                    Self::recursively_mark_no_prototype(
765                                        id,
766                                        syntax_context_ids,
767                                        all_contexts,
768                                        no_prototype,
769                                    );
770                                }
771                                _ => (),
772                            }
773                        }
774                    }
775                }
776                Pattern::Include(ref reference) => match reference {
777                    ContextReference::Named(ref s) => {
778                        if let Some(id) = syntax_context_ids.get(s) {
779                            Self::recursively_mark_no_prototype(
780                                id,
781                                syntax_context_ids,
782                                all_contexts,
783                                no_prototype,
784                            );
785                        }
786                    }
787                    ContextReference::Direct(ref id) => {
788                        Self::recursively_mark_no_prototype(
789                            id,
790                            syntax_context_ids,
791                            all_contexts,
792                            no_prototype,
793                        );
794                    }
795                    _ => (),
796                },
797            }
798        }
799    }
800
801    fn link_context(
802        context: &mut Context,
803        syntax_index: usize,
804        all_context_ids: &[HashMap<String, ContextId>],
805        syntaxes: &[SyntaxReference],
806    ) {
807        for pattern in &mut context.patterns {
808            match *pattern {
809                Pattern::Match(ref mut match_pat) => {
810                    Self::link_match_pat(match_pat, syntax_index, all_context_ids, syntaxes)
811                }
812                Pattern::Include(ref mut context_ref) => {
813                    Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes)
814                }
815            }
816        }
817    }
818
819    fn link_ref(
820        context_ref: &mut ContextReference,
821        syntax_index: usize,
822        all_context_ids: &[HashMap<String, ContextId>],
823        syntaxes: &[SyntaxReference],
824    ) {
825        // println!("{:?}", context_ref);
826        use super::syntax_definition::ContextReference::*;
827        let linked_context_id = match *context_ref {
828            Named(ref s) | Inline(ref s) => {
829                // This isn't actually correct, but it is better than nothing/crashing.
830                // This is being phased out anyhow, see https://github.com/sublimehq/Packages/issues/73
831                // Fixes issue #30
832                if s == "$top_level_main" {
833                    all_context_ids[syntax_index].get("main")
834                } else {
835                    all_context_ids[syntax_index].get(s)
836                }
837            }
838            ByScope {
839                scope,
840                ref sub_context,
841                with_escape,
842            } => Self::with_plain_text_fallback(
843                all_context_ids,
844                syntaxes,
845                with_escape,
846                Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| {
847                    index_and_syntax.1.scope == scope
848                }),
849            ),
850            File {
851                ref name,
852                ref sub_context,
853                with_escape,
854            } => Self::with_plain_text_fallback(
855                all_context_ids,
856                syntaxes,
857                with_escape,
858                Self::find_id(sub_context, all_context_ids, syntaxes, |index_and_syntax| {
859                    &index_and_syntax.1.name == name
860                }),
861            ),
862            Direct(_) => None,
863        };
864        if let Some(context_id) = linked_context_id {
865            let mut new_ref = Direct(*context_id);
866            mem::swap(context_ref, &mut new_ref);
867        }
868    }
869
870    fn with_plain_text_fallback<'a>(
871        all_context_ids: &'a [HashMap<String, ContextId>],
872        syntaxes: &'a [SyntaxReference],
873        with_escape: bool,
874        context_id: Option<&'a ContextId>,
875    ) -> Option<&'a ContextId> {
876        context_id.or_else(|| {
877            if with_escape {
878                // If we keep this reference unresolved, syntect will crash
879                // when it encounters the reference. Rather than crashing,
880                // we instead fall back to "Plain Text". This seems to be
881                // how Sublime Text behaves. It should be a safe thing to do
882                // since `embed`s always includes an `escape` to get out of
883                // the `embed`.
884                Self::find_id(&None, all_context_ids, syntaxes, |index_and_syntax| {
885                    index_and_syntax.1.name == "Plain Text"
886                })
887            } else {
888                None
889            }
890        })
891    }
892
893    fn find_id<'a>(
894        sub_context: &Option<String>,
895        all_context_ids: &'a [HashMap<String, ContextId>],
896        syntaxes: &'a [SyntaxReference],
897        predicate: impl FnMut(&(usize, &SyntaxReference)) -> bool,
898    ) -> Option<&'a ContextId> {
899        let context_name = sub_context.as_ref().map_or("main", |x| &**x);
900        syntaxes
901            .iter()
902            .enumerate()
903            .rev()
904            .find(predicate)
905            .and_then(|index_and_syntax| all_context_ids[index_and_syntax.0].get(context_name))
906    }
907
908    fn link_match_pat(
909        match_pat: &mut MatchPattern,
910        syntax_index: usize,
911        all_context_ids: &[HashMap<String, ContextId>],
912        syntaxes: &[SyntaxReference],
913    ) {
914        let maybe_context_refs = match match_pat.operation {
915            MatchOperation::Push(ref mut context_refs)
916            | MatchOperation::Set(ref mut context_refs) => Some(context_refs),
917            MatchOperation::Pop | MatchOperation::None => None,
918        };
919        if let Some(context_refs) = maybe_context_refs {
920            for context_ref in context_refs.iter_mut() {
921                Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes);
922            }
923        }
924        if let Some(ref mut context_ref) = match_pat.with_prototype {
925            Self::link_ref(context_ref, syntax_index, all_context_ids, syntaxes);
926        }
927    }
928}
929
/// Compiled `first_line_match` regexes for a list of syntaxes.
///
/// Built by `FirstLineCache::new`; each entry remembers the position of its syntax in the
/// slice the cache was built from.
#[derive(Debug)]
struct FirstLineCache {
    /// (first line regex, syntax index) pairs for all syntaxes with a first line regex
    regexes: Vec<(Regex, usize)>,
}
935
936impl FirstLineCache {
937    fn new(syntaxes: &[SyntaxReference]) -> FirstLineCache {
938        let mut regexes = Vec::new();
939        for (i, syntax) in syntaxes.iter().enumerate() {
940            if let Some(ref reg_str) = syntax.first_line_match {
941                let reg = Regex::new(reg_str.into());
942                regexes.push((reg, i));
943            }
944        }
945        FirstLineCache { regexes }
946    }
947}
948
949#[cfg(feature = "yaml-load")]
950#[cfg(test)]
951mod tests {
952    use super::*;
953    use crate::parsing::{syntax_definition, ParseState, Scope};
954    use std::collections::HashMap;
955
956    #[test]
957    fn can_load() {
958        let mut builder = SyntaxSetBuilder::new();
959        builder.add_from_folder("testdata/Packages", false).unwrap();
960
961        let cmake_dummy_syntax = SyntaxDefinition {
962            name: "CMake".to_string(),
963            file_extensions: vec!["CMakeLists.txt".to_string(), "cmake".to_string()],
964            scope: Scope::new("source.cmake").unwrap(),
965            first_line_match: None,
966            hidden: false,
967            variables: HashMap::new(),
968            contexts: HashMap::new(),
969        };
970
971        builder.add(cmake_dummy_syntax);
972        builder.add_plain_text_syntax();
973
974        let ps = builder.build();
975
976        assert_eq!(
977            &ps.find_syntax_by_first_line("#!/usr/bin/env node")
978                .unwrap()
979                .unwrap()
980                .name,
981            "JavaScript"
982        );
983        let rails_scope = Scope::new("source.ruby.rails").unwrap();
984        let syntax = ps.find_syntax_by_name("Ruby on Rails").unwrap();
985        ps.find_syntax_plain_text();
986        assert_eq!(&ps.find_syntax_by_extension("rake").unwrap().name, "Ruby");
987        assert_eq!(&ps.find_syntax_by_extension("RAKE").unwrap().name, "Ruby");
988        assert_eq!(&ps.find_syntax_by_token("ruby").unwrap().name, "Ruby");
989        assert_eq!(
990            &ps.find_syntax_by_first_line("lol -*- Mode: C -*- such line")
991                .unwrap()
992                .unwrap()
993                .name,
994            "C"
995        );
996        assert_eq!(
997            &ps.find_syntax_for_file("testdata/parser.rs")
998                .unwrap()
999                .unwrap()
1000                .name,
1001            "Rust"
1002        );
1003        assert_eq!(
1004            &ps.find_syntax_for_file("testdata/test_first_line.test")
1005                .expect("Error finding syntax for file")
1006                .expect("No syntax found for file")
1007                .name,
1008            "Ruby"
1009        );
1010        assert_eq!(
1011            &ps.find_syntax_for_file(".bashrc").unwrap().unwrap().name,
1012            "Bourne Again Shell (bash)"
1013        );
1014        assert_eq!(
1015            &ps.find_syntax_for_file("CMakeLists.txt")
1016                .unwrap()
1017                .unwrap()
1018                .name,
1019            "CMake"
1020        );
1021        assert_eq!(
1022            &ps.find_syntax_for_file("test.cmake").unwrap().unwrap().name,
1023            "CMake"
1024        );
1025        assert_eq!(
1026            &ps.find_syntax_for_file("Rakefile").unwrap().unwrap().name,
1027            "Ruby"
1028        );
1029        assert!(&ps
1030            .find_syntax_by_first_line("derp derp hi lol")
1031            .unwrap()
1032            .is_none());
1033        assert_eq!(
1034            &ps.find_syntax_by_path("Packages/Rust/Rust.sublime-syntax")
1035                .unwrap()
1036                .name,
1037            "Rust"
1038        );
1039        // println!("{:#?}", syntax);
1040        assert_eq!(syntax.scope, rails_scope);
1041        // unreachable!();
1042        let main_context = ps
1043            .get_context(&syntax.context_ids()["main"])
1044            .expect("#[cfg(test)]");
1045        let count = syntax_definition::context_iter(&ps, main_context).count();
1046        assert_eq!(count, 109);
1047    }
1048
1049    #[test]
1050    fn can_clone() {
1051        let cloned_syntax_set = {
1052            let mut builder = SyntaxSetBuilder::new();
1053            builder.add(syntax_a());
1054            builder.add(syntax_b());
1055
1056            let syntax_set_original = builder.build();
1057            #[allow(clippy::redundant_clone)] // We want to test .clone()
1058            syntax_set_original.clone()
1059            // Note: The original syntax set is dropped
1060        };
1061
1062        let syntax = cloned_syntax_set.find_syntax_by_extension("a").unwrap();
1063        let mut parse_state = ParseState::new(syntax, false);
1064        let ops = parse_state
1065            .parse_line("a go_b b", &cloned_syntax_set)
1066            .expect("#[cfg(test)]");
1067        let expected = (7, ScopeStackOp::Push(Scope::new("b").unwrap()));
1068        assert_ops_contain(&ops, &expected);
1069    }
1070
1071    #[test]
1072    fn can_list_added_syntaxes() {
1073        let mut builder = SyntaxSetBuilder::new();
1074        builder.add(syntax_a());
1075        builder.add(syntax_b());
1076        let syntaxes = builder.syntaxes();
1077
1078        assert_eq!(syntaxes.len(), 2);
1079        assert_eq!(syntaxes[0].name, "A");
1080        assert_eq!(syntaxes[1].name, "B");
1081    }
1082
1083    #[test]
1084    fn can_add_more_syntaxes_with_builder() {
1085        let syntax_set_original = {
1086            let mut builder = SyntaxSetBuilder::new();
1087            builder.add(syntax_a());
1088            builder.add(syntax_b());
1089            builder.build()
1090        };
1091
1092        let mut builder = syntax_set_original.into_builder();
1093
1094        let syntax_c = SyntaxDefinition::load_from_str(
1095            r#"
1096        name: C
1097        scope: source.c
1098        file_extensions: [c]
1099        contexts:
1100          main:
1101            - match: 'c'
1102              scope: c
1103            - match: 'go_a'
1104              push: scope:source.a#main
1105        "#,
1106            true,
1107            None,
1108        )
1109        .unwrap();
1110
1111        builder.add(syntax_c);
1112
1113        let syntax_set = builder.build();
1114
1115        let syntax = syntax_set.find_syntax_by_extension("c").unwrap();
1116        let mut parse_state = ParseState::new(syntax, false);
1117        let ops = parse_state
1118            .parse_line("c go_a a go_b b", &syntax_set)
1119            .expect("#[cfg(test)]");
1120        let expected = (14, ScopeStackOp::Push(Scope::new("b").unwrap()));
1121        assert_ops_contain(&ops, &expected);
1122    }
1123
1124    #[test]
1125    fn falls_back_to_plain_text_when_embedded_scope_is_missing() {
1126        test_plain_text_fallback(
1127            r#"
1128        name: Z
1129        scope: source.z
1130        file_extensions: [z]
1131        contexts:
1132          main:
1133            - match: 'z'
1134              scope: z
1135            - match: 'go_x'
1136              embed: scope:does.not.exist
1137              escape: 'leave_x'
1138        "#,
1139        );
1140    }
1141
1142    #[test]
1143    fn falls_back_to_plain_text_when_embedded_file_is_missing() {
1144        test_plain_text_fallback(
1145            r#"
1146        name: Z
1147        scope: source.z
1148        file_extensions: [z]
1149        contexts:
1150          main:
1151            - match: 'z'
1152              scope: z
1153            - match: 'go_x'
1154              embed: DoesNotExist.sublime-syntax
1155              escape: 'leave_x'
1156        "#,
1157        );
1158    }
1159
1160    fn test_plain_text_fallback(syntax_definition: &str) {
1161        let syntax = SyntaxDefinition::load_from_str(syntax_definition, true, None).unwrap();
1162
1163        let mut builder = SyntaxSetBuilder::new();
1164        builder.add_plain_text_syntax();
1165        builder.add(syntax);
1166        let syntax_set = builder.build();
1167
1168        let syntax = syntax_set.find_syntax_by_extension("z").unwrap();
1169        let mut parse_state = ParseState::new(syntax, false);
1170        let ops = parse_state
1171            .parse_line("z go_x x leave_x z", &syntax_set)
1172            .unwrap();
1173        let expected_ops = vec![
1174            (0, ScopeStackOp::Push(Scope::new("source.z").unwrap())),
1175            (0, ScopeStackOp::Push(Scope::new("z").unwrap())),
1176            (1, ScopeStackOp::Pop(1)),
1177            (6, ScopeStackOp::Push(Scope::new("text.plain").unwrap())),
1178            (9, ScopeStackOp::Pop(1)),
1179            (17, ScopeStackOp::Push(Scope::new("z").unwrap())),
1180            (18, ScopeStackOp::Pop(1)),
1181        ];
1182        assert_eq!(ops, expected_ops);
1183    }
1184
1185    #[test]
1186    fn can_find_unlinked_contexts() {
1187        let syntax_set = {
1188            let mut builder = SyntaxSetBuilder::new();
1189            builder.add(syntax_a());
1190            builder.add(syntax_b());
1191            builder.build()
1192        };
1193
1194        let unlinked_contexts = syntax_set.find_unlinked_contexts();
1195        assert_eq!(unlinked_contexts.len(), 0);
1196
1197        let syntax_set = {
1198            let mut builder = SyntaxSetBuilder::new();
1199            builder.add(syntax_a());
1200            builder.build()
1201        };
1202
1203        let unlinked_contexts: Vec<String> =
1204            syntax_set.find_unlinked_contexts().into_iter().collect();
1205        assert_eq!(unlinked_contexts.len(), 1);
1206        assert_eq!(unlinked_contexts[0], "Syntax 'A' with scope 'source.a' has unresolved context reference ByScope { scope: <source.b>, sub_context: Some(\"main\"), with_escape: false }");
1207    }
1208
1209    #[test]
1210    fn can_use_in_multiple_threads() {
1211        use rayon::prelude::*;
1212
1213        let syntax_set = {
1214            let mut builder = SyntaxSetBuilder::new();
1215            builder.add(syntax_a());
1216            builder.add(syntax_b());
1217            builder.build()
1218        };
1219
1220        let lines = vec!["a a a", "a go_b b", "go_b b", "go_b b  b"];
1221
1222        let results: Vec<Vec<(usize, ScopeStackOp)>> = lines
1223            .par_iter()
1224            .map(|line| {
1225                let syntax = syntax_set.find_syntax_by_extension("a").unwrap();
1226                let mut parse_state = ParseState::new(syntax, false);
1227                parse_state
1228                    .parse_line(line, &syntax_set)
1229                    .expect("#[cfg(test)]")
1230            })
1231            .collect();
1232
1233        assert_ops_contain(
1234            &results[0],
1235            &(4, ScopeStackOp::Push(Scope::new("a").unwrap())),
1236        );
1237        assert_ops_contain(
1238            &results[1],
1239            &(7, ScopeStackOp::Push(Scope::new("b").unwrap())),
1240        );
1241        assert_ops_contain(
1242            &results[2],
1243            &(5, ScopeStackOp::Push(Scope::new("b").unwrap())),
1244        );
1245        assert_ops_contain(
1246            &results[3],
1247            &(8, ScopeStackOp::Push(Scope::new("b").unwrap())),
1248        );
1249    }
1250
    /// Compile-time check: this test only builds if `SyntaxSet` implements `Sync`.
    #[test]
    fn is_sync() {
        check_sync::<SyntaxSet>();
    }
1255
    /// Compile-time check: this test only builds if `SyntaxSet` implements `Send`.
    #[test]
    fn is_send() {
        check_send::<SyntaxSet>();
    }
1260
1261    #[test]
1262    fn can_override_syntaxes() {
1263        let syntax_set = {
1264            let mut builder = SyntaxSetBuilder::new();
1265            builder.add(syntax_a());
1266            builder.add(syntax_b());
1267
1268            let syntax_a2 = SyntaxDefinition::load_from_str(
1269                r#"
1270                name: A improved
1271                scope: source.a
1272                file_extensions: [a]
1273                first_line_match: syntax\s+a
1274                contexts:
1275                  main:
1276                    - match: a
1277                      scope: a2
1278                    - match: go_b
1279                      push: scope:source.b#main
1280                "#,
1281                true,
1282                None,
1283            )
1284            .unwrap();
1285
1286            builder.add(syntax_a2);
1287
1288            let syntax_c = SyntaxDefinition::load_from_str(
1289                r#"
1290                name: C
1291                scope: source.c
1292                file_extensions: [c]
1293                first_line_match: syntax\s+.*
1294                contexts:
1295                  main:
1296                    - match: c
1297                      scope: c
1298                    - match: go_a
1299                      push: scope:source.a#main
1300                "#,
1301                true,
1302                None,
1303            )
1304            .unwrap();
1305
1306            builder.add(syntax_c);
1307
1308            builder.build()
1309        };
1310
1311        let mut syntax = syntax_set.find_syntax_by_extension("a").unwrap();
1312        assert_eq!(syntax.name, "A improved");
1313        syntax = syntax_set
1314            .find_syntax_by_scope(Scope::new("source.a").unwrap())
1315            .unwrap();
1316        assert_eq!(syntax.name, "A improved");
1317        syntax = syntax_set
1318            .find_syntax_by_first_line("syntax a")
1319            .unwrap()
1320            .unwrap();
1321        assert_eq!(syntax.name, "C");
1322
1323        let mut parse_state = ParseState::new(syntax, false);
1324        let ops = parse_state
1325            .parse_line("c go_a a", &syntax_set)
1326            .expect("msg");
1327        let expected = (7, ScopeStackOp::Push(Scope::new("a2").unwrap()));
1328        assert_ops_contain(&ops, &expected);
1329    }
1330
1331    #[test]
1332    fn can_parse_issue219() {
1333        // Go to builder and back after loading so that build() gets Direct references instead of
1334        // Named ones. The bug was that Direct references were not handled when marking as
1335        // "no prototype", so prototype contexts accidentally had the prototype set, which made
1336        // the parser loop forever.
1337        let syntax_set = SyntaxSet::load_defaults_newlines().into_builder().build();
1338        let syntax = syntax_set.find_syntax_by_extension("yaml").unwrap();
1339
1340        let mut parse_state = ParseState::new(syntax, false);
1341        let ops = parse_state
1342            .parse_line("# test\n", &syntax_set)
1343            .expect("#[cfg(test)]");
1344        let expected = (
1345            0,
1346            ScopeStackOp::Push(Scope::new("comment.line.number-sign.yaml").unwrap()),
1347        );
1348        assert_ops_contain(&ops, &expected);
1349    }
1350
1351    #[test]
1352    fn no_prototype_for_contexts_included_from_prototype() {
1353        let mut builder = SyntaxSetBuilder::new();
1354        let syntax = SyntaxDefinition::load_from_str(
1355            r#"
1356                name: Test Prototype
1357                scope: source.test
1358                file_extensions: [test]
1359                contexts:
1360                  prototype:
1361                    - include: included_from_prototype
1362                  main:
1363                    - match: main
1364                    - match: other
1365                      push: other
1366                  other:
1367                    - match: o
1368                  included_from_prototype:
1369                    - match: p
1370                      scope: p
1371                "#,
1372            true,
1373            None,
1374        )
1375        .unwrap();
1376        builder.add(syntax);
1377        let ss = builder.build();
1378
1379        // "main" and "other" should have context set, "prototype" and "included_from_prototype"
1380        // must not have a prototype set.
1381        assert_prototype_only_on(&["main", "other"], &ss, &ss.syntaxes()[0]);
1382
1383        // Building again should have the same result. The difference is that after the first
1384        // build(), the references have been replaced with Direct references, so the code needs to
1385        // handle that correctly.
1386        let rebuilt = ss.into_builder().build();
1387        assert_prototype_only_on(&["main", "other"], &rebuilt, &rebuilt.syntaxes()[0]);
1388    }
1389
1390    #[test]
1391    fn no_prototype_for_contexts_inline_in_prototype() {
1392        let mut builder = SyntaxSetBuilder::new();
1393        let syntax = SyntaxDefinition::load_from_str(
1394            r#"
1395                name: Test Prototype
1396                scope: source.test
1397                file_extensions: [test]
1398                contexts:
1399                  prototype:
1400                    - match: p
1401                      push:
1402                        - match: p2
1403                  main:
1404                    - match: main
1405                "#,
1406            true,
1407            None,
1408        )
1409        .unwrap();
1410        builder.add(syntax);
1411        let ss = builder.build();
1412
1413        assert_prototype_only_on(&["main"], &ss, &ss.syntaxes()[0]);
1414
1415        let rebuilt = ss.into_builder().build();
1416        assert_prototype_only_on(&["main"], &rebuilt, &rebuilt.syntaxes()[0]);
1417    }
1418
1419    #[test]
1420    fn find_syntax_set_from_line_with_bom() {
1421        // Regression test for #529
1422        let syntax_set = SyntaxSet::load_defaults_newlines();
1423        let syntax_ref = syntax_set
1424            .find_syntax_by_first_line("\u{feff}<?xml version=\"1.0\"?>")
1425            .unwrap()
1426            .unwrap();
1427        assert_eq!(syntax_ref.name, "XML");
1428    }
1429
1430    fn assert_ops_contain(ops: &[(usize, ScopeStackOp)], expected: &(usize, ScopeStackOp)) {
1431        assert!(
1432            ops.contains(expected),
1433            "expected operations to contain {:?}: {:?}",
1434            expected,
1435            ops
1436        );
1437    }
1438
1439    fn assert_prototype_only_on(
1440        expected: &[&str],
1441        syntax_set: &SyntaxSet,
1442        syntax: &SyntaxReference,
1443    ) {
1444        for (name, id) in syntax.context_ids() {
1445            if name == "__main" || name == "__start" {
1446                // Skip special contexts
1447                continue;
1448            }
1449            let context = syntax_set.get_context(id).expect("#[cfg(test)]");
1450            if expected.contains(&name.as_str()) {
1451                assert!(
1452                    context.prototype.is_some(),
1453                    "Expected context {} to have prototype",
1454                    name
1455                );
1456            } else {
1457                assert!(
1458                    context.prototype.is_none(),
1459                    "Expected context {} to not have prototype",
1460                    name
1461                );
1462            }
1463        }
1464    }
1465
1466    fn check_send<T: Send>() {}
1467
1468    fn check_sync<T: Sync>() {}
1469
1470    fn syntax_a() -> SyntaxDefinition {
1471        SyntaxDefinition::load_from_str(
1472            r#"
1473            name: A
1474            scope: source.a
1475            file_extensions: [a]
1476            contexts:
1477              main:
1478                - match: 'a'
1479                  scope: a
1480                - match: 'go_b'
1481                  push: scope:source.b#main
1482            "#,
1483            true,
1484            None,
1485        )
1486        .unwrap()
1487    }
1488
1489    fn syntax_b() -> SyntaxDefinition {
1490        SyntaxDefinition::load_from_str(
1491            r#"
1492            name: B
1493            scope: source.b
1494            file_extensions: [b]
1495            contexts:
1496              main:
1497                - match: 'b'
1498                  scope: b
1499            "#,
1500            true,
1501            None,
1502        )
1503        .unwrap()
1504    }
1505}