syntect_no_panic/parsing/
parser.rs

1// Suppression of a false positive clippy lint. Upstream issue:
2//
3//   mutable_key_type false positive for raw pointers
4//   https://github.com/rust-lang/rust-clippy/issues/6745
5//
6// We use `*const MatchPattern` as key in our `SearchCache` hash map.
7// Clippy thinks this is a problem since `MatchPattern` has interior mutability
8// via `MatchPattern::regex::regex` which is an `AtomicLazyCell`.
9// But raw pointers are hashed via the pointer itself, not what is pointed to.
10// See https://github.com/rust-lang/rust/blob/1.54.0/library/core/src/hash/mod.rs#L717-L725
11#![allow(clippy::mutable_key_type)]
12
13use super::regex::Region;
14use super::scope::*;
15use super::syntax_definition::*;
16use crate::parsing::syntax_definition::ContextId;
17use crate::parsing::syntax_set::{SyntaxReference, SyntaxSet};
18use fnv::FnvHasher;
19use std::collections::HashMap;
20use std::hash::BuildHasherDefault;
21use std::i32;
22use std::usize;
23
24/// Errors that can occur while parsing.
25#[derive(Debug, thiserror::Error)]
26#[non_exhaustive]
27pub enum ParsingError {
28    #[error("Somehow main context was popped from the stack")]
29    MissingMainContext,
30    /// A context is missing. Usually caused by a syntax referencing a another
31    /// syntax that is not known to syntect. See e.g. <https://github.com/trishume/syntect/issues/421>
32    #[error("Missing context with ID '{0:?}'")]
33    MissingContext(ContextId),
34    #[error("Bad index to match_at: {0}")]
35    BadMatchIndex(usize),
36    #[error("Tried to use a ContextReference that has not bee resolved yet: {0:?}")]
37    UnresolvedContextReference(ContextReference),
38    #[error("Lazy syntax parsing failed: {0}")]
39    LazyParseSyntaxError(#[from] crate::parsing::ParseSyntaxError),
40}
41
42/// Keeps the current parser state (the internal syntax interpreter stack) between lines of parsing.
43///
44/// If you are parsing an entire file you create one of these at the start and use it
45/// all the way to the end.
46///
47/// # Caching
48///
49/// One reason this is exposed is that since it implements `Clone` you can actually cache
50/// these (probably along with a [`HighlightState`]) and only re-start parsing from the point of a change.
51/// See the docs for [`HighlightState`] for more in-depth discussion of caching.
52///
53/// This state doesn't keep track of the current scope stack and parsing only returns changes to this stack
54/// so if you want to construct scope stacks you'll need to keep track of that as well.
55/// Note that [`HighlightState`] contains exactly this as a public field that you can use.
56///
57/// **Note:** Caching is for advanced users who have tons of time to maximize performance or want to do so eventually.
58/// It is not recommended that you try caching the first time you implement highlighting.
59///
60/// [`HighlightState`]: ../highlighting/struct.HighlightState.html
61#[derive(Debug, Clone, Eq, PartialEq)]
62pub struct ParseState {
63    stack: Vec<StateLevel>,
64    first_line: bool,
65    // See issue #101. Contains indices of frames pushed by `with_prototype`s.
66    // Doesn't look at `with_prototype`s below top of stack.
67    proto_starts: Vec<usize>,
68    ignore_errors: bool,
69}
70
71#[derive(Debug, Clone, Eq, PartialEq)]
72struct StateLevel {
73    context: ContextId,
74    prototypes: Vec<ContextId>,
75    captures: Option<(Region, String)>,
76}
77
78#[derive(Debug)]
79struct RegexMatch<'a> {
80    regions: Region,
81    context: &'a Context,
82    pat_index: usize,
83    from_with_prototype: bool,
84    would_loop: bool,
85}
86
87/// Maps the pattern to the start index, which is -1 if not found.
88type SearchCache = HashMap<*const MatchPattern, Option<Region>, BuildHasherDefault<FnvHasher>>;
89
90// To understand the implementation of this, here's an introduction to how
91// Sublime Text syntax definitions work.
92//
93// Let's say we have the following made-up syntax definition:
94//
95//     contexts:
96//       main:
97//         - match: A
98//           scope: scope.a.first
99//           push: context-a
100//         - match: b
101//           scope: scope.b
102//         - match: \w+
103//           scope: scope.other
104//       context-a:
105//         - match: a+
106//           scope: scope.a.rest
107//         - match: (?=.)
108//           pop: true
109//
110// There are two contexts, `main` and `context-a`. Each context contains a list
111// of match rules with instructions for how to proceed.
112//
113// Let's say we have the input string " Aaaabxxx". We start at position 0 in
114// the string. We keep a stack of contexts, which at the beginning is just main.
115//
116// So we start by looking at the top of the context stack (main), and look at
117// the rules in order. The rule that wins is the first one that matches
118// "earliest" in the input string. In our example:
119//
120// 1. The first one matches "A". Note that matches are not anchored, so this
121//    matches at position 1.
122// 2. The second one matches "b", so position 5. The first rule is winning.
123// 3. The third one matches "\w+", so also position 1. But because the first
124//    rule comes first, it wins.
125//
126// So now we execute the winning rule. Whenever we matched some text, we assign
127// the scope (if there is one) to the matched text and advance our position to
128// after the matched text. The scope is "scope.a.first" and our new position is
129// after the "A", so 2. The "push" means that we should change our stack by
130// pushing `context-a` on top of it.
131//
132// In the next step, we repeat the above, but now with the rules in `context-a`.
133// The result is that we match "a+" and assign "scope.a.rest" to "aaa", and our
134// new position is now after the "aaa". Note that there was no instruction for
135// changing the stack, so we stay in that context.
136//
137// In the next step, the first rule doesn't match anymore, so we go to the next
138// rule where "(?=.)" matches. The instruction is to "pop", which means we
139// pop the top of our context stack, which means we're now back in main.
140//
141// This time in main, we match "b", and in the next step we match the rest with
142// "\w+", and we're done.
143//
144//
145// ## Preventing loops
146//
147// These are the basics of how matching works. Now, you saw that you can write
148// patterns that result in an empty match and don't change the position. These
149// are called non-consuming matches. The problem with them is that they could
150// result in infinite loops. Let's look at a syntax where that is the case:
151//
152//     contexts:
153//       main:
154//         - match: (?=.)
155//           push: test
156//       test:
157//         - match: \w+
158//           scope: word
159//         - match: (?=.)
160//           pop: true
161//
162// This is a bit silly, but it's a minimal example for explaining how matching
163// works in that case.
164//
165// Let's say we have the input string " hello". In `main`, our rule matches and
166// we go into `test` and stay at position 0. Now, the best match is the rule
167// with "pop". But if we used that rule, we'd pop back to `main` and would still
168// be at the same position we started at! So this would be an infinite loop,
169// which we don't want.
170//
171// So what Sublime Text does in case a looping rule "won":
172//
173// * If there's another rule that matches at the same position and does not
174//   result in a loop, use that instead.
175// * Otherwise, go to the next position and go through all the rules in the
176//   current context again. Note that it means that the "pop" could again be the
177//   winning rule, but that's ok as it wouldn't result in a loop anymore.
178//
179// So in our input string, we'd skip one character and try to match the rules
180// again. This time, the "\w+" wins because it comes first.
181
182impl ParseState {
183    /// Creates a state from a syntax definition, keeping its own reference-counted point to the
184    /// main context of the syntax
185    pub fn new(syntax: &SyntaxReference, ignore_errors: bool) -> ParseState {
186        let start_state = StateLevel {
187            context: syntax.context_ids()["__start"],
188            prototypes: Vec::new(),
189            captures: None,
190        };
191        ParseState {
192            stack: vec![start_state],
193            first_line: true,
194            proto_starts: Vec::new(),
195            ignore_errors,
196        }
197    }
198
199    /// Parses a single line of the file. Because of the way regex engines work you unfortunately
200    /// have to pass in a single line contiguous in memory. This can be bad for really long lines.
201    /// Sublime Text avoids this by just not highlighting lines that are too long (thousands of characters).
202    ///
203    /// For efficiency reasons this returns only the changes to the current scope at each point in the line.
204    /// You can use [`ScopeStack::apply`] on each operation in succession to get the stack for a given point.
205    /// Look at the code in `highlighter.rs` for an example of doing this for highlighting purposes.
206    ///
207    /// The returned vector is in order both by index to apply at (the `usize`) and also by order to apply them at a
208    /// given index (e.g popping old scopes before pushing new scopes).
209    ///
210    /// The [`SyntaxSet`] has to be the one that contained the syntax that was used to construct
211    /// this [`ParseState`], or an extended version of it. Otherwise the parsing would return the
212    /// wrong result or even panic. The reason for this is that contexts within the [`SyntaxSet`]
213    /// are referenced via indexes.
214    ///
215    /// [`ScopeStack::apply`]: struct.ScopeStack.html#method.apply
216    /// [`SyntaxSet`]: struct.SyntaxSet.html
217    /// [`ParseState`]: struct.ParseState.html
218    pub fn parse_line(
219        &mut self,
220        line: &str,
221        syntax_set: &SyntaxSet,
222    ) -> Result<Vec<(usize, ScopeStackOp)>, ParsingError> {
223        if self.stack.is_empty() {
224            return Err(ParsingError::MissingMainContext);
225        }
226        let mut match_start = 0;
227        let mut res = Vec::new();
228
229        if self.first_line {
230            let cur_level = &self.stack[self.stack.len() - 1];
231            let context = syntax_set.get_context(&cur_level.context)?;
232            if !context.meta_content_scope.is_empty() {
233                res.push((0, ScopeStackOp::Push(context.meta_content_scope[0])));
234            }
235            self.first_line = false;
236        }
237
238        let mut regions = Region::new();
239        let fnv = BuildHasherDefault::<FnvHasher>::default();
240        let mut search_cache: SearchCache = HashMap::with_capacity_and_hasher(128, fnv);
241        // Used for detecting loops with push/pop, see long comment above.
242        let mut non_consuming_push_at = (0, 0);
243
244        while self.parse_next_token(
245            line,
246            syntax_set,
247            &mut match_start,
248            &mut search_cache,
249            &mut regions,
250            &mut non_consuming_push_at,
251            &mut res,
252        )? {}
253
254        Ok(res)
255    }
256
257    #[allow(clippy::too_many_arguments)]
258    fn parse_next_token(
259        &mut self,
260        line: &str,
261        syntax_set: &SyntaxSet,
262        start: &mut usize,
263        search_cache: &mut SearchCache,
264        regions: &mut Region,
265        non_consuming_push_at: &mut (usize, usize),
266        ops: &mut Vec<(usize, ScopeStackOp)>,
267    ) -> Result<bool, ParsingError> {
268        let check_pop_loop = {
269            let (pos, stack_depth) = *non_consuming_push_at;
270            pos == *start && stack_depth == self.stack.len()
271        };
272
273        // Trim proto_starts that are no longer valid
274        while self
275            .proto_starts
276            .last()
277            .map(|start| *start >= self.stack.len())
278            .unwrap_or(false)
279        {
280            self.proto_starts.pop();
281        }
282
283        let best_match = self.find_best_match(
284            line,
285            *start,
286            syntax_set,
287            search_cache,
288            regions,
289            check_pop_loop,
290        )?;
291
292        if let Some(reg_match) = best_match {
293            if reg_match.would_loop {
294                // A push that doesn't consume anything (a regex that resulted
295                // in an empty match at the current position) can not be
296                // followed by a non-consuming pop. Otherwise we're back where
297                // we started and would try the same sequence of matches again,
298                // resulting in an infinite loop. In this case, Sublime Text
299                // advances one character and tries again, thus preventing the
300                // loop.
301
302                // println!("pop_would_loop for match {:?}, start {}", reg_match, *start);
303
304                // nth(1) gets the next character if there is one. Need to do
305                // this instead of just += 1 because we have byte indices and
306                // unicode characters can be more than 1 byte.
307                if let Some((i, _)) = line[*start..].char_indices().nth(1) {
308                    *start += i;
309                    return Ok(true);
310                } else {
311                    // End of line, no character to advance and no point trying
312                    // any more patterns.
313                    return Ok(false);
314                }
315            }
316
317            let match_end = reg_match.regions.pos(0).unwrap().1;
318
319            let consuming = match_end > *start;
320            if !consuming {
321                // The match doesn't consume any characters. If this is a
322                // "push", remember the position and stack size so that we can
323                // check the next "pop" for loops. Otherwise leave the state,
324                // e.g. non-consuming "set" could also result in a loop.
325                let context = reg_match.context;
326                let match_pattern = context.match_at(reg_match.pat_index)?;
327                if let MatchOperation::Push(_) = match_pattern.operation {
328                    *non_consuming_push_at = (match_end, self.stack.len() + 1);
329                }
330            }
331
332            *start = match_end;
333
334            // ignore `with_prototype`s below this if a context is pushed
335            if reg_match.from_with_prototype {
336                // use current height, since we're before the actual push
337                self.proto_starts.push(self.stack.len());
338            }
339
340            let level_context = {
341                let id = &self.stack[self.stack.len() - 1].context;
342                syntax_set.get_context(id)?
343            };
344            self.exec_pattern(line, &reg_match, level_context, syntax_set, ops)?;
345
346            Ok(true)
347        } else {
348            Ok(false)
349        }
350    }
351
352    fn find_best_match<'a>(
353        &self,
354        line: &str,
355        start: usize,
356        syntax_set: &'a SyntaxSet,
357        search_cache: &mut SearchCache,
358        regions: &mut Region,
359        check_pop_loop: bool,
360    ) -> Result<Option<RegexMatch<'a>>, ParsingError> {
361        let cur_level = &self.stack[self.stack.len() - 1];
362        let context = syntax_set.get_context(&cur_level.context)?;
363        let prototype = if let Some(ref p) = context.prototype {
364            Some(p)
365        } else {
366            None
367        };
368
369        // Build an iterator for the contexts we want to visit in order
370        let context_chain = {
371            let proto_start = self.proto_starts.last().cloned().unwrap_or(0);
372            // Sublime applies with_prototypes from bottom to top
373            let with_prototypes = self.stack[proto_start..].iter().flat_map(|lvl| {
374                lvl.prototypes
375                    .iter()
376                    .map(move |ctx| (true, ctx, lvl.captures.as_ref()))
377            });
378            let cur_prototype = prototype.into_iter().map(|ctx| (false, ctx, None));
379            let cur_context =
380                Some((false, &cur_level.context, cur_level.captures.as_ref())).into_iter();
381            with_prototypes.chain(cur_prototype).chain(cur_context)
382        };
383
384        // println!("{:#?}", cur_level);
385        // println!("token at {} on {}", start, line.trim_right());
386
387        let mut min_start = usize::MAX;
388        let mut best_match: Option<RegexMatch<'_>> = None;
389        let mut pop_would_loop = false;
390
391        for (from_with_proto, ctx, captures) in context_chain {
392            for (pat_context, pat_index) in context_iter(syntax_set, syntax_set.get_context(ctx)?) {
393                let match_pat = pat_context.match_at(pat_index)?;
394
395                if let Some(match_region) =
396                    self.search(line, start, match_pat, captures, search_cache, regions)?
397                {
398                    let (match_start, match_end) = match_region.pos(0).unwrap();
399
400                    // println!("matched pattern {:?} at start {} end {}", match_pat.regex_str, match_start, match_end);
401
402                    if match_start < min_start || (match_start == min_start && pop_would_loop) {
403                        // New match is earlier in text than old match,
404                        // or old match was a looping pop at the same
405                        // position.
406
407                        // println!("setting as current match");
408
409                        min_start = match_start;
410
411                        let consuming = match_end > start;
412                        pop_would_loop = check_pop_loop
413                            && !consuming
414                            && matches!(match_pat.operation, MatchOperation::Pop);
415
416                        best_match = Some(RegexMatch {
417                            regions: match_region,
418                            context: pat_context,
419                            pat_index,
420                            from_with_prototype: from_with_proto,
421                            would_loop: pop_would_loop,
422                        });
423
424                        if match_start == start && !pop_would_loop {
425                            // We're not gonna find a better match after this,
426                            // so as an optimization we can stop matching now.
427                            return Ok(best_match);
428                        }
429                    }
430                }
431            }
432        }
433        Ok(best_match)
434    }
435
436    fn search(
437        &self,
438        line: &str,
439        start: usize,
440        match_pat: &MatchPattern,
441        captures: Option<&(Region, String)>,
442        search_cache: &mut SearchCache,
443        regions: &mut Region,
444    ) -> Result<Option<Region>, ParsingError> {
445        // println!("{} - {:?} - {:?}", match_pat.regex_str, match_pat.has_captures, cur_level.captures.is_some());
446        let match_ptr = match_pat as *const MatchPattern;
447
448        if let Some(maybe_region) = search_cache.get(&match_ptr) {
449            if let Some(ref region) = *maybe_region {
450                let match_start = region.pos(0).unwrap().0;
451                if match_start >= start {
452                    // Cached match is valid, return it. Otherwise do another
453                    // search below.
454                    return Ok(Some(region.clone()));
455                }
456            } else {
457                // Didn't find a match earlier, so no point trying to match it again
458                return Ok(None);
459            }
460        }
461
462        let (matched, can_cache) = match (match_pat.has_captures, captures) {
463            (true, Some(captures)) => {
464                let (region, s) = captures;
465                let regex = match_pat.regex_with_refs(region, s);
466                let matched =
467                    regex.search(line, start, line.len(), Some(regions), self.ignore_errors)?;
468                (matched, false)
469            }
470            _ => {
471                let regex = match_pat.regex();
472                let matched =
473                    regex.search(line, start, line.len(), Some(regions), self.ignore_errors)?;
474                (matched, true)
475            }
476        };
477
478        if matched {
479            let (match_start, match_end) = regions.pos(0).unwrap();
480            // this is necessary to avoid infinite looping on dumb patterns
481            let does_something = match match_pat.operation {
482                MatchOperation::None => match_start != match_end,
483                _ => true,
484            };
485            if can_cache && does_something {
486                search_cache.insert(match_pat, Some(regions.clone()));
487            }
488            if does_something {
489                // print!("catch {} at {} on {}", match_pat.regex_str, match_start, line);
490                return Ok(Some(regions.clone()));
491            }
492        } else if can_cache {
493            search_cache.insert(match_pat, None);
494        }
495        Ok(None)
496    }
497
498    /// Returns true if the stack was changed
499    fn exec_pattern<'a>(
500        &mut self,
501        line: &str,
502        reg_match: &RegexMatch<'a>,
503        level_context: &'a Context,
504        syntax_set: &'a SyntaxSet,
505        ops: &mut Vec<(usize, ScopeStackOp)>,
506    ) -> Result<bool, ParsingError> {
507        let (match_start, match_end) = reg_match.regions.pos(0).unwrap();
508        let context = reg_match.context;
509        let pat = context.match_at(reg_match.pat_index)?;
510        // println!("running pattern {:?} on '{}' at {}, operation {:?}", pat.regex_str, line, match_start, pat.operation);
511
512        self.push_meta_ops(
513            true,
514            match_start,
515            level_context,
516            &pat.operation,
517            syntax_set,
518            ops,
519        )?;
520        for s in &pat.scope {
521            // println!("pushing {:?} at {}", s, match_start);
522            ops.push((match_start, ScopeStackOp::Push(*s)));
523        }
524        if let Some(ref capture_map) = pat.captures {
525            // captures could appear in an arbitrary order, have to produce ops in right order
526            // ex: ((bob)|(hi))* could match hibob in wrong order, and outer has to push first
527            // we don't have to handle a capture matching multiple times, Sublime doesn't
528            let mut map: Vec<((usize, i32), ScopeStackOp)> = Vec::new();
529            for &(cap_index, ref scopes) in capture_map.iter() {
530                if let Some((cap_start, cap_end)) = reg_match.regions.pos(cap_index) {
531                    // marking up empty captures causes pops to be sorted wrong
532                    if cap_start == cap_end {
533                        continue;
534                    }
535                    // println!("capture {:?} at {:?}-{:?}", scopes[0], cap_start, cap_end);
536                    for scope in scopes.iter() {
537                        map.push((
538                            (cap_start, -((cap_end - cap_start) as i32)),
539                            ScopeStackOp::Push(*scope),
540                        ));
541                    }
542                    map.push(((cap_end, i32::MIN), ScopeStackOp::Pop(scopes.len())));
543                }
544            }
545            map.sort_by(|a, b| a.0.cmp(&b.0));
546            for ((index, _), op) in map.into_iter() {
547                ops.push((index, op));
548            }
549        }
550        if !pat.scope.is_empty() {
551            // println!("popping at {}", match_end);
552            ops.push((match_end, ScopeStackOp::Pop(pat.scope.len())));
553        }
554        self.push_meta_ops(
555            false,
556            match_end,
557            level_context,
558            &pat.operation,
559            syntax_set,
560            ops,
561        )?;
562
563        self.perform_op(line, &reg_match.regions, pat, syntax_set)
564    }
565
566    fn push_meta_ops(
567        &self,
568        initial: bool,
569        index: usize,
570        cur_context: &Context,
571        match_op: &MatchOperation,
572        syntax_set: &SyntaxSet,
573        ops: &mut Vec<(usize, ScopeStackOp)>,
574    ) -> Result<(), ParsingError> {
575        // println!("metas ops for {:?}, initial: {}",
576        //          match_op,
577        //          initial);
578        // println!("{:?}", cur_context.meta_scope);
579        match *match_op {
580            MatchOperation::Pop => {
581                let v = if initial {
582                    &cur_context.meta_content_scope
583                } else {
584                    &cur_context.meta_scope
585                };
586                if !v.is_empty() {
587                    ops.push((index, ScopeStackOp::Pop(v.len())));
588                }
589
590                // cleared scopes are restored after the scopes from match pattern that invoked the pop are applied
591                if !initial && cur_context.clear_scopes.is_some() {
592                    ops.push((index, ScopeStackOp::Restore))
593                }
594            }
595            // for some reason the ST3 behaviour of set is convoluted and is inconsistent with the docs and other ops
596            // - the meta_content_scope of the current context is applied to the matched thing, unlike pop
597            // - the clear_scopes are applied after the matched token, unlike push
598            // - the interaction with meta scopes means that the token has the meta scopes of both the current scope and the new scope.
599            MatchOperation::Push(ref context_refs) | MatchOperation::Set(ref context_refs) => {
600                let is_set = matches!(*match_op, MatchOperation::Set(_));
601                // a match pattern that "set"s keeps the meta_content_scope and meta_scope from the previous context
602                if initial {
603                    if is_set && cur_context.clear_scopes.is_some() {
604                        // cleared scopes from the old context are restored immediately
605                        ops.push((index, ScopeStackOp::Restore));
606                    }
607                    // add each context's meta scope
608                    for r in context_refs.iter() {
609                        let ctx = r.resolve(syntax_set)?;
610
611                        if !is_set {
612                            if let Some(clear_amount) = ctx.clear_scopes {
613                                ops.push((index, ScopeStackOp::Clear(clear_amount)));
614                            }
615                        }
616
617                        for scope in ctx.meta_scope.iter() {
618                            ops.push((index, ScopeStackOp::Push(*scope)));
619                        }
620                    }
621                } else {
622                    let repush = (is_set
623                        && (!cur_context.meta_scope.is_empty()
624                            || !cur_context.meta_content_scope.is_empty()))
625                        || context_refs.iter().any(|r| {
626                            let ctx = r.resolve(syntax_set).unwrap();
627
628                            !ctx.meta_content_scope.is_empty()
629                                || (ctx.clear_scopes.is_some() && is_set)
630                        });
631                    if repush {
632                        // remove previously pushed meta scopes, so that meta content scopes will be applied in the correct order
633                        let mut num_to_pop: usize = context_refs
634                            .iter()
635                            .map(|r| {
636                                let ctx = r.resolve(syntax_set).unwrap();
637                                ctx.meta_scope.len()
638                            })
639                            .sum();
640
641                        // also pop off the original context's meta scopes
642                        if is_set {
643                            num_to_pop +=
644                                cur_context.meta_content_scope.len() + cur_context.meta_scope.len();
645                        }
646
647                        // do all the popping as one operation
648                        if num_to_pop > 0 {
649                            ops.push((index, ScopeStackOp::Pop(num_to_pop)));
650                        }
651
652                        // now we push meta scope and meta context scope for each context pushed
653                        for r in context_refs {
654                            let ctx = r.resolve(syntax_set)?;
655
656                            // for some reason, contrary to my reading of the docs, set does this after the token
657                            if is_set {
658                                if let Some(clear_amount) = ctx.clear_scopes {
659                                    ops.push((index, ScopeStackOp::Clear(clear_amount)));
660                                }
661                            }
662
663                            for scope in ctx.meta_scope.iter() {
664                                ops.push((index, ScopeStackOp::Push(*scope)));
665                            }
666                            for scope in ctx.meta_content_scope.iter() {
667                                ops.push((index, ScopeStackOp::Push(*scope)));
668                            }
669                        }
670                    }
671                }
672            }
673            MatchOperation::None => (),
674        }
675
676        Ok(())
677    }
678
679    /// Returns true if the stack was changed
680    fn perform_op(
681        &mut self,
682        line: &str,
683        regions: &Region,
684        pat: &MatchPattern,
685        syntax_set: &SyntaxSet,
686    ) -> Result<bool, ParsingError> {
687        let (ctx_refs, old_proto_ids) = match pat.operation {
688            MatchOperation::Push(ref ctx_refs) => (ctx_refs, None),
689            MatchOperation::Set(ref ctx_refs) => {
690                // a `with_prototype` stays active when the context is `set`
691                // until the context layer in the stack (where the `with_prototype`
692                // was initially applied) is popped off.
693                (ctx_refs, self.stack.pop().map(|s| s.prototypes))
694            }
695            MatchOperation::Pop => {
696                self.stack.pop();
697                return Ok(true);
698            }
699            MatchOperation::None => return Ok(false),
700        };
701        for (i, r) in ctx_refs.iter().enumerate() {
702            let mut proto_ids = if i == 0 {
703                // it is only necessary to preserve the old prototypes
704                // at the first stack frame pushed
705                old_proto_ids.clone().unwrap_or_else(Vec::new)
706            } else {
707                Vec::new()
708            };
709            if i == ctx_refs.len() - 1 {
710                // if a with_prototype was specified, and multiple contexts were pushed,
711                // then the with_prototype applies only to the last context pushed, i.e.
712                // top most on the stack after all the contexts are pushed - this is also
713                // referred to as the "target" of the push by sublimehq - see
714                // https://forum.sublimetext.com/t/dev-build-3111/19240/17 for more info
715                if let Some(ref p) = pat.with_prototype {
716                    proto_ids.push(p.id()?);
717                }
718            }
719            let context_id = r.id()?;
720            let context = syntax_set.get_context(&context_id)?;
721            let captures = {
722                let mut uses_backrefs = context.uses_backrefs;
723                if !proto_ids.is_empty() {
724                    uses_backrefs = uses_backrefs
725                        || proto_ids
726                            .iter()
727                            .any(|id| syntax_set.get_context(id).unwrap().uses_backrefs);
728                }
729                if uses_backrefs {
730                    Some((regions.clone(), line.to_owned()))
731                } else {
732                    None
733                }
734            };
735            self.stack.push(StateLevel {
736                context: context_id,
737                prototypes: proto_ids,
738                captures,
739            });
740        }
741        Ok(true)
742    }
743}
744
745#[cfg(feature = "yaml-load")]
746#[cfg(test)]
747mod tests {
748    use super::*;
749    use crate::parsing::ScopeStackOp::{Clear, Pop, Push, Restore};
750    use crate::parsing::{Scope, ScopeStack, SyntaxSet, SyntaxSetBuilder};
751    use crate::util::debug_print_ops;
752
    /// Contents of the `parser_tests.sublime-syntax` test data file, embedded at
    /// compile time. Tests in this module pass it to `expect_scope_stacks`.
    const TEST_SYNTAX: &str = include_str!("../../testdata/parser_tests.sublime-syntax");
754
755    #[test]
756    fn can_parse_simple() {
757        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
758        let mut state = {
759            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
760            ParseState::new(syntax, false)
761        };
762
763        let ops1 = ops(&mut state, "module Bob::Wow::Troll::Five; 5; end", &ss);
764        let test_ops1 = vec![
765            (0, Push(Scope::new("source.ruby.rails").unwrap())),
766            (0, Push(Scope::new("meta.module.ruby").unwrap())),
767            (0, Push(Scope::new("keyword.control.module.ruby").unwrap())),
768            (6, Pop(2)),
769            (6, Push(Scope::new("meta.module.ruby").unwrap())),
770            (7, Pop(1)),
771            (7, Push(Scope::new("meta.module.ruby").unwrap())),
772            (7, Push(Scope::new("entity.name.module.ruby").unwrap())),
773            (7, Push(Scope::new("support.other.namespace.ruby").unwrap())),
774            (10, Pop(1)),
775            (10, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
776        ];
777        assert_eq!(&ops1[0..test_ops1.len()], &test_ops1[..]);
778
779        let ops2 = ops(&mut state, "def lol(wow = 5)", &ss);
780        let test_ops2 = vec![
781            (0, Push(Scope::new("meta.function.ruby").unwrap())),
782            (0, Push(Scope::new("keyword.control.def.ruby").unwrap())),
783            (3, Pop(2)),
784            (3, Push(Scope::new("meta.function.ruby").unwrap())),
785            (4, Push(Scope::new("entity.name.function.ruby").unwrap())),
786            (7, Pop(1)),
787        ];
788        assert_eq!(&ops2[0..test_ops2.len()], &test_ops2[..]);
789    }
790
791    #[test]
792    fn can_parse_yaml() {
793        let ps = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
794        let mut state = {
795            let syntax = ps.find_syntax_by_name("YAML").unwrap();
796            ParseState::new(syntax, false)
797        };
798
799        assert_eq!(
800            ops(&mut state, "key: value\n", &ps),
801            vec![
802                (0, Push(Scope::new("source.yaml").unwrap())),
803                (
804                    0,
805                    Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())
806                ),
807                (0, Push(Scope::new("entity.name.tag.yaml").unwrap())),
808                (3, Pop(2)),
809                (
810                    3,
811                    Push(Scope::new("punctuation.separator.key-value.mapping.yaml").unwrap())
812                ),
813                (4, Pop(1)),
814                (
815                    5,
816                    Push(Scope::new("string.unquoted.plain.out.yaml").unwrap())
817                ),
818                (10, Pop(1)),
819            ]
820        );
821    }
822
823    #[test]
824    fn can_parse_includes() {
825        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
826        let mut state = {
827            let syntax = ss.find_syntax_by_name("HTML (Rails)").unwrap();
828            ParseState::new(syntax, false)
829        };
830
831        let ops = ops(&mut state, "<script>var lol = '<% def wow(", &ss);
832
833        let mut test_stack = ScopeStack::new();
834        test_stack.push(Scope::new("text.html.ruby").unwrap());
835        test_stack.push(Scope::new("text.html.basic").unwrap());
836        test_stack.push(Scope::new("source.js.embedded.html").unwrap());
837        test_stack.push(Scope::new("source.js").unwrap());
838        test_stack.push(Scope::new("string.quoted.single.js").unwrap());
839        test_stack.push(Scope::new("source.ruby.rails.embedded.html").unwrap());
840        test_stack.push(Scope::new("meta.function.parameters.ruby").unwrap());
841
842        let mut stack = ScopeStack::new();
843        for (_, op) in ops.iter() {
844            stack.apply(op).expect("#[cfg(test)]");
845        }
846        assert_eq!(stack, test_stack);
847    }
848
849    #[test]
850    fn can_parse_backrefs() {
851        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
852        let mut state = {
853            let syntax = ss.find_syntax_by_name("Ruby on Rails").unwrap();
854            ParseState::new(syntax, false)
855        };
856
857        // For parsing HEREDOC, the "SQL" is captured at the beginning and then used in another
858        // regex with a backref, to match the end of the HEREDOC. Note that there can be code
859        // after the marker (`.strip`) here.
860        assert_eq!(
861            ops(&mut state, "lol = <<-SQL.strip", &ss),
862            vec![
863                (0, Push(Scope::new("source.ruby.rails").unwrap())),
864                (
865                    4,
866                    Push(Scope::new("keyword.operator.assignment.ruby").unwrap())
867                ),
868                (5, Pop(1)),
869                (
870                    6,
871                    Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())
872                ),
873                (
874                    6,
875                    Push(Scope::new("punctuation.definition.string.begin.ruby").unwrap())
876                ),
877                (12, Pop(1)),
878                (12, Pop(1)),
879                (
880                    12,
881                    Push(Scope::new("string.unquoted.embedded.sql.ruby").unwrap())
882                ),
883                (12, Push(Scope::new("text.sql.embedded.ruby").unwrap())),
884                (12, Clear(ClearAmount::TopN(2))),
885                (12, Push(Scope::new("punctuation.accessor.ruby").unwrap())),
886                (13, Pop(1)),
887                (18, Restore),
888            ]
889        );
890
891        assert_eq!(ops(&mut state, "wow", &ss), vec![]);
892
893        assert_eq!(
894            ops(&mut state, "SQL", &ss),
895            vec![
896                (0, Pop(1)),
897                (
898                    0,
899                    Push(Scope::new("punctuation.definition.string.end.ruby").unwrap())
900                ),
901                (3, Pop(1)),
902                (3, Pop(1)),
903            ]
904        );
905    }
906
907    #[test]
908    fn can_parse_preprocessor_rules() {
909        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
910        let mut state = {
911            let syntax = ss.find_syntax_by_name("C").unwrap();
912            ParseState::new(syntax, false)
913        };
914
915        assert_eq!(
916            ops(&mut state, "#ifdef FOO", &ss),
917            vec![
918                (0, Push(Scope::new("source.c").unwrap())),
919                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
920                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
921                (6, Pop(1)),
922                (10, Pop(1)),
923            ]
924        );
925        assert_eq!(
926            ops(&mut state, "{", &ss),
927            vec![
928                (0, Push(Scope::new("meta.block.c").unwrap())),
929                (
930                    0,
931                    Push(Scope::new("punctuation.section.block.begin.c").unwrap())
932                ),
933                (1, Pop(1)),
934            ]
935        );
936        assert_eq!(
937            ops(&mut state, "#else", &ss),
938            vec![
939                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
940                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
941                (5, Pop(1)),
942                (5, Pop(1)),
943            ]
944        );
945        assert_eq!(
946            ops(&mut state, "{", &ss),
947            vec![
948                (0, Push(Scope::new("meta.block.c").unwrap())),
949                (
950                    0,
951                    Push(Scope::new("punctuation.section.block.begin.c").unwrap())
952                ),
953                (1, Pop(1)),
954            ]
955        );
956        assert_eq!(
957            ops(&mut state, "#endif", &ss),
958            vec![
959                (0, Pop(1)),
960                (0, Push(Scope::new("meta.block.c").unwrap())),
961                (0, Push(Scope::new("meta.preprocessor.c").unwrap())),
962                (0, Push(Scope::new("keyword.control.import.c").unwrap())),
963                (6, Pop(2)),
964                (6, Pop(2)),
965                (6, Push(Scope::new("meta.block.c").unwrap())),
966            ]
967        );
968        assert_eq!(
969            ops(&mut state, "    foo;", &ss),
970            vec![
971                (7, Push(Scope::new("punctuation.terminator.c").unwrap())),
972                (8, Pop(1)),
973            ]
974        );
975        assert_eq!(
976            ops(&mut state, "}", &ss),
977            vec![
978                (
979                    0,
980                    Push(Scope::new("punctuation.section.block.end.c").unwrap())
981                ),
982                (1, Pop(1)),
983                (1, Pop(1)),
984            ]
985        );
986    }
987
988    #[test]
989    fn can_parse_issue25() {
990        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
991        let mut state = {
992            let syntax = ss.find_syntax_by_name("C").unwrap();
993            ParseState::new(syntax, false)
994        };
995
996        // test fix for issue #25
997        assert_eq!(ops(&mut state, "struct{estruct", &ss).len(), 10);
998    }
999
1000    #[test]
1001    fn can_compare_parse_states() {
1002        let ss = SyntaxSet::load_from_folder("testdata/Packages").unwrap();
1003        let syntax = ss.find_syntax_by_name("Java").unwrap();
1004        let mut state1 = ParseState::new(syntax, false);
1005        let mut state2 = ParseState::new(syntax, false);
1006
1007        assert_eq!(ops(&mut state1, "class Foo {", &ss).len(), 11);
1008        assert_eq!(ops(&mut state2, "class Fooo {", &ss).len(), 11);
1009
1010        assert_eq!(state1, state2);
1011        ops(&mut state1, "}", &ss);
1012        assert_ne!(state1, state2);
1013    }
1014
1015    #[test]
1016    fn can_parse_non_nested_clear_scopes() {
1017        let line = "'hello #simple_cleared_scopes_test world test \\n '";
1018        let expect = [
1019            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1020            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1021            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1022        ];
1023        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1024    }
1025
1026    #[test]
1027    fn can_parse_non_nested_too_many_clear_scopes() {
1028        let line = "'hello #too_many_cleared_scopes_test world test \\n '";
1029        let expect = [
1030            "<example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1031            "<example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1032            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1033        ];
1034        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1035    }
1036
1037    #[test]
1038    fn can_parse_nested_clear_scopes() {
1039        let line = "'hello #nested_clear_scopes_test world foo bar test \\n '";
1040        let expect = [
1041            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pushes-clear-scopes.example>",
1042            "<source.test>, <example.meta-scope.cleared-previous-meta-scope.example>, <foo>",
1043            "<source.test>, <example.meta-scope.after-clear-scopes.example>, <example.pops-clear-scopes.example>",
1044            "<source.test>, <string.quoted.single.example>, <constant.character.escape.example>",
1045        ];
1046        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1047    }
1048
1049    #[test]
1050    fn can_parse_infinite_loop() {
1051        let line = "#infinite_loop_test 123";
1052        let expect = ["<source.test>, <constant.numeric.test>"];
1053        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1054    }
1055
1056    #[test]
1057    fn can_parse_infinite_seeming_loop() {
1058        // See https://github.com/SublimeTextIssues/Core/issues/1190 for an
1059        // explanation.
1060        let line = "#infinite_seeming_loop_test hello";
1061        let expect = [
1062            "<source.test>, <keyword.test>",
1063            "<source.test>, <test>, <string.unquoted.test>",
1064            "<source.test>, <test>, <keyword.control.test>",
1065        ];
1066        expect_scope_stacks(line, &expect, TEST_SYNTAX);
1067    }
1068
1069    #[test]
1070    fn can_parse_prototype_that_pops_main() {
1071        let syntax = r#"
1072name: test
1073scope: source.test
1074contexts:
1075  prototype:
1076    # This causes us to pop out of the main context. Sublime Text handles that
1077    # by pushing main back automatically.
1078    - match: (?=!)
1079      pop: true
1080  main:
1081    - match: foo
1082      scope: test.good
1083"#;
1084
1085        let line = "foo!";
1086        let expect = ["<source.test>, <test.good>"];
1087        expect_scope_stacks(line, &expect, syntax);
1088    }
1089
1090    #[test]
1091    fn can_parse_syntax_with_newline_in_character_class() {
1092        let syntax = r#"
1093name: test
1094scope: source.test
1095contexts:
1096  main:
1097    - match: foo[\n]
1098      scope: foo.end
1099    - match: foo
1100      scope: foo.any
1101"#;
1102
1103        let line = "foo";
1104        let expect = ["<source.test>, <foo.end>"];
1105        expect_scope_stacks(line, &expect, syntax);
1106
1107        let line = "foofoofoo";
1108        let expect = [
1109            "<source.test>, <foo.any>",
1110            "<source.test>, <foo.any>",
1111            "<source.test>, <foo.end>",
1112        ];
1113        expect_scope_stacks(line, &expect, syntax);
1114    }
1115
1116    #[test]
1117    fn can_parse_issue120() {
1118        let syntax = SyntaxDefinition::load_from_str(
1119            include_str!("../../testdata/embed_escape_test.sublime-syntax"),
1120            false,
1121            None,
1122        )
1123        .unwrap();
1124
1125        let line1 = "\"abctest\" foobar";
1126        let expect1 = [
1127            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.begin.html>",
1128            "<meta.attribute-with-value.style.html>, <source.css>",
1129            "<meta.attribute-with-value.style.html>, <string.quoted.double>, <punctuation.definition.string.end.html>",
1130            "<meta.attribute-with-value.style.html>, <source.css>, <test.embedded>",
1131            "<top-level.test>",
1132        ];
1133
1134        expect_scope_stacks_with_syntax(line1, &expect1, syntax.clone());
1135
1136        let line2 = ">abctest</style>foobar";
1137        let expect2 = [
1138            "<meta.tag.style.begin.html>, <punctuation.definition.tag.end.html>",
1139            "<source.css.embedded.html>, <test.embedded>",
1140            "<top-level.test>",
1141        ];
1142        expect_scope_stacks_with_syntax(line2, &expect2, syntax);
1143    }
1144
1145    #[test]
1146    fn can_parse_non_consuming_pop_that_would_loop() {
1147        // See https://github.com/trishume/syntect/issues/127
1148        let syntax = r#"
1149name: test
1150scope: source.test
1151contexts:
1152  main:
1153    # This makes us go into "test" without consuming any characters
1154    - match: (?=hello)
1155      push: test
1156  test:
1157    # If we used this match, we'd go back to "main" without consuming anything,
1158    # and then back into "test", infinitely looping. ST detects this at this
1159    # point and ignores this match until at least one character matched.
1160    - match: (?!world)
1161      pop: true
1162    - match: \w+
1163      scope: test.matched
1164"#;
1165
1166        let line = "hello";
1167        let expect = ["<source.test>, <test.matched>"];
1168        expect_scope_stacks(line, &expect, syntax);
1169    }
1170
1171    #[test]
1172    fn can_parse_non_consuming_set_and_pop_that_would_loop() {
1173        let syntax = r#"
1174name: test
1175scope: source.test
1176contexts:
1177  main:
1178    # This makes us go into "a" without advancing
1179    - match: (?=test)
1180      push: a
1181  a:
1182    # This makes us go into "b" without advancing
1183    - match: (?=t)
1184      set: b
1185  b:
1186    # If we used this match, we'd go back to "main" without having advanced,
1187    # which means we'd have an infinite loop like with the previous test.
1188    # So even for a "set", we have to check if we're advancing or not.
1189    - match: (?=t)
1190      pop: true
1191    - match: \w+
1192      scope: test.matched
1193"#;
1194
1195        let line = "test";
1196        let expect = ["<source.test>, <test.matched>"];
1197        expect_scope_stacks(line, &expect, syntax);
1198    }
1199
1200    #[test]
1201    fn can_parse_non_consuming_set_after_consuming_push_that_does_not_loop() {
1202        let syntax = r#"
1203name: test
1204scope: source.test
1205contexts:
1206  main:
1207    # This makes us go into "a", but we consumed a character
1208    - match: t
1209      push: a
1210    - match: \w+
1211      scope: test.matched
1212  a:
1213    # This makes us go into "b" without consuming
1214    - match: (?=e)
1215      set: b
1216  b:
1217    # This match does not result in an infinite loop because we already consumed
1218    # a character to get into "a", so it's ok to pop back into "main".
1219    - match: (?=e)
1220      pop: true
1221"#;
1222
1223        let line = "test";
1224        let expect = ["<source.test>, <test.matched>"];
1225        expect_scope_stacks(line, &expect, syntax);
1226    }
1227
1228    #[test]
1229    fn can_parse_non_consuming_set_after_consuming_set_that_does_not_loop() {
1230        let syntax = r#"
1231name: test
1232scope: source.test
1233contexts:
1234  main:
1235    - match: (?=hello)
1236      push: a
1237    - match: \w+
1238      scope: test.matched
1239  a:
1240    - match: h
1241      set: b
1242  b:
1243    - match: (?=e)
1244      set: c
1245  c:
1246    # This is not an infinite loop because "a" consumed a character, so we can
1247    # actually pop back into main and then match the rest of the input.
1248    - match: (?=e)
1249      pop: true
1250"#;
1251
1252        let line = "hello";
1253        let expect = ["<source.test>, <test.matched>"];
1254        expect_scope_stacks(line, &expect, syntax);
1255    }
1256
1257    #[test]
1258    fn can_parse_non_consuming_pop_that_would_loop_at_end_of_line() {
1259        let syntax = r#"
1260name: test
1261scope: source.test
1262contexts:
1263  main:
1264    # This makes us go into "test" without consuming, even at the end of line
1265    - match: ""
1266      push: test
1267  test:
1268    - match: ""
1269      pop: true
1270    - match: \w+
1271      scope: test.matched
1272"#;
1273
1274        let line = "hello";
1275        let expect = ["<source.test>, <test.matched>"];
1276        expect_scope_stacks(line, &expect, syntax);
1277    }
1278
1279    #[test]
1280    fn can_parse_empty_but_consuming_set_that_does_not_loop() {
1281        let syntax = r#"
1282name: test
1283scope: source.test
1284contexts:
1285  main:
1286    - match: (?=hello)
1287      push: a
1288    - match: ello
1289      scope: test.good
1290  a:
1291    # This is an empty match, but it consumed a character (the "h")
1292    - match: (?=e)
1293      set: b
1294  b:
1295    # .. so it's ok to pop back to main from here
1296    - match: ""
1297      pop: true
1298    - match: ello
1299      scope: test.bad
1300"#;
1301
1302        let line = "hello";
1303        let expect = ["<source.test>, <test.good>"];
1304        expect_scope_stacks(line, &expect, syntax);
1305    }
1306
1307    #[test]
1308    fn can_parse_non_consuming_pop_that_does_not_loop() {
1309        let syntax = r#"
1310name: test
1311scope: source.test
1312contexts:
1313  main:
1314    # This is a non-consuming push, so "b" will need to check for a
1315    # non-consuming pop
1316    - match: (?=hello)
1317      push: [b, a]
1318    - match: ello
1319      scope: test.good
1320  a:
1321    # This pop is ok, it consumed "h"
1322    - match: (?=e)
1323      pop: true
1324  b:
1325    # This is non-consuming, and we set to "c"
1326    - match: (?=e)
1327      set: c
1328  c:
1329    # It's ok to pop back to "main" here because we consumed a character in the
1330    # meantime.
1331    - match: ""
1332      pop: true
1333    - match: ello
1334      scope: test.bad
1335"#;
1336
1337        let line = "hello";
1338        let expect = ["<source.test>, <test.good>"];
1339        expect_scope_stacks(line, &expect, syntax);
1340    }
1341
1342    #[test]
1343    fn can_parse_non_consuming_pop_with_multi_push_that_does_not_loop() {
1344        let syntax = r#"
1345name: test
1346scope: source.test
1347contexts:
1348  main:
1349    - match: (?=hello)
1350      push: [b, a]
1351    - match: ello
1352      scope: test.good
1353  a:
1354    # This pop is ok, as we're not popping back to "main" yet (which would loop),
1355    # we're popping to "b"
1356    - match: ""
1357      pop: true
1358    - match: \w+
1359      scope: test.bad
1360  b:
1361    - match: \w+
1362      scope: test.good
1363"#;
1364
1365        let line = "hello";
1366        let expect = ["<source.test>, <test.good>"];
1367        expect_scope_stacks(line, &expect, syntax);
1368    }
1369
1370    #[test]
1371    fn can_parse_non_consuming_pop_of_recursive_context_that_does_not_loop() {
1372        let syntax = r#"
1373name: test
1374scope: source.test
1375contexts:
1376  main:
1377    - match: xxx
1378      scope: test.good
1379    - include: basic-identifiers
1380
1381  basic-identifiers:
1382    - match: '\w+::'
1383      scope: test.matched
1384      push: no-type-names
1385
1386  no-type-names:
1387      - include: basic-identifiers
1388      - match: \w+
1389        scope: test.matched.inside
1390      # This is a tricky one because when this is the best match,
1391      # we have two instances of "no-type-names" on the stack, so we're popping
1392      # back from "no-type-names" to another "no-type-names".
1393      - match: ''
1394        pop: true
1395"#;
1396
1397        let line = "foo::bar::* xxx";
1398        let expect = ["<source.test>, <test.good>"];
1399        expect_scope_stacks(line, &expect, syntax);
1400    }
1401
1402    #[test]
1403    fn can_parse_non_consuming_pop_order() {
1404        let syntax = r#"
1405name: test
1406scope: source.test
1407contexts:
1408  main:
1409    - match: (?=hello)
1410      push: test
1411  test:
1412    # This matches first
1413    - match: (?=e)
1414      push: good
1415    # But this (looping) match replaces it, because it's an earlier match
1416    - match: (?=h)
1417      pop: true
1418    # And this should not replace it, as it's a later match (only matches at
1419    # the same position can replace looping pops).
1420    - match: (?=o)
1421      push: bad
1422  good:
1423    - match: \w+
1424      scope: test.good
1425  bad:
1426    - match: \w+
1427      scope: test.bad
1428"#;
1429
1430        let line = "hello";
1431        let expect = ["<source.test>, <test.good>"];
1432        expect_scope_stacks(line, &expect, syntax);
1433    }
1434
1435    #[test]
1436    fn can_parse_prototype_with_embed() {
1437        let syntax = r#"
1438name: Javadoc
1439scope: text.html.javadoc
1440contexts:
1441  prototype:
1442    - match: \*
1443      scope: punctuation.definition.comment.javadoc
1444
1445  main:
1446    - meta_include_prototype: false
1447    - match: /\*\*
1448      scope: comment.block.documentation.javadoc punctuation.definition.comment.begin.javadoc
1449      embed: contents
1450      embed_scope: comment.block.documentation.javadoc text.html.javadoc
1451      escape: \*/
1452      escape_captures:
1453        0: comment.block.documentation.javadoc punctuation.definition.comment.end.javadoc
1454
1455  contents:
1456    - match: ''
1457"#;
1458
1459        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1460        expect_scope_stacks_with_syntax("/** * */", &["<comment.block.documentation.javadoc>, <punctuation.definition.comment.begin.javadoc>", "<comment.block.documentation.javadoc>, <text.html.javadoc>, <punctuation.definition.comment.javadoc>", "<comment.block.documentation.javadoc>, <punctuation.definition.comment.end.javadoc>"], syntax);
1461    }
1462
1463    #[test]
1464    fn can_parse_context_included_in_prototype_via_named_reference() {
1465        let syntax = r#"
1466scope: source.test
1467contexts:
1468  prototype:
1469    - match: a
1470      push: a
1471    - match: b
1472      scope: test.bad
1473  main:
1474    - match: unused
1475  # This context is included in the prototype (see `push: a`).
1476  # Because of that, ST doesn't apply the prototype to this context, so if
1477  # we're in here the "b" shouldn't match.
1478  a:
1479    - match: a
1480      scope: test.good
1481"#;
1482
1483        let stack_states = stack_states(parse("aa b", syntax));
1484        assert_eq!(
1485            stack_states,
1486            vec![
1487                "<source.test>",
1488                "<source.test>, <test.good>",
1489                "<source.test>",
1490            ],
1491            "Expected test.bad to not match"
1492        );
1493    }
1494
1495    #[test]
1496    fn can_parse_with_prototype_set() {
1497        let syntax = r#"%YAML 1.2
1498---
1499scope: source.test-set-with-proto
1500contexts:
1501  main:
1502    - match: a
1503      scope: a
1504      set: next1
1505      with_prototype:
1506        - match: '1'
1507          scope: '1'
1508        - match: '2'
1509          scope: '2'
1510        - match: '3'
1511          scope: '3'
1512        - match: '4'
1513          scope: '4'
1514    - match: '5'
1515      scope: '5'
1516      set: [next3, next2]
1517      with_prototype:
1518        - match: c
1519          scope: cwith
1520  next1:
1521    - match: b
1522      scope: b
1523      set: next2
1524  next2:
1525    - match: c
1526      scope: c
1527      push: next3
1528    - match: e
1529      scope: e
1530      pop: true
1531    - match: f
1532      scope: f
1533      set: [next1, next2]
1534  next3:
1535    - match: d
1536      scope: d
1537    - match: (?=e)
1538      pop: true
1539    - match: c
1540      scope: cwithout
1541"#;
1542
1543        expect_scope_stacks_with_syntax(
1544            "a1b2c3d4e5",
1545            &[
1546                "<a>", "<1>", "<b>", "<2>", "<c>", "<3>", "<d>", "<4>", "<e>", "<5>",
1547            ],
1548            SyntaxDefinition::load_from_str(syntax, true, None).unwrap(),
1549        );
1550        expect_scope_stacks_with_syntax(
1551            "5cfcecbedcdea",
1552            &[
1553                "<5>",
1554                "<cwith>",
1555                "<f>",
1556                "<e>",
1557                "<b>",
1558                "<d>",
1559                "<cwithout>",
1560                "<a>",
1561            ],
1562            SyntaxDefinition::load_from_str(syntax, true, None).unwrap(),
1563        );
1564    }
1565
1566    #[test]
1567    fn can_parse_issue176() {
1568        let syntax = r#"
1569scope: source.dummy
1570contexts:
1571  main:
1572    - match: (test)(?=(foo))(f)
1573      captures:
1574        1: test
1575        2: ignored
1576        3: f
1577      push:
1578        - match: (oo)
1579          captures:
1580            1: keyword
1581"#;
1582
1583        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1584        expect_scope_stacks_with_syntax(
1585            "testfoo",
1586            &["<test>", /*"<ignored>",*/ "<f>", "<keyword>"],
1587            syntax,
1588        );
1589    }
1590
1591    #[test]
1592    fn can_parse_two_with_prototypes_at_same_stack_level() {
1593        let syntax_yamlstr = r#"
1594%YAML 1.2
1595---
1596# See http://www.sublimetext.com/docs/3/syntax.html
1597scope: source.example-wp
1598contexts:
1599  main:
1600    - match: a
1601      scope: a
1602      push:
1603        - match: b
1604          scope: b
1605          set:
1606            - match: c
1607              scope: c
1608          with_prototype:
1609            - match: '2'
1610              scope: '2'
1611      with_prototype:
1612        - match: '1'
1613          scope: '1'
1614"#;
1615
1616        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1617        expect_scope_stacks_with_syntax("abc12", &["<1>", "<2>"], syntax);
1618    }
1619
1620    #[test]
1621    fn can_parse_two_with_prototypes_at_same_stack_level_set_multiple() {
1622        let syntax_yamlstr = r#"
1623%YAML 1.2
1624---
1625# See http://www.sublimetext.com/docs/3/syntax.html
1626scope: source.example-wp
1627contexts:
1628  main:
1629    - match: a
1630      scope: a
1631      push:
1632        - match: b
1633          scope: b
1634          set: [context1, context2, context3]
1635          with_prototype:
1636            - match: '2'
1637              scope: '2'
1638      with_prototype:
1639        - match: '1'
1640          scope: '1'
1641    - match: '1'
1642      scope: digit1
1643    - match: '2'
1644      scope: digit2
1645  context1:
1646    - match: e
1647      scope: e
1648      pop: true
1649    - match: '2'
1650      scope: digit2
1651  context2:
1652    - match: d
1653      scope: d
1654      pop: true
1655    - match: '2'
1656      scope: digit2
1657  context3:
1658    - match: c
1659      scope: c
1660      pop: true
1661"#;
1662
1663        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1664        expect_scope_stacks_with_syntax("ab12", &["<1>", "<2>"], syntax.clone());
1665        expect_scope_stacks_with_syntax("abc12", &["<1>", "<digit2>"], syntax.clone());
1666        expect_scope_stacks_with_syntax("abcd12", &["<1>", "<digit2>"], syntax.clone());
1667        expect_scope_stacks_with_syntax("abcde12", &["<digit1>", "<digit2>"], syntax);
1668    }
1669
1670    #[test]
1671    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures() {
1672        let syntax_yamlstr = r#"
1673%YAML 1.2
1674---
1675# See http://www.sublimetext.com/docs/3/syntax.html
1676scope: source.example-wp
1677contexts:
1678  main:
1679    - match: (a)
1680      scope: a
1681      push:
1682        - match: (b)
1683          scope: b
1684          set:
1685            - match: c
1686              scope: c
1687          with_prototype:
1688            - match: d
1689              scope: d
1690      with_prototype:
1691        - match: \1
1692          scope: '1'
1693          pop: true
1694"#;
1695
1696        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1697        expect_scope_stacks_with_syntax("aa", &["<a>", "<1>"], syntax.clone());
1698        expect_scope_stacks_with_syntax("abcdb", &["<a>", "<b>", "<c>", "<d>", "<1>"], syntax);
1699    }
1700
1701    #[test]
1702    fn can_parse_two_with_prototypes_at_same_stack_level_updated_captures_ignore_unexisting() {
1703        let syntax_yamlstr = r#"
1704%YAML 1.2
1705---
1706# See http://www.sublimetext.com/docs/3/syntax.html
1707scope: source.example-wp
1708contexts:
1709  main:
1710    - match: (a)(-)
1711      scope: a
1712      push:
1713        - match: (b)
1714          scope: b
1715          set:
1716            - match: c
1717              scope: c
1718          with_prototype:
1719            - match: d
1720              scope: d
1721      with_prototype:
1722        - match: \2
1723          scope: '2'
1724          pop: true
1725        - match: \1
1726          scope: '1'
1727          pop: true
1728"#;
1729
1730        let syntax = SyntaxDefinition::load_from_str(syntax_yamlstr, true, None).unwrap();
1731        expect_scope_stacks_with_syntax("a--", &["<a>", "<2>"], syntax.clone());
1732        // it seems that when ST encounters a non existing pop backreference, it just pops back to the with_prototype's original parent context - i.e. cdb is unscoped
1733        // TODO: it would be useful to have syntest functionality available here for easier testing and clarity
1734        expect_scope_stacks_with_syntax("a-bcdba-", &["<a>", "<b>"], syntax);
1735    }
1736
1737    #[test]
1738    fn can_parse_syntax_with_eol_and_newline() {
1739        let syntax = r#"
1740name: test
1741scope: source.test
1742contexts:
1743  main:
1744    - match: foo$\n
1745      scope: foo.newline
1746"#;
1747
1748        let line = "foo";
1749        let expect = ["<source.test>, <foo.newline>"];
1750        expect_scope_stacks(line, &expect, syntax);
1751    }
1752
1753    #[test]
1754    fn can_parse_syntax_with_eol_only() {
1755        let syntax = r#"
1756name: test
1757scope: source.test
1758contexts:
1759  main:
1760    - match: foo$
1761      scope: foo.newline
1762"#;
1763
1764        let line = "foo";
1765        let expect = ["<source.test>, <foo.newline>"];
1766        expect_scope_stacks(line, &expect, syntax);
1767    }
1768
1769    #[test]
1770    fn can_parse_syntax_with_beginning_of_line() {
1771        let syntax = r#"
1772name: test
1773scope: source.test
1774contexts:
1775  main:
1776    - match: \w+
1777      scope: word
1778      push:
1779        # this should not match at the end of the line
1780        - match: ^\s*$
1781          pop: true
1782        - match: =+
1783          scope: heading
1784          pop: true
1785    - match: .*
1786      scope: other
1787"#;
1788
1789        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1790        let syntax_set = link(syntax_newlines);
1791
1792        let mut state = ParseState::new(&syntax_set.syntaxes()[0], false);
1793        assert_eq!(
1794            ops(&mut state, "foo\n", &syntax_set),
1795            vec![
1796                (0, Push(Scope::new("source.test").unwrap())),
1797                (0, Push(Scope::new("word").unwrap())),
1798                (3, Pop(1))
1799            ]
1800        );
1801        assert_eq!(
1802            ops(&mut state, "===\n", &syntax_set),
1803            vec![(0, Push(Scope::new("heading").unwrap())), (3, Pop(1))]
1804        );
1805
1806        assert_eq!(
1807            ops(&mut state, "bar\n", &syntax_set),
1808            vec![(0, Push(Scope::new("word").unwrap())), (3, Pop(1))]
1809        );
1810        // This should result in popping out of the context
1811        assert_eq!(ops(&mut state, "\n", &syntax_set), vec![]);
1812        // So now this matches other
1813        assert_eq!(
1814            ops(&mut state, "====\n", &syntax_set),
1815            vec![(0, Push(Scope::new("other").unwrap())), (4, Pop(1))]
1816        );
1817    }
1818
1819    #[test]
1820    fn can_parse_syntax_with_comment_and_eol() {
1821        let syntax = r#"
1822name: test
1823scope: source.test
1824contexts:
1825  main:
1826    - match: (//).*$
1827      scope: comment.line.double-slash
1828"#;
1829
1830        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1831        let syntax_set = link(syntax_newlines);
1832
1833        let mut state = ParseState::new(&syntax_set.syntaxes()[0], false);
1834        assert_eq!(
1835            ops(&mut state, "// foo\n", &syntax_set),
1836            vec![
1837                (0, Push(Scope::new("source.test").unwrap())),
1838                (0, Push(Scope::new("comment.line.double-slash").unwrap())),
1839                // 6 is important here, should not be 7. The pattern should *not* consume the newline,
1840                // but instead match before it. This is important for whitespace-sensitive syntaxes
1841                // where newlines terminate statements such as Scala.
1842                (6, Pop(1))
1843            ]
1844        );
1845    }
1846
1847    #[test]
1848    fn can_parse_text_with_unicode_to_skip() {
1849        let syntax = r#"
1850name: test
1851scope: source.test
1852contexts:
1853  main:
1854    - match: (?=.)
1855      push: test
1856  test:
1857    - match: (?=.)
1858      pop: true
1859    - match: x
1860      scope: test.good
1861"#;
1862
1863        // U+03C0 GREEK SMALL LETTER PI, 2 bytes in UTF-8
1864        expect_scope_stacks("\u{03C0}x", &["<source.test>, <test.good>"], syntax);
1865        // U+0800 SAMARITAN LETTER ALAF, 3 bytes in UTF-8
1866        expect_scope_stacks("\u{0800}x", &["<source.test>, <test.good>"], syntax);
1867        // U+1F600 GRINNING FACE, 4 bytes in UTF-8
1868        expect_scope_stacks("\u{1F600}x", &["<source.test>, <test.good>"], syntax);
1869    }
1870
1871    #[test]
1872    fn can_include_backrefs() {
1873        let syntax = SyntaxDefinition::load_from_str(
1874            r#"
1875                name: Backref Include Test
1876                scope: source.backrefinc
1877                contexts:
1878                  main:
1879                    - match: (a)
1880                      scope: a
1881                      push: context1
1882                  context1:
1883                    - include: context2
1884                  context2:
1885                    - match: \1
1886                      scope: b
1887                      pop: true
1888                "#,
1889            true,
1890            None,
1891        )
1892        .unwrap();
1893
1894        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
1895    }
1896
1897    #[test]
1898    fn can_include_nested_backrefs() {
1899        let syntax = SyntaxDefinition::load_from_str(
1900            r#"
1901                name: Backref Include Test
1902                scope: source.backrefinc
1903                contexts:
1904                  main:
1905                    - match: (a)
1906                      scope: a
1907                      push: context1
1908                  context1:
1909                    - include: context3
1910                  context3:
1911                    - include: context2
1912                  context2:
1913                    - match: \1
1914                      scope: b
1915                      pop: true
1916                "#,
1917            true,
1918            None,
1919        )
1920        .unwrap();
1921
1922        expect_scope_stacks_with_syntax("aa", &["<a>", "<b>"], syntax);
1923    }
1924
1925    fn expect_scope_stacks(line_without_newline: &str, expect: &[&str], syntax: &str) {
1926        println!("Parsing with newlines");
1927        let line_with_newline = format!("{}\n", line_without_newline);
1928        let syntax_newlines = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1929        expect_scope_stacks_with_syntax(&line_with_newline, expect, syntax_newlines);
1930
1931        println!("Parsing without newlines");
1932        let syntax_nonewlines = SyntaxDefinition::load_from_str(syntax, false, None).unwrap();
1933        expect_scope_stacks_with_syntax(line_without_newline, expect, syntax_nonewlines);
1934    }
1935
1936    fn expect_scope_stacks_with_syntax(line: &str, expect: &[&str], syntax: SyntaxDefinition) {
1937        // check that each expected scope stack appears at least once while parsing the given test line
1938
1939        let syntax_set = link(syntax);
1940        let mut state = ParseState::new(&syntax_set.syntaxes()[0], false);
1941        let ops = ops(&mut state, line, &syntax_set);
1942        expect_scope_stacks_for_ops(ops, expect);
1943    }
1944
1945    fn expect_scope_stacks_for_ops(ops: Vec<(usize, ScopeStackOp)>, expect: &[&str]) {
1946        let mut criteria_met = Vec::new();
1947        for stack_str in stack_states(ops) {
1948            println!("{}", stack_str);
1949            for expectation in expect.iter() {
1950                if stack_str.contains(expectation) {
1951                    criteria_met.push(expectation);
1952                }
1953            }
1954        }
1955        if let Some(missing) = expect.iter().find(|e| !criteria_met.contains(e)) {
1956            panic!("expected scope stack '{}' missing", missing);
1957        }
1958    }
1959
1960    fn parse(line: &str, syntax: &str) -> Vec<(usize, ScopeStackOp)> {
1961        let syntax = SyntaxDefinition::load_from_str(syntax, true, None).unwrap();
1962        let syntax_set = link(syntax);
1963
1964        let mut state = ParseState::new(&syntax_set.syntaxes()[0], false);
1965        ops(&mut state, line, &syntax_set)
1966    }
1967
1968    fn link(syntax: SyntaxDefinition) -> SyntaxSet {
1969        let mut builder = SyntaxSetBuilder::new();
1970        builder.add(syntax);
1971        builder.build()
1972    }
1973
1974    fn ops(
1975        state: &mut ParseState,
1976        line: &str,
1977        syntax_set: &SyntaxSet,
1978    ) -> Vec<(usize, ScopeStackOp)> {
1979        let ops = state.parse_line(line, syntax_set).expect("#[cfg(test)]");
1980        debug_print_ops(line, &ops);
1981        ops
1982    }
1983
1984    fn stack_states(ops: Vec<(usize, ScopeStackOp)>) -> Vec<String> {
1985        let mut states = Vec::new();
1986        let mut stack = ScopeStack::new();
1987        for (_, op) in ops.iter() {
1988            stack.apply(op).expect("#[cfg(test)]");
1989            let scopes: Vec<String> = stack
1990                .as_slice()
1991                .iter()
1992                .map(|s| format!("{:?}", s))
1993                .collect();
1994            let stack_str = scopes.join(", ");
1995            states.push(stack_str);
1996        }
1997        states
1998    }
1999}
syntect_no_panic/parsing/parser.rs

syntect_no_panic/parsing/
parser.rs