sbnf/compiler/codegen/
mod.rs

1use std::fmt::Write;
2
3use base64;
4use hashbrown::HashMap;
5use indexmap::IndexMap;
6
7use super::common::{parse_scope, Compiler, Symbol};
8use super::interpreter::{Expression, Interpreted, Key, TerminalEmbed};
9use crate::sublime_syntax;
10
11pub mod lookahead;
12
13use lookahead::{Lookahead, StackEntry, StackEntryData, Terminal};
14
/// Identifies the branch point that a generated context belongs to.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct BranchPoint<'a> {
    /// Name of the branch point; this is the name passed to a `fail` action
    /// and printed when formatting a context key.
    name: &'a str,
    /// When true, `gen_end_match` emits a `fail` of this branch point on
    /// unexpected input instead of an `invalid.illegal` match.
    can_fail: bool,
}
20
/// Key that uniquely identifies a generated context. It is used to look up
/// already-named contexts in `State::context_cache` and
/// `State::include_context_cache`.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
struct ContextKey<'a> {
    /// The rule this context is generated for.
    rule_key: Key,
    /// Whether the context is at the top level of its rule. Only top-level
    /// contexts outside of a branch point receive the rule's scope as their
    /// `meta_content_scope`.
    is_top_level: bool,
    /// The terminals (and end behaviour) that can be matched in this context.
    lookahead: Lookahead<'a>,
    /// The branch point this context is part of, if any.
    branch_point: Option<BranchPoint<'a>>,
}
28
29impl<'a> ContextKey<'a> {
30    #[allow(dead_code)]
31    fn with_compiler(
32        &'a self,
33        compiler: &'a Compiler,
34    ) -> ContextKeyWithCompiler<'a> {
35        ContextKeyWithCompiler { key: self, compiler }
36    }
37}
38
/// A `ContextKey` bundled with the `Compiler`, so that the key can be
/// formatted through the `with_compiler` helpers of the values it contains.
struct ContextKeyWithCompiler<'a> {
    /// The key being formatted.
    key: &'a ContextKey<'a>,
    /// Compiler handed to the nested `with_compiler` formatting helpers.
    compiler: &'a Compiler,
}
43
44impl std::fmt::Debug for ContextKeyWithCompiler<'_> {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        writeln!(f, "{}:", self)?;
47        for terminal in &self.key.lookahead.terminals {
48            writeln!(f, "{:?}", terminal.with_compiler(self.compiler))?;
49        }
50        Ok(())
51    }
52}
53
54impl std::fmt::Display for ContextKeyWithCompiler<'_> {
55    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
56        write!(f, "{}", self.key.rule_key.with_compiler(self.compiler))?;
57        if let Some(branch_point) = &self.key.branch_point {
58            write!(f, " (branch point: '{}')", branch_point.name)?;
59        }
60        if self.key.is_top_level {
61            write!(f, " top level")?;
62        }
63        Ok(())
64    }
65}
66
/// Per-rule bookkeeping stored in `State::rules`.
///
/// NOTE(review): none of these counters are read or written in the part of
/// the file reviewed here; presumably they are used to generate unique
/// context and branch point names for a rule — confirm against the
/// name-generation helpers.
#[derive(Debug)]
struct Rule {
    context_count: usize,
    branch_point_count: usize,
    entry_context_count: usize,
}
73
/// Mutable state threaded through code generation.
struct State<'a> {
    /// Compiler providing the allocator and symbol resolution.
    compiler: &'a Compiler,
    /// Per-rule bookkeeping, keyed by rule.
    rules: HashMap<Key, Rule>,
    /// Context keys that still need their contexts generated; drained by
    /// `codegen`.
    context_queue: Vec<ContextKey<'a>>,
    /// Maps every context key that has been assigned a name to that name.
    context_cache: HashMap<ContextKey<'a>, &'a str>,
    /// Maps the key of a branch point's terminals to the name of the include
    /// context that starts the branch, so existing branch points are reused.
    include_context_cache: HashMap<ContextKey<'a>, &'a str>,
    /// NOTE(review): not used in the part of the file reviewed here;
    /// presumably caches entry contexts by their context-name stack — confirm.
    entry_contexts: HashMap<&'a [&'a str], &'a str>,
    /// All generated contexts, keyed by context name.
    contexts: HashMap<&'a str, sublime_syntax::Context<'a>>,
}
83
84pub fn codegen<'a>(
85    compiler: &'a Compiler,
86    interpreted: Interpreted<'a>,
87) -> sublime_syntax::Syntax<'a> {
88    let mut state = State {
89        compiler,
90        rules: HashMap::new(),
91        context_queue: vec![],
92        context_cache: HashMap::new(),
93        include_context_cache: HashMap::new(),
94        entry_contexts: HashMap::new(),
95        contexts: HashMap::new(),
96    };
97
98    for rule_key in &interpreted.entry_points {
99        gen_rule(&mut state, &interpreted, *rule_key);
100    }
101
102    while let Some(item) = state.context_queue.pop() {
103        gen_contexts(&mut state, &interpreted, vec![item]);
104    }
105
106    let contexts = compiler.allocator.alloc_slice_fill_iter(state.contexts);
107    contexts.sort_by_key(|v| v.0);
108
109    sublime_syntax::Syntax {
110        name: interpreted.metadata.name,
111        file_extensions: interpreted.metadata.file_extensions,
112        first_line_match: interpreted.metadata.first_line_match.clone(),
113        scope: interpreted.metadata.scope,
114        hidden: interpreted.metadata.hidden,
115        variables: &[],
116        contexts,
117    }
118}
119
120fn lookahead_rule<'a>(
121    state: &State<'a>,
122    interpreted: &Interpreted<'a>,
123    rule_key: Key,
124) -> lookahead::Lookahead<'a> {
125    let rule = &interpreted.rules[&rule_key];
126
127    let mut lookahead_state = lookahead::LookaheadState::new(state.compiler);
128    lookahead_state.push_variable(rule_key);
129
130    let mut lookahead = lookahead::lookahead(
131        interpreted,
132        rule.expression,
133        &mut lookahead_state,
134    );
135
136    lookahead_state.pop_variable(rule_key, &mut lookahead);
137
138    lookahead
139}
140
141fn gen_rule<'a>(
142    state: &mut State<'a>,
143    interpreted: &Interpreted<'a>,
144    rule_key: Key,
145) {
146    let lookahead = lookahead_rule(state, interpreted, rule_key);
147
148    let context_key = ContextKey {
149        rule_key,
150        is_top_level: true,
151        lookahead,
152        branch_point: None,
153    };
154
155    let name = rule_key.get_name(state.compiler);
156
157    let old_entry = state.context_cache.insert(context_key.clone(), name);
158    assert!(old_entry.is_none());
159
160    state.context_queue.push(context_key);
161}
162
163fn count_duplicate_regexes<'a, I>(iter: I) -> HashMap<Symbol, usize>
164where
165    I: std::iter::Iterator<Item = &'a Lookahead<'a>>,
166{
167    let mut map = HashMap::new();
168    for lookahead in iter {
169        for terminal in &lookahead.terminals {
170            if let Some(count) = map.get_mut(&terminal.regex) {
171                *count += 1;
172            } else {
173                map.insert(terminal.regex, 1);
174            }
175        }
176    }
177    map
178}
179
180fn index_terminals(lookahead: &Lookahead<'_>) -> IndexMap<Symbol, Vec<usize>> {
181    let mut map = IndexMap::<Symbol, Vec<usize>>::new();
182    for (i, terminal) in lookahead.terminals.iter().enumerate() {
183        if let Some(m) = map.get_mut(&terminal.regex) {
184            m.push(i);
185        } else {
186            map.insert(terminal.regex, vec![i]);
187        }
188    }
189    map
190}
191
192/*
193*/
194fn gen_contexts<'a>(
195    state: &mut State<'a>,
196    interpreted: &Interpreted<'a>,
197    contexts: Vec<ContextKey<'a>>,
198) {
199    assert!(!contexts.is_empty());
200    if contexts.len() > 1 {
201        assert!(contexts.iter().all(|c| c.branch_point.is_some()));
202    }
203
204    // println!("GEN CONTEXTS {}", contexts.len());
205    // for (name, context_key) in &contexts {
206    //     println!("GEN {} {:?}", name, context_key.with_compiler(state.compiler));
207    // }
208
209    let regexes =
210        count_duplicate_regexes(contexts.iter().map(|c| &c.lookahead));
211
212    let mut next_contexts: Vec<ContextKey<'_>> = vec![];
213
214    for context_key in contexts {
215        let name = state.context_cache[&context_key];
216
217        let mut patterns = vec![];
218
219        let rule_key = context_key.rule_key;
220        let is_top_level = context_key.is_top_level;
221        let lookahead = context_key.lookahead;
222        let branch_point = context_key.branch_point;
223
224        let meta_content_scope: sublime_syntax::Scope;
225        let meta_include_prototype: bool;
226        let capture: bool = false;
227        {
228            // Branch points have an "invalid" rule at the top of the stack
229            let rule = interpreted.rules.get(&rule_key).unwrap();
230
231            meta_content_scope = if branch_point.is_none() && is_top_level {
232                rule.options.scope
233            } else {
234                sublime_syntax::Scope::EMPTY
235            };
236
237            meta_include_prototype = rule.options.include_prototype;
238            // capture = rule.options.capture && !branch_point.is_some();
239        }
240
241        for (regex, terminal_indexes) in index_terminals(&lookahead) {
242            let continue_branch = *regexes.get(&regex).unwrap() > 1;
243
244            if terminal_indexes.len() == 1 {
245                let terminal = &lookahead.terminals[terminal_indexes[0]];
246
247                // Continue branch
248                if continue_branch {
249                    let scope = scope_for_match_stack(
250                        state,
251                        interpreted,
252                        Some(rule_key),
253                        terminal,
254                    );
255
256                    let exit = if let Some(lookahead) =
257                        lookahead::advance_terminal(
258                            interpreted,
259                            terminal,
260                            state.compiler,
261                        ) {
262                        let next_key = ContextKey {
263                            rule_key,
264                            is_top_level,
265                            lookahead: lookahead.clone(),
266                            branch_point: branch_point.clone(),
267                        };
268
269                        let name = if let Some(entry) =
270                            state.context_cache.get(&next_key)
271                        {
272                            entry
273                        } else {
274                            let name =
275                                create_context_name(state, next_key.clone());
276
277                            next_contexts.push(next_key);
278                            name
279                        };
280
281                        sublime_syntax::ContextChange::PushOne(name)
282                    } else {
283                        sublime_syntax::ContextChange::None
284                    };
285
286                    patterns.push(gen_terminal(
287                        state,
288                        interpreted,
289                        name,
290                        rule_key,
291                        &meta_content_scope,
292                        &branch_point,
293                        scope,
294                        terminal,
295                        exit,
296                        1,
297                    ));
298                } else {
299                    let scope = scope_for_match_stack(
300                        state,
301                        interpreted,
302                        if branch_point.is_some() {
303                            Some(rule_key)
304                        } else {
305                            None
306                        },
307                        terminal,
308                    );
309
310                    patterns.push(gen_simple_match(
311                        state,
312                        interpreted,
313                        name,
314                        rule_key,
315                        is_top_level,
316                        &meta_content_scope,
317                        &branch_point,
318                        &lookahead,
319                        scope,
320                        terminal,
321                    ));
322                }
323            } else {
324                // Start a branch point or use an existing one
325                let branch_point_name: &str;
326                let include_context_name: &str;
327                {
328                    // TODO: No need for context.end, context.empty or
329                    // branch_point in this struct.
330                    let key = ContextKey {
331                        rule_key,
332                        is_top_level,
333                        lookahead: Lookahead {
334                            terminals: terminal_indexes
335                                .iter()
336                                .map(|i| lookahead.terminals[*i].clone())
337                                .collect::<Vec<_>>(),
338                            end: lookahead::End::None,
339                            empty: true,
340                        },
341                        branch_point: None,
342                    };
343
344                    if let Some(include_context_name) =
345                        state.include_context_cache.get(&key)
346                    {
347                        patterns.push(sublime_syntax::ContextPattern::Include(
348                            include_context_name,
349                        ));
350                        continue;
351                    } else {
352                        // Start new branch
353                        branch_point_name =
354                            create_branch_point_name(state, rule_key);
355
356                        include_context_name =
357                            create_branch_point_include_context_name(
358                                state,
359                                branch_point_name,
360                            );
361                        assert!(!state
362                            .contexts
363                            .contains_key(&include_context_name));
364
365                        state
366                            .include_context_cache
367                            .insert(key, include_context_name);
368                    }
369                }
370
371                let mut branches = vec![];
372
373                let num_terminals = terminal_indexes.len();
374
375                // println!("START BRANCH {:?}", branch_point_name);
376                let mut is_repetition = true;
377
378                for (i, terminal_index) in
379                    terminal_indexes.into_iter().enumerate()
380                {
381                    let terminal = &lookahead.terminals[terminal_index];
382
383                    if !matches!(
384                        terminal.stack.last(),
385                        Some(StackEntry {
386                            data: StackEntryData::Repetition { .. },
387                            ..
388                        })
389                    ) {
390                        is_repetition = false;
391                    }
392
393                    /*
394                    The last branch of a branch point can't fail the branch;
395                    instead failing as illegal.
396
397                    If a branch point starts within another branch point the
398                    inner branch point's last branch fails the parent branch
399                    point, unless it is on the last branch. Effectively
400                    can_fail is always false for only the last branch in a tree
401                    of branch points.
402                    */
403                    let is_last = i != num_terminals - 1;
404                    let can_fail = is_last
405                        && branch_point.as_ref().is_none_or(|bp| bp.can_fail);
406                    let branch_point_name = match &branch_point {
407                        Some(branch_point) if !can_fail => branch_point.name,
408                        _ => branch_point_name,
409                    };
410
411                    // let branch_rule_key = branch_match.local_key(rule_key);
412                    let branch_rule_key = terminal.local_key(rule_key);
413
414                    let branch_key = ContextKey {
415                        rule_key: branch_rule_key,
416                        is_top_level, // TODO: correctness
417                        lookahead: Lookahead {
418                            terminals: vec![terminal.clone()],
419                            end: lookahead::End::Illegal,
420                            empty: false,
421                        },
422                        branch_point: Some(BranchPoint {
423                            name: branch_point_name,
424                            can_fail,
425                        }),
426                    };
427
428                    let ctx_name = if let Some(name) =
429                        state.context_cache.get(&branch_key)
430                    {
431                        branches.push(*name);
432                        None
433                    } else {
434                        let name =
435                            create_context_name(state, branch_key.clone());
436                        branches.push(name);
437                        Some(name)
438                    };
439
440                    let next_name = if let Some(lookahead) =
441                        lookahead::advance_terminal(
442                            interpreted,
443                            terminal,
444                            state.compiler,
445                        ) {
446                        let next_key = ContextKey {
447                            rule_key,
448                            is_top_level, // TODO: correctness
449                            lookahead,
450                            branch_point: branch_key.branch_point,
451                        };
452
453                        if let Some(name) = state.context_cache.get(&next_key) {
454                            Some(*name)
455                        } else {
456                            let name =
457                                create_context_name(state, next_key.clone());
458
459                            next_contexts.push(next_key);
460                            Some(name)
461                        }
462                    } else {
463                        None
464                    };
465
466                    if let Some(ctx_name) = ctx_name {
467                        let scope = scope_for_match_stack(
468                            state,
469                            interpreted,
470                            None,
471                            terminal,
472                        );
473
474                        let (exit, pop) = if let Some(name) = next_name {
475                            // Using set in branch_point is broken, so we
476                            // have to use push.
477                            (sublime_syntax::ContextChange::PushOne(name), 1)
478                        } else {
479                            (sublime_syntax::ContextChange::None, 1)
480                        };
481
482                        let terminal_match = gen_terminal(
483                            state,
484                            interpreted,
485                            ctx_name,
486                            rule_key,
487                            &meta_content_scope,
488                            &branch_point,
489                            scope,
490                            terminal,
491                            exit,
492                            pop,
493                        );
494
495                        let matches = state
496                            .compiler
497                            .allocator
498                            .alloc_slice_clone(&[terminal_match]);
499
500                        state.contexts.insert(
501                            ctx_name,
502                            sublime_syntax::Context {
503                                meta_scope: sublime_syntax::Scope::EMPTY,
504                                meta_content_scope:
505                                    sublime_syntax::Scope::EMPTY,
506                                meta_include_prototype: false,
507                                clear_scopes:
508                                    sublime_syntax::ScopeClear::Amount(0),
509                                matches,
510                                comment: None,
511                            },
512                        );
513                    }
514                }
515
516                assert!(branches.len() > 1);
517                let branches =
518                    state.compiler.allocator.alloc_slice_clone(&branches);
519
520                let lookahead_regex =
521                    bumpalo::format!(in &state.compiler.allocator,
522                        "(?={})", state.compiler.resolve_symbol(regex),
523                    )
524                    .into_bump_str();
525
526                let comment = bumpalo::format!(in &state.compiler.allocator,
527                    "Include context for branch point {}",
528                    branch_point_name
529                )
530                .into_bump_str();
531
532                let pop = if is_repetition { 0 } else { 1 };
533
534                let matches = state.compiler.allocator.alloc_slice_clone(&[
535                    sublime_syntax::ContextPattern::Match(
536                        sublime_syntax::Match {
537                            pattern: sublime_syntax::Pattern(lookahead_regex),
538                            scope: sublime_syntax::Scope::EMPTY,
539                            captures: &[],
540                            change_context:
541                                sublime_syntax::ContextChange::Branch(
542                                    branch_point_name,
543                                    branches,
544                                ),
545                            pop,
546                        },
547                    ),
548                ]);
549
550                state.contexts.insert(
551                    include_context_name,
552                    sublime_syntax::Context {
553                        meta_scope: sublime_syntax::Scope::EMPTY,
554                        meta_content_scope: sublime_syntax::Scope::EMPTY,
555                        meta_include_prototype: true,
556                        clear_scopes: sublime_syntax::ScopeClear::Amount(0),
557                        matches,
558                        comment: Some(comment),
559                    },
560                );
561
562                patterns.push(sublime_syntax::ContextPattern::Include(
563                    include_context_name,
564                ));
565            }
566        }
567
568        // Need to add the meta_content_scope to all patterns that pop. This
569        // matches expected behaviour in that the rule scope applies to all
570        // matches in this context.
571        // for p in &mut patterns {
572        //     match p {
573        //         sublime_syntax::ContextPattern::Match(sublime_syntax::Match {
574        //             scope,
575        //             change_context: sublime_syntax::ContextChange::Pop(_),
576        //             ..
577        //         }) => {
578        //             scope.scopes = meta_content_scope.scopes.iter().chain(scope.scopes.iter()).cloned().collect::<Vec<_>>();
579        //         },
580        //         _ => {},
581        //     }
582        // }
583
584        if let Some(pattern) = gen_end_match(
585            state,
586            interpreted,
587            rule_key,
588            is_top_level,
589            &branch_point,
590            &lookahead,
591            capture,
592        ) {
593            patterns.push(pattern);
594        }
595
596        let patterns = state.compiler.allocator.alloc_slice_clone(&patterns);
597
598        let comment = bumpalo::format!(in &state.compiler.allocator,
599            "Rule: {}",
600            rule_key.with_compiler(state.compiler),
601            // context_key.with_compiler(state.compiler)
602        )
603        .into_bump_str();
604
605        assert!(state.contexts.get(&name).is_none());
606        state.contexts.insert(
607            name,
608            sublime_syntax::Context {
609                meta_content_scope,
610                meta_scope: sublime_syntax::Scope::EMPTY,
611                // meta_scope,
612                meta_include_prototype,
613                clear_scopes: sublime_syntax::ScopeClear::Amount(0),
614                matches: patterns,
615                comment: Some(comment),
616            },
617        );
618    }
619
620    if !next_contexts.is_empty() {
621        gen_contexts(state, interpreted, next_contexts);
622    }
623}
624
625fn gen_end_match<'a>(
626    state: &mut State<'a>,
627    interpreted: &Interpreted<'a>,
628    rule_key: Key,
629    is_top_level: bool,
630    branch_point: &Option<BranchPoint<'a>>,
631    lookahead: &Lookahead<'a>,
632    capture: bool,
633) -> Option<sublime_syntax::ContextPattern<'a>> {
634    match &lookahead.end {
635        lookahead::End::Illegal => Some(if lookahead.empty && !capture {
636            sublime_syntax::Match {
637                pattern: sublime_syntax::Pattern::from(r"(?=\S)"),
638                scope: sublime_syntax::Scope::EMPTY,
639                captures: &[],
640                change_context: sublime_syntax::ContextChange::None,
641                pop: 1,
642            }
643        } else if branch_point.is_some()
644            && branch_point.as_ref().unwrap().can_fail
645        {
646            sublime_syntax::Match {
647                pattern: sublime_syntax::Pattern::from(r"\S"),
648                scope: sublime_syntax::Scope::EMPTY,
649                captures: &[],
650                change_context: sublime_syntax::ContextChange::Fail(
651                    branch_point.as_ref().unwrap().name,
652                ),
653                pop: 0,
654            }
655        } else {
656            sublime_syntax::Match {
657                pattern: sublime_syntax::Pattern::from(r"\S"),
658                scope: parse_scope(
659                    &interpreted.metadata,
660                    "invalid.illegal",
661                    state.compiler,
662                ),
663                captures: &[],
664                change_context: sublime_syntax::ContextChange::None,
665                pop: if capture { 0 } else { 1 },
666            }
667        }),
668        lookahead::End::None => None,
669        lookahead::End::Push(lookahead) => {
670            let push_context_key = ContextKey {
671                rule_key,
672                is_top_level,
673                lookahead: (**lookahead).clone(),
674                branch_point: branch_point.clone(),
675            };
676
677            let name = if let Some(name) =
678                state.context_cache.get(&push_context_key)
679            {
680                name
681            } else {
682                let name = create_context_name(state, push_context_key.clone());
683
684                state.context_queue.push(push_context_key);
685                name
686            };
687
688            Some(sublime_syntax::Match {
689                pattern: sublime_syntax::Pattern::from(r"(?=\S)"),
690                scope: sublime_syntax::Scope::EMPTY,
691                captures: &[],
692                change_context: sublime_syntax::ContextChange::PushOne(name),
693                pop: 1,
694            })
695        }
696    }
697    .map(sublime_syntax::ContextPattern::Match)
698}
699
700fn gen_terminal<'a>(
701    state: &mut State<'a>,
702    interpreted: &Interpreted<'a>,
703    context_name: &str,
704    rule_key: Key,
705    meta_content_scope: &sublime_syntax::Scope<'a>,
706    branch_point: &Option<BranchPoint>,
707    mut scope: sublime_syntax::Scope<'a>,
708    terminal: &Terminal<'a>,
709    mut exit: sublime_syntax::ContextChange<'a>,
710    mut pop_amount: u16,
711) -> sublime_syntax::ContextPattern<'a> {
712    match &terminal.options.unwrap().embed {
713        TerminalEmbed::Embed {
714            embed,
715            embed_scope,
716            escape,
717            escape_captures,
718        } => {
719            let embed_exit =
720                sublime_syntax::ContextChange::Embed(sublime_syntax::Embed {
721                    embed,
722                    embed_scope: *embed_scope,
723                    escape: Some(sublime_syntax::Pattern(escape)),
724                    escape_captures,
725                });
726
727            match &mut exit {
728                sublime_syntax::ContextChange::None => {
729                    exit = embed_exit;
730                }
731                sublime_syntax::ContextChange::Set(ref mut contexts)
732                | sublime_syntax::ContextChange::Push(ref mut contexts) => {
733                    // TODO: This generates duplicate contexts
734                    let embed_context = create_uncached_context_name(
735                        state,
736                        rule_key,
737                        branch_point,
738                    );
739
740                    let matches =
741                        state.compiler.allocator.alloc_slice_clone(&[
742                            sublime_syntax::ContextPattern::Match(
743                                sublime_syntax::Match {
744                                    pattern: sublime_syntax::Pattern::from(""),
745                                    scope: sublime_syntax::Scope::EMPTY,
746                                    captures: &[],
747                                    change_context: embed_exit,
748                                    pop: 1,
749                                },
750                            ),
751                        ]);
752
753                    state.contexts.insert(
754                        embed_context,
755                        sublime_syntax::Context {
756                            meta_scope: sublime_syntax::Scope::EMPTY,
757                            meta_content_scope: sublime_syntax::Scope::EMPTY,
758                            meta_include_prototype: true,
759                            clear_scopes: sublime_syntax::ScopeClear::Amount(0),
760                            matches,
761                            comment: None,
762                        },
763                    );
764
765                    // TODO: Avoid this extra allocation
766                    let mut ctx = contexts.to_vec();
767                    ctx.push(embed_context);
768                    *contexts =
769                        state.compiler.allocator.alloc_slice_clone(&ctx);
770                }
771                _ => panic!(),
772            }
773        }
774        TerminalEmbed::Include { context: path, prototype } => {
775            // Generate the prototype context
776            let prototype_context = {
777                let lookahead = lookahead_rule(state, interpreted, *prototype);
778
779                let prototype_key = ContextKey {
780                    rule_key: *prototype,
781                    is_top_level: true,
782                    lookahead: lookahead.clone(),
783                    branch_point: None,
784                };
785
786                if let Some(name) = state.context_cache.get(&prototype_key) {
787                    name
788                } else {
789                    let name =
790                        create_context_name(state, prototype_key.clone());
791
792                    state.context_queue.push(prototype_key);
793                    name
794                }
795            };
796
797            let include_exit = sublime_syntax::ContextChange::IncludeEmbed(
798                sublime_syntax::IncludeEmbed {
799                    path,
800                    use_push: false,
801                    with_prototype: state.compiler.allocator.alloc_slice_clone(
802                        &[sublime_syntax::ContextPattern::Include(
803                            prototype_context,
804                        )],
805                    ),
806                },
807            );
808
809            match exit {
810                sublime_syntax::ContextChange::None => {
811                    exit = include_exit;
812                }
813                sublime_syntax::ContextChange::Set(ref mut contexts)
814                | sublime_syntax::ContextChange::Push(ref mut contexts) => {
815                    // TODO: This generates duplicate contexts
816                    let embed_context = create_uncached_context_name(
817                        state,
818                        rule_key,
819                        branch_point,
820                    );
821
822                    let matches =
823                        state.compiler.allocator.alloc_slice_clone(&[
824                            sublime_syntax::ContextPattern::Match(
825                                sublime_syntax::Match {
826                                    pattern: sublime_syntax::Pattern::from(""),
827                                    scope: sublime_syntax::Scope::EMPTY,
828                                    captures: &[],
829                                    change_context: include_exit,
830                                    pop: 0,
831                                },
832                            ),
833                        ]);
834
835                    state.contexts.insert(
836                        embed_context,
837                        sublime_syntax::Context {
838                            meta_scope: sublime_syntax::Scope::EMPTY,
839                            meta_content_scope: sublime_syntax::Scope::EMPTY,
840                            meta_include_prototype: false,
841                            clear_scopes: sublime_syntax::ScopeClear::Amount(0),
842                            matches,
843                            comment: None,
844                        },
845                    );
846
847                    // TODO: Avoid this extra allocation
848                    let mut ctx = contexts.to_vec();
849                    ctx.push(embed_context);
850                    *contexts =
851                        state.compiler.allocator.alloc_slice_clone(&ctx);
852                }
853                _ => panic!(),
854            }
855        }
856        TerminalEmbed::None => {}
857    }
858
859    // Translate Set into Push/Pop if we're setting back to the same context
860    if let sublime_syntax::ContextChange::Push(contexts) = &exit {
861        if pop_amount > 0 && contexts[0] == context_name {
862            if contexts.len() > 1 {
863                exit = sublime_syntax::ContextChange::Push(&contexts[1..]);
864            } else {
865                exit = sublime_syntax::ContextChange::None;
866            }
867            pop_amount -= 1;
868        }
869    }
870
871    if let sublime_syntax::ContextChange::None = &exit {
872        if pop_amount > 0 {
873            scope =
874                meta_content_scope.extended(scope, &state.compiler.allocator);
875        }
876    }
877
878    sublime_syntax::ContextPattern::Match(sublime_syntax::Match {
879        pattern: sublime_syntax::Pattern(
880            state.compiler.resolve_symbol(terminal.regex),
881        ),
882        scope,
883        captures: terminal.options.unwrap().captures,
884        change_context: exit,
885        pop: pop_amount,
886    })
887}
888
889fn gen_simple_match<'a>(
890    state: &mut State<'a>,
891    interpreted: &Interpreted<'a>,
892    context_name: &str,
893    rule_key: Key,
894    is_top_level: bool,
895    meta_content_scope: &sublime_syntax::Scope<'a>,
896    branch_point: &Option<BranchPoint<'a>>,
897    lookahead: &Lookahead<'a>,
898    scope: sublime_syntax::Scope<'a>,
899    terminal: &Terminal<'a>,
900) -> sublime_syntax::ContextPattern<'a> {
901    let contexts = if let Some(StackEntry {
902        data: StackEntryData::Repetition { expression },
903        remaining,
904    }) = terminal.stack.last()
905    {
906        let next_lookahead = lookahead::lookahead_concatenation(
907            interpreted,
908            [expression]
909                .iter()
910                .cloned()
911                .cloned()
912                .chain(remaining.iter().cloned()),
913            &mut lookahead::LookaheadState::new(state.compiler),
914        );
915
916        let is_top_level = false;
917        let mut contexts = gen_simple_match_contexts(
918            state,
919            interpreted,
920            rule_key,
921            is_top_level,
922            &terminal.remaining,
923            &terminal.stack[..terminal.stack.len() - 1],
924        );
925
926        if next_lookahead == *lookahead {
927            // If the remaining of a top-level repetition leads to the same
928            // lookahead, then we have a simple repetition. We can just push
929            // the child match.
930            let exit = if contexts.is_empty() {
931                sublime_syntax::ContextChange::None
932            } else {
933                sublime_syntax::ContextChange::Push(
934                    state.compiler.allocator.alloc_slice_clone(&contexts),
935                )
936            };
937
938            return gen_terminal(
939                state,
940                interpreted,
941                context_name,
942                rule_key,
943                meta_content_scope,
944                branch_point,
945                scope,
946                terminal,
947                exit,
948                0,
949            );
950        } else if branch_point.is_none() {
951            // Unclear if correct??
952            // Otherwise we have a complex repetition, which behaves the
953            // same way as a regular match.
954            let repetition_context_key = ContextKey {
955                rule_key,
956                is_top_level: false,
957                lookahead: next_lookahead,
958                branch_point: None,
959            };
960
961            if let Some(name) = state.context_cache.get(&repetition_context_key)
962            {
963                contexts.insert(0, name);
964            } else {
965                let name =
966                    create_context_name(state, repetition_context_key.clone());
967                state.context_queue.push(repetition_context_key);
968                contexts.insert(0, name);
969            }
970        }
971
972        contexts
973    } else {
974        gen_simple_match_contexts(
975            state,
976            interpreted,
977            rule_key,
978            is_top_level,
979            &terminal.remaining,
980            &terminal.stack,
981        )
982    };
983
984    let pop = 1;
985    let exit = if contexts.is_empty() {
986        sublime_syntax::ContextChange::None
987    } else {
988        sublime_syntax::ContextChange::Push(
989            state.compiler.allocator.alloc_slice_clone(&contexts),
990        )
991    };
992
993    gen_terminal(
994        state,
995        interpreted,
996        context_name,
997        rule_key,
998        meta_content_scope,
999        branch_point,
1000        scope,
1001        terminal,
1002        exit,
1003        pop,
1004    )
1005}
1006
1007fn gen_simple_match_contexts<'a>(
1008    state: &mut State<'a>,
1009    interpreted: &Interpreted<'a>,
1010    mut rule_key: Key,
1011    mut is_top_level: bool,
1012    remaining: &[&'a Expression],
1013    stack: &[StackEntry<'a>],
1014) -> Vec<&'a str> {
1015    // Skip stack entries that don't have any remaining expressions. This avoids
1016    // creating meta-scope contexts when those get immediately popped anyway.
1017    let offset = if remaining.is_empty() {
1018        stack.iter().take_while(|e| e.remaining.is_empty()).count()
1019    } else {
1020        0
1021    };
1022
1023    // Create a context for each item in the match stack that needs it.
1024    let mut contexts = vec![];
1025
1026    if let Some(StackEntry { remaining, .. }) = stack[offset..].last() {
1027        if is_top_level && remaining.is_empty() {
1028            if let Some(context) =
1029                gen_meta_content_scope_context(state, interpreted, rule_key)
1030            {
1031                contexts.push(context);
1032            }
1033        }
1034    }
1035
1036    // We need to create entry contexts for remaining terminals that have a meta
1037    // content scope, otherwise they stack onto earlier terminals.
1038    // See issue #36
1039    let mut groups = vec![];
1040
1041    for (i, entry) in stack[offset..].iter().enumerate().rev() {
1042        match &entry.data {
1043            StackEntryData::Variable { .. } => {
1044                is_top_level = true;
1045            }
1046            _ => {
1047                is_top_level = false;
1048            }
1049        }
1050
1051        if !entry.remaining.is_empty() {
1052            let lookahead = lookahead::lookahead_concatenation(
1053                interpreted,
1054                entry.remaining.iter().cloned(),
1055                &mut lookahead::LookaheadState::new(state.compiler),
1056            );
1057
1058            groups.push(gen_simple_match_remaining_context(
1059                state,
1060                interpreted,
1061                is_top_level,
1062                rule_key,
1063                lookahead,
1064            ));
1065        }
1066
1067        if let StackEntryData::Variable { key } = &entry.data {
1068            rule_key = *key;
1069
1070            let rem = if i > 0 { &stack[i - 1].remaining } else { remaining };
1071            if rem.is_empty() && (i != 0 || !remaining.is_empty()) {
1072                // If a match has no remaining nodes it can generally be
1073                // ignored, unless it has a meta scope and there are child
1074                // matches that were not ignored. In those cases we create
1075                // a meta scope context.
1076                if let Some(context) =
1077                    gen_meta_content_scope_context(state, interpreted, rule_key)
1078                {
1079                    groups.push((vec![context], None));
1080                }
1081            }
1082        }
1083    }
1084
1085    if !remaining.is_empty() {
1086        let lookahead = lookahead::lookahead_concatenation(
1087            interpreted,
1088            remaining.iter().cloned(),
1089            &mut lookahead::LookaheadState::new(state.compiler),
1090        );
1091
1092        groups.push(gen_simple_match_remaining_context(
1093            state,
1094            interpreted,
1095            is_top_level,
1096            rule_key,
1097            lookahead,
1098        ));
1099    }
1100
1101    let last = groups.len();
1102    for (i, (c, key)) in groups.into_iter().enumerate() {
1103        if let Some(key) = key {
1104            if i != last - 1 {
1105                contexts.push(gen_entry_context(state, key, c));
1106            } else {
1107                contexts.extend(c);
1108            }
1109        } else {
1110            contexts.extend(c);
1111        }
1112    }
1113
1114    contexts
1115}
1116
1117fn gen_meta_content_scope_context<'a>(
1118    state: &mut State<'a>,
1119    interpreted: &Interpreted<'a>,
1120    rule_key: Key,
1121) -> Option<&'a str> {
1122    let meta_content_scope = interpreted.rules[&rule_key].options.scope;
1123
1124    if !meta_content_scope.is_empty() {
1125        let mut rule_meta_ctx_name = build_rule_key_name(state, rule_key);
1126        rule_meta_ctx_name.push_str("|meta");
1127
1128        if let Some((name, _)) =
1129            state.contexts.get_key_value(rule_meta_ctx_name.as_str())
1130        {
1131            Some(name)
1132        } else {
1133            let name = state.compiler.allocator.alloc_str(&rule_meta_ctx_name);
1134
1135            let matches = state.compiler.allocator.alloc_slice_clone(&[
1136                sublime_syntax::ContextPattern::Match(sublime_syntax::Match {
1137                    pattern: sublime_syntax::Pattern::from(""),
1138                    scope: sublime_syntax::Scope::EMPTY,
1139                    captures: &[],
1140                    change_context: sublime_syntax::ContextChange::None,
1141                    pop: 1,
1142                }),
1143            ]);
1144
1145            let comment = bumpalo::format!(in &state.compiler.allocator,
1146                "Meta scope context for {}",
1147                rule_key.with_compiler(state.compiler),
1148            )
1149            .into_bump_str();
1150
1151            state.contexts.insert(
1152                name,
1153                sublime_syntax::Context {
1154                    meta_content_scope,
1155                    meta_scope: sublime_syntax::Scope::EMPTY,
1156                    meta_include_prototype: true,
1157                    clear_scopes: sublime_syntax::ScopeClear::Amount(0),
1158                    matches,
1159                    comment: Some(comment),
1160                },
1161            );
1162
1163            Some(name)
1164        }
1165    } else {
1166        None
1167    }
1168}
1169
1170fn gen_entry_context<'a>(
1171    state: &mut State<'a>,
1172    rule_key: Key,
1173    contexts: Vec<&'a str>,
1174) -> &'a str {
1175    if let Some(context) = state.entry_contexts.get(&contexts.as_slice()) {
1176        return context;
1177    }
1178
1179    let index = if let Some(rule) = state.rules.get_mut(&rule_key) {
1180        let i = rule.entry_context_count;
1181        rule.entry_context_count += 1;
1182        i
1183    } else {
1184        state.rules.insert(
1185            rule_key,
1186            Rule {
1187                context_count: 0,
1188                branch_point_count: 0,
1189                entry_context_count: 1,
1190            },
1191        );
1192        0
1193    };
1194
1195    let entry_ctx = bumpalo::format!(in &state.compiler.allocator, "{}|entry-{}", contexts.last().unwrap(), index).into_bump_str();
1196
1197    assert!(!state.contexts.contains_key(entry_ctx));
1198
1199    let contexts = state.compiler.allocator.alloc_slice_clone(&contexts);
1200
1201    let matches = state.compiler.allocator.alloc_slice_clone(&[
1202        sublime_syntax::ContextPattern::Match(sublime_syntax::Match {
1203            pattern: sublime_syntax::Pattern::from(""),
1204            scope: sublime_syntax::Scope::EMPTY,
1205            captures: &[],
1206            change_context: sublime_syntax::ContextChange::Set(contexts),
1207            pop: 0,
1208        }),
1209    ]);
1210
1211    state.contexts.insert(
1212        entry_ctx,
1213        sublime_syntax::Context {
1214            meta_content_scope: sublime_syntax::Scope::EMPTY,
1215            meta_scope: sublime_syntax::Scope::EMPTY,
1216            meta_include_prototype: true,
1217            clear_scopes: sublime_syntax::ScopeClear::Amount(0),
1218            matches,
1219            comment: None,
1220        },
1221    );
1222    state.entry_contexts.insert(contexts, entry_ctx);
1223    entry_ctx
1224}
1225
1226fn gen_simple_match_remaining_context<'a>(
1227    state: &mut State<'a>,
1228    interpreted: &Interpreted<'a>,
1229    mut is_top_level: bool,
1230    mut rule_key: Key,
1231    mut lookahead: Lookahead<'a>,
1232) -> (Vec<&'a str>, Option<Key>) {
1233    // We can end up in situations where we have a context/rule_key that has
1234    // redundant variables, ie. every match stack has the same variable at the
1235    // end. Using a similar algorithm to gen_simple_match_contexts we can
1236    // de-duplicate this, which may also require making meta scope contexts.
1237    // See issue#18
1238    let min_matches_count =
1239        lookahead.terminals.iter().map(|term| term.stack.len()).min().unwrap();
1240
1241    let mut contexts = vec![];
1242
1243    for _ in 0..min_matches_count {
1244        // Take the last item in the match stack for the first lookahead match,
1245        // then make sure all the others are the same. Only then can we
1246        // de-duplicate.
1247        let sample = lookahead.terminals[0].stack.last().unwrap();
1248
1249        if !sample.remaining.is_empty() {
1250            break;
1251        }
1252
1253        let next_rule_key: Key = match &sample.data {
1254            StackEntryData::Variable { key } => *key,
1255            _ => break,
1256        };
1257
1258        let all_match = lookahead.terminals[1..].iter().all(|term| {
1259            let last = term.stack.last().unwrap();
1260
1261            let key: Key = match &last.data {
1262                StackEntryData::Variable { key } => *key,
1263                _ => return false,
1264            };
1265
1266            key == next_rule_key && last.remaining.is_empty()
1267        });
1268
1269        if !all_match {
1270            break;
1271        }
1272
1273        for terminal in &mut lookahead.terminals {
1274            terminal.stack.pop();
1275        }
1276
1277        if let Some(context_name) =
1278            gen_meta_content_scope_context(state, interpreted, rule_key)
1279        {
1280            contexts.push(context_name);
1281        }
1282
1283        rule_key = next_rule_key;
1284        is_top_level = true;
1285    }
1286
1287    let context_key =
1288        ContextKey { rule_key, is_top_level, lookahead, branch_point: None };
1289
1290    if let Some(name) = state.context_cache.get(&context_key) {
1291        contexts.push(name);
1292    } else {
1293        let name = create_context_name(state, context_key.clone());
1294        state.context_queue.push(context_key);
1295
1296        contexts.push(name);
1297    }
1298
1299    let key = if contexts.len() > 1
1300        || !interpreted.rules[&rule_key].options.scope.is_empty()
1301    {
1302        Some(rule_key)
1303    } else {
1304        None
1305    };
1306
1307    (contexts, key)
1308}
1309
1310fn build_rule_key_name(state: &State, rule_key: Key) -> String {
1311    let mut result = rule_key.get_name(state.compiler).to_string();
1312
1313    // Encode arguments
1314    if let Some(arguments) = rule_key.get_arguments(state.compiler) {
1315        result.push('@');
1316
1317        // Arguments can be in any format, so convert them to a string
1318        // representation first and then base-64 encode them to make them safe
1319        // to use in a context name.
1320        use base64::Engine;
1321        base64::engine::general_purpose::URL_SAFE_NO_PAD
1322            .encode_string(arguments.as_bytes(), &mut result);
1323    }
1324
1325    result
1326}
1327
1328// Generate an uncached unique name for a context key
1329fn create_uncached_context_name<'a>(
1330    state: &mut State<'a>,
1331    rule_key: Key,
1332    branch_point: &Option<BranchPoint>,
1333) -> &'a str {
1334    let mut result = build_rule_key_name(state, rule_key);
1335
1336    // Add inner context count to prevent context name collisions in inner contexts
1337    let index = if let Some(rule) = state.rules.get_mut(&rule_key) {
1338        let i = rule.context_count;
1339        rule.context_count += 1;
1340        i
1341    } else {
1342        state.rules.insert(
1343            rule_key,
1344            Rule {
1345                context_count: 1,
1346                branch_point_count: 0,
1347                entry_context_count: 0,
1348            },
1349        );
1350        0
1351    };
1352    write!(result, "|{}", index).unwrap();
1353
1354    // Add optional branch point
1355    if let Some(branch_point) = &branch_point {
1356        result.push('|');
1357        result.push_str(branch_point.name);
1358    }
1359
1360    state.compiler.allocator.alloc_str(&result)
1361}
1362
1363// Generate a unique name for a context key
1364fn create_context_name<'a>(
1365    state: &mut State<'a>,
1366    key: ContextKey<'a>,
1367) -> &'a str {
1368    let name =
1369        create_uncached_context_name(state, key.rule_key, &key.branch_point);
1370
1371    let old_entry = state.context_cache.insert(key, name);
1372    assert!(old_entry.is_none());
1373
1374    name
1375}
1376
1377// Generate a new branch point for a rule
1378fn create_branch_point_name<'a>(state: &mut State<'a>, key: Key) -> &'a str {
1379    let index = if let Some(rule) = state.rules.get_mut(&key) {
1380        rule.branch_point_count += 1;
1381        rule.branch_point_count
1382    } else {
1383        state.rules.insert(
1384            key,
1385            Rule {
1386                context_count: 0,
1387                branch_point_count: 1,
1388                entry_context_count: 0,
1389            },
1390        );
1391        1
1392    };
1393
1394    bumpalo::format!(
1395        in &state.compiler.allocator,
1396        "{}@{}",
1397        key.get_name(state.compiler),
1398        index
1399    )
1400    .into_bump_str()
1401}
1402
1403fn create_branch_point_include_context_name<'a>(
1404    state: &mut State<'a>,
1405    branch_point: &str,
1406) -> &'a str {
1407    bumpalo::format!(
1408        in &state.compiler.allocator,
1409        "include!{}", branch_point)
1410    .into_bump_str()
1411}
1412
1413fn scope_for_match_stack<'a>(
1414    state: &mut State<'a>,
1415    interpreted: &Interpreted<'a>,
1416    rule_key: Option<Key>,
1417    terminal: &Terminal<'a>,
1418) -> sublime_syntax::Scope<'a> {
1419    let mut scope = sublime_syntax::Scope::EMPTY;
1420
1421    if let Some(rule_key) = rule_key {
1422        scope = interpreted.rules[&rule_key].options.scope;
1423    }
1424
1425    for entry in terminal.stack.iter().rev() {
1426        if let StackEntryData::Variable { key } = &entry.data {
1427            let rule_options = &interpreted.rules[key].options;
1428
1429            scope =
1430                scope.extended(rule_options.scope, &state.compiler.allocator);
1431        }
1432    }
1433
1434    scope.extended(terminal.options.unwrap().scope, &state.compiler.allocator)
1435}