Skip to main content

tree_sitter_generate/
render.rs

1use std::{
2    cmp,
3    collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4    fmt::Write,
5    mem::swap,
6};
7
8use crate::LANGUAGE_VERSION;
9use indoc::indoc;
10use serde::Serialize;
11use thiserror::Error;
12
13use super::{
14    build_tables::Tables,
15    grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
16    nfa::CharacterSet,
17    node_types::ChildType,
18    rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
19    tables::{
20        AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
21        ParseTableEntry,
22    },
23};
24
25const SMALL_STATE_THRESHOLD: usize = 64;
26pub const ABI_VERSION_MIN: usize = 14;
27pub const ABI_VERSION_MAX: usize = LANGUAGE_VERSION;
28const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15;
29
30pub type RenderResult<T> = Result<T, RenderError>;
31
32#[derive(Debug, Error, Serialize)]
33pub enum RenderError {
34    #[error("Parse table action count {0} exceeds maximum value of {max}", max=u16::MAX)]
35    ParseTable(usize),
36    #[error("This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {0}")]
37    ABI(usize),
38}
39
40#[clippy::format_args]
41macro_rules! add {
42    ($this: tt, $($arg: tt)*) => {{
43        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
44    }}
45}
46
47macro_rules! add_whitespace {
48    ($this:tt) => {{
49        // 4 bytes per char, 2 spaces per indent level
50        $this.buffer.reserve(4 * 2 * $this.indent_level);
51        for _ in 0..$this.indent_level {
52            write!(&mut $this.buffer, "  ").unwrap();
53        }
54    }};
55}
56
57#[clippy::format_args]
58macro_rules! add_line {
59    ($this: tt, $($arg: tt)*) => {
60        add_whitespace!($this);
61        $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
62        $this.buffer += "\n";
63    }
64}
65
66macro_rules! indent {
67    ($this:tt) => {
68        $this.indent_level += 1;
69    };
70}
71
72macro_rules! dedent {
73    ($this:tt) => {
74        assert_ne!($this.indent_level, 0);
75        $this.indent_level -= 1;
76    };
77}
78
79#[derive(Default)]
80struct Generator {
81    buffer: String,
82    indent_level: usize,
83    language_name: String,
84    parse_table: ParseTable,
85    main_lex_table: LexTable,
86    keyword_lex_table: LexTable,
87    large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
88    large_character_set_info: Vec<LargeCharacterSetInfo>,
89    large_state_count: usize,
90    syntax_grammar: SyntaxGrammar,
91    lexical_grammar: LexicalGrammar,
92    default_aliases: AliasMap,
93    symbol_order: HashMap<Symbol, usize>,
94    symbol_ids: HashMap<Symbol, String>,
95    alias_ids: HashMap<Alias, String>,
96    unique_aliases: Vec<Alias>,
97    symbol_map: HashMap<Symbol, Symbol>,
98    reserved_word_sets: Vec<TokenSet>,
99    reserved_word_set_ids_by_parse_state: Vec<usize>,
100    field_names: Vec<String>,
101    supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
102    supertype_map: BTreeMap<String, Vec<ChildType>>,
103    abi_version: usize,
104    metadata: Option<Metadata>,
105}
106
107struct LargeCharacterSetInfo {
108    constant_name: String,
109    is_used: bool,
110}
111
112struct Metadata {
113    major_version: u8,
114    minor_version: u8,
115    patch_version: u8,
116}
117
118impl Generator {
119    fn generate(mut self) -> RenderResult<String> {
120        self.init();
121        self.add_header();
122        self.add_includes();
123        self.add_pragmas();
124        self.add_stats();
125        self.add_symbol_enum();
126        self.add_symbol_names_list();
127        self.add_unique_symbol_map();
128        self.add_symbol_metadata_list();
129
130        if !self.field_names.is_empty() {
131            self.add_field_name_enum();
132            self.add_field_name_names_list();
133            self.add_field_sequences();
134        }
135
136        if !self.parse_table.production_infos.is_empty() {
137            self.add_alias_sequences();
138        }
139
140        self.add_non_terminal_alias_map();
141        self.add_primary_state_id_list();
142
143        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && !self.supertype_map.is_empty() {
144            self.add_supertype_map();
145        }
146
147        let buffer_offset_before_lex_functions = self.buffer.len();
148
149        let mut main_lex_table = LexTable::default();
150        swap(&mut main_lex_table, &mut self.main_lex_table);
151        self.add_lex_function("ts_lex", main_lex_table);
152
153        if self.syntax_grammar.word_token.is_some() {
154            let mut keyword_lex_table = LexTable::default();
155            swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
156            self.add_lex_function("ts_lex_keywords", keyword_lex_table);
157        }
158
159        // Once the lex functions are generated, and we've determined which large
160        // character sets are actually used, we can generate the large character set
161        // constants. Insert them into the output buffer before the lex functions.
162        let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string();
163        self.buffer.truncate(buffer_offset_before_lex_functions);
164        for ix in 0..self.large_character_sets.len() {
165            self.add_character_set(ix);
166        }
167        self.buffer.push_str(&lex_functions);
168
169        self.add_lex_modes();
170
171        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
172        {
173            self.add_reserved_word_sets();
174        }
175
176        self.add_parse_table()?;
177
178        if !self.syntax_grammar.external_tokens.is_empty() {
179            self.add_external_token_enum();
180            self.add_external_scanner_symbol_map();
181            self.add_external_scanner_states_list();
182        }
183
184        self.add_parser_export();
185
186        Ok(self.buffer)
187    }
188
189    fn init(&mut self) {
190        let mut symbol_identifiers = HashSet::new();
191        for i in 0..self.parse_table.symbols.len() {
192            self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
193        }
194        self.symbol_ids.insert(
195            Symbol::end_of_nonterminal_extra(),
196            self.symbol_ids[&Symbol::end()].clone(),
197        );
198
199        self.symbol_map = HashMap::new();
200
201        for symbol in &self.parse_table.symbols {
202            let mut mapping = symbol;
203
204            // There can be multiple symbols in the grammar that have the same name and kind,
205            // due to simple aliases. When that happens, ensure that they map to the same
206            // public-facing symbol. If one of the symbols is not aliased, choose that one
207            // to be the public-facing symbol. Otherwise, pick the symbol with the lowest
208            // numeric value.
209            if let Some(alias) = self.default_aliases.get(symbol) {
210                let kind = alias.kind();
211                for other_symbol in &self.parse_table.symbols {
212                    if let Some(other_alias) = self.default_aliases.get(other_symbol) {
213                        if other_symbol < mapping && other_alias == alias {
214                            mapping = other_symbol;
215                        }
216                    } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
217                        mapping = other_symbol;
218                        break;
219                    }
220                }
221            }
222            // Two anonymous tokens with different flags but the same string value
223            // should be represented with the same symbol in the public API. Examples:
224            // * "<" and token(prec(1, "<"))
225            // * "(" and token.immediate("(")
226            else if symbol.is_terminal() {
227                let metadata = self.metadata_for_symbol(*symbol);
228                for other_symbol in &self.parse_table.symbols {
229                    let other_metadata = self.metadata_for_symbol(*other_symbol);
230                    if other_metadata == metadata {
231                        if let Some(mapped) = self.symbol_map.get(other_symbol) {
232                            if mapped == symbol {
233                                break;
234                            }
235                        }
236                        mapping = other_symbol;
237                        break;
238                    }
239                }
240            }
241
242            self.symbol_map.insert(*symbol, *mapping);
243        }
244
245        for production_info in &self.parse_table.production_infos {
246            // Build a list of all field names
247            for field_name in production_info.field_map.keys() {
248                if let Err(i) = self.field_names.binary_search(field_name) {
249                    self.field_names.insert(i, field_name.clone());
250                }
251            }
252
253            for alias in &production_info.alias_sequence {
254                // Generate a mapping from aliases to C identifiers.
255                if let Some(alias) = &alias {
256                    // Some aliases match an existing symbol in the grammar.
257                    let alias_id =
258                        if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
259                            self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
260                        }
261                        // Other aliases don't match any existing symbol, and need their own
262                        // identifiers.
263                        else {
264                            if let Err(i) = self.unique_aliases.binary_search(alias) {
265                                self.unique_aliases.insert(i, alias.clone());
266                            }
267
268                            if alias.is_named {
269                                format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
270                            } else {
271                                format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
272                            }
273                        };
274
275                    self.alias_ids.entry(alias.clone()).or_insert(alias_id);
276                }
277            }
278        }
279
280        for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() {
281            let count = self.large_character_sets[0..ix]
282                .iter()
283                .filter(|(sym, _)| sym == symbol)
284                .count()
285                + 1;
286            let constant_name = if let Some(symbol) = symbol {
287                format!("{}_character_set_{}", self.symbol_ids[symbol], count)
288            } else {
289                format!("extras_character_set_{count}")
290            };
291            self.large_character_set_info.push(LargeCharacterSetInfo {
292                constant_name,
293                is_used: false,
294            });
295        }
296
297        // Assign an id to each unique reserved word set
298        self.reserved_word_sets.push(TokenSet::new());
299        for state in &self.parse_table.states {
300            let id = if let Some(ix) = self
301                .reserved_word_sets
302                .iter()
303                .position(|set| *set == state.reserved_words)
304            {
305                ix
306            } else {
307                self.reserved_word_sets.push(state.reserved_words.clone());
308                self.reserved_word_sets.len() - 1
309            };
310            self.reserved_word_set_ids_by_parse_state.push(id);
311        }
312
313        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
314            for (supertype, subtypes) in &self.supertype_symbol_map {
315                if let Some(supertype) = self.symbol_ids.get(supertype) {
316                    self.supertype_map
317                        .entry(supertype.clone())
318                        .or_insert_with(|| subtypes.clone());
319                }
320            }
321
322            self.supertype_symbol_map.clear();
323        }
324
325        // Determine which states should use the "small state" representation, and which should
326        // use the normal array representation.
327        let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
328        self.large_state_count = self
329            .parse_table
330            .states
331            .iter()
332            .enumerate()
333            .take_while(|(i, s)| {
334                *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
335            })
336            .count();
337    }
338
339    fn add_header(&mut self) {
340        add_line!(self, "/* Automatically @generated by tree-sitter */",);
341        add_line!(self, "");
342    }
343
344    fn add_includes(&mut self) {
345        add_line!(self, "#include \"tree_sitter/parser.h\"");
346        add_line!(self, "");
347    }
348
349    fn add_pragmas(&mut self) {
350        add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
351        add_line!(
352            self,
353            "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
354        );
355        add_line!(self, "#endif");
356        add_line!(self, "");
357
358        // Compiling large lexer functions can be very slow. Disabling optimizations
359        // is not ideal, but only a very small fraction of overall parse time is
360        // spent lexing, so the performance impact of this is negligible.
361        if self.main_lex_table.states.len() > 300 {
362            add_line!(self, "#ifdef _MSC_VER");
363            add_line!(self, "#pragma optimize(\"\", off)");
364            add_line!(self, "#elif defined(__clang__)");
365            add_line!(self, "#pragma clang optimize off");
366            add_line!(self, "#elif defined(__GNUC__)");
367            add_line!(self, "#pragma GCC optimize (\"O0\")");
368            add_line!(self, "#endif");
369            add_line!(self, "");
370        }
371    }
372
373    fn add_stats(&mut self) {
374        let token_count = self
375            .parse_table
376            .symbols
377            .iter()
378            .filter(|symbol| {
379                if symbol.is_terminal() || symbol.is_eof() {
380                    true
381                } else if symbol.is_external() {
382                    self.syntax_grammar.external_tokens[symbol.index]
383                        .corresponding_internal_token
384                        .is_none()
385                } else {
386                    false
387                }
388            })
389            .count();
390
391        add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version);
392        add_line!(
393            self,
394            "#define STATE_COUNT {}",
395            self.parse_table.states.len()
396        );
397        add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
398
399        add_line!(
400            self,
401            "#define SYMBOL_COUNT {}",
402            self.parse_table.symbols.len()
403        );
404        add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
405        add_line!(self, "#define TOKEN_COUNT {token_count}");
406        add_line!(
407            self,
408            "#define EXTERNAL_TOKEN_COUNT {}",
409            self.syntax_grammar.external_tokens.len()
410        );
411        add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
412        add_line!(
413            self,
414            "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
415            self.parse_table.max_aliased_production_length
416        );
417        add_line!(
418            self,
419            "#define MAX_RESERVED_WORD_SET_SIZE {}",
420            self.reserved_word_sets
421                .iter()
422                .map(TokenSet::len)
423                .max()
424                .unwrap()
425        );
426
427        add_line!(
428            self,
429            "#define PRODUCTION_ID_COUNT {}",
430            self.parse_table.production_infos.len()
431        );
432        add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len());
433        add_line!(self, "");
434    }
435
436    fn add_symbol_enum(&mut self) {
437        add_line!(self, "enum ts_symbol_identifiers {{");
438        indent!(self);
439        self.symbol_order.insert(Symbol::end(), 0);
440        let mut i = 1;
441        for symbol in &self.parse_table.symbols {
442            if *symbol != Symbol::end() {
443                self.symbol_order.insert(*symbol, i);
444                add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
445                i += 1;
446            }
447        }
448        for alias in &self.unique_aliases {
449            add_line!(self, "{} = {i},", self.alias_ids[alias]);
450            i += 1;
451        }
452        dedent!(self);
453        add_line!(self, "}};");
454        add_line!(self, "");
455    }
456
457    fn add_symbol_names_list(&mut self) {
458        add_line!(self, "static const char * const ts_symbol_names[] = {{");
459        indent!(self);
460        for symbol in &self.parse_table.symbols {
461            let name = self.sanitize_string(
462                self.default_aliases
463                    .get(symbol)
464                    .map_or(self.metadata_for_symbol(*symbol).0, |alias| {
465                        alias.value.as_str()
466                    }),
467            );
468            add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
469        }
470        for alias in &self.unique_aliases {
471            add_line!(
472                self,
473                "[{}] = \"{}\",",
474                self.alias_ids[alias],
475                self.sanitize_string(&alias.value)
476            );
477        }
478        dedent!(self);
479        add_line!(self, "}};");
480        add_line!(self, "");
481    }
482
483    fn add_unique_symbol_map(&mut self) {
484        add_line!(self, "static const TSSymbol ts_symbol_map[] = {{");
485        indent!(self);
486        for symbol in &self.parse_table.symbols {
487            add_line!(
488                self,
489                "[{}] = {},",
490                self.symbol_ids[symbol],
491                self.symbol_ids[&self.symbol_map[symbol]],
492            );
493        }
494
495        for alias in &self.unique_aliases {
496            add_line!(
497                self,
498                "[{}] = {},",
499                self.alias_ids[alias],
500                self.alias_ids[alias],
501            );
502        }
503
504        dedent!(self);
505        add_line!(self, "}};");
506        add_line!(self, "");
507    }
508
509    fn add_field_name_enum(&mut self) {
510        add_line!(self, "enum ts_field_identifiers {{");
511        indent!(self);
512        for (i, field_name) in self.field_names.iter().enumerate() {
513            add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
514        }
515        dedent!(self);
516        add_line!(self, "}};");
517        add_line!(self, "");
518    }
519
520    fn add_field_name_names_list(&mut self) {
521        add_line!(self, "static const char * const ts_field_names[] = {{");
522        indent!(self);
523        add_line!(self, "[0] = NULL,");
524        for field_name in &self.field_names {
525            add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
526        }
527        dedent!(self);
528        add_line!(self, "}};");
529        add_line!(self, "");
530    }
531
532    fn add_symbol_metadata_list(&mut self) {
533        add_line!(
534            self,
535            "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
536        );
537        indent!(self);
538        for symbol in &self.parse_table.symbols {
539            add_line!(self, "[{}] = {{", self.symbol_ids[symbol]);
540            indent!(self);
541            if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
542                add_line!(self, ".visible = true,");
543                add_line!(self, ".named = {is_named},");
544            } else {
545                match self.metadata_for_symbol(*symbol).1 {
546                    VariableType::Named => {
547                        add_line!(self, ".visible = true,");
548                        add_line!(self, ".named = true,");
549                    }
550                    VariableType::Anonymous => {
551                        add_line!(self, ".visible = true,");
552                        add_line!(self, ".named = false,");
553                    }
554                    VariableType::Hidden => {
555                        add_line!(self, ".visible = false,");
556                        add_line!(self, ".named = true,");
557                        if self.syntax_grammar.supertype_symbols.contains(symbol) {
558                            add_line!(self, ".supertype = true,");
559                        }
560                    }
561                    VariableType::Auxiliary => {
562                        add_line!(self, ".visible = false,");
563                        add_line!(self, ".named = false,");
564                    }
565                }
566            }
567            dedent!(self);
568            add_line!(self, "}},");
569        }
570        for alias in &self.unique_aliases {
571            add_line!(self, "[{}] = {{", self.alias_ids[alias]);
572            indent!(self);
573            add_line!(self, ".visible = true,");
574            add_line!(self, ".named = {},", alias.is_named);
575            dedent!(self);
576            add_line!(self, "}},");
577        }
578        dedent!(self);
579        add_line!(self, "}};");
580        add_line!(self, "");
581    }
582
583    fn add_alias_sequences(&mut self) {
584        add_line!(
585            self,
586            "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
587        );
588        indent!(self);
589        for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
590            if production_info.alias_sequence.is_empty() {
591                // Work around MSVC's intolerance of empty array initializers by
592                // explicitly zero-initializing the first element.
593                if i == 0 {
594                    add_line!(self, "[0] = {{0}},");
595                }
596                continue;
597            }
598
599            add_line!(self, "[{i}] = {{");
600            indent!(self);
601            for (j, alias) in production_info.alias_sequence.iter().enumerate() {
602                if let Some(alias) = alias {
603                    add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
604                }
605            }
606            dedent!(self);
607            add_line!(self, "}},");
608        }
609        dedent!(self);
610        add_line!(self, "}};");
611        add_line!(self, "");
612    }
613
614    fn add_non_terminal_alias_map(&mut self) {
615        let mut alias_ids_by_symbol = HashMap::new();
616        for variable in &self.syntax_grammar.variables {
617            for production in &variable.productions {
618                for step in &production.steps {
619                    if let Some(alias) = &step.alias {
620                        if step.symbol.is_non_terminal()
621                            && Some(alias) != self.default_aliases.get(&step.symbol)
622                            && self.symbol_ids.contains_key(&step.symbol)
623                        {
624                            if let Some(alias_id) = self.alias_ids.get(alias) {
625                                let alias_ids =
626                                    alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new());
627                                if let Err(i) = alias_ids.binary_search(&alias_id) {
628                                    alias_ids.insert(i, alias_id);
629                                }
630                            }
631                        }
632                    }
633                }
634            }
635        }
636
637        let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
638        alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
639
640        add_line!(
641            self,
642            "static const uint16_t ts_non_terminal_alias_map[] = {{"
643        );
644        indent!(self);
645        for (symbol, alias_ids) in alias_ids_by_symbol {
646            let symbol_id = &self.symbol_ids[symbol];
647            let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]];
648            add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
649            indent!(self);
650            add_line!(self, "{public_symbol_id},");
651            for alias_id in alias_ids {
652                add_line!(self, "{alias_id},");
653            }
654            dedent!(self);
655        }
656        add_line!(self, "0,");
657        dedent!(self);
658        add_line!(self, "}};");
659        add_line!(self, "");
660    }
661
662    /// Produces a list of the "primary state" for every state in the grammar.
663    ///
664    /// The "primary state" for a given state is the first encountered state that behaves
665    /// identically with respect to query analysis. We derive this by keeping track of the `core_id`
666    /// for each state and treating the first state with a given `core_id` as primary.
667    fn add_primary_state_id_list(&mut self) {
668        add_line!(
669            self,
670            "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{"
671        );
672        indent!(self);
673        let mut first_state_for_each_core_id = HashMap::new();
674        for (idx, state) in self.parse_table.states.iter().enumerate() {
675            let primary_state = first_state_for_each_core_id
676                .entry(state.core_id)
677                .or_insert(idx);
678            add_line!(self, "[{idx}] = {primary_state},");
679        }
680        dedent!(self);
681        add_line!(self, "}};");
682        add_line!(self, "");
683    }
684
685    fn add_field_sequences(&mut self) {
686        let mut flat_field_maps = vec![];
687        let mut next_flat_field_map_index = 0;
688        self.get_field_map_id(
689            Vec::new(),
690            &mut flat_field_maps,
691            &mut next_flat_field_map_index,
692        );
693
694        let mut field_map_ids = Vec::with_capacity(self.parse_table.production_infos.len());
695        for production_info in &self.parse_table.production_infos {
696            if production_info.field_map.is_empty() {
697                field_map_ids.push((0, 0));
698            } else {
699                let mut flat_field_map = Vec::with_capacity(production_info.field_map.len());
700                for (field_name, locations) in &production_info.field_map {
701                    for location in locations {
702                        flat_field_map.push((field_name.clone(), *location));
703                    }
704                }
705                let field_map_len = flat_field_map.len();
706                field_map_ids.push((
707                    self.get_field_map_id(
708                        flat_field_map,
709                        &mut flat_field_maps,
710                        &mut next_flat_field_map_index,
711                    ),
712                    field_map_len,
713                ));
714            }
715        }
716
717        add_line!(
718            self,
719            "static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
720        );
721        indent!(self);
722        for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
723            if length > 0 {
724                add_line!(
725                    self,
726                    "[{production_id}] = {{.index = {row_id}, .length = {length}}},",
727                );
728            }
729        }
730        dedent!(self);
731        add_line!(self, "}};");
732        add_line!(self, "");
733
734        add_line!(
735            self,
736            "static const TSFieldMapEntry ts_field_map_entries[] = {{",
737        );
738        indent!(self);
739        for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
740            add_line!(self, "[{row_index}] =");
741            indent!(self);
742            for (field_name, location) in field_pairs {
743                add_whitespace!(self);
744                add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
745                if location.inherited {
746                    add!(self, ", .inherited = true");
747                }
748                add!(self, "}},\n");
749            }
750            dedent!(self);
751        }
752
753        dedent!(self);
754        add_line!(self, "}};");
755        add_line!(self, "");
756    }
757
758    fn add_supertype_map(&mut self) {
759        add_line!(
760            self,
761            "static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{"
762        );
763        indent!(self);
764        for supertype in self.supertype_map.keys() {
765            add_line!(self, "{supertype},");
766        }
767        dedent!(self);
768        add_line!(self, "}};\n");
769
770        add_line!(
771            self,
772            "static const TSMapSlice ts_supertype_map_slices[] = {{",
773        );
774        indent!(self);
775        let mut row_id = 0;
776        let mut supertype_ids = vec![0];
777        let mut supertype_string_map = BTreeMap::new();
778        for (supertype, subtypes) in &self.supertype_map {
779            supertype_string_map.insert(
780                supertype,
781                subtypes
782                    .iter()
783                    .flat_map(|s| match s {
784                        ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
785                        ChildType::Aliased(alias) => {
786                            self.alias_ids.get(alias).cloned().map_or_else(
787                                || {
788                                    self.symbols_for_alias(alias)
789                                        .into_iter()
790                                        .map(|s| self.symbol_ids.get(&s).cloned())
791                                        .collect()
792                                },
793                                |a| vec![Some(a)],
794                            )
795                        }
796                    })
797                    .flatten()
798                    .collect::<BTreeSet<String>>(),
799            );
800        }
801        for (supertype, subtypes) in &supertype_string_map {
802            let length = subtypes.len();
803            add_line!(
804                self,
805                "[{supertype}] = {{.index = {row_id}, .length = {length}}},",
806            );
807            row_id += length;
808            supertype_ids.push(row_id);
809        }
810        dedent!(self);
811        add_line!(self, "}};");
812        add_line!(self, "");
813
814        add_line!(
815            self,
816            "static const TSSymbol ts_supertype_map_entries[] = {{",
817        );
818        indent!(self);
819        for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() {
820            let row_index = supertype_ids[i];
821            add_line!(self, "[{row_index}] =");
822            indent!(self);
823            for subtype in subtypes {
824                add_whitespace!(self);
825                add!(self, "{subtype},\n");
826            }
827            dedent!(self);
828        }
829
830        dedent!(self);
831        add_line!(self, "}};");
832        add_line!(self, "");
833    }
834
835    fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
836        add_line!(
837            self,
838            "static bool {name}(TSLexer *lexer, TSStateId state) {{",
839        );
840        indent!(self);
841
842        add_line!(self, "START_LEXER();");
843        add_line!(self, "eof = lexer->eof(lexer);");
844        add_line!(self, "switch (state) {{");
845
846        indent!(self);
847        for (i, state) in lex_table.states.into_iter().enumerate() {
848            add_line!(self, "case {i}:");
849            indent!(self);
850            self.add_lex_state(i, state);
851            dedent!(self);
852        }
853
854        add_line!(self, "default:");
855        indent!(self);
856        add_line!(self, "return false;");
857        dedent!(self);
858
859        dedent!(self);
860        add_line!(self, "}}");
861
862        dedent!(self);
863        add_line!(self, "}}");
864        add_line!(self, "");
865    }
866
867    fn add_lex_state(&mut self, _state_ix: usize, state: LexState) {
868        if let Some(accept_action) = state.accept_action {
869            add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
870        }
871
872        if let Some(eof_action) = state.eof_action {
873            add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
874        }
875
876        let mut chars_copy = CharacterSet::empty();
877        let mut large_set = CharacterSet::empty();
878        let mut ruled_out_chars = CharacterSet::empty();
879
880        // The transitions in a lex state are sorted with the single-character
881        // transitions first. If there are many single-character transitions,
882        // then implement them using an array of (lookahead character, state)
883        // pairs, instead of individual if statements, in order to reduce compile
884        // time.
885        let mut leading_simple_transition_count = 0;
886        let mut leading_simple_transition_range_count = 0;
887        for (chars, action) in &state.advance_actions {
888            if action.in_main_token
889                && chars.ranges().all(|r| {
890                    let start = *r.start() as u32;
891                    let end = *r.end() as u32;
892                    end <= start + 1 && u16::try_from(end).is_ok()
893                })
894            {
895                leading_simple_transition_count += 1;
896                leading_simple_transition_range_count += chars.range_count();
897            } else {
898                break;
899            }
900        }
901
902        if leading_simple_transition_range_count >= 8 {
903            add_line!(self, "ADVANCE_MAP(");
904            indent!(self);
905            for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] {
906                for range in chars.ranges() {
907                    add_whitespace!(self);
908                    self.add_character(*range.start());
909                    add!(self, ", {},\n", action.state);
910                    if range.end() > range.start() {
911                        add_whitespace!(self);
912                        self.add_character(*range.end());
913                        add!(self, ", {},\n", action.state);
914                    }
915                }
916                ruled_out_chars = ruled_out_chars.add(chars);
917            }
918            dedent!(self);
919            add_line!(self, ");");
920        } else {
921            leading_simple_transition_count = 0;
922        }
923
924        for (chars, action) in &state.advance_actions[leading_simple_transition_count..] {
925            add_whitespace!(self);
926
927            // The lex state's advance actions are represented with disjoint
928            // sets of characters. When translating these disjoint sets into a
929            // sequence of checks, we don't need to re-check conditions that
930            // have already been checked due to previous transitions.
931            //
932            // Note that this simplification may result in an empty character set.
933            // That means that the transition is guaranteed (nothing further needs to
934            // be checked), not that this transition is impossible.
935            let simplified_chars = chars.simplify_ignoring(&ruled_out_chars);
936
937            // For large character sets, find the best matching character set from
938            // a pre-selected list of large character sets, which are based on the
939            // state transitions for invidual tokens. This transition may not exactly
940            // match one of the pre-selected character sets. In that case, determine
941            // the additional checks that need to be performed to match this transition.
942            let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None;
943            if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT {
944                for (ix, (_, set)) in self.large_character_sets.iter().enumerate() {
945                    chars_copy.assign(&simplified_chars);
946                    large_set.assign(set);
947                    let intersection = chars_copy.remove_intersection(&mut large_set);
948                    if !intersection.is_empty() {
949                        let additions = chars_copy.simplify_ignoring(&ruled_out_chars);
950                        let removals = large_set.simplify_ignoring(&ruled_out_chars);
951                        let total_range_count = additions.range_count() + removals.range_count();
952                        if total_range_count >= simplified_chars.range_count() {
953                            continue;
954                        }
955                        if let Some((_, best_additions, best_removals)) = &best_large_char_set {
956                            let best_range_count =
957                                best_additions.range_count() + best_removals.range_count();
958                            if best_range_count < total_range_count {
959                                continue;
960                            }
961                        }
962                        best_large_char_set = Some((ix, additions, removals));
963                    }
964                }
965            }
966
967            // Add this transition's character set to the set of ruled out characters,
968            // which don't need to be checked for subsequent transitions in this state.
969            ruled_out_chars = ruled_out_chars.add(chars);
970
971            let mut large_char_set_ix = None;
972            let mut asserted_chars = simplified_chars;
973            let mut negated_chars = CharacterSet::empty();
974            if let Some((char_set_ix, additions, removals)) = best_large_char_set {
975                asserted_chars = additions;
976                negated_chars = removals;
977                large_char_set_ix = Some(char_set_ix);
978            }
979
980            let line_break = format!("\n{}", "  ".repeat(self.indent_level + 2));
981
982            let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
983            let has_negative_condition = !negated_chars.is_empty();
984            let has_condition = has_positive_condition || has_negative_condition;
985            if has_condition {
986                add!(self, "if (");
987                if has_positive_condition && has_negative_condition {
988                    add!(self, "(");
989                }
990            }
991
992            if let Some(large_char_set_ix) = large_char_set_ix {
993                let large_set = &self.large_character_sets[large_char_set_ix].1;
994
995                // If the character set contains the null character, check that we
996                // are not at the end of the file.
997                let check_eof = large_set.contains('\0');
998                if check_eof {
999                    add!(self, "(!eof && ");
1000                }
1001
1002                let char_set_info = &mut self.large_character_set_info[large_char_set_ix];
1003                char_set_info.is_used = true;
1004                add!(
1005                    self,
1006                    "set_contains({}, {}, lookahead)",
1007                    char_set_info.constant_name,
1008                    large_set.range_count(),
1009                );
1010                if check_eof {
1011                    add!(self, ")");
1012                }
1013            }
1014
1015            if !asserted_chars.is_empty() {
1016                if large_char_set_ix.is_some() {
1017                    add!(self, " ||{line_break}");
1018                }
1019
1020                // If the character set contains the max character, than it probably
1021                // corresponds to a negated character class in a regex, so it will be more
1022                // concise and readable to express it in terms of negated ranges.
1023                let is_included = !asserted_chars.contains(char::MAX);
1024                if !is_included {
1025                    asserted_chars = asserted_chars.negate().add_char('\0');
1026                }
1027
1028                self.add_character_range_conditions(&asserted_chars, is_included, &line_break);
1029            }
1030
1031            if has_negative_condition {
1032                if has_positive_condition {
1033                    add!(self, ") &&{line_break}");
1034                }
1035                self.add_character_range_conditions(&negated_chars, false, &line_break);
1036            }
1037
1038            if has_condition {
1039                add!(self, ") ");
1040            }
1041
1042            self.add_advance_action(action);
1043            add!(self, "\n");
1044        }
1045
1046        add_line!(self, "END_STATE();");
1047    }
1048
1049    fn add_character_range_conditions(
1050        &mut self,
1051        characters: &CharacterSet,
1052        is_included: bool,
1053        line_break: &str,
1054    ) {
1055        for (i, range) in characters.ranges().enumerate() {
1056            let start = *range.start();
1057            let end = *range.end();
1058            if is_included {
1059                if i > 0 {
1060                    add!(self, " ||{line_break}");
1061                }
1062
1063                if start == '\0' {
1064                    add!(self, "(!eof && ");
1065                    if end == '\0' {
1066                        add!(self, "lookahead == 0");
1067                    } else {
1068                        add!(self, "lookahead <= ");
1069                    }
1070                    self.add_character(end);
1071                    add!(self, ")");
1072                } else if end == start {
1073                    add!(self, "lookahead == ");
1074                    self.add_character(start);
1075                } else if end as u32 == start as u32 + 1 {
1076                    add!(self, "lookahead == ");
1077                    self.add_character(start);
1078                    add!(self, " ||{line_break}lookahead == ");
1079                    self.add_character(end);
1080                } else {
1081                    add!(self, "(");
1082                    self.add_character(start);
1083                    add!(self, " <= lookahead && lookahead <= ");
1084                    self.add_character(end);
1085                    add!(self, ")");
1086                }
1087            } else {
1088                if i > 0 {
1089                    add!(self, " &&{line_break}");
1090                }
1091                if end == start {
1092                    add!(self, "lookahead != ");
1093                    self.add_character(start);
1094                } else if end as u32 == start as u32 + 1 {
1095                    add!(self, "lookahead != ");
1096                    self.add_character(start);
1097                    add!(self, " &&{line_break}lookahead != ");
1098                    self.add_character(end);
1099                } else if start != '\0' {
1100                    add!(self, "(lookahead < ");
1101                    self.add_character(start);
1102                    add!(self, " || ");
1103                    self.add_character(end);
1104                    add!(self, " < lookahead)");
1105                } else {
1106                    add!(self, "lookahead > ");
1107                    self.add_character(end);
1108                }
1109            }
1110        }
1111    }
1112
1113    fn add_character_set(&mut self, ix: usize) {
1114        let characters = self.large_character_sets[ix].1.clone();
1115        let info = &self.large_character_set_info[ix];
1116        if !info.is_used {
1117            return;
1118        }
1119
1120        add_line!(
1121            self,
1122            "static const TSCharacterRange {}[] = {{",
1123            info.constant_name
1124        );
1125
1126        indent!(self);
1127        for (ix, range) in characters.ranges().enumerate() {
1128            let column = ix % 8;
1129            if column == 0 {
1130                if ix > 0 {
1131                    add!(self, "\n");
1132                }
1133                add_whitespace!(self);
1134            } else {
1135                add!(self, " ");
1136            }
1137            add!(self, "{{");
1138            self.add_character(*range.start());
1139            add!(self, ", ");
1140            self.add_character(*range.end());
1141            add!(self, "}},");
1142        }
1143        add!(self, "\n");
1144        dedent!(self);
1145        add_line!(self, "}};");
1146        add_line!(self, "");
1147    }
1148
1149    fn add_advance_action(&mut self, action: &AdvanceAction) {
1150        if action.in_main_token {
1151            add!(self, "ADVANCE({});", action.state);
1152        } else {
1153            add!(self, "SKIP({});", action.state);
1154        }
1155    }
1156
1157    fn add_lex_modes(&mut self) {
1158        add_line!(
1159            self,
1160            "static const {} ts_lex_modes[STATE_COUNT] = {{",
1161            if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1162                "TSLexerMode"
1163            } else {
1164                "TSLexMode"
1165            }
1166        );
1167        indent!(self);
1168        for (i, state) in self.parse_table.states.iter().enumerate() {
1169            add_whitespace!(self);
1170            add!(self, "[{i}] = {{");
1171            if state.is_end_of_non_terminal_extra() {
1172                add!(self, "(TSStateId)(-1),");
1173            } else {
1174                add!(self, ".lex_state = {}", state.lex_state_id);
1175
1176                if state.external_lex_state_id > 0 {
1177                    add!(
1178                        self,
1179                        ", .external_lex_state = {}",
1180                        state.external_lex_state_id
1181                    );
1182                }
1183
1184                if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1185                    let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
1186                    if reserved_word_set_id != 0 {
1187                        add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
1188                    }
1189                }
1190            }
1191
1192            add!(self, "}},\n");
1193        }
1194        dedent!(self);
1195        add_line!(self, "}};");
1196        add_line!(self, "");
1197    }
1198
1199    fn add_reserved_word_sets(&mut self) {
1200        add_line!(
1201            self,
1202            "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
1203            self.reserved_word_sets.len(),
1204        );
1205        indent!(self);
1206        for (id, set) in self.reserved_word_sets.iter().enumerate() {
1207            if id == 0 {
1208                continue;
1209            }
1210            add_line!(self, "[{id}] = {{");
1211            indent!(self);
1212            for token in set.iter() {
1213                add_line!(self, "{},", self.symbol_ids[&token]);
1214            }
1215            dedent!(self);
1216            add_line!(self, "}},");
1217        }
1218        dedent!(self);
1219        add_line!(self, "}};");
1220        add_line!(self, "");
1221    }
1222
1223    fn add_external_token_enum(&mut self) {
1224        add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
1225        indent!(self);
1226        for i in 0..self.syntax_grammar.external_tokens.len() {
1227            add_line!(
1228                self,
1229                "{} = {i},",
1230                self.external_token_id(&self.syntax_grammar.external_tokens[i]),
1231            );
1232        }
1233        dedent!(self);
1234        add_line!(self, "}};");
1235        add_line!(self, "");
1236    }
1237
1238    fn add_external_scanner_symbol_map(&mut self) {
1239        add_line!(
1240            self,
1241            "static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
1242        );
1243        indent!(self);
1244        for i in 0..self.syntax_grammar.external_tokens.len() {
1245            let token = &self.syntax_grammar.external_tokens[i];
1246            let id_token = token
1247                .corresponding_internal_token
1248                .unwrap_or_else(|| Symbol::external(i));
1249            add_line!(
1250                self,
1251                "[{}] = {},",
1252                self.external_token_id(token),
1253                self.symbol_ids[&id_token],
1254            );
1255        }
1256        dedent!(self);
1257        add_line!(self, "}};");
1258        add_line!(self, "");
1259    }
1260
1261    fn add_external_scanner_states_list(&mut self) {
1262        add_line!(
1263            self,
1264            "static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
1265            self.parse_table.external_lex_states.len(),
1266        );
1267        indent!(self);
1268        for i in 0..self.parse_table.external_lex_states.len() {
1269            if !self.parse_table.external_lex_states[i].is_empty() {
1270                add_line!(self, "[{i}] = {{");
1271                indent!(self);
1272                for token in self.parse_table.external_lex_states[i].iter() {
1273                    add_line!(
1274                        self,
1275                        "[{}] = true,",
1276                        self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
1277                    );
1278                }
1279                dedent!(self);
1280                add_line!(self, "}},");
1281            }
1282        }
1283        dedent!(self);
1284        add_line!(self, "}};");
1285        add_line!(self, "");
1286    }
1287
1288    fn add_parse_table(&mut self) -> RenderResult<()> {
1289        let mut parse_table_entries = HashMap::new();
1290        let mut next_parse_action_list_index = 0;
1291
1292        // Parse action lists zero is for the default value, when a symbol is not valid.
1293        self.get_parse_action_list_id(
1294            &ParseTableEntry {
1295                actions: Vec::new(),
1296                reusable: false,
1297            },
1298            &mut parse_table_entries,
1299            &mut next_parse_action_list_index,
1300        );
1301
1302        add_line!(
1303            self,
1304            "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
1305        );
1306        indent!(self);
1307
1308        let mut terminal_entries = Vec::new();
1309        let mut nonterminal_entries = Vec::new();
1310
1311        for (i, state) in self
1312            .parse_table
1313            .states
1314            .iter()
1315            .enumerate()
1316            .take(self.large_state_count)
1317        {
1318            add_line!(self, "[STATE({i})] = {{");
1319            indent!(self);
1320
1321            // Ensure the entries are in a deterministic order, since they are
1322            // internally represented as a hash map.
1323            terminal_entries.clear();
1324            nonterminal_entries.clear();
1325            terminal_entries.extend(state.terminal_entries.iter());
1326            nonterminal_entries.extend(state.nonterminal_entries.iter());
1327            terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1328            nonterminal_entries.sort_unstable_by_key(|k| k.0);
1329
1330            for (symbol, action) in &nonterminal_entries {
1331                add_line!(
1332                    self,
1333                    "[{}] = STATE({}),",
1334                    self.symbol_ids[symbol],
1335                    match action {
1336                        GotoAction::Goto(state) => *state,
1337                        GotoAction::ShiftExtra => i,
1338                    }
1339                );
1340            }
1341
1342            for (symbol, entry) in &terminal_entries {
1343                let entry_id = self.get_parse_action_list_id(
1344                    entry,
1345                    &mut parse_table_entries,
1346                    &mut next_parse_action_list_index,
1347                );
1348                add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
1349            }
1350
1351            dedent!(self);
1352            add_line!(self, "}},");
1353        }
1354
1355        dedent!(self);
1356        add_line!(self, "}};");
1357        add_line!(self, "");
1358
1359        if self.large_state_count < self.parse_table.states.len() {
1360            add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
1361            indent!(self);
1362
1363            let mut next_table_index = 0;
1364            let mut small_state_indices = Vec::with_capacity(
1365                self.parse_table
1366                    .states
1367                    .len()
1368                    .saturating_sub(self.large_state_count),
1369            );
1370            let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
1371            for state in self.parse_table.states.iter().skip(self.large_state_count) {
1372                small_state_indices.push(next_table_index);
1373                symbols_by_value.clear();
1374
1375                terminal_entries.clear();
1376                terminal_entries.extend(state.terminal_entries.iter());
1377                terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1378
1379                // In a given parse state, many lookahead symbols have the same actions.
1380                // So in the "small state" representation, group symbols by their action
1381                // in order to avoid repeating the action.
1382                for (symbol, entry) in &terminal_entries {
1383                    let entry_id = self.get_parse_action_list_id(
1384                        entry,
1385                        &mut parse_table_entries,
1386                        &mut next_parse_action_list_index,
1387                    );
1388                    symbols_by_value
1389                        .entry((entry_id, SymbolType::Terminal))
1390                        .or_default()
1391                        .push(**symbol);
1392                }
1393                for (symbol, action) in &state.nonterminal_entries {
1394                    let state_id = match action {
1395                        GotoAction::Goto(i) => *i,
1396                        GotoAction::ShiftExtra => {
1397                            self.large_state_count + small_state_indices.len() - 1
1398                        }
1399                    };
1400                    symbols_by_value
1401                        .entry((state_id, SymbolType::NonTerminal))
1402                        .or_default()
1403                        .push(*symbol);
1404                }
1405
1406                let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
1407                values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
1408                    (symbols.len(), *kind, *value, symbols[0])
1409                });
1410
1411                add_line!(
1412                    self,
1413                    "[{next_table_index}] = {},",
1414                    values_with_symbols.len()
1415                );
1416                indent!(self);
1417                next_table_index += 1;
1418
1419                for ((value, kind), symbols) in &mut values_with_symbols {
1420                    next_table_index += 2 + symbols.len();
1421                    if *kind == SymbolType::NonTerminal {
1422                        add_line!(self, "STATE({value}), {},", symbols.len());
1423                    } else {
1424                        add_line!(self, "ACTIONS({value}), {},", symbols.len());
1425                    }
1426
1427                    symbols.sort_unstable();
1428                    indent!(self);
1429                    for symbol in symbols {
1430                        add_line!(self, "{},", self.symbol_ids[symbol]);
1431                    }
1432                    dedent!(self);
1433                }
1434
1435                dedent!(self);
1436            }
1437
1438            dedent!(self);
1439            add_line!(self, "}};");
1440            add_line!(self, "");
1441
1442            add_line!(
1443                self,
1444                "static const uint32_t ts_small_parse_table_map[] = {{"
1445            );
1446            indent!(self);
1447            for i in self.large_state_count..self.parse_table.states.len() {
1448                add_line!(
1449                    self,
1450                    "[SMALL_STATE({i})] = {},",
1451                    small_state_indices[i - self.large_state_count]
1452                );
1453            }
1454            dedent!(self);
1455            add_line!(self, "}};");
1456            add_line!(self, "");
1457        }
1458        if next_parse_action_list_index >= usize::from(u16::MAX) {
1459            Err(RenderError::ParseTable(next_parse_action_list_index))?;
1460        }
1461
1462        let mut parse_table_entries = parse_table_entries
1463            .into_iter()
1464            .map(|(entry, i)| (i, entry))
1465            .collect::<Vec<_>>();
1466        parse_table_entries.sort_by_key(|(index, _)| *index);
1467        self.add_parse_action_list(parse_table_entries);
1468
1469        Ok(())
1470    }
1471
1472    fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
1473        add_line!(
1474            self,
1475            "static const TSParseActionEntry ts_parse_actions[] = {{"
1476        );
1477        indent!(self);
1478        for (i, entry) in parse_table_entries {
1479            add!(
1480                self,
1481                "  [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
1482                entry.actions.len(),
1483                entry.reusable
1484            );
1485            for action in entry.actions {
1486                add!(self, " ");
1487                match action {
1488                    ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
1489                    ParseAction::Recover => add!(self, "RECOVER()"),
1490                    ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
1491                    ParseAction::Shift {
1492                        state,
1493                        is_repetition,
1494                    } => {
1495                        if is_repetition {
1496                            add!(self, "SHIFT_REPEAT({state})");
1497                        } else {
1498                            add!(self, "SHIFT({state})");
1499                        }
1500                    }
1501                    ParseAction::Reduce {
1502                        symbol,
1503                        child_count,
1504                        dynamic_precedence,
1505                        production_id,
1506                        ..
1507                    } => {
1508                        add!(
1509                            self,
1510                            "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
1511                            self.symbol_ids[&symbol]
1512                        );
1513                    }
1514                }
1515                add!(self, ",");
1516            }
1517            add!(self, "\n");
1518        }
1519        dedent!(self);
1520        add_line!(self, "}};");
1521        add_line!(self, "");
1522    }
1523
1524    fn add_parser_export(&mut self) {
1525        let language_function_name = format!("tree_sitter_{}", self.language_name);
1526        let external_scanner_name = format!("{language_function_name}_external_scanner");
1527
1528        add_line!(self, "#ifdef __cplusplus");
1529        add_line!(self, r#"extern "C" {{"#);
1530        add_line!(self, "#endif");
1531
1532        if !self.syntax_grammar.external_tokens.is_empty() {
1533            add_line!(self, "void *{external_scanner_name}_create(void);");
1534            add_line!(self, "void {external_scanner_name}_destroy(void *);");
1535            add_line!(
1536                self,
1537                "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);",
1538            );
1539            add_line!(
1540                self,
1541                "unsigned {external_scanner_name}_serialize(void *, char *);",
1542            );
1543            add_line!(
1544                self,
1545                "void {external_scanner_name}_deserialize(void *, const char *, unsigned);",
1546            );
1547            add_line!(self, "");
1548        }
1549
1550        add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS");
1551        add_line!(self, "#define TS_PUBLIC");
1552        add_line!(self, "#elif defined(_WIN32)");
1553        add_line!(self, "#define TS_PUBLIC __declspec(dllexport)");
1554        add_line!(self, "#else");
1555        add_line!(
1556            self,
1557            "#define TS_PUBLIC __attribute__((visibility(\"default\")))"
1558        );
1559        add_line!(self, "#endif");
1560        add_line!(self, "");
1561
1562        add_line!(
1563            self,
1564            "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
1565        );
1566        indent!(self);
1567        add_line!(self, "static const TSLanguage language = {{");
1568        indent!(self);
1569        add_line!(self, ".abi_version = LANGUAGE_VERSION,");
1570
1571        // Quantities
1572        add_line!(self, ".symbol_count = SYMBOL_COUNT,");
1573        add_line!(self, ".alias_count = ALIAS_COUNT,");
1574        add_line!(self, ".token_count = TOKEN_COUNT,");
1575        add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
1576        add_line!(self, ".state_count = STATE_COUNT,");
1577        add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
1578        add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
1579        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1580            add_line!(self, ".supertype_count = SUPERTYPE_COUNT,");
1581        }
1582        add_line!(self, ".field_count = FIELD_COUNT,");
1583        add_line!(
1584            self,
1585            ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
1586        );
1587
1588        // Parse table
1589        add_line!(self, ".parse_table = &ts_parse_table[0][0],");
1590        if self.large_state_count < self.parse_table.states.len() {
1591            add_line!(self, ".small_parse_table = ts_small_parse_table,");
1592            add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,");
1593        }
1594        add_line!(self, ".parse_actions = ts_parse_actions,");
1595
1596        // Metadata
1597        add_line!(self, ".symbol_names = ts_symbol_names,");
1598        if !self.field_names.is_empty() {
1599            add_line!(self, ".field_names = ts_field_names,");
1600            add_line!(self, ".field_map_slices = ts_field_map_slices,");
1601            add_line!(self, ".field_map_entries = ts_field_map_entries,");
1602        }
1603        if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1604            add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,");
1605            add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,");
1606            add_line!(self, ".supertype_symbols = ts_supertype_symbols,");
1607        }
1608        add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
1609        add_line!(self, ".public_symbol_map = ts_symbol_map,");
1610        add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
1611        if !self.parse_table.production_infos.is_empty() {
1612            add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],");
1613        }
1614
1615        // Lexing
1616        add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
1617        add_line!(self, ".lex_fn = ts_lex,");
1618        if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
1619            add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
1620            add_line!(
1621                self,
1622                ".keyword_capture_token = {},",
1623                self.symbol_ids[&keyword_capture_token]
1624            );
1625        }
1626
1627        if !self.syntax_grammar.external_tokens.is_empty() {
1628            add_line!(self, ".external_scanner = {{");
1629            indent!(self);
1630            add_line!(self, "&ts_external_scanner_states[0][0],");
1631            add_line!(self, "ts_external_scanner_symbol_map,");
1632            add_line!(self, "{external_scanner_name}_create,");
1633            add_line!(self, "{external_scanner_name}_destroy,");
1634            add_line!(self, "{external_scanner_name}_scan,");
1635            add_line!(self, "{external_scanner_name}_serialize,");
1636            add_line!(self, "{external_scanner_name}_deserialize,");
1637            dedent!(self);
1638            add_line!(self, "}},");
1639        }
1640
1641        add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
1642
1643        if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1644            add_line!(self, ".name = \"{}\",", self.language_name);
1645
1646            if self.reserved_word_sets.len() > 1 {
1647                add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
1648            }
1649
1650            add_line!(
1651                self,
1652                ".max_reserved_word_set_size = {},",
1653                self.reserved_word_sets
1654                    .iter()
1655                    .map(TokenSet::len)
1656                    .max()
1657                    .unwrap()
1658            );
1659
1660            let Some(metadata) = &self.metadata else {
1661                panic!(
1662                    indoc! {"
1663                        Metadata is required to generate ABI version {}.
1664                        This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
1665                    "},
1666                    self.abi_version
1667                );
1668            };
1669
1670            add_line!(self, ".metadata = {{");
1671            indent!(self);
1672            add_line!(self, ".major_version = {},", metadata.major_version);
1673            add_line!(self, ".minor_version = {},", metadata.minor_version);
1674            add_line!(self, ".patch_version = {},", metadata.patch_version);
1675            dedent!(self);
1676            add_line!(self, "}},");
1677        }
1678
1679        dedent!(self);
1680        add_line!(self, "}};");
1681        add_line!(self, "return &language;");
1682        dedent!(self);
1683        add_line!(self, "}}");
1684        add_line!(self, "#ifdef __cplusplus");
1685        add_line!(self, "}}");
1686        add_line!(self, "#endif");
1687    }
1688
1689    fn get_parse_action_list_id(
1690        &self,
1691        entry: &ParseTableEntry,
1692        parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
1693        next_parse_action_list_index: &mut usize,
1694    ) -> usize {
1695        if let Some(&index) = parse_table_entries.get(entry) {
1696            index
1697        } else {
1698            let result = *next_parse_action_list_index;
1699            parse_table_entries.insert(entry.clone(), result);
1700            *next_parse_action_list_index += 1 + entry.actions.len();
1701            result
1702        }
1703    }
1704
1705    fn get_field_map_id(
1706        &self,
1707        flat_field_map: Vec<(String, FieldLocation)>,
1708        flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
1709        next_flat_field_map_index: &mut usize,
1710    ) -> usize {
1711        if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
1712            return *index;
1713        }
1714
1715        let result = *next_flat_field_map_index;
1716        *next_flat_field_map_index += flat_field_map.len();
1717        flat_field_maps.push((result, flat_field_map));
1718        result
1719    }
1720
1721    fn external_token_id(&self, token: &ExternalToken) -> String {
1722        format!(
1723            "ts_external_token_{}",
1724            self.sanitize_identifier(&token.name)
1725        )
1726    }
1727
1728    fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
1729        let mut id;
1730        if symbol == Symbol::end() {
1731            id = "ts_builtin_sym_end".to_string();
1732        } else {
1733            let (name, kind) = self.metadata_for_symbol(symbol);
1734            id = match kind {
1735                VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
1736                VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
1737                VariableType::Hidden | VariableType::Named => {
1738                    format!("sym_{}", self.sanitize_identifier(name))
1739                }
1740            };
1741
1742            let mut suffix_number = 1;
1743            let mut suffix = String::new();
1744            while used_identifiers.contains(&id) {
1745                id.drain(id.len() - suffix.len()..);
1746                suffix_number += 1;
1747                suffix = suffix_number.to_string();
1748                id += &suffix;
1749            }
1750        }
1751
1752        used_identifiers.insert(id.clone());
1753        self.symbol_ids.insert(symbol, id);
1754    }
1755
1756    fn field_id(&self, field_name: &str) -> String {
1757        format!("field_{field_name}")
1758    }
1759
1760    fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
1761        match symbol.kind {
1762            SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
1763            SymbolType::NonTerminal => {
1764                let variable = &self.syntax_grammar.variables[symbol.index];
1765                (&variable.name, variable.kind)
1766            }
1767            SymbolType::Terminal => {
1768                let variable = &self.lexical_grammar.variables[symbol.index];
1769                (&variable.name, variable.kind)
1770            }
1771            SymbolType::External => {
1772                let token = &self.syntax_grammar.external_tokens[symbol.index];
1773                (&token.name, token.kind)
1774            }
1775        }
1776    }
1777
1778    fn symbols_for_alias(&self, alias: &Alias) -> Vec<Symbol> {
1779        self.parse_table
1780            .symbols
1781            .iter()
1782            .copied()
1783            .filter(move |symbol| {
1784                self.default_aliases.get(symbol).map_or_else(
1785                    || {
1786                        let (name, kind) = self.metadata_for_symbol(*symbol);
1787                        name == alias.value && kind == alias.kind()
1788                    },
1789                    |default_alias| default_alias == alias,
1790                )
1791            })
1792            .collect()
1793    }
1794
1795    fn sanitize_identifier(&self, name: &str) -> String {
1796        let mut result = String::with_capacity(name.len());
1797        for c in name.chars() {
1798            if c.is_ascii_alphanumeric() || c == '_' {
1799                result.push(c);
1800            } else {
1801                'special_chars: {
1802                    let replacement = match c {
1803                        ' ' if name.len() == 1 => "SPACE",
1804                        '~' => "TILDE",
1805                        '`' => "BQUOTE",
1806                        '!' => "BANG",
1807                        '@' => "AT",
1808                        '#' => "POUND",
1809                        '$' => "DOLLAR",
1810                        '%' => "PERCENT",
1811                        '^' => "CARET",
1812                        '&' => "AMP",
1813                        '*' => "STAR",
1814                        '(' => "LPAREN",
1815                        ')' => "RPAREN",
1816                        '-' => "DASH",
1817                        '+' => "PLUS",
1818                        '=' => "EQ",
1819                        '{' => "LBRACE",
1820                        '}' => "RBRACE",
1821                        '[' => "LBRACK",
1822                        ']' => "RBRACK",
1823                        '\\' => "BSLASH",
1824                        '|' => "PIPE",
1825                        ':' => "COLON",
1826                        ';' => "SEMI",
1827                        '"' => "DQUOTE",
1828                        '\'' => "SQUOTE",
1829                        '<' => "LT",
1830                        '>' => "GT",
1831                        ',' => "COMMA",
1832                        '.' => "DOT",
1833                        '?' => "QMARK",
1834                        '/' => "SLASH",
1835                        '\n' => "LF",
1836                        '\r' => "CR",
1837                        '\t' => "TAB",
1838                        '\0' => "NULL",
1839                        '\u{0001}' => "SOH",
1840                        '\u{0002}' => "STX",
1841                        '\u{0003}' => "ETX",
1842                        '\u{0004}' => "EOT",
1843                        '\u{0005}' => "ENQ",
1844                        '\u{0006}' => "ACK",
1845                        '\u{0007}' => "BEL",
1846                        '\u{0008}' => "BS",
1847                        '\u{000b}' => "VTAB",
1848                        '\u{000c}' => "FF",
1849                        '\u{000e}' => "SO",
1850                        '\u{000f}' => "SI",
1851                        '\u{0010}' => "DLE",
1852                        '\u{0011}' => "DC1",
1853                        '\u{0012}' => "DC2",
1854                        '\u{0013}' => "DC3",
1855                        '\u{0014}' => "DC4",
1856                        '\u{0015}' => "NAK",
1857                        '\u{0016}' => "SYN",
1858                        '\u{0017}' => "ETB",
1859                        '\u{0018}' => "CAN",
1860                        '\u{0019}' => "EM",
1861                        '\u{001a}' => "SUB",
1862                        '\u{001b}' => "ESC",
1863                        '\u{001c}' => "FS",
1864                        '\u{001d}' => "GS",
1865                        '\u{001e}' => "RS",
1866                        '\u{001f}' => "US",
1867                        '\u{007F}' => "DEL",
1868                        '\u{FEFF}' => "BOM",
1869                        '\u{0080}'..='\u{FFFF}' => {
1870                            write!(result, "u{:04x}", c as u32).unwrap();
1871                            break 'special_chars;
1872                        }
1873                        '\u{10000}'..='\u{10FFFF}' => {
1874                            write!(result, "U{:08x}", c as u32).unwrap();
1875                            break 'special_chars;
1876                        }
1877                        '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
1878                        ' ' => break 'special_chars,
1879                    };
1880                    if !result.is_empty() && !result.ends_with('_') {
1881                        result.push('_');
1882                    }
1883                    result += replacement;
1884                }
1885            }
1886        }
1887        result
1888    }
1889
1890    fn sanitize_string(&self, name: &str) -> String {
1891        let mut result = String::with_capacity(name.len());
1892        for c in name.chars() {
1893            match c {
1894                '\"' => result += "\\\"",
1895                '?' => result += "\\?",
1896                '\\' => result += "\\\\",
1897                '\u{0007}' => result += "\\a",
1898                '\u{0008}' => result += "\\b",
1899                '\u{000b}' => result += "\\v",
1900                '\u{000c}' => result += "\\f",
1901                '\n' => result += "\\n",
1902                '\r' => result += "\\r",
1903                '\t' => result += "\\t",
1904                '\0' => result += "\\0",
1905                '\u{0001}'..='\u{001f}' => write!(result, "\\x{:02x}", c as u32).unwrap(),
1906                '\u{007F}'..='\u{FFFF}' => write!(result, "\\u{:04x}", c as u32).unwrap(),
1907                '\u{10000}'..='\u{10FFFF}' => write!(result, "\\U{:08x}", c as u32).unwrap(),
1908                _ => result.push(c),
1909            }
1910        }
1911        result
1912    }
1913
1914    fn add_character(&mut self, c: char) {
1915        match c {
1916            '\'' => add!(self, "'\\''"),
1917            '\\' => add!(self, "'\\\\'"),
1918            '\u{000c}' => add!(self, "'\\f'"),
1919            '\n' => add!(self, "'\\n'"),
1920            '\t' => add!(self, "'\\t'"),
1921            '\r' => add!(self, "'\\r'"),
1922            _ => {
1923                if c == '\0' {
1924                    add!(self, "0");
1925                } else if c == ' ' || c.is_ascii_graphic() {
1926                    add!(self, "'{c}'");
1927                } else {
1928                    add!(self, "0x{:02x}", c as u32);
1929                }
1930            }
1931        }
1932    }
1933}
1934
1935/// Returns a String of C code for the given components of a parser.
1936///
1937/// # Arguments
1938///
1939/// * `name` - A string slice containing the name of the language
1940/// * `parse_table` - The generated parse table for the language
1941/// * `main_lex_table` - The generated lexing table for the language
1942/// * `keyword_lex_table` - The generated keyword lexing table for the language
1943/// * `keyword_capture_token` - A symbol indicating which token is used for keyword capture, if any.
1944/// * `syntax_grammar` - The syntax grammar extracted from the language's grammar
1945/// * `lexical_grammar` - The lexical grammar extracted from the language's grammar
1946/// * `default_aliases` - A map describing the global rename rules that should apply. the keys are
1947///   symbols that are *always* aliased in the same way, and the values are the aliases that are
1948///   applied to those symbols.
1949/// * `abi_version` - The language ABI version that should be generated. Usually you want
1950///   Tree-sitter's current version, but right after making an ABI change, it may be useful to
1951///   generate code with the previous ABI.
1952#[allow(clippy::too_many_arguments)]
1953pub fn render_c_code(
1954    name: &str,
1955    tables: Tables,
1956    syntax_grammar: SyntaxGrammar,
1957    lexical_grammar: LexicalGrammar,
1958    default_aliases: AliasMap,
1959    abi_version: usize,
1960    semantic_version: Option<(u8, u8, u8)>,
1961    supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
1962) -> RenderResult<String> {
1963    if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) {
1964        Err(RenderError::ABI(abi_version))?;
1965    }
1966
1967    Generator {
1968        language_name: name.to_string(),
1969        parse_table: tables.parse_table,
1970        main_lex_table: tables.main_lex_table,
1971        keyword_lex_table: tables.keyword_lex_table,
1972        large_character_sets: tables.large_character_sets,
1973        large_character_set_info: Vec::new(),
1974        syntax_grammar,
1975        lexical_grammar,
1976        default_aliases,
1977        abi_version,
1978        metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
1979            major_version,
1980            minor_version,
1981            patch_version,
1982        }),
1983        supertype_symbol_map,
1984        ..Default::default()
1985    }
1986    .generate()
1987}