1use std::{
2 cmp,
3 collections::{BTreeMap, BTreeSet, HashMap, HashSet},
4 fmt::Write,
5 mem::swap,
6};
7
8use crate::LANGUAGE_VERSION;
9use indoc::indoc;
10use serde::Serialize;
11use thiserror::Error;
12
13use super::{
14 build_tables::Tables,
15 grammars::{ExternalToken, LexicalGrammar, SyntaxGrammar, VariableType},
16 nfa::CharacterSet,
17 node_types::ChildType,
18 rules::{Alias, AliasMap, Symbol, SymbolType, TokenSet},
19 tables::{
20 AdvanceAction, FieldLocation, GotoAction, LexState, LexTable, ParseAction, ParseTable,
21 ParseTableEntry,
22 },
23};
24
25const SMALL_STATE_THRESHOLD: usize = 64;
26pub const ABI_VERSION_MIN: usize = 14;
27pub const ABI_VERSION_MAX: usize = LANGUAGE_VERSION;
28const ABI_VERSION_WITH_RESERVED_WORDS: usize = 15;
29
30pub type RenderResult<T> = Result<T, RenderError>;
31
32#[derive(Debug, Error, Serialize)]
33pub enum RenderError {
34 #[error("Parse table action count {0} exceeds maximum value of {max}", max=u16::MAX)]
35 ParseTable(usize),
36 #[error("This version of Tree-sitter can only generate parsers with ABI version {ABI_VERSION_MIN} - {ABI_VERSION_MAX}, not {0}")]
37 ABI(usize),
38}
39
40#[clippy::format_args]
41macro_rules! add {
42 ($this: tt, $($arg: tt)*) => {{
43 $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
44 }}
45}
46
47macro_rules! add_whitespace {
48 ($this:tt) => {{
49 $this.buffer.reserve(4 * 2 * $this.indent_level);
51 for _ in 0..$this.indent_level {
52 write!(&mut $this.buffer, " ").unwrap();
53 }
54 }};
55}
56
57#[clippy::format_args]
58macro_rules! add_line {
59 ($this: tt, $($arg: tt)*) => {
60 add_whitespace!($this);
61 $this.buffer.write_fmt(format_args!($($arg)*)).unwrap();
62 $this.buffer += "\n";
63 }
64}
65
66macro_rules! indent {
67 ($this:tt) => {
68 $this.indent_level += 1;
69 };
70}
71
72macro_rules! dedent {
73 ($this:tt) => {
74 assert_ne!($this.indent_level, 0);
75 $this.indent_level -= 1;
76 };
77}
78
79#[derive(Default)]
80struct Generator {
81 buffer: String,
82 indent_level: usize,
83 language_name: String,
84 parse_table: ParseTable,
85 main_lex_table: LexTable,
86 keyword_lex_table: LexTable,
87 large_character_sets: Vec<(Option<Symbol>, CharacterSet)>,
88 large_character_set_info: Vec<LargeCharacterSetInfo>,
89 large_state_count: usize,
90 syntax_grammar: SyntaxGrammar,
91 lexical_grammar: LexicalGrammar,
92 default_aliases: AliasMap,
93 symbol_order: HashMap<Symbol, usize>,
94 symbol_ids: HashMap<Symbol, String>,
95 alias_ids: HashMap<Alias, String>,
96 unique_aliases: Vec<Alias>,
97 symbol_map: HashMap<Symbol, Symbol>,
98 reserved_word_sets: Vec<TokenSet>,
99 reserved_word_set_ids_by_parse_state: Vec<usize>,
100 field_names: Vec<String>,
101 supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
102 supertype_map: BTreeMap<String, Vec<ChildType>>,
103 abi_version: usize,
104 metadata: Option<Metadata>,
105}
106
107struct LargeCharacterSetInfo {
108 constant_name: String,
109 is_used: bool,
110}
111
112struct Metadata {
113 major_version: u8,
114 minor_version: u8,
115 patch_version: u8,
116}
117
118impl Generator {
119 fn generate(mut self) -> RenderResult<String> {
120 self.init();
121 self.add_header();
122 self.add_includes();
123 self.add_pragmas();
124 self.add_stats();
125 self.add_symbol_enum();
126 self.add_symbol_names_list();
127 self.add_unique_symbol_map();
128 self.add_symbol_metadata_list();
129
130 if !self.field_names.is_empty() {
131 self.add_field_name_enum();
132 self.add_field_name_names_list();
133 self.add_field_sequences();
134 }
135
136 if !self.parse_table.production_infos.is_empty() {
137 self.add_alias_sequences();
138 }
139
140 self.add_non_terminal_alias_map();
141 self.add_primary_state_id_list();
142
143 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && !self.supertype_map.is_empty() {
144 self.add_supertype_map();
145 }
146
147 let buffer_offset_before_lex_functions = self.buffer.len();
148
149 let mut main_lex_table = LexTable::default();
150 swap(&mut main_lex_table, &mut self.main_lex_table);
151 self.add_lex_function("ts_lex", main_lex_table);
152
153 if self.syntax_grammar.word_token.is_some() {
154 let mut keyword_lex_table = LexTable::default();
155 swap(&mut keyword_lex_table, &mut self.keyword_lex_table);
156 self.add_lex_function("ts_lex_keywords", keyword_lex_table);
157 }
158
159 let lex_functions = self.buffer[buffer_offset_before_lex_functions..].to_string();
163 self.buffer.truncate(buffer_offset_before_lex_functions);
164 for ix in 0..self.large_character_sets.len() {
165 self.add_character_set(ix);
166 }
167 self.buffer.push_str(&lex_functions);
168
169 self.add_lex_modes();
170
171 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS && self.reserved_word_sets.len() > 1
172 {
173 self.add_reserved_word_sets();
174 }
175
176 self.add_parse_table()?;
177
178 if !self.syntax_grammar.external_tokens.is_empty() {
179 self.add_external_token_enum();
180 self.add_external_scanner_symbol_map();
181 self.add_external_scanner_states_list();
182 }
183
184 self.add_parser_export();
185
186 Ok(self.buffer)
187 }
188
189 fn init(&mut self) {
190 let mut symbol_identifiers = HashSet::new();
191 for i in 0..self.parse_table.symbols.len() {
192 self.assign_symbol_id(self.parse_table.symbols[i], &mut symbol_identifiers);
193 }
194 self.symbol_ids.insert(
195 Symbol::end_of_nonterminal_extra(),
196 self.symbol_ids[&Symbol::end()].clone(),
197 );
198
199 self.symbol_map = HashMap::new();
200
201 for symbol in &self.parse_table.symbols {
202 let mut mapping = symbol;
203
204 if let Some(alias) = self.default_aliases.get(symbol) {
210 let kind = alias.kind();
211 for other_symbol in &self.parse_table.symbols {
212 if let Some(other_alias) = self.default_aliases.get(other_symbol) {
213 if other_symbol < mapping && other_alias == alias {
214 mapping = other_symbol;
215 }
216 } else if self.metadata_for_symbol(*other_symbol) == (&alias.value, kind) {
217 mapping = other_symbol;
218 break;
219 }
220 }
221 }
222 else if symbol.is_terminal() {
227 let metadata = self.metadata_for_symbol(*symbol);
228 for other_symbol in &self.parse_table.symbols {
229 let other_metadata = self.metadata_for_symbol(*other_symbol);
230 if other_metadata == metadata {
231 if let Some(mapped) = self.symbol_map.get(other_symbol) {
232 if mapped == symbol {
233 break;
234 }
235 }
236 mapping = other_symbol;
237 break;
238 }
239 }
240 }
241
242 self.symbol_map.insert(*symbol, *mapping);
243 }
244
245 for production_info in &self.parse_table.production_infos {
246 for field_name in production_info.field_map.keys() {
248 if let Err(i) = self.field_names.binary_search(field_name) {
249 self.field_names.insert(i, field_name.clone());
250 }
251 }
252
253 for alias in &production_info.alias_sequence {
254 if let Some(alias) = &alias {
256 let alias_id =
258 if let Some(existing_symbol) = self.symbols_for_alias(alias).first() {
259 self.symbol_ids[&self.symbol_map[existing_symbol]].clone()
260 }
261 else {
264 if let Err(i) = self.unique_aliases.binary_search(alias) {
265 self.unique_aliases.insert(i, alias.clone());
266 }
267
268 if alias.is_named {
269 format!("alias_sym_{}", self.sanitize_identifier(&alias.value))
270 } else {
271 format!("anon_alias_sym_{}", self.sanitize_identifier(&alias.value))
272 }
273 };
274
275 self.alias_ids.entry(alias.clone()).or_insert(alias_id);
276 }
277 }
278 }
279
280 for (ix, (symbol, _)) in self.large_character_sets.iter().enumerate() {
281 let count = self.large_character_sets[0..ix]
282 .iter()
283 .filter(|(sym, _)| sym == symbol)
284 .count()
285 + 1;
286 let constant_name = if let Some(symbol) = symbol {
287 format!("{}_character_set_{}", self.symbol_ids[symbol], count)
288 } else {
289 format!("extras_character_set_{count}")
290 };
291 self.large_character_set_info.push(LargeCharacterSetInfo {
292 constant_name,
293 is_used: false,
294 });
295 }
296
297 self.reserved_word_sets.push(TokenSet::new());
299 for state in &self.parse_table.states {
300 let id = if let Some(ix) = self
301 .reserved_word_sets
302 .iter()
303 .position(|set| *set == state.reserved_words)
304 {
305 ix
306 } else {
307 self.reserved_word_sets.push(state.reserved_words.clone());
308 self.reserved_word_sets.len() - 1
309 };
310 self.reserved_word_set_ids_by_parse_state.push(id);
311 }
312
313 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
314 for (supertype, subtypes) in &self.supertype_symbol_map {
315 if let Some(supertype) = self.symbol_ids.get(supertype) {
316 self.supertype_map
317 .entry(supertype.clone())
318 .or_insert_with(|| subtypes.clone());
319 }
320 }
321
322 self.supertype_symbol_map.clear();
323 }
324
325 let threshold = cmp::min(SMALL_STATE_THRESHOLD, self.parse_table.symbols.len() / 2);
328 self.large_state_count = self
329 .parse_table
330 .states
331 .iter()
332 .enumerate()
333 .take_while(|(i, s)| {
334 *i <= 1 || s.terminal_entries.len() + s.nonterminal_entries.len() > threshold
335 })
336 .count();
337 }
338
339 fn add_header(&mut self) {
340 add_line!(self, "/* Automatically @generated by tree-sitter */",);
341 add_line!(self, "");
342 }
343
344 fn add_includes(&mut self) {
345 add_line!(self, "#include \"tree_sitter/parser.h\"");
346 add_line!(self, "");
347 }
348
349 fn add_pragmas(&mut self) {
350 add_line!(self, "#if defined(__GNUC__) || defined(__clang__)");
351 add_line!(
352 self,
353 "#pragma GCC diagnostic ignored \"-Wmissing-field-initializers\""
354 );
355 add_line!(self, "#endif");
356 add_line!(self, "");
357
358 if self.main_lex_table.states.len() > 300 {
362 add_line!(self, "#ifdef _MSC_VER");
363 add_line!(self, "#pragma optimize(\"\", off)");
364 add_line!(self, "#elif defined(__clang__)");
365 add_line!(self, "#pragma clang optimize off");
366 add_line!(self, "#elif defined(__GNUC__)");
367 add_line!(self, "#pragma GCC optimize (\"O0\")");
368 add_line!(self, "#endif");
369 add_line!(self, "");
370 }
371 }
372
373 fn add_stats(&mut self) {
374 let token_count = self
375 .parse_table
376 .symbols
377 .iter()
378 .filter(|symbol| {
379 if symbol.is_terminal() || symbol.is_eof() {
380 true
381 } else if symbol.is_external() {
382 self.syntax_grammar.external_tokens[symbol.index]
383 .corresponding_internal_token
384 .is_none()
385 } else {
386 false
387 }
388 })
389 .count();
390
391 add_line!(self, "#define LANGUAGE_VERSION {}", self.abi_version);
392 add_line!(
393 self,
394 "#define STATE_COUNT {}",
395 self.parse_table.states.len()
396 );
397 add_line!(self, "#define LARGE_STATE_COUNT {}", self.large_state_count);
398
399 add_line!(
400 self,
401 "#define SYMBOL_COUNT {}",
402 self.parse_table.symbols.len()
403 );
404 add_line!(self, "#define ALIAS_COUNT {}", self.unique_aliases.len());
405 add_line!(self, "#define TOKEN_COUNT {token_count}");
406 add_line!(
407 self,
408 "#define EXTERNAL_TOKEN_COUNT {}",
409 self.syntax_grammar.external_tokens.len()
410 );
411 add_line!(self, "#define FIELD_COUNT {}", self.field_names.len());
412 add_line!(
413 self,
414 "#define MAX_ALIAS_SEQUENCE_LENGTH {}",
415 self.parse_table.max_aliased_production_length
416 );
417 add_line!(
418 self,
419 "#define MAX_RESERVED_WORD_SET_SIZE {}",
420 self.reserved_word_sets
421 .iter()
422 .map(TokenSet::len)
423 .max()
424 .unwrap()
425 );
426
427 add_line!(
428 self,
429 "#define PRODUCTION_ID_COUNT {}",
430 self.parse_table.production_infos.len()
431 );
432 add_line!(self, "#define SUPERTYPE_COUNT {}", self.supertype_map.len());
433 add_line!(self, "");
434 }
435
436 fn add_symbol_enum(&mut self) {
437 add_line!(self, "enum ts_symbol_identifiers {{");
438 indent!(self);
439 self.symbol_order.insert(Symbol::end(), 0);
440 let mut i = 1;
441 for symbol in &self.parse_table.symbols {
442 if *symbol != Symbol::end() {
443 self.symbol_order.insert(*symbol, i);
444 add_line!(self, "{} = {i},", self.symbol_ids[symbol]);
445 i += 1;
446 }
447 }
448 for alias in &self.unique_aliases {
449 add_line!(self, "{} = {i},", self.alias_ids[alias]);
450 i += 1;
451 }
452 dedent!(self);
453 add_line!(self, "}};");
454 add_line!(self, "");
455 }
456
457 fn add_symbol_names_list(&mut self) {
458 add_line!(self, "static const char * const ts_symbol_names[] = {{");
459 indent!(self);
460 for symbol in &self.parse_table.symbols {
461 let name = self.sanitize_string(
462 self.default_aliases
463 .get(symbol)
464 .map_or(self.metadata_for_symbol(*symbol).0, |alias| {
465 alias.value.as_str()
466 }),
467 );
468 add_line!(self, "[{}] = \"{name}\",", self.symbol_ids[symbol]);
469 }
470 for alias in &self.unique_aliases {
471 add_line!(
472 self,
473 "[{}] = \"{}\",",
474 self.alias_ids[alias],
475 self.sanitize_string(&alias.value)
476 );
477 }
478 dedent!(self);
479 add_line!(self, "}};");
480 add_line!(self, "");
481 }
482
483 fn add_unique_symbol_map(&mut self) {
484 add_line!(self, "static const TSSymbol ts_symbol_map[] = {{");
485 indent!(self);
486 for symbol in &self.parse_table.symbols {
487 add_line!(
488 self,
489 "[{}] = {},",
490 self.symbol_ids[symbol],
491 self.symbol_ids[&self.symbol_map[symbol]],
492 );
493 }
494
495 for alias in &self.unique_aliases {
496 add_line!(
497 self,
498 "[{}] = {},",
499 self.alias_ids[alias],
500 self.alias_ids[alias],
501 );
502 }
503
504 dedent!(self);
505 add_line!(self, "}};");
506 add_line!(self, "");
507 }
508
509 fn add_field_name_enum(&mut self) {
510 add_line!(self, "enum ts_field_identifiers {{");
511 indent!(self);
512 for (i, field_name) in self.field_names.iter().enumerate() {
513 add_line!(self, "{} = {},", self.field_id(field_name), i + 1);
514 }
515 dedent!(self);
516 add_line!(self, "}};");
517 add_line!(self, "");
518 }
519
520 fn add_field_name_names_list(&mut self) {
521 add_line!(self, "static const char * const ts_field_names[] = {{");
522 indent!(self);
523 add_line!(self, "[0] = NULL,");
524 for field_name in &self.field_names {
525 add_line!(self, "[{}] = \"{field_name}\",", self.field_id(field_name));
526 }
527 dedent!(self);
528 add_line!(self, "}};");
529 add_line!(self, "");
530 }
531
532 fn add_symbol_metadata_list(&mut self) {
533 add_line!(
534 self,
535 "static const TSSymbolMetadata ts_symbol_metadata[] = {{"
536 );
537 indent!(self);
538 for symbol in &self.parse_table.symbols {
539 add_line!(self, "[{}] = {{", self.symbol_ids[symbol]);
540 indent!(self);
541 if let Some(Alias { is_named, .. }) = self.default_aliases.get(symbol) {
542 add_line!(self, ".visible = true,");
543 add_line!(self, ".named = {is_named},");
544 } else {
545 match self.metadata_for_symbol(*symbol).1 {
546 VariableType::Named => {
547 add_line!(self, ".visible = true,");
548 add_line!(self, ".named = true,");
549 }
550 VariableType::Anonymous => {
551 add_line!(self, ".visible = true,");
552 add_line!(self, ".named = false,");
553 }
554 VariableType::Hidden => {
555 add_line!(self, ".visible = false,");
556 add_line!(self, ".named = true,");
557 if self.syntax_grammar.supertype_symbols.contains(symbol) {
558 add_line!(self, ".supertype = true,");
559 }
560 }
561 VariableType::Auxiliary => {
562 add_line!(self, ".visible = false,");
563 add_line!(self, ".named = false,");
564 }
565 }
566 }
567 dedent!(self);
568 add_line!(self, "}},");
569 }
570 for alias in &self.unique_aliases {
571 add_line!(self, "[{}] = {{", self.alias_ids[alias]);
572 indent!(self);
573 add_line!(self, ".visible = true,");
574 add_line!(self, ".named = {},", alias.is_named);
575 dedent!(self);
576 add_line!(self, "}},");
577 }
578 dedent!(self);
579 add_line!(self, "}};");
580 add_line!(self, "");
581 }
582
583 fn add_alias_sequences(&mut self) {
584 add_line!(
585 self,
586 "static const TSSymbol ts_alias_sequences[PRODUCTION_ID_COUNT][MAX_ALIAS_SEQUENCE_LENGTH] = {{",
587 );
588 indent!(self);
589 for (i, production_info) in self.parse_table.production_infos.iter().enumerate() {
590 if production_info.alias_sequence.is_empty() {
591 if i == 0 {
594 add_line!(self, "[0] = {{0}},");
595 }
596 continue;
597 }
598
599 add_line!(self, "[{i}] = {{");
600 indent!(self);
601 for (j, alias) in production_info.alias_sequence.iter().enumerate() {
602 if let Some(alias) = alias {
603 add_line!(self, "[{j}] = {},", self.alias_ids[alias]);
604 }
605 }
606 dedent!(self);
607 add_line!(self, "}},");
608 }
609 dedent!(self);
610 add_line!(self, "}};");
611 add_line!(self, "");
612 }
613
614 fn add_non_terminal_alias_map(&mut self) {
615 let mut alias_ids_by_symbol = HashMap::new();
616 for variable in &self.syntax_grammar.variables {
617 for production in &variable.productions {
618 for step in &production.steps {
619 if let Some(alias) = &step.alias {
620 if step.symbol.is_non_terminal()
621 && Some(alias) != self.default_aliases.get(&step.symbol)
622 && self.symbol_ids.contains_key(&step.symbol)
623 {
624 if let Some(alias_id) = self.alias_ids.get(alias) {
625 let alias_ids =
626 alias_ids_by_symbol.entry(step.symbol).or_insert(Vec::new());
627 if let Err(i) = alias_ids.binary_search(&alias_id) {
628 alias_ids.insert(i, alias_id);
629 }
630 }
631 }
632 }
633 }
634 }
635 }
636
637 let mut alias_ids_by_symbol = alias_ids_by_symbol.iter().collect::<Vec<_>>();
638 alias_ids_by_symbol.sort_unstable_by_key(|e| e.0);
639
640 add_line!(
641 self,
642 "static const uint16_t ts_non_terminal_alias_map[] = {{"
643 );
644 indent!(self);
645 for (symbol, alias_ids) in alias_ids_by_symbol {
646 let symbol_id = &self.symbol_ids[symbol];
647 let public_symbol_id = &self.symbol_ids[&self.symbol_map[symbol]];
648 add_line!(self, "{symbol_id}, {},", 1 + alias_ids.len());
649 indent!(self);
650 add_line!(self, "{public_symbol_id},");
651 for alias_id in alias_ids {
652 add_line!(self, "{alias_id},");
653 }
654 dedent!(self);
655 }
656 add_line!(self, "0,");
657 dedent!(self);
658 add_line!(self, "}};");
659 add_line!(self, "");
660 }
661
662 fn add_primary_state_id_list(&mut self) {
668 add_line!(
669 self,
670 "static const TSStateId ts_primary_state_ids[STATE_COUNT] = {{"
671 );
672 indent!(self);
673 let mut first_state_for_each_core_id = HashMap::new();
674 for (idx, state) in self.parse_table.states.iter().enumerate() {
675 let primary_state = first_state_for_each_core_id
676 .entry(state.core_id)
677 .or_insert(idx);
678 add_line!(self, "[{idx}] = {primary_state},");
679 }
680 dedent!(self);
681 add_line!(self, "}};");
682 add_line!(self, "");
683 }
684
685 fn add_field_sequences(&mut self) {
686 let mut flat_field_maps = vec![];
687 let mut next_flat_field_map_index = 0;
688 self.get_field_map_id(
689 Vec::new(),
690 &mut flat_field_maps,
691 &mut next_flat_field_map_index,
692 );
693
694 let mut field_map_ids = Vec::with_capacity(self.parse_table.production_infos.len());
695 for production_info in &self.parse_table.production_infos {
696 if production_info.field_map.is_empty() {
697 field_map_ids.push((0, 0));
698 } else {
699 let mut flat_field_map = Vec::with_capacity(production_info.field_map.len());
700 for (field_name, locations) in &production_info.field_map {
701 for location in locations {
702 flat_field_map.push((field_name.clone(), *location));
703 }
704 }
705 let field_map_len = flat_field_map.len();
706 field_map_ids.push((
707 self.get_field_map_id(
708 flat_field_map,
709 &mut flat_field_maps,
710 &mut next_flat_field_map_index,
711 ),
712 field_map_len,
713 ));
714 }
715 }
716
717 add_line!(
718 self,
719 "static const TSMapSlice ts_field_map_slices[PRODUCTION_ID_COUNT] = {{",
720 );
721 indent!(self);
722 for (production_id, (row_id, length)) in field_map_ids.into_iter().enumerate() {
723 if length > 0 {
724 add_line!(
725 self,
726 "[{production_id}] = {{.index = {row_id}, .length = {length}}},",
727 );
728 }
729 }
730 dedent!(self);
731 add_line!(self, "}};");
732 add_line!(self, "");
733
734 add_line!(
735 self,
736 "static const TSFieldMapEntry ts_field_map_entries[] = {{",
737 );
738 indent!(self);
739 for (row_index, field_pairs) in flat_field_maps.into_iter().skip(1) {
740 add_line!(self, "[{row_index}] =");
741 indent!(self);
742 for (field_name, location) in field_pairs {
743 add_whitespace!(self);
744 add!(self, "{{{}, {}", self.field_id(&field_name), location.index);
745 if location.inherited {
746 add!(self, ", .inherited = true");
747 }
748 add!(self, "}},\n");
749 }
750 dedent!(self);
751 }
752
753 dedent!(self);
754 add_line!(self, "}};");
755 add_line!(self, "");
756 }
757
758 fn add_supertype_map(&mut self) {
759 add_line!(
760 self,
761 "static const TSSymbol ts_supertype_symbols[SUPERTYPE_COUNT] = {{"
762 );
763 indent!(self);
764 for supertype in self.supertype_map.keys() {
765 add_line!(self, "{supertype},");
766 }
767 dedent!(self);
768 add_line!(self, "}};\n");
769
770 add_line!(
771 self,
772 "static const TSMapSlice ts_supertype_map_slices[] = {{",
773 );
774 indent!(self);
775 let mut row_id = 0;
776 let mut supertype_ids = vec![0];
777 let mut supertype_string_map = BTreeMap::new();
778 for (supertype, subtypes) in &self.supertype_map {
779 supertype_string_map.insert(
780 supertype,
781 subtypes
782 .iter()
783 .flat_map(|s| match s {
784 ChildType::Normal(symbol) => vec![self.symbol_ids.get(symbol).cloned()],
785 ChildType::Aliased(alias) => {
786 self.alias_ids.get(alias).cloned().map_or_else(
787 || {
788 self.symbols_for_alias(alias)
789 .into_iter()
790 .map(|s| self.symbol_ids.get(&s).cloned())
791 .collect()
792 },
793 |a| vec![Some(a)],
794 )
795 }
796 })
797 .flatten()
798 .collect::<BTreeSet<String>>(),
799 );
800 }
801 for (supertype, subtypes) in &supertype_string_map {
802 let length = subtypes.len();
803 add_line!(
804 self,
805 "[{supertype}] = {{.index = {row_id}, .length = {length}}},",
806 );
807 row_id += length;
808 supertype_ids.push(row_id);
809 }
810 dedent!(self);
811 add_line!(self, "}};");
812 add_line!(self, "");
813
814 add_line!(
815 self,
816 "static const TSSymbol ts_supertype_map_entries[] = {{",
817 );
818 indent!(self);
819 for (i, (_, subtypes)) in supertype_string_map.iter().enumerate() {
820 let row_index = supertype_ids[i];
821 add_line!(self, "[{row_index}] =");
822 indent!(self);
823 for subtype in subtypes {
824 add_whitespace!(self);
825 add!(self, "{subtype},\n");
826 }
827 dedent!(self);
828 }
829
830 dedent!(self);
831 add_line!(self, "}};");
832 add_line!(self, "");
833 }
834
835 fn add_lex_function(&mut self, name: &str, lex_table: LexTable) {
836 add_line!(
837 self,
838 "static bool {name}(TSLexer *lexer, TSStateId state) {{",
839 );
840 indent!(self);
841
842 add_line!(self, "START_LEXER();");
843 add_line!(self, "eof = lexer->eof(lexer);");
844 add_line!(self, "switch (state) {{");
845
846 indent!(self);
847 for (i, state) in lex_table.states.into_iter().enumerate() {
848 add_line!(self, "case {i}:");
849 indent!(self);
850 self.add_lex_state(i, state);
851 dedent!(self);
852 }
853
854 add_line!(self, "default:");
855 indent!(self);
856 add_line!(self, "return false;");
857 dedent!(self);
858
859 dedent!(self);
860 add_line!(self, "}}");
861
862 dedent!(self);
863 add_line!(self, "}}");
864 add_line!(self, "");
865 }
866
867 fn add_lex_state(&mut self, _state_ix: usize, state: LexState) {
868 if let Some(accept_action) = state.accept_action {
869 add_line!(self, "ACCEPT_TOKEN({});", self.symbol_ids[&accept_action]);
870 }
871
872 if let Some(eof_action) = state.eof_action {
873 add_line!(self, "if (eof) ADVANCE({});", eof_action.state);
874 }
875
876 let mut chars_copy = CharacterSet::empty();
877 let mut large_set = CharacterSet::empty();
878 let mut ruled_out_chars = CharacterSet::empty();
879
880 let mut leading_simple_transition_count = 0;
886 let mut leading_simple_transition_range_count = 0;
887 for (chars, action) in &state.advance_actions {
888 if action.in_main_token
889 && chars.ranges().all(|r| {
890 let start = *r.start() as u32;
891 let end = *r.end() as u32;
892 end <= start + 1 && u16::try_from(end).is_ok()
893 })
894 {
895 leading_simple_transition_count += 1;
896 leading_simple_transition_range_count += chars.range_count();
897 } else {
898 break;
899 }
900 }
901
902 if leading_simple_transition_range_count >= 8 {
903 add_line!(self, "ADVANCE_MAP(");
904 indent!(self);
905 for (chars, action) in &state.advance_actions[0..leading_simple_transition_count] {
906 for range in chars.ranges() {
907 add_whitespace!(self);
908 self.add_character(*range.start());
909 add!(self, ", {},\n", action.state);
910 if range.end() > range.start() {
911 add_whitespace!(self);
912 self.add_character(*range.end());
913 add!(self, ", {},\n", action.state);
914 }
915 }
916 ruled_out_chars = ruled_out_chars.add(chars);
917 }
918 dedent!(self);
919 add_line!(self, ");");
920 } else {
921 leading_simple_transition_count = 0;
922 }
923
924 for (chars, action) in &state.advance_actions[leading_simple_transition_count..] {
925 add_whitespace!(self);
926
927 let simplified_chars = chars.simplify_ignoring(&ruled_out_chars);
936
937 let mut best_large_char_set: Option<(usize, CharacterSet, CharacterSet)> = None;
943 if simplified_chars.range_count() >= super::build_tables::LARGE_CHARACTER_RANGE_COUNT {
944 for (ix, (_, set)) in self.large_character_sets.iter().enumerate() {
945 chars_copy.assign(&simplified_chars);
946 large_set.assign(set);
947 let intersection = chars_copy.remove_intersection(&mut large_set);
948 if !intersection.is_empty() {
949 let additions = chars_copy.simplify_ignoring(&ruled_out_chars);
950 let removals = large_set.simplify_ignoring(&ruled_out_chars);
951 let total_range_count = additions.range_count() + removals.range_count();
952 if total_range_count >= simplified_chars.range_count() {
953 continue;
954 }
955 if let Some((_, best_additions, best_removals)) = &best_large_char_set {
956 let best_range_count =
957 best_additions.range_count() + best_removals.range_count();
958 if best_range_count < total_range_count {
959 continue;
960 }
961 }
962 best_large_char_set = Some((ix, additions, removals));
963 }
964 }
965 }
966
967 ruled_out_chars = ruled_out_chars.add(chars);
970
971 let mut large_char_set_ix = None;
972 let mut asserted_chars = simplified_chars;
973 let mut negated_chars = CharacterSet::empty();
974 if let Some((char_set_ix, additions, removals)) = best_large_char_set {
975 asserted_chars = additions;
976 negated_chars = removals;
977 large_char_set_ix = Some(char_set_ix);
978 }
979
980 let line_break = format!("\n{}", " ".repeat(self.indent_level + 2));
981
982 let has_positive_condition = large_char_set_ix.is_some() || !asserted_chars.is_empty();
983 let has_negative_condition = !negated_chars.is_empty();
984 let has_condition = has_positive_condition || has_negative_condition;
985 if has_condition {
986 add!(self, "if (");
987 if has_positive_condition && has_negative_condition {
988 add!(self, "(");
989 }
990 }
991
992 if let Some(large_char_set_ix) = large_char_set_ix {
993 let large_set = &self.large_character_sets[large_char_set_ix].1;
994
995 let check_eof = large_set.contains('\0');
998 if check_eof {
999 add!(self, "(!eof && ");
1000 }
1001
1002 let char_set_info = &mut self.large_character_set_info[large_char_set_ix];
1003 char_set_info.is_used = true;
1004 add!(
1005 self,
1006 "set_contains({}, {}, lookahead)",
1007 char_set_info.constant_name,
1008 large_set.range_count(),
1009 );
1010 if check_eof {
1011 add!(self, ")");
1012 }
1013 }
1014
1015 if !asserted_chars.is_empty() {
1016 if large_char_set_ix.is_some() {
1017 add!(self, " ||{line_break}");
1018 }
1019
1020 let is_included = !asserted_chars.contains(char::MAX);
1024 if !is_included {
1025 asserted_chars = asserted_chars.negate().add_char('\0');
1026 }
1027
1028 self.add_character_range_conditions(&asserted_chars, is_included, &line_break);
1029 }
1030
1031 if has_negative_condition {
1032 if has_positive_condition {
1033 add!(self, ") &&{line_break}");
1034 }
1035 self.add_character_range_conditions(&negated_chars, false, &line_break);
1036 }
1037
1038 if has_condition {
1039 add!(self, ") ");
1040 }
1041
1042 self.add_advance_action(action);
1043 add!(self, "\n");
1044 }
1045
1046 add_line!(self, "END_STATE();");
1047 }
1048
1049 fn add_character_range_conditions(
1050 &mut self,
1051 characters: &CharacterSet,
1052 is_included: bool,
1053 line_break: &str,
1054 ) {
1055 for (i, range) in characters.ranges().enumerate() {
1056 let start = *range.start();
1057 let end = *range.end();
1058 if is_included {
1059 if i > 0 {
1060 add!(self, " ||{line_break}");
1061 }
1062
1063 if start == '\0' {
1064 add!(self, "(!eof && ");
1065 if end == '\0' {
1066 add!(self, "lookahead == 0");
1067 } else {
1068 add!(self, "lookahead <= ");
1069 }
1070 self.add_character(end);
1071 add!(self, ")");
1072 } else if end == start {
1073 add!(self, "lookahead == ");
1074 self.add_character(start);
1075 } else if end as u32 == start as u32 + 1 {
1076 add!(self, "lookahead == ");
1077 self.add_character(start);
1078 add!(self, " ||{line_break}lookahead == ");
1079 self.add_character(end);
1080 } else {
1081 add!(self, "(");
1082 self.add_character(start);
1083 add!(self, " <= lookahead && lookahead <= ");
1084 self.add_character(end);
1085 add!(self, ")");
1086 }
1087 } else {
1088 if i > 0 {
1089 add!(self, " &&{line_break}");
1090 }
1091 if end == start {
1092 add!(self, "lookahead != ");
1093 self.add_character(start);
1094 } else if end as u32 == start as u32 + 1 {
1095 add!(self, "lookahead != ");
1096 self.add_character(start);
1097 add!(self, " &&{line_break}lookahead != ");
1098 self.add_character(end);
1099 } else if start != '\0' {
1100 add!(self, "(lookahead < ");
1101 self.add_character(start);
1102 add!(self, " || ");
1103 self.add_character(end);
1104 add!(self, " < lookahead)");
1105 } else {
1106 add!(self, "lookahead > ");
1107 self.add_character(end);
1108 }
1109 }
1110 }
1111 }
1112
1113 fn add_character_set(&mut self, ix: usize) {
1114 let characters = self.large_character_sets[ix].1.clone();
1115 let info = &self.large_character_set_info[ix];
1116 if !info.is_used {
1117 return;
1118 }
1119
1120 add_line!(
1121 self,
1122 "static const TSCharacterRange {}[] = {{",
1123 info.constant_name
1124 );
1125
1126 indent!(self);
1127 for (ix, range) in characters.ranges().enumerate() {
1128 let column = ix % 8;
1129 if column == 0 {
1130 if ix > 0 {
1131 add!(self, "\n");
1132 }
1133 add_whitespace!(self);
1134 } else {
1135 add!(self, " ");
1136 }
1137 add!(self, "{{");
1138 self.add_character(*range.start());
1139 add!(self, ", ");
1140 self.add_character(*range.end());
1141 add!(self, "}},");
1142 }
1143 add!(self, "\n");
1144 dedent!(self);
1145 add_line!(self, "}};");
1146 add_line!(self, "");
1147 }
1148
1149 fn add_advance_action(&mut self, action: &AdvanceAction) {
1150 if action.in_main_token {
1151 add!(self, "ADVANCE({});", action.state);
1152 } else {
1153 add!(self, "SKIP({});", action.state);
1154 }
1155 }
1156
1157 fn add_lex_modes(&mut self) {
1158 add_line!(
1159 self,
1160 "static const {} ts_lex_modes[STATE_COUNT] = {{",
1161 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1162 "TSLexerMode"
1163 } else {
1164 "TSLexMode"
1165 }
1166 );
1167 indent!(self);
1168 for (i, state) in self.parse_table.states.iter().enumerate() {
1169 add_whitespace!(self);
1170 add!(self, "[{i}] = {{");
1171 if state.is_end_of_non_terminal_extra() {
1172 add!(self, "(TSStateId)(-1),");
1173 } else {
1174 add!(self, ".lex_state = {}", state.lex_state_id);
1175
1176 if state.external_lex_state_id > 0 {
1177 add!(
1178 self,
1179 ", .external_lex_state = {}",
1180 state.external_lex_state_id
1181 );
1182 }
1183
1184 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1185 let reserved_word_set_id = self.reserved_word_set_ids_by_parse_state[i];
1186 if reserved_word_set_id != 0 {
1187 add!(self, ", .reserved_word_set_id = {reserved_word_set_id}");
1188 }
1189 }
1190 }
1191
1192 add!(self, "}},\n");
1193 }
1194 dedent!(self);
1195 add_line!(self, "}};");
1196 add_line!(self, "");
1197 }
1198
1199 fn add_reserved_word_sets(&mut self) {
1200 add_line!(
1201 self,
1202 "static const TSSymbol ts_reserved_words[{}][MAX_RESERVED_WORD_SET_SIZE] = {{",
1203 self.reserved_word_sets.len(),
1204 );
1205 indent!(self);
1206 for (id, set) in self.reserved_word_sets.iter().enumerate() {
1207 if id == 0 {
1208 continue;
1209 }
1210 add_line!(self, "[{id}] = {{");
1211 indent!(self);
1212 for token in set.iter() {
1213 add_line!(self, "{},", self.symbol_ids[&token]);
1214 }
1215 dedent!(self);
1216 add_line!(self, "}},");
1217 }
1218 dedent!(self);
1219 add_line!(self, "}};");
1220 add_line!(self, "");
1221 }
1222
1223 fn add_external_token_enum(&mut self) {
1224 add_line!(self, "enum ts_external_scanner_symbol_identifiers {{");
1225 indent!(self);
1226 for i in 0..self.syntax_grammar.external_tokens.len() {
1227 add_line!(
1228 self,
1229 "{} = {i},",
1230 self.external_token_id(&self.syntax_grammar.external_tokens[i]),
1231 );
1232 }
1233 dedent!(self);
1234 add_line!(self, "}};");
1235 add_line!(self, "");
1236 }
1237
1238 fn add_external_scanner_symbol_map(&mut self) {
1239 add_line!(
1240 self,
1241 "static const TSSymbol ts_external_scanner_symbol_map[EXTERNAL_TOKEN_COUNT] = {{"
1242 );
1243 indent!(self);
1244 for i in 0..self.syntax_grammar.external_tokens.len() {
1245 let token = &self.syntax_grammar.external_tokens[i];
1246 let id_token = token
1247 .corresponding_internal_token
1248 .unwrap_or_else(|| Symbol::external(i));
1249 add_line!(
1250 self,
1251 "[{}] = {},",
1252 self.external_token_id(token),
1253 self.symbol_ids[&id_token],
1254 );
1255 }
1256 dedent!(self);
1257 add_line!(self, "}};");
1258 add_line!(self, "");
1259 }
1260
1261 fn add_external_scanner_states_list(&mut self) {
1262 add_line!(
1263 self,
1264 "static const bool ts_external_scanner_states[{}][EXTERNAL_TOKEN_COUNT] = {{",
1265 self.parse_table.external_lex_states.len(),
1266 );
1267 indent!(self);
1268 for i in 0..self.parse_table.external_lex_states.len() {
1269 if !self.parse_table.external_lex_states[i].is_empty() {
1270 add_line!(self, "[{i}] = {{");
1271 indent!(self);
1272 for token in self.parse_table.external_lex_states[i].iter() {
1273 add_line!(
1274 self,
1275 "[{}] = true,",
1276 self.external_token_id(&self.syntax_grammar.external_tokens[token.index])
1277 );
1278 }
1279 dedent!(self);
1280 add_line!(self, "}},");
1281 }
1282 }
1283 dedent!(self);
1284 add_line!(self, "}};");
1285 add_line!(self, "");
1286 }
1287
1288 fn add_parse_table(&mut self) -> RenderResult<()> {
1289 let mut parse_table_entries = HashMap::new();
1290 let mut next_parse_action_list_index = 0;
1291
1292 self.get_parse_action_list_id(
1294 &ParseTableEntry {
1295 actions: Vec::new(),
1296 reusable: false,
1297 },
1298 &mut parse_table_entries,
1299 &mut next_parse_action_list_index,
1300 );
1301
1302 add_line!(
1303 self,
1304 "static const uint16_t ts_parse_table[LARGE_STATE_COUNT][SYMBOL_COUNT] = {{",
1305 );
1306 indent!(self);
1307
1308 let mut terminal_entries = Vec::new();
1309 let mut nonterminal_entries = Vec::new();
1310
1311 for (i, state) in self
1312 .parse_table
1313 .states
1314 .iter()
1315 .enumerate()
1316 .take(self.large_state_count)
1317 {
1318 add_line!(self, "[STATE({i})] = {{");
1319 indent!(self);
1320
1321 terminal_entries.clear();
1324 nonterminal_entries.clear();
1325 terminal_entries.extend(state.terminal_entries.iter());
1326 nonterminal_entries.extend(state.nonterminal_entries.iter());
1327 terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1328 nonterminal_entries.sort_unstable_by_key(|k| k.0);
1329
1330 for (symbol, action) in &nonterminal_entries {
1331 add_line!(
1332 self,
1333 "[{}] = STATE({}),",
1334 self.symbol_ids[symbol],
1335 match action {
1336 GotoAction::Goto(state) => *state,
1337 GotoAction::ShiftExtra => i,
1338 }
1339 );
1340 }
1341
1342 for (symbol, entry) in &terminal_entries {
1343 let entry_id = self.get_parse_action_list_id(
1344 entry,
1345 &mut parse_table_entries,
1346 &mut next_parse_action_list_index,
1347 );
1348 add_line!(self, "[{}] = ACTIONS({entry_id}),", self.symbol_ids[symbol]);
1349 }
1350
1351 dedent!(self);
1352 add_line!(self, "}},");
1353 }
1354
1355 dedent!(self);
1356 add_line!(self, "}};");
1357 add_line!(self, "");
1358
1359 if self.large_state_count < self.parse_table.states.len() {
1360 add_line!(self, "static const uint16_t ts_small_parse_table[] = {{");
1361 indent!(self);
1362
1363 let mut next_table_index = 0;
1364 let mut small_state_indices = Vec::with_capacity(
1365 self.parse_table
1366 .states
1367 .len()
1368 .saturating_sub(self.large_state_count),
1369 );
1370 let mut symbols_by_value = HashMap::<(usize, SymbolType), Vec<Symbol>>::new();
1371 for state in self.parse_table.states.iter().skip(self.large_state_count) {
1372 small_state_indices.push(next_table_index);
1373 symbols_by_value.clear();
1374
1375 terminal_entries.clear();
1376 terminal_entries.extend(state.terminal_entries.iter());
1377 terminal_entries.sort_unstable_by_key(|e| self.symbol_order.get(e.0));
1378
1379 for (symbol, entry) in &terminal_entries {
1383 let entry_id = self.get_parse_action_list_id(
1384 entry,
1385 &mut parse_table_entries,
1386 &mut next_parse_action_list_index,
1387 );
1388 symbols_by_value
1389 .entry((entry_id, SymbolType::Terminal))
1390 .or_default()
1391 .push(**symbol);
1392 }
1393 for (symbol, action) in &state.nonterminal_entries {
1394 let state_id = match action {
1395 GotoAction::Goto(i) => *i,
1396 GotoAction::ShiftExtra => {
1397 self.large_state_count + small_state_indices.len() - 1
1398 }
1399 };
1400 symbols_by_value
1401 .entry((state_id, SymbolType::NonTerminal))
1402 .or_default()
1403 .push(*symbol);
1404 }
1405
1406 let mut values_with_symbols = symbols_by_value.drain().collect::<Vec<_>>();
1407 values_with_symbols.sort_unstable_by_key(|((value, kind), symbols)| {
1408 (symbols.len(), *kind, *value, symbols[0])
1409 });
1410
1411 add_line!(
1412 self,
1413 "[{next_table_index}] = {},",
1414 values_with_symbols.len()
1415 );
1416 indent!(self);
1417 next_table_index += 1;
1418
1419 for ((value, kind), symbols) in &mut values_with_symbols {
1420 next_table_index += 2 + symbols.len();
1421 if *kind == SymbolType::NonTerminal {
1422 add_line!(self, "STATE({value}), {},", symbols.len());
1423 } else {
1424 add_line!(self, "ACTIONS({value}), {},", symbols.len());
1425 }
1426
1427 symbols.sort_unstable();
1428 indent!(self);
1429 for symbol in symbols {
1430 add_line!(self, "{},", self.symbol_ids[symbol]);
1431 }
1432 dedent!(self);
1433 }
1434
1435 dedent!(self);
1436 }
1437
1438 dedent!(self);
1439 add_line!(self, "}};");
1440 add_line!(self, "");
1441
1442 add_line!(
1443 self,
1444 "static const uint32_t ts_small_parse_table_map[] = {{"
1445 );
1446 indent!(self);
1447 for i in self.large_state_count..self.parse_table.states.len() {
1448 add_line!(
1449 self,
1450 "[SMALL_STATE({i})] = {},",
1451 small_state_indices[i - self.large_state_count]
1452 );
1453 }
1454 dedent!(self);
1455 add_line!(self, "}};");
1456 add_line!(self, "");
1457 }
1458 if next_parse_action_list_index >= usize::from(u16::MAX) {
1459 Err(RenderError::ParseTable(next_parse_action_list_index))?;
1460 }
1461
1462 let mut parse_table_entries = parse_table_entries
1463 .into_iter()
1464 .map(|(entry, i)| (i, entry))
1465 .collect::<Vec<_>>();
1466 parse_table_entries.sort_by_key(|(index, _)| *index);
1467 self.add_parse_action_list(parse_table_entries);
1468
1469 Ok(())
1470 }
1471
1472 fn add_parse_action_list(&mut self, parse_table_entries: Vec<(usize, ParseTableEntry)>) {
1473 add_line!(
1474 self,
1475 "static const TSParseActionEntry ts_parse_actions[] = {{"
1476 );
1477 indent!(self);
1478 for (i, entry) in parse_table_entries {
1479 add!(
1480 self,
1481 " [{i}] = {{.entry = {{.count = {}, .reusable = {}}}}},",
1482 entry.actions.len(),
1483 entry.reusable
1484 );
1485 for action in entry.actions {
1486 add!(self, " ");
1487 match action {
1488 ParseAction::Accept => add!(self, " ACCEPT_INPUT()"),
1489 ParseAction::Recover => add!(self, "RECOVER()"),
1490 ParseAction::ShiftExtra => add!(self, "SHIFT_EXTRA()"),
1491 ParseAction::Shift {
1492 state,
1493 is_repetition,
1494 } => {
1495 if is_repetition {
1496 add!(self, "SHIFT_REPEAT({state})");
1497 } else {
1498 add!(self, "SHIFT({state})");
1499 }
1500 }
1501 ParseAction::Reduce {
1502 symbol,
1503 child_count,
1504 dynamic_precedence,
1505 production_id,
1506 ..
1507 } => {
1508 add!(
1509 self,
1510 "REDUCE({}, {child_count}, {dynamic_precedence}, {production_id})",
1511 self.symbol_ids[&symbol]
1512 );
1513 }
1514 }
1515 add!(self, ",");
1516 }
1517 add!(self, "\n");
1518 }
1519 dedent!(self);
1520 add_line!(self, "}};");
1521 add_line!(self, "");
1522 }
1523
1524 fn add_parser_export(&mut self) {
1525 let language_function_name = format!("tree_sitter_{}", self.language_name);
1526 let external_scanner_name = format!("{language_function_name}_external_scanner");
1527
1528 add_line!(self, "#ifdef __cplusplus");
1529 add_line!(self, r#"extern "C" {{"#);
1530 add_line!(self, "#endif");
1531
1532 if !self.syntax_grammar.external_tokens.is_empty() {
1533 add_line!(self, "void *{external_scanner_name}_create(void);");
1534 add_line!(self, "void {external_scanner_name}_destroy(void *);");
1535 add_line!(
1536 self,
1537 "bool {external_scanner_name}_scan(void *, TSLexer *, const bool *);",
1538 );
1539 add_line!(
1540 self,
1541 "unsigned {external_scanner_name}_serialize(void *, char *);",
1542 );
1543 add_line!(
1544 self,
1545 "void {external_scanner_name}_deserialize(void *, const char *, unsigned);",
1546 );
1547 add_line!(self, "");
1548 }
1549
1550 add_line!(self, "#ifdef TREE_SITTER_HIDE_SYMBOLS");
1551 add_line!(self, "#define TS_PUBLIC");
1552 add_line!(self, "#elif defined(_WIN32)");
1553 add_line!(self, "#define TS_PUBLIC __declspec(dllexport)");
1554 add_line!(self, "#else");
1555 add_line!(
1556 self,
1557 "#define TS_PUBLIC __attribute__((visibility(\"default\")))"
1558 );
1559 add_line!(self, "#endif");
1560 add_line!(self, "");
1561
1562 add_line!(
1563 self,
1564 "TS_PUBLIC const TSLanguage *{language_function_name}(void) {{",
1565 );
1566 indent!(self);
1567 add_line!(self, "static const TSLanguage language = {{");
1568 indent!(self);
1569 add_line!(self, ".abi_version = LANGUAGE_VERSION,");
1570
1571 add_line!(self, ".symbol_count = SYMBOL_COUNT,");
1573 add_line!(self, ".alias_count = ALIAS_COUNT,");
1574 add_line!(self, ".token_count = TOKEN_COUNT,");
1575 add_line!(self, ".external_token_count = EXTERNAL_TOKEN_COUNT,");
1576 add_line!(self, ".state_count = STATE_COUNT,");
1577 add_line!(self, ".large_state_count = LARGE_STATE_COUNT,");
1578 add_line!(self, ".production_id_count = PRODUCTION_ID_COUNT,");
1579 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1580 add_line!(self, ".supertype_count = SUPERTYPE_COUNT,");
1581 }
1582 add_line!(self, ".field_count = FIELD_COUNT,");
1583 add_line!(
1584 self,
1585 ".max_alias_sequence_length = MAX_ALIAS_SEQUENCE_LENGTH,"
1586 );
1587
1588 add_line!(self, ".parse_table = &ts_parse_table[0][0],");
1590 if self.large_state_count < self.parse_table.states.len() {
1591 add_line!(self, ".small_parse_table = ts_small_parse_table,");
1592 add_line!(self, ".small_parse_table_map = ts_small_parse_table_map,");
1593 }
1594 add_line!(self, ".parse_actions = ts_parse_actions,");
1595
1596 add_line!(self, ".symbol_names = ts_symbol_names,");
1598 if !self.field_names.is_empty() {
1599 add_line!(self, ".field_names = ts_field_names,");
1600 add_line!(self, ".field_map_slices = ts_field_map_slices,");
1601 add_line!(self, ".field_map_entries = ts_field_map_entries,");
1602 }
1603 if !self.supertype_map.is_empty() && self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1604 add_line!(self, ".supertype_map_slices = ts_supertype_map_slices,");
1605 add_line!(self, ".supertype_map_entries = ts_supertype_map_entries,");
1606 add_line!(self, ".supertype_symbols = ts_supertype_symbols,");
1607 }
1608 add_line!(self, ".symbol_metadata = ts_symbol_metadata,");
1609 add_line!(self, ".public_symbol_map = ts_symbol_map,");
1610 add_line!(self, ".alias_map = ts_non_terminal_alias_map,");
1611 if !self.parse_table.production_infos.is_empty() {
1612 add_line!(self, ".alias_sequences = &ts_alias_sequences[0][0],");
1613 }
1614
1615 add_line!(self, ".lex_modes = (const void*)ts_lex_modes,");
1617 add_line!(self, ".lex_fn = ts_lex,");
1618 if let Some(keyword_capture_token) = self.syntax_grammar.word_token {
1619 add_line!(self, ".keyword_lex_fn = ts_lex_keywords,");
1620 add_line!(
1621 self,
1622 ".keyword_capture_token = {},",
1623 self.symbol_ids[&keyword_capture_token]
1624 );
1625 }
1626
1627 if !self.syntax_grammar.external_tokens.is_empty() {
1628 add_line!(self, ".external_scanner = {{");
1629 indent!(self);
1630 add_line!(self, "&ts_external_scanner_states[0][0],");
1631 add_line!(self, "ts_external_scanner_symbol_map,");
1632 add_line!(self, "{external_scanner_name}_create,");
1633 add_line!(self, "{external_scanner_name}_destroy,");
1634 add_line!(self, "{external_scanner_name}_scan,");
1635 add_line!(self, "{external_scanner_name}_serialize,");
1636 add_line!(self, "{external_scanner_name}_deserialize,");
1637 dedent!(self);
1638 add_line!(self, "}},");
1639 }
1640
1641 add_line!(self, ".primary_state_ids = ts_primary_state_ids,");
1642
1643 if self.abi_version >= ABI_VERSION_WITH_RESERVED_WORDS {
1644 add_line!(self, ".name = \"{}\",", self.language_name);
1645
1646 if self.reserved_word_sets.len() > 1 {
1647 add_line!(self, ".reserved_words = &ts_reserved_words[0][0],");
1648 }
1649
1650 add_line!(
1651 self,
1652 ".max_reserved_word_set_size = {},",
1653 self.reserved_word_sets
1654 .iter()
1655 .map(TokenSet::len)
1656 .max()
1657 .unwrap()
1658 );
1659
1660 let Some(metadata) = &self.metadata else {
1661 panic!(
1662 indoc! {"
1663 Metadata is required to generate ABI version {}.
1664 This means that your grammar doesn't have a tree-sitter.json config file with an appropriate version field in the metadata table.
1665 "},
1666 self.abi_version
1667 );
1668 };
1669
1670 add_line!(self, ".metadata = {{");
1671 indent!(self);
1672 add_line!(self, ".major_version = {},", metadata.major_version);
1673 add_line!(self, ".minor_version = {},", metadata.minor_version);
1674 add_line!(self, ".patch_version = {},", metadata.patch_version);
1675 dedent!(self);
1676 add_line!(self, "}},");
1677 }
1678
1679 dedent!(self);
1680 add_line!(self, "}};");
1681 add_line!(self, "return &language;");
1682 dedent!(self);
1683 add_line!(self, "}}");
1684 add_line!(self, "#ifdef __cplusplus");
1685 add_line!(self, "}}");
1686 add_line!(self, "#endif");
1687 }
1688
1689 fn get_parse_action_list_id(
1690 &self,
1691 entry: &ParseTableEntry,
1692 parse_table_entries: &mut HashMap<ParseTableEntry, usize>,
1693 next_parse_action_list_index: &mut usize,
1694 ) -> usize {
1695 if let Some(&index) = parse_table_entries.get(entry) {
1696 index
1697 } else {
1698 let result = *next_parse_action_list_index;
1699 parse_table_entries.insert(entry.clone(), result);
1700 *next_parse_action_list_index += 1 + entry.actions.len();
1701 result
1702 }
1703 }
1704
1705 fn get_field_map_id(
1706 &self,
1707 flat_field_map: Vec<(String, FieldLocation)>,
1708 flat_field_maps: &mut Vec<(usize, Vec<(String, FieldLocation)>)>,
1709 next_flat_field_map_index: &mut usize,
1710 ) -> usize {
1711 if let Some((index, _)) = flat_field_maps.iter().find(|(_, e)| *e == *flat_field_map) {
1712 return *index;
1713 }
1714
1715 let result = *next_flat_field_map_index;
1716 *next_flat_field_map_index += flat_field_map.len();
1717 flat_field_maps.push((result, flat_field_map));
1718 result
1719 }
1720
1721 fn external_token_id(&self, token: &ExternalToken) -> String {
1722 format!(
1723 "ts_external_token_{}",
1724 self.sanitize_identifier(&token.name)
1725 )
1726 }
1727
1728 fn assign_symbol_id(&mut self, symbol: Symbol, used_identifiers: &mut HashSet<String>) {
1729 let mut id;
1730 if symbol == Symbol::end() {
1731 id = "ts_builtin_sym_end".to_string();
1732 } else {
1733 let (name, kind) = self.metadata_for_symbol(symbol);
1734 id = match kind {
1735 VariableType::Auxiliary => format!("aux_sym_{}", self.sanitize_identifier(name)),
1736 VariableType::Anonymous => format!("anon_sym_{}", self.sanitize_identifier(name)),
1737 VariableType::Hidden | VariableType::Named => {
1738 format!("sym_{}", self.sanitize_identifier(name))
1739 }
1740 };
1741
1742 let mut suffix_number = 1;
1743 let mut suffix = String::new();
1744 while used_identifiers.contains(&id) {
1745 id.drain(id.len() - suffix.len()..);
1746 suffix_number += 1;
1747 suffix = suffix_number.to_string();
1748 id += &suffix;
1749 }
1750 }
1751
1752 used_identifiers.insert(id.clone());
1753 self.symbol_ids.insert(symbol, id);
1754 }
1755
1756 fn field_id(&self, field_name: &str) -> String {
1757 format!("field_{field_name}")
1758 }
1759
1760 fn metadata_for_symbol(&self, symbol: Symbol) -> (&str, VariableType) {
1761 match symbol.kind {
1762 SymbolType::End | SymbolType::EndOfNonTerminalExtra => ("end", VariableType::Hidden),
1763 SymbolType::NonTerminal => {
1764 let variable = &self.syntax_grammar.variables[symbol.index];
1765 (&variable.name, variable.kind)
1766 }
1767 SymbolType::Terminal => {
1768 let variable = &self.lexical_grammar.variables[symbol.index];
1769 (&variable.name, variable.kind)
1770 }
1771 SymbolType::External => {
1772 let token = &self.syntax_grammar.external_tokens[symbol.index];
1773 (&token.name, token.kind)
1774 }
1775 }
1776 }
1777
1778 fn symbols_for_alias(&self, alias: &Alias) -> Vec<Symbol> {
1779 self.parse_table
1780 .symbols
1781 .iter()
1782 .copied()
1783 .filter(move |symbol| {
1784 self.default_aliases.get(symbol).map_or_else(
1785 || {
1786 let (name, kind) = self.metadata_for_symbol(*symbol);
1787 name == alias.value && kind == alias.kind()
1788 },
1789 |default_alias| default_alias == alias,
1790 )
1791 })
1792 .collect()
1793 }
1794
1795 fn sanitize_identifier(&self, name: &str) -> String {
1796 let mut result = String::with_capacity(name.len());
1797 for c in name.chars() {
1798 if c.is_ascii_alphanumeric() || c == '_' {
1799 result.push(c);
1800 } else {
1801 'special_chars: {
1802 let replacement = match c {
1803 ' ' if name.len() == 1 => "SPACE",
1804 '~' => "TILDE",
1805 '`' => "BQUOTE",
1806 '!' => "BANG",
1807 '@' => "AT",
1808 '#' => "POUND",
1809 '$' => "DOLLAR",
1810 '%' => "PERCENT",
1811 '^' => "CARET",
1812 '&' => "AMP",
1813 '*' => "STAR",
1814 '(' => "LPAREN",
1815 ')' => "RPAREN",
1816 '-' => "DASH",
1817 '+' => "PLUS",
1818 '=' => "EQ",
1819 '{' => "LBRACE",
1820 '}' => "RBRACE",
1821 '[' => "LBRACK",
1822 ']' => "RBRACK",
1823 '\\' => "BSLASH",
1824 '|' => "PIPE",
1825 ':' => "COLON",
1826 ';' => "SEMI",
1827 '"' => "DQUOTE",
1828 '\'' => "SQUOTE",
1829 '<' => "LT",
1830 '>' => "GT",
1831 ',' => "COMMA",
1832 '.' => "DOT",
1833 '?' => "QMARK",
1834 '/' => "SLASH",
1835 '\n' => "LF",
1836 '\r' => "CR",
1837 '\t' => "TAB",
1838 '\0' => "NULL",
1839 '\u{0001}' => "SOH",
1840 '\u{0002}' => "STX",
1841 '\u{0003}' => "ETX",
1842 '\u{0004}' => "EOT",
1843 '\u{0005}' => "ENQ",
1844 '\u{0006}' => "ACK",
1845 '\u{0007}' => "BEL",
1846 '\u{0008}' => "BS",
1847 '\u{000b}' => "VTAB",
1848 '\u{000c}' => "FF",
1849 '\u{000e}' => "SO",
1850 '\u{000f}' => "SI",
1851 '\u{0010}' => "DLE",
1852 '\u{0011}' => "DC1",
1853 '\u{0012}' => "DC2",
1854 '\u{0013}' => "DC3",
1855 '\u{0014}' => "DC4",
1856 '\u{0015}' => "NAK",
1857 '\u{0016}' => "SYN",
1858 '\u{0017}' => "ETB",
1859 '\u{0018}' => "CAN",
1860 '\u{0019}' => "EM",
1861 '\u{001a}' => "SUB",
1862 '\u{001b}' => "ESC",
1863 '\u{001c}' => "FS",
1864 '\u{001d}' => "GS",
1865 '\u{001e}' => "RS",
1866 '\u{001f}' => "US",
1867 '\u{007F}' => "DEL",
1868 '\u{FEFF}' => "BOM",
1869 '\u{0080}'..='\u{FFFF}' => {
1870 write!(result, "u{:04x}", c as u32).unwrap();
1871 break 'special_chars;
1872 }
1873 '\u{10000}'..='\u{10FFFF}' => {
1874 write!(result, "U{:08x}", c as u32).unwrap();
1875 break 'special_chars;
1876 }
1877 '0'..='9' | 'a'..='z' | 'A'..='Z' | '_' => unreachable!(),
1878 ' ' => break 'special_chars,
1879 };
1880 if !result.is_empty() && !result.ends_with('_') {
1881 result.push('_');
1882 }
1883 result += replacement;
1884 }
1885 }
1886 }
1887 result
1888 }
1889
1890 fn sanitize_string(&self, name: &str) -> String {
1891 let mut result = String::with_capacity(name.len());
1892 for c in name.chars() {
1893 match c {
1894 '\"' => result += "\\\"",
1895 '?' => result += "\\?",
1896 '\\' => result += "\\\\",
1897 '\u{0007}' => result += "\\a",
1898 '\u{0008}' => result += "\\b",
1899 '\u{000b}' => result += "\\v",
1900 '\u{000c}' => result += "\\f",
1901 '\n' => result += "\\n",
1902 '\r' => result += "\\r",
1903 '\t' => result += "\\t",
1904 '\0' => result += "\\0",
1905 '\u{0001}'..='\u{001f}' => write!(result, "\\x{:02x}", c as u32).unwrap(),
1906 '\u{007F}'..='\u{FFFF}' => write!(result, "\\u{:04x}", c as u32).unwrap(),
1907 '\u{10000}'..='\u{10FFFF}' => write!(result, "\\U{:08x}", c as u32).unwrap(),
1908 _ => result.push(c),
1909 }
1910 }
1911 result
1912 }
1913
1914 fn add_character(&mut self, c: char) {
1915 match c {
1916 '\'' => add!(self, "'\\''"),
1917 '\\' => add!(self, "'\\\\'"),
1918 '\u{000c}' => add!(self, "'\\f'"),
1919 '\n' => add!(self, "'\\n'"),
1920 '\t' => add!(self, "'\\t'"),
1921 '\r' => add!(self, "'\\r'"),
1922 _ => {
1923 if c == '\0' {
1924 add!(self, "0");
1925 } else if c == ' ' || c.is_ascii_graphic() {
1926 add!(self, "'{c}'");
1927 } else {
1928 add!(self, "0x{:02x}", c as u32);
1929 }
1930 }
1931 }
1932 }
1933}
1934
1935#[allow(clippy::too_many_arguments)]
1953pub fn render_c_code(
1954 name: &str,
1955 tables: Tables,
1956 syntax_grammar: SyntaxGrammar,
1957 lexical_grammar: LexicalGrammar,
1958 default_aliases: AliasMap,
1959 abi_version: usize,
1960 semantic_version: Option<(u8, u8, u8)>,
1961 supertype_symbol_map: BTreeMap<Symbol, Vec<ChildType>>,
1962) -> RenderResult<String> {
1963 if !(ABI_VERSION_MIN..=ABI_VERSION_MAX).contains(&abi_version) {
1964 Err(RenderError::ABI(abi_version))?;
1965 }
1966
1967 Generator {
1968 language_name: name.to_string(),
1969 parse_table: tables.parse_table,
1970 main_lex_table: tables.main_lex_table,
1971 keyword_lex_table: tables.keyword_lex_table,
1972 large_character_sets: tables.large_character_sets,
1973 large_character_set_info: Vec::new(),
1974 syntax_grammar,
1975 lexical_grammar,
1976 default_aliases,
1977 abi_version,
1978 metadata: semantic_version.map(|(major_version, minor_version, patch_version)| Metadata {
1979 major_version,
1980 minor_version,
1981 patch_version,
1982 }),
1983 supertype_symbol_map,
1984 ..Default::default()
1985 }
1986 .generate()
1987}