Skip to main content

tree_sitter_generate/
node_types.rs

1use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
2
3use serde::Serialize;
4use thiserror::Error;
5
6use super::{
7    grammars::{LexicalGrammar, SyntaxGrammar, VariableType},
8    rules::{Alias, AliasMap, Symbol, SymbolType},
9};
10
/// The type of a child node as it is recorded in a variable's summary.
///
/// A child is recorded as `Aliased` when its production step carries an alias
/// or its symbol has a default alias, and as `Normal` otherwise.
#[derive(Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum ChildType {
    /// The child is referred to by its own grammar symbol.
    Normal(Symbol),
    /// The child is referred to by an alias.
    Aliased(Alias),
}
16
/// Summary of a group of children (a field, or a node's children as a whole):
/// how many of them there can be and which types they can have.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct FieldInfo {
    /// Whether the children can exist, are required, and can be repeated.
    pub quantity: ChildQuantity,
    /// The child types that can occur in this group.
    pub types: Vec<ChildType>,
}
22
/// Summary of the public-facing structure of one syntax-grammar variable, as
/// computed by `get_variable_info`.
#[derive(Clone, Debug, Default, PartialEq, Eq)]
pub struct VariableInfo {
    /// Per-field summaries, keyed by field name.
    pub fields: HashMap<String, FieldInfo>,
    /// Summary of the variable's children, including those inherited from
    /// hidden non-terminal children.
    pub children: FieldInfo,
    /// Summary of the named children that are not associated with any field.
    pub children_without_fields: FieldInfo,
    /// `true` when some production of this variable has more than one step,
    /// or when a hidden non-terminal child has this flag set.
    pub has_multi_step_production: bool,
}
30
/// One serializable entry describing a single node kind, as produced by
/// `generate_node_types_json`.
///
/// `root` and `extra` are left out of the serialized output when `false`;
/// `fields`, `children` and `subtypes` are left out when `None`.
#[derive(Debug, Serialize, PartialEq, Eq, Default, PartialOrd, Ord)]
#[cfg(feature = "load")]
pub struct NodeInfoJSON {
    /// The node kind's name; serialized under the key `type`.
    #[serde(rename = "type")]
    kind: String,
    /// Whether the node kind is named (as opposed to anonymous).
    named: bool,
    /// Set for entries created from the first syntax variable (index 0).
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    root: bool,
    /// Set when the node kind's name belongs to one of the grammar's extra symbols.
    #[serde(skip_serializing_if = "std::ops::Not::not")]
    extra: bool,
    /// The node's fields, keyed by field name.
    #[serde(skip_serializing_if = "Option::is_none")]
    fields: Option<BTreeMap<String, FieldInfoJSON>>,
    /// The node's named children that are not associated with a field.
    #[serde(skip_serializing_if = "Option::is_none")]
    children: Option<FieldInfoJSON>,
    /// For supertypes only: the node types that the supertype can stand for.
    #[serde(skip_serializing_if = "Option::is_none")]
    subtypes: Option<Vec<NodeTypeJSON>>,
}
48
/// A reference to a node type: its kind name plus whether it is named.
#[derive(Clone, Debug, Serialize, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg(feature = "load")]
pub struct NodeTypeJSON {
    /// The node kind's name; serialized under the key `type`.
    #[serde(rename = "type")]
    kind: String,
    /// Whether the node kind is named (as opposed to anonymous).
    named: bool,
}
56
/// Serializable description of a field (or of a node's field-less children).
#[derive(Debug, Serialize, PartialEq, Eq, PartialOrd, Ord)]
#[cfg(feature = "load")]
pub struct FieldInfoJSON {
    /// Whether more than one child can occupy this slot.
    multiple: bool,
    /// Whether at least one child is always present in this slot.
    required: bool,
    /// The node types that can occupy this slot.
    types: Vec<NodeTypeJSON>,
}
64
/// Describes how many children of some kind a node can have.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct ChildQuantity {
    /// At least one child can be present.
    exists: bool,
    /// A child is always present.
    required: bool,
    /// More than one child can be present.
    multiple: bool,
}

/// The default JSON field description starts out as "single and required";
/// callers then accumulate into it with `|=` / `&=`.
#[cfg(feature = "load")]
impl Default for FieldInfoJSON {
    fn default() -> Self {
        let types = Vec::new();
        Self {
            multiple: false,
            required: true,
            types,
        }
    }
}

/// By default a quantity describes exactly one required child.
impl Default for ChildQuantity {
    fn default() -> Self {
        Self::one()
    }
}

impl ChildQuantity {
    /// The quantity describing "no children at all".
    #[must_use]
    const fn zero() -> Self {
        Self {
            multiple: false,
            required: false,
            exists: false,
        }
    }

    /// The quantity describing "exactly one, always present" child.
    #[must_use]
    const fn one() -> Self {
        Self {
            multiple: false,
            required: true,
            exists: true,
        }
    }

    /// Adds `other` to `self` as further children occurring alongside the
    /// ones already counted.
    ///
    /// A non-existent `other` leaves `self` untouched.
    const fn append(&mut self, other: Self) {
        if !other.exists {
            return;
        }
        // A second group of children, or a group that is itself repeatable,
        // makes the combined quantity repeatable.
        self.multiple = self.multiple || self.exists || other.multiple;
        self.required = self.required || other.required;
        self.exists = true;
    }

    /// Widens `self` so that it also covers the alternative `other`.
    ///
    /// Returns `true` when `self` was modified.
    const fn union(&mut self, other: Self) -> bool {
        let gains_existence = !self.exists && other.exists;
        let loses_requirement = self.required && !other.required;
        let gains_multiplicity = !self.multiple && other.multiple;

        if gains_existence {
            self.exists = true;
        }
        if loses_requirement {
            self.required = false;
        }
        if gains_multiplicity {
            self.multiple = true;
        }

        gains_existence || loses_requirement || gains_multiplicity
    }
}
137
/// Result type returned by `get_variable_info`.
pub type VariableInfoResult<T> = Result<T, VariableInfoError>;

/// Errors that can be reported while computing variable summaries.
#[derive(Debug, Error, Serialize)]
pub enum VariableInfoError {
    /// A supertype variable ended up with a multi-step production; the payload
    /// is the name of the offending variable.
    #[error("Grammar error: Supertype symbols must always have a single visible child, but `{0}` can have multiple")]
    InvalidSupertype(String),
}
145
146/// Compute a summary of the public-facing structure of each variable in the
147/// grammar. Each variable in the grammar corresponds to a distinct public-facing
148/// node type.
149///
150/// The information collected about each node type `N` is:
151/// 1. `child_types` - The types of visible children that can appear within `N`.
152/// 2. `fields` - The fields that `N` can have. Data regarding each field:
153///    * `types` - The types of visible children the field can contain.
154///    * `optional` - Do `N` nodes always have this field?
155///    * `multiple` - Can `N` nodes have multiple children for this field?
156/// 3. `children_without_fields` - The *other* named children of `N` that are not associated with
157///    fields. Data regarding these children:
158///    * `types` - The types of named children with no field.
159///    * `optional` - Do `N` nodes always have at least one named child with no field?
160///    * `multiple` - Can `N` nodes have multiple named children with no field?
161///
162/// Each summary must account for some indirect factors:
163/// 1. hidden nodes. When a parent node `N` has a hidden child `C`, the visible children of `C`
164///    *appear* to be direct children of `N`.
165/// 2. aliases. If a parent node type `M` is aliased as some other type `N`, then nodes which
166///    *appear* to have type `N` may have internal structure based on `M`.
167pub fn get_variable_info(
168    syntax_grammar: &SyntaxGrammar,
169    lexical_grammar: &LexicalGrammar,
170    default_aliases: &AliasMap,
171) -> VariableInfoResult<Vec<VariableInfo>> {
172    let child_type_is_visible = |t: &ChildType| {
173        variable_type_for_child_type(t, syntax_grammar, lexical_grammar) >= VariableType::Anonymous
174    };
175
176    let child_type_is_named = |t: &ChildType| {
177        variable_type_for_child_type(t, syntax_grammar, lexical_grammar) == VariableType::Named
178    };
179
180    // Each variable's summary can depend on the summaries of other hidden variables,
181    // and variables can have mutually recursive structure. So we compute the summaries
182    // iteratively, in a loop that terminates only when no more changes are possible.
183    let mut did_change = true;
184    let mut all_initialized = false;
185    let mut result = vec![VariableInfo::default(); syntax_grammar.variables.len()];
186    while did_change {
187        did_change = false;
188
189        for (i, variable) in syntax_grammar.variables.iter().enumerate() {
190            let mut variable_info = result[i].clone();
191
192            // Examine each of the variable's productions. The variable's child types can be
193            // immediately combined across all productions, but the child quantities must be
194            // recorded separately for each production.
195            for production in &variable.productions {
196                let mut production_field_quantities = HashMap::new();
197                let mut production_children_quantity = ChildQuantity::zero();
198                let mut production_children_without_fields_quantity = ChildQuantity::zero();
199                let mut production_has_uninitialized_invisible_children = false;
200
201                if production.steps.len() > 1 {
202                    variable_info.has_multi_step_production = true;
203                }
204
205                for step in &production.steps {
206                    let child_symbol = step.symbol;
207                    let child_type = if let Some(alias) = &step.alias {
208                        ChildType::Aliased(alias.clone())
209                    } else if let Some(alias) = default_aliases.get(&step.symbol) {
210                        ChildType::Aliased(alias.clone())
211                    } else {
212                        ChildType::Normal(child_symbol)
213                    };
214
215                    let child_is_hidden = !child_type_is_visible(&child_type)
216                        && !syntax_grammar.supertype_symbols.contains(&child_symbol);
217
218                    // Maintain the set of all child types for this variable, and the quantity of
219                    // visible children in this production.
220                    did_change |=
221                        extend_sorted(&mut variable_info.children.types, Some(&child_type));
222                    if !child_is_hidden {
223                        production_children_quantity.append(ChildQuantity::one());
224                    }
225
226                    // Maintain the set of child types associated with each field, and the quantity
227                    // of children associated with each field in this production.
228                    if let Some(field_name) = &step.field_name {
229                        let field_info = variable_info
230                            .fields
231                            .entry(field_name.clone())
232                            .or_insert_with(FieldInfo::default);
233                        did_change |= extend_sorted(&mut field_info.types, Some(&child_type));
234
235                        let production_field_quantity = production_field_quantities
236                            .entry(field_name)
237                            .or_insert_with(ChildQuantity::zero);
238
239                        // Inherit the types and quantities of hidden children associated with
240                        // fields.
241                        if child_is_hidden && child_symbol.is_non_terminal() {
242                            let child_variable_info = &result[child_symbol.index];
243                            did_change |= extend_sorted(
244                                &mut field_info.types,
245                                &child_variable_info.children.types,
246                            );
247                            production_field_quantity.append(child_variable_info.children.quantity);
248                        } else {
249                            production_field_quantity.append(ChildQuantity::one());
250                        }
251                    }
252                    // Maintain the set of named children without fields within this variable.
253                    else if child_type_is_named(&child_type) {
254                        production_children_without_fields_quantity.append(ChildQuantity::one());
255                        did_change |= extend_sorted(
256                            &mut variable_info.children_without_fields.types,
257                            Some(&child_type),
258                        );
259                    }
260
261                    // Inherit all child information from hidden children.
262                    if child_is_hidden && child_symbol.is_non_terminal() {
263                        let child_variable_info = &result[child_symbol.index];
264
265                        // If a hidden child can have multiple children, then its parent node can
266                        // appear to have multiple children.
267                        if child_variable_info.has_multi_step_production {
268                            variable_info.has_multi_step_production = true;
269                        }
270
271                        // If a hidden child has fields, then the parent node can appear to have
272                        // those same fields.
273                        for (field_name, child_field_info) in &child_variable_info.fields {
274                            production_field_quantities
275                                .entry(field_name)
276                                .or_insert_with(ChildQuantity::zero)
277                                .append(child_field_info.quantity);
278                            did_change |= extend_sorted(
279                                &mut variable_info
280                                    .fields
281                                    .entry(field_name.clone())
282                                    .or_insert_with(FieldInfo::default)
283                                    .types,
284                                &child_field_info.types,
285                            );
286                        }
287
288                        // If a hidden child has children, then the parent node can appear to have
289                        // those same children.
290                        production_children_quantity.append(child_variable_info.children.quantity);
291                        did_change |= extend_sorted(
292                            &mut variable_info.children.types,
293                            &child_variable_info.children.types,
294                        );
295
296                        // If a hidden child can have named children without fields, then the parent
297                        // node can appear to have those same children.
298                        if step.field_name.is_none() {
299                            let grandchildren_info = &child_variable_info.children_without_fields;
300                            if !grandchildren_info.types.is_empty() {
301                                production_children_without_fields_quantity
302                                    .append(child_variable_info.children_without_fields.quantity);
303                                did_change |= extend_sorted(
304                                    &mut variable_info.children_without_fields.types,
305                                    &child_variable_info.children_without_fields.types,
306                                );
307                            }
308                        }
309                    }
310
311                    // Note whether or not this production contains children whose summaries
312                    // have not yet been computed.
313                    if child_symbol.index >= i && !all_initialized {
314                        production_has_uninitialized_invisible_children = true;
315                    }
316                }
317
318                // If this production's children all have had their summaries initialized,
319                // then expand the quantity information with all of the possibilities introduced
320                // by this production.
321                if !production_has_uninitialized_invisible_children {
322                    did_change |= variable_info
323                        .children
324                        .quantity
325                        .union(production_children_quantity);
326
327                    did_change |= variable_info
328                        .children_without_fields
329                        .quantity
330                        .union(production_children_without_fields_quantity);
331
332                    for (field_name, info) in &mut variable_info.fields {
333                        did_change |= info.quantity.union(
334                            production_field_quantities
335                                .get(field_name)
336                                .copied()
337                                .unwrap_or_else(ChildQuantity::zero),
338                        );
339                    }
340                }
341            }
342
343            result[i] = variable_info;
344        }
345
346        all_initialized = true;
347    }
348
349    for supertype_symbol in &syntax_grammar.supertype_symbols {
350        if result[supertype_symbol.index].has_multi_step_production {
351            let variable = &syntax_grammar.variables[supertype_symbol.index];
352            Err(VariableInfoError::InvalidSupertype(variable.name.clone()))?;
353        }
354    }
355
356    // Update all of the node type lists to eliminate hidden nodes.
357    for supertype_symbol in &syntax_grammar.supertype_symbols {
358        result[supertype_symbol.index]
359            .children
360            .types
361            .retain(child_type_is_visible);
362    }
363    for variable_info in &mut result {
364        for field_info in variable_info.fields.values_mut() {
365            field_info.types.retain(child_type_is_visible);
366        }
367        variable_info.fields.retain(|_, v| !v.types.is_empty());
368        variable_info
369            .children_without_fields
370            .types
371            .retain(child_type_is_visible);
372    }
373
374    Ok(result)
375}
376
377fn get_aliases_by_symbol(
378    syntax_grammar: &SyntaxGrammar,
379    default_aliases: &AliasMap,
380) -> HashMap<Symbol, BTreeSet<Option<Alias>>> {
381    let mut aliases_by_symbol = HashMap::new();
382    for (symbol, alias) in default_aliases {
383        aliases_by_symbol.insert(*symbol, {
384            let mut aliases = BTreeSet::new();
385            aliases.insert(Some(alias.clone()));
386            aliases
387        });
388    }
389    for extra_symbol in &syntax_grammar.extra_symbols {
390        if !default_aliases.contains_key(extra_symbol) {
391            aliases_by_symbol
392                .entry(*extra_symbol)
393                .or_insert_with(BTreeSet::new)
394                .insert(None);
395        }
396    }
397    for variable in &syntax_grammar.variables {
398        for production in &variable.productions {
399            for step in &production.steps {
400                aliases_by_symbol
401                    .entry(step.symbol)
402                    .or_insert_with(BTreeSet::new)
403                    .insert(
404                        step.alias
405                            .as_ref()
406                            .or_else(|| default_aliases.get(&step.symbol))
407                            .cloned(),
408                    );
409            }
410        }
411    }
412    aliases_by_symbol.insert(
413        Symbol::non_terminal(0),
414        std::iter::once(&None).cloned().collect(),
415    );
416    aliases_by_symbol
417}
418
419pub fn get_supertype_symbol_map(
420    syntax_grammar: &SyntaxGrammar,
421    default_aliases: &AliasMap,
422    variable_info: &[VariableInfo],
423) -> BTreeMap<Symbol, Vec<ChildType>> {
424    let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);
425    let mut supertype_symbol_map = BTreeMap::new();
426
427    let mut symbols_by_alias = HashMap::new();
428    for (symbol, aliases) in &aliases_by_symbol {
429        for alias in aliases.iter().flatten() {
430            symbols_by_alias
431                .entry(alias)
432                .or_insert_with(Vec::new)
433                .push(*symbol);
434        }
435    }
436
437    for (i, info) in variable_info.iter().enumerate() {
438        let symbol = Symbol::non_terminal(i);
439        if syntax_grammar.supertype_symbols.contains(&symbol) {
440            let subtypes = info.children.types.clone();
441            supertype_symbol_map.insert(symbol, subtypes);
442        }
443    }
444    supertype_symbol_map
445}
446
/// Result type returned by `generate_node_types_json`.
#[cfg(feature = "load")]
pub type SuperTypeCycleResult<T> = Result<T, SuperTypeCycleError>;

/// Error reported when the supertype/subtype relation contains a cycle.
#[derive(Debug, Error, Serialize)]
pub struct SuperTypeCycleError {
    /// Names of the node types left over once topological sorting got stuck.
    items: Vec<String>,
}
454
455impl std::fmt::Display for SuperTypeCycleError {
456    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
457        write!(f, "Dependency cycle detected in node types:")?;
458        for (i, item) in self.items.iter().enumerate() {
459            write!(f, " {item}")?;
460            if i < self.items.len() - 1 {
461                write!(f, ",")?;
462            }
463        }
464
465        Ok(())
466    }
467}
468
/// Builds the list of node-type descriptions for a grammar.
///
/// The result contains one entry per supertype, one per visible name under
/// which a non-inlined syntax variable can appear (its own name and/or its
/// aliases), and one per named or anonymous token (lexical and external).
/// Entries are ordered supertypes first, then nodes with children/fields, then
/// leaf nodes; within each group they are ordered by kind name.
///
/// `variable_info` must be indexed like `syntax_grammar.variables`.
///
/// # Errors
///
/// Returns a [`SuperTypeCycleError`] when the supertype/subtype relation
/// cannot be topologically sorted because it contains a cycle.
///
/// # Panics
///
/// Panics if a symbol has an unexpected `SymbolType`, or if a regular node's
/// name collides with an existing entry that has no field map (e.g. a
/// supertype of the same name).
#[cfg(feature = "load")]
pub fn generate_node_types_json(
    syntax_grammar: &SyntaxGrammar,
    lexical_grammar: &LexicalGrammar,
    default_aliases: &AliasMap,
    variable_info: &[VariableInfo],
) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
    let mut node_types_json = BTreeMap::new();

    // Resolves a child type to the (name, named) pair under which it is visible.
    let child_type_to_node_type = |child_type: &ChildType| match child_type {
        ChildType::Aliased(alias) => NodeTypeJSON {
            kind: alias.value.clone(),
            named: alias.is_named,
        },
        ChildType::Normal(symbol) => {
            if let Some(alias) = default_aliases.get(symbol) {
                NodeTypeJSON {
                    kind: alias.value.clone(),
                    named: alias.is_named,
                }
            } else {
                match symbol.kind {
                    SymbolType::NonTerminal => {
                        let variable = &syntax_grammar.variables[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    SymbolType::Terminal => {
                        let variable = &lexical_grammar.variables[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    SymbolType::External => {
                        let variable = &syntax_grammar.external_tokens[symbol.index];
                        NodeTypeJSON {
                            kind: variable.name.clone(),
                            named: variable.kind != VariableType::Anonymous,
                        }
                    }
                    _ => panic!("Unexpected symbol type"),
                }
            }
        }
    };

    // Merges one rule's field summary into the JSON description of a field.
    // A summary without types means the field can be absent, so it only
    // clears `required`.
    let populate_field_info_json = |json: &mut FieldInfoJSON, info: &FieldInfo| {
        if info.types.is_empty() {
            json.required = false;
        } else {
            json.multiple |= info.quantity.multiple;
            json.required &= info.quantity.required;
            json.types
                .extend(info.types.iter().map(child_type_to_node_type));
            json.types.sort_unstable();
            json.types.dedup();
        }
    };

    let aliases_by_symbol = get_aliases_by_symbol(syntax_grammar, default_aliases);

    // Shared fallback for symbols that have no recorded aliases.
    let empty = BTreeSet::new();

    // Every name under which an extra symbol can appear.
    let extra_names = syntax_grammar
        .extra_symbols
        .iter()
        .flat_map(|symbol| {
            aliases_by_symbol
                .get(symbol)
                .unwrap_or(&empty)
                .iter()
                .map(|alias| {
                    alias.as_ref().map_or(
                        match symbol.kind {
                            SymbolType::NonTerminal => &syntax_grammar.variables[symbol.index].name,
                            SymbolType::Terminal => &lexical_grammar.variables[symbol.index].name,
                            SymbolType::External => {
                                &syntax_grammar.external_tokens[symbol.index].name
                            }
                            _ => unreachable!(),
                        },
                        |alias| &alias.value,
                    )
                })
        })
        .collect::<HashSet<_>>();

    let mut subtype_map = Vec::new();
    for (i, info) in variable_info.iter().enumerate() {
        let symbol = Symbol::non_terminal(i);
        let variable = &syntax_grammar.variables[i];
        if syntax_grammar.supertype_symbols.contains(&symbol) {
            let node_type_json =
                node_types_json
                    .entry(variable.name.clone())
                    .or_insert_with(|| NodeInfoJSON {
                        kind: variable.name.clone(),
                        named: true,
                        root: false,
                        extra: extra_names.contains(&variable.name),
                        fields: None,
                        children: None,
                        subtypes: None,
                    });
            let mut subtypes = info
                .children
                .types
                .iter()
                .map(child_type_to_node_type)
                .collect::<Vec<_>>();
            subtypes.sort_unstable();
            subtypes.dedup();
            let supertype = NodeTypeJSON {
                kind: node_type_json.kind.clone(),
                named: true,
            };
            subtype_map.push((supertype, subtypes.clone()));
            node_type_json.subtypes = Some(subtypes);
        } else if !syntax_grammar.variables_to_inline.contains(&symbol) {
            // If a rule is aliased under multiple names, then its information
            // contributes to multiple entries in the final JSON.
            for alias in aliases_by_symbol.get(&symbol).unwrap_or(&empty) {
                let kind;
                let is_named;
                if let Some(alias) = alias {
                    kind = &alias.value;
                    is_named = alias.is_named;
                } else if variable.kind.is_visible() {
                    kind = &variable.name;
                    is_named = variable.kind == VariableType::Named;
                } else {
                    continue;
                }

                // There may already be an entry with this name, because multiple
                // rules may be aliased with the same name.
                let mut node_type_existed = true;
                let node_type_json = node_types_json.entry(kind.clone()).or_insert_with(|| {
                    node_type_existed = false;
                    NodeInfoJSON {
                        kind: kind.clone(),
                        named: is_named,
                        root: i == 0,
                        extra: extra_names.contains(&kind),
                        fields: Some(BTreeMap::new()),
                        children: None,
                        subtypes: None,
                    }
                });

                let fields_json = node_type_json.fields.as_mut().unwrap();
                for (new_field, field_info) in &info.fields {
                    let field_json = fields_json.entry(new_field.clone()).or_insert_with(|| {
                        // If another rule is aliased with the same name, and does *not* have this
                        // field, then this field cannot be required.
                        let mut field_json = FieldInfoJSON::default();
                        if node_type_existed {
                            field_json.required = false;
                        }
                        field_json
                    });
                    populate_field_info_json(field_json, field_info);
                }

                // If another rule is aliased with the same name, any fields that aren't present in
                // this cannot be required.
                for (existing_field, field_json) in fields_json.iter_mut() {
                    if !info.fields.contains_key(existing_field) {
                        field_json.required = false;
                    }
                }

                populate_field_info_json(
                    node_type_json
                        .children
                        .get_or_insert_with(FieldInfoJSON::default),
                    &info.children_without_fields,
                );
            }
        }
    }

    // Sort the subtype map topologically so that subtypes are listed before their supertypes.
    let mut sorted_kinds = Vec::with_capacity(subtype_map.len());
    let mut top_sort = topological_sort::TopologicalSort::<String>::new();
    for (supertype, subtypes) in &subtype_map {
        for subtype in subtypes {
            top_sort.add_dependency(subtype.kind.clone(), supertype.kind.clone());
        }
    }
    loop {
        let mut next_kinds = top_sort.pop_all();
        match (next_kinds.is_empty(), top_sort.is_empty()) {
            (true, true) => break,
            (true, false) => {
                // Nothing can be popped but elements remain: they form a cycle.
                let mut items = top_sort.collect::<Vec<String>>();
                items.sort();
                return Err(SuperTypeCycleError { items });
            }
            (false, _) => {
                next_kinds.sort();
                sorted_kinds.extend(next_kinds);
            }
        }
    }

    // Look up each kind's topological rank once instead of scanning
    // `sorted_kinds` on every comparison. A supertype without any subtypes
    // never enters the topological sort; it is ranked last, which is harmless
    // because it has no subtypes to strip in `process_supertypes`.
    let kind_ranks = sorted_kinds
        .iter()
        .enumerate()
        .map(|(rank, kind)| (kind.as_str(), rank))
        .collect::<HashMap<_, _>>();
    subtype_map.sort_by_key(|(supertype, _)| {
        kind_ranks
            .get(supertype.kind.as_str())
            .copied()
            .unwrap_or(usize::MAX)
    });

    for node_type_json in node_types_json.values_mut() {
        // Drop `children` descriptions that ended up without any types.
        if node_type_json
            .children
            .as_ref()
            .is_some_and(|c| c.types.is_empty())
        {
            node_type_json.children = None;
        }

        if let Some(children) = &mut node_type_json.children {
            process_supertypes(children, &subtype_map);
        }
        if let Some(fields) = &mut node_type_json.fields {
            for field_info in fields.values_mut() {
                process_supertypes(field_info, &subtype_map);
            }
        }
    }

    let mut anonymous_node_types = Vec::new();

    // Every (name, kind) under which a lexical token can appear.
    let regular_tokens = lexical_grammar
        .variables
        .iter()
        .enumerate()
        .flat_map(|(i, variable)| {
            aliases_by_symbol
                .get(&Symbol::terminal(i))
                .unwrap_or(&empty)
                .iter()
                .map(move |alias| {
                    alias
                        .as_ref()
                        .map_or((&variable.name, variable.kind), |alias| {
                            (&alias.value, alias.kind())
                        })
                })
        });
    // Every (name, kind) under which an external token can appear.
    let external_tokens =
        syntax_grammar
            .external_tokens
            .iter()
            .enumerate()
            .flat_map(|(i, token)| {
                aliases_by_symbol
                    .get(&Symbol::external(i))
                    .unwrap_or(&empty)
                    .iter()
                    .map(move |alias| {
                        alias.as_ref().map_or((&token.name, token.kind), |alias| {
                            (&alias.value, alias.kind())
                        })
                    })
            });

    for (name, kind) in regular_tokens.chain(external_tokens) {
        match kind {
            VariableType::Named => {
                let node_type_json =
                    node_types_json
                        .entry(name.clone())
                        .or_insert_with(|| NodeInfoJSON {
                            kind: name.clone(),
                            named: true,
                            root: false,
                            extra: extra_names.contains(&name),
                            fields: None,
                            children: None,
                            subtypes: None,
                        });
                // A token that shares its name with a syntax node has no
                // children or fields, so none of them can be required.
                if let Some(children) = &mut node_type_json.children {
                    children.required = false;
                }
                if let Some(fields) = &mut node_type_json.fields {
                    for field in fields.values_mut() {
                        field.required = false;
                    }
                }
            }
            VariableType::Anonymous => anonymous_node_types.push(NodeInfoJSON {
                kind: name.clone(),
                named: false,
                root: false,
                extra: extra_names.contains(&name),
                fields: None,
                children: None,
                subtypes: None,
            }),
            _ => {}
        }
    }

    let mut result = node_types_json.into_values().collect::<Vec<_>>();
    result.extend(anonymous_node_types);
    // Supertypes first, then non-leaf nodes, then leaves; ties are broken by
    // kind name and the remaining flags so that the order is deterministic.
    result.sort_unstable_by(|a, b| {
        b.subtypes
            .is_some()
            .cmp(&a.subtypes.is_some())
            .then_with(|| {
                let a_is_leaf = a.children.is_none() && a.fields.is_none();
                let b_is_leaf = b.children.is_none() && b.fields.is_none();
                a_is_leaf.cmp(&b_is_leaf)
            })
            .then_with(|| a.kind.cmp(&b.kind))
            .then_with(|| a.named.cmp(&b.named))
            .then_with(|| a.root.cmp(&b.root))
            .then_with(|| a.extra.cmp(&b.extra))
    });
    result.dedup();
    Ok(result)
}
793
/// Drops entries from `info.types` that are already covered by a supertype.
///
/// The `(supertype, subtypes)` pairs of `subtype_map` are visited in order. Whenever
/// `info.types` contains the supertype at that point, every entry of `info.types` that
/// also appears in `subtypes` is removed.
#[cfg(feature = "load")]
fn process_supertypes(info: &mut FieldInfoJSON, subtype_map: &[(NodeTypeJSON, Vec<NodeTypeJSON>)]) {
    for entry in subtype_map {
        let (supertype, subtypes) = entry;
        let has_supertype = info.types.iter().any(|existing| existing == supertype);
        if !has_supertype {
            continue;
        }
        info.types
            .retain(|candidate| subtypes.iter().all(|subtype| subtype != candidate));
    }
}
802
803fn variable_type_for_child_type(
804    child_type: &ChildType,
805    syntax_grammar: &SyntaxGrammar,
806    lexical_grammar: &LexicalGrammar,
807) -> VariableType {
808    match child_type {
809        ChildType::Aliased(alias) => alias.kind(),
810        ChildType::Normal(symbol) => {
811            if syntax_grammar.supertype_symbols.contains(symbol) {
812                VariableType::Named
813            } else if syntax_grammar.variables_to_inline.contains(symbol) {
814                VariableType::Hidden
815            } else {
816                match symbol.kind {
817                    SymbolType::NonTerminal => syntax_grammar.variables[symbol.index].kind,
818                    SymbolType::Terminal => lexical_grammar.variables[symbol.index].kind,
819                    SymbolType::External => syntax_grammar.external_tokens[symbol.index].kind,
820                    _ => VariableType::Hidden,
821                }
822            }
823        }
824    }
825}
826
/// Inserts each of `values` into `vec` at the position reported by a binary search,
/// skipping values that the search already finds.
///
/// Because positions come from `binary_search`, `vec` is expected to be sorted on entry;
/// it then stays sorted and free of duplicates.
///
/// Returns `true` if at least one value was inserted, `false` otherwise.
fn extend_sorted<'a, T>(vec: &mut Vec<T>, values: impl IntoIterator<Item = &'a T>) -> bool
where
    T: 'a + Clone + Eq + Ord,
{
    let mut inserted_any = false;
    for value in values {
        if let Err(position) = vec.binary_search(value) {
            vec.insert(position, value.clone());
            inserted_any = true;
        }
    }
    inserted_any
}
840
841#[cfg(all(test, feature = "load"))]
842mod tests {
843    use super::*;
844    use crate::{
845        grammars::{
846            InputGrammar, LexicalVariable, Production, ProductionStep, SyntaxVariable, Variable,
847        },
848        prepare_grammar::prepare_grammar,
849        rules::Rule,
850    };
851
852    #[test]
853    fn test_node_types_simple() {
854        let node_types = get_node_types(&InputGrammar {
855            variables: vec![
856                Variable {
857                    name: "v1".to_string(),
858                    kind: VariableType::Named,
859                    rule: Rule::seq(vec![
860                        Rule::field("f1".to_string(), Rule::named("v2")),
861                        Rule::field("f2".to_string(), Rule::string(";")),
862                    ]),
863                },
864                Variable {
865                    name: "v2".to_string(),
866                    kind: VariableType::Named,
867                    rule: Rule::string("x"),
868                },
869                // This rule is not reachable from the start symbol
870                // so it won't be present in the node_types
871                Variable {
872                    name: "v3".to_string(),
873                    kind: VariableType::Named,
874                    rule: Rule::string("y"),
875                },
876            ],
877            ..Default::default()
878        })
879        .unwrap();
880
881        assert_eq!(node_types.len(), 3);
882
883        assert_eq!(
884            node_types[0],
885            NodeInfoJSON {
886                kind: "v1".to_string(),
887                named: true,
888                root: true,
889                extra: false,
890                subtypes: None,
891                children: None,
892                fields: Some(
893                    vec![
894                        (
895                            "f1".to_string(),
896                            FieldInfoJSON {
897                                multiple: false,
898                                required: true,
899                                types: vec![NodeTypeJSON {
900                                    kind: "v2".to_string(),
901                                    named: true,
902                                }]
903                            }
904                        ),
905                        (
906                            "f2".to_string(),
907                            FieldInfoJSON {
908                                multiple: false,
909                                required: true,
910                                types: vec![NodeTypeJSON {
911                                    kind: ";".to_string(),
912                                    named: false,
913                                }]
914                            }
915                        ),
916                    ]
917                    .into_iter()
918                    .collect()
919                )
920            }
921        );
922        assert_eq!(
923            node_types[1],
924            NodeInfoJSON {
925                kind: ";".to_string(),
926                named: false,
927                root: false,
928                extra: false,
929                subtypes: None,
930                children: None,
931                fields: None
932            }
933        );
934        assert_eq!(
935            node_types[2],
936            NodeInfoJSON {
937                kind: "v2".to_string(),
938                named: true,
939                root: false,
940                extra: false,
941                subtypes: None,
942                children: None,
943                fields: None
944            }
945        );
946    }
947
948    #[test]
949    fn test_node_types_simple_extras() {
950        let node_types = get_node_types(&InputGrammar {
951            extra_symbols: vec![Rule::named("v3")],
952            variables: vec![
953                Variable {
954                    name: "v1".to_string(),
955                    kind: VariableType::Named,
956                    rule: Rule::seq(vec![
957                        Rule::field("f1".to_string(), Rule::named("v2")),
958                        Rule::field("f2".to_string(), Rule::string(";")),
959                    ]),
960                },
961                Variable {
962                    name: "v2".to_string(),
963                    kind: VariableType::Named,
964                    rule: Rule::string("x"),
965                },
966                // This rule is not reachable from the start symbol, but
967                // it is reachable from the 'extra_symbols' so it
968                // should be present in the node_types.
969                // But because it's only a literal, it will get replaced by
970                // a lexical variable.
971                Variable {
972                    name: "v3".to_string(),
973                    kind: VariableType::Named,
974                    rule: Rule::string("y"),
975                },
976            ],
977            ..Default::default()
978        })
979        .unwrap();
980
981        assert_eq!(node_types.len(), 4);
982
983        assert_eq!(
984            node_types[0],
985            NodeInfoJSON {
986                kind: "v1".to_string(),
987                named: true,
988                root: true,
989                extra: false,
990                subtypes: None,
991                children: None,
992                fields: Some(
993                    vec![
994                        (
995                            "f1".to_string(),
996                            FieldInfoJSON {
997                                multiple: false,
998                                required: true,
999                                types: vec![NodeTypeJSON {
1000                                    kind: "v2".to_string(),
1001                                    named: true,
1002                                }]
1003                            }
1004                        ),
1005                        (
1006                            "f2".to_string(),
1007                            FieldInfoJSON {
1008                                multiple: false,
1009                                required: true,
1010                                types: vec![NodeTypeJSON {
1011                                    kind: ";".to_string(),
1012                                    named: false,
1013                                }]
1014                            }
1015                        ),
1016                    ]
1017                    .into_iter()
1018                    .collect()
1019                )
1020            }
1021        );
1022        assert_eq!(
1023            node_types[1],
1024            NodeInfoJSON {
1025                kind: ";".to_string(),
1026                named: false,
1027                root: false,
1028                extra: false,
1029                subtypes: None,
1030                children: None,
1031                fields: None
1032            }
1033        );
1034        assert_eq!(
1035            node_types[2],
1036            NodeInfoJSON {
1037                kind: "v2".to_string(),
1038                named: true,
1039                root: false,
1040                extra: false,
1041                subtypes: None,
1042                children: None,
1043                fields: None
1044            }
1045        );
1046        assert_eq!(
1047            node_types[3],
1048            NodeInfoJSON {
1049                kind: "v3".to_string(),
1050                named: true,
1051                root: false,
1052                extra: true,
1053                subtypes: None,
1054                children: None,
1055                fields: None
1056            }
1057        );
1058    }
1059
1060    #[test]
1061    fn test_node_types_deeper_extras() {
1062        let node_types = get_node_types(&InputGrammar {
1063            extra_symbols: vec![Rule::named("v3")],
1064            variables: vec![
1065                Variable {
1066                    name: "v1".to_string(),
1067                    kind: VariableType::Named,
1068                    rule: Rule::seq(vec![
1069                        Rule::field("f1".to_string(), Rule::named("v2")),
1070                        Rule::field("f2".to_string(), Rule::string(";")),
1071                    ]),
1072                },
1073                Variable {
1074                    name: "v2".to_string(),
1075                    kind: VariableType::Named,
1076                    rule: Rule::string("x"),
1077                },
1078                // This rule is not reachable from the start symbol, but
1079                // it is reachable from the 'extra_symbols' so it
1080                // should be present in the node_types.
1081                // Because it is not just a literal, it won't get replaced
1082                // by a lexical variable.
1083                Variable {
1084                    name: "v3".to_string(),
1085                    kind: VariableType::Named,
1086                    rule: Rule::seq(vec![Rule::string("y"), Rule::repeat(Rule::string("z"))]),
1087                },
1088            ],
1089            ..Default::default()
1090        })
1091        .unwrap();
1092
1093        assert_eq!(node_types.len(), 6);
1094
1095        assert_eq!(
1096            node_types[0],
1097            NodeInfoJSON {
1098                kind: "v1".to_string(),
1099                named: true,
1100                root: true,
1101                extra: false,
1102                subtypes: None,
1103                children: None,
1104                fields: Some(
1105                    vec![
1106                        (
1107                            "f1".to_string(),
1108                            FieldInfoJSON {
1109                                multiple: false,
1110                                required: true,
1111                                types: vec![NodeTypeJSON {
1112                                    kind: "v2".to_string(),
1113                                    named: true,
1114                                }]
1115                            }
1116                        ),
1117                        (
1118                            "f2".to_string(),
1119                            FieldInfoJSON {
1120                                multiple: false,
1121                                required: true,
1122                                types: vec![NodeTypeJSON {
1123                                    kind: ";".to_string(),
1124                                    named: false,
1125                                }]
1126                            }
1127                        ),
1128                    ]
1129                    .into_iter()
1130                    .collect()
1131                )
1132            }
1133        );
1134        assert_eq!(
1135            node_types[1],
1136            NodeInfoJSON {
1137                kind: "v3".to_string(),
1138                named: true,
1139                root: false,
1140                extra: true,
1141                subtypes: None,
1142                children: None,
1143                fields: Some(BTreeMap::default())
1144            }
1145        );
1146        assert_eq!(
1147            node_types[2],
1148            NodeInfoJSON {
1149                kind: ";".to_string(),
1150                named: false,
1151                root: false,
1152                extra: false,
1153                subtypes: None,
1154                children: None,
1155                fields: None
1156            }
1157        );
1158        assert_eq!(
1159            node_types[3],
1160            NodeInfoJSON {
1161                kind: "v2".to_string(),
1162                named: true,
1163                root: false,
1164                extra: false,
1165                subtypes: None,
1166                children: None,
1167                fields: None
1168            }
1169        );
1170    }
1171
1172    #[test]
1173    fn test_node_types_with_supertypes() {
1174        let node_types = get_node_types(&InputGrammar {
1175            supertype_symbols: vec!["_v2".to_string()],
1176            variables: vec![
1177                Variable {
1178                    name: "v1".to_string(),
1179                    kind: VariableType::Named,
1180                    rule: Rule::field("f1".to_string(), Rule::named("_v2")),
1181                },
1182                Variable {
1183                    name: "_v2".to_string(),
1184                    kind: VariableType::Hidden,
1185                    rule: Rule::choice(vec![
1186                        Rule::named("v3"),
1187                        Rule::named("v4"),
1188                        Rule::string("*"),
1189                    ]),
1190                },
1191                Variable {
1192                    name: "v3".to_string(),
1193                    kind: VariableType::Named,
1194                    rule: Rule::string("x"),
1195                },
1196                Variable {
1197                    name: "v4".to_string(),
1198                    kind: VariableType::Named,
1199                    rule: Rule::string("y"),
1200                },
1201            ],
1202            ..Default::default()
1203        })
1204        .unwrap();
1205
1206        assert_eq!(
1207            node_types[0],
1208            NodeInfoJSON {
1209                kind: "_v2".to_string(),
1210                named: true,
1211                root: false,
1212                extra: false,
1213                fields: None,
1214                children: None,
1215                subtypes: Some(vec![
1216                    NodeTypeJSON {
1217                        kind: "*".to_string(),
1218                        named: false,
1219                    },
1220                    NodeTypeJSON {
1221                        kind: "v3".to_string(),
1222                        named: true,
1223                    },
1224                    NodeTypeJSON {
1225                        kind: "v4".to_string(),
1226                        named: true,
1227                    },
1228                ]),
1229            }
1230        );
1231        assert_eq!(
1232            node_types[1],
1233            NodeInfoJSON {
1234                kind: "v1".to_string(),
1235                named: true,
1236                root: true,
1237                extra: false,
1238                subtypes: None,
1239                children: None,
1240                fields: Some(
1241                    vec![(
1242                        "f1".to_string(),
1243                        FieldInfoJSON {
1244                            multiple: false,
1245                            required: true,
1246                            types: vec![NodeTypeJSON {
1247                                kind: "_v2".to_string(),
1248                                named: true,
1249                            }]
1250                        }
1251                    ),]
1252                    .into_iter()
1253                    .collect()
1254                )
1255            }
1256        );
1257    }
1258
1259    #[test]
1260    fn test_node_types_for_children_without_fields() {
1261        let node_types = get_node_types(&InputGrammar {
1262            variables: vec![
1263                Variable {
1264                    name: "v1".to_string(),
1265                    kind: VariableType::Named,
1266                    rule: Rule::seq(vec![
1267                        Rule::named("v2"),
1268                        Rule::field("f1".to_string(), Rule::named("v3")),
1269                        Rule::named("v4"),
1270                    ]),
1271                },
1272                Variable {
1273                    name: "v2".to_string(),
1274                    kind: VariableType::Named,
1275                    rule: Rule::seq(vec![
1276                        Rule::string("{"),
1277                        Rule::choice(vec![Rule::named("v3"), Rule::Blank]),
1278                        Rule::string("}"),
1279                    ]),
1280                },
1281                Variable {
1282                    name: "v3".to_string(),
1283                    kind: VariableType::Named,
1284                    rule: Rule::string("x"),
1285                },
1286                Variable {
1287                    name: "v4".to_string(),
1288                    kind: VariableType::Named,
1289                    rule: Rule::string("y"),
1290                },
1291            ],
1292            ..Default::default()
1293        })
1294        .unwrap();
1295
1296        assert_eq!(
1297            node_types[0],
1298            NodeInfoJSON {
1299                kind: "v1".to_string(),
1300                named: true,
1301                root: true,
1302                extra: false,
1303                subtypes: None,
1304                children: Some(FieldInfoJSON {
1305                    multiple: true,
1306                    required: true,
1307                    types: vec![
1308                        NodeTypeJSON {
1309                            kind: "v2".to_string(),
1310                            named: true,
1311                        },
1312                        NodeTypeJSON {
1313                            kind: "v4".to_string(),
1314                            named: true,
1315                        },
1316                    ]
1317                }),
1318                fields: Some(
1319                    vec![(
1320                        "f1".to_string(),
1321                        FieldInfoJSON {
1322                            multiple: false,
1323                            required: true,
1324                            types: vec![NodeTypeJSON {
1325                                kind: "v3".to_string(),
1326                                named: true,
1327                            }]
1328                        }
1329                    ),]
1330                    .into_iter()
1331                    .collect()
1332                )
1333            }
1334        );
1335        assert_eq!(
1336            node_types[1],
1337            NodeInfoJSON {
1338                kind: "v2".to_string(),
1339                named: true,
1340                root: false,
1341                extra: false,
1342                subtypes: None,
1343                children: Some(FieldInfoJSON {
1344                    multiple: false,
1345                    required: false,
1346                    types: vec![NodeTypeJSON {
1347                        kind: "v3".to_string(),
1348                        named: true,
1349                    },]
1350                }),
1351                fields: Some(BTreeMap::new()),
1352            }
1353        );
1354    }
1355
1356    #[test]
1357    fn test_node_types_with_inlined_rules() {
1358        let node_types = get_node_types(&InputGrammar {
1359            variables_to_inline: vec!["v2".to_string()],
1360            variables: vec![
1361                Variable {
1362                    name: "v1".to_string(),
1363                    kind: VariableType::Named,
1364                    rule: Rule::seq(vec![Rule::named("v2"), Rule::named("v3")]),
1365                },
1366                // v2 should not appear in the node types, since it is inlined
1367                Variable {
1368                    name: "v2".to_string(),
1369                    kind: VariableType::Named,
1370                    rule: Rule::alias(Rule::string("a"), "x".to_string(), true),
1371                },
1372                Variable {
1373                    name: "v3".to_string(),
1374                    kind: VariableType::Named,
1375                    rule: Rule::string("b"),
1376                },
1377            ],
1378            ..Default::default()
1379        })
1380        .unwrap();
1381
1382        assert_eq!(
1383            node_types[0],
1384            NodeInfoJSON {
1385                kind: "v1".to_string(),
1386                named: true,
1387                root: true,
1388                extra: false,
1389                subtypes: None,
1390                children: Some(FieldInfoJSON {
1391                    multiple: true,
1392                    required: true,
1393                    types: vec![
1394                        NodeTypeJSON {
1395                            kind: "v3".to_string(),
1396                            named: true,
1397                        },
1398                        NodeTypeJSON {
1399                            kind: "x".to_string(),
1400                            named: true,
1401                        },
1402                    ]
1403                }),
1404                fields: Some(BTreeMap::new()),
1405            }
1406        );
1407    }
1408
1409    #[test]
1410    fn test_node_types_for_aliased_nodes() {
1411        let node_types = get_node_types(&InputGrammar {
1412            variables: vec![
1413                Variable {
1414                    name: "thing".to_string(),
1415                    kind: VariableType::Named,
1416                    rule: Rule::choice(vec![Rule::named("type"), Rule::named("expression")]),
1417                },
1418                Variable {
1419                    name: "type".to_string(),
1420                    kind: VariableType::Named,
1421                    rule: Rule::choice(vec![
1422                        Rule::alias(
1423                            Rule::named("identifier"),
1424                            "type_identifier".to_string(),
1425                            true,
1426                        ),
1427                        Rule::string("void"),
1428                    ]),
1429                },
1430                Variable {
1431                    name: "expression".to_string(),
1432                    kind: VariableType::Named,
1433                    rule: Rule::choice(vec![
1434                        Rule::named("identifier"),
1435                        Rule::alias(
1436                            Rule::named("foo_identifier"),
1437                            "identifier".to_string(),
1438                            true,
1439                        ),
1440                    ]),
1441                },
1442                Variable {
1443                    name: "identifier".to_string(),
1444                    kind: VariableType::Named,
1445                    rule: Rule::pattern("\\w+", ""),
1446                },
1447                Variable {
1448                    name: "foo_identifier".to_string(),
1449                    kind: VariableType::Named,
1450                    rule: Rule::pattern("[\\w-]+", ""),
1451                },
1452            ],
1453            ..Default::default()
1454        })
1455        .unwrap();
1456
1457        assert_eq!(node_types.iter().find(|t| t.kind == "foo_identifier"), None);
1458        assert_eq!(
1459            node_types.iter().find(|t| t.kind == "identifier"),
1460            Some(&NodeInfoJSON {
1461                kind: "identifier".to_string(),
1462                named: true,
1463                root: false,
1464                extra: false,
1465                subtypes: None,
1466                children: None,
1467                fields: None,
1468            })
1469        );
1470        assert_eq!(
1471            node_types.iter().find(|t| t.kind == "type_identifier"),
1472            Some(&NodeInfoJSON {
1473                kind: "type_identifier".to_string(),
1474                named: true,
1475                root: false,
1476                extra: false,
1477                subtypes: None,
1478                children: None,
1479                fields: None,
1480            })
1481        );
1482    }
1483
1484    #[test]
1485    fn test_node_types_with_multiple_valued_fields() {
1486        let node_types = get_node_types(&InputGrammar {
1487            variables: vec![
1488                Variable {
1489                    name: "a".to_string(),
1490                    kind: VariableType::Named,
1491                    rule: Rule::seq(vec![
1492                        Rule::choice(vec![
1493                            Rule::Blank,
1494                            Rule::repeat(Rule::field("f1".to_string(), Rule::named("b"))),
1495                        ]),
1496                        Rule::repeat(Rule::named("c")),
1497                    ]),
1498                },
1499                Variable {
1500                    name: "b".to_string(),
1501                    kind: VariableType::Named,
1502                    rule: Rule::string("b"),
1503                },
1504                Variable {
1505                    name: "c".to_string(),
1506                    kind: VariableType::Named,
1507                    rule: Rule::string("c"),
1508                },
1509            ],
1510            ..Default::default()
1511        })
1512        .unwrap();
1513
1514        assert_eq!(
1515            node_types[0],
1516            NodeInfoJSON {
1517                kind: "a".to_string(),
1518                named: true,
1519                root: true,
1520                extra: false,
1521                subtypes: None,
1522                children: Some(FieldInfoJSON {
1523                    multiple: true,
1524                    required: true,
1525                    types: vec![NodeTypeJSON {
1526                        kind: "c".to_string(),
1527                        named: true,
1528                    },]
1529                }),
1530                fields: Some(
1531                    vec![(
1532                        "f1".to_string(),
1533                        FieldInfoJSON {
1534                            multiple: true,
1535                            required: false,
1536                            types: vec![NodeTypeJSON {
1537                                kind: "b".to_string(),
1538                                named: true,
1539                            }]
1540                        }
1541                    )]
1542                    .into_iter()
1543                    .collect()
1544                ),
1545            }
1546        );
1547    }
1548
1549    #[test]
1550    fn test_node_types_with_fields_on_hidden_tokens() {
1551        let node_types = get_node_types(&InputGrammar {
1552            variables: vec![Variable {
1553                name: "script".to_string(),
1554                kind: VariableType::Named,
1555                rule: Rule::seq(vec![
1556                    Rule::field("a".to_string(), Rule::pattern("hi", "")),
1557                    Rule::field("b".to_string(), Rule::pattern("bye", "")),
1558                ]),
1559            }],
1560            ..Default::default()
1561        })
1562        .unwrap();
1563
1564        assert_eq!(
1565            node_types,
1566            [NodeInfoJSON {
1567                kind: "script".to_string(),
1568                named: true,
1569                root: true,
1570                extra: false,
1571                fields: Some(BTreeMap::new()),
1572                children: None,
1573                subtypes: None
1574            }]
1575        );
1576    }
1577
1578    #[test]
1579    fn test_node_types_with_multiple_rules_same_alias_name() {
1580        let node_types = get_node_types(&InputGrammar {
1581            variables: vec![
1582                Variable {
1583                    name: "script".to_string(),
1584                    kind: VariableType::Named,
1585                    rule: Rule::choice(vec![
1586                        Rule::named("a"),
1587                        // Rule `b` is aliased as rule `a`
1588                        Rule::alias(Rule::named("b"), "a".to_string(), true),
1589                    ]),
1590                },
1591                Variable {
1592                    name: "a".to_string(),
1593                    kind: VariableType::Named,
1594                    rule: Rule::seq(vec![
1595                        Rule::field("f1".to_string(), Rule::string("1")),
1596                        Rule::field("f2".to_string(), Rule::string("2")),
1597                    ]),
1598                },
1599                Variable {
1600                    name: "b".to_string(),
1601                    kind: VariableType::Named,
1602                    rule: Rule::seq(vec![
1603                        Rule::field("f2".to_string(), Rule::string("22")),
1604                        Rule::field("f2".to_string(), Rule::string("222")),
1605                        Rule::field("f3".to_string(), Rule::string("3")),
1606                    ]),
1607                },
1608            ],
1609            ..Default::default()
1610        })
1611        .unwrap();
1612
1613        assert_eq!(
1614            &node_types
1615                .iter()
1616                .map(|t| t.kind.as_str())
1617                .collect::<Vec<_>>(),
1618            &["a", "script", "1", "2", "22", "222", "3"]
1619        );
1620
1621        assert_eq!(
1622            &node_types[0..2],
1623            &[
1624                // A combination of the types for `a` and `b`.
1625                NodeInfoJSON {
1626                    kind: "a".to_string(),
1627                    named: true,
1628                    root: false,
1629                    extra: false,
1630                    subtypes: None,
1631                    children: None,
1632                    fields: Some(
1633                        vec![
1634                            (
1635                                "f1".to_string(),
1636                                FieldInfoJSON {
1637                                    multiple: false,
1638                                    required: false,
1639                                    types: vec![NodeTypeJSON {
1640                                        kind: "1".to_string(),
1641                                        named: false,
1642                                    }]
1643                                }
1644                            ),
1645                            (
1646                                "f2".to_string(),
1647                                FieldInfoJSON {
1648                                    multiple: true,
1649                                    required: true,
1650                                    types: vec![
1651                                        NodeTypeJSON {
1652                                            kind: "2".to_string(),
1653                                            named: false,
1654                                        },
1655                                        NodeTypeJSON {
1656                                            kind: "22".to_string(),
1657                                            named: false,
1658                                        },
1659                                        NodeTypeJSON {
1660                                            kind: "222".to_string(),
1661                                            named: false,
1662                                        }
1663                                    ]
1664                                },
1665                            ),
1666                            (
1667                                "f3".to_string(),
1668                                FieldInfoJSON {
1669                                    multiple: false,
1670                                    required: false,
1671                                    types: vec![NodeTypeJSON {
1672                                        kind: "3".to_string(),
1673                                        named: false,
1674                                    }]
1675                                }
1676                            ),
1677                        ]
1678                        .into_iter()
1679                        .collect()
1680                    ),
1681                },
1682                NodeInfoJSON {
1683                    kind: "script".to_string(),
1684                    named: true,
1685                    root: true,
1686                    extra: false,
1687                    subtypes: None,
1688                    // Only one node
1689                    children: Some(FieldInfoJSON {
1690                        multiple: false,
1691                        required: true,
1692                        types: vec![NodeTypeJSON {
1693                            kind: "a".to_string(),
1694                            named: true,
1695                        }]
1696                    }),
1697                    fields: Some(BTreeMap::new()),
1698                }
1699            ]
1700        );
1701    }
1702
1703    #[test]
1704    fn test_node_types_with_tokens_aliased_to_match_rules() {
1705        let node_types = get_node_types(&InputGrammar {
1706            variables: vec![
1707                Variable {
1708                    name: "a".to_string(),
1709                    kind: VariableType::Named,
1710                    rule: Rule::seq(vec![Rule::named("b"), Rule::named("c")]),
1711                },
1712                // Ordinarily, `b` nodes have two named `c` children.
1713                Variable {
1714                    name: "b".to_string(),
1715                    kind: VariableType::Named,
1716                    rule: Rule::seq(vec![Rule::named("c"), Rule::string("B"), Rule::named("c")]),
1717                },
1718                Variable {
1719                    name: "c".to_string(),
1720                    kind: VariableType::Named,
1721                    rule: Rule::choice(vec![
1722                        Rule::string("C"),
1723                        // This token is aliased as a `b`, which will produce a `b` node
1724                        // with no children.
1725                        Rule::alias(Rule::string("D"), "b".to_string(), true),
1726                    ]),
1727                },
1728            ],
1729            ..Default::default()
1730        })
1731        .unwrap();
1732
1733        assert_eq!(
1734            node_types.iter().map(|n| &n.kind).collect::<Vec<_>>(),
1735            &["a", "b", "c", "B", "C"]
1736        );
1737        assert_eq!(
1738            node_types[1],
1739            NodeInfoJSON {
1740                kind: "b".to_string(),
1741                named: true,
1742                root: false,
1743                extra: false,
1744                subtypes: None,
1745                children: Some(FieldInfoJSON {
1746                    multiple: true,
1747                    required: false,
1748                    types: vec![NodeTypeJSON {
1749                        kind: "c".to_string(),
1750                        named: true,
1751                    }]
1752                }),
1753                fields: Some(BTreeMap::new()),
1754            }
1755        );
1756    }
1757
1758    #[test]
1759    fn test_get_variable_info() {
1760        let variable_info = get_variable_info(
1761            &build_syntax_grammar(
1762                vec![
1763                    // Required field `field1` has only one node type.
1764                    SyntaxVariable {
1765                        name: "rule0".to_string(),
1766                        kind: VariableType::Named,
1767                        productions: vec![Production {
1768                            dynamic_precedence: 0,
1769                            steps: vec![
1770                                ProductionStep::new(Symbol::terminal(0)),
1771                                ProductionStep::new(Symbol::non_terminal(1))
1772                                    .with_field_name("field1"),
1773                            ],
1774                        }],
1775                    },
1776                    // Hidden node
1777                    SyntaxVariable {
1778                        name: "_rule1".to_string(),
1779                        kind: VariableType::Hidden,
1780                        productions: vec![Production {
1781                            dynamic_precedence: 0,
1782                            steps: vec![ProductionStep::new(Symbol::terminal(1))],
1783                        }],
1784                    },
1785                    // Optional field `field2` can have two possible node types.
1786                    SyntaxVariable {
1787                        name: "rule2".to_string(),
1788                        kind: VariableType::Named,
1789                        productions: vec![
1790                            Production {
1791                                dynamic_precedence: 0,
1792                                steps: vec![ProductionStep::new(Symbol::terminal(0))],
1793                            },
1794                            Production {
1795                                dynamic_precedence: 0,
1796                                steps: vec![
1797                                    ProductionStep::new(Symbol::terminal(0)),
1798                                    ProductionStep::new(Symbol::terminal(2))
1799                                        .with_field_name("field2"),
1800                                ],
1801                            },
1802                            Production {
1803                                dynamic_precedence: 0,
1804                                steps: vec![
1805                                    ProductionStep::new(Symbol::terminal(0)),
1806                                    ProductionStep::new(Symbol::terminal(3))
1807                                        .with_field_name("field2"),
1808                                ],
1809                            },
1810                        ],
1811                    },
1812                ],
1813                vec![],
1814            ),
1815            &build_lexical_grammar(),
1816            &AliasMap::new(),
1817        )
1818        .unwrap();
1819
1820        assert_eq!(
1821            variable_info[0].fields,
1822            vec![(
1823                "field1".to_string(),
1824                FieldInfo {
1825                    quantity: ChildQuantity {
1826                        exists: true,
1827                        required: true,
1828                        multiple: false,
1829                    },
1830                    types: vec![ChildType::Normal(Symbol::terminal(1))],
1831                }
1832            )]
1833            .into_iter()
1834            .collect::<HashMap<_, _>>()
1835        );
1836
1837        assert_eq!(
1838            variable_info[2].fields,
1839            vec![(
1840                "field2".to_string(),
1841                FieldInfo {
1842                    quantity: ChildQuantity {
1843                        exists: true,
1844                        required: false,
1845                        multiple: false,
1846                    },
1847                    types: vec![
1848                        ChildType::Normal(Symbol::terminal(2)),
1849                        ChildType::Normal(Symbol::terminal(3)),
1850                    ],
1851                }
1852            )]
1853            .into_iter()
1854            .collect::<HashMap<_, _>>()
1855        );
1856    }
1857
1858    #[test]
1859    fn test_get_variable_info_with_repetitions_inside_fields() {
1860        let variable_info = get_variable_info(
1861            &build_syntax_grammar(
1862                vec![
1863                    // Field associated with a repetition.
1864                    SyntaxVariable {
1865                        name: "rule0".to_string(),
1866                        kind: VariableType::Named,
1867                        productions: vec![
1868                            Production {
1869                                dynamic_precedence: 0,
1870                                steps: vec![ProductionStep::new(Symbol::non_terminal(1))
1871                                    .with_field_name("field1")],
1872                            },
1873                            Production {
1874                                dynamic_precedence: 0,
1875                                steps: vec![],
1876                            },
1877                        ],
1878                    },
1879                    // Repetition node
1880                    SyntaxVariable {
1881                        name: "_rule0_repeat".to_string(),
1882                        kind: VariableType::Hidden,
1883                        productions: vec![
1884                            Production {
1885                                dynamic_precedence: 0,
1886                                steps: vec![ProductionStep::new(Symbol::terminal(1))],
1887                            },
1888                            Production {
1889                                dynamic_precedence: 0,
1890                                steps: vec![
1891                                    ProductionStep::new(Symbol::non_terminal(1)),
1892                                    ProductionStep::new(Symbol::non_terminal(1)),
1893                                ],
1894                            },
1895                        ],
1896                    },
1897                ],
1898                vec![],
1899            ),
1900            &build_lexical_grammar(),
1901            &AliasMap::new(),
1902        )
1903        .unwrap();
1904
1905        assert_eq!(
1906            variable_info[0].fields,
1907            vec![(
1908                "field1".to_string(),
1909                FieldInfo {
1910                    quantity: ChildQuantity {
1911                        exists: true,
1912                        required: false,
1913                        multiple: true,
1914                    },
1915                    types: vec![ChildType::Normal(Symbol::terminal(1))],
1916                }
1917            )]
1918            .into_iter()
1919            .collect::<HashMap<_, _>>()
1920        );
1921    }
1922
1923    #[test]
1924    fn test_get_variable_info_with_inherited_fields() {
1925        let variable_info = get_variable_info(
1926            &build_syntax_grammar(
1927                vec![
1928                    SyntaxVariable {
1929                        name: "rule0".to_string(),
1930                        kind: VariableType::Named,
1931                        productions: vec![
1932                            Production {
1933                                dynamic_precedence: 0,
1934                                steps: vec![
1935                                    ProductionStep::new(Symbol::terminal(0)),
1936                                    ProductionStep::new(Symbol::non_terminal(1)),
1937                                    ProductionStep::new(Symbol::terminal(1)),
1938                                ],
1939                            },
1940                            Production {
1941                                dynamic_precedence: 0,
1942                                steps: vec![ProductionStep::new(Symbol::non_terminal(1))],
1943                            },
1944                        ],
1945                    },
1946                    // Hidden node with fields
1947                    SyntaxVariable {
1948                        name: "_rule1".to_string(),
1949                        kind: VariableType::Hidden,
1950                        productions: vec![Production {
1951                            dynamic_precedence: 0,
1952                            steps: vec![
1953                                ProductionStep::new(Symbol::terminal(2)).with_alias(".", false),
1954                                ProductionStep::new(Symbol::terminal(3)).with_field_name("field1"),
1955                            ],
1956                        }],
1957                    },
1958                ],
1959                vec![],
1960            ),
1961            &build_lexical_grammar(),
1962            &AliasMap::new(),
1963        )
1964        .unwrap();
1965
1966        assert_eq!(
1967            variable_info[0].fields,
1968            vec![(
1969                "field1".to_string(),
1970                FieldInfo {
1971                    quantity: ChildQuantity {
1972                        exists: true,
1973                        required: true,
1974                        multiple: false,
1975                    },
1976                    types: vec![ChildType::Normal(Symbol::terminal(3))],
1977                }
1978            )]
1979            .into_iter()
1980            .collect::<HashMap<_, _>>()
1981        );
1982
1983        assert_eq!(
1984            variable_info[0].children_without_fields,
1985            FieldInfo {
1986                quantity: ChildQuantity {
1987                    exists: true,
1988                    required: false,
1989                    multiple: true,
1990                },
1991                types: vec![
1992                    ChildType::Normal(Symbol::terminal(0)),
1993                    ChildType::Normal(Symbol::terminal(1)),
1994                ],
1995            }
1996        );
1997    }
1998
1999    #[test]
2000    fn test_get_variable_info_with_supertypes() {
2001        let variable_info = get_variable_info(
2002            &build_syntax_grammar(
2003                vec![
2004                    SyntaxVariable {
2005                        name: "rule0".to_string(),
2006                        kind: VariableType::Named,
2007                        productions: vec![Production {
2008                            dynamic_precedence: 0,
2009                            steps: vec![
2010                                ProductionStep::new(Symbol::terminal(0)),
2011                                ProductionStep::new(Symbol::non_terminal(1))
2012                                    .with_field_name("field1"),
2013                                ProductionStep::new(Symbol::terminal(1)),
2014                            ],
2015                        }],
2016                    },
2017                    SyntaxVariable {
2018                        name: "_rule1".to_string(),
2019                        kind: VariableType::Hidden,
2020                        productions: vec![
2021                            Production {
2022                                dynamic_precedence: 0,
2023                                steps: vec![ProductionStep::new(Symbol::terminal(2))],
2024                            },
2025                            Production {
2026                                dynamic_precedence: 0,
2027                                steps: vec![ProductionStep::new(Symbol::terminal(3))],
2028                            },
2029                        ],
2030                    },
2031                ],
2032                // _rule1 is a supertype
2033                vec![Symbol::non_terminal(1)],
2034            ),
2035            &build_lexical_grammar(),
2036            &AliasMap::new(),
2037        )
2038        .unwrap();
2039
2040        assert_eq!(
2041            variable_info[0].fields,
2042            vec![(
2043                "field1".to_string(),
2044                FieldInfo {
2045                    quantity: ChildQuantity {
2046                        exists: true,
2047                        required: true,
2048                        multiple: false,
2049                    },
2050                    types: vec![ChildType::Normal(Symbol::non_terminal(1))],
2051                }
2052            )]
2053            .into_iter()
2054            .collect::<HashMap<_, _>>()
2055        );
2056    }
2057
2058    fn get_node_types(grammar: &InputGrammar) -> SuperTypeCycleResult<Vec<NodeInfoJSON>> {
2059        let (syntax_grammar, lexical_grammar, _, default_aliases) =
2060            prepare_grammar(grammar).unwrap();
2061        let variable_info =
2062            get_variable_info(&syntax_grammar, &lexical_grammar, &default_aliases).unwrap();
2063        generate_node_types_json(
2064            &syntax_grammar,
2065            &lexical_grammar,
2066            &default_aliases,
2067            &variable_info,
2068        )
2069    }
2070
2071    fn build_syntax_grammar(
2072        variables: Vec<SyntaxVariable>,
2073        supertype_symbols: Vec<Symbol>,
2074    ) -> SyntaxGrammar {
2075        SyntaxGrammar {
2076            variables,
2077            supertype_symbols,
2078            ..SyntaxGrammar::default()
2079        }
2080    }
2081
2082    fn build_lexical_grammar() -> LexicalGrammar {
2083        let mut lexical_grammar = LexicalGrammar::default();
2084        for i in 0..10 {
2085            lexical_grammar.variables.push(LexicalVariable {
2086                name: format!("token_{i}"),
2087                kind: VariableType::Named,
2088                implicit_precedence: 0,
2089                start_state: 0,
2090            });
2091        }
2092        lexical_grammar
2093    }
2094}