regen/core/
rule.rs

1//! Core logic for language rules
2
3use crate::grammar::{pt, Ctx, Token};
4use crate::merge_list_tail_optional_first;
5use crate::sdk::{Error, MergedListTail};
6use heck::ToUpperCamelCase;
7
8use super::hook::Hook;
9use super::param::Param;
10
11/// Definition for a rule (a.k.a. derivation)
12///
13/// A rule is a derivation step that can be used to generate the Abstract Syntax Tree (AST).
14///
15/// There are two types of derivations:
16/// - Union: a union of derivations. The first derivation that succeeds is used.
17///   For example: `rule Biz = Foo | Bar`, where Foo and Bar are also defined by the keyword `rule`
18///   The AST generator will try to derive `Foo` first, and if it fails, it will try to derive `Bar`.
19/// - Derivation: Derive into smaller rules. Each part is represented by a parameter analogous to a function parameter.
20///   For example: `rule Foo(bat: Bar, baz: Baz);`,
21///     where Bar and Baz are also defined by the keyword `rule`
22///   The AST generator will derive Bar and Baz in order, and then combine them into a Foo AST node.
23///
24/// With Union rules, only other rules can be specified as part of the union.
25/// With derivation rules, token types can also be specified.
26#[derive(Debug, Clone)]
27pub struct Rule {
28    /// Name of the rule. The AST and PT generated types depend on this name.
29    pub name: String,
30    /// Optional parser hook
31    pub hook: Option<Hook>,
32    /// Value of the rule
33    pub value: RuleValue,
34}
35
36impl Rule {
37    /// Get the base name of the struct for this rule,
38    /// which is the UpperCamelCase of the name
39    #[inline]
40    pub fn struct_name(&self) -> String {
41        self.name.to_upper_camel_case()
42    }
43}
44
45/// Parser hook for Rule
46pub fn parse_rule(pt: &mut pt::DefineRuleStatement, ctx: &mut Ctx) -> Option<()> {
47    // Take value which might be unresolved
48    let value = match pt.m_body.as_ref().val {
49        None => {
50            // Rule body is not resolved
51            // we will still pass the rule through to make sure rule names can be resolved
52            // otherwise there might be an unhealthy amount of errors
53            RuleValue::Union(vec![])
54        }
55        Some(_) => pt.m_body.take_unchecked(),
56    };
57
58    if let RuleValue::Union(union) = &value {
59        // Validate that rule union cannot be recursive
60        if union.contains(&pt.m_rule_name) {
61            let msg = "Union rule cannot be recursive".to_owned();
62            let help = format!(
63                "consider removing \"{name}\" from the union",
64                name = &pt.m_rule_name
65            );
66            ctx.err
67                .push(Error::from_token(&pt.ast.m_rule_name, msg, help));
68        }
69    }
70    // Take hook which might be None or unresolved
71    let hook = pt.m_hook_attr.as_mut().as_mut().map(|x| x.take_unchecked());
72
73    // Add the rule to the context
74    if !ctx.val.add_rule(Rule {
75        name: pt.m_rule_name.clone(),
76        hook,
77        value,
78    }) {
79        let name = &pt.m_rule_name;
80        let msg = format!("Duplicate rule definition: {name}.");
81        let help = "Remove or rename the duplicate definition".to_owned();
82        ctx.err
83            .push(Error::from_token(&pt.ast.m_rule_name, msg, help));
84    }
85
86    None
87}
88
89/// Value of a rule
90#[derive(Debug, Clone)]
91pub enum RuleValue {
92    /// Union derivation
93    Union(Vec<String>),
94    /// Function-like derivation
95    Function(Vec<Param>),
96}
97/// Parser hook for RuleValue
98pub fn parse_rule_value(pt: &mut pt::RuleDefineBody, ctx: &mut Ctx) -> Option<RuleValue> {
99    match pt {
100        pt::RuleDefineBody::UnionRuleBody(pt) => parse_rule_value_union(pt, &mut ctx.err),
101        pt::RuleDefineBody::FunctionalRuleBody(pt) => parse_rule_value_function(pt, &mut ctx.err),
102    }
103}
104
105fn parse_rule_value_union(
106    pt: &mut pt::UnionRuleBody,
107    errors: &mut Vec<Error>,
108) -> Option<RuleValue> {
109    // make sure at least one rule exists in the union
110    // There aren't going to be many subrules, so it's fine to use a vec instead of hashset for deduplication
111
112    let merged_subrules: MergedListTail<&Token, &str> =
113        merge_list_tail_optional_first!(pt, m_first, m_rest, m_r);
114    if !merged_subrules.has_first {
115        if merged_subrules.vals.is_empty() {
116            let msg = "Empty union is not allowed.".to_owned();
117            let help = "Add at least one rule after \"=\", or consider using the optional keyword from the referencing rule.".to_owned();
118            errors.push(Error::from_token(&pt.ast.m_0, msg, help));
119            return None;
120        } else {
121            let msg = "Expecting a rule".to_owned();
122            let help = "Add a rule after \"=\".".to_owned();
123            errors.push(Error::from_token(&pt.ast.m_0, msg, help));
124            return None;
125        }
126    }
127
128    let mut subrules = vec![];
129
130    for (ast, subrule) in merged_subrules.into_iter() {
131        if subrules.iter().any(|r| r == subrule) {
132            // Don't add duplicate rules and give an error
133            let msg = "Duplicate rule in a union".to_owned();
134            let help = format!("Remove this duplicated \"{}\"", subrule);
135            errors.push(Error::from_token(ast, msg, help));
136        } else {
137            subrules.push(subrule.to_string());
138        }
139    }
140
141    Some(RuleValue::Union(subrules))
142}
143
144fn parse_rule_value_function(
145    pt: &mut Box<pt::FunctionalRuleBody>,
146    errors: &mut Vec<Error>,
147) -> Option<RuleValue> {
148    // Validate parameters
149    let merged_params = merge_list_tail_optional_first!(mut pt, m_first_param, m_rest_params, m_p);
150
151    if !merged_params.has_first {
152        if merged_params.vals.is_empty() {
153            let msg = "Functional rules must have at least one parameter".to_owned();
154            let help = "Add a parameter after \"(\", or consider using the optional keyword from the referencing rule.".to_owned();
155            errors.push(Error::from_token(&pt.ast.m_0 /*(*/, msg, help));
156        } else {
157            let msg = "Expecting a parameter".to_owned();
158            let help = "Add a parameter after \"(\".".to_owned();
159            errors.push(Error::from_token(&pt.ast.m_0 /*(*/, msg, help));
160        }
161        return None;
162    }
163
164    let mut params = vec![];
165
166    // There aren't going to be many params, so it's fine to use a vec instead of hashset for deduplication
167    for param in merged_params.vals.into_iter() {
168        // Make sure param is valid before taking it out
169        let param_val = match param.val.as_ref() {
170            None => {
171                continue;
172            }
173            Some(_) => param.take_unchecked(),
174        };
175        // If param won't be in PT, it's fine to have duplicate names
176        if !param_val.is_in_pt() {
177            // Still need to add it so we can give better error message when validation function bodies.
178            params.push(param_val);
179            continue;
180        }
181
182        let name = &param_val.name;
183        if params.iter().any(|p| &p.name == name) {
184            // Don't add duplicate params
185            let msg = format!("Duplicate parameter name: \"{}\"", name);
186            let help = "Rename the parameter or remove the duplicate.".to_string();
187            errors.push(Error::from_token(&param.pt.ast.m_variable, msg, help));
188        } else {
189            params.push(param_val);
190        }
191    }
192
193    Some(RuleValue::Function(params))
194}