regen/core/lang/
mod.rs

//! Core logic for language definitions
2
3use crate::core::{Rule, TokenDef, TokenRule};
4use crate::grammar::pt;
5use crate::sdk::Error;
6use std::collections::BTreeMap;
7
8mod validate;
9
10/// Definition of a language
11///
12/// A language consists of:
13/// - a set of tokens, defined with the `token` keyword
14/// - a set of tokenizer rules, defined with `TokenType "literal"` or `TokenType /regex/`.
15///   The syntax of the regex depends on the target language
16/// - a set of semantics, defined with the `semantics` keyword
17/// - a set of derivations, defined with the `rule` keyword
18/// - a context object, defined with the `context` keyword
19/// - a list of include files in the output
20#[derive(Debug, Clone, Default)]
21pub struct Language {
22    /// The context type name.
23    ///
24    /// If this is [`None`], it will be set to the reasonable "None" in the target language. (For example, `()` in Rust
25    pub context: Option<String>,
26    /// The target rule name
27    pub target: String,
28    /// The token definitions
29    pub tokens: Vec<TokenDef>,
30    /// The token rules
31    pub token_rules: Vec<TokenRule>,
32    /// The semantic definitions
33    pub semantics: Vec<String>,
34    /// The rule definitions
35    pub rules: BTreeMap<String, Rule>,
36    /// The files to include (relative path from the grammar file)
37    pub includes: Vec<String>,
38}
39
40/// Builder for the language
41///
42/// This is used as the context in the parse process to collect definitions from the parse tree.
43/// This serves as the "partial" definition of a language, and [`Language`] is the result when all definitions are collected and validated.
44#[derive(Default)]
45pub struct LangBuilder {
46    context: Option<String>,
47    target: Option<String>,
48    tokens: Vec<TokenDef>,
49    token_rules: Vec<TokenRule>,
50    semantics: Vec<String>,
51    rules: BTreeMap<String, Rule>,
52    includes: Vec<String>,
53}
54
55impl LangBuilder {
56    /// Create new builder
57    pub fn new() -> Self {
58        Default::default()
59    }
60
61    /// Set the context type
62    ///
63    /// Returns false if the context type is already set
64    #[inline]
65    pub fn set_context(&mut self, context: String) -> bool {
66        if self.context.is_some() {
67            false
68        } else {
69            self.context = Some(context);
70            true
71        }
72    }
73
74    /// Add a token definition
75    ///
76    /// Returns false if a token is already defined with the same name
77    #[inline]
78    pub fn add_token(&mut self, token: TokenDef) -> bool {
79        if self.tokens.iter().any(|t| t.name == token.name) {
80            false
81        } else {
82            self.tokens.push(token);
83            true
84        }
85    }
86
87    /// Add a token rule
88    #[inline]
89    pub fn add_token_rule(&mut self, token_rule: TokenRule) {
90        self.token_rules.push(token_rule);
91    }
92
93    /// Add a semantic type
94    ///
95    /// Returns false if a semantic type is already defined with the same name
96    #[inline]
97    pub fn add_semantic(&mut self, semantic: String) -> bool {
98        if self.semantics.iter().any(|s| s == &semantic) {
99            false
100        } else {
101            self.semantics.push(semantic);
102            true
103        }
104    }
105
106    /// Add a rule
107    ///
108    /// Returns false if a rule is already defined with the same name
109    #[inline]
110    pub fn add_rule(&mut self, rule: Rule) -> bool {
111        if self.rules.is_empty() {
112            self.target = Some(rule.name.clone());
113        }
114        if self.rules.contains_key(&rule.name) {
115            false
116        } else {
117            self.rules.insert(rule.name.clone(), rule);
118            true
119        }
120    }
121
122    /// Add a file to include
123    #[inline]
124    pub fn add_include(&mut self, path: String) {
125        self.includes.push(path);
126    }
127
128    /// Build the language
129    ///
130    /// This will take out the current stored definitions and return a [`Language`].
131    pub fn build(&mut self, pt: &[pt::TopLevelStatement]) -> Result<Language, Vec<Error>> {
132        if self.rules.is_empty() {
133            return Err(vec![Error::global(
134                "No rule defined.".to_owned(),
135                "Define a rule with the \"rule\" keyword.".to_owned(),
136            )]);
137        }
138        let lang = Language {
139            context: self.context.take(),
140            target: self.target.take().unwrap(),
141            tokens: std::mem::take(&mut self.tokens),
142            token_rules: std::mem::take(&mut self.token_rules),
143            semantics: std::mem::take(&mut self.semantics),
144            rules: std::mem::take(&mut self.rules),
145            includes: std::mem::take(&mut self.includes),
146        };
147
148        // Validate the language
149        validate::validate_references(pt, lang)
150    }
151}