1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
//! Core logic for language definitions: the [`Language`] model and the [`LangBuilder`] that assembles it

use crate::core::{Rule, TokenDef, TokenRule};
use crate::grammar::pt;
use crate::sdk::Error;
use std::collections::HashMap;

mod validate;

/// Definition of a language
///
/// A language consists of:
/// - a set of tokens, defined with the `token` keyword
/// - a set of tokenizer rules, defined with `TokenType "literal"` or `TokenType /regex/`.
///   The syntax of the regex depends on the target language
/// - a set of semantics, defined with the `semantics` keyword
/// - a set of derivations, defined with the `rule` keyword
/// - a context object, defined with the `context` keyword
/// - a list of include files in the output
///
/// Values of this type are returned by [`LangBuilder::build`], which moves the definitions
/// collected by the builder into these fields.
#[derive(Debug, Clone, Default)]
pub struct Language {
    /// The context type name.
    ///
    /// If this is [`None`], it will be set to the reasonable "None" in the target language
    /// (for example, `()` in Rust).
    pub context: Option<String>,
    /// The target rule name
    ///
    /// When the language is assembled through [`LangBuilder`], this is the name of the first
    /// rule that was added to the builder.
    pub target: String,
    /// The token definitions
    pub tokens: Vec<TokenDef>,
    /// The token rules
    pub token_rules: Vec<TokenRule>,
    /// The semantic definitions
    pub semantics: Vec<String>,
    /// The rule definitions, keyed by rule name
    pub rules: HashMap<String, Rule>,
    /// The files to include (relative path from the grammar file)
    pub includes: Vec<String>,
}

/// Builder for the language
///
/// This is used as the context in the parse process to collect definitions from the parse tree.
/// This serves as the "partial" definition of a language, and [`Language`] is the result when all definitions are collected and validated.
///
/// All fields are private; they are filled in through the `set_*` / `add_*` methods and moved
/// into a [`Language`] by `build`.
#[derive(Default)]
pub struct LangBuilder {
    /// The context type name, if one has been set (it can only be set once)
    context: Option<String>,
    /// The name of the target rule; assigned when a rule is added while `rules` is still empty
    target: Option<String>,
    /// The token definitions collected so far (unique by name)
    tokens: Vec<TokenDef>,
    /// The token rules collected so far, in insertion order
    token_rules: Vec<TokenRule>,
    /// The semantic type names collected so far (no duplicates)
    semantics: Vec<String>,
    /// The rules collected so far, keyed by rule name
    rules: HashMap<String, Rule>,
    /// The include paths collected so far, in insertion order
    includes: Vec<String>,
}

impl LangBuilder {
    /// Create new builder
    pub fn new() -> Self {
        Default::default()
    }

    /// Set the context type
    ///
    /// Returns false if the context type is already set
    #[inline]
    pub fn set_context(&mut self, context: String) -> bool {
        if self.context.is_some() {
            false
        } else {
            self.context = Some(context);
            true
        }
    }

    /// Add a token definition
    ///
    /// Returns false if a token is already defined with the same name
    #[inline]
    pub fn add_token(&mut self, token: TokenDef) -> bool {
        if self.tokens.iter().any(|t| t.name == token.name) {
            false
        } else {
            self.tokens.push(token);
            true
        }
    }

    /// Add a token rule
    #[inline]
    pub fn add_token_rule(&mut self, token_rule: TokenRule) {
        self.token_rules.push(token_rule);
    }

    /// Add a semantic type
    ///
    /// Returns false if a semantic type is already defined with the same name
    #[inline]
    pub fn add_semantic(&mut self, semantic: String) -> bool {
        if self.semantics.iter().any(|s| s == &semantic) {
            false
        } else {
            self.semantics.push(semantic);
            true
        }
    }

    /// Add a rule
    ///
    /// Returns false if a rule is already defined with the same name
    #[inline]
    pub fn add_rule(&mut self, rule: Rule) -> bool {
        if self.rules.is_empty() {
            self.target = Some(rule.name.clone());
        }
        if self.rules.contains_key(&rule.name) {
            false
        } else {
            self.rules.insert(rule.name.clone(), rule);
            true
        }
    }

    /// Add a file to include
    #[inline]
    pub fn add_include(&mut self, path: String) {
        self.includes.push(path);
    }

    /// Build the language
    ///
    /// This will take out the current stored definitions and return a [`Language`].
    pub fn build(&mut self, pt: &[pt::TopLevelStatement]) -> Result<Language, Vec<Error>> {
        if self.rules.is_empty() {
            return Err(vec![Error::global(
                "No rule defined.".to_owned(),
                "Define a rule with the \"rule\" keyword.".to_owned(),
            )]);
        }
        let lang = Language {
            context: self.context.take(),
            target: self.target.take().unwrap(),
            tokens: std::mem::take(&mut self.tokens),
            token_rules: std::mem::take(&mut self.token_rules),
            semantics: std::mem::take(&mut self.semantics),
            rules: std::mem::take(&mut self.rules),
            includes: std::mem::take(&mut self.includes),
        };

        // Validate the language
        validate::validate_references(pt, lang)
    }
}