1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
use crate::Cfg;
use parol_runtime::{lexer::FIRST_USER_TOKEN, TerminalIndex};
use std::fmt::{Debug, Display, Error, Formatter};

// ---------------------------------------------------
// Part of the Public API
// *Changes will affect crate's version according to semver*
// ---------------------------------------------------
///
/// Configuration information for a single scanner (scanner state).
/// Holds the comment delimiters of the scanner, the switches that turn the
/// automatic handling of newlines and whitespace on or off, and the scanner
/// state transitions.
///
/// Note that the derived ordering compares the fields in declaration order.
///
#[derive(Debug, Clone, Eq, PartialEq, Ord, PartialOrd)]
pub struct ScannerConfig {
    ///
    /// The name of the scanner state taken from the grammar description
    ///
    pub scanner_name: String,

    ///
    /// Index of the scanner, aka scanner state
    ///
    pub scanner_state: usize,

    ///
    /// Strings with the characters that start line comments.
    /// `generate_build_information` inserts each string unescaped into the
    /// line comment pattern it generates.
    ///
    pub line_comments: Vec<String>,

    ///
    /// (String, String) tuples with the characters that start and end
    /// a block comment, respectively.
    /// `generate_build_information` inserts both strings unescaped into the
    /// block comment pattern it generates.
    ///
    pub block_comments: Vec<(String, String)>,

    ///
    /// If true the lexer handles (and skips) newlines.
    /// If false the user has to handle newlines themselves.
    ///
    pub auto_newline: bool,

    ///
    /// If true the lexer handles (and skips) whitespace.
    /// If false the user has to handle whitespace themselves.
    ///
    pub auto_ws: bool,

    /// Scanner state transitions.
    /// Maps from token to scanner state, where the token is identified by its TerminalIndex.
    /// The scanner state is identified by its index.
    pub transitions: Vec<(TerminalIndex, usize)>,
}

impl ScannerConfig {
    /// Creates a configuration for the scanner named `scanner_name` with the
    /// index `scanner_state`.
    ///
    /// The new configuration has no line comments, no block comments and no
    /// transitions; automatic newline and whitespace handling are both enabled.
    pub fn new(scanner_name: String, scanner_state: usize) -> Self {
        Self {
            auto_newline: true,
            auto_ws: true,
            line_comments: Vec::new(),
            block_comments: Vec::new(),
            transitions: Vec::new(),
            scanner_state,
            scanner_name,
        }
    }

    /// Replaces the line comment starts and returns the updated configuration
    pub fn with_line_comments(self, line_comments: Vec<String>) -> Self {
        Self {
            line_comments,
            ..self
        }
    }

    /// Replaces the block comment (start, end) pairs and returns the updated configuration
    pub fn with_block_comments(self, block_comments: Vec<(String, String)>) -> Self {
        Self {
            block_comments,
            ..self
        }
    }

    /// Switches automatic newline handling on or off and returns the updated configuration
    pub fn with_auto_newline(self, auto_newline: bool) -> Self {
        Self {
            auto_newline,
            ..self
        }
    }

    /// Switches automatic whitespace handling on or off and returns the updated configuration
    pub fn with_auto_ws(self, auto_ws: bool) -> Self {
        Self { auto_ws, ..self }
    }

    ///
    /// Generates the data needed by the lexer generator.
    ///
    /// The returned tuple consists of
    /// * the five scanner specific internal tokens in fixed order: an
    ///   unmatchable placeholder, newline, whitespace, line comment and block
    ///   comment, where every entry that is switched off or not configured is
    ///   `UNMATCHABLE_TOKEN`,
    /// * the indices of the terminals of `cfg` that are valid in this scanner,
    ///   each offset by `FIRST_USER_TOKEN`,
    /// * a copy of the scanner name.
    ///
    pub fn generate_build_information(
        &self,
        cfg: &Cfg,
    ) -> (Vec<String>, Vec<TerminalIndex>, String) {
        const UNMATCHABLE: &str = "UNMATCHABLE_TOKEN";

        // Yields the name of a built-in token if its handling is enabled and
        // the placeholder otherwise.
        let token_or_placeholder = |enabled: bool, token: &str| {
            let chosen = if enabled { token } else { UNMATCHABLE };
            chosen.to_owned()
        };

        // One alternative per configured line comment start; each alternative
        // runs up to and including the line end (or the end of input).
        let line_comment_entry = if self.line_comments.is_empty() {
            UNMATCHABLE.to_owned()
        } else {
            let alternatives: Vec<String> = self
                .line_comments
                .iter()
                .map(|start| format!(r###"({}.*(\r\n|\r|\n|$))"###, start))
                .collect();
            alternatives.join("|")
        };

        // One alternative per configured (start, end) pair; the match between
        // both delimiters is non-greedy.
        let block_comment_entry = if self.block_comments.is_empty() {
            UNMATCHABLE.to_owned()
        } else {
            let alternatives: Vec<String> = self
                .block_comments
                .iter()
                .map(|(start, end)| format!(r###"((?ms){}.*?{})"###, start, end))
                .collect();
            alternatives.join("|")
        };

        let scanner_specific = vec![
            UNMATCHABLE.to_owned(),
            token_or_placeholder(self.auto_newline, "NEW_LINE_TOKEN"),
            token_or_placeholder(self.auto_ws, "WHITESPACE_TOKEN"),
            line_comment_entry,
            block_comment_entry,
        ];

        // Keep the terminals that list this scanner state among their states.
        let ordered_terminals = cfg.get_ordered_terminals();
        let mut term_indices = Vec::new();
        for (position, (_, _, states)) in ordered_terminals.iter().enumerate() {
            if states.contains(&self.scanner_state) {
                term_indices.push(position as TerminalIndex + FIRST_USER_TOKEN);
            }
        }

        (scanner_specific, term_indices, self.scanner_name.clone())
    }
}

impl Default for ScannerConfig {
    /// Yields the configuration of the scanner named "INITIAL" with index 0:
    /// no comments, no transitions, automatic newline and whitespace handling
    /// enabled.
    fn default() -> Self {
        // `new` fills in exactly the remaining defaults listed above.
        Self::new("INITIAL".to_string(), 0)
    }
}

impl Display for ScannerConfig {
    /// Writes one `name: value` line per configuration field, followed by the
    /// transitions as a sequence of `on <terminal> enter <state>;` entries
    /// without separators or a trailing newline.
    fn fmt(&self, f: &mut Formatter<'_>) -> std::result::Result<(), Error> {
        let Self {
            scanner_name,
            scanner_state,
            line_comments,
            block_comments,
            auto_newline,
            auto_ws,
            transitions,
        } = self;

        writeln!(f, "scanner_name: {}", scanner_name)?;
        writeln!(f, "scanner_state: {}", scanner_state)?;
        writeln!(f, "line_comments: {:?}", line_comments)?;
        writeln!(f, "block_comments: {:?}", block_comments)?;
        writeln!(f, "auto_newline: {:?}", auto_newline)?;
        writeln!(f, "auto_ws: {:?}", auto_ws)?;

        for (terminal, target_state) in transitions {
            write!(f, "on {} enter {};", terminal, target_state)?;
        }
        Ok(())
    }
}