kconfig_parser/lex/
macro_lexer.rs

1/*
2 Cargo KConfig - KConfig parser
3 Copyright (C) 2022  Sjoerd van Leent
4
5--------------------------------------------------------------------------------
6
7Copyright Notice: Apache
8
9Licensed under the Apache License, Version 2.0 (the "License"); you may not use
10this file except in compliance with the License. You may obtain a copy of the
11License at
12
13   https://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software distributed
16under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
17CONDITIONS OF ANY KIND, either express or implied. See the License for the
18specific language governing permissions and limitations under the License.
19
20--------------------------------------------------------------------------------
21
22Copyright Notice: GPLv2
23
24This program is free software: you can redistribute it and/or modify
25it under the terms of the GNU General Public License as published by
26the Free Software Foundation, either version 2 of the License, or
27(at your option) any later version.
28
29This program is distributed in the hope that it will be useful,
30but WITHOUT ANY WARRANTY; without even the implied warranty of
31MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
32GNU General Public License for more details.
33
34You should have received a copy of the GNU General Public License
35along with this program.  If not, see <https://www.gnu.org/licenses/>.
36
37--------------------------------------------------------------------------------
38
39Copyright Notice: MIT
40
41Permission is hereby granted, free of charge, to any person obtaining a copy of
42this software and associated documentation files (the “Software”), to deal in
43the Software without restriction, including without limitation the rights to
44use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
45the Software, and to permit persons to whom the Software is furnished to do so,
46subject to the following conditions:
47
48The above copyright notice and this permission notice shall be included in all
49copies or substantial portions of the Software.
50
51THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
53FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
54COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
55IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
56CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
57*/
58
59//! This file contains the macro lexer, which behaves like a regular lexer,
60//! but captures all macro statements and evaluates them.
61
62use std::collections::{HashMap, HashSet, VecDeque};
63use std::hash::{Hash, Hasher};
64
65use super::{
66    structs::{Lexicon, Token},
67    LexerBase,
68};
69
/// A symbol captured from a macro assignment (`NAME := value` or
/// `NAME += value`), together with the position where the assignment
/// was encountered.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Symbol {
    // The identifier on the left-hand side of the assignment.
    name: String,
    // Line of the left-hand side token.
    line: usize,
    // Column of the left-hand side token.
    column: usize,
    // The accumulated right-hand side text of the assignment.
    value: String,
}
77
/// Hashes a [`Symbol`] by its name only, so that symbols sharing a name
/// land in the same bucket of the symbol table.
///
/// NOTE(review): `PartialEq`/`Eq` are derived over *all* fields, so two
/// symbols with the same name but different values/positions are unequal
/// yet hash identically. This is legal (equal values still hash equally),
/// but it means a `HashSet<Symbol>` can hold several entries with the
/// same name — confirm multiple entries per name is intended.
impl Hash for Symbol {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Only the name participates in the hash; see note above.
        self.name.hash(state);
    }
}
83
84impl Symbol {
85    pub fn name(&self) -> String {
86        self.name.to_string()
87    }
88
89    pub fn value(&self) -> String {
90        self.value.to_string()
91    }
92}
93
/// The macro lexer interprets and evaluates assignment statements and
/// expressions, wrapping a base lexer and emitting only fully expanded
/// tokens to its caller.
pub struct MacroLexer<LB, U>
where
    LB: LexerBase,
    U: Clone,
{
    /// This table composes the found symbols and their assignments.
    /// Populated by `next_token` when it consumes `:=` / `+=` statements.
    symbol_table: HashSet<Symbol>,

    /// This is the foundation lexer on which the macro lexer applies
    /// macro interpretation.
    base_lexer: LB,

    /// A queue is used to contain specified tokens for look-ahead
    /// purposes; this queue contains already expanded tokens.
    expanded_q: VecDeque<Token>,

    /// A queue is used to contain specified tokens for look-ahead
    /// purposes; this queue contains not-yet-expanded tokens.
    raw_q: VecDeque<Token>,

    /// A map of functions which can be executed. The first parameter
    /// contains the arguments to the macro, the second parameter the
    /// file/stream name, where applicable, the third and fourth
    /// parameters the row and column of the cursor and finally the
    /// user object as created while constructing the macro lexer.
    functions:
        HashMap<String, fn(Vec<String>, &str, usize, usize, &U) -> Result<Vec<Lexicon>, String>>,

    /// A user struct, object or value which can be used by the
    /// functions to utilize behavior defined on it.
    user_object: U,
}
128
129impl<LB, U> MacroLexer<LB, U>
130where
131    LB: LexerBase,
132    U: Clone,
133{
134    /// Creates a new macro lexer, given an original base lexer. This
135    /// will interpret all tokens necessary for macro interpretation
136    /// and expansion, and result into tokens specific for the user
137    /// of this lexer.
138    pub fn new(base_lexer: LB, user_object: &U) -> Self {
139        Self {
140            symbol_table: HashSet::new(),
141            base_lexer,
142            expanded_q: VecDeque::new(),
143            raw_q: VecDeque::new(),
144            functions: HashMap::new(),
145            user_object: user_object.clone(),
146        }
147    }
148
149    /// Retrieves the symbol table to be used apart from the Macro Lexer
150    pub fn symbol_table(&self) -> HashSet<Symbol> {
151        self.symbol_table.clone()
152    }
153
154    /// Adds a macro function to be able to be called by the interpreter
155    pub fn add_fn(
156        &mut self,
157        name: &str,
158        func: fn(Vec<String>, &str, usize, usize, &U) -> Result<Vec<Lexicon>, String>,
159    ) {
160        self.functions.insert(name.to_string(), func);
161    }
162
163    /// Interprets a macro call, typically only existing out of either
164    /// identifiers or commas.
165    fn interpret(&mut self, mut tokens: VecDeque<Token>) -> Vec<Token> {
166        // If the first identifier starts with "call", contains a space
167        // and further characters, it is a call macro, and needs to
168        // be separated from the rest of the macro call.
169        let mut is_call = false;
170
171        let token = match tokens.front() {
172            Some(t) => {
173                let (c, t) = is_call_token(t);
174                is_call = c;
175                if is_call {
176                    tokens.pop_front();
177                    tokens.push_front(t.clone());
178                };
179                t
180            }
181            _ => Token::create_eot(0, 0),
182        };
183
184        let mut identifiers = match macro_simplify(&tokens) {
185            Ok(i) => i,
186            Err(s) => return vec![create_error(&tokens, s)],
187        };
188
189        if identifiers.len() == 1 && !is_call {
190            let identifier = identifiers.front().unwrap();
191            for symbol in &self.symbol_table {
192                if symbol.name.eq(identifier) {
193                    let value = &symbol.value;
194                    let (column, line) = get_tokens_column_line(&tokens);
195                    return vec![Token::create(
196                        Lexicon::Identifier(value.to_string()),
197                        column,
198                        line,
199                        value,
200                    )];
201                };
202            }
203        };
204
205        if identifiers.len() > 0 {
206            let name = identifiers.pop_front().unwrap();
207            let result = match self.functions.get(&name) {
208                Some(f) => {
209                    let arguments = Vec::from_iter(identifiers);
210                    let line = token.line();
211                    let column = token.column();
212                    let stream = self.base_lexer.current_stream();
213                    match f(
214                        arguments,
215                        &stream.unwrap_or_default(),
216                        line,
217                        column,
218                        &self.user_object,
219                    ) {
220                        Ok(s) => s,
221                        Err(s) => {
222                            return vec![create_error(
223                                &tokens,
224                                &format!("Expansion {} failed: {}", name, s),
225                            )]
226                        }
227                    }
228                }
229                None => {
230                    return vec![create_error(
231                        &tokens,
232                        &format!("Expansion with name {} not found", name),
233                    )]
234                }
235            };
236
237            let (column, line) = get_tokens_column_line(&tokens);
238
239            return result
240                .iter()
241                .map(|l| Token::create(l.clone(), column, line, &l.to_string()))
242                .collect();
243        }
244
245        vec![create_error(&tokens, "Macro expansion failed")]
246    }
247
248    fn expand(&mut self) -> Vec<Token> {
249        let mut input_tokens: VecDeque<Token> = VecDeque::new();
250
251        let mut open_state = 0;
252        let mut next = self.queued_next_token();
253        while next.term() != Lexicon::Close || open_state != 0 {
254            match next.term() {
255                Lexicon::Open => {
256                    open_state += 1;
257                    input_tokens.push_back(next.clone());
258                }
259                Lexicon::Close => {
260                    open_state -= 1;
261                    input_tokens.push_back(next.clone());
262                }
263                Lexicon::MacroOpen => input_tokens.extend(self.expand()),
264                _ => input_tokens.push_back(next.clone()),
265            };
266            next = self.queued_next_token();
267        }
268
269        self.interpret(input_tokens)
270    }
271
272    /// Extracts the next token from the lexer, but does not evaluate it yet.
273    /// Pushes the token onto the queue, to be handled by macro_next_token().
274    fn queue_raw_token(&mut self) -> Token {
275        let result = self.queued_next_token();
276        self.raw_q.push_back(result.clone());
277        result
278    }
279
280    /// Extracts the next token from the queue, if the queue is depleted,
281    /// fills the queue from the base lexer, then extracts it.
282    fn queued_next_token(&mut self) -> Token {
283        match self.raw_q.pop_front() {
284            Some(t) => t,
285            None => self.base_lexer.next_token(),
286        }
287    }
288
289    fn macro_next_token(&mut self) -> Token {
290        let token = self.queued_next_token();
291        match token.term() {
292            Lexicon::MacroOpen => {
293                for token in self.expand() {
294                    self.raw_q.push_back(token);
295                }
296                self.macro_next_token()
297            }
298            _ => token,
299        }
300    }
301}
302
impl<LB, U> LexerBase for MacroLexer<LB, U>
where
    LB: LexerBase,
    U: Clone,
{
    /// Produces the next token, with all macro assignments consumed
    /// (recorded into the symbol table) and all macro references/calls
    /// expanded.
    fn next_token(&mut self) -> Token {
        // The macro lexer requires a lot of read ahead to perform
        // expression interpretation and expansion. Therefore, a look
        // ahead buffer is used.

        // If there still is a token in the queue, and it is not an
        // identifier or if the token is not the only one in the queue,
        // then it can be popped and returned.

        if self.expanded_q.len() > 1 {
            return self.expanded_q.pop_front().unwrap();
        }

        // A lone queued identifier stays queued: it may still turn out
        // to be the left-hand side of an assignment.
        match self.expanded_q.front() {
            Some(token) => match token.term() {
                Lexicon::Identifier(_) => (),
                _ => return self.expanded_q.pop_front().unwrap(),
            },
            None => (),
        }

        // If the queue is empty, the next element should be pushed on it,
        // if it is an identifier, otherwise it should be passed on.

        if self.expanded_q.len() < 1 {
            let next_token = self.macro_next_token();
            self.expanded_q.push_back(next_token);
        };

        // If the next token is an assignment, assignment evaluation should
        // take place, otherwise the token should be pushed onto the queue
        // and the queued token should be released.
        let token = self.macro_next_token();
        match token.term() {
            Lexicon::AppendAssignment | Lexicon::ImmediateAssignment => {
                // The queued token is the assignment's left-hand side.
                let lhs = self.expanded_q.pop_front().unwrap();
                match lhs.term() {
                    Lexicon::Identifier(symbol_name) => {
                        let mut value = String::new();
                        let mut continue_scan = true;

                        // Accumulate the raw right-hand side until EOT or
                        // until the next token starts on a different line
                        // (an assignment's value ends with its line).
                        while continue_scan {
                            let rhs = self.macro_next_token();
                            match rhs.term() {
                                Lexicon::EOT => continue_scan = false,
                                Lexicon::Error(_) => return rhs,
                                _ => {
                                    value += &rhs.raw();
                                }
                            }

                            if continue_scan {
                                // Peek at the following token; it stays in
                                // raw_q so later reads still see it.
                                let test = self.queue_raw_token();
                                if rhs.line() != test.line() {
                                    continue_scan = false;
                                }
                            }
                        }

                        // Line breaks never belong to the stored value.
                        value = value.replace("\n", "").replace("\r", "");

                        // Create the new symbol
                        let mut sym = Symbol {
                            name: symbol_name.to_string(),
                            line: lhs.line(),
                            column: lhs.column(),
                            value: value.to_string(),
                        };

                        // If the current assignment is an append assignment, find whether the
                        // symbol has already been used, acquire the value and prefix that value
                        // in front of the new symbol's value
                        if let Lexicon::AppendAssignment = token.term() {
                            for origin in self.symbol_table.iter() {
                                if origin.name.eq(&symbol_name) {
                                    sym.value = format!("{}{}", origin.value, sym.value)
                                }
                            }
                        }
                        self.symbol_table.insert(sym);
                        // The assignment itself produces no output token;
                        // recurse for the next meaningful one.
                        self.next_token()
                    }
                    _ => Token::create_error(
                        lhs.column(),
                        lhs.line(),
                        "Left-hand side value should be an identifier",
                    ),
                }
            }
            _ => {
                self.expanded_q.push_back(token);
                return self.expanded_q.pop_front().unwrap();
            }
        }
    }
}
404
405/// If the token is an identifier, and starts with the call semantics,
406/// it is "special" as it identifies a call.
407fn is_call_token(token: &Token) -> (bool, Token) {
408    match token.term() {
409        Lexicon::Identifier(s) => {
410            if s.starts_with("call ")
411                || s.starts_with("call\t")
412                || s.starts_with("call\r")
413                || s.starts_with("call\n")
414            {
415                let result = Token::create(
416                    Lexicon::Identifier(s[4..].trim().to_string()),
417                    token.column(),
418                    token.line(),
419                    &"$(call ",
420                );
421                (true, result.clone())
422            } else {
423                (false, token.clone())
424            }
425        }
426        _ => (false, token.clone()),
427    }
428}
429
430/// Because macros can contain adjecent identifiers because of earlier
431/// macro expansions, the identifiers are to be interpreted as a single
432/// identifier, for example:
433///
434/// FOO := foo
435/// BAR := bar
436/// $($(FOO)$(BAR))
437///
438/// $(FOO) would be replaced with "foo" and $(BAR) would be replaced
439/// with "bar". But "foobar" should be the identifier resulting from
440/// that statement.
441fn macro_simplify(tokens: &VecDeque<Token>) -> Result<VecDeque<String>, &str> {
442    let mut simplified: VecDeque<String> = VecDeque::new();
443    let mut compiled_string: String = "".to_string();
444    for token in tokens {
445        match token.term() {
446            Lexicon::Identifier(s) => {
447                compiled_string = format!("{}{}", compiled_string, s);
448            }
449            Lexicon::Comma => {
450                simplified.push_back(compiled_string.to_string());
451                compiled_string = "".to_string();
452            }
453            _ => return Err(&"Expected macro identifier or comma"),
454        }
455    }
456    simplified.push_back(compiled_string.to_string());
457    Ok(simplified)
458}
459
460fn get_tokens_column_line(tokens: &VecDeque<Token>) -> (usize, usize) {
461    match tokens.front() {
462        Some(t) => (t.column(), t.line()),
463        _ => (0, 0),
464    }
465}
466
467fn create_error(tokens: &VecDeque<Token>, err_msg: &str) -> Token {
468    let (column, line) = get_tokens_column_line(tokens);
469    Token::create_error(column, line, err_msg)
470}
471
#[cfg(test)]
mod tests {
    use super::super::Lexer;
    use super::super::LexerBase;
    use super::*;

    /// Convenience constructor: a macro lexer over an in-memory byte
    /// stream, with the unit type as user object.
    fn create_lexer(s: &str) -> MacroLexer<Lexer<&[u8]>, ()> {
        MacroLexer::new(Lexer::create(s.as_bytes()), &())
    }

    #[test]
    fn test_basic_expansion() {
        // A `$(FOO)` reference expands to the value assigned to FOO.
        let mut l = create_lexer("FOO := foo\n$(FOO)");
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    #[test]
    fn test_dual_expansion() {
        // Two independent symbols expand independently, in order.
        let mut l = create_lexer("FOO := foo\nBAR := bar\n$(FOO) $(BAR)");
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(
            Lexicon::Identifier("bar".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    /// Example macro function: echoes its single parameter back as an
    /// identifier, or fails for any other parameter count.
    fn foo_func(
        params: Vec<String>,
        _: &str,
        _: usize,
        _: usize,
        _: &(),
    ) -> Result<Vec<Lexicon>, String> {
        match params.len() {
            1 => Ok(vec![Lexicon::Identifier(
                params.first().unwrap().to_string(),
            )]),
            _ => Err("Expected exactly one parameter".to_string()),
        }
    }

    #[test]
    fn test_simple_macro_expansion() {
        // `$(call func,foo)` invokes the registered function.
        let mut l = create_lexer("$(call func,foo)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    #[test]
    fn test_abbrev_macro_expansion() {
        // The `call` keyword is optional: `$(func,foo)` also works.
        let mut l = create_lexer("$(func,foo)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    #[test]
    fn test_recursive_macro_expansion() {
        // Macro calls may appear inside assignments and nest recursively.
        let mut l = create_lexer("FOO := foo\nBAR := $(call func,$(FOO))\n$(BAR)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }
}