// kconfig_parser/lex/macro_lexer.rs
1/*
2 Cargo KConfig - KConfig parser
3 Copyright (C) 2022 Sjoerd van Leent
4
5--------------------------------------------------------------------------------
6
7Copyright Notice: Apache
8
9Licensed under the Apache License, Version 2.0 (the "License"); you may not use
10this file except in compliance with the License. You may obtain a copy of the
11License at
12
13 https://www.apache.org/licenses/LICENSE-2.0
14
15Unless required by applicable law or agreed to in writing, software distributed
16under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
17CONDITIONS OF ANY KIND, either express or implied. See the License for the
18specific language governing permissions and limitations under the License.
19
20--------------------------------------------------------------------------------
21
22Copyright Notice: GPLv2
23
24This program is free software: you can redistribute it and/or modify
25it under the terms of the GNU General Public License as published by
26the Free Software Foundation, either version 2 of the License, or
27(at your option) any later version.
28
29This program is distributed in the hope that it will be useful,
30but WITHOUT ANY WARRANTY; without even the implied warranty of
31MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
32GNU General Public License for more details.
33
34You should have received a copy of the GNU General Public License
35along with this program. If not, see <https://www.gnu.org/licenses/>.
36
37--------------------------------------------------------------------------------
38
39Copyright Notice: MIT
40
41Permission is hereby granted, free of charge, to any person obtaining a copy of
42this software and associated documentation files (the “Software”), to deal in
43the Software without restriction, including without limitation the rights to
44use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
45the Software, and to permit persons to whom the Software is furnished to do so,
46subject to the following conditions:
47
48The above copyright notice and this permission notice shall be included in all
49copies or substantial portions of the Software.
50
51THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
52IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
53FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
54COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
55IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
56CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
57*/
58
59//! This file contains the macro lexer, which behaves like a regular lexer,
60//! but captures all macro statements and evaluates them.
61
62use std::collections::{HashMap, HashSet, VecDeque};
63use std::hash::{Hash, Hasher};
64
65use super::{
66 structs::{Lexicon, Token},
67 LexerBase,
68};
69
/// A symbol captured from a macro assignment (immediate or append
/// assignment), recording where it was defined and its expanded value.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct Symbol {
    /// Identifier on the left-hand side of the assignment.
    name: String,
    /// Line of the assignment's left-hand side in the source stream.
    line: usize,
    /// Column of the assignment's left-hand side in the source stream.
    column: usize,
    /// The (already expanded) right-hand side value of the assignment.
    value: String,
}
77
/// Symbols hash by name only. This is consistent with the derived
/// `Eq`/`PartialEq` (which compare all fields): two symbols that are
/// equal necessarily share the same name and therefore the same hash.
impl Hash for Symbol {
    fn hash<H: Hasher>(&self, state: &mut H) {
        // Only the name participates in the hash; line, column and
        // value are deliberately excluded.
        self.name.hash(state);
    }
}
83
84impl Symbol {
85 pub fn name(&self) -> String {
86 self.name.to_string()
87 }
88
89 pub fn value(&self) -> String {
90 self.value.to_string()
91 }
92}
93
94/// The macro lexer interprets and evaluates assignment statements and
95/// expressions.
/// The macro lexer interprets and evaluates assignment statements and
/// expressions, layered on top of a base lexer `LB`. The user object
/// type `U` is handed to every registered macro function.
pub struct MacroLexer<LB, U>
where
    LB: LexerBase,
    U: Clone,
{
    /// This table composes the found symbols and their assignments.
    symbol_table: HashSet<Symbol>,

    /// This is the foundation lexer on which the macro lexer applies
    /// macro interpretation.
    base_lexer: LB,

    /// A queue is used to contain specified tokens for look-ahead
    /// purposes; this queue contains already expanded tokens.
    expanded_q: VecDeque<Token>,

    /// A queue is used to contain specified tokens for look-ahead
    /// purposes; this queue contains not yet expanded tokens.
    raw_q: VecDeque<Token>,

    /// A map of functions which can be executed. The first parameter
    /// contains the arguments to the macro, the second parameter the
    /// file/stream name, where applicable, the third and fourth
    /// parameters the row and column of the cursor and finally the
    /// user object as created while constructing the macro lexer.
    functions:
        HashMap<String, fn(Vec<String>, &str, usize, usize, &U) -> Result<Vec<Lexicon>, String>>,

    /// A user struct, object or value which can be used by the
    /// functions to utilize behavior defined on it.
    user_object: U,
}
128
129impl<LB, U> MacroLexer<LB, U>
130where
131 LB: LexerBase,
132 U: Clone,
133{
134 /// Creates a new macro lexer, given an original base lexer. This
135 /// will interpret all tokens necessary for macro interpretation
136 /// and expansion, and result into tokens specific for the user
137 /// of this lexer.
138 pub fn new(base_lexer: LB, user_object: &U) -> Self {
139 Self {
140 symbol_table: HashSet::new(),
141 base_lexer,
142 expanded_q: VecDeque::new(),
143 raw_q: VecDeque::new(),
144 functions: HashMap::new(),
145 user_object: user_object.clone(),
146 }
147 }
148
149 /// Retrieves the symbol table to be used apart from the Macro Lexer
150 pub fn symbol_table(&self) -> HashSet<Symbol> {
151 self.symbol_table.clone()
152 }
153
154 /// Adds a macro function to be able to be called by the interpreter
155 pub fn add_fn(
156 &mut self,
157 name: &str,
158 func: fn(Vec<String>, &str, usize, usize, &U) -> Result<Vec<Lexicon>, String>,
159 ) {
160 self.functions.insert(name.to_string(), func);
161 }
162
163 /// Interprets a macro call, typically only existing out of either
164 /// identifiers or commas.
165 fn interpret(&mut self, mut tokens: VecDeque<Token>) -> Vec<Token> {
166 // If the first identifier starts with "call", contains a space
167 // and further characters, it is a call macro, and needs to
168 // be separated from the rest of the macro call.
169 let mut is_call = false;
170
171 let token = match tokens.front() {
172 Some(t) => {
173 let (c, t) = is_call_token(t);
174 is_call = c;
175 if is_call {
176 tokens.pop_front();
177 tokens.push_front(t.clone());
178 };
179 t
180 }
181 _ => Token::create_eot(0, 0),
182 };
183
184 let mut identifiers = match macro_simplify(&tokens) {
185 Ok(i) => i,
186 Err(s) => return vec![create_error(&tokens, s)],
187 };
188
189 if identifiers.len() == 1 && !is_call {
190 let identifier = identifiers.front().unwrap();
191 for symbol in &self.symbol_table {
192 if symbol.name.eq(identifier) {
193 let value = &symbol.value;
194 let (column, line) = get_tokens_column_line(&tokens);
195 return vec![Token::create(
196 Lexicon::Identifier(value.to_string()),
197 column,
198 line,
199 value,
200 )];
201 };
202 }
203 };
204
205 if identifiers.len() > 0 {
206 let name = identifiers.pop_front().unwrap();
207 let result = match self.functions.get(&name) {
208 Some(f) => {
209 let arguments = Vec::from_iter(identifiers);
210 let line = token.line();
211 let column = token.column();
212 let stream = self.base_lexer.current_stream();
213 match f(
214 arguments,
215 &stream.unwrap_or_default(),
216 line,
217 column,
218 &self.user_object,
219 ) {
220 Ok(s) => s,
221 Err(s) => {
222 return vec![create_error(
223 &tokens,
224 &format!("Expansion {} failed: {}", name, s),
225 )]
226 }
227 }
228 }
229 None => {
230 return vec![create_error(
231 &tokens,
232 &format!("Expansion with name {} not found", name),
233 )]
234 }
235 };
236
237 let (column, line) = get_tokens_column_line(&tokens);
238
239 return result
240 .iter()
241 .map(|l| Token::create(l.clone(), column, line, &l.to_string()))
242 .collect();
243 }
244
245 vec![create_error(&tokens, "Macro expansion failed")]
246 }
247
248 fn expand(&mut self) -> Vec<Token> {
249 let mut input_tokens: VecDeque<Token> = VecDeque::new();
250
251 let mut open_state = 0;
252 let mut next = self.queued_next_token();
253 while next.term() != Lexicon::Close || open_state != 0 {
254 match next.term() {
255 Lexicon::Open => {
256 open_state += 1;
257 input_tokens.push_back(next.clone());
258 }
259 Lexicon::Close => {
260 open_state -= 1;
261 input_tokens.push_back(next.clone());
262 }
263 Lexicon::MacroOpen => input_tokens.extend(self.expand()),
264 _ => input_tokens.push_back(next.clone()),
265 };
266 next = self.queued_next_token();
267 }
268
269 self.interpret(input_tokens)
270 }
271
272 /// Extracts the next token from the lexer, but does not evaluate it yet.
273 /// Pushes the token onto the queue, to be handled by macro_next_token().
274 fn queue_raw_token(&mut self) -> Token {
275 let result = self.queued_next_token();
276 self.raw_q.push_back(result.clone());
277 result
278 }
279
280 /// Extracts the next token from the queue, if the queue is depleted,
281 /// fills the queue from the base lexer, then extracts it.
282 fn queued_next_token(&mut self) -> Token {
283 match self.raw_q.pop_front() {
284 Some(t) => t,
285 None => self.base_lexer.next_token(),
286 }
287 }
288
289 fn macro_next_token(&mut self) -> Token {
290 let token = self.queued_next_token();
291 match token.term() {
292 Lexicon::MacroOpen => {
293 for token in self.expand() {
294 self.raw_q.push_back(token);
295 }
296 self.macro_next_token()
297 }
298 _ => token,
299 }
300 }
301}
302
303impl<LB, U> LexerBase for MacroLexer<LB, U>
304where
305 LB: LexerBase,
306 U: Clone,
307{
308 fn next_token(&mut self) -> Token {
309 // The macro lexer requires a lot of read ahead to perform
310 // expression interpretation and expansion. Therefore, a look
311 // ahead buffer is used.
312
313 // If there still is a token in the queue, and it is not an
314 // identifier or if the token is not the only one in the queuu,
315 // then it can be popped and returned.
316
317 if self.expanded_q.len() > 1 {
318 return self.expanded_q.pop_front().unwrap();
319 }
320
321 match self.expanded_q.front() {
322 Some(token) => match token.term() {
323 Lexicon::Identifier(_) => (),
324 _ => return self.expanded_q.pop_front().unwrap(),
325 },
326 None => (),
327 }
328
329 // If the queue is empty, the next element should be pushed on it,
330 // if it is an identifier, otherwise is should be passed on.
331
332 if self.expanded_q.len() < 1 {
333 let next_token = self.macro_next_token();
334 self.expanded_q.push_back(next_token);
335 };
336
337 // If the next token is an assignment, assignment evaluation should
338 // take place, otherwise the token should be pushed onto the queue
339 // the the queued token should be released.
340 let token = self.macro_next_token();
341 match token.term() {
342 Lexicon::AppendAssignment | Lexicon::ImmediateAssignment => {
343 let lhs = self.expanded_q.pop_front().unwrap();
344 match lhs.term() {
345 Lexicon::Identifier(symbol_name) => {
346 let mut value = String::new();
347 let mut continue_scan = true;
348
349 while continue_scan {
350 let rhs = self.macro_next_token();
351 match rhs.term() {
352 Lexicon::EOT => continue_scan = false,
353 Lexicon::Error(_) => return rhs,
354 _ => {
355 value += &rhs.raw();
356 }
357 }
358
359 if continue_scan {
360 let test = self.queue_raw_token();
361 if rhs.line() != test.line() {
362 continue_scan = false;
363 }
364 }
365 }
366
367 value = value.replace("\n", "").replace("\r", "");
368
369 // Create the new symbol
370 let mut sym = Symbol {
371 name: symbol_name.to_string(),
372 line: lhs.line(),
373 column: lhs.column(),
374 value: value.to_string(),
375 };
376
377 // If the current assignment is an append assignment, find whether the
378 // symbol has already been used, acquire the value and prefix that value
379 // in front of the new symbol's value
380 if let Lexicon::AppendAssignment = token.term() {
381 for origin in self.symbol_table.iter() {
382 if origin.name.eq(&symbol_name) {
383 sym.value = format!("{}{}", origin.value, sym.value)
384 }
385 }
386 }
387 self.symbol_table.insert(sym);
388 self.next_token()
389 }
390 _ => Token::create_error(
391 lhs.column(),
392 lhs.line(),
393 "Left-hand side value should be an identifier",
394 ),
395 }
396 }
397 _ => {
398 self.expanded_q.push_back(token);
399 return self.expanded_q.pop_front().unwrap();
400 }
401 }
402 }
403}
404
405/// If the token is an identifier, and starts with the call semantics,
406/// it is "special" as it identifies a call.
407fn is_call_token(token: &Token) -> (bool, Token) {
408 match token.term() {
409 Lexicon::Identifier(s) => {
410 if s.starts_with("call ")
411 || s.starts_with("call\t")
412 || s.starts_with("call\r")
413 || s.starts_with("call\n")
414 {
415 let result = Token::create(
416 Lexicon::Identifier(s[4..].trim().to_string()),
417 token.column(),
418 token.line(),
419 &"$(call ",
420 );
421 (true, result.clone())
422 } else {
423 (false, token.clone())
424 }
425 }
426 _ => (false, token.clone()),
427 }
428}
429
430/// Because macros can contain adjecent identifiers because of earlier
431/// macro expansions, the identifiers are to be interpreted as a single
432/// identifier, for example:
433///
434/// FOO := foo
435/// BAR := bar
436/// $($(FOO)$(BAR))
437///
438/// $(FOO) would be replaced with "foo" and $(BAR) would be replaced
439/// with "bar". But "foobar" should be the identifier resulting from
440/// that statement.
441fn macro_simplify(tokens: &VecDeque<Token>) -> Result<VecDeque<String>, &str> {
442 let mut simplified: VecDeque<String> = VecDeque::new();
443 let mut compiled_string: String = "".to_string();
444 for token in tokens {
445 match token.term() {
446 Lexicon::Identifier(s) => {
447 compiled_string = format!("{}{}", compiled_string, s);
448 }
449 Lexicon::Comma => {
450 simplified.push_back(compiled_string.to_string());
451 compiled_string = "".to_string();
452 }
453 _ => return Err(&"Expected macro identifier or comma"),
454 }
455 }
456 simplified.push_back(compiled_string.to_string());
457 Ok(simplified)
458}
459
460fn get_tokens_column_line(tokens: &VecDeque<Token>) -> (usize, usize) {
461 match tokens.front() {
462 Some(t) => (t.column(), t.line()),
463 _ => (0, 0),
464 }
465}
466
467fn create_error(tokens: &VecDeque<Token>, err_msg: &str) -> Token {
468 let (column, line) = get_tokens_column_line(tokens);
469 Token::create_error(column, line, err_msg)
470}
471
#[cfg(test)]
mod tests {
    use super::super::Lexer;
    use super::super::LexerBase;
    use super::*;

    // Convenience constructor: wraps a string slice in the base lexer
    // and layers a macro lexer (with a unit user object) on top.
    fn create_lexer(s: &str) -> MacroLexer<Lexer<&[u8]>, ()> {
        MacroLexer::new(Lexer::create(s.as_bytes()), &())
    }

    // An assignment followed by a reference expands to the assigned
    // value as a single identifier token.
    #[test]
    fn test_basic_expansion() {
        let mut l = create_lexer(&"FOO := foo\n$(FOO)");
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    // Two independent symbols expand independently, in source order.
    #[test]
    fn test_dual_expansion() {
        let mut l = create_lexer(&"FOO := foo\nBAR := bar\n$(FOO) $(BAR)");
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(
            Lexicon::Identifier("bar".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    // Test macro function: echoes its single parameter back as an
    // identifier, and errors on any other parameter count.
    fn foo_func(
        params: Vec<String>,
        _: &str,
        _: usize,
        _: usize,
        _: &(),
    ) -> Result<Vec<Lexicon>, String> {
        match params.len() {
            1 => Ok(vec![Lexicon::Identifier(
                params.first().unwrap().to_string(),
            )]),
            _ => Err("Expected exactly one parameter".to_string()),
        }
    }

    // Explicit "$(call func,arg)" form invokes a registered function.
    #[test]
    fn test_simple_macro_expansion() {
        let mut l = create_lexer(&"$(call func,foo)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    // Abbreviated "$(func,arg)" form (without "call") also invokes a
    // registered function.
    #[test]
    fn test_abbrev_macro_expansion() {
        let mut l = create_lexer(&"$(func,foo)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }

    // A macro call nested inside an assignment's right-hand side is
    // expanded when the assigned symbol is later referenced.
    #[test]
    fn test_recursive_macro_expansion() {
        let mut l = create_lexer(&"FOO := foo\nBAR := $(call func,$(FOO))\n$(BAR)");
        l.add_fn("func", foo_func);
        assert_eq!(
            Lexicon::Identifier("foo".to_string()),
            l.next_token().term()
        );
        assert_eq!(Lexicon::EOT, l.next_token().term());
    }
}
553}