pp_rs/
pp.rs

1use crate::lexer::{self, Token as LexerToken, TokenValue as LexerTokenValue};
2use crate::token::*;
3use std::{
4    cmp::Ordering,
5    collections::{HashMap, HashSet},
6    convert::TryFrom,
7    rc::Rc,
8};
9
10mod if_parser;
11
/// A preprocessor macro, as recorded by `#define` or `add_define`.
#[derive(Clone, PartialEq, Debug)]
struct Define {
    /// Name the macro is registered under.
    name: String,
    /// True when the definition had a parameter list, i.e. a `(` directly after the name.
    function_like: bool,
    /// Parameter names mapped to their index in the parameter list.
    params: HashMap<String, usize>,
    /// Tokens of the macro's replacement list.
    tokens: Vec<Token>,
}
19
/// State of one in-progress expansion of a `Define`.
#[derive(Debug)]
struct DefineInvocation {
    /// The macro being expanded.
    define: Rc<Define>,
    /// Starts at 0; presumably the index of the next token of `define.tokens` to emit
    /// — TODO confirm against the code that advances it.
    define_position: usize,

    /// One fully expanded token list per argument of a function-like invocation.
    parameters: Vec<Vec<Token>>,
    /// Index into `parameters` of the argument currently being replayed. Initialised to
    /// `usize::MAX` so that the lookup fails while no argument is being replayed.
    parameter_expanding: usize,
    /// Index of the next token to read inside the argument selected by
    /// `parameter_expanding`.
    parameter_position: usize,
}
29
/// Outcome of a single preprocessing step: a value, or the reason no value was produced.
pub type Step<T> = Result<T, StepExit>;
31
/// Reasons a step can end without producing a value.
#[derive(Clone, PartialEq, Debug)]
pub enum StepExit {
    /// Preprocessing failed with the given error at the given location.
    Error((PreprocessorError, Location)),
    /// This step produced nothing (e.g. a newline or a directive was consumed); the caller
    /// should step again.
    Continue,
    /// The underlying token source is exhausted.
    Finished,
}
38
39use self::StepExit::Continue;
40use self::StepExit::Finished;
41
42impl<T> From<StepExit> for Step<T> {
43    fn from(exit: StepExit) -> Step<T> {
44        Err(exit)
45    }
46}
47
/// Token source that `MacroProcessor` pulls from. It is implemented by `DirectiveProcessor`
/// and by the temporary lexer used to expand macro arguments.
trait MeLexer {
    /// Produces the next token, or the reason none is available.
    fn step(&mut self) -> Step<Token>;
    /// Looks up a define by name.
    fn get_define(&self, name: &str) -> Option<&Rc<Define>>;
    /// Adjusts a line number before it is reported (implementations may add a `#line`
    /// offset); `location` is used to report an error if the adjusted line does not fit.
    fn apply_line_offset(&self, line: u32, location: Location) -> Step<u32>;
}
53
54fn make_unexpected_error(token: LexerToken) -> StepExit {
55    let error = match token.value {
56        LexerTokenValue::Integer(i) => PreprocessorError::UnexpectedToken(TokenValue::Integer(i)),
57        LexerTokenValue::Float(f) => PreprocessorError::UnexpectedToken(TokenValue::Float(f)),
58        LexerTokenValue::Ident(s) => PreprocessorError::UnexpectedToken(TokenValue::Ident(s)),
59        LexerTokenValue::Punct(p) => PreprocessorError::UnexpectedToken(TokenValue::Punct(p)),
60        LexerTokenValue::NewLine => PreprocessorError::UnexpectedNewLine,
61        LexerTokenValue::Hash => PreprocessorError::UnexpectedHash,
62    };
63    StepExit::Error((error, token.location))
64}
65
66fn make_line_overflow_error(location: Location) -> StepExit {
67    StepExit::Error((PreprocessorError::LineOverflow, location))
68}
69
/// Bookkeeping for one open `#if` / `#ifdef` / `#ifndef` block.
struct DirectiveBlock {
    /// Location of the directive that opened the block; reported if the block is never
    /// closed.
    start_location: Location,
    /// True once one branch of the block had a condition that evaluated to true.
    had_valid_segment: bool,
    /// True once an `#else` has been seen for this block.
    had_else: bool,
    /// True when the block was opened while tokens were already being skipped.
    outer_skipped: bool,
}
76
/// Consumes lexer tokens, executes preprocessor directives and passes through the
/// remaining tokens.
struct DirectiveProcessor<'a> {
    /// Source of raw tokens.
    lexer: lexer::Lexer<'a>,
    /// Currently defined macros, by name.
    defines: HashMap<String, Rc<Define>>,
    /// True while inside a conditional branch whose tokens must be dropped.
    skipping: bool,
    /// Stack of currently open conditional blocks, innermost last.
    blocks: Vec<DirectiveBlock>,
    /// Offset set by `#line`, added to physical line numbers in `apply_line_offset`.
    line_offset: i64,
    /// True once any directive has been processed.
    had_directive: bool,
    /// True once a non-directive token has been emitted.
    had_non_directive_token: bool,
}
86
87pub fn convert_lexer_token(token: LexerToken) -> Result<Token, (PreprocessorError, Location)> {
88    let location = token.location;
89    match token.value {
90        LexerTokenValue::Integer(i) => Ok(Token {
91            value: TokenValue::Integer(i),
92            location,
93        }),
94        LexerTokenValue::Float(f) => Ok(Token {
95            value: TokenValue::Float(f),
96            location,
97        }),
98        LexerTokenValue::Ident(s) => Ok(Token {
99            value: TokenValue::Ident(s),
100            location,
101        }),
102        LexerTokenValue::Punct(p) => Ok(Token {
103            value: TokenValue::Punct(p),
104            location,
105        }),
106
107        LexerTokenValue::Hash => Err((PreprocessorError::UnexpectedHash, location)),
108        // In all the places that call convert_lexer_token, newlines are handled specially as they
109        // are either noops, or act as a separator.
110        LexerTokenValue::NewLine => unreachable!(),
111    }
112}
113
114pub fn convert_lexer_token_to_step(token: LexerToken) -> Step<Token> {
115    convert_lexer_token(token).map_err(StepExit::Error)
116}
117
118fn legal_redefinition(a: &Define, b: &Define) -> bool {
119    assert!(a.name == b.name);
120    a.function_like == b.function_like
121        && a.params == b.params
122        && a.tokens.len() == b.tokens.len()
123        && a.tokens
124            .iter()
125            .zip(&b.tokens)
126            .all(|(ta, tb)| ta.value == tb.value)
127}
128
129impl<'a> DirectiveProcessor<'a> {
130    pub fn new(input: &'a str) -> DirectiveProcessor {
131        DirectiveProcessor {
132            lexer: lexer::Lexer::new(input),
133            defines: Default::default(),
134            skipping: false,
135            blocks: Default::default(),
136            line_offset: 0,
137            had_directive: false,
138            had_non_directive_token: false,
139        }
140    }
141
142    fn get_lexer_token(&mut self) -> Step<LexerToken> {
143        match self.lexer.next() {
144            None => Finished.into(),
145            Some(Ok(tok)) => Ok(tok),
146            Some(Err(err)) => Err(StepExit::Error(err)),
147        }
148    }
149
150    fn expect_a_lexer_token(&mut self, current_location: Location) -> Step<LexerToken> {
151        match self.get_lexer_token() {
152            Ok(token) => Ok(token),
153            Err(Finished) => Err(StepExit::Error((
154                PreprocessorError::UnexpectedEndOfInput,
155                current_location,
156            ))),
157            Err(e) => Err(e),
158        }
159    }
160
161    fn expect_lexer_token(
162        &mut self,
163        expected: LexerTokenValue,
164        current_location: Location,
165    ) -> Step<Location> {
166        let token = self.expect_a_lexer_token(current_location)?;
167        if token.value == expected {
168            Ok(token.location)
169        } else {
170            Err(make_unexpected_error(token))
171        }
172    }
173
174    fn expect_lexer_ident(&mut self, current_location: Location) -> Step<(String, Location)> {
175        let token = self.expect_a_lexer_token(current_location)?;
176        if let LexerTokenValue::Ident(name) = token.value {
177            Ok((name, token.location))
178        } else {
179            Err(make_unexpected_error(token))
180        }
181    }
182
183    fn consume_until_newline(&mut self) -> Step<()> {
184        loop {
185            // TODO allow unexpected character errors because we are skipping.
186            if let LexerTokenValue::NewLine = self.get_lexer_token()?.value {
187                return Ok(());
188            }
189        }
190    }
191
192    fn gather_until_newline(&mut self) -> Step<Vec<Token>> {
193        let mut tokens = Vec::new();
194        loop {
195            let token = self.get_lexer_token()?;
196            if token.value == LexerTokenValue::NewLine {
197                return Ok(tokens);
198            }
199            tokens.push(convert_lexer_token_to_step(token)?);
200        }
201    }
202
203    fn parse_define_directive(&mut self, directive_location: Location) -> Step<()> {
204        if self.skipping {
205            return self.consume_until_newline();
206        }
207
208        let (name, name_location) = self.expect_lexer_ident(directive_location)?;
209
210        // TODO validate the name?
211        let mut define = Define {
212            name,
213            function_like: false,
214            params: Default::default(),
215            tokens: Default::default(),
216        };
217
218        // TODO what if token is none? EOF but still need to check it is not a redefinition?
219        let mut token = self.get_lexer_token()?;
220
221        // The is a function-like define, parse the argument list, leaving token as the next
222        // token.
223        if token.value == Punct::LeftParen.into() && !token.leading_whitespace {
224            define.function_like = true;
225            let mut first_param = true;
226            loop {
227                token = self.get_lexer_token()?;
228                match &token.value {
229                    LexerTokenValue::Punct(Punct::RightParen) => {
230                        token = self.get_lexer_token()?;
231                        break;
232                    }
233
234                    LexerTokenValue::Ident(param_name) => {
235                        if !first_param {
236                            return Err(make_unexpected_error(token));
237                        }
238                        first_param = false;
239                        define
240                            .params
241                            .insert(param_name.clone(), define.params.len());
242                    }
243
244                    LexerTokenValue::Punct(Punct::Comma) => {
245                        if first_param {
246                            return Err(make_unexpected_error(token));
247                        }
248
249                        let (param_name, param_location) =
250                            self.expect_lexer_ident(token.location)?;
251                        if define.params.contains_key(&param_name) {
252                            return Err(StepExit::Error((
253                                PreprocessorError::DuplicateParameter,
254                                param_location,
255                            )));
256                        }
257                        define.params.insert(param_name, define.params.len());
258                    }
259                    _ => {
260                        return Err(make_unexpected_error(token));
261                    }
262                }
263            }
264        }
265
266        // Tokens until the newline are that define's tokens (including the current one)
267        loop {
268            if token.value == LexerTokenValue::NewLine {
269                break;
270            }
271            define.tokens.push(convert_lexer_token_to_step(token)?);
272            token = self.get_lexer_token()?;
273        }
274
275        // Defines are allowed to be redefined if they are exactly the same up to token locations.
276        if let Some(previous_define) = self.defines.get(&define.name) {
277            if legal_redefinition(&*previous_define, &define) {
278                Ok(())
279            } else {
280                Err(StepExit::Error((
281                    PreprocessorError::DefineRedefined,
282                    name_location,
283                )))
284            }
285        } else {
286            self.defines.insert(define.name.clone(), Rc::new(define));
287            Ok(())
288        }
289    }
290
291    fn add_define(
292        &mut self,
293        name: &str,
294        content: &str,
295    ) -> Result<(), (PreprocessorError, Location)> {
296        let mut define = Define {
297            name: name.to_string(),
298            function_like: false,
299            params: Default::default(),
300            tokens: Default::default(),
301        };
302
303        // Convert the content to tokens and add it to the define.
304        let mut lexer = lexer::Lexer::new(content);
305        loop {
306            match lexer.next() {
307                Some(Ok(lexer_token)) => {
308                    // Skip over newlines (the lexer always adds a newline, which would cause an
309                    // error in convert_lexer_token).
310                    if lexer_token.value == LexerTokenValue::NewLine {
311                        continue;
312                    }
313
314                    define.tokens.push(convert_lexer_token(lexer_token)?);
315                }
316
317                Some(Err(err)) => return Err(err),
318                None => break,
319            }
320        }
321
322        // Note this overwrites existing defines, we might want to add an option to make this
323        // an error in the future.
324        self.defines.insert(define.name.clone(), Rc::new(define));
325
326        Ok(())
327    }
328
329    fn parse_undef_directive(&mut self, directive_location: Location) -> Step<()> {
330        if self.skipping {
331            return self.consume_until_newline();
332        }
333
334        let (name, name_location) = self.expect_lexer_ident(directive_location)?;
335        // TODO check predefine
336        // It is valid to undef a name that is not defined.
337        self.defines.remove(&name);
338
339        self.expect_lexer_token(LexerTokenValue::NewLine, name_location)?;
340        Ok(())
341    }
342
343    fn parse_error_directive(&mut self, directive_location: Location) -> Step<()> {
344        if self.skipping {
345            self.consume_until_newline()
346        } else {
347            // TODO consume the rest of the line and make a nice error message
348            Err(StepExit::Error((
349                PreprocessorError::ErrorDirective,
350                directive_location,
351            )))
352        }
353    }
354
355    fn parse_line_directive(&mut self, directive_location: Location) -> Step<()> {
356        if self.skipping {
357            return self.consume_until_newline();
358        }
359
360        let line = self.gather_until_newline()?;
361
362        let mut parser = if_parser::IfParser::new(line, &self.defines, directive_location, false);
363        let line = parser.evaluate_expression()?;
364
365        // Validates that the line is between 0 and 2^31 as per the C standard.
366        if line as u64 >= (1 << 32) {
367            return Err(make_line_overflow_error(directive_location));
368        }
369        self.line_offset = line - directive_location.line as i64;
370
371        if parser.peek()?.is_some() {
372            // TODO figure out what to do with the file, either number or string?
373            let _source_id = parser.evaluate_expression()?;
374        }
375
376        if let Some(token) = parser.peek()? {
377            return Err(StepExit::Error((
378                PreprocessorError::UnexpectedToken(token.value),
379                token.location,
380            )));
381        }
382
383        Ok(())
384    }
385
386    fn evaluate_if_expression(&mut self, location: Location, line: Vec<Token>) -> Step<bool> {
387        let mut parser = if_parser::IfParser::new(line, &self.defines, location, true);
388        let res = parser.evaluate_expression()?;
389
390        if let Some(token) = parser.peek()? {
391            Err(StepExit::Error((
392                PreprocessorError::UnexpectedToken(token.value),
393                token.location,
394            )))
395        } else {
396            Ok(res != 0)
397        }
398    }
399
400    fn parse_if_directive(&mut self, directive_location: Location) -> Step<()> {
401        self.parse_if_like_directive(directive_location, |this, location| {
402            let line = this.gather_until_newline()?;
403            this.evaluate_if_expression(location, line)
404        })
405    }
406
407    fn parse_ifdef_directive(&mut self, directive_location: Location) -> Step<()> {
408        self.parse_if_like_directive(directive_location, |this, location| {
409            let (name, name_location) = this.expect_lexer_ident(location)?;
410            this.expect_lexer_token(LexerTokenValue::NewLine, name_location)?;
411            Ok(this.defines.contains_key(&name))
412        })
413    }
414
415    fn parse_ifndef_directive(&mut self, directive_location: Location) -> Step<()> {
416        self.parse_if_like_directive(directive_location, |this, location| {
417            let (name, name_location) = this.expect_lexer_ident(location)?;
418            this.expect_lexer_token(LexerTokenValue::NewLine, name_location)?;
419            Ok(!this.defines.contains_key(&name))
420        })
421    }
422
423    fn parse_elif_directive(&mut self, directive_location: Location) -> Step<()> {
424        self.skipping = true;
425
426        // Do checks that the #elif block is well structured even if skipping.
427        let block = self.blocks.last().ok_or(StepExit::Error((
428            PreprocessorError::ElifOutsideOfBlock,
429            directive_location,
430        )))?;
431
432        if block.had_else {
433            return Err(StepExit::Error((
434                PreprocessorError::ElifAfterElse,
435                directive_location,
436            )));
437        }
438
439        // The condition isn't parsed if it doesn't need to (and doesn't produce errors).
440        if block.outer_skipped || block.had_valid_segment {
441            return self.consume_until_newline();
442        }
443
444        let line = self.gather_until_newline()?;
445        if self.evaluate_if_expression(directive_location, line)? {
446            self.skipping = false;
447            self.blocks.last_mut().unwrap().had_valid_segment = true;
448        }
449
450        Ok(())
451    }
452
453    fn parse_else_directive(&mut self, directive_location: Location) -> Step<()> {
454        self.expect_lexer_token(LexerTokenValue::NewLine, directive_location)?;
455
456        let block = self.blocks.last_mut().ok_or(StepExit::Error((
457            PreprocessorError::ElseOutsideOfBlock,
458            directive_location,
459        )))?;
460
461        // #else can only appear once in a block.
462        if block.had_else {
463            Err(StepExit::Error((
464                PreprocessorError::MoreThanOneElse,
465                directive_location,
466            )))
467        } else {
468            self.skipping = block.outer_skipped || block.had_valid_segment;
469            block.had_else = true;
470            Ok(())
471        }
472    }
473
474    fn parse_endif_directive(&mut self, directive_location: Location) -> Step<()> {
475        let block = self.blocks.pop().ok_or(StepExit::Error((
476            PreprocessorError::EndifOutsideOfBlock,
477            directive_location,
478        )))?;
479
480        // After #endif we start processing tokens iff the block was not skipped.
481        self.skipping = block.outer_skipped;
482
483        if self.skipping {
484            self.consume_until_newline()
485        } else {
486            self.expect_lexer_token(LexerTokenValue::NewLine, directive_location)?;
487            Ok(())
488        }
489    }
490
491    fn parse_if_like_directive(
492        &mut self,
493        directive_location: Location,
494        parse: impl Fn(&mut DirectiveProcessor, Location) -> Step<bool>,
495    ) -> Step<()> {
496        if self.skipping {
497            self.blocks.push(DirectiveBlock {
498                start_location: directive_location,
499                had_valid_segment: false,
500                had_else: false,
501                outer_skipped: true,
502            });
503            self.consume_until_newline()
504        } else {
505            let result = parse(self, directive_location)?;
506            self.skipping = !result;
507
508            self.blocks.push(DirectiveBlock {
509                start_location: directive_location,
510                had_valid_segment: !self.skipping,
511                had_else: false,
512                outer_skipped: false,
513            });
514            Ok(())
515        }
516    }
517
518    fn parse_version_directive(&mut self, directive_location: Location) -> Step<Token> {
519        if self.skipping {
520            self.consume_until_newline()?;
521            Continue.into()
522        } else {
523            Ok(Token {
524                location: directive_location,
525                value: TokenValue::Version(Version {
526                    tokens: self.gather_until_newline()?,
527                    is_first_directive: !(self.had_directive || self.had_non_directive_token),
528                    has_comments_before: self.lexer.had_comments(),
529                }),
530            })
531        }
532    }
533
534    fn parse_extension_directive(&mut self, directive_location: Location) -> Step<Token> {
535        if self.skipping {
536            self.consume_until_newline()?;
537            Continue.into()
538        } else {
539            Ok(Token {
540                location: directive_location,
541                value: TokenValue::Extension(Extension {
542                    tokens: self.gather_until_newline()?,
543                    has_non_directive_before: self.had_non_directive_token,
544                }),
545            })
546        }
547    }
548
549    fn parse_pragma_directive(&mut self, directive_location: Location) -> Step<Token> {
550        if self.skipping {
551            self.consume_until_newline()?;
552            Continue.into()
553        } else {
554            Ok(Token {
555                location: directive_location,
556                value: TokenValue::Pragma(Pragma {
557                    tokens: self.gather_until_newline()?,
558                }),
559            })
560        }
561    }
562
563    fn parse_directive(&mut self, hash_location: Location) -> Step<Token> {
564        let token = self.expect_a_lexer_token(hash_location)?;
565
566        if let LexerTokenValue::Ident(ref directive) = token.value {
567            match directive.as_str() {
568                // TODO elif line
569                "error" => self.parse_error_directive(token.location)?,
570                "line" => self.parse_line_directive(token.location)?,
571
572                "define" => self.parse_define_directive(token.location)?,
573                "undef" => self.parse_undef_directive(token.location)?,
574
575                "if" => self.parse_if_directive(token.location)?,
576                "ifdef" => self.parse_ifdef_directive(token.location)?,
577                "ifndef" => self.parse_ifndef_directive(token.location)?,
578                "elif" => self.parse_elif_directive(token.location)?,
579                "else" => self.parse_else_directive(token.location)?,
580                "endif" => self.parse_endif_directive(token.location)?,
581
582                "version" => {
583                    let result = self.parse_version_directive(token.location);
584                    self.had_directive = true;
585                    return result;
586                }
587                "extension" => {
588                    let result = self.parse_extension_directive(token.location);
589                    self.had_directive = true;
590                    return result;
591                }
592                "pragma" => {
593                    let result = self.parse_pragma_directive(token.location);
594                    self.had_directive = true;
595                    return result;
596                }
597                _ => {
598                    if !self.skipping {
599                        return Err(StepExit::Error((
600                            PreprocessorError::UnknownDirective,
601                            token.location,
602                        )));
603                    }
604                }
605            }
606            self.had_directive = true;
607            Continue.into()
608        } else if !self.skipping {
609            make_unexpected_error(token).into()
610        } else {
611            Continue.into()
612        }
613    }
614}
615
616impl<'a> MeLexer for DirectiveProcessor<'a> {
617    fn step(&mut self) -> Step<Token> {
618        let step = (|| {
619            // TODO: if we are skipping invalid characters should be allowed.
620            let lexer_token = self.get_lexer_token()?;
621            match lexer_token.value {
622                LexerTokenValue::NewLine => Continue.into(),
623                LexerTokenValue::Hash => {
624                    if lexer_token.start_of_line {
625                        self.parse_directive(lexer_token.location)
626                    } else if !self.skipping {
627                        make_unexpected_error(lexer_token).into()
628                    } else {
629                        Continue.into()
630                    }
631                }
632
633                _ => {
634                    if !self.skipping {
635                        self.had_non_directive_token = true;
636                        convert_lexer_token_to_step(lexer_token)
637                    } else {
638                        Continue.into()
639                    }
640                }
641            }
642        })();
643
644        if step == Err(StepExit::Finished) {
645            if let Some(block) = self.blocks.pop() {
646                return Err(StepExit::Error((
647                    PreprocessorError::UnfinishedBlock,
648                    block.start_location,
649                )));
650            }
651        }
652
653        step
654    }
655
656    fn get_define(&self, name: &str) -> Option<&Rc<Define>> {
657        self.defines.get(name)
658    }
659
660    fn apply_line_offset(&self, line: u32, location: Location) -> Step<u32> {
661        if let Ok(offset_line) = u32::try_from(line as i64 + self.line_offset) {
662            Ok(offset_line)
663        } else {
664            Err(make_line_overflow_error(location))
665        }
666    }
667}
668
/// Expands macro invocations in the token stream produced by a `MeLexer`.
#[derive(Default)]
struct MacroProcessor {
    /// Stack of the invocations currently being expanded, innermost last.
    define_invocations: Vec<DefineInvocation>,
    /// Names of the defines on the stack; used to refuse recursive expansion.
    defines_being_expanded: HashSet<String>,

    /// A step result that was read ahead and must be returned by the next step.
    peeked: Option<Step<Token>>,
    /// Line recorded when a top-level invocation starts (the line of the name, or of the
    /// closing `)` for function-like defines); presumably what `__LINE__` expands to
    /// inside the define — TODO confirm.
    define_line: u32,
}
677
678impl MacroProcessor {
679    fn start_define_invocation(
680        &mut self,
681        name: &str,
682        location: Location,
683        lexer: &mut dyn MeLexer,
684    ) -> Step<bool> {
685        // Defines can be expanding only once, it is not possible to do recursive defines
686        if self.defines_being_expanded.contains(name) {
687            return Ok(false);
688        }
689
690        if let Some(define) = lexer.get_define(name) {
691            let mut invocation = DefineInvocation {
692                define: define.clone(),
693                define_position: 0,
694
695                parameters: Default::default(),
696                parameter_position: 0,
697                parameter_expanding: std::usize::MAX,
698            };
699
700            // If this is a not a function-like define, __LINE__ inside the define is the line of the first
701            // character of the invocation. Only the line of the top-level invocation counts.
702            if !self.is_expanding_define() {
703                self.define_line = location.line;
704            }
705
706            if invocation.define.function_like {
707                let lparen_location = match self.step_no_continue(lexer) {
708                    Ok(Token {
709                        value: TokenValue::Punct(Punct::LeftParen),
710                        location,
711                    }) => location,
712
713                    // Function-like macros are not processed if there is no ( right after the identifier
714                    token => {
715                        self.peeked = Some(token);
716                        return Ok(false);
717                    }
718                };
719
720                // TODO still bail out if define was undefined until now? This would match
721                // clang and GCC
722                let (parameters, closing_location) =
723                    self.parse_define_call_arguments(lexer, lparen_location)?;
724
725                if !self.is_expanding_define() {
726                    self.define_line = closing_location.line;
727                }
728
729                // Check for the number of arguments.
730                match parameters.len().cmp(&invocation.define.params.len()) {
731                    Ordering::Greater => {
732                        let params_empty = parameters.len() == 1 && parameters[0].is_empty();
733                        let expects_zero_args = invocation.define.params.is_empty();
734
735                        if !(params_empty && expects_zero_args) {
736                            return Err(StepExit::Error((
737                                PreprocessorError::TooManyDefineArguments,
738                                lparen_location,
739                            )));
740                        }
741                    }
742                    Ordering::Less => {
743                        return Err(StepExit::Error((
744                            PreprocessorError::TooFewDefineArguments,
745                            lparen_location,
746                        )));
747                    }
748                    _ => {}
749                }
750
751                // Fully expand the parameters
752                for parameter in parameters {
753                    invocation
754                        .parameters
755                        .push(self.expand_parameter(lexer, parameter)?);
756                }
757            }
758
759            assert!(self
760                .defines_being_expanded
761                .insert(invocation.define.name.clone()));
762            self.define_invocations.push(invocation);
763
764            return Ok(true);
765        }
766
767        Ok(false)
768    }
769
770    // Parse the arguments of the function-like define starting after the first (. Also returns
771    // the location of the closing ).
772    fn parse_define_call_arguments(
773        &mut self,
774        lexer: &mut dyn MeLexer,
775        mut current_location: Location,
776    ) -> Step<(Vec<Vec<Token>>, Location)> {
777        let mut paren_nesting = 0u32;
778        let mut arguments = vec![vec![]];
779
780        loop {
781            // Get the next token (without additional expansion)
782            let token = match self.step(lexer) {
783                Err(StepExit::Continue) => continue,
784                Err(StepExit::Finished) => {
785                    return Err(StepExit::Error((
786                        PreprocessorError::UnexpectedEndOfInput,
787                        current_location,
788                    )));
789                }
790                Err(err @ StepExit::Error(_)) => {
791                    return Err(err);
792                }
793                Ok(token) => token,
794            };
795
796            current_location = token.location;
797
798            // Handle special tokens
799            match token.value {
800                // Avoid overflow on parenthesis nesting counting.
801                TokenValue::Punct(Punct::LeftParen) => match paren_nesting.checked_add(1) {
802                    // TODO figure out a way to cover this code path? Maybe make it a max nesting
803                    // so that it can be set to a small value?
804                    None => {
805                        return Err(StepExit::Error((
806                            PreprocessorError::IntegerOverflow,
807                            current_location,
808                        )));
809                    }
810                    Some(v) => paren_nesting = v,
811                },
812
813                TokenValue::Punct(Punct::RightParen) => {
814                    // Return the arguments when we find our )
815                    if paren_nesting == 0 {
816                        return Ok((arguments, token.location));
817                    }
818                    paren_nesting -= 1;
819                }
820
821                TokenValue::Punct(Punct::Comma) => {
822                    // Commas outside of () split arguments and must not be added to them.
823                    if paren_nesting == 0 {
824                        arguments.push(Default::default());
825                        continue;
826                    }
827                }
828
829                _ => {}
830            }
831
832            arguments.last_mut().unwrap().push(token);
833        }
834    }
835
836    fn expand_parameter(&self, lexer: &mut dyn MeLexer, parameter: Vec<Token>) -> Step<Vec<Token>> {
837        struct ExpandParameterLexer<'a> {
838            parent_lexer: &'a dyn MeLexer,
839            expander: &'a MacroProcessor,
840            tokens: &'a Vec<Token>,
841            position: usize,
842        }
843
844        impl<'a> MeLexer for ExpandParameterLexer<'a> {
845            fn step(&mut self) -> Step<Token> {
846                if let Some(token) = self.tokens.get(self.position) {
847                    self.position += 1;
848                    Ok(token.clone())
849                } else {
850                    Finished.into()
851                }
852            }
853
854            fn get_define(&self, name: &str) -> Option<&Rc<Define>> {
855                if self.expander.defines_being_expanded.contains(name) {
856                    None
857                } else {
858                    self.parent_lexer.get_define(name)
859                }
860            }
861
862            fn apply_line_offset(&self, line: u32, _: Location) -> Step<u32> {
863                Ok(line)
864            }
865        }
866
867        let mut parameter_lexer = ExpandParameterLexer {
868            parent_lexer: lexer,
869            expander: self,
870            tokens: &parameter,
871            position: 0,
872        };
873
874        let mut processor: MacroProcessor = Default::default();
875        let mut expanded_parameters = Default::default();
876        loop {
877            match processor.step(&mut parameter_lexer) {
878                Err(err @ StepExit::Error(_)) => return Err(err),
879                Err(StepExit::Finished) => return Ok(expanded_parameters),
880                Err(StepExit::Continue) => continue,
881                Ok(token) => {
882                    if let TokenValue::Ident(name) = &token.value {
883                        if processor.start_define_invocation(
884                            name,
885                            token.location,
886                            &mut parameter_lexer,
887                        )? {
888                            continue;
889                        }
890                    }
891
892                    expanded_parameters.push(token);
893                }
894            }
895        }
896    }
897
898    fn is_expanding_define(&self) -> bool {
899        !self.define_invocations.is_empty()
900    }
901
902    fn step_internal(&mut self, lexer: &mut dyn MeLexer) -> Step<Token> {
903        if let Some(step) = self.peeked.take() {
904            return step;
905        }
906
907        if let Some(invocation) = self.define_invocations.last_mut() {
908            // Keep expanding the parameters
909            if let Some(argument) = invocation.parameters.get(invocation.parameter_expanding) {
910                if let Some(token) = argument.get(invocation.parameter_position) {
911                    invocation.parameter_position += 1;
912                    return Ok(token.clone());
913                } else {
914                    invocation.parameter_expanding = std::usize::MAX;
915                    return Continue.into();
916                }
917            }
918
919            // Take tokens from the define definition.
920            if let Some(token) = invocation.define.tokens.get(invocation.define_position) {
921                invocation.define_position += 1;
922
923                // We found a parameter! Start expanding it.
924                if let TokenValue::Ident(name) = &token.value {
925                    if let Some(parameter_index) = invocation.define.params.get(name) {
926                        invocation.parameter_expanding = *parameter_index;
927                        invocation.parameter_position = 0;
928                        return Continue.into();
929                    }
930                }
931
932                return Ok(token.clone());
933            } else {
934                self.defines_being_expanded.remove(&invocation.define.name);
935                self.define_invocations.pop();
936                return Continue.into();
937            }
938        }
939
940        lexer.step()
941    }
942
943    fn step(&mut self, lexer: &mut dyn MeLexer) -> Step<Token> {
944        let token = self.step_internal(lexer)?;
945
946        if let TokenValue::Ident(name) = &token.value {
947            if name == "__LINE__" {
948                // When inside a define, __LINE__ is that define's line.
949                let line = if self.is_expanding_define() {
950                    self.define_line
951                } else {
952                    token.location.line
953                };
954
955                return Ok(Token {
956                    value: TokenValue::Integer(Integer {
957                        value: lexer.apply_line_offset(line, token.location)? as u64,
958                        signed: false,
959                        width: 32,
960                    }),
961                    location: token.location,
962                });
963            }
964        }
965
966        Ok(token)
967    }
968
969    fn step_no_continue(&mut self, lexer: &mut dyn MeLexer) -> Step<Token> {
970        loop {
971            let step = self.step(lexer);
972            if step != Continue.into() {
973                return step;
974            }
975        }
976    }
977}
978
979pub struct Preprocessor<'a> {
980    directive_processor: DirectiveProcessor<'a>,
981    macro_processor: MacroProcessor,
982}
983
984impl<'a> Preprocessor<'a> {
985    pub fn new(input: &'a str) -> Preprocessor {
986        Preprocessor {
987            directive_processor: DirectiveProcessor::new(input),
988            macro_processor: Default::default(),
989        }
990    }
991
992    pub fn add_define(
993        &mut self,
994        name: &str,
995        content: &str,
996    ) -> Result<(), (PreprocessorError, Location)> {
997        self.directive_processor.add_define(name, content)
998    }
999
1000    fn step(&mut self) -> Step<Token> {
1001        let token = self.macro_processor.step(&mut self.directive_processor)?;
1002
1003        // Is this token the start of a new macro?
1004        if let TokenValue::Ident(name) = &token.value {
1005            // Returns Continue if it started the define, token otherwise.
1006            if self.macro_processor.start_define_invocation(
1007                name,
1008                token.location,
1009                &mut self.directive_processor,
1010            )? {
1011                return Continue.into();
1012            }
1013        }
1014
1015        Ok(token)
1016    }
1017}
1018
1019pub type PreprocessorItem = Result<Token, (PreprocessorError, Location)>;
1020
1021impl<'a> Iterator for Preprocessor<'a> {
1022    type Item = PreprocessorItem;
1023
1024    fn next(&mut self) -> Option<Self::Item> {
1025        loop {
1026            match self.step() {
1027                Ok(token) => return Some(Ok(token)),
1028                Err(StepExit::Error(err)) => return Some(Err(err)),
1029                Err(StepExit::Finished) => return None,
1030                Err(StepExit::Continue) => continue,
1031            };
1032        }
1033    }
1034}