1use std::any::Any;
7use std::collections::{HashMap, HashSet};
8use std::fs;
9use std::path::{Path, PathBuf};
10
11use crate::error::{CompileError, PPError};
12use crate::token_source::TokenSource;
13use crate::intern::{InternedStr, StringInterner};
14use crate::lexer::Lexer;
15use crate::macro_def::{MacroDef, MacroKind, MacroTable};
16use crate::pp_expr::PPExprEvaluator;
17use crate::source::{FileId, FileRegistry, SourceLocation};
18use crate::token::{
19 Comment, MacroBeginInfo, MacroEndInfo, MacroInvocationKind, Token, TokenId, TokenKind,
20};
21
22pub trait MacroDefCallback {
27 fn on_macro_defined(&mut self, def: &MacroDef);
29
30 fn into_any(self: Box<Self>) -> Box<dyn Any>;
32}
33
/// Two values bundled together so they can be handled as one callback.
pub struct CallbackPair<A, B> {
    /// The first member of the pair.
    pub first: A,
    /// The second member of the pair.
    pub second: B,
}

impl<A, B> CallbackPair<A, B> {
    /// Builds a pair from its two members.
    pub fn new(first: A, second: B) -> Self {
        CallbackPair { first, second }
    }
}
45
46impl<A: MacroDefCallback + 'static, B: MacroDefCallback + 'static> MacroDefCallback for CallbackPair<A, B> {
47 fn on_macro_defined(&mut self, def: &MacroDef) {
48 self.first.on_macro_defined(def);
49 self.second.on_macro_defined(def);
50 }
51
52 fn into_any(self: Box<Self>) -> Box<dyn Any> {
53 self
54 }
55}
56
57pub trait MacroCalledCallback {
62 fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner);
67
68 fn as_any(&self) -> &dyn Any;
70 fn as_any_mut(&mut self) -> &mut dyn Any;
71}
72
73pub struct MacroCallWatcher {
77 called: std::cell::Cell<bool>,
79 last_args: std::cell::RefCell<Option<Vec<String>>>,
81}
82
83impl MacroCallWatcher {
84 pub fn new() -> Self {
86 Self {
87 called: std::cell::Cell::new(false),
88 last_args: std::cell::RefCell::new(None),
89 }
90 }
91
92 pub fn take_called(&self) -> bool {
94 self.called.replace(false)
95 }
96
97 pub fn take_args(&self) -> Option<Vec<String>> {
99 self.last_args.borrow_mut().take()
100 }
101
102 pub fn clear(&self) {
104 self.called.set(false);
105 *self.last_args.borrow_mut() = None;
106 }
107
108 pub fn was_called(&self) -> bool {
110 self.called.get()
111 }
112
113 pub fn last_args(&self) -> Option<Vec<String>> {
115 self.last_args.borrow().clone()
116 }
117
118 fn tokens_to_string(tokens: &[Token], interner: &StringInterner) -> String {
120 tokens
121 .iter()
122 .map(|t| t.kind.format(interner))
123 .collect::<Vec<_>>()
124 .join("")
125 }
126}
127
128impl Default for MacroCallWatcher {
129 fn default() -> Self {
130 Self::new()
131 }
132}
133
134impl MacroCalledCallback for MacroCallWatcher {
135 fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner) {
136 self.called.set(true);
137 if let Some(args) = args {
138 let strs: Vec<String> = args
139 .iter()
140 .map(|tokens| Self::tokens_to_string(tokens, interner))
141 .collect();
142 *self.last_args.borrow_mut() = Some(strs);
143 }
144 }
145
146 fn as_any(&self) -> &dyn Any {
147 self
148 }
149
150 fn as_any_mut(&mut self) -> &mut dyn Any {
151 self
152 }
153}
154
155pub trait CommentCallback {
160 fn on_comment(&mut self, comment: &Comment, file_id: FileId, is_target: bool);
166
167 fn into_any(self: Box<Self>) -> Box<dyn Any>;
169}
170
/// The two flavours of include.
///
/// NOTE(review): presumably `System` is `#include <...>` and `Local` is
/// `#include "..."`; the code that uses this enum is not in view.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum IncludeKind {
    System,
    Local,
}
179
/// User-supplied configuration of the preprocessor.
#[derive(Clone, Debug, Default)]
pub struct PPConfig {
    /// Include search paths. (Usage is outside the visible code; presumably
    /// directories searched for `#include`.)
    pub include_paths: Vec<PathBuf>,
    /// Macros defined before any input is read, as `(name, value)` pairs.
    /// A missing value is defined as `1`.
    pub predefined: Vec<(String, Option<String>)>,
    /// Debug switch for the preprocessor. (Effect not visible here.)
    pub debug_pp: bool,
    /// Optional target directory. (Presumably used to decide which files are
    /// "target" files; verify against `is_file_in_target`.)
    pub target_dir: Option<PathBuf>,
    /// Marker-emission switch. (Effect not visible here.)
    pub emit_markers: bool,
}
194
195#[derive(Debug, Clone)]
197struct CondState {
198 active: bool,
200 seen_active: bool,
202 seen_else: bool,
204 loc: SourceLocation,
206}
207
208#[derive(Debug, Default)]
213pub struct NoExpandRegistry {
214 map: HashMap<TokenId, HashSet<InternedStr>>,
215}
216
217impl NoExpandRegistry {
218 pub fn new() -> Self {
220 Self {
221 map: HashMap::new(),
222 }
223 }
224
225 pub fn add(&mut self, token_id: TokenId, macro_id: InternedStr) {
227 self.map.entry(token_id).or_default().insert(macro_id);
228 }
229
230 pub fn extend(&mut self, token_id: TokenId, macros: impl IntoIterator<Item = InternedStr>) {
232 self.map.entry(token_id).or_default().extend(macros);
233 }
234
235 pub fn is_blocked(&self, token_id: TokenId, macro_id: InternedStr) -> bool {
237 self.map
238 .get(&token_id)
239 .map_or(false, |s| s.contains(¯o_id))
240 }
241
242 pub fn inherit(&mut self, from: TokenId, to: TokenId) {
244 if let Some(set) = self.map.get(&from).cloned() {
245 self.map.entry(to).or_default().extend(set);
246 }
247 }
248
249 pub fn get(&self, token_id: TokenId) -> Option<&HashSet<InternedStr>> {
251 self.map.get(&token_id)
252 }
253
254 pub fn is_empty(&self) -> bool {
256 self.map.is_empty()
257 }
258
259 pub fn len(&self) -> usize {
261 self.map.len()
262 }
263}
264
265struct InputSource {
267 source: Vec<u8>,
269 pos: usize,
271 line: u32,
273 column: u32,
275 file_id: FileId,
277 at_line_start: bool,
279 tokens: Option<Vec<Token>>,
281 token_pos: usize,
283}
284
285impl InputSource {
286 fn from_file(source: Vec<u8>, file_id: FileId) -> Self {
288 Self {
289 source,
290 pos: 0,
291 line: 1,
292 column: 1,
293 file_id,
294 at_line_start: true,
295 tokens: None,
296 token_pos: 0,
297 }
298 }
299
300 #[allow(dead_code)]
302 fn from_tokens(tokens: Vec<Token>, loc: SourceLocation) -> Self {
303 Self {
304 source: Vec::new(),
305 pos: 0,
306 line: loc.line,
307 column: loc.column,
308 file_id: loc.file_id,
309 at_line_start: false,
310 tokens: Some(tokens),
311 token_pos: 0,
312 }
313 }
314
315 fn is_token_source(&self) -> bool {
317 self.tokens.is_some()
318 }
319
320 fn next_buffered_token(&mut self) -> Option<Token> {
322 if let Some(ref tokens) = self.tokens {
323 if self.token_pos < tokens.len() {
324 let token = tokens[self.token_pos].clone();
325 self.token_pos += 1;
326 return Some(token);
327 }
328 }
329 None
330 }
331
332 fn is_at_line_start(&self) -> bool {
334 self.at_line_start
335 }
336
337 fn current_location(&self) -> SourceLocation {
339 SourceLocation::new(self.file_id, self.line, self.column)
340 }
341
342 fn skip_line_continuations(&self, start_pos: usize) -> usize {
344 let mut pos = start_pos;
345 loop {
346 if self.source.get(pos) == Some(&b'\\') {
348 let next = self.source.get(pos + 1);
349 if next == Some(&b'\n') {
350 pos += 2;
351 continue;
352 } else if next == Some(&b'\r') && self.source.get(pos + 2) == Some(&b'\n') {
353 pos += 3;
355 continue;
356 }
357 }
358 break;
359 }
360 pos
361 }
362
363 fn peek(&self) -> Option<u8> {
365 let pos = self.skip_line_continuations(self.pos);
366 self.source.get(pos).copied()
367 }
368
369 fn peek_n(&self, n: usize) -> Option<u8> {
371 let mut pos = self.pos;
372 for i in 0..=n {
373 pos = self.skip_line_continuations(pos);
374 if pos >= self.source.len() {
375 return None;
376 }
377 if i < n {
378 pos += 1;
379 }
380 }
381 self.source.get(pos).copied()
382 }
383
384 fn advance(&mut self) -> Option<u8> {
386 let old_pos = self.pos;
388 self.pos = self.skip_line_continuations(self.pos);
389
390 for i in old_pos..self.pos {
392 if self.source.get(i) == Some(&b'\n') {
393 self.line += 1;
394 }
395 }
396
397 let c = self.source.get(self.pos).copied()?;
398 self.pos += 1;
399
400 if c == b'\n' {
401 self.line += 1;
402 self.column = 1;
403 self.at_line_start = true;
404 } else {
405 self.column += 1;
406 if c != b' ' && c != b'\t' && c != b'\r' {
407 self.at_line_start = false;
408 }
409 }
410 Some(c)
411 }
412
413 fn skip_whitespace(&mut self) {
415 while let Some(c) = self.peek() {
416 if c == b' ' || c == b'\t' || c == b'\r' || c == 0x0C || c == 0x0B {
418 self.advance();
419 } else {
420 break;
421 }
422 }
423 }
424}
425
426pub struct Preprocessor {
428 files: FileRegistry,
430 interner: StringInterner,
432 macros: MacroTable,
434 config: PPConfig,
436 sources: Vec<InputSource>,
438 cond_stack: Vec<CondState>,
440 lookahead: Vec<Token>,
442 pending_comments: Vec<Comment>,
444 cond_active: bool,
446 return_spaces: bool,
448 defining_builtin: bool,
450 no_expand_registry: NoExpandRegistry,
452 macro_def_callback: Option<Box<dyn MacroDefCallback>>,
454 macro_called_callbacks: HashMap<InternedStr, Box<dyn MacroCalledCallback>>,
456 wrapped_macros: HashSet<InternedStr>,
458 comment_callback: Option<Box<dyn CommentCallback>>,
460 skip_expand_macros: HashSet<InternedStr>,
462 explicit_expand_macros: HashSet<InternedStr>,
464}
465
466impl Preprocessor {
467 pub fn new(config: PPConfig) -> Self {
469 let mut pp = Self {
470 files: FileRegistry::new(),
471 interner: StringInterner::new(),
472 macros: MacroTable::new(),
473 config,
474 sources: Vec::new(),
475 cond_stack: Vec::new(),
476 lookahead: Vec::new(),
477 pending_comments: Vec::new(),
478 cond_active: true,
479 return_spaces: false,
480 defining_builtin: false,
481 no_expand_registry: NoExpandRegistry::new(),
482 macro_def_callback: None,
483 macro_called_callbacks: HashMap::new(),
484 wrapped_macros: HashSet::new(),
485 comment_callback: None,
486 skip_expand_macros: HashSet::new(),
487 explicit_expand_macros: HashSet::new(),
488 };
489
490 pp.define_predefined_macros();
492
493 pp
494 }
495
496 pub fn set_macro_def_callback(&mut self, callback: Box<dyn MacroDefCallback>) {
498 self.macro_def_callback = Some(callback);
499 }
500
501 pub fn take_macro_def_callback(&mut self) -> Option<Box<dyn MacroDefCallback>> {
503 self.macro_def_callback.take()
504 }
505
506 pub fn set_comment_callback(&mut self, callback: Box<dyn CommentCallback>) {
508 self.comment_callback = Some(callback);
509 }
510
511 pub fn take_comment_callback(&mut self) -> Option<Box<dyn CommentCallback>> {
513 self.comment_callback.take()
514 }
515
516 pub fn set_macro_called_callback(
520 &mut self,
521 macro_name: InternedStr,
522 callback: Box<dyn MacroCalledCallback>,
523 ) {
524 self.macro_called_callbacks.insert(macro_name, callback);
525 }
526
527 pub fn take_macro_called_callback(
529 &mut self,
530 macro_name: InternedStr,
531 ) -> Option<Box<dyn MacroCalledCallback>> {
532 self.macro_called_callbacks.remove(¯o_name)
533 }
534
535 pub fn get_macro_called_callback(
537 &self,
538 macro_name: InternedStr,
539 ) -> Option<&Box<dyn MacroCalledCallback>> {
540 self.macro_called_callbacks.get(¯o_name)
541 }
542
543 pub fn get_macro_called_callback_mut(
545 &mut self,
546 macro_name: InternedStr,
547 ) -> Option<&mut Box<dyn MacroCalledCallback>> {
548 self.macro_called_callbacks.get_mut(¯o_name)
549 }
550
551 pub fn add_wrapped_macro(&mut self, macro_name: &str) {
556 let id = self.interner.intern(macro_name);
557 self.wrapped_macros.insert(id);
558 }
559
560 pub fn add_skip_expand_macro(&mut self, name: InternedStr) {
566 self.skip_expand_macros.insert(name);
567 }
568
569 pub fn add_skip_expand_macros(&mut self, names: impl IntoIterator<Item = InternedStr>) {
571 self.skip_expand_macros.extend(names);
572 }
573
574 pub fn add_explicit_expand_macro(&mut self, name: InternedStr) {
576 self.explicit_expand_macros.insert(name);
577 }
578
579 pub fn add_explicit_expand_macros(&mut self, names: impl IntoIterator<Item = InternedStr>) {
581 self.explicit_expand_macros.extend(names);
582 }
583
584 fn define_predefined_macros(&mut self) {
586 let mut defines_source = String::new();
589
590 defines_source.push_str("#define _Pragma(x)\n");
592
593 for (name, value) in &self.config.predefined {
594 if let Some(val) = value {
595 defines_source.push_str(&format!("#define {} {}\n", name, val));
596 } else {
597 defines_source.push_str(&format!("#define {} 1\n", name));
598 }
599 }
600
601 if !defines_source.is_empty() {
602 let file_id = self.files.register(PathBuf::from("<cmdline>"));
604 let input = InputSource::from_file(defines_source.into_bytes(), file_id);
605 self.sources.push(input);
606
607 self.defining_builtin = true;
609
610 loop {
612 match self.next_raw_token() {
613 Ok(token) => {
614 match token.kind {
615 TokenKind::Eof => break,
616 TokenKind::Hash => {
617 if let Err(_) = self.process_directive(token.loc) {
619 break;
620 }
621 }
622 TokenKind::Newline => continue,
623 _ => {} }
625 }
626 Err(_) => break,
627 }
628 }
629
630 self.defining_builtin = false;
631
632 self.sources.pop();
634 }
635 }
636
637 fn tokenize_string(&mut self, s: &str) -> Vec<Token> {
639 let bytes = s.as_bytes();
640 let file_id = FileId::default();
641 let mut lexer = Lexer::new(bytes, file_id, &mut self.interner);
642
643 let mut tokens = Vec::new();
644 loop {
645 match lexer.next_token() {
646 Ok(token) => {
647 if matches!(token.kind, TokenKind::Eof) {
648 break;
649 }
650 if !matches!(token.kind, TokenKind::Newline) {
651 tokens.push(token);
652 }
653 }
654 Err(_) => break,
655 }
656 }
657 tokens
658 }
659
660 pub fn add_source_file(&mut self, path: &Path) -> Result<(), CompileError> {
666 let source = fs::read(path).map_err(|e| {
667 CompileError::Preprocess {
668 loc: SourceLocation::default(),
669 kind: PPError::IoError(path.to_path_buf(), e.to_string()),
670 }
671 })?;
672
673 let file_id = self.files.register(path.to_path_buf());
674 let input = InputSource::from_file(source, file_id);
675 self.sources.push(input);
676
677 Ok(())
678 }
679
680 fn lex_token_from_source(&mut self) -> Result<Option<Token>, CompileError> {
682 {
684 let Some(source) = self.sources.last_mut() else {
685 return Ok(None);
686 };
687
688 if source.is_token_source() {
689 return Ok(source.next_buffered_token());
690 }
691
692 if self.return_spaces {
694 if let Some(c) = source.peek() {
695 if c == b' ' || c == b'\t' || c == 0x0C || c == 0x0B {
697 let loc = source.current_location();
698 source.advance();
699 while let Some(c) = source.peek() {
701 if c == b' ' || c == b'\t' || c == 0x0C || c == 0x0B {
702 source.advance();
703 } else {
704 break;
705 }
706 }
707 return Ok(Some(Token::new(TokenKind::Space, loc)));
708 }
709 }
710 } else {
711 source.skip_whitespace();
712 }
713 }
714
715 let mut leading_comments = Vec::new();
717 loop {
718 {
719 let Some(source) = self.sources.last_mut() else {
720 return Ok(None);
721 };
722 if !self.return_spaces {
723 source.skip_whitespace();
724 }
725 }
726
727 let (is_line_comment, is_block_comment) = {
728 let Some(source) = self.sources.last() else {
729 return Ok(None);
730 };
731 (
732 source.peek() == Some(b'/') && source.peek_n(1) == Some(b'/'),
733 source.peek() == Some(b'/') && source.peek_n(1) == Some(b'*'),
734 )
735 };
736
737 if is_line_comment {
738 let comment = self.scan_line_comment();
739 leading_comments.push(comment);
740 } else if is_block_comment {
741 let comment = self.scan_block_comment()?;
742 leading_comments.push(comment);
743 } else {
744 break;
745 }
746 }
747
748 let loc = {
749 let Some(source) = self.sources.last() else {
750 return Ok(None);
751 };
752 source.current_location()
753 };
754
755 let kind = self.scan_token_kind()?;
756
757 let mut token = Token::new(kind, loc);
758 token.leading_comments = leading_comments;
759 Ok(Some(token))
760 }
761
762 fn scan_line_comment(&mut self) -> Comment {
764 let (text, loc, file_id) = {
765 let source = self.sources.last_mut().unwrap();
766 let loc = source.current_location();
767 let file_id = source.file_id;
768 source.advance(); source.advance(); let start = source.pos;
772 while source.peek().is_some_and(|c| c != b'\n') {
773 source.advance();
774 }
775 let text = String::from_utf8_lossy(&source.source[start..source.pos]).to_string();
776 (text, loc, file_id)
777 };
778
779 let comment = Comment::new(crate::token::CommentKind::Line, text, loc);
780 let is_target = self.is_file_in_target(file_id);
781
782 if is_target {
784 if let Some(cb) = &mut self.comment_callback {
785 cb.on_comment(&comment, file_id, is_target);
786 }
787 }
788
789 comment
790 }
791
792 fn scan_block_comment(&mut self) -> Result<Comment, CompileError> {
794 let result = {
796 let source = self.sources.last_mut().unwrap();
797 let loc = source.current_location();
798 let file_id = source.file_id;
799 source.advance(); source.advance(); let start = source.pos;
803 loop {
804 match (source.peek(), source.peek_n(1)) {
805 (Some(b'*'), Some(b'/')) => {
806 let end = source.pos;
807 source.advance(); source.advance(); let text = String::from_utf8_lossy(&source.source[start..end]).to_string();
810 break Ok((text, loc, file_id));
811 }
812 (Some(_), _) => {
813 source.advance();
814 }
815 (None, _) => {
816 break Err(CompileError::Lex {
817 loc,
818 kind: crate::error::LexError::UnterminatedComment,
819 });
820 }
821 }
822 }
823 };
824
825 let (text, loc, file_id) = result?;
826 let comment = Comment::new(crate::token::CommentKind::Block, text, loc);
827 let is_target = self.is_file_in_target(file_id);
828
829 if is_target {
831 if let Some(cb) = &mut self.comment_callback {
832 cb.on_comment(&comment, file_id, is_target);
833 }
834 }
835
836 Ok(comment)
837 }
838
839 fn scan_token_kind(&mut self) -> Result<TokenKind, CompileError> {
841 let source = self.sources.last_mut().unwrap();
842 let Some(c) = source.peek() else {
843 return Ok(TokenKind::Eof);
844 };
845
846 match c {
847 b'\n' => {
848 source.advance();
849 Ok(TokenKind::Newline)
850 }
851
852 b'L' if matches!(source.peek_n(1), Some(b'"') | Some(b'\'')) => {
854 source.advance(); if source.peek() == Some(b'"') {
856 self.scan_wide_string()
857 } else {
858 self.scan_wide_char()
859 }
860 }
861
862 b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.scan_identifier(),
864
865 b'0'..=b'9' => self.scan_number(),
867
868 b'"' => self.scan_string(),
870
871 b'\'' => self.scan_char(),
873
874 b'+' => self.scan_operator(b'+', &[(b'+', TokenKind::PlusPlus), (b'=', TokenKind::PlusEq)], TokenKind::Plus),
876 b'-' => self.scan_operator(b'-', &[(b'-', TokenKind::MinusMinus), (b'=', TokenKind::MinusEq), (b'>', TokenKind::Arrow)], TokenKind::Minus),
877 b'*' => self.scan_operator(b'*', &[(b'=', TokenKind::StarEq)], TokenKind::Star),
878 b'/' => self.scan_operator(b'/', &[(b'=', TokenKind::SlashEq)], TokenKind::Slash),
879 b'%' => self.scan_operator(b'%', &[(b'=', TokenKind::PercentEq)], TokenKind::Percent),
880 b'&' => self.scan_operator(b'&', &[(b'&', TokenKind::AmpAmp), (b'=', TokenKind::AmpEq)], TokenKind::Amp),
881 b'|' => self.scan_operator(b'|', &[(b'|', TokenKind::PipePipe), (b'=', TokenKind::PipeEq)], TokenKind::Pipe),
882 b'^' => self.scan_operator(b'^', &[(b'=', TokenKind::CaretEq)], TokenKind::Caret),
883 b'~' => {
884 source.advance();
885 Ok(TokenKind::Tilde)
886 }
887 b'!' => self.scan_operator(b'!', &[(b'=', TokenKind::BangEq)], TokenKind::Bang),
888 b'<' => self.scan_lt(),
889 b'>' => self.scan_gt(),
890 b'=' => self.scan_operator(b'=', &[(b'=', TokenKind::EqEq)], TokenKind::Eq),
891 b'?' => {
892 source.advance();
893 Ok(TokenKind::Question)
894 }
895 b':' => {
896 source.advance();
897 Ok(TokenKind::Colon)
898 }
899 b'.' => self.scan_dot(),
900 b',' => {
901 source.advance();
902 Ok(TokenKind::Comma)
903 }
904 b';' => {
905 source.advance();
906 Ok(TokenKind::Semi)
907 }
908 b'(' => {
909 source.advance();
910 Ok(TokenKind::LParen)
911 }
912 b')' => {
913 source.advance();
914 Ok(TokenKind::RParen)
915 }
916 b'[' => {
917 source.advance();
918 Ok(TokenKind::LBracket)
919 }
920 b']' => {
921 source.advance();
922 Ok(TokenKind::RBracket)
923 }
924 b'{' => {
925 source.advance();
926 Ok(TokenKind::LBrace)
927 }
928 b'}' => {
929 source.advance();
930 Ok(TokenKind::RBrace)
931 }
932 b'#' => {
933 source.advance();
934 if source.peek() == Some(b'#') {
935 source.advance();
936 Ok(TokenKind::HashHash)
937 } else {
938 Ok(TokenKind::Hash)
939 }
940 }
941
942 b'\\' => {
944 source.advance();
945 Ok(TokenKind::Backslash)
946 }
947
948 _ => {
949 let loc = source.current_location();
950 source.advance();
951 Err(CompileError::Lex {
952 loc,
953 kind: crate::error::LexError::InvalidChar(c as char),
954 })
955 }
956 }
957 }
958
959 fn scan_operator(&mut self, _first: u8, continuations: &[(u8, TokenKind)], default: TokenKind) -> Result<TokenKind, CompileError> {
961 let source = self.sources.last_mut().unwrap();
962 source.advance();
963 for (next, kind) in continuations {
964 if source.peek() == Some(*next) {
965 source.advance();
966 return Ok(kind.clone());
967 }
968 }
969 Ok(default)
970 }
971
972 fn scan_lt(&mut self) -> Result<TokenKind, CompileError> {
974 let source = self.sources.last_mut().unwrap();
975 source.advance();
976 match source.peek() {
977 Some(b'<') => {
978 source.advance();
979 if source.peek() == Some(b'=') {
980 source.advance();
981 Ok(TokenKind::LtLtEq)
982 } else {
983 Ok(TokenKind::LtLt)
984 }
985 }
986 Some(b'=') => {
987 source.advance();
988 Ok(TokenKind::LtEq)
989 }
990 _ => Ok(TokenKind::Lt),
991 }
992 }
993
994 fn scan_gt(&mut self) -> Result<TokenKind, CompileError> {
996 let source = self.sources.last_mut().unwrap();
997 source.advance();
998 match source.peek() {
999 Some(b'>') => {
1000 source.advance();
1001 if source.peek() == Some(b'=') {
1002 source.advance();
1003 Ok(TokenKind::GtGtEq)
1004 } else {
1005 Ok(TokenKind::GtGt)
1006 }
1007 }
1008 Some(b'=') => {
1009 source.advance();
1010 Ok(TokenKind::GtEq)
1011 }
1012 _ => Ok(TokenKind::Gt),
1013 }
1014 }
1015
1016 fn scan_dot(&mut self) -> Result<TokenKind, CompileError> {
1018 let source = self.sources.last_mut().unwrap();
1019 source.advance();
1020 if source.peek() == Some(b'.') && source.peek_n(1) == Some(b'.') {
1021 source.advance();
1022 source.advance();
1023 Ok(TokenKind::Ellipsis)
1024 } else {
1025 Ok(TokenKind::Dot)
1026 }
1027 }
1028
1029 fn scan_identifier(&mut self) -> Result<TokenKind, CompileError> {
1031 let source = self.sources.last_mut().unwrap();
1032 let mut chars = Vec::new();
1033 while let Some(c) = source.peek() {
1034 if c.is_ascii_alphanumeric() || c == b'_' {
1035 chars.push(c);
1036 source.advance();
1037 } else {
1038 break;
1039 }
1040 }
1041
1042 let text = std::str::from_utf8(&chars).unwrap();
1043
1044 if let Some(kw) = TokenKind::from_keyword(text) {
1046 Ok(kw)
1047 } else {
1048 let interned = self.interner.intern(text);
1049 Ok(TokenKind::Ident(interned))
1050 }
1051 }
1052
1053 fn scan_number(&mut self) -> Result<TokenKind, CompileError> {
1055 let source = self.sources.last_mut().unwrap();
1056 let loc = source.current_location();
1057 let start = source.pos;
1058
1059 if source.peek() == Some(b'0') {
1061 source.advance();
1062 match source.peek() {
1063 Some(b'x') | Some(b'X') => {
1064 source.advance();
1065 while source.peek().is_some_and(|c| c.is_ascii_hexdigit()) {
1066 source.advance();
1067 }
1068 }
1069 Some(b'b') | Some(b'B') => {
1070 source.advance();
1071 while matches!(source.peek(), Some(b'0') | Some(b'1')) {
1072 source.advance();
1073 }
1074 }
1075 Some(b'0'..=b'7') => {
1076 while source.peek().is_some_and(|c| matches!(c, b'0'..=b'7')) {
1077 source.advance();
1078 }
1079 }
1080 Some(b'.') | Some(b'e') | Some(b'E') => {
1081 return self.scan_float_from(start, loc);
1082 }
1083 _ => {}
1084 }
1085 } else {
1086 while source.peek().is_some_and(|c| c.is_ascii_digit()) {
1087 source.advance();
1088 }
1089 if matches!(source.peek(), Some(b'.') | Some(b'e') | Some(b'E')) {
1090 return self.scan_float_from(start, loc);
1091 }
1092 }
1093
1094 self.finish_integer(start, loc)
1095 }
1096
1097 fn scan_float_from(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind, CompileError> {
1099 let source = self.sources.last_mut().unwrap();
1100
1101 if source.peek() == Some(b'.') {
1102 source.advance();
1103 while source.peek().is_some_and(|c| c.is_ascii_digit()) {
1104 source.advance();
1105 }
1106 }
1107
1108 if matches!(source.peek(), Some(b'e') | Some(b'E')) {
1109 source.advance();
1110 if matches!(source.peek(), Some(b'+') | Some(b'-')) {
1111 source.advance();
1112 }
1113 while source.peek().is_some_and(|c| c.is_ascii_digit()) {
1114 source.advance();
1115 }
1116 }
1117
1118 if matches!(source.peek(), Some(b'f') | Some(b'F') | Some(b'l') | Some(b'L')) {
1119 source.advance();
1120 }
1121
1122 let text = std::str::from_utf8(&source.source[start..source.pos]).unwrap();
1123 let value: f64 = text
1124 .trim_end_matches(|c| c == 'f' || c == 'F' || c == 'l' || c == 'L')
1125 .parse()
1126 .map_err(|_| CompileError::Lex {
1127 loc: loc.clone(),
1128 kind: crate::error::LexError::InvalidNumber(text.to_string()),
1129 })?;
1130
1131 Ok(TokenKind::FloatLit(value))
1132 }
1133
1134 fn finish_integer(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind, CompileError> {
1136 let source = self.sources.last_mut().unwrap();
1137
1138 let mut is_unsigned = false;
1140 let mut is_long = false;
1141 let mut is_longlong = false;
1142
1143 loop {
1144 match source.peek() {
1145 Some(b'u') | Some(b'U') => {
1146 is_unsigned = true;
1147 source.advance();
1148 }
1149 Some(b'l') | Some(b'L') => {
1150 if is_long {
1151 is_longlong = true;
1152 }
1153 is_long = true;
1154 source.advance();
1155 }
1156 _ => break,
1157 }
1158 }
1159
1160 let text = std::str::from_utf8(&source.source[start..source.pos]).unwrap();
1161
1162 let (num_text, radix) = if text.starts_with("0x") || text.starts_with("0X") {
1164 (&text[2..], 16)
1165 } else if text.starts_with("0b") || text.starts_with("0B") {
1166 (&text[2..], 2)
1167 } else if text.starts_with('0') && text.len() > 1 {
1168 let without_suffix = text.trim_end_matches(|c: char| c == 'u' || c == 'U' || c == 'l' || c == 'L');
1170 if without_suffix.len() > 1 {
1171 (without_suffix, 8)
1172 } else {
1173 (without_suffix, 10)
1174 }
1175 } else {
1176 (text, 10)
1177 };
1178
1179 let num_text = num_text.trim_end_matches(|c: char| c == 'u' || c == 'U' || c == 'l' || c == 'L');
1181
1182 if is_unsigned || is_longlong {
1183 let value = u64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
1184 loc: loc.clone(),
1185 kind: crate::error::LexError::InvalidNumber(text.to_string()),
1186 })?;
1187 Ok(TokenKind::UIntLit(value))
1188 } else {
1189 match i64::from_str_radix(num_text, radix) {
1192 Ok(value) => Ok(TokenKind::IntLit(value)),
1193 Err(_) => {
1194 let value = u64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
1195 loc: loc.clone(),
1196 kind: crate::error::LexError::InvalidNumber(text.to_string()),
1197 })?;
1198 Ok(TokenKind::UIntLit(value))
1199 }
1200 }
1201 }
1202 }
1203
1204 fn scan_string(&mut self) -> Result<TokenKind, CompileError> {
1206 let loc = {
1207 let source = self.sources.last_mut().unwrap();
1208 let loc = source.current_location();
1209 source.advance(); loc
1211 };
1212
1213 let mut bytes = Vec::new();
1214 loop {
1215 let c = {
1216 let source = self.sources.last_mut().unwrap();
1217 source.peek()
1218 };
1219
1220 match c {
1221 Some(b'"') => {
1222 let source = self.sources.last_mut().unwrap();
1223 source.advance();
1224 return Ok(TokenKind::StringLit(bytes));
1225 }
1226 Some(b'\\') => {
1227 {
1228 let source = self.sources.last_mut().unwrap();
1229 source.advance();
1230 }
1231 let escaped = self.scan_escape_sequence(&loc)?;
1232 bytes.push(escaped);
1233 }
1234 Some(b'\n') | None => {
1235 return Err(CompileError::Lex {
1236 loc,
1237 kind: crate::error::LexError::UnterminatedString,
1238 });
1239 }
1240 Some(c) => {
1241 let source = self.sources.last_mut().unwrap();
1242 source.advance();
1243 bytes.push(c);
1244 }
1245 }
1246 }
1247 }
1248
1249 fn scan_wide_string(&mut self) -> Result<TokenKind, CompileError> {
1251 let loc = {
1252 let source = self.sources.last_mut().unwrap();
1253 let loc = source.current_location();
1254 source.advance(); loc
1256 };
1257
1258 let mut chars = Vec::new();
1259 loop {
1260 let c = {
1261 let source = self.sources.last_mut().unwrap();
1262 source.peek()
1263 };
1264
1265 match c {
1266 Some(b'"') => {
1267 let source = self.sources.last_mut().unwrap();
1268 source.advance();
1269 return Ok(TokenKind::WideStringLit(chars));
1270 }
1271 Some(b'\\') => {
1272 {
1273 let source = self.sources.last_mut().unwrap();
1274 source.advance();
1275 }
1276 let escaped = self.scan_escape_sequence(&loc)?;
1277 chars.push(escaped as u32);
1278 }
1279 Some(b'\n') | None => {
1280 return Err(CompileError::Lex {
1281 loc,
1282 kind: crate::error::LexError::UnterminatedString,
1283 });
1284 }
1285 Some(c) => {
1286 let source = self.sources.last_mut().unwrap();
1287 source.advance();
1288 chars.push(c as u32);
1289 }
1290 }
1291 }
1292 }
1293
1294 fn scan_char(&mut self) -> Result<TokenKind, CompileError> {
1296 let loc = {
1297 let source = self.sources.last_mut().unwrap();
1298 let loc = source.current_location();
1299 source.advance(); loc
1301 };
1302
1303 let first_char = {
1304 let source = self.sources.last().unwrap();
1305 source.peek()
1306 };
1307
1308 let value = match first_char {
1309 Some(b'\'') => {
1310 return Err(CompileError::Lex {
1311 loc,
1312 kind: crate::error::LexError::EmptyCharLit,
1313 });
1314 }
1315 Some(b'\\') => {
1316 {
1317 let source = self.sources.last_mut().unwrap();
1318 source.advance();
1319 }
1320 self.scan_escape_sequence(&loc)?
1321 }
1322 Some(c) => {
1323 let source = self.sources.last_mut().unwrap();
1324 source.advance();
1325 c
1326 }
1327 None => {
1328 return Err(CompileError::Lex {
1329 loc,
1330 kind: crate::error::LexError::UnterminatedChar,
1331 });
1332 }
1333 };
1334
1335 let source = self.sources.last_mut().unwrap();
1336 if source.peek() != Some(b'\'') {
1337 return Err(CompileError::Lex {
1338 loc,
1339 kind: crate::error::LexError::UnterminatedChar,
1340 });
1341 }
1342 source.advance();
1343
1344 Ok(TokenKind::CharLit(value))
1345 }
1346
1347 fn scan_wide_char(&mut self) -> Result<TokenKind, CompileError> {
1349 let loc = {
1350 let source = self.sources.last_mut().unwrap();
1351 let loc = source.current_location();
1352 source.advance(); loc
1354 };
1355
1356 let first_char = {
1357 let source = self.sources.last().unwrap();
1358 source.peek()
1359 };
1360
1361 let value = match first_char {
1362 Some(b'\'') => {
1363 return Err(CompileError::Lex {
1364 loc,
1365 kind: crate::error::LexError::EmptyCharLit,
1366 });
1367 }
1368 Some(b'\\') => {
1369 {
1370 let source = self.sources.last_mut().unwrap();
1371 source.advance();
1372 }
1373 self.scan_escape_sequence(&loc)? as u32
1374 }
1375 Some(c) => {
1376 let source = self.sources.last_mut().unwrap();
1377 source.advance();
1378 c as u32
1379 }
1380 None => {
1381 return Err(CompileError::Lex {
1382 loc,
1383 kind: crate::error::LexError::UnterminatedChar,
1384 });
1385 }
1386 };
1387
1388 let source = self.sources.last_mut().unwrap();
1389 if source.peek() != Some(b'\'') {
1390 return Err(CompileError::Lex {
1391 loc,
1392 kind: crate::error::LexError::UnterminatedChar,
1393 });
1394 }
1395 source.advance();
1396
1397 Ok(TokenKind::WideCharLit(value))
1398 }
1399
1400 fn scan_escape_sequence(&mut self, loc: &SourceLocation) -> Result<u8, CompileError> {
1402 let source = self.sources.last_mut().unwrap();
1403 match source.peek() {
1404 Some(b'n') => { source.advance(); Ok(b'\n') }
1405 Some(b't') => { source.advance(); Ok(b'\t') }
1406 Some(b'r') => { source.advance(); Ok(b'\r') }
1407 Some(b'\\') => { source.advance(); Ok(b'\\') }
1408 Some(b'\'') => { source.advance(); Ok(b'\'') }
1409 Some(b'"') => { source.advance(); Ok(b'"') }
1410 Some(b'0') => { source.advance(); Ok(0) }
1411 Some(b'a') => { source.advance(); Ok(0x07) }
1412 Some(b'b') => { source.advance(); Ok(0x08) }
1413 Some(b'f') => { source.advance(); Ok(0x0C) }
1414 Some(b'v') => { source.advance(); Ok(0x0B) }
1415 Some(b'x') => {
1416 source.advance();
1417 let mut value = 0u8;
1418 let mut count = 0;
1419 while let Some(c) = source.peek() {
1420 if let Some(digit) = (c as char).to_digit(16) {
1421 value = value.wrapping_mul(16).wrapping_add(digit as u8);
1422 source.advance();
1423 count += 1;
1424 if count >= 2 { break; }
1425 } else {
1426 break;
1427 }
1428 }
1429 if count == 0 {
1430 Ok(b'x')
1432 } else {
1433 Ok(value)
1434 }
1435 }
1436 Some(c @ b'0'..=b'7') => {
1437 let mut value = (c - b'0') as u8;
1438 source.advance();
1439 for _ in 0..2 {
1440 if let Some(c @ b'0'..=b'7') = source.peek() {
1441 value = value * 8 + (c - b'0');
1442 source.advance();
1443 } else {
1444 break;
1445 }
1446 }
1447 Ok(value)
1448 }
1449 Some(c) => {
1450 source.advance();
1452 Ok(c)
1453 }
1454 None => Err(CompileError::Lex {
1455 loc: loc.clone(),
1456 kind: crate::error::LexError::UnterminatedString,
1457 }),
1458 }
1459 }
1460
1461 pub fn next_token(&mut self) -> Result<Token, CompileError> {
1463 loop {
1464 let token = if let Some(token) = self.lookahead.pop() {
1466 token
1467 } else {
1468 match self.lex_token_from_source()? {
1469 Some(t) => t,
1470 None => {
1471 if self.sources.len() > 1 {
1473 self.sources.pop();
1474 continue;
1475 }
1476 Token::new(TokenKind::Eof, SourceLocation::default())
1477 }
1478 }
1479 };
1480
1481 if !token.leading_comments.is_empty() {
1483 self.pending_comments.extend(token.leading_comments.iter().cloned());
1484 }
1485
1486 match &token.kind {
1487 TokenKind::Eof => {
1488 if self.sources.len() > 1 {
1490 self.sources.pop();
1491 continue;
1492 }
1493
1494 if !self.cond_stack.is_empty() {
1496 let state = &self.cond_stack[0];
1497 return Err(CompileError::Preprocess {
1498 loc: state.loc.clone(),
1499 kind: PPError::MissingEndif,
1500 });
1501 }
1502
1503 return Ok(token);
1504 }
1505
1506 TokenKind::Newline => {
1507 continue;
1509 }
1510
1511 TokenKind::Hash => {
1512 let at_line_start = self.sources.last().map(|s| s.is_at_line_start()).unwrap_or(false);
1514 if at_line_start || self.sources.last().map(|s| s.is_token_source()).unwrap_or(false) {
1515 }
1517 self.process_directive(token.loc.clone())?;
1518 continue;
1519 }
1520
1521 TokenKind::Ident(id) if self.cond_active => {
1522 let id = *id;
1524 if let Some(expanded) = self.try_expand_macro(id, &token)? {
1525 for t in expanded.into_iter().rev() {
1527 self.lookahead.push(t);
1528 }
1529 continue;
1530 }
1531 return Ok(self.attach_comments(token));
1532 }
1533
1534 _ if !self.cond_active => {
1535 continue;
1537 }
1538
1539 _ => {
1540 return Ok(self.attach_comments(token));
1541 }
1542 }
1543 }
1544 }
1545
1546 pub fn unget_token(&mut self, token: Token) {
1550 self.lookahead.push(token);
1551 }
1552
1553 fn next_raw_token(&mut self) -> Result<Token, CompileError> {
1555 loop {
1556 if let Some(token) = self.lookahead.pop() {
1558 return Ok(token);
1559 }
1560
1561 match self.lex_token_from_source()? {
1562 Some(token) => {
1563 if !token.leading_comments.is_empty() {
1564 self.pending_comments.extend(token.leading_comments.iter().cloned());
1565 }
1566 return Ok(token);
1567 }
1568 None => {
1569 if self.sources.len() > 1 {
1570 self.sources.pop();
1571 continue;
1572 }
1573 return Ok(Token::new(TokenKind::Eof, SourceLocation::default()));
1574 }
1575 }
1576 }
1577 }
1578
1579 fn attach_comments(&mut self, mut token: Token) -> Token {
1581 if !self.pending_comments.is_empty() {
1582 token.leading_comments = std::mem::take(&mut self.pending_comments);
1583 }
1584 token
1585 }
1586
1587 fn process_directive(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
1589 let directive_token = self.next_raw_token()?;
1591
1592 match &directive_token.kind {
1593 TokenKind::Newline | TokenKind::Eof => {
1594 return Ok(());
1596 }
1597 TokenKind::Ident(id) => {
1598 let name = self.interner.get(*id).to_string();
1599 self.process_directive_by_name(&name, loc)?;
1600 }
1601 TokenKind::KwIf => self.process_directive_by_name("if", loc)?,
1604 TokenKind::KwElse => self.process_directive_by_name("else", loc)?,
1605 TokenKind::KwFor => self.process_directive_by_name("for", loc)?, TokenKind::IntLit(_) => {
1607 self.skip_to_eol()?;
1609 }
1610 _ => {
1611 return Err(CompileError::Preprocess {
1612 loc,
1613 kind: PPError::InvalidDirective(format!("{:?}", directive_token.kind)),
1614 });
1615 }
1616 }
1617
1618 Ok(())
1619 }
1620
1621 fn process_directive_by_name(&mut self, name: &str, loc: SourceLocation) -> Result<(), CompileError> {
1623 match name {
1624 "define" => {
1625 if self.cond_active {
1626 self.process_define(loc)?;
1627 } else {
1628 self.skip_to_eol()?;
1629 }
1630 }
1631 "undef" => {
1632 if self.cond_active {
1633 self.process_undef()?;
1634 } else {
1635 self.skip_to_eol()?;
1636 }
1637 }
1638 "include" => {
1639 if self.cond_active {
1640 self.process_include(loc, false)?;
1641 } else {
1642 self.skip_to_eol()?;
1643 }
1644 }
1645 "include_next" => {
1646 if self.cond_active {
1647 self.process_include(loc, true)?;
1648 } else {
1649 self.skip_to_eol()?;
1650 }
1651 }
1652 "if" => self.process_if(loc)?,
1653 "ifdef" => self.process_ifdef(loc, false)?,
1654 "ifndef" => self.process_ifdef(loc, true)?,
1655 "elif" => self.process_elif(loc)?,
1656 "else" => self.process_else(loc)?,
1657 "endif" => self.process_endif()?,
1658 "error" => {
1659 if self.cond_active {
1660 self.process_error(loc)?;
1661 } else {
1662 self.skip_to_eol()?;
1663 }
1664 }
1665 "warning" | "pragma" | "line" => {
1666 self.skip_to_eol()?;
1667 }
1668 _ => {
1669 if self.cond_active {
1670 return Err(CompileError::Preprocess {
1671 loc,
1672 kind: PPError::InvalidDirective(name.to_string()),
1673 });
1674 } else {
1675 self.skip_to_eol()?;
1676 }
1677 }
1678 }
1679
1680 Ok(())
1681 }
1682
1683 fn process_define(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
1685 let name_token = self.next_raw_token()?;
1686 let name = match name_token.kind {
1690 TokenKind::Ident(id) => id,
1691 ref kind => {
1692 if let Some(s) = kind.keyword_str() {
1693 self.interner.intern(s)
1694 } else {
1695 return Err(CompileError::Preprocess {
1696 loc,
1697 kind: PPError::InvalidDirective("expected macro name".to_string()),
1698 });
1699 }
1700 }
1701 };
1702
1703 self.return_spaces = true;
1706 let next = self.next_raw_token()?;
1707 self.return_spaces = false;
1708
1709 let (kind, body_start) = if matches!(next.kind, TokenKind::LParen) {
1710 let (params, is_variadic) = self.parse_macro_params()?;
1712 (MacroKind::Function { params, is_variadic }, None)
1713 } else if matches!(next.kind, TokenKind::Space) {
1714 let body_first = self.next_raw_token()?;
1716 (MacroKind::Object, Some(body_first))
1717 } else {
1718 (MacroKind::Object, Some(next))
1720 };
1721
1722 let mut body = Vec::new();
1723 let mut need_more = true;
1724 if let Some(first) = body_start {
1725 if matches!(first.kind, TokenKind::Newline | TokenKind::Eof) {
1726 need_more = false;
1728 } else {
1729 body.push(first);
1730 }
1731 }
1732
1733 if need_more {
1734 loop {
1735 let token = self.next_raw_token()?;
1736 match token.kind {
1737 TokenKind::Newline | TokenKind::Eof => break,
1738 _ => body.push(token),
1739 }
1740 }
1741 }
1742
1743 let is_target = self.is_current_file_in_target();
1744 let has_token_pasting = body.iter()
1745 .any(|t| matches!(t.kind, TokenKind::HashHash));
1746 let def = MacroDef {
1747 name,
1748 kind,
1749 body,
1750 def_loc: loc,
1751 leading_comments: std::mem::take(&mut self.pending_comments),
1752 is_builtin: self.defining_builtin,
1753 is_target,
1754 has_token_pasting,
1755 };
1756
1757 if let Some(ref mut callback) = self.macro_def_callback {
1759 callback.on_macro_defined(&def);
1760 }
1761
1762 self.macros.define(def, &self.interner);
1763 Ok(())
1764 }
1765
1766 fn parse_macro_params(&mut self) -> Result<(Vec<InternedStr>, bool), CompileError> {
1769 let mut params = Vec::new();
1770 let mut is_variadic = false;
1771
1772 loop {
1773 let token = self.next_raw_token()?;
1774 let param_id: Option<InternedStr> = match &token.kind {
1777 TokenKind::Ident(id) => Some(*id),
1778 kind => kind.keyword_str().map(|s| self.interner.intern(s)),
1779 };
1780 match token.kind {
1781 TokenKind::RParen => break,
1782 _ if param_id.is_some() => {
1783 params.push(param_id.unwrap());
1784 let next = self.next_raw_token()?;
1785 match next.kind {
1786 TokenKind::Comma => continue,
1787 TokenKind::RParen => break,
1788 TokenKind::Ellipsis => {
1789 is_variadic = true;
1792 let rparen = self.next_raw_token()?;
1793 if !matches!(rparen.kind, TokenKind::RParen) {
1794 return Err(CompileError::Preprocess {
1795 loc: token.loc,
1796 kind: PPError::InvalidMacroArgs("expected ')' after '...'".to_string()),
1797 });
1798 }
1799 break;
1800 }
1801 _ => {
1802 return Err(CompileError::Preprocess {
1803 loc: token.loc,
1804 kind: PPError::InvalidMacroArgs("expected ',' or ')'".to_string()),
1805 });
1806 }
1807 }
1808 }
1809 TokenKind::Ellipsis => {
1810 is_variadic = true;
1813 let va_args_id = self.interner.intern("__VA_ARGS__");
1814 params.push(va_args_id);
1815 let next = self.next_raw_token()?;
1816 if !matches!(next.kind, TokenKind::RParen) {
1817 return Err(CompileError::Preprocess {
1818 loc: token.loc,
1819 kind: PPError::InvalidMacroArgs("expected ')' after '...'".to_string()),
1820 });
1821 }
1822 break;
1823 }
1824 _ => {
1825 return Err(CompileError::Preprocess {
1826 loc: token.loc,
1827 kind: PPError::InvalidMacroArgs("expected parameter name".to_string()),
1828 });
1829 }
1830 }
1831 }
1832
1833 Ok((params, is_variadic))
1834 }
1835
1836 fn process_undef(&mut self) -> Result<(), CompileError> {
1838 let token = self.next_raw_token()?;
1839 let name = match token.kind {
1841 TokenKind::Ident(id) => Some(id),
1842 ref kind => kind.keyword_str().map(|s| self.interner.intern(s)),
1843 };
1844 if let Some(id) = name {
1845 self.macros.undefine(id);
1846 }
1847 self.skip_to_eol()?;
1848 Ok(())
1849 }
1850
1851 fn process_include(&mut self, loc: SourceLocation, is_include_next: bool) -> Result<(), CompileError> {
1853 let token = self.next_raw_token()?;
1854
1855 let (path, kind) = match &token.kind {
1856 TokenKind::StringLit(bytes) => {
1857 let path = String::from_utf8_lossy(bytes).to_string();
1858 (path, IncludeKind::Local)
1859 }
1860 TokenKind::Lt => {
1861 let path = self.scan_include_path('>')?;
1864 (path, IncludeKind::System)
1865 }
1866 _ => {
1867 return Err(CompileError::Preprocess {
1868 loc,
1869 kind: PPError::InvalidDirective("expected include path".to_string()),
1870 });
1871 }
1872 };
1873
1874 self.skip_to_eol()?;
1875
1876 let resolved = self.resolve_include(&path, kind, &loc, is_include_next)?;
1877
1878 let source = fs::read(&resolved).map_err(|e| {
1879 CompileError::Preprocess {
1880 loc: loc.clone(),
1881 kind: PPError::IoError(resolved.clone(), e.to_string()),
1882 }
1883 })?;
1884
1885 let file_id = self.files.register(resolved);
1886 let input = InputSource::from_file(source, file_id);
1887 self.sources.push(input);
1888
1889 Ok(())
1890 }
1891
1892 fn resolve_include(&self, path: &str, kind: IncludeKind, loc: &SourceLocation, is_include_next: bool) -> Result<PathBuf, CompileError> {
1894 let path = Path::new(path);
1895
1896 let start_index = if is_include_next {
1898 self.find_current_include_index()
1899 } else {
1900 0
1901 };
1902
1903 if kind == IncludeKind::Local && !is_include_next {
1904 if let Some(source) = self.sources.last() {
1905 if !source.is_token_source() {
1906 let current_path = self.files.get_path(source.file_id);
1907 if let Some(parent) = current_path.parent() {
1908 let candidate = parent.join(path);
1909 if candidate.exists() {
1910 return Ok(candidate);
1911 }
1912 }
1913 }
1914 }
1915 }
1916
1917 for dir in self.config.include_paths.iter().skip(start_index) {
1918 let candidate = dir.join(path);
1919 if candidate.exists() {
1920 return Ok(candidate);
1921 }
1922 }
1923
1924 Err(CompileError::Preprocess {
1925 loc: loc.clone(),
1926 kind: PPError::IncludeNotFound(path.to_path_buf()),
1927 })
1928 }
1929
1930 fn find_current_include_index(&self) -> usize {
1932 let current_file_path = if let Some(source) = self.sources.iter().rev().find(|s| !s.is_token_source()) {
1934 self.files.get_path(source.file_id).to_path_buf()
1935 } else {
1936 return 0;
1937 };
1938
1939 for (i, dir) in self.config.include_paths.iter().enumerate() {
1941 if current_file_path.starts_with(dir) {
1942 return i + 1; }
1944 }
1945
1946 0
1947 }
1948
1949 fn process_if(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
1951 if !self.cond_active {
1953 self.cond_stack.push(CondState {
1954 active: false,
1955 seen_active: false,
1956 seen_else: false,
1957 loc: loc.clone(),
1958 });
1959 self.skip_false_branch(loc)?;
1960 return Ok(());
1961 }
1962
1963 let tokens = self.collect_if_condition()?;
1965
1966 let mut eval = PPExprEvaluator::new(&tokens, &self.interner, &self.macros, loc.clone());
1967 let active = eval.evaluate()? != 0;
1968
1969 self.cond_stack.push(CondState {
1970 active,
1971 seen_active: active,
1972 seen_else: false,
1973 loc: loc.clone(),
1974 });
1975
1976 self.update_cond_active();
1977
1978 if !active {
1980 self.skip_false_branch(loc)?;
1981 }
1982
1983 Ok(())
1984 }
1985
1986 fn skip_false_branch(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
1988 loop {
1989 let directive = self.preprocess_skip()?;
1990 match directive.as_str() {
1991 "endif" => {
1992 self.cond_stack.pop();
1994 self.update_cond_active();
1995 return Ok(());
1996 }
1997 "else" => {
1998 if let Some(state) = self.cond_stack.last_mut() {
2000 if state.seen_else {
2001 return Err(CompileError::Preprocess {
2002 loc,
2003 kind: PPError::UnmatchedElse,
2004 });
2005 }
2006 state.seen_else = true;
2007 if !state.seen_active {
2008 state.active = true;
2009 state.seen_active = true;
2010 self.update_cond_active();
2011 return Ok(());
2012 }
2013 }
2015 }
2016 "elif" => {
2017 if let Some(state) = self.cond_stack.last() {
2019 if state.seen_else {
2020 return Err(CompileError::Preprocess {
2021 loc,
2022 kind: PPError::ElifAfterElse,
2023 });
2024 }
2025 if state.seen_active {
2026 self.skip_to_eol()?;
2028 continue;
2029 }
2030 }
2031 let tokens = self.collect_if_condition()?;
2033 let new_active = {
2034 let mut eval = PPExprEvaluator::new(&tokens, &self.interner, &self.macros, loc.clone());
2035 eval.evaluate()? != 0
2036 };
2037 if let Some(state) = self.cond_stack.last_mut() {
2038 if new_active {
2039 state.active = true;
2040 state.seen_active = true;
2041 self.update_cond_active();
2042 return Ok(());
2043 }
2044 }
2046 }
2047 _ => unreachable!(),
2048 }
2049 }
2050 }
2051
2052 fn process_ifdef(&mut self, loc: SourceLocation, negate: bool) -> Result<(), CompileError> {
2054 if !self.cond_active {
2056 self.cond_stack.push(CondState {
2057 active: false,
2058 seen_active: false,
2059 seen_else: false,
2060 loc: loc.clone(),
2061 });
2062 self.skip_false_branch(loc)?;
2063 return Ok(());
2064 }
2065
2066 let token = self.next_raw_token()?;
2067 let defined = match token.kind {
2069 TokenKind::Ident(id) => self.macros.is_defined(id),
2070 ref kind => match kind.keyword_str() {
2071 Some(s) => {
2072 let id = self.interner.intern(s);
2073 self.macros.is_defined(id)
2074 }
2075 None => false,
2076 },
2077 };
2078
2079 self.skip_to_eol()?;
2080
2081 let active = if negate { !defined } else { defined };
2082
2083 self.cond_stack.push(CondState {
2084 active,
2085 seen_active: active,
2086 seen_else: false,
2087 loc: loc.clone(),
2088 });
2089
2090 self.update_cond_active();
2091
2092 if !active {
2094 self.skip_false_branch(loc)?;
2095 }
2096
2097 Ok(())
2098 }
2099
2100 fn process_elif(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
2103 if self.cond_stack.is_empty() {
2104 return Err(CompileError::Preprocess {
2105 loc,
2106 kind: PPError::UnmatchedEndif,
2107 });
2108 }
2109
2110 let seen_else = self.cond_stack.last().unwrap().seen_else;
2111 if seen_else {
2112 return Err(CompileError::Preprocess {
2113 loc,
2114 kind: PPError::ElifAfterElse,
2115 });
2116 }
2117
2118 self.skip_to_eol()?;
2121 self.skip_false_branch(loc)?;
2122
2123 Ok(())
2124 }
2125
2126 fn process_else(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
2129 if self.cond_stack.is_empty() {
2130 return Err(CompileError::Preprocess {
2131 loc,
2132 kind: PPError::UnmatchedElse,
2133 });
2134 }
2135
2136 let seen_else = self.cond_stack.last().unwrap().seen_else;
2137 if seen_else {
2138 return Err(CompileError::Preprocess {
2139 loc,
2140 kind: PPError::UnmatchedElse,
2141 });
2142 }
2143
2144 if let Some(state) = self.cond_stack.last_mut() {
2146 state.seen_else = true;
2147 }
2148
2149 self.skip_to_eol()?;
2151 self.skip_false_branch(loc)?;
2152
2153 Ok(())
2154 }
2155
2156 fn process_endif(&mut self) -> Result<(), CompileError> {
2158 if self.cond_stack.is_empty() {
2159 return Err(CompileError::Preprocess {
2160 loc: SourceLocation::default(),
2161 kind: PPError::UnmatchedEndif,
2162 });
2163 }
2164
2165 self.cond_stack.pop();
2166 self.skip_to_eol()?;
2167 self.update_cond_active();
2168 Ok(())
2169 }
2170
2171 fn process_error(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
2173 let mut message = String::new();
2174 loop {
2175 let token = self.next_raw_token()?;
2176 match token.kind {
2177 TokenKind::Newline | TokenKind::Eof => break,
2178 TokenKind::Ident(id) => {
2179 if !message.is_empty() { message.push(' '); }
2180 message.push_str(self.interner.get(id));
2181 }
2182 TokenKind::StringLit(bytes) => {
2183 if !message.is_empty() { message.push(' '); }
2184 message.push_str(&String::from_utf8_lossy(&bytes));
2185 }
2186 _ => {
2187 if !message.is_empty() { message.push(' '); }
2188 message.push_str(&format!("{:?}", token.kind));
2189 }
2190 }
2191 }
2192
2193 Err(CompileError::Preprocess {
2194 loc,
2195 kind: PPError::InvalidDirective(format!("#error {}", message)),
2196 })
2197 }
2198
2199 fn update_cond_active(&mut self) {
2201 self.cond_active = self.cond_stack.iter().all(|s| s.active);
2202 }
2203
2204 fn collect_if_condition(&mut self) -> Result<Vec<Token>, CompileError> {
2207 let mut tokens = Vec::new();
2208 let defined_id = self.interner.intern("defined");
2209
2210 loop {
2211 let token = self.next_raw_token()?;
2213
2214 match &token.kind {
2215 TokenKind::Newline | TokenKind::Eof => break,
2216 TokenKind::Ident(id) if *id == defined_id => {
2217 tokens.push(token);
2219
2220 let next = self.next_raw_token()?;
2222 if matches!(next.kind, TokenKind::LParen) {
2223 tokens.push(next);
2224 let ident = self.next_raw_token()?;
2226 tokens.push(ident);
2227 let rparen = self.next_raw_token()?;
2228 tokens.push(rparen);
2229 } else {
2230 tokens.push(next);
2232 }
2233 }
2234 TokenKind::Ident(id) => {
2235 let id = *id;
2236 if let Some(expanded) = self.try_expand_macro(id, &token)? {
2238 for t in expanded.into_iter().rev() {
2240 self.lookahead.push(t);
2241 }
2242 } else {
2243 tokens.push(token);
2245 }
2246 }
2247 _ => {
2248 tokens.push(token);
2249 }
2250 }
2251 }
2252
2253 if self.config.debug_pp {
2255 eprintln!("DEBUG: collected tokens for #if condition:");
2256 for t in &tokens {
2257 eprintln!(" {:?}", t.kind);
2258 }
2259 }
2260
2261 Ok(tokens)
2262 }
2263
2264 fn scan_include_path(&mut self, terminator: char) -> Result<String, CompileError> {
2266 let source = self.sources.last_mut().ok_or_else(|| {
2267 CompileError::Preprocess {
2268 loc: SourceLocation::default(),
2269 kind: PPError::InvalidDirective("no source".to_string()),
2270 }
2271 })?;
2272
2273 let loc = source.current_location();
2274 let mut path = String::new();
2275
2276 loop {
2277 match source.peek() {
2278 Some(c) if c == terminator as u8 => {
2279 source.advance();
2280 break;
2281 }
2282 Some(b'\n') | None => {
2283 return Err(CompileError::Preprocess {
2284 loc,
2285 kind: PPError::InvalidDirective("unterminated include path".to_string()),
2286 });
2287 }
2288 Some(c) => {
2289 source.advance();
2290 path.push(c as char);
2291 }
2292 }
2293 }
2294
2295 Ok(path)
2296 }
2297
2298 fn skip_to_eol(&mut self) -> Result<(), CompileError> {
2300 loop {
2301 let token = self.next_raw_token()?;
2302 if matches!(token.kind, TokenKind::Newline | TokenKind::Eof) {
2303 break;
2304 }
2305 }
2306 Ok(())
2307 }
2308
2309 fn skip_to_eol_raw(source: &mut InputSource) {
2312 loop {
2313 match source.peek() {
2314 Some(b'\n') | None => break,
2315 Some(b'/') => {
2316 if source.peek_n(1) == Some(b'*') {
2318 source.advance(); source.advance(); loop {
2322 match (source.peek(), source.peek_n(1)) {
2323 (Some(b'*'), Some(b'/')) => {
2324 source.advance();
2325 source.advance();
2326 break;
2327 }
2328 (Some(_), _) => { source.advance(); }
2329 (None, _) => break,
2330 }
2331 }
2332 } else if source.peek_n(1) == Some(b'/') {
2333 while source.peek().is_some_and(|c| c != b'\n') {
2335 source.advance();
2336 }
2337 break;
2338 } else {
2339 source.advance();
2340 }
2341 }
2342 Some(b'\\') => {
2343 source.advance();
2345 if source.peek() == Some(b'\n') {
2346 source.advance();
2347 } else if source.peek() == Some(b'\r') {
2348 source.advance();
2349 if source.peek() == Some(b'\n') {
2350 source.advance();
2351 }
2352 }
2353 }
2354 Some(_) => { source.advance(); }
2355 }
2356 }
2357 }
2358
2359 fn preprocess_skip(&mut self) -> Result<String, CompileError> {
2363 let mut depth = 0i32; loop {
2366 let source = match self.sources.last_mut() {
2367 Some(s) => s,
2368 None => {
2369 return Err(CompileError::Preprocess {
2370 loc: SourceLocation::default(),
2371 kind: PPError::MissingEndif,
2372 });
2373 }
2374 };
2375
2376 let mut at_line_start = source.is_at_line_start();
2378
2379 loop {
2380 let c = match source.peek() {
2381 Some(c) => c,
2382 None => break, };
2384
2385 match c {
2386 b' ' | b'\t' | b'\r' | 0x0C | 0x0B => {
2388 source.advance();
2389 }
2390 b'\n' => {
2392 source.advance();
2393 at_line_start = true;
2394 }
2395 b'\\' => {
2397 source.advance();
2398 if source.peek() == Some(b'\n') {
2399 source.advance();
2400 } else if source.peek() == Some(b'\r') {
2401 source.advance();
2402 if source.peek() == Some(b'\n') {
2403 source.advance();
2404 }
2405 }
2406 }
2407 b'"' | b'\'' => {
2409 let quote = c;
2410 source.advance();
2411 loop {
2412 match source.peek() {
2413 Some(c) if c == quote => {
2414 source.advance();
2415 break;
2416 }
2417 Some(b'\\') => {
2418 source.advance();
2419 source.advance(); }
2421 Some(b'\n') | None => break,
2422 Some(_) => {
2423 source.advance();
2424 }
2425 }
2426 }
2427 at_line_start = false;
2428 }
2429 b'/' => {
2431 source.advance();
2432 match source.peek() {
2433 Some(b'/') => {
2434 while source.peek().is_some_and(|c| c != b'\n') {
2436 source.advance();
2437 }
2438 }
2439 Some(b'*') => {
2440 source.advance();
2442 loop {
2443 match (source.peek(), source.peek_n(1)) {
2444 (Some(b'*'), Some(b'/')) => {
2445 source.advance();
2446 source.advance();
2447 break;
2448 }
2449 (Some(_), _) => {
2450 source.advance();
2451 }
2452 (None, _) => break,
2453 }
2454 }
2455 }
2456 _ => {}
2457 }
2458 at_line_start = false;
2459 }
2460 b'#' if at_line_start => {
2462 source.advance();
2463 while matches!(source.peek(), Some(b' ') | Some(b'\t')) {
2465 source.advance();
2466 }
2467 let mut directive = String::new();
2469 while let Some(c) = source.peek() {
2470 if c.is_ascii_alphabetic() || c == b'_' {
2471 directive.push(c as char);
2472 source.advance();
2473 } else {
2474 break;
2475 }
2476 }
2477
2478 match directive.as_str() {
2479 "if" | "ifdef" | "ifndef" => {
2480 depth += 1;
2481 while source.peek().is_some_and(|c| c != b'\n') {
2483 source.advance();
2484 }
2485 }
2486 "endif" => {
2487 if depth == 0 {
2488 Self::skip_to_eol_raw(source);
2490 return Ok("endif".to_string());
2491 }
2492 depth -= 1;
2493 Self::skip_to_eol_raw(source);
2494 }
2495 "else" if depth == 0 => {
2496 Self::skip_to_eol_raw(source);
2497 return Ok("else".to_string());
2498 }
2499 "elif" if depth == 0 => {
2500 return Ok("elif".to_string());
2502 }
2503 _ => {
2504 Self::skip_to_eol_raw(source);
2506 }
2507 }
2508 at_line_start = false;
2509 }
2510 _ => {
2512 source.advance();
2513 at_line_start = false;
2514 }
2515 }
2516 }
2517
2518 if self.sources.len() > 1 {
2520 self.sources.pop();
2521 } else {
2522 return Err(CompileError::Preprocess {
2523 loc: SourceLocation::default(),
2524 kind: PPError::MissingEndif,
2525 });
2526 }
2527 }
2528 }
2529
2530 fn try_expand_macro(&mut self, id: InternedStr, token: &Token) -> Result<Option<Vec<Token>>, CompileError> {
2532 self.try_expand_macro_internal(id, token, false)
2533 }
2534
2535 fn try_expand_macro_internal(
2537 &mut self,
2538 id: InternedStr,
2539 token: &Token,
2540 preserve_function_macros: bool,
2541 ) -> Result<Option<Vec<Token>>, CompileError> {
2542 if self.skip_expand_macros.contains(&id) {
2544 return Ok(None);
2545 }
2546
2547 if self.no_expand_registry.is_blocked(token.id, id) {
2549 return Ok(None);
2550 }
2551
2552 let def = match self.macros.get(id) {
2553 Some(def) => def.clone(),
2554 None => return Ok(None),
2555 };
2556
2557 let trigger_token_id = token.id;
2559
2560 let call_loc = token.loc.clone();
2562
2563 match &def.kind {
2564 MacroKind::Object => {
2565 let empty = HashMap::new();
2566 let expanded = self.expand_tokens(&def.body, &empty, &empty)?;
2567 let marked = self.mark_expanded_with_registry(expanded, trigger_token_id, id, &call_loc);
2569 if let Some(mut cb) = self.macro_called_callbacks.remove(&id) {
2572 cb.on_macro_called(None, &self.interner);
2573 self.macro_called_callbacks.insert(id, cb);
2574 }
2575 let wrapped = self.wrap_with_markers(
2577 marked,
2578 id,
2579 token,
2580 MacroInvocationKind::Object,
2581 &call_loc,
2582 def.has_token_pasting,
2583 );
2584 Ok(Some(wrapped))
2585 }
2586 MacroKind::Function { params, is_variadic } => {
2587 if preserve_function_macros && !self.explicit_expand_macros.contains(&id) {
2589 return Ok(None);
2590 }
2591
2592 let mut skipped_newlines = Vec::new();
2595 let next = loop {
2596 let t = self.next_raw_token()?;
2597 if matches!(t.kind, TokenKind::Newline) {
2598 skipped_newlines.push(t);
2599 } else {
2600 break t;
2601 }
2602 };
2603 if !matches!(next.kind, TokenKind::LParen) {
2604 self.lookahead.push(next);
2606 for t in skipped_newlines.into_iter().rev() {
2607 self.lookahead.push(t);
2608 }
2609 return Ok(None);
2610 }
2611
2612 let args = self.collect_macro_args(params.len(), *is_variadic)?;
2613
2614 let mut arg_map = HashMap::new();
2615
2616 if *is_variadic && !params.is_empty() {
2617 let va_args_id = self.interner.intern("__VA_ARGS__");
2621 let last_param = *params.last().unwrap();
2622 let is_gnu_style = last_param != va_args_id;
2623
2624 let normal_param_count = params.len() - 1;
2626 for (i, param) in params.iter().take(normal_param_count).enumerate() {
2627 if i < args.len() {
2628 arg_map.insert(*param, args[i].clone());
2629 } else {
2630 arg_map.insert(*param, Vec::new());
2631 }
2632 }
2633
2634 let mut va = Vec::new();
2636 let va_start = normal_param_count;
2637 for (i, arg) in args.iter().enumerate().skip(va_start) {
2638 if i > va_start {
2639 va.push(Token::new(TokenKind::Comma, token.loc.clone()));
2640 }
2641 va.extend(arg.clone());
2642 }
2643
2644 if is_gnu_style {
2645 arg_map.insert(last_param, va.clone());
2647 arg_map.insert(va_args_id, va);
2649 } else {
2650 arg_map.insert(va_args_id, va);
2652 }
2653 } else {
2654 for (i, param) in params.iter().enumerate() {
2656 if i < args.len() {
2657 arg_map.insert(*param, args[i].clone());
2658 } else {
2659 arg_map.insert(*param, Vec::new());
2660 }
2661 }
2662 }
2663
2664 let prescanned_args = self.prescan_args(&arg_map)?;
2666
2667 let expanded = self.expand_tokens(&def.body, &arg_map, &prescanned_args)?;
2668 let marked = self.mark_expanded_with_registry(expanded, trigger_token_id, id, &call_loc);
2670 if let Some(mut cb) = self.macro_called_callbacks.remove(&id) {
2673 cb.on_macro_called(Some(&args), &self.interner);
2674 self.macro_called_callbacks.insert(id, cb);
2675 }
2676 let kind = if self.wrapped_macros.contains(&id) {
2679 let expanded_args: Result<Vec<_>, _> = args.into_iter()
2680 .map(|arg_tokens| {
2681 let expanded = self.expand_token_list_preserve_fn(&arg_tokens)?;
2683 Ok(expanded.into_iter()
2685 .filter(|t| !matches!(t.kind, TokenKind::MacroBegin(_) | TokenKind::MacroEnd(_)))
2686 .collect())
2687 })
2688 .collect();
2689 MacroInvocationKind::Function { args: expanded_args? }
2690 } else {
2691 MacroInvocationKind::Function { args }
2692 };
2693 let wrapped = self.wrap_with_markers(
2694 marked,
2695 id,
2696 token,
2697 kind,
2698 &call_loc,
2699 def.has_token_pasting,
2700 );
2701 Ok(Some(wrapped))
2702 }
2703 }
2704 }
2705
2706 fn mark_expanded_with_registry(
2711 &mut self,
2712 tokens: Vec<Token>,
2713 trigger_token_id: TokenId,
2714 macro_id: InternedStr,
2715 call_loc: &SourceLocation,
2716 ) -> Vec<Token> {
2717 tokens.into_iter().map(|mut t| {
2718 self.no_expand_registry.inherit(trigger_token_id, t.id);
2720 self.no_expand_registry.add(t.id, macro_id);
2722 t.loc = call_loc.clone();
2724 t
2725 }).collect()
2726 }
2727
2728 fn wrap_with_markers(
2737 &self,
2738 tokens: Vec<Token>,
2739 macro_name: InternedStr,
2740 trigger_token: &Token,
2741 kind: MacroInvocationKind,
2742 call_loc: &SourceLocation,
2743 has_token_pasting: bool,
2744 ) -> Vec<Token> {
2745 let is_wrapped = self.wrapped_macros.contains(¯o_name);
2746
2747 if !self.config.emit_markers && !is_wrapped {
2749 return tokens;
2750 }
2751
2752 let marker_id = TokenId::next();
2753
2754 let is_function_macro = matches!(kind, MacroInvocationKind::Function { .. });
2760 let preserve_call = is_function_macro
2761 && !has_token_pasting
2762 && !self.explicit_expand_macros.contains(¯o_name);
2763
2764 let begin_info = MacroBeginInfo {
2766 marker_id,
2767 trigger_token_id: trigger_token.id,
2768 macro_name,
2769 kind,
2770 call_loc: call_loc.clone(),
2771 is_wrapped,
2772 preserve_call,
2773 };
2774 let begin_token = Token::new(
2775 TokenKind::MacroBegin(Box::new(begin_info)),
2776 call_loc.clone(),
2777 );
2778
2779 let end_info = MacroEndInfo {
2781 begin_marker_id: marker_id,
2782 };
2783 let end_token = Token::new(TokenKind::MacroEnd(end_info), call_loc.clone());
2784
2785 let mut result = Vec::with_capacity(tokens.len() + 2);
2787 result.push(begin_token);
2788 result.extend(tokens);
2789 result.push(end_token);
2790 result
2791 }
2792
2793 fn collect_macro_args(&mut self, param_count: usize, is_variadic: bool) -> Result<Vec<Vec<Token>>, CompileError> {
2795 let mut args = Vec::new();
2796 let mut current_arg = Vec::new();
2797 let mut paren_depth = 0;
2798
2799 loop {
2800 let token = self.next_raw_token()?;
2801 match token.kind {
2802 TokenKind::LParen => {
2803 paren_depth += 1;
2804 current_arg.push(token);
2805 }
2806 TokenKind::RParen => {
2807 if paren_depth == 0 {
2808 if !current_arg.is_empty() || !args.is_empty() {
2809 args.push(current_arg);
2810 }
2811 break;
2812 }
2813 paren_depth -= 1;
2814 current_arg.push(token);
2815 }
2816 TokenKind::Comma if paren_depth == 0 => {
2817 if is_variadic && args.len() >= param_count {
2818 current_arg.push(token);
2819 } else {
2820 args.push(current_arg);
2821 current_arg = Vec::new();
2822 }
2823 }
2824 TokenKind::Eof => {
2825 return Err(CompileError::Preprocess {
2826 loc: token.loc,
2827 kind: PPError::InvalidMacroArgs("unterminated macro arguments".to_string()),
2828 });
2829 }
2830 TokenKind::Newline => continue,
2831 _ => current_arg.push(token),
2832 }
2833 }
2834
2835 Ok(args)
2836 }
2837
2838 fn prescan_args(&mut self, args: &HashMap<InternedStr, Vec<Token>>) -> Result<HashMap<InternedStr, Vec<Token>>, CompileError> {
2841 let mut prescanned = HashMap::new();
2842 for (param, tokens) in args.iter() {
2843 let expanded = self.expand_token_list(tokens)?;
2845 prescanned.insert(*param, expanded);
2846 }
2847 Ok(prescanned)
2848 }
2849
2850 fn expand_token_list(&mut self, tokens: &[Token]) -> Result<Vec<Token>, CompileError> {
2853 self.expand_token_list_internal(tokens, false)
2854 }
2855
2856 fn expand_token_list_preserve_fn(&mut self, tokens: &[Token]) -> Result<Vec<Token>, CompileError> {
2861 self.expand_token_list_internal(tokens, true)
2862 }
2863
2864 fn expand_token_list_internal(
2866 &mut self,
2867 tokens: &[Token],
2868 preserve_function_macros: bool,
2869 ) -> Result<Vec<Token>, CompileError> {
2870 if tokens.is_empty() {
2871 return Ok(Vec::new());
2872 }
2873
2874 let saved_lookahead = std::mem::take(&mut self.lookahead);
2876
2877 self.lookahead.push(Token::new(TokenKind::Eof, SourceLocation::default()));
2880 for token in tokens.iter().rev() {
2881 self.lookahead.push(token.clone());
2882 }
2883
2884 let mut result = Vec::new();
2886 while let Some(token) = self.lookahead.pop() {
2887 if matches!(token.kind, TokenKind::Eof) {
2889 break;
2890 }
2891
2892 if matches!(token.kind, TokenKind::Newline) {
2894 continue;
2895 }
2896
2897 if let TokenKind::Ident(id) = token.kind {
2899 if let Some(expanded) = self.try_expand_macro_internal(id, &token, preserve_function_macros)? {
2900 for t in expanded.into_iter().rev() {
2902 self.lookahead.push(t);
2903 }
2904 continue;
2905 }
2906 }
2907
2908 result.push(token);
2909 }
2910
2911 self.lookahead = saved_lookahead;
2913
2914 Ok(result)
2915 }
2916
2917 fn expand_tokens(&mut self, tokens: &[Token], raw_args: &HashMap<InternedStr, Vec<Token>>, prescanned_args: &HashMap<InternedStr, Vec<Token>>) -> Result<Vec<Token>, CompileError> {
2921 let mut result = Vec::new();
2922 let mut i = 0;
2923
2924 while i < tokens.len() {
2925 let token = &tokens[i];
2926
2927 match &token.kind {
2928 TokenKind::Hash if i + 1 < tokens.len() => {
2929 if let TokenKind::Ident(param_id) = tokens[i + 1].kind {
2930 if let Some(arg_tokens) = raw_args.get(¶m_id) {
2932 let stringified = self.stringify_tokens(arg_tokens);
2933 result.push(Token::new(
2934 TokenKind::StringLit(stringified.into_bytes()),
2935 token.loc.clone(),
2936 ));
2937 i += 2;
2938 continue;
2939 }
2940 }
2941 return Err(CompileError::Preprocess {
2942 loc: token.loc.clone(),
2943 kind: PPError::InvalidStringize,
2944 });
2945 }
2946 TokenKind::HashHash => {
2947 if result.is_empty() || i + 1 >= tokens.len() {
2948 return Err(CompileError::Preprocess {
2949 loc: token.loc.clone(),
2950 kind: PPError::InvalidTokenPaste,
2951 });
2952 }
2953
2954 let left = result.pop().unwrap();
2956
2957 i += 1;
2959 let right_token = &tokens[i];
2960 let right_tokens = if let TokenKind::Ident(id) = right_token.kind {
2961 if let Some(arg_tokens) = raw_args.get(&id) {
2962 arg_tokens.clone()
2963 } else {
2964 vec![right_token.clone()]
2965 }
2966 } else {
2967 vec![right_token.clone()]
2968 };
2969
2970 let pasted = self.paste_tokens(&left, &right_tokens, &token.loc)?;
2972 result.extend(pasted);
2973 i += 1;
2974 continue;
2975 }
2976 TokenKind::Ident(id) => {
2977 if let Some(arg_tokens) = prescanned_args.get(id) {
2979 result.extend(arg_tokens.iter().cloned());
2980 } else {
2981 result.push(token.clone());
2982 }
2983 }
2984 _ => result.push(token.clone()),
2985 }
2986
2987 i += 1;
2988 }
2989
2990 Ok(result)
2991 }
2992
2993 fn paste_tokens(&mut self, left: &Token, right: &[Token], loc: &SourceLocation) -> Result<Vec<Token>, CompileError> {
2995 let left_str = self.token_to_string(left);
2997
2998 if right.is_empty() {
3000 return Ok(vec![left.clone()]);
3001 }
3002
3003 let right_first_str = self.token_to_string(&right[0]);
3005 let pasted_str = format!("{}{}", left_str, right_first_str);
3006
3007 let pasted_tokens = self.tokenize_string(&pasted_str);
3009
3010 let mut result = pasted_tokens;
3012 result.extend(right.iter().skip(1).cloned());
3013
3014 for t in &mut result {
3016 t.loc = loc.clone();
3017 }
3018
3019 Ok(result)
3020 }
3021
3022 fn token_to_string(&self, token: &Token) -> String {
3024 match &token.kind {
3025 TokenKind::Ident(id) => self.interner.get(*id).to_string(),
3026 TokenKind::IntLit(n) => n.to_string(),
3027 TokenKind::UIntLit(n) => n.to_string(),
3028 TokenKind::FloatLit(f) => f.to_string(),
3029 TokenKind::StringLit(s) => format!("\"{}\"", String::from_utf8_lossy(s)),
3030 TokenKind::CharLit(c) => format!("'{}'", *c as char),
3031 TokenKind::WideCharLit(c) => format!("L'{}'", char::from_u32(*c).unwrap_or('?')),
3032 TokenKind::Plus => "+".to_string(),
3033 TokenKind::Minus => "-".to_string(),
3034 TokenKind::Star => "*".to_string(),
3035 TokenKind::Slash => "/".to_string(),
3036 TokenKind::Percent => "%".to_string(),
3037 TokenKind::Amp => "&".to_string(),
3038 TokenKind::Pipe => "|".to_string(),
3039 TokenKind::Caret => "^".to_string(),
3040 TokenKind::Tilde => "~".to_string(),
3041 TokenKind::Bang => "!".to_string(),
3042 TokenKind::Lt => "<".to_string(),
3043 TokenKind::Gt => ">".to_string(),
3044 TokenKind::Eq => "=".to_string(),
3045 TokenKind::Question => "?".to_string(),
3046 TokenKind::Colon => ":".to_string(),
3047 TokenKind::Dot => ".".to_string(),
3048 TokenKind::Comma => ",".to_string(),
3049 TokenKind::Semi => ";".to_string(),
3050 TokenKind::LParen => "(".to_string(),
3051 TokenKind::RParen => ")".to_string(),
3052 TokenKind::LBracket => "[".to_string(),
3053 TokenKind::RBracket => "]".to_string(),
3054 TokenKind::LBrace => "{".to_string(),
3055 TokenKind::RBrace => "}".to_string(),
3056 TokenKind::Arrow => "->".to_string(),
3057 TokenKind::PlusPlus => "++".to_string(),
3058 TokenKind::MinusMinus => "--".to_string(),
3059 TokenKind::LtLt => "<<".to_string(),
3060 TokenKind::GtGt => ">>".to_string(),
3061 TokenKind::LtEq => "<=".to_string(),
3062 TokenKind::GtEq => ">=".to_string(),
3063 TokenKind::EqEq => "==".to_string(),
3064 TokenKind::BangEq => "!=".to_string(),
3065 TokenKind::AmpAmp => "&&".to_string(),
3066 TokenKind::PipePipe => "||".to_string(),
3067 TokenKind::PlusEq => "+=".to_string(),
3068 TokenKind::MinusEq => "-=".to_string(),
3069 TokenKind::StarEq => "*=".to_string(),
3070 TokenKind::SlashEq => "/=".to_string(),
3071 TokenKind::PercentEq => "%=".to_string(),
3072 TokenKind::AmpEq => "&=".to_string(),
3073 TokenKind::PipeEq => "|=".to_string(),
3074 TokenKind::CaretEq => "^=".to_string(),
3075 TokenKind::LtLtEq => "<<=".to_string(),
3076 TokenKind::GtGtEq => ">>=".to_string(),
3077 TokenKind::Ellipsis => "...".to_string(),
3078 TokenKind::Hash => "#".to_string(),
3079 TokenKind::HashHash => "##".to_string(),
3080 _ => String::new(),
3081 }
3082 }
3083
3084 fn stringify_tokens(&self, tokens: &[Token]) -> String {
3086 let mut result = String::new();
3087 for (i, token) in tokens.iter().enumerate() {
3088 if i > 0 { result.push(' '); }
3089 match &token.kind {
3090 TokenKind::Ident(id) => result.push_str(self.interner.get(*id)),
3091 TokenKind::IntLit(n) => result.push_str(&n.to_string()),
3092 TokenKind::UIntLit(n) => result.push_str(&format!("{}u", n)),
3093 TokenKind::FloatLit(f) => result.push_str(&f.to_string()),
3094 TokenKind::StringLit(s) => {
3095 result.push('"');
3096 result.push_str(&String::from_utf8_lossy(s));
3097 result.push('"');
3098 }
3099 TokenKind::CharLit(c) => {
3100 result.push('\'');
3101 result.push(*c as char);
3102 result.push('\'');
3103 }
3104 _ => result.push_str(&format!("{:?}", token.kind)),
3105 }
3106 }
3107 result
3108 }
3109
    /// Returns a shared reference to the preprocessor's file registry.
    pub fn files(&self) -> &FileRegistry {
        &self.files
    }
3114
    /// Returns a shared reference to the preprocessor's string interner.
    pub fn interner(&self) -> &StringInterner {
        &self.interner
    }
3119
    /// Returns a mutable reference to the preprocessor's string interner.
    pub fn interner_mut(&mut self) -> &mut StringInterner {
        &mut self.interner
    }
3124
    /// Returns a shared reference to the preprocessor's macro table.
    pub fn macros(&self) -> &MacroTable {
        &self.macros
    }
3129
3130 pub fn expand_macro_body_for_inference(
3144 &mut self,
3145 body: &[Token],
3146 params: &[InternedStr],
3147 args: &[Vec<Token>],
3148 in_progress: &mut HashSet<InternedStr>,
3149 ) -> Result<(Vec<Token>, HashSet<InternedStr>), CompileError> {
3150 let mut called_macros = HashSet::new();
3151
3152 let mut raw_args = HashMap::new();
3154 let mut prescanned_args = HashMap::new();
3155
3156 for (i, ¶m) in params.iter().enumerate() {
3157 if let Some(arg_tokens) = args.get(i) {
3158 raw_args.insert(param, arg_tokens.clone());
3160 let (expanded_arg, arg_called) = self.expand_tokens_for_inference(
3163 arg_tokens,
3164 in_progress,
3165 )?;
3166 called_macros.extend(arg_called);
3167 prescanned_args.insert(param, expanded_arg);
3168 }
3169 }
3170
3171 let substituted = self.expand_tokens(body, &raw_args, &prescanned_args)?;
3173
3174 let (result, more_called) = self.expand_tokens_for_inference(&substituted, in_progress)?;
3176 called_macros.extend(more_called);
3177
3178 Ok((result, called_macros))
3179 }
3180
3181 fn expand_tokens_for_inference(
3185 &mut self,
3186 tokens: &[Token],
3187 in_progress: &mut HashSet<InternedStr>,
3188 ) -> Result<(Vec<Token>, HashSet<InternedStr>), CompileError> {
3189 let mut result = Vec::new();
3190 let mut called_macros = HashSet::new();
3191 let mut i = 0;
3192
3193 while i < tokens.len() {
3194 let token = &tokens[i];
3195
3196 if let TokenKind::Ident(id) = token.kind {
3197 if self.skip_expand_macros.contains(&id) {
3199 result.push(token.clone());
3200 i += 1;
3201 continue;
3202 }
3203
3204 if in_progress.contains(&id) {
3206 result.push(token.clone());
3207 i += 1;
3208 continue;
3209 }
3210
3211 if let Some(def) = self.macros.get(id).cloned() {
3213 match &def.kind {
3214 MacroKind::Object => {
3215 called_macros.insert(id);
3217 in_progress.insert(id);
3218 let (expanded, more_called) = self.expand_macro_body_for_inference(
3219 &def.body,
3220 &[],
3221 &[],
3222 in_progress,
3223 )?;
3224 called_macros.extend(more_called);
3225 result.extend(expanded);
3226 in_progress.remove(&id);
3227 i += 1;
3228 continue;
3229 }
3230 MacroKind::Function { params, is_variadic } => {
3231 if let Some((args, consumed)) = self.try_collect_args_from_tokens(&tokens[i + 1..], params.len(), *is_variadic) {
3233 called_macros.insert(id);
3235
3236 if !self.explicit_expand_macros.contains(&id) {
3238 result.push(token.clone());
3240
3241 result.push(Token::new(TokenKind::LParen, token.loc.clone()));
3244
3245 for (arg_idx, arg_tokens) in args.iter().enumerate() {
3246 if arg_idx > 0 {
3247 result.push(Token::new(TokenKind::Comma, token.loc.clone()));
3248 }
3249 let (expanded_arg, arg_called) = self.expand_tokens_for_inference(
3251 arg_tokens,
3252 in_progress,
3253 )?;
3254 called_macros.extend(arg_called);
3255 result.extend(expanded_arg);
3256 }
3257
3258 result.push(Token::new(TokenKind::RParen, token.loc.clone()));
3260
3261 i += 1 + consumed;
3262 continue;
3263 }
3264
3265 in_progress.insert(id);
3267 let (expanded, more_called) = self.expand_macro_body_for_inference(
3268 &def.body,
3269 params,
3270 &args,
3271 in_progress,
3272 )?;
3273 called_macros.extend(more_called);
3274 result.extend(expanded);
3275 in_progress.remove(&id);
3276 i += 1 + consumed;
3277 continue;
3278 } else {
3279 result.push(token.clone());
3281 }
3282 }
3283 }
3284 } else {
3285 result.push(token.clone());
3286 }
3287 } else {
3288 result.push(token.clone());
3289 }
3290
3291 i += 1;
3292 }
3293
3294 Ok((result, called_macros))
3295 }
3296
3297 fn try_collect_args_from_tokens(
3301 &self,
3302 tokens: &[Token],
3303 param_count: usize,
3304 is_variadic: bool,
3305 ) -> Option<(Vec<Vec<Token>>, usize)> {
3306 let mut start = 0;
3308 while start < tokens.len() {
3309 match &tokens[start].kind {
3310 TokenKind::Space | TokenKind::Newline => start += 1,
3311 TokenKind::LParen => break,
3312 _ => return None,
3313 }
3314 }
3315
3316 if start >= tokens.len() || !matches!(tokens[start].kind, TokenKind::LParen) {
3317 return None;
3318 }
3319
3320 let mut args: Vec<Vec<Token>> = Vec::new();
3322 let mut current_arg = Vec::new();
3323 let mut paren_depth = 0;
3324 let mut i = start + 1;
3325
3326 while i < tokens.len() {
3327 let token = &tokens[i];
3328 match &token.kind {
3329 TokenKind::LParen => {
3330 paren_depth += 1;
3331 current_arg.push(token.clone());
3332 }
3333 TokenKind::RParen => {
3334 if paren_depth == 0 {
3335 if !current_arg.is_empty() || !args.is_empty() {
3337 args.push(current_arg);
3338 }
3339 return Some((args, i + 1));
3341 }
3342 paren_depth -= 1;
3343 current_arg.push(token.clone());
3344 }
3345 TokenKind::Comma if paren_depth == 0 => {
3346 if is_variadic && args.len() >= param_count.saturating_sub(1) {
3348 current_arg.push(token.clone());
3349 } else {
3350 args.push(current_arg);
3351 current_arg = Vec::new();
3352 }
3353 }
3354 TokenKind::Space | TokenKind::Newline => {
3355 if !current_arg.is_empty() {
3357 current_arg.push(token.clone());
3358 }
3359 }
3360 _ => {
3361 current_arg.push(token.clone());
3362 }
3363 }
3364 i += 1;
3365 }
3366
3367 None
3369 }
3370
3371 fn is_current_file_in_target(&self) -> bool {
3373 let target_dir = match &self.config.target_dir {
3374 Some(dir) => dir,
3375 None => return false,
3376 };
3377
3378 let file_id = match self.sources.last() {
3379 Some(source) => source.file_id,
3380 None => return false,
3381 };
3382
3383 let path = self.files.get_path(file_id);
3384 path.starts_with(target_dir)
3385 }
3386
3387 pub fn collect_tokens(&mut self) -> Result<Vec<Token>, CompileError> {
3389 let mut tokens = Vec::new();
3390 loop {
3391 let token = self.next_token()?;
3392 if matches!(token.kind, TokenKind::Eof) {
3393 break;
3394 }
3395 tokens.push(token);
3396 }
3397 Ok(tokens)
3398 }
3399}
3400
3401impl TokenSource for Preprocessor {
3405 fn next_token(&mut self) -> crate::error::Result<Token> {
3406 Preprocessor::next_token(self)
3407 }
3408
3409 fn unget_token(&mut self, token: Token) {
3410 Preprocessor::unget_token(self, token)
3411 }
3412
3413 fn interner(&self) -> &StringInterner {
3414 &self.interner
3415 }
3416
3417 fn interner_mut(&mut self) -> &mut StringInterner {
3418 &mut self.interner
3419 }
3420
3421 fn files(&self) -> &FileRegistry {
3422 &self.files
3423 }
3424
3425 fn is_file_in_target(&self, file_id: crate::source::FileId) -> bool {
3426 let target_dir = match &self.config.target_dir {
3427 Some(dir) => dir,
3428 None => return false,
3429 };
3430 let path = self.files.get_path(file_id);
3431 path.starts_with(target_dir)
3432 }
3433}
3434
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Writes `content` to a fresh temporary file and returns its handle.
    fn create_temp_file(content: &str) -> NamedTempFile {
        let mut file = NamedTempFile::new().unwrap();
        file.write_all(content.as_bytes()).unwrap();
        file
    }

    /// True when `tokens` contains an identifier spelled `name`.
    fn has_ident(pp: &Preprocessor, tokens: &[Token], name: &str) -> bool {
        tokens.iter().any(|t| match t.kind {
            TokenKind::Ident(id) => pp.interner().get(id) == name,
            _ => false,
        })
    }

    /// True when `tokens` contains a token of the same variant as `kind`.
    fn has_keyword(tokens: &[Token], kind: TokenKind) -> bool {
        let wanted = std::mem::discriminant(&kind);
        tokens.iter().any(|t| std::mem::discriminant(&t.kind) == wanted)
    }

    /// Creates a preprocessor reading `source` from a temporary file.
    /// The file handle is returned so it stays alive for the whole test.
    fn open_source(source: &str, config: PPConfig) -> (NamedTempFile, Preprocessor) {
        let file = create_temp_file(source);
        let mut pp = Preprocessor::new(config);
        pp.add_source_file(file.path()).unwrap();
        (file, pp)
    }

    /// Preprocesses `source` completely and returns all produced tokens.
    fn preprocess(source: &str, config: PPConfig) -> (NamedTempFile, Preprocessor, Vec<Token>) {
        let (file, mut pp) = open_source(source, config);
        let tokens = pp.collect_tokens().unwrap();
        (file, pp, tokens)
    }

    /// Configuration with macro begin/end markers switched on.
    fn markers_enabled() -> PPConfig {
        PPConfig {
            emit_markers: true,
            ..Default::default()
        }
    }

    /// Counts `(MacroBegin, MacroEnd)` tokens.
    fn marker_counts(tokens: &[Token]) -> (usize, usize) {
        let begins = tokens
            .iter()
            .filter(|t| matches!(t.kind, TokenKind::MacroBegin(_)))
            .count();
        let ends = tokens
            .iter()
            .filter(|t| matches!(t.kind, TokenKind::MacroEnd(_)))
            .count();
        (begins, ends)
    }

    /// Interns `name` and registers a fresh `MacroCallWatcher` for it.
    fn attach_watcher(pp: &mut Preprocessor, name: &str) -> InternedStr {
        let id = pp.interner_mut().intern(name);
        pp.set_macro_called_callback(id, Box::new(MacroCallWatcher::new()));
        id
    }

    /// Runs `check` against the `MacroCallWatcher` registered for `name`.
    fn with_watcher<R>(
        pp: &Preprocessor,
        name: InternedStr,
        check: impl FnOnce(&MacroCallWatcher) -> R,
    ) -> R {
        let cb = pp
            .get_macro_called_callback(name)
            .expect("Callback not found");
        let watcher = cb
            .as_any()
            .downcast_ref::<MacroCallWatcher>()
            .expect("Failed to downcast to MacroCallWatcher");
        check(watcher)
    }

    // ---- basic tokenization and directives ----

    #[test]
    fn test_simple_tokens() {
        let (_file, pp, tokens) = preprocess("int x;", PPConfig::default());

        assert_eq!(tokens.len(), 3);
        assert!(has_keyword(&tokens, TokenKind::KwInt));
        assert!(has_ident(&pp, &tokens, "x"));
    }

    #[test]
    fn test_object_macro() {
        let (_file, _pp, tokens) =
            preprocess("#define VALUE 42\nint x = VALUE;", PPConfig::default());

        assert!(tokens.iter().any(|t| matches!(t.kind, TokenKind::IntLit(42))));
    }

    #[test]
    fn test_function_macro() {
        let (_file, _pp, tokens) = preprocess(
            "#define ADD(a, b) a + b\nint x = ADD(1, 2);",
            PPConfig::default(),
        );

        assert!(tokens.iter().any(|t| matches!(t.kind, TokenKind::Plus)));
    }

    #[test]
    fn test_ifdef() {
        let (_file, _pp, tokens) = preprocess(
            "#define FOO\n#ifdef FOO\nint x;\n#endif",
            PPConfig::default(),
        );

        assert!(has_keyword(&tokens, TokenKind::KwInt));
    }

    #[test]
    fn test_ifndef() {
        let (_file, _pp, tokens) =
            preprocess("#ifndef BAR\nint x;\n#endif", PPConfig::default());

        assert!(has_keyword(&tokens, TokenKind::KwInt));
    }

    #[test]
    fn test_ifdef_else() {
        let (_file, pp, tokens) = preprocess(
            "#ifdef UNDEFINED\nint x;\n#else\nfloat y;\n#endif",
            PPConfig::default(),
        );

        // Only the #else branch may survive.
        assert!(!has_ident(&pp, &tokens, "x"));
        assert!(has_keyword(&tokens, TokenKind::KwFloat));
        assert!(has_ident(&pp, &tokens, "y"));
    }

    #[test]
    fn test_if_expression() {
        let (_file, _pp, tokens) =
            preprocess("#if 1 + 1 == 2\nint x;\n#endif", PPConfig::default());

        assert!(has_keyword(&tokens, TokenKind::KwInt));
    }

    #[test]
    fn test_predefined_macro() {
        let config = PPConfig {
            predefined: vec![("VERSION".to_string(), Some("100".to_string()))],
            ..Default::default()
        };
        let (_file, _pp, tokens) = preprocess("int v = VERSION;", config);

        assert!(tokens.iter().any(|t| matches!(t.kind, TokenKind::IntLit(100))));
    }

    #[test]
    fn test_undef() {
        let (_file, pp, tokens) = preprocess(
            "#define FOO 1\n#undef FOO\n#ifdef FOO\nint x;\n#endif",
            PPConfig::default(),
        );

        assert!(!has_ident(&pp, &tokens, "x"));
    }

    #[test]
    fn test_nested_ifdef() {
        let (_file, pp, tokens) = preprocess(
            "#define A\n#ifdef A\n#ifdef B\nint x;\n#else\nfloat y;\n#endif\n#endif",
            PPConfig::default(),
        );

        assert!(!has_ident(&pp, &tokens, "x"));
        assert!(has_keyword(&tokens, TokenKind::KwFloat));
        assert!(has_ident(&pp, &tokens, "y"));
    }

    // ---- NoExpandRegistry ----

    #[test]
    fn test_no_expand_registry_new() {
        let registry = NoExpandRegistry::new();

        assert!(registry.is_empty());
        assert_eq!(registry.len(), 0);
    }

    #[test]
    fn test_no_expand_registry_add() {
        let mut interner = crate::intern::StringInterner::new();
        let mut registry = NoExpandRegistry::new();

        let tok = TokenId::next();
        let foo = interner.intern("FOO");
        registry.add(tok, foo);

        assert!(registry.is_blocked(tok, foo));
        assert_eq!(registry.len(), 1);
    }

    #[test]
    fn test_no_expand_registry_extend() {
        let mut interner = crate::intern::StringInterner::new();
        let mut registry = NoExpandRegistry::new();

        let tok = TokenId::next();
        let foo = interner.intern("FOO");
        let bar = interner.intern("BAR");
        let baz = interner.intern("BAZ");
        registry.extend(tok, vec![foo, bar, baz]);

        assert!(registry.is_blocked(tok, foo));
        assert!(registry.is_blocked(tok, bar));
        assert!(registry.is_blocked(tok, baz));
    }

    #[test]
    fn test_no_expand_registry_not_blocked() {
        let mut interner = crate::intern::StringInterner::new();
        let mut registry = NoExpandRegistry::new();

        let tok = TokenId::next();
        let unrelated_tok = TokenId::next();
        let foo = interner.intern("FOO");
        let bar = interner.intern("BAR");
        registry.add(tok, foo);

        // A different token, or a different macro, is not blocked.
        assert!(!registry.is_blocked(unrelated_tok, foo));
        assert!(!registry.is_blocked(tok, bar));
    }

    #[test]
    fn test_no_expand_registry_inherit() {
        let mut interner = crate::intern::StringInterner::new();
        let mut registry = NoExpandRegistry::new();

        let parent = TokenId::next();
        let child = TokenId::next();
        let foo = interner.intern("FOO");
        let bar = interner.intern("BAR");

        registry.add(parent, foo);
        registry.add(parent, bar);
        registry.inherit(parent, child);

        assert!(registry.is_blocked(child, foo));
        assert!(registry.is_blocked(child, bar));
    }

    #[test]
    fn test_no_expand_registry_inherit_merge() {
        let mut interner = crate::intern::StringInterner::new();
        let mut registry = NoExpandRegistry::new();

        let parent = TokenId::next();
        let child = TokenId::next();
        let foo = interner.intern("FOO");
        let bar = interner.intern("BAR");
        let baz = interner.intern("BAZ");

        registry.add(parent, foo);
        registry.add(child, bar);
        registry.inherit(parent, child);

        // The child keeps its own entry and gains the parent's.
        assert!(registry.is_blocked(child, foo));
        assert!(registry.is_blocked(child, bar));

        // The parent is left untouched.
        assert!(registry.is_blocked(parent, foo));
        assert!(!registry.is_blocked(parent, bar));

        // A macro that was never registered is blocked for neither.
        assert!(!registry.is_blocked(parent, baz));
        assert!(!registry.is_blocked(child, baz));
    }

    // ---- macro begin/end markers ----

    #[test]
    fn test_emit_markers_disabled() {
        let (_file, _pp, tokens) =
            preprocess("#define FOO 42\nint x = FOO;", PPConfig::default());

        let has_marker = tokens
            .iter()
            .any(|t| matches!(t.kind, TokenKind::MacroBegin(_) | TokenKind::MacroEnd(_)));
        assert!(!has_marker, "Markers should not be emitted when emit_markers is false");
    }

    #[test]
    fn test_emit_markers_object_macro() {
        let (_file, pp, tokens) = preprocess("#define FOO 42\nint x = FOO;", markers_enabled());

        let (begin_count, end_count) = marker_counts(&tokens);
        assert_eq!(begin_count, 1, "Should have exactly one MacroBegin");
        assert_eq!(end_count, 1, "Should have exactly one MacroEnd");

        for t in &tokens {
            if let TokenKind::MacroBegin(info) = &t.kind {
                assert_eq!(pp.interner().get(info.macro_name), "FOO");
                assert!(matches!(info.kind, MacroInvocationKind::Object));
            }
        }
    }

    #[test]
    fn test_emit_markers_function_macro() {
        let (_file, pp, tokens) = preprocess(
            "#define ADD(a, b) a + b\nint x = ADD(1, 2);",
            markers_enabled(),
        );

        let (begin_count, end_count) = marker_counts(&tokens);
        assert_eq!(begin_count, 1, "Should have exactly one MacroBegin");
        assert_eq!(end_count, 1, "Should have exactly one MacroEnd");

        for t in &tokens {
            if let TokenKind::MacroBegin(info) = &t.kind {
                assert_eq!(pp.interner().get(info.macro_name), "ADD");
                match &info.kind {
                    MacroInvocationKind::Function { args } => {
                        assert_eq!(args.len(), 2, "ADD macro should have 2 arguments");
                    }
                    _ => panic!("Expected Function macro kind"),
                }
            }
        }
    }

    #[test]
    fn test_emit_markers_begin_end_matching() {
        let (_file, _pp, tokens) = preprocess("#define FOO 1\nint x = FOO;", markers_enabled());

        // Take the id carried by the last begin marker and the last end marker.
        let begin_marker_id = tokens
            .iter()
            .filter_map(|t| match &t.kind {
                TokenKind::MacroBegin(info) => Some(info.marker_id),
                _ => None,
            })
            .last();
        let end_marker_id = tokens
            .iter()
            .filter_map(|t| match &t.kind {
                TokenKind::MacroEnd(info) => Some(info.begin_marker_id),
                _ => None,
            })
            .last();

        assert!(begin_marker_id.is_some(), "Should have MacroBegin");
        assert!(end_marker_id.is_some(), "Should have MacroEnd");
        assert_eq!(
            begin_marker_id.unwrap(),
            end_marker_id.unwrap(),
            "MacroBegin.marker_id should match MacroEnd.begin_marker_id"
        );
    }

    // ---- MacroCallWatcher ----

    #[test]
    fn test_macro_call_watcher_basic() {
        let watcher = MacroCallWatcher::new();

        assert!(!watcher.was_called());
        assert!(watcher.last_args().is_none());
    }

    #[test]
    fn test_macro_call_watcher_object_macro() {
        let (_file, mut pp) = open_source(
            "#define TEST_MACRO 42\nint x = TEST_MACRO;",
            PPConfig::default(),
        );
        let macro_name = attach_watcher(&mut pp, "TEST_MACRO");

        let _tokens = pp.collect_tokens().unwrap();

        with_watcher(&pp, macro_name, |watcher| {
            assert!(watcher.was_called(), "TEST_MACRO should have been called");
            // Object-like macros carry no arguments.
            assert!(watcher.last_args().is_none());
        });
    }

    #[test]
    fn test_macro_call_watcher_function_macro() {
        let (_file, mut pp) = open_source(
            "#define ADD(a, b) a + b\nint x = ADD(10, 20);",
            PPConfig::default(),
        );
        let macro_name = attach_watcher(&mut pp, "ADD");

        let _tokens = pp.collect_tokens().unwrap();

        with_watcher(&pp, macro_name, |watcher| {
            assert!(watcher.was_called(), "ADD should have been called");
            let args = watcher.last_args();
            assert!(args.is_some(), "Function macro should have arguments");
            let args = args.unwrap();
            assert_eq!(args.len(), 2, "ADD has 2 arguments");
            assert_eq!(args[0], "10");
            assert_eq!(args[1], "20");
        });
    }

    #[test]
    fn test_macro_call_watcher_clear() {
        let (_file, mut pp) = open_source(
            "#define FOO(x) x\nint a = FOO(1);\nint b = FOO(2);",
            PPConfig::default(),
        );
        let macro_name = attach_watcher(&mut pp, "FOO");

        // Read at most five tokens, stopping early on Eof.
        for _ in 0..5 {
            if pp.next_token().unwrap().kind == TokenKind::Eof {
                break;
            }
        }

        with_watcher(&pp, macro_name, |watcher| {
            assert!(watcher.was_called());
            let args = watcher.last_args().unwrap();
            assert_eq!(args[0], "1");
        });

        // Reset the watcher through the mutable accessor.
        {
            let cb = pp.get_macro_called_callback_mut(macro_name).unwrap();
            let watcher = cb.as_any_mut().downcast_mut::<MacroCallWatcher>().unwrap();
            watcher.clear();
        }

        with_watcher(&pp, macro_name, |watcher| {
            assert!(!watcher.was_called());
            assert!(watcher.last_args().is_none());
        });
    }

    #[test]
    fn test_macro_call_watcher_take_called() {
        let (_file, mut pp) = open_source("#define BAR 99\nint x = BAR;", PPConfig::default());
        let macro_name = attach_watcher(&mut pp, "BAR");

        let _tokens = pp.collect_tokens().unwrap();

        with_watcher(&pp, macro_name, |watcher| {
            assert!(watcher.take_called(), "First take_called should return true");
            assert!(!watcher.take_called(), "Second take_called should return false");
        });
    }

    #[test]
    fn test_macro_call_watcher_multiple_macros() {
        let (_file, mut pp) = open_source(
            "#define A(x) x\n#define B(x) x\n#define C(x) x\nint a = A(1); int b = B(2);",
            PPConfig::default(),
        );

        let macro_a = pp.interner_mut().intern("A");
        let macro_b = pp.interner_mut().intern("B");
        let macro_c = pp.interner_mut().intern("C");
        for name in [macro_a, macro_b, macro_c] {
            pp.set_macro_called_callback(name, Box::new(MacroCallWatcher::new()));
        }

        let _tokens = pp.collect_tokens().unwrap();

        with_watcher(&pp, macro_a, |watcher| {
            assert!(watcher.was_called(), "A should have been called");
        });
        with_watcher(&pp, macro_b, |watcher| {
            assert!(watcher.was_called(), "B should have been called");
        });
        with_watcher(&pp, macro_c, |watcher| {
            assert!(!watcher.was_called(), "C should not have been called");
        });
    }

    #[test]
    fn test_macro_call_watcher_sv_head_pattern() {
        let (_file, mut pp) = open_source(
            "#define _SV_HEAD(type) void *sv_any; type *sv_type\n\
             struct sv { _SV_HEAD(SV); };\n\
             struct av { _SV_HEAD(AV); };\n\
             struct other { int x; };",
            PPConfig::default(),
        );
        let sv_head = attach_watcher(&mut pp, "_SV_HEAD");

        let mut sv_family_members = Vec::new();
        let mut current_struct: Option<String> = None;

        loop {
            let token = pp.next_token().unwrap();
            match token.kind {
                TokenKind::Eof => break,
                TokenKind::KwStruct => {
                    // Reset the watcher at the start of every struct.
                    if let Some(cb) = pp.get_macro_called_callback_mut(sv_head) {
                        let watcher = cb.as_any_mut().downcast_mut::<MacroCallWatcher>().unwrap();
                        watcher.clear();
                    }

                    let name_token = pp.next_token().unwrap();
                    if let TokenKind::Ident(id) = name_token.kind {
                        current_struct = Some(pp.interner().get(id).to_string());
                    }
                }
                TokenKind::Semi => {
                    // First `;` after a struct name: record the struct if the
                    // macro fired since the struct keyword was seen.
                    if let Some(ref struct_name) = current_struct {
                        if let Some(cb) = pp.get_macro_called_callback(sv_head) {
                            let watcher = cb.as_any().downcast_ref::<MacroCallWatcher>().unwrap();
                            if watcher.was_called() {
                                sv_family_members.push(struct_name.clone());
                            }
                        }
                    }
                    current_struct = None;
                }
                _ => {}
            }
        }

        assert!(sv_family_members.contains(&"sv".to_string()), "sv should be SV family");
        assert!(sv_family_members.contains(&"av".to_string()), "av should be SV family");
        assert!(!sv_family_members.contains(&"other".to_string()), "other should not be SV family");
    }
}