use std::any::Any;
use std::collections::{HashMap, HashSet};
use std::fs;
use std::path::{Path, PathBuf};
use crate::error::{CompileError, PPError};
use crate::token_source::TokenSource;
use crate::intern::{InternedStr, StringInterner};
use crate::lexer::Lexer;
use crate::macro_def::{MacroDef, MacroKind, MacroTable};
use crate::pp_expr::PPExprEvaluator;
use crate::source::{FileId, FileRegistry, SourceLocation};
use crate::token::{
Comment, MacroBeginInfo, MacroEndInfo, MacroInvocationKind, Token, TokenId, TokenKind,
};
/// Observer that is told about every macro definition the preprocessor records.
pub trait MacroDefCallback {
/// Invoked with the freshly built definition (the preprocessor calls this
/// right before storing the definition in its macro table).
fn on_macro_defined(&mut self, def: &MacroDef);
/// Converts the boxed callback into `Box<dyn Any>`; presumably so the owner
/// can downcast it back to the concrete type after taking it out again.
fn into_any(self: Box<Self>) -> Box<dyn Any>;
}
/// Bundles two values so they can be handled as one unit; used to fan a
/// single callback slot out to two receivers.
pub struct CallbackPair<A, B> {
    /// Member that is stored (and, for callbacks, notified) first.
    pub first: A,
    /// Member that is stored (and, for callbacks, notified) second.
    pub second: B,
}

impl<A, B> CallbackPair<A, B> {
    /// Wraps `first` and `second` into a pair, keeping their order.
    pub fn new(first: A, second: B) -> Self {
        CallbackPair { first, second }
    }
}
/// A pair of callbacks is itself a callback: each notification is forwarded
/// to `first` and then to `second`.
impl<A: MacroDefCallback + 'static, B: MacroDefCallback + 'static> MacroDefCallback for CallbackPair<A, B> {
fn on_macro_defined(&mut self, def: &MacroDef) {
// Order is fixed: `first` always sees the definition before `second`.
self.first.on_macro_defined(def);
self.second.on_macro_defined(def);
}
/// Returns the whole pair as `Box<dyn Any>` (hence the `'static` bounds).
fn into_any(self: Box<Self>) -> Box<dyn Any> {
self
}
}
/// Observer for invocations of one particular macro (callbacks are registered
/// per macro name on the preprocessor).
pub trait MacroCalledCallback {
/// Reports an invocation. `args` holds one token list per argument, or
/// `None` when no argument list is supplied; `interner` resolves interned names.
fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner);
/// Borrows the callback as `&dyn Any` for downcasting.
fn as_any(&self) -> &dyn Any;
/// Mutably borrows the callback as `&mut dyn Any` for downcasting.
fn as_any_mut(&mut self) -> &mut dyn Any;
}
/// Records whether a watched macro was invoked and the text of the arguments
/// of the most recent invocation that carried arguments.
///
/// State lives in `Cell`/`RefCell`, so it can be read and reset through `&self`.
pub struct MacroCallWatcher {
// Set to `true` by `on_macro_called`; reset by `take_called` / `clear`.
called: std::cell::Cell<bool>,
// One rendered string per argument of the last invocation that had arguments.
last_args: std::cell::RefCell<Option<Vec<String>>>,
}
impl MacroCallWatcher {
pub fn new() -> Self {
Self {
called: std::cell::Cell::new(false),
last_args: std::cell::RefCell::new(None),
}
}
pub fn take_called(&self) -> bool {
self.called.replace(false)
}
pub fn take_args(&self) -> Option<Vec<String>> {
self.last_args.borrow_mut().take()
}
pub fn clear(&self) {
self.called.set(false);
*self.last_args.borrow_mut() = None;
}
pub fn was_called(&self) -> bool {
self.called.get()
}
pub fn last_args(&self) -> Option<Vec<String>> {
self.last_args.borrow().clone()
}
fn tokens_to_string(tokens: &[Token], interner: &StringInterner) -> String {
tokens
.iter()
.map(|t| t.kind.format(interner))
.collect::<Vec<_>>()
.join("")
}
}
/// The default watcher is the same as [`MacroCallWatcher::new`]: nothing seen yet.
impl Default for MacroCallWatcher {
fn default() -> Self {
Self::new()
}
}
impl MacroCalledCallback for MacroCallWatcher {
    /// Marks the macro as called. When an argument list is supplied, each
    /// argument is rendered to a string and replaces the stored arguments;
    /// when `args` is `None` the previously stored arguments are left untouched.
    fn on_macro_called(&mut self, args: Option<&[Vec<Token>]>, interner: &StringInterner) {
        self.called.set(true);
        let Some(arg_lists) = args else {
            return;
        };
        let mut rendered = Vec::with_capacity(arg_lists.len());
        for arg_tokens in arg_lists {
            rendered.push(Self::tokens_to_string(arg_tokens, interner));
        }
        *self.last_args.borrow_mut() = Some(rendered);
    }

    /// Exposes the watcher as `&dyn Any` for downcasting.
    fn as_any(&self) -> &dyn Any {
        self
    }

    /// Exposes the watcher as `&mut dyn Any` for downcasting.
    fn as_any_mut(&mut self) -> &mut dyn Any {
        self
    }
}
/// Observer for comments encountered while lexing.
pub trait CommentCallback {
/// Reports a scanned comment together with the file it came from.
/// `is_target` is the result of the preprocessor's target-file check for
/// `file_id` (the comment scanners in this file only invoke the callback
/// when that check is `true`).
fn on_comment(&mut self, comment: &Comment, file_id: FileId, is_target: bool);
/// Converts the boxed callback into `Box<dyn Any>`; presumably so the owner
/// can downcast it back to the concrete type.
fn into_any(self: Box<Self>) -> Box<dyn Any>;
}
/// Flavor of an include request.
// NOTE(review): presumably `System` is `#include <...>` and `Local` is
// `#include "..."`; the code that uses this enum is not visible here — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum IncludeKind {
System,
Local,
}
/// User-supplied settings for a [`Preprocessor`].
#[derive(Debug, Default, Clone)]
pub struct PPConfig {
/// Directories to search for included files (the lookup code is outside this view).
pub include_paths: Vec<PathBuf>,
/// Macros defined before any input is read, as `(name, value)`.
/// A `None` value defines the macro as `1`.
pub predefined: Vec<(String, Option<String>)>,
// NOTE(review): presumably enables preprocessor debug output; not used in the visible code.
pub debug_pp: bool,
// NOTE(review): presumably the directory whose files count as "target" files
// for the `is_file_in_target` checks; the check itself is not visible here.
pub target_dir: Option<PathBuf>,
// NOTE(review): presumably controls emission of macro begin/end marker tokens;
// not used in the visible code — confirm.
pub emit_markers: bool,
}
/// Bookkeeping for one open conditional group on the conditional stack.
#[derive(Debug, Clone)]
struct CondState {
// NOTE(review): presumably whether the current branch of this group is active.
active: bool,
// NOTE(review): presumably whether any branch of this group has been active already.
seen_active: bool,
// NOTE(review): presumably whether `#else` was already seen for this group.
seen_else: bool,
// Location reported with `PPError::MissingEndif` when the group is still
// open at end of input.
loc: SourceLocation,
}
/// Per-token record of macro names that are blocked for that token.
// NOTE(review): presumably used to stop recursive re-expansion of a macro
// inside its own expansion; the expansion code is outside this view.
#[derive(Debug, Default)]
pub struct NoExpandRegistry {
// Token id -> set of macro names blocked for that token.
map: HashMap<TokenId, HashSet<InternedStr>>,
}
impl NoExpandRegistry {
pub fn new() -> Self {
Self {
map: HashMap::new(),
}
}
pub fn add(&mut self, token_id: TokenId, macro_id: InternedStr) {
self.map.entry(token_id).or_default().insert(macro_id);
}
pub fn extend(&mut self, token_id: TokenId, macros: impl IntoIterator<Item = InternedStr>) {
self.map.entry(token_id).or_default().extend(macros);
}
pub fn is_blocked(&self, token_id: TokenId, macro_id: InternedStr) -> bool {
self.map
.get(&token_id)
.map_or(false, |s| s.contains(¯o_id))
}
pub fn inherit(&mut self, from: TokenId, to: TokenId) {
if let Some(set) = self.map.get(&from).cloned() {
self.map.entry(to).or_default().extend(set);
}
}
pub fn get(&self, token_id: TokenId) -> Option<&HashSet<InternedStr>> {
self.map.get(&token_id)
}
pub fn is_empty(&self) -> bool {
self.map.is_empty()
}
pub fn len(&self) -> usize {
self.map.len()
}
}
/// One entry of the preprocessor's input stack: either raw file bytes that are
/// lexed on demand, or a pre-built token list that is replayed.
struct InputSource {
// Raw bytes of the input (empty for token-backed sources).
source: Vec<u8>,
// Index of the next unread byte in `source`.
pos: usize,
// Line number of the next unread byte (starts at 1 for files).
line: u32,
// Column number of the next unread byte (starts at 1 for files).
column: u32,
// File this input belongs to.
file_id: FileId,
// True until a byte other than space, tab or `\r` is consumed on the current line.
at_line_start: bool,
// `Some` for token-backed sources; the tokens to replay.
tokens: Option<Vec<Token>>,
// Index of the next token to replay from `tokens`.
token_pos: usize,
}
impl InputSource {
fn from_file(source: Vec<u8>, file_id: FileId) -> Self {
Self {
source,
pos: 0,
line: 1,
column: 1,
file_id,
at_line_start: true,
tokens: None,
token_pos: 0,
}
}
#[allow(dead_code)]
fn from_tokens(tokens: Vec<Token>, loc: SourceLocation) -> Self {
Self {
source: Vec::new(),
pos: 0,
line: loc.line,
column: loc.column,
file_id: loc.file_id,
at_line_start: false,
tokens: Some(tokens),
token_pos: 0,
}
}
fn is_token_source(&self) -> bool {
self.tokens.is_some()
}
fn next_buffered_token(&mut self) -> Option<Token> {
if let Some(ref tokens) = self.tokens {
if self.token_pos < tokens.len() {
let token = tokens[self.token_pos].clone();
self.token_pos += 1;
return Some(token);
}
}
None
}
fn is_at_line_start(&self) -> bool {
self.at_line_start
}
fn current_location(&self) -> SourceLocation {
SourceLocation::new(self.file_id, self.line, self.column)
}
fn skip_line_continuations(&self, start_pos: usize) -> usize {
let mut pos = start_pos;
loop {
if self.source.get(pos) == Some(&b'\\') {
let next = self.source.get(pos + 1);
if next == Some(&b'\n') {
pos += 2;
continue;
} else if next == Some(&b'\r') && self.source.get(pos + 2) == Some(&b'\n') {
pos += 3;
continue;
}
}
break;
}
pos
}
fn peek(&self) -> Option<u8> {
let pos = self.skip_line_continuations(self.pos);
self.source.get(pos).copied()
}
fn peek_n(&self, n: usize) -> Option<u8> {
let mut pos = self.pos;
for i in 0..=n {
pos = self.skip_line_continuations(pos);
if pos >= self.source.len() {
return None;
}
if i < n {
pos += 1;
}
}
self.source.get(pos).copied()
}
fn advance(&mut self) -> Option<u8> {
let old_pos = self.pos;
self.pos = self.skip_line_continuations(self.pos);
for i in old_pos..self.pos {
if self.source.get(i) == Some(&b'\n') {
self.line += 1;
}
}
let c = self.source.get(self.pos).copied()?;
self.pos += 1;
if c == b'\n' {
self.line += 1;
self.column = 1;
self.at_line_start = true;
} else {
self.column += 1;
if c != b' ' && c != b'\t' && c != b'\r' {
self.at_line_start = false;
}
}
Some(c)
}
fn skip_whitespace(&mut self) {
while let Some(c) = self.peek() {
if c == b' ' || c == b'\t' || c == b'\r' || c == 0x0C || c == 0x0B {
self.advance();
} else {
break;
}
}
}
}
/// Preprocessor state: the input stack, the macro table, conditional
/// tracking and the optional observer callbacks.
pub struct Preprocessor {
// Registry that hands out a `FileId` for every registered path.
files: FileRegistry,
// Interner for identifier and macro names.
interner: StringInterner,
// All currently defined macros.
macros: MacroTable,
// Settings supplied at construction.
config: PPConfig,
// Stack of inputs; the last element is the one being read. Exhausted inputs
// are popped while more than one remains.
sources: Vec<InputSource>,
// Open conditional groups; a non-empty stack at end of input is reported
// as `MissingEndif`.
cond_stack: Vec<CondState>,
// Pushed-back tokens; consumed last-in-first-out before lexing new input.
lookahead: Vec<Token>,
// Comments collected from lexed tokens, waiting to be attached to the next
// returned token or macro definition.
pending_comments: Vec<Comment>,
// False while inside a skipped conditional region: ordinary tokens are
// dropped and `define`/`undef`/`include`/`error` lines are skipped.
cond_active: bool,
// When true the lexer reports a run of blanks as a `Space` token instead of
// skipping it (set briefly by `process_define`).
return_spaces: bool,
// True while the synthetic `<cmdline>` definitions are processed; copied
// into `MacroDef::is_builtin`.
defining_builtin: bool,
// NOTE(review): presumably per-token expansion blocks; not used in the visible code.
no_expand_registry: NoExpandRegistry,
// Optional observer notified about each `#define`.
macro_def_callback: Option<Box<dyn MacroDefCallback>>,
// Per-macro invocation observers, keyed by macro name.
macro_called_callbacks: HashMap<InternedStr, Box<dyn MacroCalledCallback>>,
// NOTE(review): names registered through `add_wrapped_macro`; how they affect
// expansion is not visible here.
wrapped_macros: HashSet<InternedStr>,
// Optional observer notified about comments in target files.
comment_callback: Option<Box<dyn CommentCallback>>,
// NOTE(review): presumably macros that must not be expanded; usage not visible here.
skip_expand_macros: HashSet<InternedStr>,
// NOTE(review): presumably macros that are always expanded; usage not visible here.
explicit_expand_macros: HashSet<InternedStr>,
}
impl Preprocessor {
pub fn new(config: PPConfig) -> Self {
let mut pp = Self {
files: FileRegistry::new(),
interner: StringInterner::new(),
macros: MacroTable::new(),
config,
sources: Vec::new(),
cond_stack: Vec::new(),
lookahead: Vec::new(),
pending_comments: Vec::new(),
cond_active: true,
return_spaces: false,
defining_builtin: false,
no_expand_registry: NoExpandRegistry::new(),
macro_def_callback: None,
macro_called_callbacks: HashMap::new(),
wrapped_macros: HashSet::new(),
comment_callback: None,
skip_expand_macros: HashSet::new(),
explicit_expand_macros: HashSet::new(),
};
pp.define_predefined_macros();
pp
}
/// Installs the observer notified on every `#define`, replacing any previous one.
pub fn set_macro_def_callback(&mut self, callback: Box<dyn MacroDefCallback>) {
self.macro_def_callback = Some(callback);
}
/// Removes and returns the `#define` observer, leaving none installed.
pub fn take_macro_def_callback(&mut self) -> Option<Box<dyn MacroDefCallback>> {
self.macro_def_callback.take()
}
/// Installs the comment observer, replacing any previous one.
pub fn set_comment_callback(&mut self, callback: Box<dyn CommentCallback>) {
self.comment_callback = Some(callback);
}
/// Removes and returns the comment observer, leaving none installed.
pub fn take_comment_callback(&mut self) -> Option<Box<dyn CommentCallback>> {
self.comment_callback.take()
}
/// Registers `callback` as the invocation observer for `macro_name`,
/// replacing any observer already registered under that name.
pub fn set_macro_called_callback(
&mut self,
macro_name: InternedStr,
callback: Box<dyn MacroCalledCallback>,
) {
self.macro_called_callbacks.insert(macro_name, callback);
}
/// Unregisters and returns the invocation observer for `macro_name`, or
/// `None` when no observer is registered under that name.
pub fn take_macro_called_callback(
    &mut self,
    macro_name: InternedStr,
) -> Option<Box<dyn MacroCalledCallback>> {
    self.macro_called_callbacks.remove(&macro_name)
}
/// Borrows the invocation observer registered for `macro_name`, if any.
pub fn get_macro_called_callback(
    &self,
    macro_name: InternedStr,
) -> Option<&Box<dyn MacroCalledCallback>> {
    self.macro_called_callbacks.get(&macro_name)
}
/// Mutably borrows the invocation observer registered for `macro_name`, if any.
pub fn get_macro_called_callback_mut(
    &mut self,
    macro_name: InternedStr,
) -> Option<&mut Box<dyn MacroCalledCallback>> {
    self.macro_called_callbacks.get_mut(&macro_name)
}
/// Interns `macro_name` and adds it to the set of wrapped macros.
pub fn add_wrapped_macro(&mut self, macro_name: &str) {
let id = self.interner.intern(macro_name);
self.wrapped_macros.insert(id);
}
/// Adds `name` to the skip-expand set.
pub fn add_skip_expand_macro(&mut self, name: InternedStr) {
self.skip_expand_macros.insert(name);
}
/// Adds every name in `names` to the skip-expand set.
pub fn add_skip_expand_macros(&mut self, names: impl IntoIterator<Item = InternedStr>) {
self.skip_expand_macros.extend(names);
}
/// Adds `name` to the explicit-expand set.
pub fn add_explicit_expand_macro(&mut self, name: InternedStr) {
self.explicit_expand_macros.insert(name);
}
/// Adds every name in `names` to the explicit-expand set.
pub fn add_explicit_expand_macros(&mut self, names: impl IntoIterator<Item = InternedStr>) {
self.explicit_expand_macros.extend(names);
}
/// Defines the built-in `_Pragma(x)` macro (empty body) and every configured
/// predefined macro by synthesizing `#define` lines and running them through
/// the normal directive machinery as a pseudo file named `<cmdline>`.
///
/// Processing is best effort: the first lexing or directive error silently
/// stops it.
fn define_predefined_macros(&mut self) {
    let mut text = String::from("#define _Pragma(x)\n");
    for (name, value) in &self.config.predefined {
        // A macro configured without a value is defined as `1`.
        let replacement = value.as_deref().unwrap_or("1");
        text.push_str(&format!("#define {} {}\n", name, replacement));
    }
    if text.is_empty() {
        return;
    }

    let file_id = self.files.register(PathBuf::from("<cmdline>"));
    self.sources
        .push(InputSource::from_file(text.into_bytes(), file_id));

    // Definitions made while this flag is set are marked as built-in.
    self.defining_builtin = true;
    while let Ok(token) = self.next_raw_token() {
        match token.kind {
            TokenKind::Eof => break,
            TokenKind::Hash => {
                if self.process_directive(token.loc).is_err() {
                    break;
                }
            }
            // Newlines and any stray tokens are ignored.
            _ => {}
        }
    }
    self.defining_builtin = false;
    self.sources.pop();
}
/// Lexes `s` with a standalone [`Lexer`] (using the default `FileId`) and
/// returns its tokens without newlines. Lexing stops at end of input or at
/// the first lexer error; tokens produced before the error are kept.
fn tokenize_string(&mut self, s: &str) -> Vec<Token> {
    let mut lexer = Lexer::new(s.as_bytes(), FileId::default(), &mut self.interner);
    let mut collected = Vec::new();
    while let Ok(token) = lexer.next_token() {
        match token.kind {
            TokenKind::Eof => break,
            TokenKind::Newline => {}
            _ => collected.push(token),
        }
    }
    collected
}
/// Reads the file at `path`, registers it and pushes it on top of the input
/// stack.
///
/// # Errors
/// Returns `CompileError::Preprocess` with `PPError::IoError` (carrying the
/// path and the I/O error text) when the file cannot be read.
pub fn add_source_file(&mut self, path: &Path) -> Result<(), CompileError> {
    let bytes = match fs::read(path) {
        Ok(bytes) => bytes,
        Err(io_err) => {
            return Err(CompileError::Preprocess {
                loc: SourceLocation::default(),
                kind: PPError::IoError(path.to_path_buf(), io_err.to_string()),
            });
        }
    };
    let file_id = self.files.register(path.to_path_buf());
    self.sources.push(InputSource::from_file(bytes, file_id));
    Ok(())
}
/// Produces the next token from the input on top of the stack.
///
/// * Returns `Ok(None)` when the stack is empty or a token-backed input has
///   no tokens left.
/// * Token-backed inputs simply replay their next buffered token.
/// * For byte-backed inputs, leading blanks are skipped — or, while
///   `return_spaces` is set, reported as a single `Space` token — then any
///   comments are scanned and stored in the token's `leading_comments`,
///   and finally one token is lexed.
///
/// # Errors
/// Propagates lexing errors from the comment and token scanners.
fn lex_token_from_source(&mut self) -> Result<Option<Token>, CompileError> {
    // Blank bytes within a line. `\r` is included so that CRLF input such as
    // `#define FOO\r\n` does not hand a bare `\r` to the token scanner (which
    // rejects it as an invalid character); the normal path's
    // `skip_whitespace` treats `\r` as blank as well.
    fn is_blank(c: u8) -> bool {
        matches!(c, b' ' | b'\t' | b'\r' | 0x0C | 0x0B)
    }

    {
        let Some(source) = self.sources.last_mut() else {
            return Ok(None);
        };
        if source.is_token_source() {
            return Ok(source.next_buffered_token());
        }
        if self.return_spaces {
            if source.peek().is_some_and(is_blank) {
                // Collapse the whole run of blanks into one `Space` token
                // located at its first byte.
                let loc = source.current_location();
                source.advance();
                while source.peek().is_some_and(is_blank) {
                    source.advance();
                }
                return Ok(Some(Token::new(TokenKind::Space, loc)));
            }
        } else {
            source.skip_whitespace();
        }
    }

    // Gather every comment that precedes the token.
    let mut leading_comments = Vec::new();
    loop {
        {
            let Some(source) = self.sources.last_mut() else {
                return Ok(None);
            };
            if !self.return_spaces {
                source.skip_whitespace();
            }
        }
        let (is_line_comment, is_block_comment) = {
            let Some(source) = self.sources.last() else {
                return Ok(None);
            };
            (
                source.peek() == Some(b'/') && source.peek_n(1) == Some(b'/'),
                source.peek() == Some(b'/') && source.peek_n(1) == Some(b'*'),
            )
        };
        if is_line_comment {
            let comment = self.scan_line_comment();
            leading_comments.push(comment);
        } else if is_block_comment {
            let comment = self.scan_block_comment()?;
            leading_comments.push(comment);
        } else {
            break;
        }
    }

    // The token is located at its first byte, after blanks and comments.
    let loc = {
        let Some(source) = self.sources.last() else {
            return Ok(None);
        };
        source.current_location()
    };
    let kind = self.scan_token_kind()?;
    let mut token = Token::new(kind, loc);
    token.leading_comments = leading_comments;
    Ok(Some(token))
}
/// Scans a `//` comment starting at the current position and returns it.
/// The comment text excludes the `//` and the terminating newline, which is
/// left unconsumed. The comment callback is notified when the comment's file
/// passes the target-file check.
///
/// Panics if the input stack is empty.
fn scan_line_comment(&mut self) -> Comment {
    let source = self.sources.last_mut().unwrap();
    let loc = source.current_location();
    let file_id = source.file_id;

    // Step over the two slashes.
    source.advance();
    source.advance();

    let start = source.pos;
    while let Some(c) = source.peek() {
        if c == b'\n' {
            break;
        }
        source.advance();
    }
    let text = String::from_utf8_lossy(&source.source[start..source.pos]).to_string();

    let comment = Comment::new(crate::token::CommentKind::Line, text, loc);
    if self.is_file_in_target(file_id) {
        if let Some(cb) = self.comment_callback.as_mut() {
            cb.on_comment(&comment, file_id, true);
        }
    }
    comment
}
/// Scans a `/* ... */` comment starting at the current position and returns
/// it. The comment text excludes both delimiters. The comment callback is
/// notified when the comment's file passes the target-file check.
///
/// # Errors
/// Returns `LexError::UnterminatedComment`, located at the comment's start,
/// when the input ends before `*/`.
///
/// Panics if the input stack is empty.
fn scan_block_comment(&mut self) -> Result<Comment, CompileError> {
    let source = self.sources.last_mut().unwrap();
    let loc = source.current_location();
    let file_id = source.file_id;

    // Step over the opening `/*`.
    source.advance();
    source.advance();

    let start = source.pos;
    loop {
        match (source.peek(), source.peek_n(1)) {
            (Some(b'*'), Some(b'/')) => break,
            (Some(_), _) => {
                source.advance();
            }
            (None, _) => {
                return Err(CompileError::Lex {
                    loc,
                    kind: crate::error::LexError::UnterminatedComment,
                });
            }
        }
    }
    let end = source.pos;
    // Step over the closing `*/`.
    source.advance();
    source.advance();
    let text = String::from_utf8_lossy(&source.source[start..end]).to_string();

    let comment = Comment::new(crate::token::CommentKind::Block, text, loc);
    if self.is_file_in_target(file_id) {
        if let Some(cb) = self.comment_callback.as_mut() {
            cb.on_comment(&comment, file_id, true);
        }
    }
    Ok(comment)
}
/// Lexes one token from the input on top of the stack and returns its kind.
///
/// Dispatches on the next byte: end of input yields `Eof`, `\n` yields
/// `Newline`, and the remaining bytes are routed to the matching `scan_*`
/// helper or handled inline for single-byte punctuators.
///
/// # Errors
/// Returns `LexError::InvalidChar` (after consuming the byte) for any byte
/// that starts no token, and propagates errors from the `scan_*` helpers.
///
/// Panics if the input stack is empty.
fn scan_token_kind(&mut self) -> Result<TokenKind, CompileError> {
let source = self.sources.last_mut().unwrap();
let Some(c) = source.peek() else {
return Ok(TokenKind::Eof);
};
match c {
b'\n' => {
source.advance();
Ok(TokenKind::Newline)
}
// `L"..."` / `L'...'`: consume the `L` prefix, then scan the wide literal.
// This arm must come before the identifier arm, which also matches `L`.
b'L' if matches!(source.peek_n(1), Some(b'"') | Some(b'\'')) => {
source.advance(); if source.peek() == Some(b'"') {
self.scan_wide_string()
} else {
self.scan_wide_char()
}
}
b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.scan_identifier(),
b'0'..=b'9' => self.scan_number(),
b'"' => self.scan_string(),
b'\'' => self.scan_char(),
// Operators with optional second byte: (continuation byte, resulting kind)
// pairs, followed by the kind used when no continuation matches.
b'+' => self.scan_operator(b'+', &[(b'+', TokenKind::PlusPlus), (b'=', TokenKind::PlusEq)], TokenKind::Plus),
b'-' => self.scan_operator(b'-', &[(b'-', TokenKind::MinusMinus), (b'=', TokenKind::MinusEq), (b'>', TokenKind::Arrow)], TokenKind::Minus),
b'*' => self.scan_operator(b'*', &[(b'=', TokenKind::StarEq)], TokenKind::Star),
b'/' => self.scan_operator(b'/', &[(b'=', TokenKind::SlashEq)], TokenKind::Slash),
b'%' => self.scan_operator(b'%', &[(b'=', TokenKind::PercentEq)], TokenKind::Percent),
b'&' => self.scan_operator(b'&', &[(b'&', TokenKind::AmpAmp), (b'=', TokenKind::AmpEq)], TokenKind::Amp),
b'|' => self.scan_operator(b'|', &[(b'|', TokenKind::PipePipe), (b'=', TokenKind::PipeEq)], TokenKind::Pipe),
b'^' => self.scan_operator(b'^', &[(b'=', TokenKind::CaretEq)], TokenKind::Caret),
b'~' => {
source.advance();
Ok(TokenKind::Tilde)
}
b'!' => self.scan_operator(b'!', &[(b'=', TokenKind::BangEq)], TokenKind::Bang),
b'<' => self.scan_lt(),
b'>' => self.scan_gt(),
b'=' => self.scan_operator(b'=', &[(b'=', TokenKind::EqEq)], TokenKind::Eq),
b'?' => {
source.advance();
Ok(TokenKind::Question)
}
b':' => {
source.advance();
Ok(TokenKind::Colon)
}
b'.' => self.scan_dot(),
b',' => {
source.advance();
Ok(TokenKind::Comma)
}
b';' => {
source.advance();
Ok(TokenKind::Semi)
}
b'(' => {
source.advance();
Ok(TokenKind::LParen)
}
b')' => {
source.advance();
Ok(TokenKind::RParen)
}
b'[' => {
source.advance();
Ok(TokenKind::LBracket)
}
b']' => {
source.advance();
Ok(TokenKind::RBracket)
}
b'{' => {
source.advance();
Ok(TokenKind::LBrace)
}
b'}' => {
source.advance();
Ok(TokenKind::RBrace)
}
// `#` or `##`.
b'#' => {
source.advance();
if source.peek() == Some(b'#') {
source.advance();
Ok(TokenKind::HashHash)
} else {
Ok(TokenKind::Hash)
}
}
// A backslash that is not part of a line continuation (`peek` already
// looks through continuations).
b'\\' => {
source.advance();
Ok(TokenKind::Backslash)
}
// Anything else starts no token: consume it so lexing can make progress
// and report it at its own location.
_ => {
let loc = source.current_location();
source.advance();
Err(CompileError::Lex {
loc,
kind: crate::error::LexError::InvalidChar(c as char),
})
}
}
}
/// Consumes an operator's first byte and, if the following byte matches one
/// of `continuations`, that byte as well.
///
/// Returns the kind paired with the first matching continuation byte, or
/// `default` when none matches. `_first` is unused.
///
/// Panics if the input stack is empty.
fn scan_operator(&mut self, _first: u8, continuations: &[(u8, TokenKind)], default: TokenKind) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    source.advance();
    let upcoming = source.peek();
    let hit = continuations
        .iter()
        .find(|pair| upcoming == Some(pair.0));
    if let Some((_, kind)) = hit {
        source.advance();
        return Ok(kind.clone());
    }
    Ok(default)
}
/// Lexes a token starting with `<`: one of `<`, `<=`, `<<` or `<<=`.
///
/// Panics if the input stack is empty.
fn scan_lt(&mut self) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    source.advance();
    let kind = match source.peek() {
        Some(b'=') => {
            source.advance();
            TokenKind::LtEq
        }
        Some(b'<') => {
            source.advance();
            if source.peek() == Some(b'=') {
                source.advance();
                TokenKind::LtLtEq
            } else {
                TokenKind::LtLt
            }
        }
        _ => TokenKind::Lt,
    };
    Ok(kind)
}
/// Lexes a token starting with `>`: one of `>`, `>=`, `>>` or `>>=`.
///
/// Panics if the input stack is empty.
fn scan_gt(&mut self) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    source.advance();
    let kind = match source.peek() {
        Some(b'=') => {
            source.advance();
            TokenKind::GtEq
        }
        Some(b'>') => {
            source.advance();
            if source.peek() == Some(b'=') {
                source.advance();
                TokenKind::GtGtEq
            } else {
                TokenKind::GtGt
            }
        }
        _ => TokenKind::Gt,
    };
    Ok(kind)
}
/// Lexes a token starting with `.`: `...` when two more dots follow,
/// otherwise a single `.`.
///
/// Panics if the input stack is empty.
fn scan_dot(&mut self) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    source.advance();
    let two_more_dots = source.peek() == Some(b'.') && source.peek_n(1) == Some(b'.');
    if !two_more_dots {
        return Ok(TokenKind::Dot);
    }
    source.advance();
    source.advance();
    Ok(TokenKind::Ellipsis)
}
/// Lexes an identifier made of ASCII letters, digits and `_`.
///
/// Returns the keyword kind when the text is a keyword, otherwise
/// `TokenKind::Ident` with the interned name.
///
/// Panics if the input stack is empty.
fn scan_identifier(&mut self) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    let mut name_bytes = Vec::new();
    while let Some(c) = source
        .peek()
        .filter(|c| c.is_ascii_alphanumeric() || *c == b'_')
    {
        name_bytes.push(c);
        source.advance();
    }
    // Only ASCII bytes were collected, so this conversion cannot fail.
    let text = std::str::from_utf8(&name_bytes).unwrap();
    let kind = match TokenKind::from_keyword(text) {
        Some(keyword) => keyword,
        None => TokenKind::Ident(self.interner.intern(text)),
    };
    Ok(kind)
}
/// Lexes a numeric literal starting at a digit.
///
/// Consumes the digits of a decimal, hexadecimal (`0x`), binary (`0b`) or
/// octal (leading `0`) integer, then hands over to `finish_integer`. When a
/// decimal digit run (or a lone `0`) is followed by `.`, `e` or `E`, the
/// literal is handed to `scan_float_from` instead.
///
/// Panics if the input stack is empty.
fn scan_number(&mut self) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();
    let loc = source.current_location();
    let start = source.pos;
    let starts_float_tail = |next: Option<u8>| matches!(next, Some(b'.' | b'e' | b'E'));

    if source.peek() != Some(b'0') {
        // Plain decimal digits.
        while matches!(source.peek(), Some(b'0'..=b'9')) {
            source.advance();
        }
        if starts_float_tail(source.peek()) {
            return self.scan_float_from(start, loc);
        }
        return self.finish_integer(start, loc);
    }

    // Leading zero: decide the radix from the byte that follows.
    source.advance();
    match source.peek() {
        Some(b'x' | b'X') => {
            source.advance();
            while matches!(source.peek(), Some(c) if c.is_ascii_hexdigit()) {
                source.advance();
            }
        }
        Some(b'b' | b'B') => {
            source.advance();
            while matches!(source.peek(), Some(b'0' | b'1')) {
                source.advance();
            }
        }
        Some(b'0'..=b'7') => {
            while matches!(source.peek(), Some(b'0'..=b'7')) {
                source.advance();
            }
        }
        next if starts_float_tail(next) => {
            return self.scan_float_from(start, loc);
        }
        _ => {}
    }
    self.finish_integer(start, loc)
}
/// Finishes lexing a floating-point literal whose integer part (beginning at
/// byte offset `start`) has already been consumed.
///
/// Consumes an optional fraction, an optional exponent with optional sign
/// and one optional `f`/`F`/`l`/`L` suffix, then parses the text (suffix
/// characters stripped) as `f64`.
///
/// # Errors
/// Returns `LexError::InvalidNumber` with the literal's text, located at
/// `loc`, when the text does not parse.
///
/// Panics if the input stack is empty.
fn scan_float_from(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind, CompileError> {
    let source = self.sources.last_mut().unwrap();

    // Fraction.
    if source.peek() == Some(b'.') {
        source.advance();
        while matches!(source.peek(), Some(b'0'..=b'9')) {
            source.advance();
        }
    }
    // Exponent.
    if matches!(source.peek(), Some(b'e' | b'E')) {
        source.advance();
        if matches!(source.peek(), Some(b'+' | b'-')) {
            source.advance();
        }
        while matches!(source.peek(), Some(b'0'..=b'9')) {
            source.advance();
        }
    }
    // Suffix.
    if matches!(source.peek(), Some(b'f' | b'F' | b'l' | b'L')) {
        source.advance();
    }

    let text = std::str::from_utf8(&source.source[start..source.pos]).unwrap();
    let digits = text.trim_end_matches(&['f', 'F', 'l', 'L'][..]);
    match digits.parse::<f64>() {
        Ok(value) => Ok(TokenKind::FloatLit(value)),
        Err(_) => Err(CompileError::Lex {
            loc,
            kind: crate::error::LexError::InvalidNumber(text.to_string()),
        }),
    }
}
/// Finishes lexing an integer literal whose digits (beginning at byte offset
/// `start`) have already been consumed: reads the `u`/`l` suffix letters,
/// determines the radix from the prefix and parses the value.
///
/// Returns `UIntLit` when the literal has a `u`/`U` suffix or two `l`/`L`
/// suffix letters, or when the value does not fit `i64`; otherwise `IntLit`.
///
/// # Errors
/// Returns `LexError::InvalidNumber` with the literal's text, located at
/// `loc`, when the digits do not parse in the detected radix.
///
/// Panics if the input stack is empty.
fn finish_integer(&mut self, start: usize, loc: SourceLocation) -> Result<TokenKind, CompileError> {
let source = self.sources.last_mut().unwrap();
let mut is_unsigned = false;
let mut is_long = false;
let mut is_longlong = false;
// Consume any run of suffix letters, in any order and of any length.
loop {
match source.peek() {
Some(b'u') | Some(b'U') => {
is_unsigned = true;
source.advance();
}
Some(b'l') | Some(b'L') => {
// A second `l`/`L` upgrades `long` to `long long`.
if is_long {
is_longlong = true;
}
is_long = true;
source.advance();
}
_ => break,
}
}
let text = std::str::from_utf8(&source.source[start..source.pos]).unwrap();
// Pick the radix from the prefix; hex and binary drop their two-byte prefix.
let (num_text, radix) = if text.starts_with("0x") || text.starts_with("0X") {
(&text[2..], 16)
} else if text.starts_with("0b") || text.starts_with("0B") {
(&text[2..], 2)
} else if text.starts_with('0') && text.len() > 1 {
// A leading zero means octal only if digits remain once the suffix is
// removed; a bare `0` with a suffix (e.g. `0u`) stays decimal.
let without_suffix = text.trim_end_matches(|c: char| c == 'u' || c == 'U' || c == 'l' || c == 'L');
if without_suffix.len() > 1 {
(without_suffix, 8)
} else {
(without_suffix, 10)
}
} else {
(text, 10)
};
// Strip the suffix letters in the branches that have not done so yet.
let num_text = num_text.trim_end_matches(|c: char| c == 'u' || c == 'U' || c == 'l' || c == 'L');
// NOTE(review): a `ll`/`LL` suffix without `u` also produces `UIntLit`
// here, although such a literal is signed in C — confirm this is intended.
if is_unsigned || is_longlong {
let value = u64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
loc: loc.clone(),
kind: crate::error::LexError::InvalidNumber(text.to_string()),
})?;
Ok(TokenKind::UIntLit(value))
} else {
match i64::from_str_radix(num_text, radix) {
Ok(value) => Ok(TokenKind::IntLit(value)),
// Too large for `i64`: retry as `u64` before giving up.
Err(_) => {
let value = u64::from_str_radix(num_text, radix).map_err(|_| CompileError::Lex {
loc: loc.clone(),
kind: crate::error::LexError::InvalidNumber(text.to_string()),
})?;
Ok(TokenKind::UIntLit(value))
}
}
}
}
/// Lexes a `"..."` string literal starting at the opening quote and returns
/// its bytes with escape sequences decoded.
///
/// # Errors
/// Returns `LexError::UnterminatedString`, located at the opening quote,
/// when a newline or the end of input comes before the closing quote;
/// propagates escape-sequence errors.
///
/// Panics if the input stack is empty.
fn scan_string(&mut self) -> Result<TokenKind, CompileError> {
    let loc = {
        let source = self.sources.last_mut().unwrap();
        let opened_at = source.current_location();
        // Step over the opening quote.
        source.advance();
        opened_at
    };
    let mut bytes = Vec::new();
    loop {
        let upcoming = self.sources.last_mut().unwrap().peek();
        match upcoming {
            None | Some(b'\n') => {
                return Err(CompileError::Lex {
                    loc,
                    kind: crate::error::LexError::UnterminatedString,
                });
            }
            Some(b'"') => {
                self.sources.last_mut().unwrap().advance();
                return Ok(TokenKind::StringLit(bytes));
            }
            Some(b'\\') => {
                self.sources.last_mut().unwrap().advance();
                let decoded = self.scan_escape_sequence(&loc)?;
                bytes.push(decoded);
            }
            Some(plain) => {
                self.sources.last_mut().unwrap().advance();
                bytes.push(plain);
            }
        }
    }
}
/// Lexes the `"..."` part of a wide string literal (the `L` prefix is already
/// consumed) and returns its elements, one `u32` per source byte or decoded
/// escape sequence.
///
/// # Errors
/// Returns `LexError::UnterminatedString`, located at the opening quote,
/// when a newline or the end of input comes before the closing quote;
/// propagates escape-sequence errors.
///
/// Panics if the input stack is empty.
fn scan_wide_string(&mut self) -> Result<TokenKind, CompileError> {
    let loc = {
        let source = self.sources.last_mut().unwrap();
        let opened_at = source.current_location();
        // Step over the opening quote.
        source.advance();
        opened_at
    };
    let mut units = Vec::new();
    loop {
        let upcoming = self.sources.last_mut().unwrap().peek();
        match upcoming {
            None | Some(b'\n') => {
                return Err(CompileError::Lex {
                    loc,
                    kind: crate::error::LexError::UnterminatedString,
                });
            }
            Some(b'"') => {
                self.sources.last_mut().unwrap().advance();
                return Ok(TokenKind::WideStringLit(units));
            }
            Some(b'\\') => {
                self.sources.last_mut().unwrap().advance();
                let decoded = self.scan_escape_sequence(&loc)?;
                units.push(decoded as u32);
            }
            Some(plain) => {
                self.sources.last_mut().unwrap().advance();
                units.push(plain as u32);
            }
        }
    }
}
/// Lexes a `'x'` character literal starting at the opening quote; exactly one
/// source byte or escape sequence is accepted between the quotes.
///
/// # Errors
/// * `LexError::EmptyCharLit` for `''`.
/// * `LexError::UnterminatedChar` when the input ends after the opening
///   quote or the closing quote does not follow the character.
/// * Escape-sequence errors are propagated.
///
/// All errors are located at the opening quote. Panics if the input stack is
/// empty.
fn scan_char(&mut self) -> Result<TokenKind, CompileError> {
    let loc = {
        let source = self.sources.last_mut().unwrap();
        let opened_at = source.current_location();
        // Step over the opening quote.
        source.advance();
        opened_at
    };
    let upcoming = self.sources.last().unwrap().peek();
    let value = match upcoming {
        None => {
            return Err(CompileError::Lex {
                loc,
                kind: crate::error::LexError::UnterminatedChar,
            });
        }
        Some(b'\'') => {
            return Err(CompileError::Lex {
                loc,
                kind: crate::error::LexError::EmptyCharLit,
            });
        }
        Some(b'\\') => {
            self.sources.last_mut().unwrap().advance();
            self.scan_escape_sequence(&loc)?
        }
        Some(plain) => {
            self.sources.last_mut().unwrap().advance();
            plain
        }
    };
    let source = self.sources.last_mut().unwrap();
    if source.peek() == Some(b'\'') {
        source.advance();
        Ok(TokenKind::CharLit(value))
    } else {
        Err(CompileError::Lex {
            loc,
            kind: crate::error::LexError::UnterminatedChar,
        })
    }
}
/// Lexes the `'x'` part of a wide character literal (the `L` prefix is
/// already consumed); exactly one source byte or escape sequence is accepted
/// between the quotes and returned widened to `u32`.
///
/// # Errors
/// * `LexError::EmptyCharLit` for `''`.
/// * `LexError::UnterminatedChar` when the input ends after the opening
///   quote or the closing quote does not follow the character.
/// * Escape-sequence errors are propagated.
///
/// All errors are located at the opening quote. Panics if the input stack is
/// empty.
fn scan_wide_char(&mut self) -> Result<TokenKind, CompileError> {
    let loc = {
        let source = self.sources.last_mut().unwrap();
        let opened_at = source.current_location();
        // Step over the opening quote.
        source.advance();
        opened_at
    };
    let upcoming = self.sources.last().unwrap().peek();
    let value = match upcoming {
        None => {
            return Err(CompileError::Lex {
                loc,
                kind: crate::error::LexError::UnterminatedChar,
            });
        }
        Some(b'\'') => {
            return Err(CompileError::Lex {
                loc,
                kind: crate::error::LexError::EmptyCharLit,
            });
        }
        Some(b'\\') => {
            self.sources.last_mut().unwrap().advance();
            self.scan_escape_sequence(&loc)? as u32
        }
        Some(plain) => {
            self.sources.last_mut().unwrap().advance();
            plain as u32
        }
    };
    let source = self.sources.last_mut().unwrap();
    if source.peek() == Some(b'\'') {
        source.advance();
        Ok(TokenKind::WideCharLit(value))
    } else {
        Err(CompileError::Lex {
            loc,
            kind: crate::error::LexError::UnterminatedChar,
        })
    }
}
/// Decodes the escape sequence that follows an already consumed backslash
/// and returns the byte it denotes.
///
/// * Octal: one to three octal digits (`\0`, `\12`, `\033`, ...). Values
///   above 255 wrap around to fit a byte.
/// * Hexadecimal: `\x` followed by at most two hex digits; `\x` without any
///   digit yields the letter `x`.
/// * Simple escapes `\n \t \r \a \b \f \v` map to their control bytes.
/// * Any other byte (including `\\`, `\'`, `\"`) stands for itself.
///
/// `loc` is the location reported on error.
///
/// # Errors
/// Returns `LexError::UnterminatedString` when the input ends right after
/// the backslash.
///
/// Panics if the input stack is empty.
fn scan_escape_sequence(&mut self, loc: &SourceLocation) -> Result<u8, CompileError> {
    let source = self.sources.last_mut().unwrap();
    let Some(lead) = source.peek() else {
        return Err(CompileError::Lex {
            loc: loc.clone(),
            kind: crate::error::LexError::UnterminatedString,
        });
    };
    match lead {
        // Octal escapes are handled before anything else so that sequences
        // starting with `0` (e.g. `\012`, `\033`) are decoded as a whole
        // instead of as NUL followed by literal digits.
        b'0'..=b'7' => {
            let mut value = lead - b'0';
            source.advance();
            for _ in 0..2 {
                match source.peek() {
                    Some(digit @ b'0'..=b'7') => {
                        // Wrapping keeps out-of-range escapes such as `\400`
                        // from overflowing, matching the hex branch.
                        value = value.wrapping_mul(8).wrapping_add(digit - b'0');
                        source.advance();
                    }
                    _ => break,
                }
            }
            Ok(value)
        }
        b'x' => {
            source.advance();
            let mut value = 0u8;
            let mut count = 0;
            while let Some(c) = source.peek() {
                let Some(digit) = (c as char).to_digit(16) else {
                    break;
                };
                value = value.wrapping_mul(16).wrapping_add(digit as u8);
                source.advance();
                count += 1;
                // At most two hex digits are consumed.
                if count >= 2 {
                    break;
                }
            }
            if count == 0 {
                Ok(b'x')
            } else {
                Ok(value)
            }
        }
        other => {
            source.advance();
            Ok(match other {
                b'n' => b'\n',
                b't' => b'\t',
                b'r' => b'\r',
                b'a' => 0x07,
                b'b' => 0x08,
                b'f' => 0x0C,
                b'v' => 0x0B,
                // `\\`, `\'`, `\"` and unknown escapes denote the byte itself.
                _ => other,
            })
        }
    }
}
/// Returns the next fully preprocessed token.
///
/// Pushed-back tokens are consumed first; otherwise a token is lexed from
/// the input on top of the stack, popping exhausted inputs while more than
/// one remains. Newlines are dropped, `#` starts a directive, identifiers
/// in active regions are macro-expanded (the expansion is pushed back and
/// re-read), and every token inside an inactive conditional region is
/// discarded. Pending comments are attached to the returned token.
///
/// # Errors
/// Returns `PPError::MissingEndif` (located at the outermost open
/// conditional) when the input ends inside a conditional group, and
/// propagates lexing, directive and expansion errors.
pub fn next_token(&mut self) -> Result<Token, CompileError> {
    loop {
        let token = if let Some(token) = self.lookahead.pop() {
            token
        } else {
            match self.lex_token_from_source()? {
                Some(t) => t,
                None => {
                    // Current input is exhausted: resume the one below it,
                    // or synthesize `Eof` when it was the last one.
                    if self.sources.len() > 1 {
                        self.sources.pop();
                        continue;
                    }
                    Token::new(TokenKind::Eof, SourceLocation::default())
                }
            }
        };
        if !token.leading_comments.is_empty() {
            self.pending_comments.extend(token.leading_comments.iter().cloned());
        }
        match &token.kind {
            TokenKind::Eof => {
                if self.sources.len() > 1 {
                    self.sources.pop();
                    continue;
                }
                if !self.cond_stack.is_empty() {
                    let state = &self.cond_stack[0];
                    return Err(CompileError::Preprocess {
                        loc: state.loc.clone(),
                        kind: PPError::MissingEndif,
                    });
                }
                return Ok(token);
            }
            TokenKind::Newline => {
                continue;
            }
            TokenKind::Hash => {
                // Every `#` token starts a directive here, wherever it sits
                // on the line. Directives are processed in inactive regions
                // too, so that conditional nesting stays balanced.
                self.process_directive(token.loc.clone())?;
                continue;
            }
            TokenKind::Ident(id) if self.cond_active => {
                let id = *id;
                if let Some(expanded) = self.try_expand_macro(id, &token)? {
                    // `lookahead` is a stack: push in reverse so the first
                    // expanded token is read first.
                    for t in expanded.into_iter().rev() {
                        self.lookahead.push(t);
                    }
                    continue;
                }
                return Ok(self.attach_comments(token));
            }
            _ if !self.cond_active => {
                continue;
            }
            _ => {
                return Ok(self.attach_comments(token));
            }
        }
    }
}
/// Pushes `token` back so that it is the next token returned; tokens pushed
/// back are returned in last-in-first-out order.
pub fn unget_token(&mut self, token: Token) {
self.lookahead.push(token);
}
/// Returns the next token without directive handling, macro expansion or
/// conditional filtering.
///
/// Pushed-back tokens come first. Otherwise a token is lexed from the input
/// on top of the stack and its leading comments are added to the pending
/// comments; exhausted inputs are popped while more than one remains, and a
/// default-located `Eof` is returned once the last one is exhausted.
///
/// # Errors
/// Propagates lexing errors.
fn next_raw_token(&mut self) -> Result<Token, CompileError> {
    loop {
        if let Some(pushed_back) = self.lookahead.pop() {
            return Ok(pushed_back);
        }
        if let Some(token) = self.lex_token_from_source()? {
            if !token.leading_comments.is_empty() {
                self.pending_comments
                    .extend(token.leading_comments.iter().cloned());
            }
            return Ok(token);
        }
        if self.sources.len() <= 1 {
            return Ok(Token::new(TokenKind::Eof, SourceLocation::default()));
        }
        self.sources.pop();
    }
}
/// Moves all pending comments onto `token` (replacing its own leading
/// comments) and returns it; the token is returned unchanged when no
/// comments are pending.
fn attach_comments(&mut self, mut token: Token) -> Token {
    if self.pending_comments.is_empty() {
        return token;
    }
    token.leading_comments = std::mem::take(&mut self.pending_comments);
    token
}
/// Handles the directive that follows an already consumed `#` located at `loc`.
///
/// * A newline or end of input right after `#` is a null directive.
/// * An identifier, or one of the keywords `if`, `else`, `for`, is
///   dispatched by name to `process_directive_by_name`.
/// * An integer literal makes the rest of the line be skipped.
///
/// # Errors
/// Returns `PPError::InvalidDirective` with the debug form of the token for
/// any other token, and propagates errors from the directive handlers.
fn process_directive(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    let directive = self.next_raw_token()?;
    let name = match &directive.kind {
        TokenKind::Newline | TokenKind::Eof => return Ok(()),
        TokenKind::IntLit(_) => {
            self.skip_to_eol()?;
            return Ok(());
        }
        TokenKind::Ident(id) => self.interner.get(*id).to_string(),
        // Directive names that the lexer reports as keywords.
        TokenKind::KwIf => String::from("if"),
        TokenKind::KwElse => String::from("else"),
        TokenKind::KwFor => String::from("for"),
        other => {
            return Err(CompileError::Preprocess {
                loc,
                kind: PPError::InvalidDirective(format!("{:?}", other)),
            });
        }
    };
    self.process_directive_by_name(&name, loc)
}
/// Dispatches the directive called `name`, whose `#` is located at `loc`.
///
/// Conditional directives (`if`, `ifdef`, `ifndef`, `elif`, `else`,
/// `endif`) are always processed. `warning`, `pragma` and `line` are always
/// skipped. `define`, `undef`, `include`, `include_next` and `error` are
/// processed only in an active region; in an inactive region they — and
/// unknown names — are skipped to the end of the line.
///
/// # Errors
/// Returns `PPError::InvalidDirective` for an unknown name in an active
/// region, and propagates errors from the directive handlers.
fn process_directive_by_name(&mut self, name: &str, loc: SourceLocation) -> Result<(), CompileError> {
    match name {
        // Conditionals must be tracked even while skipping.
        "if" => self.process_if(loc)?,
        "ifdef" => self.process_ifdef(loc, false)?,
        "ifndef" => self.process_ifdef(loc, true)?,
        "elif" => self.process_elif(loc)?,
        "else" => self.process_else(loc)?,
        "endif" => self.process_endif()?,
        // Recognized but ignored.
        "warning" | "pragma" | "line" => {
            self.skip_to_eol()?;
        }
        // Everything below only takes effect in an active region.
        _ if !self.cond_active => {
            self.skip_to_eol()?;
        }
        "define" => self.process_define(loc)?,
        "undef" => self.process_undef()?,
        "include" => self.process_include(loc, false)?,
        "include_next" => self.process_include(loc, true)?,
        "error" => self.process_error(loc)?,
        unknown => {
            return Err(CompileError::Preprocess {
                loc,
                kind: PPError::InvalidDirective(unknown.to_string()),
            });
        }
    }
    Ok(())
}
/// Handles `#define`: reads the macro name, an optional parameter list and
/// the replacement tokens up to the end of the line, notifies the
/// definition callback and stores the macro.
///
/// A `(` that follows the name with no blank in between starts a parameter
/// list (function-like macro); anything else makes the macro object-like.
/// Keywords are accepted as macro names. Pending comments become the
/// definition's leading comments. `loc` is recorded as the definition
/// location.
///
/// # Errors
/// Returns `PPError::InvalidDirective("expected macro name")` when the name
/// is missing, and propagates lexing and parameter-list errors.
fn process_define(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    let name_token = self.next_raw_token()?;
    let name = match name_token.kind {
        TokenKind::Ident(id) => id,
        ref kind => {
            if let Some(s) = kind.keyword_str() {
                self.interner.intern(s)
            } else {
                return Err(CompileError::Preprocess {
                    loc,
                    kind: PPError::InvalidDirective("expected macro name".to_string()),
                });
            }
        }
    };

    // Blanks matter for exactly one token: `NAME(` is function-like while
    // `NAME (` is object-like. The flag is reset before the result is
    // inspected so that a lexing error cannot leave the lexer stuck in
    // space-reporting mode.
    self.return_spaces = true;
    let next = self.next_raw_token();
    self.return_spaces = false;
    let next = next?;

    let (kind, body_start) = if matches!(next.kind, TokenKind::LParen) {
        let (params, is_variadic) = self.parse_macro_params()?;
        (MacroKind::Function { params, is_variadic }, None)
    } else if matches!(next.kind, TokenKind::Space) {
        let body_first = self.next_raw_token()?;
        (MacroKind::Object, Some(body_first))
    } else {
        (MacroKind::Object, Some(next))
    };

    // Collect the replacement list up to the end of the line.
    let mut body = Vec::new();
    let mut need_more = true;
    if let Some(first) = body_start {
        if matches!(first.kind, TokenKind::Newline | TokenKind::Eof) {
            // The line already ended: empty body.
            need_more = false;
        } else {
            body.push(first);
        }
    }
    if need_more {
        loop {
            let token = self.next_raw_token()?;
            match token.kind {
                TokenKind::Newline | TokenKind::Eof => break,
                _ => body.push(token),
            }
        }
    }

    let is_target = self.is_current_file_in_target();
    let has_token_pasting = body
        .iter()
        .any(|t| matches!(t.kind, TokenKind::HashHash));
    let def = MacroDef {
        name,
        kind,
        body,
        def_loc: loc,
        leading_comments: std::mem::take(&mut self.pending_comments),
        is_builtin: self.defining_builtin,
        is_target,
        has_token_pasting,
    };
    // Observers see the definition before it lands in the table.
    if let Some(ref mut callback) = self.macro_def_callback {
        callback.on_macro_defined(&def);
    }
    self.macros.define(def, &self.interner);
    Ok(())
}
fn parse_macro_params(&mut self) -> Result<(Vec<InternedStr>, bool), CompileError> {
let mut params = Vec::new();
let mut is_variadic = false;
loop {
let token = self.next_raw_token()?;
let param_id: Option<InternedStr> = match &token.kind {
TokenKind::Ident(id) => Some(*id),
kind => kind.keyword_str().map(|s| self.interner.intern(s)),
};
match token.kind {
TokenKind::RParen => break,
_ if param_id.is_some() => {
params.push(param_id.unwrap());
let next = self.next_raw_token()?;
match next.kind {
TokenKind::Comma => continue,
TokenKind::RParen => break,
TokenKind::Ellipsis => {
is_variadic = true;
let rparen = self.next_raw_token()?;
if !matches!(rparen.kind, TokenKind::RParen) {
return Err(CompileError::Preprocess {
loc: token.loc,
kind: PPError::InvalidMacroArgs("expected ')' after '...'".to_string()),
});
}
break;
}
_ => {
return Err(CompileError::Preprocess {
loc: token.loc,
kind: PPError::InvalidMacroArgs("expected ',' or ')'".to_string()),
});
}
}
}
TokenKind::Ellipsis => {
is_variadic = true;
let va_args_id = self.interner.intern("__VA_ARGS__");
params.push(va_args_id);
let next = self.next_raw_token()?;
if !matches!(next.kind, TokenKind::RParen) {
return Err(CompileError::Preprocess {
loc: token.loc,
kind: PPError::InvalidMacroArgs("expected ')' after '...'".to_string()),
});
}
break;
}
_ => {
return Err(CompileError::Preprocess {
loc: token.loc,
kind: PPError::InvalidMacroArgs("expected parameter name".to_string()),
});
}
}
}
Ok((params, is_variadic))
}
/// Handles an `#undef` directive.
///
/// Removes the named macro (identifier or keyword spelling) from
/// `self.macros`; an operand that is neither is silently ignored. The rest of
/// the line is discarded.
fn process_undef(&mut self) -> Result<(), CompileError> {
    let token = self.next_raw_token()?;
    // A bare `#undef` yields the line terminator as its "operand". In that
    // case the line is already finished and skipping to end-of-line again
    // would swallow the following source line.
    let line_already_ended = matches!(token.kind, TokenKind::Newline | TokenKind::Eof);
    let name = match token.kind {
        TokenKind::Ident(id) => Some(id),
        ref kind => kind.keyword_str().map(|s| self.interner.intern(s)),
    };
    if let Some(id) = name {
        self.macros.undefine(id);
    }
    if !line_already_ended {
        self.skip_to_eol()?;
    }
    Ok(())
}
/// Handles `#include` / `#include_next`.
///
/// Accepts either a string literal (`"path"`, searched as a local include)
/// or `<path>` (searched as a system include), resolves it through
/// `resolve_include`, reads the file and pushes it as the new innermost
/// input source.
///
/// # Errors
/// `PPError::InvalidDirective` when no path follows the directive,
/// `PPError::IoError` when the resolved file cannot be read, plus any error
/// from resolution.
fn process_include(&mut self, loc: SourceLocation, is_include_next: bool) -> Result<(), CompileError> {
    let header = self.next_raw_token()?;
    let (path, kind) = match header.kind {
        TokenKind::StringLit(ref bytes) => (
            String::from_utf8_lossy(bytes).to_string(),
            IncludeKind::Local,
        ),
        // The `<...>` form is scanned directly from the source bytes.
        TokenKind::Lt => (self.scan_include_path('>')?, IncludeKind::System),
        _ => {
            return Err(CompileError::Preprocess {
                loc,
                kind: PPError::InvalidDirective("expected include path".to_string()),
            });
        }
    };
    self.skip_to_eol()?;

    let resolved = self.resolve_include(&path, kind, &loc, is_include_next)?;
    let source = match fs::read(&resolved) {
        Ok(bytes) => bytes,
        Err(e) => {
            return Err(CompileError::Preprocess {
                loc,
                kind: PPError::IoError(resolved, e.to_string()),
            });
        }
    };

    let file_id = self.files.register(resolved);
    self.sources.push(InputSource::from_file(source, file_id));
    Ok(())
}
fn resolve_include(&self, path: &str, kind: IncludeKind, loc: &SourceLocation, is_include_next: bool) -> Result<PathBuf, CompileError> {
let path = Path::new(path);
let start_index = if is_include_next {
self.find_current_include_index()
} else {
0
};
if kind == IncludeKind::Local && !is_include_next {
if let Some(source) = self.sources.last() {
if !source.is_token_source() {
let current_path = self.files.get_path(source.file_id);
if let Some(parent) = current_path.parent() {
let candidate = parent.join(path);
if candidate.exists() {
return Ok(candidate);
}
}
}
}
}
for dir in self.config.include_paths.iter().skip(start_index) {
let candidate = dir.join(path);
if candidate.exists() {
return Ok(candidate);
}
}
Err(CompileError::Preprocess {
loc: loc.clone(),
kind: PPError::IncludeNotFound(path.to_path_buf()),
})
}
/// Returns the index in `config.include_paths` at which an `#include_next`
/// search should start: one past the first include directory that is a
/// prefix of the innermost file-backed source's path, or 0 when there is no
/// such source or no directory matches.
fn find_current_include_index(&self) -> usize {
    let Some(source) = self.sources.iter().rev().find(|s| !s.is_token_source()) else {
        return 0;
    };
    let current_file_path = self.files.get_path(source.file_id);
    self.config
        .include_paths
        .iter()
        .position(|dir| current_file_path.starts_with(dir))
        .map_or(0, |index| index + 1)
}
/// Handles an `#if` directive.
///
/// Inside an inactive region the condition is not evaluated: an inactive
/// state is pushed and the branch is skipped. Otherwise the condition is
/// collected and evaluated, the new state is pushed, and a false branch is
/// skipped immediately.
fn process_if(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    if !self.cond_active {
        self.cond_stack.push(CondState {
            active: false,
            seen_active: false,
            seen_else: false,
            loc: loc.clone(),
        });
        return self.skip_false_branch(loc);
    }

    let condition = self.collect_if_condition()?;
    let truthy = PPExprEvaluator::new(&condition, &self.interner, &self.macros, loc.clone())
        .evaluate()?
        != 0;

    self.cond_stack.push(CondState {
        active: truthy,
        seen_active: truthy,
        seen_else: false,
        loc: loc.clone(),
    });
    self.update_cond_active();

    if truthy {
        Ok(())
    } else {
        self.skip_false_branch(loc)
    }
}
/// Skips source text until the conditional on top of `cond_stack` either
/// reaches a branch that becomes active or is closed by its `#endif`.
///
/// Each iteration asks `preprocess_skip` for the next relevant directive and
/// updates the top `CondState` accordingly:
/// - `endif`: the state is popped, `cond_active` is refreshed, and the
///   function returns.
/// - `else`: a second `#else` is reported as `PPError::UnmatchedElse`;
///   otherwise the branch is activated only if no earlier branch was taken.
/// - `elif`: reported as `PPError::ElifAfterElse` after an `#else`; ignored
///   if a branch was already taken; otherwise its condition is evaluated.
///
/// `loc` is used for error reports and handed to the expression evaluator.
fn skip_false_branch(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
loop {
let directive = self.preprocess_skip()?;
match directive.as_str() {
"endif" => {
self.cond_stack.pop();
self.update_cond_active();
return Ok(());
}
"else" => {
if let Some(state) = self.cond_stack.last_mut() {
if state.seen_else {
return Err(CompileError::Preprocess {
loc,
kind: PPError::UnmatchedElse,
});
}
state.seen_else = true;
// The else branch is taken only when no earlier branch was active.
if !state.seen_active {
state.active = true;
state.seen_active = true;
self.update_cond_active();
return Ok(());
}
}
}
"elif" => {
if let Some(state) = self.cond_stack.last() {
if state.seen_else {
return Err(CompileError::Preprocess {
loc,
kind: PPError::ElifAfterElse,
});
}
// A branch was already taken: discard the condition and keep skipping.
if state.seen_active {
self.skip_to_eol()?;
continue;
}
}
// `preprocess_skip` leaves the rest of an `#elif` line unread, so the
// condition tokens are still available here.
let tokens = self.collect_if_condition()?;
let new_active = {
let mut eval = PPExprEvaluator::new(&tokens, &self.interner, &self.macros, loc.clone());
eval.evaluate()? != 0
};
if let Some(state) = self.cond_stack.last_mut() {
if new_active {
state.active = true;
state.seen_active = true;
self.update_cond_active();
return Ok(());
}
}
}
// `preprocess_skip` only ever returns "endif", "else" or "elif".
_ => unreachable!(),
}
}
}
/// Handles `#ifdef` (`negate == false`) and `#ifndef` (`negate == true`).
///
/// Inside an inactive region an inactive state is pushed and the branch is
/// skipped without looking at the operand. Otherwise the operand (identifier
/// or keyword spelling) is looked up in `self.macros`; any other operand
/// counts as "not defined".
fn process_ifdef(&mut self, loc: SourceLocation, negate: bool) -> Result<(), CompileError> {
    if !self.cond_active {
        self.cond_stack.push(CondState {
            active: false,
            seen_active: false,
            seen_else: false,
            loc: loc.clone(),
        });
        self.skip_false_branch(loc)?;
        return Ok(());
    }

    let token = self.next_raw_token()?;
    // With a missing operand the token read here is already the line
    // terminator; skipping to end-of-line again would discard the first line
    // of the conditional body.
    let line_already_ended = matches!(token.kind, TokenKind::Newline | TokenKind::Eof);
    let name = match token.kind {
        TokenKind::Ident(id) => Some(id),
        ref kind => kind.keyword_str().map(|s| self.interner.intern(s)),
    };
    let defined = name.is_some_and(|id| self.macros.is_defined(id));
    if !line_already_ended {
        self.skip_to_eol()?;
    }

    let active = if negate { !defined } else { defined };
    self.cond_stack.push(CondState {
        active,
        seen_active: active,
        seen_else: false,
        loc: loc.clone(),
    });
    self.update_cond_active();
    if !active {
        self.skip_false_branch(loc)?;
    }
    Ok(())
}
/// Handles an `#elif` reached while tokens are being processed normally:
/// validates the conditional stack, discards the condition, and skips ahead
/// via `skip_false_branch`.
///
/// # Errors
/// `PPError::UnmatchedEndif` when no conditional is open,
/// `PPError::ElifAfterElse` when the open conditional already saw `#else`.
fn process_elif(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    let violation = match self.cond_stack.last() {
        None => Some(PPError::UnmatchedEndif),
        Some(state) if state.seen_else => Some(PPError::ElifAfterElse),
        Some(_) => None,
    };
    if let Some(kind) = violation {
        return Err(CompileError::Preprocess { loc, kind });
    }
    self.skip_to_eol()?;
    self.skip_false_branch(loc)
}
/// Handles an `#else` reached while tokens are being processed normally:
/// records that the open conditional has seen its `#else`, discards the rest
/// of the line, and skips ahead via `skip_false_branch`.
///
/// # Errors
/// `PPError::UnmatchedElse` when no conditional is open or the open one has
/// already seen an `#else`.
fn process_else(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    match self.cond_stack.last_mut() {
        Some(state) if !state.seen_else => state.seen_else = true,
        // Either no open conditional or a duplicate `#else`.
        _ => {
            return Err(CompileError::Preprocess {
                loc,
                kind: PPError::UnmatchedElse,
            });
        }
    }
    self.skip_to_eol()?;
    self.skip_false_branch(loc)
}
/// Handles an `#endif`: closes the innermost conditional, discards the rest
/// of the line, and recomputes `cond_active`.
///
/// # Errors
/// `PPError::UnmatchedEndif` (with a default location) when no conditional
/// is open.
fn process_endif(&mut self) -> Result<(), CompileError> {
    if self.cond_stack.pop().is_none() {
        return Err(CompileError::Preprocess {
            loc: SourceLocation::default(),
            kind: PPError::UnmatchedEndif,
        });
    }
    self.skip_to_eol()?;
    self.update_cond_active();
    Ok(())
}
/// Handles an `#error` directive: gathers the rest of the line into a
/// space-separated message and always returns it as a
/// `PPError::InvalidDirective` error at `loc`.
///
/// Identifiers and string literals contribute their text; every other token
/// contributes its `Debug` representation.
fn process_error(&mut self, loc: SourceLocation) -> Result<(), CompileError> {
    let mut message = String::new();
    loop {
        let token = self.next_raw_token()?;
        let piece = match &token.kind {
            TokenKind::Newline | TokenKind::Eof => break,
            TokenKind::Ident(id) => self.interner.get(*id).to_string(),
            TokenKind::StringLit(bytes) => String::from_utf8_lossy(bytes).into_owned(),
            other => format!("{:?}", other),
        };
        if !message.is_empty() {
            message.push(' ');
        }
        message.push_str(&piece);
    }
    Err(CompileError::Preprocess {
        loc,
        kind: PPError::InvalidDirective(format!("#error {}", message)),
    })
}
/// Recomputes `cond_active`: true exactly when no entry on `cond_stack` is
/// inactive (an empty stack counts as active).
fn update_cond_active(&mut self) {
    let any_inactive = self.cond_stack.iter().any(|state| !state.active);
    self.cond_active = !any_inactive;
}
/// Collects the tokens of an `#if` / `#elif` condition up to the end of the
/// line.
///
/// Macros are expanded as they are met (the expansion is pushed back onto
/// `lookahead` and re-read), except for the operand of `defined`, which is
/// copied through unexpanded in both the `defined X` and `defined(X)` forms.
///
/// A `defined` whose operand is cut short by the end of the line stops the
/// collection at that point instead of treating the line terminator as an
/// operand and reading on into the next source line; the truncated
/// expression is left for the evaluator to reject.
fn collect_if_condition(&mut self) -> Result<Vec<Token>, CompileError> {
    let mut tokens = Vec::new();
    let defined_id = self.interner.intern("defined");
    loop {
        let token = self.next_raw_token()?;
        match &token.kind {
            TokenKind::Newline | TokenKind::Eof => break,
            TokenKind::Ident(id) if *id == defined_id => {
                tokens.push(token);
                let next = self.next_raw_token()?;
                if matches!(next.kind, TokenKind::Newline | TokenKind::Eof) {
                    break;
                }
                let parenthesized = matches!(next.kind, TokenKind::LParen);
                tokens.push(next);
                if parenthesized {
                    let ident = self.next_raw_token()?;
                    if matches!(ident.kind, TokenKind::Newline | TokenKind::Eof) {
                        break;
                    }
                    tokens.push(ident);
                    let rparen = self.next_raw_token()?;
                    if matches!(rparen.kind, TokenKind::Newline | TokenKind::Eof) {
                        break;
                    }
                    tokens.push(rparen);
                }
            }
            TokenKind::Ident(id) => {
                let id = *id;
                if let Some(expanded) = self.try_expand_macro(id, &token)? {
                    // `lookahead` is a stack: push in reverse so the first
                    // expanded token is read next.
                    for t in expanded.into_iter().rev() {
                        self.lookahead.push(t);
                    }
                } else {
                    tokens.push(token);
                }
            }
            _ => {
                tokens.push(token);
            }
        }
    }
    if self.config.debug_pp {
        eprintln!("DEBUG: collected tokens for #if condition:");
        for t in &tokens {
            eprintln!(" {:?}", t.kind);
        }
    }
    Ok(tokens)
}
/// Reads the raw bytes of an include path from the innermost source up to
/// (and consuming) `terminator`.
///
/// The bytes are decoded as UTF-8 (lossily) once the whole path has been
/// read, matching how quoted include paths are decoded; converting byte by
/// byte would corrupt every multi-byte character in the path.
///
/// # Errors
/// `PPError::InvalidDirective` when there is no source, or when the line or
/// the input ends before `terminator` is found.
fn scan_include_path(&mut self, terminator: char) -> Result<String, CompileError> {
    let source = self.sources.last_mut().ok_or_else(|| {
        CompileError::Preprocess {
            loc: SourceLocation::default(),
            kind: PPError::InvalidDirective("no source".to_string()),
        }
    })?;
    let loc = source.current_location();
    let mut raw_path: Vec<u8> = Vec::new();
    loop {
        match source.peek() {
            Some(c) if c == terminator as u8 => {
                source.advance();
                break;
            }
            Some(b'\n') | None => {
                return Err(CompileError::Preprocess {
                    loc,
                    kind: PPError::InvalidDirective("unterminated include path".to_string()),
                });
            }
            Some(c) => {
                source.advance();
                raw_path.push(c);
            }
        }
    }
    Ok(String::from_utf8_lossy(&raw_path).into_owned())
}
/// Discards raw tokens up to and including the next `Newline` (or `Eof`).
fn skip_to_eol(&mut self) -> Result<(), CompileError> {
    while !matches!(
        self.next_raw_token()?.kind,
        TokenKind::Newline | TokenKind::Eof
    ) {}
    Ok(())
}
/// Advances `source` byte by byte to the end of the current logical line.
///
/// The terminating `\n` itself is not consumed. Block comments are skipped as
/// a whole (including any newlines inside them), a `//` comment runs to the
/// end of the line, and a backslash followed by `\n` or `\r\n` is treated as
/// a line continuation.
fn skip_to_eol_raw(source: &mut InputSource) {
loop {
match source.peek() {
Some(b'\n') | None => break,
Some(b'/') => {
if source.peek_n(1) == Some(b'*') {
// Block comment: consume `/*`, then everything up to and including `*/`.
source.advance(); source.advance(); loop {
match (source.peek(), source.peek_n(1)) {
(Some(b'*'), Some(b'/')) => {
source.advance();
source.advance();
break;
}
(Some(_), _) => { source.advance(); }
// Unterminated comment: stop at end of input.
(None, _) => break,
}
}
} else if source.peek_n(1) == Some(b'/') {
// Line comment: the rest of the line belongs to it.
while source.peek().is_some_and(|c| c != b'\n') {
source.advance();
}
break;
} else {
source.advance();
}
}
Some(b'\\') => {
// Backslash-newline (LF or CRLF) continues the logical line.
source.advance();
if source.peek() == Some(b'\n') {
source.advance();
} else if source.peek() == Some(b'\r') {
source.advance();
if source.peek() == Some(b'\n') {
source.advance();
}
}
}
Some(_) => { source.advance(); }
}
}
}
/// Scans raw source bytes inside a skipped conditional region and returns the
/// name of the next directive that concerns the conditional being skipped:
/// `"endif"`, `"else"` or `"elif"`.
///
/// Nested `#if` / `#ifdef` / `#ifndef` ... `#endif` groups are tracked with a
/// depth counter and skipped entirely. String and character literals and
/// comments are stepped over so that a `#` inside them is not mistaken for a
/// directive. For `"endif"` and `"else"` the rest of the directive line is
/// consumed; for `"elif"` it is left unread so the caller can collect the
/// condition.
///
/// When the innermost source runs out it is popped and scanning continues in
/// the enclosing source.
///
/// # Errors
/// `PPError::MissingEndif` when the outermost source ends (or there is no
/// source at all) before a matching directive is found.
fn preprocess_skip(&mut self) -> Result<String, CompileError> {
    let mut depth = 0i32;
    loop {
        let source = match self.sources.last_mut() {
            Some(s) => s,
            None => {
                return Err(CompileError::Preprocess {
                    loc: SourceLocation::default(),
                    kind: PPError::MissingEndif,
                });
            }
        };
        let mut at_line_start = source.is_at_line_start();
        while let Some(c) = source.peek() {
            match c {
                // Horizontal whitespace does not affect "start of line".
                b' ' | b'\t' | b'\r' | 0x0C | 0x0B => {
                    source.advance();
                }
                b'\n' => {
                    source.advance();
                    at_line_start = true;
                }
                b'\\' => {
                    // Line continuation (LF or CRLF).
                    source.advance();
                    if source.peek() == Some(b'\n') {
                        source.advance();
                    } else if source.peek() == Some(b'\r') {
                        source.advance();
                        if source.peek() == Some(b'\n') {
                            source.advance();
                        }
                    }
                }
                b'"' | b'\'' => {
                    // String / character literal: stop at the closing quote,
                    // or at the end of the line if it is unterminated.
                    let quote = c;
                    source.advance();
                    loop {
                        match source.peek() {
                            Some(c) if c == quote => {
                                source.advance();
                                break;
                            }
                            Some(b'\\') => {
                                // Skip the backslash and the escaped byte.
                                source.advance();
                                source.advance();
                            }
                            Some(b'\n') | None => break,
                            Some(_) => {
                                source.advance();
                            }
                        }
                    }
                    at_line_start = false;
                }
                b'/' => {
                    source.advance();
                    match source.peek() {
                        Some(b'/') => {
                            while source.peek().is_some_and(|c| c != b'\n') {
                                source.advance();
                            }
                        }
                        Some(b'*') => {
                            source.advance();
                            loop {
                                match (source.peek(), source.peek_n(1)) {
                                    (Some(b'*'), Some(b'/')) => {
                                        source.advance();
                                        source.advance();
                                        break;
                                    }
                                    (Some(_), _) => {
                                        source.advance();
                                    }
                                    (None, _) => break,
                                }
                            }
                        }
                        _ => {}
                    }
                    at_line_start = false;
                }
                b'#' if at_line_start => {
                    source.advance();
                    while matches!(source.peek(), Some(b' ') | Some(b'\t')) {
                        source.advance();
                    }
                    let mut directive = String::new();
                    while let Some(c) = source.peek() {
                        if c.is_ascii_alphabetic() || c == b'_' {
                            directive.push(c as char);
                            source.advance();
                        } else {
                            break;
                        }
                    }
                    match directive.as_str() {
                        "if" | "ifdef" | "ifndef" => {
                            depth += 1;
                            // Use the same comment- and continuation-aware
                            // line skipper as every other directive, so a
                            // block comment or backslash-newline on the
                            // nested `#if` line is not mis-scanned as code.
                            Self::skip_to_eol_raw(source);
                        }
                        "endif" => {
                            if depth == 0 {
                                Self::skip_to_eol_raw(source);
                                return Ok("endif".to_string());
                            }
                            depth -= 1;
                            Self::skip_to_eol_raw(source);
                        }
                        "else" if depth == 0 => {
                            Self::skip_to_eol_raw(source);
                            return Ok("else".to_string());
                        }
                        "elif" if depth == 0 => {
                            // The condition is left for the caller to read.
                            return Ok("elif".to_string());
                        }
                        _ => {
                            Self::skip_to_eol_raw(source);
                        }
                    }
                    at_line_start = false;
                }
                _ => {
                    source.advance();
                    at_line_start = false;
                }
            }
        }
        // The innermost source is exhausted: continue in the enclosing one,
        // or fail if this was the outermost source.
        if self.sources.len() > 1 {
            self.sources.pop();
        } else {
            return Err(CompileError::Preprocess {
                loc: SourceLocation::default(),
                kind: PPError::MissingEndif,
            });
        }
    }
}
/// Attempts to expand the macro named `id` that was triggered by `token`.
///
/// Delegates to `try_expand_macro_internal` with function-macro preservation
/// switched off. Returns `Ok(None)` when no expansion takes place.
fn try_expand_macro(&mut self, id: InternedStr, token: &Token) -> Result<Option<Vec<Token>>, CompileError> {
self.try_expand_macro_internal(id, token, false)
}
/// Core macro expansion for the identifier `id` triggered by `token`.
///
/// Returns `Ok(None)` when the identifier must be left alone: it is listed in
/// `skip_expand_macros`, blocked for this token by `no_expand_registry`, not
/// defined, a function-like macro not followed by `(`, or a function-like
/// macro that is being preserved (`preserve_function_macros` is set and the
/// macro is not in `explicit_expand_macros`).
///
/// Otherwise returns the substituted body, marked in the no-expand registry
/// and passed through `wrap_with_markers`. Any callback registered for the
/// macro in `macro_called_callbacks` is invoked with the raw arguments
/// (`None` for object-like macros).
fn try_expand_macro_internal(
&mut self,
id: InternedStr,
token: &Token,
preserve_function_macros: bool,
) -> Result<Option<Vec<Token>>, CompileError> {
if self.skip_expand_macros.contains(&id) {
return Ok(None);
}
// NOTE(review): presumably this is what prevents recursive re-expansion of
// a macro inside its own expansion -- confirm against the registry.
if self.no_expand_registry.is_blocked(token.id, id) {
return Ok(None);
}
let def = match self.macros.get(id) {
Some(def) => def.clone(),
None => return Ok(None),
};
let trigger_token_id = token.id;
let call_loc = token.loc.clone();
match &def.kind {
MacroKind::Object => {
// Object-like macro: no parameters, so both argument maps are empty.
let empty = HashMap::new();
let expanded = self.expand_tokens(&def.body, &empty, &empty)?;
let marked = self.mark_expanded_with_registry(expanded, trigger_token_id, id, &call_loc);
// The callback is taken out of the map for the duration of the call and
// put back afterwards.
if let Some(mut cb) = self.macro_called_callbacks.remove(&id) {
cb.on_macro_called(None, &self.interner);
self.macro_called_callbacks.insert(id, cb);
}
let wrapped = self.wrap_with_markers(
marked,
id,
token,
MacroInvocationKind::Object,
&call_loc,
def.has_token_pasting,
);
Ok(Some(wrapped))
}
MacroKind::Function { params, is_variadic } => {
if preserve_function_macros && !self.explicit_expand_macros.contains(&id) {
return Ok(None);
}
// A function-like macro is only invoked when followed by `(`; newlines
// between the name and the parenthesis are tolerated.
let mut skipped_newlines = Vec::new();
let next = loop {
let t = self.next_raw_token()?;
if matches!(t.kind, TokenKind::Newline) {
skipped_newlines.push(t);
} else {
break t;
}
};
if !matches!(next.kind, TokenKind::LParen) {
// Not an invocation: push everything back (lookahead is a stack, so the
// last token read goes in first).
self.lookahead.push(next);
for t in skipped_newlines.into_iter().rev() {
self.lookahead.push(t);
}
return Ok(None);
}
let args = self.collect_macro_args(params.len(), *is_variadic)?;
let mut arg_map = HashMap::new();
if *is_variadic && !params.is_empty() {
let va_args_id = self.interner.intern("__VA_ARGS__");
let last_param = *params.last().unwrap();
// A last parameter other than `__VA_ARGS__` means the `name...` form.
let is_gnu_style = last_param != va_args_id;
let normal_param_count = params.len() - 1;
for (i, param) in params.iter().take(normal_param_count).enumerate() {
if i < args.len() {
arg_map.insert(*param, args[i].clone());
} else {
arg_map.insert(*param, Vec::new());
}
}
// Join all remaining arguments, comma-separated, into the variadic argument.
let mut va = Vec::new();
let va_start = normal_param_count;
for (i, arg) in args.iter().enumerate().skip(va_start) {
if i > va_start {
va.push(Token::new(TokenKind::Comma, token.loc.clone()));
}
va.extend(arg.clone());
}
if is_gnu_style {
// Reachable both through the named parameter and `__VA_ARGS__`.
arg_map.insert(last_param, va.clone());
arg_map.insert(va_args_id, va);
} else {
arg_map.insert(va_args_id, va);
}
} else {
// Missing arguments are bound to an empty token list.
for (i, param) in params.iter().enumerate() {
if i < args.len() {
arg_map.insert(*param, args[i].clone());
} else {
arg_map.insert(*param, Vec::new());
}
}
}
let prescanned_args = self.prescan_args(&arg_map)?;
let expanded = self.expand_tokens(&def.body, &arg_map, &prescanned_args)?;
let marked = self.mark_expanded_with_registry(expanded, trigger_token_id, id, &call_loc);
if let Some(mut cb) = self.macro_called_callbacks.remove(&id) {
cb.on_macro_called(Some(&args), &self.interner);
self.macro_called_callbacks.insert(id, cb);
}
// For wrapped macros the arguments stored in the marker are expanded
// (keeping function-like macro calls intact) and stripped of nested markers.
let kind = if self.wrapped_macros.contains(&id) {
let expanded_args: Result<Vec<_>, _> = args.into_iter()
.map(|arg_tokens| {
let expanded = self.expand_token_list_preserve_fn(&arg_tokens)?;
Ok(expanded.into_iter()
.filter(|t| !matches!(t.kind, TokenKind::MacroBegin(_) | TokenKind::MacroEnd(_)))
.collect())
})
.collect();
MacroInvocationKind::Function { args: expanded_args? }
} else {
MacroInvocationKind::Function { args }
};
let wrapped = self.wrap_with_markers(
marked,
id,
token,
kind,
&call_loc,
def.has_token_pasting,
);
Ok(Some(wrapped))
}
}
}
/// Post-processes the tokens produced by expanding `macro_id`.
///
/// For every token, in order: it inherits the no-expand entries of the
/// triggering token, gets `macro_id` added to its own entries, and has its
/// location replaced by `call_loc`.
fn mark_expanded_with_registry(
    &mut self,
    tokens: Vec<Token>,
    trigger_token_id: TokenId,
    macro_id: InternedStr,
    call_loc: &SourceLocation,
) -> Vec<Token> {
    let mut marked = tokens;
    for produced in marked.iter_mut() {
        self.no_expand_registry.inherit(trigger_token_id, produced.id);
        self.no_expand_registry.add(produced.id, macro_id);
        produced.loc = call_loc.clone();
    }
    marked
}
/// Surrounds an expansion with `MacroBegin` / `MacroEnd` marker tokens.
///
/// The tokens are returned unchanged unless markers are enabled in the
/// configuration (`emit_markers`) or the macro is listed in
/// `wrapped_macros`. The begin marker records the macro name, the triggering
/// token, the invocation kind, the call location, and whether the call
/// should be preserved: true only for function-like macros that neither use
/// token pasting nor appear in `explicit_expand_macros`.
fn wrap_with_markers(
    &self,
    tokens: Vec<Token>,
    macro_name: InternedStr,
    trigger_token: &Token,
    kind: MacroInvocationKind,
    call_loc: &SourceLocation,
    has_token_pasting: bool,
) -> Vec<Token> {
    let is_wrapped = self.wrapped_macros.contains(&macro_name);
    if !self.config.emit_markers && !is_wrapped {
        return tokens;
    }

    // Begin and end markers are tied together by a fresh id.
    let marker_id = TokenId::next();
    let is_function_macro = matches!(kind, MacroInvocationKind::Function { .. });
    let preserve_call = is_function_macro
        && !has_token_pasting
        && !self.explicit_expand_macros.contains(&macro_name);

    let begin_info = MacroBeginInfo {
        marker_id,
        trigger_token_id: trigger_token.id,
        macro_name,
        kind,
        call_loc: call_loc.clone(),
        is_wrapped,
        preserve_call,
    };
    let begin_token = Token::new(
        TokenKind::MacroBegin(Box::new(begin_info)),
        call_loc.clone(),
    );
    let end_info = MacroEndInfo {
        begin_marker_id: marker_id,
    };
    let end_token = Token::new(TokenKind::MacroEnd(end_info), call_loc.clone());

    let mut result = Vec::with_capacity(tokens.len() + 2);
    result.push(begin_token);
    result.extend(tokens);
    result.push(end_token);
    result
}
/// Reads the arguments of a function-like macro invocation; the opening `(`
/// has already been consumed.
///
/// Arguments are split at top-level commas. For a variadic macro, commas are
/// kept inside the current argument once `param_count` arguments have
/// already been completed. Newlines are dropped. `()` yields no arguments.
///
/// # Errors
/// `PPError::InvalidMacroArgs` when the input ends before the closing `)`.
fn collect_macro_args(&mut self, param_count: usize, is_variadic: bool) -> Result<Vec<Vec<Token>>, CompileError> {
    let mut finished: Vec<Vec<Token>> = Vec::new();
    let mut pending: Vec<Token> = Vec::new();
    let mut nesting = 0;
    loop {
        let token = self.next_raw_token()?;
        match token.kind {
            TokenKind::Newline => {}
            TokenKind::Eof => {
                return Err(CompileError::Preprocess {
                    loc: token.loc,
                    kind: PPError::InvalidMacroArgs("unterminated macro arguments".to_string()),
                });
            }
            TokenKind::LParen => {
                nesting += 1;
                pending.push(token);
            }
            // Closing parenthesis of the invocation itself.
            TokenKind::RParen if nesting == 0 => {
                if !(pending.is_empty() && finished.is_empty()) {
                    finished.push(pending);
                }
                return Ok(finished);
            }
            TokenKind::RParen => {
                nesting -= 1;
                pending.push(token);
            }
            // Top-level comma that separates two arguments.
            TokenKind::Comma
                if nesting == 0 && !(is_variadic && finished.len() >= param_count) =>
            {
                finished.push(std::mem::take(&mut pending));
            }
            // Everything else, including nested or variadic commas.
            _ => pending.push(token),
        }
    }
}
/// Fully macro-expands every argument in `args`, returning a map with the
/// same keys and the expanded token lists as values. Stops at the first
/// expansion error.
fn prescan_args(&mut self, args: &HashMap<InternedStr, Vec<Token>>) -> Result<HashMap<InternedStr, Vec<Token>>, CompileError> {
    args.iter()
        .map(|(param, raw)| {
            self.expand_token_list(raw)
                .map(|expanded| (*param, expanded))
        })
        .collect()
}
/// Macro-expands `tokens` in isolation, expanding function-like macros too
/// (delegates to `expand_token_list_internal` with preservation off).
fn expand_token_list(&mut self, tokens: &[Token]) -> Result<Vec<Token>, CompileError> {
self.expand_token_list_internal(tokens, false)
}
/// Macro-expands `tokens` in isolation with function-macro preservation on
/// (delegates to `expand_token_list_internal` with the flag set).
fn expand_token_list_preserve_fn(&mut self, tokens: &[Token]) -> Result<Vec<Token>, CompileError> {
self.expand_token_list_internal(tokens, true)
}
/// Macro-expands a detached token list.
///
/// The current `lookahead` stack is set aside and replaced by `tokens`
/// followed by an `Eof` sentinel, so that macro expansion (which reads
/// through the lookahead) cannot run past the end of the list. Newlines are
/// dropped from the result.
///
/// The original lookahead is restored on every exit path, including when an
/// expansion fails, so an error cannot leave the preprocessor reading from
/// the temporary stack.
fn expand_token_list_internal(
    &mut self,
    tokens: &[Token],
    preserve_function_macros: bool,
) -> Result<Vec<Token>, CompileError> {
    if tokens.is_empty() {
        return Ok(Vec::new());
    }

    let saved_lookahead = std::mem::take(&mut self.lookahead);
    self.lookahead
        .push(Token::new(TokenKind::Eof, SourceLocation::default()));
    // `lookahead` is a stack: push in reverse so the first token pops first.
    self.lookahead.extend(tokens.iter().rev().cloned());

    let mut result = Vec::new();
    let mut failure = None;
    while let Some(token) = self.lookahead.pop() {
        if matches!(token.kind, TokenKind::Eof) {
            break;
        }
        if matches!(token.kind, TokenKind::Newline) {
            continue;
        }
        if let TokenKind::Ident(id) = token.kind {
            match self.try_expand_macro_internal(id, &token, preserve_function_macros) {
                Ok(Some(expanded)) => {
                    // Rescan the expansion.
                    self.lookahead.extend(expanded.into_iter().rev());
                    continue;
                }
                Ok(None) => {}
                Err(err) => {
                    failure = Some(err);
                    break;
                }
            }
        }
        result.push(token);
    }

    self.lookahead = saved_lookahead;
    match failure {
        Some(err) => Err(err),
        None => Ok(result),
    }
}
/// Substitutes macro parameters into a replacement list.
///
/// - `# param` becomes a string literal built from the raw argument.
/// - `lhs ## rhs` pastes the previous result token with the first token of
///   the right operand; both operands use the raw (unexpanded) argument when
///   they are parameters.
/// - Any other parameter is replaced by its prescanned (fully expanded)
///   argument.
///
/// # Errors
/// `PPError::InvalidStringize` when `#` is not followed by a parameter, and
/// `PPError::InvalidTokenPaste` when `##` has no left or right operand.
fn expand_tokens(&mut self, tokens: &[Token], raw_args: &HashMap<InternedStr, Vec<Token>>, prescanned_args: &HashMap<InternedStr, Vec<Token>>) -> Result<Vec<Token>, CompileError> {
    let mut result = Vec::new();
    let mut i = 0;
    while i < tokens.len() {
        let token = &tokens[i];
        match &token.kind {
            TokenKind::Hash if i + 1 < tokens.len() => {
                if let TokenKind::Ident(param_id) = tokens[i + 1].kind {
                    if let Some(arg_tokens) = raw_args.get(&param_id) {
                        let stringified = self.stringify_tokens(arg_tokens);
                        result.push(Token::new(
                            TokenKind::StringLit(stringified.into_bytes()),
                            token.loc.clone(),
                        ));
                        i += 2;
                        continue;
                    }
                }
                return Err(CompileError::Preprocess {
                    loc: token.loc.clone(),
                    kind: PPError::InvalidStringize,
                });
            }
            TokenKind::HashHash => {
                if result.is_empty() || i + 1 >= tokens.len() {
                    return Err(CompileError::Preprocess {
                        loc: token.loc.clone(),
                        kind: PPError::InvalidTokenPaste,
                    });
                }
                let left = result.pop().unwrap();
                i += 1;
                let right_token = &tokens[i];
                let right_tokens = if let TokenKind::Ident(id) = right_token.kind {
                    if let Some(arg_tokens) = raw_args.get(&id) {
                        arg_tokens.clone()
                    } else {
                        vec![right_token.clone()]
                    }
                } else {
                    vec![right_token.clone()]
                };
                let pasted = self.paste_tokens(&left, &right_tokens, &token.loc)?;
                result.extend(pasted);
                i += 1;
                continue;
            }
            TokenKind::Ident(id) => {
                // A parameter that is the left operand of `##` must be
                // substituted unexpanded, exactly like the right operand;
                // otherwise `CAT(FOO, 1)` would paste FOO's expansion.
                let feeds_paste = matches!(
                    tokens.get(i + 1).map(|t| &t.kind),
                    Some(TokenKind::HashHash)
                );
                let substitution = if feeds_paste {
                    raw_args.get(id).or_else(|| prescanned_args.get(id))
                } else {
                    prescanned_args.get(id)
                };
                match substitution {
                    Some(arg_tokens) => result.extend(arg_tokens.iter().cloned()),
                    None => result.push(token.clone()),
                }
            }
            _ => result.push(token.clone()),
        }
        i += 1;
    }
    Ok(result)
}
/// Implements `left ## right`.
///
/// The spelling of `left` is concatenated with the spelling of the first
/// token of `right` and re-tokenized; the remaining tokens of `right` follow
/// unchanged. Every resulting token is given the location `loc`. An empty
/// `right` yields a copy of `left` with its own location.
fn paste_tokens(&mut self, left: &Token, right: &[Token], loc: &SourceLocation) -> Result<Vec<Token>, CompileError> {
    let Some((head, tail)) = right.split_first() else {
        return Ok(vec![left.clone()]);
    };
    let glued = format!("{}{}", self.token_to_string(left), self.token_to_string(head));
    let mut result = self.tokenize_string(&glued);
    result.extend(tail.iter().cloned());
    for pasted in result.iter_mut() {
        pasted.loc = loc.clone();
    }
    Ok(result)
}
fn token_to_string(&self, token: &Token) -> String {
match &token.kind {
TokenKind::Ident(id) => self.interner.get(*id).to_string(),
TokenKind::IntLit(n) => n.to_string(),
TokenKind::UIntLit(n) => n.to_string(),
TokenKind::FloatLit(f) => f.to_string(),
TokenKind::StringLit(s) => format!("\"{}\"", String::from_utf8_lossy(s)),
TokenKind::CharLit(c) => format!("'{}'", *c as char),
TokenKind::WideCharLit(c) => format!("L'{}'", char::from_u32(*c).unwrap_or('?')),
TokenKind::Plus => "+".to_string(),
TokenKind::Minus => "-".to_string(),
TokenKind::Star => "*".to_string(),
TokenKind::Slash => "/".to_string(),
TokenKind::Percent => "%".to_string(),
TokenKind::Amp => "&".to_string(),
TokenKind::Pipe => "|".to_string(),
TokenKind::Caret => "^".to_string(),
TokenKind::Tilde => "~".to_string(),
TokenKind::Bang => "!".to_string(),
TokenKind::Lt => "<".to_string(),
TokenKind::Gt => ">".to_string(),
TokenKind::Eq => "=".to_string(),
TokenKind::Question => "?".to_string(),
TokenKind::Colon => ":".to_string(),
TokenKind::Dot => ".".to_string(),
TokenKind::Comma => ",".to_string(),
TokenKind::Semi => ";".to_string(),
TokenKind::LParen => "(".to_string(),
TokenKind::RParen => ")".to_string(),
TokenKind::LBracket => "[".to_string(),
TokenKind::RBracket => "]".to_string(),
TokenKind::LBrace => "{".to_string(),
TokenKind::RBrace => "}".to_string(),
TokenKind::Arrow => "->".to_string(),
TokenKind::PlusPlus => "++".to_string(),
TokenKind::MinusMinus => "--".to_string(),
TokenKind::LtLt => "<<".to_string(),
TokenKind::GtGt => ">>".to_string(),
TokenKind::LtEq => "<=".to_string(),
TokenKind::GtEq => ">=".to_string(),
TokenKind::EqEq => "==".to_string(),
TokenKind::BangEq => "!=".to_string(),
TokenKind::AmpAmp => "&&".to_string(),
TokenKind::PipePipe => "||".to_string(),
TokenKind::PlusEq => "+=".to_string(),
TokenKind::MinusEq => "-=".to_string(),
TokenKind::StarEq => "*=".to_string(),
TokenKind::SlashEq => "/=".to_string(),
TokenKind::PercentEq => "%=".to_string(),
TokenKind::AmpEq => "&=".to_string(),
TokenKind::PipeEq => "|=".to_string(),
TokenKind::CaretEq => "^=".to_string(),
TokenKind::LtLtEq => "<<=".to_string(),
TokenKind::GtGtEq => ">>=".to_string(),
TokenKind::Ellipsis => "...".to_string(),
TokenKind::Hash => "#".to_string(),
TokenKind::HashHash => "##".to_string(),
_ => String::new(),
}
}
/// Builds the text of a `#param` stringification: the spellings of `tokens`
/// separated by single spaces.
///
/// Punctuators and keywords are written with their source spelling (for
/// example `+`, not the `Debug` name `Plus`). Only tokens that have no
/// spelling at all fall back to their `Debug` representation.
fn stringify_tokens(&self, tokens: &[Token]) -> String {
    let mut result = String::new();
    for (i, token) in tokens.iter().enumerate() {
        if i > 0 {
            result.push(' ');
        }
        match &token.kind {
            TokenKind::Ident(id) => result.push_str(self.interner.get(*id)),
            TokenKind::IntLit(n) => result.push_str(&n.to_string()),
            TokenKind::UIntLit(n) => result.push_str(&format!("{}u", n)),
            TokenKind::FloatLit(f) => result.push_str(&f.to_string()),
            TokenKind::StringLit(s) => {
                result.push('"');
                result.push_str(&String::from_utf8_lossy(s));
                result.push('"');
            }
            TokenKind::CharLit(c) => {
                result.push('\'');
                result.push(*c as char);
                result.push('\'');
            }
            other => {
                let spelling = self.token_to_string(token);
                if !spelling.is_empty() {
                    result.push_str(&spelling);
                } else if let Some(keyword) = other.keyword_str() {
                    result.push_str(keyword);
                } else {
                    result.push_str(&format!("{:?}", other));
                }
            }
        }
    }
    result
}
/// Returns the file registry used by this preprocessor.
pub fn files(&self) -> &FileRegistry {
&self.files
}
/// Returns the string interner used by this preprocessor.
pub fn interner(&self) -> &StringInterner {
&self.interner
}
/// Returns mutable access to the string interner used by this preprocessor.
pub fn interner_mut(&mut self) -> &mut StringInterner {
&mut self.interner
}
/// Returns the macro table holding the current macro definitions.
pub fn macros(&self) -> &MacroTable {
&self.macros
}
pub fn expand_macro_body_for_inference(
&mut self,
body: &[Token],
params: &[InternedStr],
args: &[Vec<Token>],
in_progress: &mut HashSet<InternedStr>,
) -> Result<(Vec<Token>, HashSet<InternedStr>), CompileError> {
let mut called_macros = HashSet::new();
let mut raw_args = HashMap::new();
let mut prescanned_args = HashMap::new();
for (i, ¶m) in params.iter().enumerate() {
if let Some(arg_tokens) = args.get(i) {
raw_args.insert(param, arg_tokens.clone());
let (expanded_arg, arg_called) = self.expand_tokens_for_inference(
arg_tokens,
in_progress,
)?;
called_macros.extend(arg_called);
prescanned_args.insert(param, expanded_arg);
}
}
let substituted = self.expand_tokens(body, &raw_args, &prescanned_args)?;
let (result, more_called) = self.expand_tokens_for_inference(&substituted, in_progress)?;
called_macros.extend(more_called);
Ok((result, called_macros))
}
/// Expands the macros found in a token slice without touching the lexer or
/// the lookahead stack.
///
/// Identifiers listed in `skip_expand_macros` or currently in `in_progress`
/// are copied through unchanged. Object-like macros are expanded
/// recursively. A function-like macro followed by an argument list is
/// expanded only when it is in `explicit_expand_macros`; otherwise the call
/// is re-emitted as `name ( args )` with each argument expanded. A
/// function-like macro name without an argument list is copied through.
///
/// Returns the resulting tokens and the set of macros recorded as called.
fn expand_tokens_for_inference(
&mut self,
tokens: &[Token],
in_progress: &mut HashSet<InternedStr>,
) -> Result<(Vec<Token>, HashSet<InternedStr>), CompileError> {
let mut result = Vec::new();
let mut called_macros = HashSet::new();
let mut i = 0;
while i < tokens.len() {
let token = &tokens[i];
if let TokenKind::Ident(id) = token.kind {
if self.skip_expand_macros.contains(&id) {
result.push(token.clone());
i += 1;
continue;
}
// A macro that is already being expanded is not expanded again.
if in_progress.contains(&id) {
result.push(token.clone());
i += 1;
continue;
}
if let Some(def) = self.macros.get(id).cloned() {
match &def.kind {
MacroKind::Object => {
called_macros.insert(id);
// Mark the macro as in progress for the duration of its own expansion.
in_progress.insert(id);
let (expanded, more_called) = self.expand_macro_body_for_inference(
&def.body,
&[],
&[],
in_progress,
)?;
called_macros.extend(more_called);
result.extend(expanded);
in_progress.remove(&id);
i += 1;
continue;
}
MacroKind::Function { params, is_variadic } => {
// `consumed` counts the tokens after the macro name that form the argument list.
if let Some((args, consumed)) = self.try_collect_args_from_tokens(&tokens[i + 1..], params.len(), *is_variadic) {
called_macros.insert(id);
if !self.explicit_expand_macros.contains(&id) {
// Keep the call itself; only the arguments are expanded.
result.push(token.clone());
result.push(Token::new(TokenKind::LParen, token.loc.clone()));
for (arg_idx, arg_tokens) in args.iter().enumerate() {
if arg_idx > 0 {
result.push(Token::new(TokenKind::Comma, token.loc.clone()));
}
let (expanded_arg, arg_called) = self.expand_tokens_for_inference(
arg_tokens,
in_progress,
)?;
called_macros.extend(arg_called);
result.extend(expanded_arg);
}
result.push(Token::new(TokenKind::RParen, token.loc.clone()));
i += 1 + consumed;
continue;
}
in_progress.insert(id);
let (expanded, more_called) = self.expand_macro_body_for_inference(
&def.body,
params,
&args,
in_progress,
)?;
called_macros.extend(more_called);
result.extend(expanded);
in_progress.remove(&id);
i += 1 + consumed;
continue;
} else {
// Function-like macro name that is not followed by an argument list.
result.push(token.clone());
}
}
}
} else {
result.push(token.clone());
}
} else {
result.push(token.clone());
}
i += 1;
}
Ok((result, called_macros))
}
/// Tries to read a macro argument list from the start of `tokens`.
///
/// Leading `Space` / `Newline` tokens are skipped; the next token must be
/// `(`. Arguments are split at top-level commas, except that for a variadic
/// macro commas are kept once `param_count - 1` arguments are complete.
/// Whitespace at the start of an argument is dropped.
///
/// Returns the arguments and the number of tokens consumed (up to and
/// including the closing `)`), or `None` when there is no complete argument
/// list.
fn try_collect_args_from_tokens(
    &self,
    tokens: &[Token],
    param_count: usize,
    is_variadic: bool,
) -> Option<(Vec<Vec<Token>>, usize)> {
    let open = tokens
        .iter()
        .position(|t| !matches!(t.kind, TokenKind::Space | TokenKind::Newline))?;
    if !matches!(tokens[open].kind, TokenKind::LParen) {
        return None;
    }

    let mut finished: Vec<Vec<Token>> = Vec::new();
    let mut pending = Vec::new();
    let mut nesting = 0;
    for (index, token) in tokens.iter().enumerate().skip(open + 1) {
        match &token.kind {
            TokenKind::LParen => {
                nesting += 1;
                pending.push(token.clone());
            }
            // Closing parenthesis of the argument list itself.
            TokenKind::RParen if nesting == 0 => {
                if !(pending.is_empty() && finished.is_empty()) {
                    finished.push(pending);
                }
                return Some((finished, index + 1));
            }
            TokenKind::RParen => {
                nesting -= 1;
                pending.push(token.clone());
            }
            // Top-level comma that separates two arguments.
            TokenKind::Comma
                if nesting == 0
                    && !(is_variadic && finished.len() >= param_count.saturating_sub(1)) =>
            {
                finished.push(std::mem::take(&mut pending));
            }
            // Whitespace before the first real token of an argument.
            TokenKind::Space | TokenKind::Newline if pending.is_empty() => {}
            _ => pending.push(token.clone()),
        }
    }
    None
}
/// Reports whether the innermost source's file path lies under
/// `config.target_dir`. False when no target directory is configured or
/// there is no source.
fn is_current_file_in_target(&self) -> bool {
    match (&self.config.target_dir, self.sources.last()) {
        (Some(target_dir), Some(source)) => self
            .files
            .get_path(source.file_id)
            .starts_with(target_dir),
        _ => false,
    }
}
/// Runs the preprocessor to completion and returns every token produced
/// before `Eof` (the `Eof` token itself is not included).
pub fn collect_tokens(&mut self) -> Result<Vec<Token>, CompileError> {
    let mut collected = Vec::new();
    loop {
        let token = self.next_token()?;
        if let TokenKind::Eof = token.kind {
            return Ok(collected);
        }
        collected.push(token);
    }
}
}
/// `TokenSource` implementation that forwards to the preprocessor's own
/// methods and fields.
impl TokenSource for Preprocessor {
    /// Forwards to the inherent `Preprocessor::next_token`.
    fn next_token(&mut self) -> crate::error::Result<Token> {
        Preprocessor::next_token(self)
    }

    /// Forwards to the inherent `Preprocessor::unget_token`.
    fn unget_token(&mut self, token: Token) {
        Preprocessor::unget_token(self, token)
    }

    fn interner(&self) -> &StringInterner {
        &self.interner
    }

    fn interner_mut(&mut self) -> &mut StringInterner {
        &mut self.interner
    }

    fn files(&self) -> &FileRegistry {
        &self.files
    }

    /// True when a target directory is configured and the path registered
    /// for `file_id` lies under it.
    fn is_file_in_target(&self, file_id: crate::source::FileId) -> bool {
        self.config
            .target_dir
            .as_ref()
            .is_some_and(|target_dir| self.files.get_path(file_id).starts_with(target_dir))
    }
}
#[cfg(test)]
mod tests {
use super::*;
use std::io::Write;
use tempfile::NamedTempFile;
/// Writes `content` to a fresh named temporary file and returns its handle.
fn create_temp_file(content: &str) -> NamedTempFile {
    let mut handle = NamedTempFile::new().unwrap();
    handle.write_all(content.as_bytes()).unwrap();
    handle
}
/// True when `tokens` contains an identifier whose interned text is `name`.
fn has_ident(pp: &Preprocessor, tokens: &[Token], name: &str) -> bool {
    tokens
        .iter()
        .any(|t| matches!(t.kind, TokenKind::Ident(id) if pp.interner().get(id) == name))
}
/// Returns `true` if any token has the same enum variant as `kind`.
///
/// Only the variant is compared (via `std::mem::discriminant`); payloads are ignored.
fn has_keyword(tokens: &[Token], kind: TokenKind) -> bool {
    let wanted = std::mem::discriminant(&kind);
    tokens
        .iter()
        .any(|tok| std::mem::discriminant(&tok.kind) == wanted)
}
/// A plain declaration yields exactly three tokens, including `int` and `x`.
#[test]
fn test_simple_tokens() {
    let source = create_temp_file("int x;");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert_eq!(output.len(), 3);
    assert!(has_keyword(&output, TokenKind::KwInt));
    assert!(has_ident(&preprocessor, &output, "x"));
}
/// An object-like macro is replaced by its integer body.
#[test]
fn test_object_macro() {
    let source = create_temp_file("#define VALUE 42\nint x = VALUE;");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let saw_literal = output
        .iter()
        .any(|tok| matches!(tok.kind, TokenKind::IntLit(42)));
    assert!(saw_literal);
}
/// A function-like macro expansion contributes the `+` from its body.
#[test]
fn test_function_macro() {
    let source = create_temp_file("#define ADD(a, b) a + b\nint x = ADD(1, 2);");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let saw_plus = output.iter().any(|tok| matches!(tok.kind, TokenKind::Plus));
    assert!(saw_plus);
}
/// `#ifdef` on a defined macro keeps the guarded declaration.
#[test]
fn test_ifdef() {
    let source = create_temp_file("#define FOO\n#ifdef FOO\nint x;\n#endif");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert!(has_keyword(&output, TokenKind::KwInt));
}
/// `#ifndef` on an undefined macro keeps the guarded declaration.
#[test]
fn test_ifndef() {
    let source = create_temp_file("#ifndef BAR\nint x;\n#endif");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert!(has_keyword(&output, TokenKind::KwInt));
}
/// With the `#ifdef` condition false, only the `#else` branch is emitted.
#[test]
fn test_ifdef_else() {
    let source = create_temp_file("#ifdef UNDEFINED\nint x;\n#else\nfloat y;\n#endif");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    // The `#ifdef` branch must be dropped ...
    assert!(!has_ident(&preprocessor, &output, "x"));
    // ... and the `#else` branch kept.
    assert!(has_keyword(&output, TokenKind::KwFloat));
    assert!(has_ident(&preprocessor, &output, "y"));
}
/// `#if` with a true arithmetic comparison keeps the guarded declaration.
#[test]
fn test_if_expression() {
    let source = create_temp_file("#if 1 + 1 == 2\nint x;\n#endif");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert!(has_keyword(&output, TokenKind::KwInt));
}
/// A macro supplied through `PPConfig::predefined` expands in the source.
#[test]
fn test_predefined_macro() {
    let predefined = vec![(String::from("VERSION"), Some(String::from("100")))];
    let config = PPConfig {
        predefined,
        ..Default::default()
    };
    let source = create_temp_file("int v = VERSION;");
    let mut preprocessor = Preprocessor::new(config);
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let saw_literal = output
        .iter()
        .any(|tok| matches!(tok.kind, TokenKind::IntLit(100)));
    assert!(saw_literal);
}
/// After `#undef`, an `#ifdef` on the same name drops its guarded block.
#[test]
fn test_undef() {
    let source = create_temp_file("#define FOO 1\n#undef FOO\n#ifdef FOO\nint x;\n#endif");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert!(!has_ident(&preprocessor, &output, "x"));
}
/// Nested conditionals: outer `A` is defined, inner `B` is not, so only the
/// inner `#else` branch is emitted.
#[test]
fn test_nested_ifdef() {
    let text = "#define A\n#ifdef A\n#ifdef B\nint x;\n#else\nfloat y;\n#endif\n#endif";
    let source = create_temp_file(text);
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    assert!(!has_ident(&preprocessor, &output, "x"));
    assert!(has_keyword(&output, TokenKind::KwFloat));
    assert!(has_ident(&preprocessor, &output, "y"));
}
/// A freshly constructed registry is empty and has length zero.
#[test]
fn test_no_expand_registry_new() {
    let fresh = NoExpandRegistry::new();

    assert!(fresh.is_empty());
    assert_eq!(fresh.len(), 0);
}
/// Adding one (token, macro) pair makes it blocked and gives length one.
#[test]
fn test_no_expand_registry_add() {
    let mut names = crate::intern::StringInterner::new();
    let mut blocked = NoExpandRegistry::new();
    let tok = TokenId::next();
    let foo = names.intern("FOO");

    blocked.add(tok, foo);

    assert!(blocked.is_blocked(tok, foo));
    assert_eq!(blocked.len(), 1);
}
/// `extend` blocks every macro in the supplied list for the given token.
#[test]
fn test_no_expand_registry_extend() {
    let mut names = crate::intern::StringInterner::new();
    let mut blocked = NoExpandRegistry::new();
    let tok = TokenId::next();
    let foo = names.intern("FOO");
    let bar = names.intern("BAR");
    let baz = names.intern("BAZ");

    blocked.extend(tok, vec![foo, bar, baz]);

    for name in &[foo, bar, baz] {
        assert!(blocked.is_blocked(tok, *name));
    }
}
/// A block recorded for one (token, macro) pair does not apply to a different
/// token or to a different macro.
#[test]
fn test_no_expand_registry_not_blocked() {
    let mut names = crate::intern::StringInterner::new();
    let mut blocked = NoExpandRegistry::new();
    let tok = TokenId::next();
    let unrelated_tok = TokenId::next();
    let foo = names.intern("FOO");
    let bar = names.intern("BAR");

    blocked.add(tok, foo);

    // Same macro, different token.
    assert!(!blocked.is_blocked(unrelated_tok, foo));
    // Same token, different macro.
    assert!(!blocked.is_blocked(tok, bar));
}
/// After `inherit(from, to)`, `to` is blocked for every macro recorded on `from`.
#[test]
fn test_no_expand_registry_inherit() {
    let mut names = crate::intern::StringInterner::new();
    let mut blocked = NoExpandRegistry::new();
    let from = TokenId::next();
    let to = TokenId::next();
    let foo = names.intern("FOO");
    let bar = names.intern("BAR");
    blocked.add(from, foo);
    blocked.add(from, bar);

    blocked.inherit(from, to);

    assert!(blocked.is_blocked(to, foo));
    assert!(blocked.is_blocked(to, bar));
}
/// `inherit` merges the source token's blocks into the destination's existing
/// ones, leaves the source unchanged, and does not block unrelated macros.
#[test]
fn test_no_expand_registry_inherit_merge() {
    let mut names = crate::intern::StringInterner::new();
    let mut blocked = NoExpandRegistry::new();
    let from = TokenId::next();
    let to = TokenId::next();
    let foo = names.intern("FOO");
    let bar = names.intern("BAR");
    let baz = names.intern("BAZ");
    blocked.add(from, foo);
    blocked.add(to, bar);

    blocked.inherit(from, to);

    // Destination has its own entry plus the inherited one.
    assert!(blocked.is_blocked(to, foo));
    assert!(blocked.is_blocked(to, bar));
    // Source keeps only what it had before.
    assert!(blocked.is_blocked(from, foo));
    assert!(!blocked.is_blocked(from, bar));
    // A macro never registered is blocked on neither token.
    assert!(!blocked.is_blocked(from, baz));
    assert!(!blocked.is_blocked(to, baz));
}
/// With the default configuration, no `MacroBegin`/`MacroEnd` tokens appear
/// in the output of a macro expansion.
#[test]
fn test_emit_markers_disabled() {
    let source = create_temp_file("#define FOO 42\nint x = FOO;");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let marker_free = output.iter().all(|tok| {
        !matches!(tok.kind, TokenKind::MacroBegin(_) | TokenKind::MacroEnd(_))
    });
    assert!(marker_free, "Markers should not be emitted when emit_markers is false");
}
/// With `emit_markers` on, an object-like macro expansion produces exactly one
/// `MacroBegin` (named `FOO`, kind `Object`) and one `MacroEnd`.
#[test]
fn test_emit_markers_object_macro() {
    let source = create_temp_file("#define FOO 42\nint x = FOO;");
    let mut preprocessor = Preprocessor::new(PPConfig {
        emit_markers: true,
        ..Default::default()
    });
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let begin_infos: Vec<_> = output
        .iter()
        .filter_map(|tok| match &tok.kind {
            TokenKind::MacroBegin(info) => Some(info),
            _ => None,
        })
        .collect();
    let end_total = output
        .iter()
        .filter(|tok| matches!(tok.kind, TokenKind::MacroEnd(_)))
        .count();

    assert_eq!(begin_infos.len(), 1, "Should have exactly one MacroBegin");
    assert_eq!(end_total, 1, "Should have exactly one MacroEnd");
    for info in begin_infos {
        assert_eq!(preprocessor.interner().get(info.macro_name), "FOO");
        assert!(matches!(info.kind, MacroInvocationKind::Object));
    }
}
/// With `emit_markers` on, a function-like macro expansion produces exactly one
/// `MacroBegin` (named `ADD`, kind `Function` with two args) and one `MacroEnd`.
#[test]
fn test_emit_markers_function_macro() {
    let source = create_temp_file("#define ADD(a, b) a + b\nint x = ADD(1, 2);");
    let mut preprocessor = Preprocessor::new(PPConfig {
        emit_markers: true,
        ..Default::default()
    });
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    let begin_infos: Vec<_> = output
        .iter()
        .filter_map(|tok| match &tok.kind {
            TokenKind::MacroBegin(info) => Some(info),
            _ => None,
        })
        .collect();
    let end_total = output
        .iter()
        .filter(|tok| matches!(tok.kind, TokenKind::MacroEnd(_)))
        .count();

    assert_eq!(begin_infos.len(), 1, "Should have exactly one MacroBegin");
    assert_eq!(end_total, 1, "Should have exactly one MacroEnd");
    for info in begin_infos {
        assert_eq!(preprocessor.interner().get(info.macro_name), "ADD");
        match &info.kind {
            MacroInvocationKind::Function { args } => {
                assert_eq!(args.len(), 2, "ADD macro should have 2 arguments");
            }
            _ => panic!("Expected Function macro kind"),
        }
    }
}
/// The `begin_marker_id` carried by `MacroEnd` equals the `marker_id` of the
/// corresponding `MacroBegin`.
#[test]
fn test_emit_markers_begin_end_matching() {
    let source = create_temp_file("#define FOO 1\nint x = FOO;");
    let mut preprocessor = Preprocessor::new(PPConfig {
        emit_markers: true,
        ..Default::default()
    });
    preprocessor.add_source_file(source.path()).unwrap();

    let output = preprocessor.collect_tokens().unwrap();

    // Keep the id from the last marker of each kind seen in the stream.
    let begin_marker_id = output
        .iter()
        .filter_map(|tok| match &tok.kind {
            TokenKind::MacroBegin(info) => Some(info.marker_id),
            _ => None,
        })
        .last();
    let end_marker_id = output
        .iter()
        .filter_map(|tok| match &tok.kind {
            TokenKind::MacroEnd(info) => Some(info.begin_marker_id),
            _ => None,
        })
        .last();

    assert!(begin_marker_id.is_some(), "Should have MacroBegin");
    assert!(end_marker_id.is_some(), "Should have MacroEnd");
    assert_eq!(
        begin_marker_id.unwrap(),
        end_marker_id.unwrap(),
        "MacroBegin.marker_id should match MacroEnd.begin_marker_id"
    );
}
/// A new watcher reports no call and no recorded arguments.
#[test]
fn test_macro_call_watcher_basic() {
    let fresh = MacroCallWatcher::new();

    assert!(!fresh.was_called());
    assert!(fresh.last_args().is_none());
}
/// A watcher registered for an object-like macro is flagged as called and
/// records no arguments.
#[test]
fn test_macro_call_watcher_object_macro() {
    let source = create_temp_file("#define TEST_MACRO 42\nint x = TEST_MACRO;");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let watched = preprocessor.interner_mut().intern("TEST_MACRO");
    preprocessor.set_macro_called_callback(watched, Box::new(MacroCallWatcher::new()));

    let _tokens = preprocessor.collect_tokens().unwrap();

    let callback = preprocessor
        .get_macro_called_callback(watched)
        .expect("Callback not found");
    let watcher = callback
        .as_any()
        .downcast_ref::<MacroCallWatcher>()
        .expect("Failed to downcast to MacroCallWatcher");
    assert!(watcher.was_called(), "TEST_MACRO should have been called");
    assert!(watcher.last_args().is_none());
}
/// A watcher registered for a function-like macro is flagged as called and
/// records the two call arguments as text.
#[test]
fn test_macro_call_watcher_function_macro() {
    let source = create_temp_file("#define ADD(a, b) a + b\nint x = ADD(10, 20);");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let watched = preprocessor.interner_mut().intern("ADD");
    preprocessor.set_macro_called_callback(watched, Box::new(MacroCallWatcher::new()));

    let _tokens = preprocessor.collect_tokens().unwrap();

    let callback = preprocessor
        .get_macro_called_callback(watched)
        .expect("Callback not found");
    let watcher = callback
        .as_any()
        .downcast_ref::<MacroCallWatcher>()
        .expect("Failed to downcast to MacroCallWatcher");
    assert!(watcher.was_called(), "ADD should have been called");
    let recorded = watcher
        .last_args()
        .expect("Function macro should have arguments");
    assert_eq!(recorded.len(), 2, "ADD has 2 arguments");
    assert_eq!(recorded[0], "10");
    assert_eq!(recorded[1], "20");
}
/// After reading up to five tokens the watcher has seen `FOO(1)`; calling
/// `clear` then resets both the called flag and the recorded arguments.
#[test]
fn test_macro_call_watcher_clear() {
    let source = create_temp_file("#define FOO(x) x\nint a = FOO(1);\nint b = FOO(2);");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let watched = preprocessor.interner_mut().intern("FOO");
    preprocessor.set_macro_called_callback(watched, Box::new(MacroCallWatcher::new()));

    // Pull at most five tokens, stopping early at end of input.
    for _ in 0..5 {
        if preprocessor.next_token().unwrap().kind == TokenKind::Eof {
            break;
        }
    }

    let before_cb = preprocessor.get_macro_called_callback(watched).unwrap();
    let before = before_cb.as_any().downcast_ref::<MacroCallWatcher>().unwrap();
    assert!(before.was_called());
    let recorded = before.last_args().unwrap();
    assert_eq!(recorded[0], "1");

    let clear_cb = preprocessor.get_macro_called_callback_mut(watched).unwrap();
    let to_clear = clear_cb.as_any_mut().downcast_mut::<MacroCallWatcher>().unwrap();
    to_clear.clear();

    let after_cb = preprocessor.get_macro_called_callback(watched).unwrap();
    let after = after_cb.as_any().downcast_ref::<MacroCallWatcher>().unwrap();
    assert!(!after.was_called());
    assert!(after.last_args().is_none());
}
/// `take_called` returns `true` once after a call and `false` afterwards.
#[test]
fn test_macro_call_watcher_take_called() {
    let source = create_temp_file("#define BAR 99\nint x = BAR;");
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let watched = preprocessor.interner_mut().intern("BAR");
    preprocessor.set_macro_called_callback(watched, Box::new(MacroCallWatcher::new()));

    let _tokens = preprocessor.collect_tokens().unwrap();

    let callback = preprocessor.get_macro_called_callback(watched).unwrap();
    let watcher = callback.as_any().downcast_ref::<MacroCallWatcher>().unwrap();
    assert!(watcher.take_called(), "First take_called should return true");
    assert!(!watcher.take_called(), "Second take_called should return false");
}
/// Watchers are tracked per macro: `A` and `B` are invoked in the source and
/// flagged, while `C` is only defined and stays unflagged.
#[test]
fn test_macro_call_watcher_multiple_macros() {
    let source = create_temp_file(
        "#define A(x) x\n#define B(x) x\n#define C(x) x\nint a = A(1); int b = B(2);"
    );
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let name_a = preprocessor.interner_mut().intern("A");
    let name_b = preprocessor.interner_mut().intern("B");
    let name_c = preprocessor.interner_mut().intern("C");
    preprocessor.set_macro_called_callback(name_a, Box::new(MacroCallWatcher::new()));
    preprocessor.set_macro_called_callback(name_b, Box::new(MacroCallWatcher::new()));
    preprocessor.set_macro_called_callback(name_c, Box::new(MacroCallWatcher::new()));

    let _tokens = preprocessor.collect_tokens().unwrap();

    // Looks up the watcher registered for `name` and reads its called flag.
    let was_called = |name| {
        let callback = preprocessor.get_macro_called_callback(name).unwrap();
        callback
            .as_any()
            .downcast_ref::<MacroCallWatcher>()
            .unwrap()
            .was_called()
    };
    assert!(was_called(name_a), "A should have been called");
    assert!(was_called(name_b), "B should have been called");
    assert!(!was_called(name_c), "C should not have been called");
}
/// Uses a watcher on `_SV_HEAD` to classify structs while streaming tokens:
/// the watcher is cleared at each `struct` keyword and inspected at the next
/// `;`, so only structs whose body invoked `_SV_HEAD` are collected.
#[test]
fn test_macro_call_watcher_sv_head_pattern() {
    let source = create_temp_file(
        "#define _SV_HEAD(type) void *sv_any; type *sv_type\n\
         struct sv { _SV_HEAD(SV); };\n\
         struct av { _SV_HEAD(AV); };\n\
         struct other { int x; };"
    );
    let mut preprocessor = Preprocessor::new(PPConfig::default());
    preprocessor.add_source_file(source.path()).unwrap();
    let sv_head = preprocessor.interner_mut().intern("_SV_HEAD");
    preprocessor.set_macro_called_callback(sv_head, Box::new(MacroCallWatcher::new()));

    let mut sv_family_members = Vec::new();
    let mut pending_struct: Option<String> = None;
    loop {
        let token = preprocessor.next_token().unwrap();
        match token.kind {
            TokenKind::Eof => break,
            TokenKind::KwStruct => {
                // Reset the watcher so it only reflects this struct's body.
                if let Some(callback) = preprocessor.get_macro_called_callback_mut(sv_head) {
                    let watcher = callback
                        .as_any_mut()
                        .downcast_mut::<MacroCallWatcher>()
                        .unwrap();
                    watcher.clear();
                }
                let name_token = preprocessor.next_token().unwrap();
                if let TokenKind::Ident(id) = name_token.kind {
                    pending_struct = Some(preprocessor.interner().get(id).to_string());
                }
            }
            TokenKind::Semi => {
                // The first `;` after a struct name decides membership; later
                // ones find no pending name and do nothing.
                if let Some(struct_name) = pending_struct.take() {
                    if let Some(callback) = preprocessor.get_macro_called_callback(sv_head) {
                        let watcher = callback
                            .as_any()
                            .downcast_ref::<MacroCallWatcher>()
                            .unwrap();
                        if watcher.was_called() {
                            sv_family_members.push(struct_name);
                        }
                    }
                }
            }
            _ => {}
        }
    }

    assert!(sv_family_members.iter().any(|name| name == "sv"), "sv should be SV family");
    assert!(sv_family_members.iter().any(|name| name == "av"), "av should be SV family");
    assert!(!sv_family_members.iter().any(|name| name == "other"), "other should not be SV family");
}
}