use lazy_static::lazy_static;
use std::borrow::Cow;
use std::collections::{HashMap, VecDeque};
use std::convert::TryFrom;
use std::path::{Path, PathBuf};
use std::rc::Rc;
use super::files::FileProcessor;
use super::replace::{replace, Definition, Definitions};
use super::{Lexer, Token};
use crate::arch::TARGET;
use crate::data::error::CppError;
use crate::data::lex::{Keyword, Literal};
use crate::data::*;
use crate::get_str;
use crate::Files;
/// Builder for [`PreProcessor`], allowing the filename, debug flag, include
/// search path, and predefined macros to be configured before construction.
pub struct PreProcessorBuilder<'a> {
    // The source code to preprocess.
    buf: Rc<str>,
    // Name reported in diagnostics for `buf`.
    filename: PathBuf,
    // Debug flag, forwarded to the file processor / lexer.
    debug: bool,
    // Extra directories searched by `#include`, in addition to the defaults.
    search_path: Vec<Cow<'a, Path>>,
    // Macros defined before preprocessing starts.
    definitions: Definitions,
}
impl<'a> PreProcessorBuilder<'a> {
    /// Creates a builder for preprocessing `buf`, with a default (empty)
    /// filename, no extra search paths, no predefined macros, and
    /// debugging disabled.
    pub fn new<S: Into<Rc<str>>>(buf: S) -> PreProcessorBuilder<'a> {
        PreProcessorBuilder {
            debug: false,
            filename: PathBuf::default(),
            buf: buf.into(),
            search_path: Vec::new(),
            definitions: Definitions::new(),
        }
    }
    /// Sets the filename used for diagnostics.
    pub fn filename<P: Into<PathBuf>>(mut self, name: P) -> Self {
        self.filename = name.into();
        self
    }
    /// Enables or disables debug output.
    pub fn debug(mut self, yes: bool) -> Self {
        self.debug = yes;
        self
    }
    /// Adds a directory to the `#include` search path.
    pub fn search_path<C: Into<Cow<'a, Path>>>(mut self, path: C) -> Self {
        self.search_path.push(path.into());
        self
    }
    /// Predefines the macro `name` with the given definition.
    pub fn definition<D: Into<Definition>>(mut self, name: InternedStr, def: D) -> Self {
        self.definitions.insert(name, def.into());
        self
    }
    /// Builds the configured [`PreProcessor`].
    pub fn build(self) -> PreProcessor<'a> {
        PreProcessor::new(
            self.buf,
            self.filename,
            self.debug,
            self.search_path,
            self.definitions,
        )
    }
}
/// A C preprocessor: an iterator over [`Token`]s that expands macros and
/// executes `#` directives along the way.
pub struct PreProcessor<'a> {
    // Errors and warnings accumulated during preprocessing.
    error_handler: ErrorHandler,
    // Stack of the `#if`/`#elif`/`#else` branches we are currently inside.
    nested_ifs: Vec<IfState>,
    // Tokens produced by macro replacement that have not yet been returned.
    pending: VecDeque<Locatable<PendingToken>>,
    // Directories searched by `#include`.
    search_path: Vec<Cow<'a, Path>>,
    // Currently defined macros.
    definitions: Definitions,
    // Underlying lexer/file stack (also tracks `#include`d files).
    file_processor: FileProcessor,
}
/// A token queued for output, tagged with whether macro replacement has
/// already been performed on it.
enum PendingToken {
    // Already macro-expanded; emit as-is.
    Replaced(Token),
    // Not yet expanded; run `replace` on it before emitting.
    NeedsReplacement(Token),
}
impl From<Token> for CppToken {
fn from(t: Token) -> CppToken {
CppToken::Token(t)
}
}
impl From<Vec<Token>> for Definition {
fn from(tokens: Vec<Token>) -> Definition {
Definition::Object(tokens)
}
}
impl TryFrom<&str> for Definition {
    type Error = error::LexError;
    /// Lexes `value` into an object-like macro definition.
    ///
    /// A newline is appended so the lexer sees a complete line, and the text
    /// is registered under a throwaway `codespan` file id so no real file is
    /// needed. Fails with the first lex error encountered.
    fn try_from(value: &str) -> Result<Self, Self::Error> {
        let value = Rc::from(format!("{}\n", value));
        let mut files = codespan::Files::new();
        let dummy_id = files.add("<impl TryFrom<&str> for Definition>", Rc::clone(&value));
        let lexer = Lexer::new(dummy_id, value, false);
        lexer
            // Locations are irrelevant for a synthesized definition; keep only
            // the token (or error) itself.
            .map(|res| match res {
                Ok(loc) => Ok(loc.data),
                Err(err) => Err(err.data),
            })
            .collect::<Result<_, _>>()
            .map(Definition::Object)
    }
}
/// Which branch of a conditional-inclusion block we are currently inside.
/// Used to reject misplaced `#elif`/`#else` (e.g. `#else` after `#else`).
#[derive(Debug)]
enum IfState {
    If,
    Elif,
    Else,
}
/// Result of a preprocessor operation: a located value or a compile error.
pub(super) type CppResult<T> = Result<Locatable<T>, CompileError>;
impl Iterator for PreProcessor<'_> {
    type Item = CppResult<Token>;
    /// Returns the next fully-replaced token, draining sources in order:
    /// queued errors first, then tokens queued by earlier macro expansion,
    /// then fresh tokens from the file processor. Directives are executed as
    /// a side effect and produce no token of their own.
    fn next(&mut self) -> Option<Self::Item> {
        loop {
            let replacement = if let Some(err) = self.error_handler.pop_front() {
                // Report pending errors before producing more tokens.
                return Some(Err(err));
            } else if let Some(token) = self.pending.pop_front() {
                // A token left over from a previous macro expansion.
                self.handle_token(token.data, token.location)
            } else {
                match self.next_cpp_token()? {
                    Err(err) => return Some(Err(err)),
                    Ok(loc) => match loc.data {
                        CppToken::Directive(directive) => {
                            let start = loc.location.span.start;
                            // Execute the directive, then go look for a token.
                            match self.directive(directive, start) {
                                Err(err) => return Some(Err(err)),
                                Ok(()) => continue,
                            }
                        }
                        CppToken::Token(token) => {
                            self.handle_token(PendingToken::NeedsReplacement(token), loc.location)
                        }
                    },
                }
            };
            // `handle_token` may yield nothing (e.g. a macro with an empty
            // body); in that case loop and try again.
            if let Some(token) = replacement {
                return Some(token);
            }
        }
    }
}
impl<'a> PreProcessor<'a> {
/// Performs macro replacement on `token` (if it still needs it), queues any
/// extra tokens produced by the replacement, and converts identifiers that
/// spell a keyword into keyword tokens.
///
/// Returns the first resulting token, or `None` if replacement produced no
/// tokens at all.
fn handle_token(
    &mut self,
    token: PendingToken,
    location: Location,
) -> Option<CppResult<Token>> {
    let mut token = match token {
        PendingToken::Replaced(t) => Some(Ok(Locatable::new(t, location))),
        PendingToken::NeedsReplacement(token) => {
            let mut replacement_list =
                replace(&self.definitions, token, &mut self.file_processor, location)
                    .into_iter();
            // One token can expand to many: emit the first now and queue the
            // rest (already replaced) for subsequent `next()` calls.
            let first = replacement_list.next();
            for remaining in replacement_list {
                match remaining {
                    Err(err) => self.error_handler.push_back(err),
                    Ok(token) => self.pending.push_back(token.map(PendingToken::Replaced)),
                }
            }
            first
        }
    };
    // The lexer does not distinguish keywords from identifiers; do that here,
    // after macro replacement. (The original matched `data @ Token::Id(_)` and
    // then re-matched `data` to extract the name — one nested `if let` does
    // both.)
    if let Some(Ok(Locatable { data, .. })) = &mut token {
        if let Token::Id(name) = data {
            if let Some(&keyword) = KEYWORDS.get(get_str!(name)) {
                *data = Token::Keyword(keyword);
            }
        }
    }
    token
}
/// Creates a preprocessor for the source `chars`.
///
/// Predefines the standard conformance macros (`__STDC__` and friends, plus
/// architecture/OS macros derived from [`TARGET`]) before merging in
/// `user_definitions`, and seeds the `#include` search path with the
/// conventional `/usr/local/include` and `/usr/include` directories (and
/// their target-specific variants) before appending `user_search_path`.
pub fn new<'search: 'a, I: IntoIterator<Item = Cow<'search, Path>>, S: Into<Rc<str>>>(
    chars: S,
    filename: impl Into<std::ffi::OsString>,
    debug: bool,
    user_search_path: I,
    user_definitions: HashMap<InternedStr, Definition>,
) -> Self {
    // Target triple fragment (arch-os-environment), used to build the
    // target-specific include directories below.
    let system_path = format!(
        "{}-{}-{}",
        TARGET.architecture, TARGET.operating_system, TARGET.environment
    );
    // Shorthand for an object-like macro that expands to a single integer.
    let int = |i| Definition::Object(vec![Token::Literal(Literal::Int(i))]);
    #[allow(clippy::inconsistent_digit_grouping)]
    let mut definitions = map! {
        format!("__{}__", TARGET.architecture).into() => int(1),
        format!("__{}__", TARGET.operating_system).into() => int(1),
        "__STDC__".into() => int(1),
        "__STDC_HOSTED__".into() => int(1),
        "__STDC_VERSION__".into() => int(2011_12),
        "__STDC_NO_ATOMICS__".into() => int(1),
        "__STDC_NO_COMPLEX__".into() => int(1),
        "__STDC_NO_THREADS__".into() => int(1),
        "__STDC_NO_VLA__".into() => int(1),
    };
    // `extend` overwrites duplicates, so user definitions win over built-ins.
    definitions.extend(user_definitions);
    let mut search_path = vec![
        PathBuf::from(format!("/usr/local/include/{}", system_path)).into(),
        Path::new("/usr/local/include").into(),
        PathBuf::from(format!("/usr/include/{}", system_path)).into(),
        Path::new("/usr/include").into(),
    ];
    search_path.extend(user_search_path.into_iter());
    let file_processor = FileProcessor::new(chars, filename, debug);
    Self {
        error_handler: Default::default(),
        nested_ifs: Default::default(),
        pending: Default::default(),
        search_path,
        definitions,
        file_processor,
    }
}
/// Drains and returns all warnings accumulated so far, both from the
/// preprocessor itself and from the underlying file processor.
pub fn warnings(&mut self) -> VecDeque<CompileWarning> {
    let mut all_warnings = std::mem::take(&mut self.error_handler.warnings);
    let file_warnings = std::mem::take(&mut self.file_processor.error_handler.warnings);
    all_warnings.extend(file_warnings);
    all_warnings
}
/// Location corresponding to the end of input.
pub fn eof(&self) -> Location {
    self.file_processor.eof()
}
/// Consumes the preprocessor, returning the files it has processed.
pub fn into_files(self) -> Files {
    self.file_processor.into_files()
}
// The remaining methods are thin delegations to the file processor / lexer.
/// Location spanning from byte offset `start` to the current position.
fn span(&self, start: u32) -> Location {
    self.file_processor.span(start)
}
fn lexer(&mut self) -> &Lexer {
    self.file_processor.lexer()
}
fn lexer_mut(&mut self) -> &mut Lexer {
    self.file_processor.lexer_mut()
}
/// Current line number in the current file.
fn line(&self) -> usize {
    self.file_processor.line()
}
/// Lexes the rest of the current line; `whitespace` controls whether
/// whitespace tokens are kept.
fn tokens_until_newline(&mut self, whitespace: bool) -> Vec<CompileResult<Locatable<Token>>> {
    self.file_processor.tokens_until_newline(whitespace)
}
fn is_whitespace(res: &CppResult<Token>) -> bool {
matches!(
res,
Ok(Locatable {
data: Token::Whitespace(_),
..
})
)
}
fn is_not_whitespace(res: &CppResult<Token>) -> bool {
!PreProcessor::is_whitespace(res)
}
/// Returns the next token, recognizing a directive when a `#` occurs as the
/// first token on a line.
///
/// The identifier after `#` is only treated as a directive name if it is on
/// the same line as the `#`; if the `#` stands alone on its line, the token
/// that follows it is returned as an ordinary token.
fn next_cpp_token(&mut self) -> Option<CppResult<CppToken>> {
    let next_token = self.file_processor.next()?;
    // `matches!` replaces the original match-to-bool (clippy:
    // `match_like_matches_macro`).
    let is_hash = matches!(
        next_token,
        Ok(Locatable {
            data: Token::Hash,
            ..
        })
    );
    // A `#` introduces a directive only when it is the first token on its line.
    Some(if is_hash && !self.file_processor.seen_line_token() {
        let line = self.file_processor.line();
        match self.file_processor.next_non_whitespace()? {
            Ok(Locatable {
                data: Token::Id(id),
                location,
            }) if self.file_processor.line() == line => {
                if let Ok(directive) = DirectiveKind::try_from(get_str!(id)) {
                    Ok(Locatable::new(CppToken::Directive(directive), location))
                } else {
                    Err(Locatable::new(CppError::InvalidDirective.into(), location))
                }
            }
            Ok(other) => {
                if self.file_processor.line() == line {
                    // Something other than an identifier directly after `#`.
                    Err(other.map(|tok| CppError::UnexpectedToken("directive", tok).into()))
                } else {
                    // The `#` stood alone on its line; pass the next token through.
                    Ok(other.into())
                }
            }
            other => other.map(Locatable::from),
        }
    } else {
        next_token.map(Locatable::from)
    })
}
/// Consumes the next token, requiring it to be an identifier.
///
/// The location is captured up front so that an end-of-file error can still
/// be reported at a sensible position.
fn expect_id(&mut self) -> CppResult<InternedStr> {
    let location = self.file_processor.span(self.file_processor.offset());
    match self.file_processor.next() {
        Some(Ok(Locatable {
            data: Token::Id(name),
            location,
        })) => Ok(Locatable::new(name, location)),
        Some(Err(err)) => Err(err),
        Some(Ok(other)) => {
            Err(other.map(|tok| CppError::UnexpectedToken("identifier", tok).into()))
        }
        None => Err(CompileError {
            data: CppError::EndOfFile("identifier").into(),
            location,
        }),
    }
}
/// Executes the directive `kind`, which starts at byte offset `start`.
///
/// Fix: when a `#elif`/`#else` is seen *here* (i.e. the branch we were
/// emitting was taken), `consume_directive` skips up to and including the
/// matching `#endif` — so the `nested_ifs` entry for this conditional must
/// be popped first. Previously it was left on the stack, so a later stray
/// `#endif` (or `#else`) would incorrectly match the stale entry instead of
/// being reported as an error.
fn directive(&mut self, kind: DirectiveKind, start: u32) -> Result<(), CompileError> {
    use crate::data::error::Warning as WarningDiagnostic;
    use DirectiveKind::*;
    match kind {
        If => {
            let condition = self.boolean_expr()?;
            self.if_directive(condition, start)
        }
        IfNDef => {
            self.consume_whitespace_oneline(start, CppError::ExpectedMacroId)?;
            let name = self.expect_id()?;
            self.if_directive(!self.definitions.contains_key(&name.data), start)
        }
        IfDef => {
            self.consume_whitespace_oneline(start, CppError::ExpectedMacroId)?;
            let name = self.expect_id()?;
            self.if_directive(self.definitions.contains_key(&name.data), start)
        }
        // A `#elif` reaching this function (instead of `consume_directive`)
        // means the preceding branch was taken, so this branch is skipped.
        Elif => match self.nested_ifs.last() {
            None => Err(CompileError::new(
                CppError::UnexpectedElif { early: true }.into(),
                self.span(start),
            )),
            Some(IfState::If) | Some(IfState::Elif) => {
                // `consume_directive` consumes the matching `#endif`, so this
                // conditional's state must be popped here.
                self.nested_ifs.pop();
                self.consume_directive(start, false)
            }
            Some(IfState::Else) => Err(CompileError::new(
                CppError::UnexpectedElif { early: false }.into(),
                self.span(start),
            )),
        },
        Else => match self.nested_ifs.last() {
            None => Err(CompileError::new(
                CppError::UnexpectedElse.into(),
                self.span(start),
            )),
            Some(IfState::If) | Some(IfState::Elif) => {
                // See the `Elif` arm: pop before skipping to `#endif`.
                self.nested_ifs.pop();
                self.consume_directive(start, false)
            }
            Some(IfState::Else) => Err(CompileError::new(
                CppError::UnexpectedElse.into(),
                self.span(start),
            )),
        },
        EndIf => {
            if self.nested_ifs.pop().is_none() {
                Err(CompileError::new(
                    CppError::UnexpectedEndIf.into(),
                    self.span(start),
                ))
            } else {
                Ok(())
            }
        }
        Define => self.define(start),
        Undef => {
            self.consume_whitespace_oneline(start, CppError::EmptyExpression)?;
            let name = self.expect_id()?;
            // Undefining a macro that was never defined is not an error.
            self.definitions.remove(&name.data);
            Ok(())
        }
        Pragma => {
            // Pragmas are ignored (with a warning); discard the rest of the line.
            self.error_handler
                .warn(WarningDiagnostic::IgnoredPragma, self.span(start));
            drop(self.tokens_until_newline(false));
            Ok(())
        }
        Warning => {
            let tokens: Vec<_> = self
                .tokens_until_newline(false)
                .into_iter()
                .map(|res| res.map(|l| l.data))
                .collect::<Result<_, _>>()?;
            self.error_handler
                .warn(WarningDiagnostic::User(tokens), self.span(start));
            Ok(())
        }
        Error => {
            let tokens: Vec<_> = self
                .tokens_until_newline(false)
                .into_iter()
                .map(|res| res.map(|l| l.data))
                .collect::<Result<_, _>>()?;
            self.error_handler
                .error(CppError::User(tokens), self.span(start));
            Ok(())
        }
        Line => {
            self.error_handler.warn(
                WarningDiagnostic::Generic("#line is not yet implemented".into()),
                self.span(start),
            );
            drop(self.tokens_until_newline(false));
            Ok(())
        }
        Include => self.include(start),
    }
}
/// Evaluates the controlling expression of an `#if`/`#elif` to a boolean.
fn boolean_expr(&mut self) -> Result<bool, CompileError> {
    let start = self.file_processor.offset();
    // The whole rest of the line is the expression.
    let lex_tokens: Vec<_> = self
        .tokens_until_newline(false)
        .into_iter()
        .collect::<Result<_, CompileError>>()?;
    let location = self.span(start);
    // `truthy` coerces the expression to boolean; `constexpr` folds it to a
    // literal, so anything except `(Int, Bool)` here is a bug downstream.
    match Self::cpp_expr(&self.definitions, lex_tokens.into_iter(), location)?
        .truthy(&mut self.error_handler)
        .constexpr()?
        .data
    {
        (Literal::Int(i), Type::Bool) => Ok(i != 0),
        _ => unreachable!("bug in const_fold or parser: cpp cond should be boolean"),
    }
}
fn defined(
mut lex_tokens: impl Iterator<Item = Locatable<Token>>,
location: Location,
) -> Result<InternedStr, CompileError> {
enum State {
Start,
SawParen,
SawId(InternedStr),
};
use State::*;
let mut state = Start;
loop {
return match lex_tokens.next() {
None => Err(CompileError::new(
CppError::EndOfFile("defined(identifier)").into(),
location,
)),
Some(Locatable {
data: Token::Id(def),
location,
}) => match state {
Start => Ok(def),
SawParen => {
state = SawId(def);
continue;
}
SawId(_) => Err(CompileError::new(
CppError::UnexpectedToken("right paren", Token::Id(def)).into(),
location,
)),
},
Some(Locatable {
data: Token::LeftParen,
location,
}) => match state {
Start => {
state = SawParen;
continue;
}
_ => Err(CompileError::new(
CppError::UnexpectedToken("identifier or right paren", Token::LeftParen)
.into(),
location,
)),
},
Some(Locatable {
data: Token::RightParen,
location,
}) => match state {
Start => Err(CompileError::new(
CppError::UnexpectedToken("identifier or left paren", Token::RightParen)
.into(),
location,
)),
SawParen => Err(CompileError::new(
CppError::UnexpectedToken("identifier", Token::RightParen).into(),
location,
)),
SawId(def) => Ok(def),
},
Some(other) => Err(CompileError::new(
CppError::UnexpectedToken("identifier", other.data).into(),
other.location,
)),
};
}
}
/// Builds the expression for an `#if`/`#elif` condition.
///
/// First substitutes `defined X` / `defined(X)` with `1`/`0` (so the operand
/// is not macro-expanded), then macro-expands everything else, replaces any
/// remaining identifier with `0`, and finally parses and analyzes the result
/// as an ordinary C expression.
pub fn cpp_expr<L>(
    definitions: &Definitions,
    mut lex_tokens: L,
    location: Location,
) -> CompileResult<hir::Expr>
where
    L: Iterator<Item = Locatable<Token>>,
{
    let mut cpp_tokens = Vec::with_capacity(lex_tokens.size_hint().1.unwrap_or_default());
    let defined = "defined".into();
    // A `for` loop cannot be used here: `Self::defined` pulls additional
    // tokens out of the same iterator.
    while let Some(token) = lex_tokens.next() {
        let token = match token {
            Locatable {
                data: Token::Id(name),
                location,
            } if name == defined => {
                let def = Self::defined(&mut lex_tokens, location)?;
                let literal = if definitions.contains_key(&def) {
                    Literal::Int(1)
                } else {
                    Literal::Int(0)
                };
                location.with(Token::Literal(literal))
            }
            _ => token,
        };
        cpp_tokens.push(token);
    }
    let mut expr_location = None;
    let cpp_tokens: Vec<_> = cpp_tokens
        .into_iter()
        // `flat_map` instead of `.map(...).flatten()` (clippy: `map_flatten`).
        // The empty iterator means function-like macro calls cannot pull in
        // tokens from beyond this line.
        .flat_map(|t| replace(definitions, t.data, std::iter::empty(), t.location))
        .filter(PreProcessor::is_not_whitespace)
        .map(|mut token| {
            if let Ok(tok) = &mut token {
                expr_location = Some(location.maybe_merge(expr_location));
                // Identifiers that survive replacement evaluate to 0 in
                // preprocessor conditionals.
                if let Token::Id(_) = tok.data {
                    tok.data = Token::Literal(Literal::Int(0));
                }
            }
            token
        })
        .collect();
    if cpp_tokens.is_empty() {
        return Err(CompileError::new(
            CppError::EmptyExpression.into(),
            location,
        ));
    }
    use crate::{analyze::PureAnalyzer, Parser};
    let mut parser = Parser::new(cpp_tokens.into_iter(), false);
    let expr = parser.expr()?;
    if !parser.is_empty() {
        return Err(CompileError::new(
            CppError::TooManyTokens.into(),
            // Leftover tokens imply at least one token was successfully
            // replaced, so `expr_location` was set above.
            expr_location.unwrap(),
        ));
    }
    Ok(PureAnalyzer::new().expr(expr))
}
/// Handles `#if`, `#ifdef`, and `#ifndef`: either enters the branch or
/// skips ahead to a `#elif`/`#else`/`#endif`.
fn if_directive(&mut self, condition: bool, start: u32) -> Result<(), CompileError> {
    if !condition {
        // False condition: skip tokens, still considering later branches.
        return self.consume_directive(start, true);
    }
    self.nested_ifs.push(IfState::If);
    Ok(())
}
/// Skips tokens up to and including the `#endif` that matches the current
/// conditional.
///
/// If `consume_if` is true (we are skipping a not-taken `#if` branch), a
/// `#elif` with a true condition or a `#else` at the outermost depth stops
/// the skip early and records the newly-entered branch in `nested_ifs`.
/// Errors with `UnterminatedIf` if the input ends first.
fn consume_directive(&mut self, start: u32, consume_if: bool) -> Result<(), CompileError> {
    // Nesting depth: the `#endif` that brings this back to 0 is the match.
    let mut depth = 1;
    while depth > 0 {
        let directive = match self.next_cpp_token() {
            Some(Ok(Locatable {
                data: CppToken::Directive(d),
                ..
            })) => d,
            // Ordinary tokens (and lex errors) in a skipped region are discarded.
            Some(_) => continue,
            None => {
                return Err(Locatable::new(CppError::UnterminatedIf, self.span(start)).into())
            }
        };
        if directive == DirectiveKind::If
            || directive == DirectiveKind::IfDef
            || directive == DirectiveKind::IfNDef
        {
            depth += 1;
        } else if directive == DirectiveKind::EndIf {
            depth -= 1;
        } else if depth == 1 {
            // Only `#elif`/`#else` belonging to *this* conditional matter, and
            // only when we are still looking for a branch to take.
            if consume_if {
                if directive == DirectiveKind::Elif {
                    let condition = self.boolean_expr()?;
                    if !condition {
                        continue;
                    } else {
                        self.nested_ifs.push(IfState::Elif);
                        return Ok(());
                    }
                } else if directive == DirectiveKind::Else {
                    self.nested_ifs.push(IfState::Else);
                    return Ok(());
                }
            }
        }
    }
    Ok(())
}
/// Parses the parameter list of a function-like macro definition, after the
/// opening `(` has been consumed. Returns the parameter names.
///
/// Recoverable problems (variadic `...`, a non-identifier where a parameter
/// was expected) are reported through `error_handler` and parsing continues.
fn fn_args(&mut self, start: u32) -> Result<Vec<InternedStr>, Locatable<Error>> {
    let mut arguments = Vec::new();
    loop {
        match self.file_processor.next_non_whitespace() {
            None => {
                return Err(CompileError::new(
                    CppError::EndOfFile("identifier or ')'").into(),
                    self.lexer().span(start),
                ));
            }
            Some(Err(err)) => {
                // Queue the lex error, keep looking for parameters.
                self.error_handler.push_back(err);
                continue;
            }
            Some(Ok(Locatable {
                data: Token::Ellipsis,
                ..
            })) => {
                // Variadic macros are not supported; warn and ignore the `...`.
                let location = self.lexer().span(start);
                self.error_handler
                    .warn(crate::data::error::Warning::IgnoredVariadic, location);
            }
            Some(Ok(Locatable {
                data: Token::Id(id),
                ..
            })) => arguments.push(id),
            Some(Ok(Locatable {
                data: other,
                location,
            })) => self.error_handler.error(
                CppError::UnexpectedToken("identifier or ')'", other),
                location,
            ),
        }
        // After each parameter: `)` finishes the list, `,` continues it.
        if self.lexer_mut().match_next(')') {
            return Ok(arguments);
        }
        if self.lexer_mut().match_next(',') {
            continue;
        }
        match self.file_processor.next() {
            None => {
                return Err(CompileError::new(
                    CppError::EndOfFile("identifier or ')'").into(),
                    self.lexer().span(start),
                ))
            }
            Some(Err(err)) => return Err(err),
            Some(Ok(other)) => self.error_handler.error(
                CppError::UnexpectedToken("',' or ')'", other.data),
                other.location,
            ),
        }
    }
}
/// Handles `#define`, for both object-like and function-like macros.
///
/// A function-like macro requires the `(` to come immediately after the
/// name (`match_next` on the raw lexer, no whitespace skipping), so
/// `#define f (a)` defines an object-like macro whose body is `(a)`.
fn define(&mut self, start: u32) -> Result<(), Locatable<Error>> {
    // Collects the macro body: the rest of the line, minus leading whitespace.
    let body = |this: &mut PreProcessor| {
        this.tokens_until_newline(true)
            .into_iter()
            .skip_while(PreProcessor::is_whitespace)
            .map(|res| res.map(|loc| loc.data))
            .collect::<Result<Vec<_>, Locatable<Error>>>()
    };
    self.consume_whitespace_oneline(start, CppError::EmptyDefine)?;
    let id = self.expect_id()?;
    if self.lexer_mut().match_next('(') {
        // Function-like macro: parse the (possibly empty) parameter list.
        self.consume_whitespace_oneline(
            self.file_processor.offset(),
            CppError::Expected(")", "macro parameter list"),
        )?;
        let params = if !self.lexer_mut().match_next(')') {
            self.fn_args(start)?
        } else {
            Vec::new()
        };
        let body = body(self)?;
        self.define_macro(id.data, Definition::Function { params, body })
            .map_err(|e| self.span(start).with(e))?;
        Ok(())
    } else {
        let tokens = body(self)?;
        self.define_macro(id.data, Definition::Object(tokens))
            .map_err(|e| self.span(start).with(e))?;
        Ok(())
    }
}
/// Records `name` → `definition`, rejecting conflicting redefinitions.
///
/// Redefining a macro with an identical body is allowed; any other
/// redefinition is an `IncompatibleRedefinition` error.
fn define_macro(&mut self, name: InternedStr, definition: Definition) -> Result<(), CppError> {
    use std::collections::hash_map::Entry;
    match self.definitions.entry(name) {
        Entry::Vacant(slot) => {
            slot.insert(definition);
            Ok(())
        }
        Entry::Occupied(existing) if existing.get() == &definition => Ok(()),
        Entry::Occupied(_) => Err(CppError::IncompatibleRedefinition(name)),
    }
}
/// Handles `#include`, in its three forms: `"file"`, `<file>`, and a macro
/// expanding to one of those (the macro form is currently `unimplemented!`).
fn include(&mut self, start: u32) -> Result<(), Locatable<Error>> {
    use crate::data::lex::ComparisonToken;
    self.consume_whitespace_oneline(start, CppError::EmptyInclude)?;
    let lexer = self.lexer_mut();
    // `local` distinguishes `"..."` includes (search relative to the
    // including file first) from `<...>` includes.
    let local = if lexer.match_next('"') {
        true
    } else if lexer.match_next('<') {
        false
    } else {
        // Neither delimiter: the include target must be a macro identifier.
        let (id, location) = match self.file_processor.next() {
            Some(Ok(Locatable {
                data: Token::Id(id),
                location,
            })) => (id, location),
            Some(Err(err)) => return Err(err),
            Some(Ok(other)) => {
                return Err(CompileError::new(
                    CppError::UnexpectedToken("include file", other.data).into(),
                    other.location,
                ))
            }
            None => {
                return Err(CompileError::new(
                    CppError::EndOfFile("include file").into(),
                    self.span(start),
                ))
            }
        };
        // Expand the macro and look at the first replacement token: a string
        // literal or `<` would start a valid include, but neither is
        // implemented yet.
        match replace(
            &self.definitions,
            Token::Id(id),
            &mut self.file_processor,
            location,
        )
        .into_iter()
        .next()
        {
            Some(Ok(Locatable {
                data: Token::Literal(Literal::Str(_)),
                ..
            })) => unimplemented!("#include for macros"),
            Some(Ok(Locatable {
                data: Token::Comparison(ComparisonToken::Less),
                ..
            })) => unimplemented!("#include for macros"),
            Some(Err(err)) => return Err(err),
            Some(Ok(other)) => {
                return Err(CompileError::new(
                    CppError::UnexpectedToken("include file", other.data).into(),
                    other.location,
                ))
            }
            None => {
                return Err(CompileError::new(
                    CppError::EndOfFile("include file").into(),
                    self.span(start),
                ))
            }
        }
    };
    // The filename is the raw text up to the closing delimiter.
    let end = if local { '"' } else { '>' };
    let filename = PathBuf::from(self.chars_until(end).to_owned());
    self.include_path(filename, local, start)
}
/// Resolves `filename` to an on-disk path for `#include`.
///
/// Resolution order: absolute paths are used as-is; for `#include "..."`
/// (`local == true`) the including file's directory is tried first; then
/// each directory in `self.search_path`. Errors on an empty filename or if
/// the file cannot be found anywhere.
///
/// Fix: line with `¤t_path` was HTML-entity mojibake (`&curren` had been
/// collapsed to `¤`) for `&current_path`; restored so the code compiles.
fn find_include_path(
    &mut self,
    filename: &Path,
    local: bool,
    start: u32,
) -> Result<PathBuf, Locatable<Error>> {
    if filename.as_os_str().is_empty() {
        return Err(CompileError::new(
            CppError::EmptyInclude.into(),
            self.span(start),
        ));
    }
    let not_found = |this: &Self, filename: &Path| {
        Err(this.span(start).error(CppError::FileNotFound(
            filename.to_string_lossy().to_string(),
        )))
    };
    if filename.is_absolute() {
        return if filename.exists() {
            Ok(filename.to_owned())
        } else {
            not_found(self, filename)
        };
    }
    // `#include "file.h"` looks in the including file's directory first.
    if local {
        let current_path = self.file_processor.path();
        let relative_path = &current_path
            .parent()
            .unwrap_or_else(|| std::path::Path::new(""));
        let resolved = relative_path.join(filename);
        if resolved.exists() {
            return Ok(resolved);
        }
    }
    for path in &self.search_path {
        let mut buf = path.clone().into_owned();
        buf.push(filename);
        if buf.exists() {
            return Ok(buf);
        }
    }
    not_found(self, filename)
}
/// Reads the resolved include file (falling back to a built-in header) and
/// pushes it onto the file processor's stack so its tokens come next.
fn include_path(
    &mut self,
    filename: PathBuf,
    local: bool,
    start: u32,
) -> Result<(), Locatable<Error>> {
    let (path, src) = match self.find_include_path(&filename, local, start) {
        Ok(path) => {
            let src = std::fs::read_to_string(&path)
                .map_err(|err| Locatable {
                    data: CppError::IO(err.to_string()),
                    location: self.span(start),
                })?
                .into();
            (path, src)
        }
        Err(not_found) => {
            // Not found on disk: fall back to the headers embedded in the
            // compiler binary, matched by bare file name only.
            let filename = match filename.file_name().and_then(|f| f.to_str()) {
                None => return Err(not_found),
                Some(f) => f,
            };
            match get_builtin_header(filename) {
                Some(file) => {
                    // Synthetic path so diagnostics show `<builtin>/...`.
                    let mut path = PathBuf::from("<builtin>");
                    path.push(filename);
                    (path, Rc::from(file))
                }
                None => return Err(not_found),
            }
        }
    };
    let source = crate::Source {
        path,
        code: Rc::clone(&src),
    };
    self.file_processor.add_file(filename, source);
    Ok(())
}
/// Consumes raw characters up to (but not including) `end`, advancing the
/// lexer past the delimiter itself; used for `#include` filenames.
/// If `end` never occurs, the rest of the buffer is consumed and returned.
fn chars_until(&mut self, end: char) -> &str {
    let lexer = self.file_processor.lexer_mut();
    let start = lexer.location.offset as usize;
    if let Some(idx) = lexer.chars[start..].find(end) {
        let stop = start + idx;
        // Advance past the delimiter without including it in the result.
        lexer.location.offset = stop as u32 + 1;
        &lexer.chars[start..stop]
    } else {
        lexer.location.offset = lexer.chars.len() as u32;
        &lexer.chars[start..]
    }
}
pub fn next_non_whitespace(&mut self) -> Option<CppResult<Token>> {
loop {
match self.next() {
Some(Ok(Locatable {
data: Token::Whitespace(_),
..
})) => continue,
other => break other,
}
}
}
/// Consumes whitespace, but reports `error` if the whitespace spanned a
/// newline — directives must fit on a single line.
#[inline]
fn consume_whitespace_oneline(
    &mut self,
    start: u32,
    error: CppError,
) -> Result<String, CompileError> {
    let starting_line = self.line();
    let whitespace = self.file_processor.consume_whitespace();
    if self.line() == starting_line {
        Ok(whitespace)
    } else {
        Err(self.span(start).error(error))
    }
}
}
/// Expands to an array of `(filename, contents)` pairs, with each file's
/// contents embedded at compile time via `include_str!` from `../../headers/`.
macro_rules! built_in_headers {
    ( $($filename: literal),+ $(,)? ) => {
        [
            $( ($filename, include_str!(concat!("../../headers/", $filename))) ),+
        ]
    };
}
// Headers shipped with the compiler itself, used as a fallback when a header
// is not found on disk (see `include_path` / `get_builtin_header`).
const PRECOMPILED_HEADERS: [(&str, &str); 2] = built_in_headers! {
    "stdarg.h",
    "stddef.h",
};
/// Looks up the contents of a built-in header by file name.
fn get_builtin_header(expected: impl AsRef<str>) -> Option<&'static str> {
    let wanted = expected.as_ref();
    PRECOMPILED_HEADERS
        .iter()
        .find_map(|&(name, contents)| if name == wanted { Some(contents) } else { None })
}
/// The preprocessor directives this implementation recognizes.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum DirectiveKind {
    If,
    IfDef,
    IfNDef,
    Elif,
    Else,
    EndIf,
    Include,
    Define,
    Undef,
    // Recognized but not yet implemented (warns and ignores the line).
    Line,
    Warning,
    Error,
    Pragma,
}
/// Output of the directive-recognition pass: either an ordinary token or a
/// directive name that still needs to be executed.
#[derive(Clone, Debug, PartialEq)]
enum CppToken {
    Token(Token),
    Directive(DirectiveKind),
}
impl From<Locatable<Token>> for Locatable<CppToken> {
fn from(token: Locatable<Token>) -> Locatable<CppToken> {
token.map(CppToken::Token)
}
}
impl TryFrom<&str> for DirectiveKind {
    type Error = ();
    /// Parses a directive name — the identifier that follows `#`.
    fn try_from(s: &str) -> Result<Self, ()> {
        use DirectiveKind::*;
        match s {
            "if" => Ok(If),
            "ifdef" => Ok(IfDef),
            "ifndef" => Ok(IfNDef),
            "elif" => Ok(Elif),
            "else" => Ok(Else),
            "endif" => Ok(EndIf),
            "include" => Ok(Include),
            "define" => Ok(Define),
            "undef" => Ok(Undef),
            "line" => Ok(Line),
            "warning" => Ok(Warning),
            "error" => Ok(Error),
            "pragma" => Ok(Pragma),
            _ => Err(()),
        }
    }
}
lazy_static! {
    /// Map from keyword spelling to its `Keyword` token (the C keywords this
    /// compiler recognizes, plus `__builtin_va_list`). Used by `handle_token`
    /// to turn identifiers into keywords after macro replacement.
    static ref KEYWORDS: HashMap<&'static str, Keyword> = map!{
        "if" => Keyword::If,
        "else" => Keyword::Else,
        "do" => Keyword::Do,
        "while" => Keyword::While,
        "for" => Keyword::For,
        "switch" => Keyword::Switch,
        "case" => Keyword::Case,
        "default" => Keyword::Default,
        "break" => Keyword::Break,
        "continue" => Keyword::Continue,
        "return" => Keyword::Return,
        "goto" => Keyword::Goto,
        "__builtin_va_list" => Keyword::VaList,
        "_Bool" => Keyword::Bool,
        "char" => Keyword::Char,
        "short" => Keyword::Short,
        "int" => Keyword::Int,
        "long" => Keyword::Long,
        "float" => Keyword::Float,
        "double" => Keyword::Double,
        "_Complex" => Keyword::Complex,
        "_Imaginary" => Keyword::Imaginary,
        "void" => Keyword::Void,
        "signed" => Keyword::Signed,
        "unsigned" => Keyword::Unsigned,
        "typedef" => Keyword::Typedef,
        "enum" => Keyword::Enum,
        "union" => Keyword::Union,
        "struct" => Keyword::Struct,
        "const" => Keyword::Const,
        "volatile" => Keyword::Volatile,
        "restrict" => Keyword::Restrict,
        "_Atomic" => Keyword::Atomic,
        "_Thread_local" => Keyword::ThreadLocal,
        "inline" => Keyword::Inline,
        "_Noreturn" => Keyword::NoReturn,
        "auto" => Keyword::Auto,
        "register" => Keyword::Register,
        "static" => Keyword::Static,
        "extern" => Keyword::Extern,
        "sizeof" => Keyword::Sizeof,
        "_Alignof" => Keyword::Alignof,
        "_Alignas" => Keyword::Alignas,
        "_Generic" => Keyword::Generic,
        "_Static_assert" => Keyword::StaticAssert,
    };
}
#[cfg(test)]
mod tests {
use super::*;
use crate::data::lex::test::{cpp, cpp_no_newline};
// Asserts that preprocessing `$src` yields the preprocessor error matched by
// `$err`; panics with `$description` if a different error occurs.
macro_rules! assert_err {
($src: expr, $err: pat, $description: expr $(,)?) => {
match cpp($src).next_non_whitespace().unwrap().unwrap_err().data {
Error::PreProcessor($err) => {}
Error::PreProcessor(other) => panic!("expected {}, got {}", $description, other),
_ => panic!("expected cpp err"),
}
};
}
// Asserts that `token` is the keyword `expected`.
fn assert_keyword(token: Option<CppResult<Token>>, expected: Keyword) {
match token {
Some(Ok(Locatable {
data: Token::Keyword(actual),
..
})) => assert_eq!(actual, expected),
_ => panic!("not a keyword: {:?}", token),
}
}
// Compares two preprocessed token streams, ignoring whitespace and locations.
fn is_same_preprocessed(xs: PreProcessor, ys: PreProcessor) -> bool {
let to_vec = |xs: PreProcessor| {
xs.filter(PreProcessor::is_not_whitespace)
.map(|res| res.map(|token| token.data))
.collect::<Vec<_>>()
};
to_vec(xs) == to_vec(ys)
}
// Asserts that `src` preprocesses to the same (non-whitespace) tokens as `cpp_src`.
fn assert_same(src: &str, cpp_src: &str) {
assert!(
is_same_preprocessed(cpp(src), cpp(cpp_src)),
"{} is not the same as {}",
src,
cpp_src,
);
}
// Asserts exact textual equality of the pretty-printed output, including
// whitespace; the expected output gains one trailing newline (see `format!`).
fn assert_same_exact(src: &str, cpp_src: &str) {
let pprint = cpp(src)
.filter_map(|res| res.ok().map(|token| token.data.to_string()))
.collect::<Vec<_>>()
.join("");
assert_eq!(pprint, format!("{}\n", cpp_src)); }
// Every keyword in the table should round-trip through the preprocessor
// (VaList is skipped; it is not a real C keyword spelling test target here).
#[test]
fn keywords() {
for keyword in KEYWORDS.values() {
if *keyword != Keyword::VaList {
println!("{}", keyword);
assert_keyword(cpp(&keyword.to_string()).next(), *keyword);
}
}
}
// `#if`/`#else` branch selection, including arithmetic conditions.
#[test]
fn if_directive() {
assert_same(
"
#if a
b
#else
c
#endif",
"c",
);
assert_same(
"
#if 0 + 2
b
#endif",
"b",
);
assert_same(
"
#if 1^1
b
#endif",
"",
);
}
// `#ifdef` with undefined names, nesting, and error cases.
#[test]
fn ifdef() {
let code = "#ifdef a
whatever, doesn't matter
#endif";
assert_eq!(cpp(code).next_non_whitespace(), None);
let code = "#ifdef a\n#endif";
assert_eq!(cpp(code).next_non_whitespace(), None);
assert!(cpp("#ifdef").next_non_whitespace().unwrap().is_err());
let nested = "#ifdef a
#ifdef b
int main() {}
#endif
#endif
char;";
assert_eq!(
cpp(nested).next_non_whitespace().unwrap().unwrap().data,
Token::Keyword(Keyword::Char)
);
assert!(cpp("#endif").next_non_whitespace().unwrap().is_err());
let same_line = "#ifdef a #endif\nint main() {}";
assert!(cpp(same_line).next_non_whitespace().unwrap().is_err());
}
// `#ifndef` guard idiom: the macro defined inside expands to nothing after.
#[test]
fn ifndef() {
let src = "
#ifndef A
#define A
#endif
A";
assert!(cpp(src).next_non_whitespace().is_none());
}
// Object-like macro expansion, including multi-token and operator bodies.
#[test]
fn object_macros() {
let src = "
#define a b
int a() { return 1; }";
let cpp_src = "int b() { return 1; }";
assert_same(src, cpp_src);
let multidef = "
#define a b + c
int d() { return a; }";
assert_same(multidef, "int d() { return b + c; }");
let opdef = "
#define BEGIN {
#define END }
int f() BEGIN return 5; END";
assert_same(opdef, "int f() { return 5; }");
}
// Self- and mutually-recursive macros must not expand forever.
#[test]
fn recursive_macros() {
assert_same("#define a a\na", "a");
assert_same("#define a a + b\na", "a + b");
let mutual_recursion = "
#define a b
#define b a
a";
assert_same(mutual_recursion, "a");
let mutual_recursion_2 = "
#define a b
#define b c
#define c a
a";
assert_same(mutual_recursion_2, "a");
let mutual_recursion_3 = "
#define a b
#define b c
#define c b
a";
assert_same(mutual_recursion_3, "b");
assert_same("#define a \n a", "");
}
// `#define` with no macro name is an error.
#[test]
fn empty_def() {
assert_err!("#define", CppError::EmptyDefine, "empty define");
assert_err!(
"#define
int",
CppError::EmptyDefine,
"empty define",
);
}
// Redefinition rules: identical bodies are fine, anything else errors —
// including parameter renames in function-like macros.
#[test]
fn redefinition() {
let src = "
#define a b
#define a c
a
";
assert_err!(
src,
CppError::IncompatibleRedefinition(_),
"incompatible redfinition"
);
let src = "
#define a b
#define a
a
";
assert_err!(
src,
CppError::IncompatibleRedefinition(_),
"incompatible redefinition"
);
let src = "
#define a b
#define a b
a
";
assert_same(src, "b");
let src = "
#define a(b) b+1
#define a(b) b+2
a(2)
";
assert_err!(
src,
CppError::IncompatibleRedefinition(_),
"incompatible redefinition"
);
let src = "
#define a(b) b+1
#define a(c) c+1
a(2)
";
assert_err!(
src,
CppError::IncompatibleRedefinition(_),
"incompatible redefinition"
);
let src = "
#define a(b) b+1
#define a(b) b+1
a(2)
";
assert_same(src, "2+1");
}
// `#undef` stops expansion from that point onward; undef of an empty macro too.
#[test]
fn undef() {
let src = "
#define a b
a
#undef a
a";
assert_same(src, "b a");
let src = "
#define a
#undef a
a
";
assert_same(src, "a");
}
// A second `#else` in the same conditional is an error.
#[test]
fn else_directive() {
use super::CppError;
let src = "
#if 1
#if 0
b
#else
// this should be an error
#else
d
#endif
";
assert_err!(src, CppError::UnexpectedElse, "duplicate else",);
}
// `#elif` with a true condition selects that branch.
#[test]
fn elif() {
let src = "
#define __WORDSIZE 64
#if 0
wrong1
#elif __WORDSIZE == 64
right
#else
wrong2
#endif
";
assert_same(src, "right");
}
// Macros are also expanded inside function-like macro bodies.
#[test]
fn function_body_replacement() {
let src = "#define a b
#define f(c) a
f(1)";
assert_same(src, "b")
}
// Object-like macro bodies may contain arbitrary token sequences.
#[test]
fn object_body_replacement() {
let src = "#define NULL ((void*)0)
int *p = NULL;";
assert_same(src, "int *p = ((void*)0);")
}
// `#pragma` is ignored (produces no tokens).
#[test]
fn pragma() {
let src = "#pragma gcc __attribute__((inline))";
assert!(cpp(src).next_non_whitespace().is_none());
}
// `#line` is unimplemented: ignored with a warning.
#[test]
fn line() {
let src = "#line 1";
let mut cpp = cpp(src);
assert!(cpp.next_non_whitespace().is_none());
assert!(cpp.warnings().pop_front().is_some());
}
// `#warning` emits a user warning and no tokens.
#[test]
fn warning() {
let src = "#warning your pants are on file";
let mut cpp = cpp(src);
assert!(cpp.next_non_whitespace().is_none());
assert!(cpp.warnings().pop_front().is_some());
}
// `#error` surfaces as a `CppError::User`.
#[test]
fn error() {
assert_err!("#error cannot drink and drive", CppError::User(_), "#error",);
}
// Malformed directives and malformed `defined` operands.
#[test]
fn invalid_directive() {
assert_err!("#wrong", CppError::InvalidDirective, "invalid directive",);
assert_err!("#1", CppError::UnexpectedToken(_, _), "unexpected token",);
assert_err!("#include", CppError::EmptyInclude, "empty include");
assert_err!("#if defined", CppError::EndOfFile(_), "unexpected eof");
for s in &[
"#if defined()",
"#if defined(+)",
"#if defined)",
"#if defined(()",
"#if defined(a a",
] {
assert_err!(s, CppError::UnexpectedToken(_, _), "unexpected token");
}
assert_err!("#if", CppError::EmptyExpression, "empty expression");
}
// A macro whose body is a string literal survives chained expansion.
#[test]
fn str_at_eol() {
let src = r#"
#define a "b"
#define c a
c
"#;
assert_same(src, "\"b\"");
}
// Comments at the end of a directive line terminate the directive correctly.
#[test]
fn test_comment_newline() {
let tokens = cpp_no_newline(
"
#if 1 //
int main() {}
#endif
",
);
assert!(is_same_preprocessed(tokens, cpp("int main() {}")));
assert_same(
"
#if 1 /**//**/
int main(){}
#endif
",
"int main() {}",
);
}
// A macro that expands to a member access containing its own name must not recurse.
#[test]
fn cycle_detection() {
let src = "
#define sa_handler __sa_handler.sa_handler
s.sa_handler";
assert_same(src, "s.__sa_handler.sa_handler");
}
// Commas inside parenthesized macro arguments do not split arguments.
#[test]
fn parens() {
let original = "#define f(a, b) a\nf((1, 2, 3), 2)";
let expected = "(1, 2, 3)";
assert_same(original, expected);
let original = "#define foo(x, y) { x, y }\nfoo(5 (6), 7)";
let expected = "{ 5 (6), 7 }";
assert_same(original, expected);
let original = "#define f(a, b, c) a + b + c\nf((((1))), ((2)), (3))";
let expected = "(((1))) + ((2)) + (3)";
assert_same(original, expected);
}
// A self-recursive function-like macro expands exactly once.
#[test]
fn recursive_function() {
let original = "#define f(a) f(a + 1)\nf(1)";
let expected = "f(1 + 1)";
assert_same(original, expected);
}
// Mutually-recursive function-like macros also terminate.
#[test]
fn mutually_recursive_function() {
let original = "
#define a(c) b(c)
#define b(c) a(c)
a(1)
";
assert_same(original, "a(1)");
}
// Exact-output tests: whitespace, comments, and skipped regions must
// pretty-print precisely (directive lines become empty lines).
#[test]
fn preprocess_only() {
assert_same_exact("int \t\n\r main() {}", "int \t\n\r main() {}");
assert_same_exact("int/* */main() {}", "int main() {}");
assert_same_exact("int/*\n\n\n*/main() {}", "int\n\n\nmain() {}");
assert_same_exact("#define a(c) c\tc\na(1);a(2)", "\n1\t1;2\t2");
assert_same_exact("#define a //\n#if defined a\n x\n#endif", "\n\n x\n");
assert_same_exact("#define x\n#undef x\n x", "\n\n x");
assert_same_exact("#pragma once\n x", "\n x");
assert_same_exact("#warning dont panic\n x", "\n x");
assert_same_exact("#error dont panic\n x", "\n x");
assert_same_exact("#line 1\n x", "\n x");
assert_same_exact(
"---
#define a
---
#if 1
x
y
z
#endif
---
#if 0
x
#endif
---
#ifdef a
x
#endif
---
#ifndef a
x
#endif
---",
"---
---
x
y
z
---
---
x
---
---",
);
}
// A function-like macro name invokes the macro even with whitespace before
// the argument list, but stays a plain identifier when no `(` follows.
#[test]
fn space_separated_function_macro() {
assert_same_exact("#define f(a) <a>\nf (a)", "\n<a>");
assert_same_exact("#define f(a) <a>\nf(a)", "\n<a>");
assert_same_exact("#define f(a) <a>\nf", "\nf");
assert_same_exact("#define f(a) <a>\nf ", "\nf ");
assert_same_exact("#define f(a) <a>\nf ;", "\nf ;");
assert_same_exact("#define f(a) <a>\nf;", "\nf;");
assert_same_exact(
"#define f(a) 1
#define h f (2)
h",
"\n\n1",
);
}
// A function-like macro name at end of input (no `(`) is emitted unchanged.
#[test]
fn eof_after_macro_call() {
use crate::data::lex::test::cpp_no_newline;
let cpp = cpp_no_newline("#define f(a)\nf")
.filter_map(|res| res.ok().map(|token| token.data.to_string()))
.collect::<Vec<_>>()
.join("");
assert_eq!(cpp, "\nf");
}
}