use crate::error::Location;
use cranelift_codegen::ir::types;
use cranelift_codegen::ir::{Block, Value};
#[allow(unused_imports, deprecated)]
use std::ascii::AsciiExt;
use std::str::CharIndices;
use std::u16;
/// A lexical token produced by the CLIF text-format lexer.
///
/// Tokens borrow their text from the source string, so the lifetime `'a`
/// ties each token to the input the `Lexer` was created over.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
pub enum Token<'a> {
    /// A `;` comment running to the end of the line; text includes the `;`.
    Comment(&'a str),
    /// `(`
    LPar,
    /// `)`
    RPar,
    /// `{`
    LBrace,
    /// `}`
    RBrace,
    /// `[`
    LBracket,
    /// `]`
    RBracket,
    /// `-`
    Minus,
    /// `+`
    Plus,
    /// `*`
    Multiply,
    /// `,`
    Comma,
    /// `.`
    Dot,
    /// `:`
    Colon,
    /// `=`
    Equal,
    /// `!`
    Not,
    /// `->`
    Arrow,
    /// Floating-point literal text, e.g. `0.0`, `0x0.4p-34`, `NaN`, `Inf`.
    Float(&'a str),
    /// Integer literal text, e.g. `0`, `-1`, `0xf`, `2_000`.
    Integer(&'a str),
    /// A value type name, e.g. `i32` or `i32x4`.
    Type(types::Type),
    /// A dynamic type reference, e.g. `dt5`.
    DynamicType(u32),
    /// An SSA value reference, e.g. `v12`.
    Value(Value),
    /// A basic-block reference, e.g. `block3`.
    Block(Block),
    /// The `cold` keyword.
    Cold,
    /// A stack slot reference, e.g. `ss3`.
    StackSlot(u32),
    /// A dynamic stack slot reference, e.g. `dss4`.
    DynamicStackSlot(u32),
    /// A global value reference, e.g. `gv3`.
    GlobalValue(u32),
    /// A table reference, e.g. `table2`.
    Table(u32),
    /// A constant reference, e.g. `const2`.
    Constant(u32),
    /// A function reference, e.g. `fn2`.
    FuncRef(u32),
    /// A signature reference, e.g. `sig2`.
    SigRef(u32),
    /// A user reference, e.g. `u345`.
    UserRef(u32),
    /// A user external-name reference, e.g. `userextname345`.
    UserNameRef(u32),
    /// A `%`-prefixed name; text excludes the `%`.
    Name(&'a str),
    /// A double-quoted string; text excludes the quotes, no escape processing.
    String(&'a str),
    /// A `#`-prefixed hexadecimal sequence; text excludes the `#`.
    HexSequence(&'a str),
    /// Any other bare identifier or keyword.
    Identifier(&'a str),
    /// An `@`-prefixed source location; text excludes the `@`.
    SourceLoc(&'a str),
}
/// A `Token` paired with the source location where it was found.
#[derive(Debug, PartialEq, Eq)]
pub struct LocatedToken<'a> {
/// The token itself.
pub token: Token<'a>,
/// Where in the source the token starts (line number).
pub location: Location,
}
/// Bundle `token` with the location `loc` as a successful lexer result.
fn token(token: Token, loc: Location) -> Result<LocatedToken, LocatedError> {
    let located = LocatedToken { token, location: loc };
    Ok(located)
}
/// An error from the lexical analysis.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum LexError {
/// An invalid character was encountered in the input.
InvalidChar,
}
/// A `LexError` paired with the source location where it occurred.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct LocatedError {
/// The error itself.
pub error: LexError,
/// Where in the source the error occurred (line number).
pub location: Location,
}
fn error<'a>(error: LexError, loc: Location) -> Result<LocatedToken<'a>, LocatedError> {
Err(LocatedError {
error,
location: loc,
})
}
/// Count how many ASCII decimal digits terminate `s`.
fn trailing_digits(s: &str) -> usize {
    s.bytes().rev().take_while(u8::is_ascii_digit).count()
}
/// Split an entity name such as `"block12"` into its head (`"block"`) and
/// trailing decimal number (`12`).
///
/// Returns `None` when the name has no valid numeric suffix, including
/// suffixes with a leading zero (e.g. `"v01"`), so that entity names
/// round-trip uniquely.
pub fn split_entity_name(name: &str) -> Option<(&str, u32)> {
    let digits = trailing_digits(name);
    let (head, tail) = name.split_at(name.len() - digits);
    // Reject a multi-digit suffix that starts with '0'.
    if tail.starts_with('0') && tail.len() > 1 {
        return None;
    }
    let number: u32 = tail.parse().ok()?;
    Some((head, number))
}
/// Lexical analyzer for the CLIF text format.
///
/// Produces a stream of `LocatedToken`s (or `LocatedError`s) via `next()`.
pub struct Lexer<'a> {
// Complete source text being tokenized.
source: &'a str,
// Iterator over `source` that drives `next_ch()`.
chars: CharIndices<'a>,
// Next character to be processed, or `None` at end of input.
lookahead: Option<char>,
// Byte offset into `source` of the `lookahead` character.
pos: usize,
// Current line number, starting at 1.
line_number: usize,
}
impl<'a> Lexer<'a> {
/// Create a new lexer over the source string `s`, priming the first
/// lookahead character.
pub fn new(s: &'a str) -> Self {
let mut lex = Self {
source: s,
chars: s.char_indices(),
lookahead: None,
pos: 0,
line_number: 1,
};
lex.next_ch();
lex
}
// Advance to the next character, updating `pos`, `lookahead`, and the
// line count. Returns the new lookahead, or `None` at end of input.
fn next_ch(&mut self) -> Option<char> {
// The previous lookahead is being consumed; if it was a newline, the
// next character starts a new line.
if self.lookahead == Some('\n') {
self.line_number += 1;
}
match self.chars.next() {
Some((idx, ch)) => {
self.pos = idx;
self.lookahead = Some(ch);
}
None => {
// At EOF, park `pos` one past the last byte so slices such as
// `&self.source[begin..self.pos]` cover the tail of the input.
self.pos = self.source.len();
self.lookahead = None;
}
}
self.lookahead
}
// The location of the current lookahead character.
fn loc(&self) -> Location {
Location {
line_number: self.line_number,
}
}
// Does the remaining input (starting at the lookahead) begin with `prefix`?
fn looking_at(&self, prefix: &str) -> bool {
self.source[self.pos..].starts_with(prefix)
}
// Could the remaining input be the start of a numeric literal:
// a digit, sign, decimal point, or one of the float keywords?
fn looking_at_numeric(&self) -> bool {
if let Some(c) = self.lookahead {
match c {
'0'..='9' => return true,
'-' => return true,
'+' => return true,
'.' => return true,
_ => {}
}
if self.looking_at("NaN") || self.looking_at("Inf") || self.looking_at("sNaN") {
return true;
}
}
false
}
// Consume one character and emit `tok` located at that character.
fn scan_char(&mut self, tok: Token<'a>) -> Result<LocatedToken<'a>, LocatedError> {
assert_ne!(self.lookahead, None);
let loc = self.loc();
self.next_ch();
token(tok, loc)
}
// Consume `count` characters and emit `tok` located at the first one.
fn scan_chars(
&mut self,
count: usize,
tok: Token<'a>,
) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
for _ in 0..count {
assert_ne!(self.lookahead, None);
self.next_ch();
}
token(tok, loc)
}
/// Consume and return the rest of the current line, starting at the
/// current position and stopping before the `\n` (or at end of input).
pub fn rest_of_line(&mut self) -> &'a str {
let begin = self.pos;
loop {
match self.next_ch() {
None | Some('\n') => return &self.source[begin..self.pos],
_ => {}
}
}
}
// Scan a `;` comment; the token text includes the leading `;`.
fn scan_comment(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
let text = self.rest_of_line();
token(Token::Comment(text), loc)
}
// Scan a number, or a bare `-`/`+` operator when no numeric text
// follows the sign.
fn scan_number(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let begin = self.pos;
let loc = self.loc();
let mut is_float = false;
match self.lookahead {
Some('-') => {
self.next_ch();
if !self.looking_at_numeric() {
// A lone `-` is the minus operator, not a sign.
return token(Token::Minus, loc);
}
}
Some('+') => {
self.next_ch();
if !self.looking_at_numeric() {
return token(Token::Plus, loc);
}
}
_ => {}
}
// NaN payloads (`NaN:...`, `sNaN:...`) skip ahead to the colon;
// plain `NaN`/`Inf` fall through to the character loop below.
if self.looking_at("NaN:") || self.looking_at("sNaN:") {
while self.next_ch() != Some(':') {}
is_float = true;
} else if self.looking_at("NaN") || self.looking_at("Inf") {
is_float = true;
}
loop {
match self.next_ch() {
// `-` covers exponents like `0x0.4p-34`; `_` is a digit separator.
Some('-') | Some('_') => {}
Some('.') => is_float = true,
Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
_ => break,
}
}
let text = &self.source[begin..self.pos];
if is_float {
token(Token::Float(text), loc)
} else {
token(Token::Integer(text), loc)
}
}
// Scan a word: an entity reference (`v1`, `block2`, ...), a value type
// name, the `cold` keyword, or a plain identifier.
fn scan_word(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let begin = self.pos;
let loc = self.loc();
assert!(self.lookahead == Some('_') || self.lookahead.unwrap().is_ascii_alphabetic());
loop {
match self.next_ch() {
Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
_ => break,
}
}
let text = &self.source[begin..self.pos];
token(
split_entity_name(text)
.and_then(|(prefix, number)| {
Self::numbered_entity(prefix, number)
.or_else(|| Self::value_type(text, prefix, number))
})
.unwrap_or_else(|| match text {
"cold" => Token::Cold,
_ => Token::Identifier(text),
}),
loc,
)
}
// Map a `prefix` + `number` pair to an entity-reference token, if the
// prefix is one of the known entity sigils.
fn numbered_entity(prefix: &str, number: u32) -> Option<Token<'a>> {
match prefix {
"v" => Value::with_number(number).map(Token::Value),
"block" => Block::with_number(number).map(Token::Block),
"ss" => Some(Token::StackSlot(number)),
"dss" => Some(Token::DynamicStackSlot(number)),
"dt" => Some(Token::DynamicType(number)),
"gv" => Some(Token::GlobalValue(number)),
"table" => Some(Token::Table(number)),
"const" => Some(Token::Constant(number)),
"fn" => Some(Token::FuncRef(number)),
"sig" => Some(Token::SigRef(number)),
"u" => Some(Token::UserRef(number)),
"userextname" => Some(Token::UserNameRef(number)),
_ => None,
}
}
// Recognize value type names: scalars like `i32` (the whole `text`) or
// vectors like `i32x4`, where `prefix` is the scalar plus a trailing `x`
// and `number` is the lane count.
fn value_type(text: &str, prefix: &str, number: u32) -> Option<Token<'a>> {
let is_vector = prefix.ends_with('x');
let scalar = if is_vector {
&prefix[0..prefix.len() - 1]
} else {
text
};
let base_type = match scalar {
"i8" => types::I8,
"i16" => types::I16,
"i32" => types::I32,
"i64" => types::I64,
"i128" => types::I128,
"f32" => types::F32,
"f64" => types::F64,
"r32" => types::R32,
"r64" => types::R64,
_ => return None,
};
if is_vector {
// Lane counts beyond u16::MAX can never form a valid vector type.
if number <= u32::from(u16::MAX) {
base_type.by(number).map(Token::Type)
} else {
None
}
} else {
Some(Token::Type(base_type))
}
}
// Scan a `%`-prefixed name; the token text excludes the `%`.
fn scan_name(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
let begin = self.pos + 1;
assert_eq!(self.lookahead, Some('%'));
loop {
match self.next_ch() {
Some('_') | Some('0'..='9') | Some('a'..='z') | Some('A'..='Z') => {}
_ => break,
}
}
let end = self.pos;
token(Token::Name(&self.source[begin..end]), loc)
}
// Scan a double-quoted string; the token text excludes the quotes, and
// no escape sequences are processed. An unterminated string is an error.
fn scan_string(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
let begin = self.pos + 1;
assert_eq!(self.lookahead, Some('"'));
while let Some(c) = self.next_ch() {
if c == '"' {
break;
}
}
let end = self.pos;
if self.lookahead != Some('"') {
return error(LexError::InvalidChar, self.loc());
}
self.next_ch();
token(Token::String(&self.source[begin..end]), loc)
}
// Scan a `#`-prefixed hexadecimal sequence; text excludes the `#`.
fn scan_hex_sequence(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
let begin = self.pos + 1;
assert_eq!(self.lookahead, Some('#'));
while let Some(c) = self.next_ch() {
if !char::is_digit(c, 16) {
break;
}
}
let end = self.pos;
token(Token::HexSequence(&self.source[begin..end]), loc)
}
// Scan an `@`-prefixed source location; text excludes the `@`.
fn scan_srcloc(&mut self) -> Result<LocatedToken<'a>, LocatedError> {
let loc = self.loc();
let begin = self.pos + 1;
assert_eq!(self.lookahead, Some('@'));
while let Some(c) = self.next_ch() {
if !char::is_digit(c, 16) {
break;
}
}
let end = self.pos;
token(Token::SourceLoc(&self.source[begin..end]), loc)
}
/// Get the next token, or `None` when the end of input is reached.
///
/// Whitespace is skipped; an unrecognized character is consumed and
/// reported as `LexError::InvalidChar` at its location.
#[allow(clippy::cognitive_complexity)]
pub fn next(&mut self) -> Option<Result<LocatedToken<'a>, LocatedError>> {
loop {
let loc = self.loc();
return match self.lookahead {
None => None,
Some(';') => Some(self.scan_comment()),
Some('(') => Some(self.scan_char(Token::LPar)),
Some(')') => Some(self.scan_char(Token::RPar)),
Some('{') => Some(self.scan_char(Token::LBrace)),
Some('}') => Some(self.scan_char(Token::RBrace)),
Some('[') => Some(self.scan_char(Token::LBracket)),
Some(']') => Some(self.scan_char(Token::RBracket)),
Some(',') => Some(self.scan_char(Token::Comma)),
Some('.') => Some(self.scan_char(Token::Dot)),
Some(':') => Some(self.scan_char(Token::Colon)),
Some('=') => Some(self.scan_char(Token::Equal)),
Some('!') => Some(self.scan_char(Token::Not)),
Some('+') => Some(self.scan_number()),
Some('*') => Some(self.scan_char(Token::Multiply)),
Some('-') => {
// `->` is the arrow token; any other `-` starts a number
// or the minus operator.
if self.looking_at("->") {
Some(self.scan_chars(2, Token::Arrow))
} else {
Some(self.scan_number())
}
}
Some('0'..='9') => Some(self.scan_number()),
Some('a'..='z') | Some('A'..='Z') => {
// `NaN`/`Inf` are float literals, not identifiers.
if self.looking_at("NaN") || self.looking_at("Inf") {
Some(self.scan_number())
} else {
Some(self.scan_word())
}
}
Some('%') => Some(self.scan_name()),
Some('"') => Some(self.scan_string()),
Some('#') => Some(self.scan_hex_sequence()),
Some('@') => Some(self.scan_srcloc()),
// Skip whitespace (space and tab through carriage return) and
// loop for the next token.
Some(' ') | Some('\x09'..='\x0d') => {
self.next_ch();
continue;
}
_ => {
self.next_ch();
Some(error(LexError::InvalidChar, loc))
}
};
}
}
}
#[cfg(test)]
mod tests {
use super::trailing_digits;
use super::*;
use crate::error::Location;
use cranelift_codegen::ir::types;
use cranelift_codegen::ir::{Block, Value};
// `trailing_digits` counts only the decimal digits at the very end.
#[test]
fn digits() {
assert_eq!(trailing_digits(""), 0);
assert_eq!(trailing_digits("x"), 0);
assert_eq!(trailing_digits("0x"), 0);
assert_eq!(trailing_digits("x1"), 1);
assert_eq!(trailing_digits("1x1"), 1);
assert_eq!(trailing_digits("1x01"), 2);
}
// `split_entity_name` rejects missing suffixes and leading zeroes.
#[test]
fn entity_name() {
assert_eq!(split_entity_name(""), None);
assert_eq!(split_entity_name("x"), None);
assert_eq!(split_entity_name("x+"), None);
assert_eq!(split_entity_name("x+1"), Some(("x+", 1)));
assert_eq!(split_entity_name("x-1"), Some(("x-", 1)));
assert_eq!(split_entity_name("1"), Some(("", 1)));
assert_eq!(split_entity_name("x1"), Some(("x", 1)));
assert_eq!(split_entity_name("xy0"), Some(("xy", 0)));
// `01` has a leading zero, so the whole name is rejected.
assert_eq!(split_entity_name("inst01"), None);
}
// Test helper: build the `Option<Result<...>>` shape `Lexer::next` returns
// for a successful token on the given line.
fn token<'a>(token: Token<'a>, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
Some(super::token(token, Location { line_number: line }))
}
// Test helper: same shape for a lexer error on the given line.
fn error<'a>(error: LexError, line: usize) -> Option<Result<LocatedToken<'a>, LocatedError>> {
Some(super::error(error, Location { line_number: line }))
}
// Empty and whitespace-only inputs produce no tokens.
#[test]
fn make_lexer() {
let mut l1 = Lexer::new("");
let mut l2 = Lexer::new(" ");
let mut l3 = Lexer::new("\n ");
assert_eq!(l1.next(), None);
assert_eq!(l2.next(), None);
assert_eq!(l3.next(), None);
}
// Comments include the `;` and carry the correct line numbers.
#[test]
fn lex_comment() {
let mut lex = Lexer::new("; hello");
assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
assert_eq!(lex.next(), None);
lex = Lexer::new("\n ;hello\n;foo");
assert_eq!(lex.next(), token(Token::Comment(";hello"), 2));
assert_eq!(lex.next(), token(Token::Comment(";foo"), 3));
assert_eq!(lex.next(), None);
// An invalid character is reported, then lexing continues.
let mut lex = Lexer::new("$; hello");
assert_eq!(lex.next(), error(LexError::InvalidChar, 1));
assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
assert_eq!(lex.next(), None);
}
// Single-character punctuation tokens and line tracking.
#[test]
fn lex_chars() {
let mut lex = Lexer::new("(); hello\n = :{, }.");
assert_eq!(lex.next(), token(Token::LPar, 1));
assert_eq!(lex.next(), token(Token::RPar, 1));
assert_eq!(lex.next(), token(Token::Comment("; hello"), 1));
assert_eq!(lex.next(), token(Token::Equal, 2));
assert_eq!(lex.next(), token(Token::Colon, 2));
assert_eq!(lex.next(), token(Token::LBrace, 2));
assert_eq!(lex.next(), token(Token::Comma, 2));
assert_eq!(lex.next(), token(Token::RBrace, 2));
assert_eq!(lex.next(), token(Token::Dot, 2));
assert_eq!(lex.next(), None);
}
// Integers (signed, hex, underscored), floats, and NaN.
#[test]
fn lex_numbers() {
let mut lex = Lexer::new(" 0 2_000 -1,0xf -0x0 0.0 0x0.4p-34 NaN +5");
assert_eq!(lex.next(), token(Token::Integer("0"), 1));
assert_eq!(lex.next(), token(Token::Integer("2_000"), 1));
assert_eq!(lex.next(), token(Token::Integer("-1"), 1));
assert_eq!(lex.next(), token(Token::Comma, 1));
assert_eq!(lex.next(), token(Token::Integer("0xf"), 1));
assert_eq!(lex.next(), token(Token::Integer("-0x0"), 1));
assert_eq!(lex.next(), token(Token::Float("0.0"), 1));
assert_eq!(lex.next(), token(Token::Float("0x0.4p-34"), 1));
assert_eq!(lex.next(), token(Token::Float("NaN"), 1));
assert_eq!(lex.next(), token(Token::Integer("+5"), 1));
assert_eq!(lex.next(), None);
}
// Entity references, type names, and near-misses that stay identifiers
// (leading zeroes, non-numeric suffixes, out-of-range lane counts).
#[test]
fn lex_identifiers() {
let mut lex = Lexer::new(
"v0 v00 vx01 block1234567890 block5234567890 v1x vx1 vxvx4 \
function0 function i8 i32x4 f32x5",
);
assert_eq!(
lex.next(),
token(Token::Value(Value::with_number(0).unwrap()), 1)
);
assert_eq!(lex.next(), token(Token::Identifier("v00"), 1));
assert_eq!(lex.next(), token(Token::Identifier("vx01"), 1));
assert_eq!(
lex.next(),
token(Token::Block(Block::with_number(1234567890).unwrap()), 1)
);
assert_eq!(lex.next(), token(Token::Identifier("block5234567890"), 1));
assert_eq!(lex.next(), token(Token::Identifier("v1x"), 1));
assert_eq!(lex.next(), token(Token::Identifier("vx1"), 1));
assert_eq!(lex.next(), token(Token::Identifier("vxvx4"), 1));
assert_eq!(lex.next(), token(Token::Identifier("function0"), 1));
assert_eq!(lex.next(), token(Token::Identifier("function"), 1));
assert_eq!(lex.next(), token(Token::Type(types::I8), 1));
assert_eq!(lex.next(), token(Token::Type(types::I32X4), 1));
assert_eq!(lex.next(), token(Token::Identifier("f32x5"), 1));
assert_eq!(lex.next(), None);
}
// `#`-prefixed hex sequences; the `#` is not part of the token text.
#[test]
fn lex_hex_sequences() {
let mut lex = Lexer::new("#0 #DEADbeef123 #789");
assert_eq!(lex.next(), token(Token::HexSequence("0"), 1));
assert_eq!(lex.next(), token(Token::HexSequence("DEADbeef123"), 1));
assert_eq!(lex.next(), token(Token::HexSequence("789"), 1));
}
// `%`-prefixed names; entity-like spellings stay plain names.
#[test]
fn lex_names() {
let mut lex = Lexer::new("%0 %x3 %function %123_abc %ss0 %v3 %block11 %const42 %_");
assert_eq!(lex.next(), token(Token::Name("0"), 1));
assert_eq!(lex.next(), token(Token::Name("x3"), 1));
assert_eq!(lex.next(), token(Token::Name("function"), 1));
assert_eq!(lex.next(), token(Token::Name("123_abc"), 1));
assert_eq!(lex.next(), token(Token::Name("ss0"), 1));
assert_eq!(lex.next(), token(Token::Name("v3"), 1));
assert_eq!(lex.next(), token(Token::Name("block11"), 1));
assert_eq!(lex.next(), token(Token::Name("const42"), 1));
assert_eq!(lex.next(), token(Token::Name("_"), 1));
}
// Quoted strings: no escape processing (`\"` is just a backslash), and a
// string may span multiple lines while the token keeps its start line.
#[test]
fn lex_strings() {
let mut lex = Lexer::new(
r#""" "0" "x3""function" "123 abc" "\" "start
and end on
different lines" "#,
);
assert_eq!(lex.next(), token(Token::String(""), 1));
assert_eq!(lex.next(), token(Token::String("0"), 1));
assert_eq!(lex.next(), token(Token::String("x3"), 1));
assert_eq!(lex.next(), token(Token::String("function"), 1));
assert_eq!(lex.next(), token(Token::String("123 abc"), 1));
assert_eq!(lex.next(), token(Token::String(r#"\"#), 1));
assert_eq!(
lex.next(),
token(
Token::String(
r#"start
and end on
different lines"#
),
1
)
);
}
// `u<N>` user references; a `:` terminates the reference number.
#[test]
fn lex_userrefs() {
let mut lex = Lexer::new("u0 u1 u234567890 u9:8765");
assert_eq!(lex.next(), token(Token::UserRef(0), 1));
assert_eq!(lex.next(), token(Token::UserRef(1), 1));
assert_eq!(lex.next(), token(Token::UserRef(234567890), 1));
assert_eq!(lex.next(), token(Token::UserRef(9), 1));
assert_eq!(lex.next(), token(Token::Colon, 1));
assert_eq!(lex.next(), token(Token::Integer("8765"), 1));
assert_eq!(lex.next(), None);
}
}