use crate::rustlite::{CompileError, Span};
use crate::error_codes as codes;
/// A single lexed token: what it is (`kind`) and where it came from (`span`).
#[derive(Debug, Clone, PartialEq)]
pub struct SolTok {
/// The token's classification, including any payload (identifier text, integer value, ...).
pub kind: SolKind,
/// Byte range of the token in the source; the lexer fills it with `start..end` byte offsets.
pub span: Span,
}
/// Every kind of token the lexer can produce.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum SolKind {
// --- Keywords ---
/// `facet`, or its alias `contract`.
Facet,
/// `function`
Function,
/// `external`
External,
/// `view`
View,
/// `pure`
Pure,
/// `returns`
Returns,
/// `return`
Return,
/// `mapping`
Mapping,
/// `delete`
Delete,
// --- Tokens carrying a payload ---
/// A built-in type name: `uint256`, `address`, `bool`, `bytes32`, `string` or `bytes`.
TypeName(String),
/// Any other identifier (ASCII letters, digits and `_`, not starting with a digit).
Ident(String),
/// An integer literal (decimal or `0x` hex) as a 32-byte big-endian word.
Int([u8; 32]),
/// A double-quoted string literal; the payload is the text between the quotes.
Str(String),
// --- Punctuation ---
/// `{`
LBrace,
/// `}`
RBrace,
/// `(`
LParen,
/// `)`
RParen,
/// `[`
LBracket,
/// `]`
RBracket,
/// `;`
Semi,
/// `,`
Comma,
/// `=` (a lone equals sign; `==` and `=>` are separate tokens)
Assign,
/// `=>`
FatArrow,
/// `.`
Dot,
// --- Operators ---
/// `+`
Plus,
/// `-`
Minus,
/// `*`
Star,
/// `/`
Slash,
/// `%`
Percent,
/// `>`
Gt,
/// `<`
Lt,
/// `>=`
Ge,
/// `<=`
Le,
/// `==`
EqEq,
/// `!=`
BangEq,
/// End of input; `lex` always emits this as the final token.
Eof,
}
/// Tokenizes `source` into a vector of tokens.
///
/// On success the returned vector always ends with exactly one [`SolKind::Eof`] token.
///
/// # Errors
///
/// Returns the first `CompileError` reported by the lexer; no tokens are returned in that case.
pub fn lex(source: &str) -> Result<Vec<SolTok>, CompileError> {
    let mut lexer = Lexer { src: source.as_bytes(), pos: 0 };
    let mut tokens = Vec::new();
    loop {
        let token = lexer.next_token()?;
        // Decide before the push moves the token; the Eof token itself is kept in the output.
        let reached_end = matches!(token.kind, SolKind::Eof);
        tokens.push(token);
        if reached_end {
            return Ok(tokens);
        }
    }
}
/// Byte-oriented cursor over the source text being lexed.
struct Lexer<'a> {
/// The source as raw bytes (`lex` passes `source.as_bytes()`).
src: &'a [u8],
/// Byte offset into `src` of the next unread byte.
pos: usize,
}
impl Lexer<'_> {
    /// Advances `pos` past whitespace, `//` line comments and `/* ... */` block comments.
    ///
    /// Repeats until the cursor rests on a byte that is none of those, or on end of input.
    fn skip_trivia(&mut self) {
        let src = self.src;
        loop {
            while self.pos < src.len() && src[self.pos].is_ascii_whitespace() {
                self.pos += 1;
            }
            if src[self.pos..].starts_with(b"//") {
                // Stop on the newline itself; the whitespace loop consumes it next round.
                while self.pos < src.len() && src[self.pos] != b'\n' {
                    self.pos += 1;
                }
                continue;
            }
            if src[self.pos..].starts_with(b"/*") {
                self.pos += 2;
                while self.pos + 1 < src.len()
                    && !(src[self.pos] == b'*' && src[self.pos + 1] == b'/')
                {
                    self.pos += 1;
                }
                // Step over the closing `*/`.
                // NOTE(review): an unterminated block comment is not reported; the clamp
                // below makes it silently swallow the rest of the input.
                self.pos = (self.pos + 2).min(src.len());
                continue;
            }
            break;
        }
    }

    /// Skips trivia and lexes the next token.
    ///
    /// Two-character operators (`=>`, `==`, `!=`, `>=`, `<=`) are tried before their
    /// single-character prefixes. At end of input an empty-span [`SolKind::Eof`] is returned.
    ///
    /// # Errors
    ///
    /// Returns `UNEXPECTED_BYTE` when the next character cannot start any token, plus any
    /// error from `lex_string` or `lex_int`. The span of the unexpected-character error
    /// covers the whole UTF-8 character, so it never splits a multi-byte sequence.
    fn next_token(&mut self) -> Result<SolTok, CompileError> {
        self.skip_trivia();
        let src = self.src;
        let start = self.pos;
        if start >= src.len() {
            return Ok(SolTok { kind: SolKind::Eof, span: Span { start, end: start } });
        }

        let b = src[start];
        let next = src.get(start + 1).copied();
        let two_char = match (b, next) {
            (b'=', Some(b'>')) => Some(SolKind::FatArrow),
            (b'=', Some(b'=')) => Some(SolKind::EqEq),
            (b'!', Some(b'=')) => Some(SolKind::BangEq),
            (b'>', Some(b'=')) => Some(SolKind::Ge),
            (b'<', Some(b'=')) => Some(SolKind::Le),
            _ => None,
        };
        if let Some(kind) = two_char {
            self.pos += 2;
            return Ok(SolTok { kind, span: Span { start, end: self.pos } });
        }

        if b == b'"' {
            return self.lex_string(start);
        }

        let punct = match b {
            b'{' => Some(SolKind::LBrace),
            b'}' => Some(SolKind::RBrace),
            b'(' => Some(SolKind::LParen),
            b')' => Some(SolKind::RParen),
            b'[' => Some(SolKind::LBracket),
            b']' => Some(SolKind::RBracket),
            b';' => Some(SolKind::Semi),
            b',' => Some(SolKind::Comma),
            b'=' => Some(SolKind::Assign),
            b'.' => Some(SolKind::Dot),
            b'+' => Some(SolKind::Plus),
            b'-' => Some(SolKind::Minus),
            b'*' => Some(SolKind::Star),
            b'/' => Some(SolKind::Slash),
            b'%' => Some(SolKind::Percent),
            b'>' => Some(SolKind::Gt),
            b'<' => Some(SolKind::Lt),
            _ => None,
        };
        if let Some(kind) = punct {
            self.pos += 1;
            return Ok(SolTok { kind, span: Span { start, end: self.pos } });
        }

        if b.is_ascii_alphabetic() || b == b'_' {
            while self.pos < src.len()
                && (src[self.pos].is_ascii_alphanumeric() || src[self.pos] == b'_')
            {
                self.pos += 1;
            }
            // The slice holds only ASCII bytes, so the UTF-8 conversion cannot fail.
            let word = std::str::from_utf8(&src[start..self.pos]).unwrap_or("");
            let kind = keyword(word).unwrap_or_else(|| SolKind::Ident(word.to_string()));
            return Ok(SolTok { kind, span: Span { start, end: self.pos } });
        }

        if b.is_ascii_digit() {
            return self.lex_int(start);
        }

        // Nothing matched. For a non-ASCII lead byte, widen the span over the UTF-8
        // continuation bytes (0b10xx_xxxx) so the error shows the real character and
        // the span stays on character boundaries.
        let mut end = start + 1;
        if !b.is_ascii() {
            while end < src.len() && (src[end] & 0xC0) == 0x80 {
                end += 1;
            }
        }
        let offending = std::str::from_utf8(&src[start..end])
            .ok()
            .and_then(|s| s.chars().next())
            .unwrap_or(b as char);
        Err(CompileError::at_code(
            codes::UNEXPECTED_BYTE,
            format!("unexpected byte {:?} in SolidityLite source", offending),
            Span { start, end },
        ))
    }

    /// Lexes an integer literal beginning at `start` into a [`SolKind::Int`] token.
    ///
    /// Accepts a run of decimal digits, or `0x`/`0X` followed by at least one hex digit.
    /// The caller must have positioned `pos` at `start`, on an ASCII digit.
    ///
    /// # Errors
    ///
    /// Returns `BAD_NUMBER` when `0x` has no digits, when the literal is directly followed
    /// by a letter, digit or `_` (e.g. `12ab`), or when the value does not fit in 32 bytes.
    fn lex_int(&mut self, start: usize) -> Result<SolTok, CompileError> {
        let src = self.src;
        let is_hex = src[self.pos] == b'0'
            && self.pos + 1 < src.len()
            && (src[self.pos + 1] == b'x' || src[self.pos + 1] == b'X');

        let word = if is_hex {
            self.pos += 2;
            let digits_start = self.pos;
            while self.pos < src.len() && src[self.pos].is_ascii_hexdigit() {
                self.pos += 1;
            }
            if self.pos == digits_start {
                return Err(CompileError::at_code(
                    codes::BAD_NUMBER,
                    "hex literal `0x` with no digits".to_string(),
                    Span { start, end: self.pos },
                ));
            }
            // Only the digits after the prefix are handed to the hex parser.
            std::str::from_utf8(&src[digits_start..self.pos]).unwrap_or("")
        } else {
            while self.pos < src.len() && src[self.pos].is_ascii_digit() {
                self.pos += 1;
            }
            std::str::from_utf8(&src[start..self.pos]).unwrap_or("")
        };

        // A literal glued to identifier characters is one malformed token; consume the
        // whole run so the error span covers all of it.
        if self.pos < src.len()
            && (src[self.pos].is_ascii_alphanumeric() || src[self.pos] == b'_')
        {
            while self.pos < src.len()
                && (src[self.pos].is_ascii_alphanumeric() || src[self.pos] == b'_')
            {
                self.pos += 1;
            }
            return Err(CompileError::at_code(
                codes::BAD_NUMBER,
                "malformed numeric literal".to_string(),
                Span { start, end: self.pos },
            ));
        }

        let span = Span { start, end: self.pos };
        let word_be32 = if is_hex {
            parse_hex_be32(word, span)?
        } else {
            parse_dec_be32(word, span)?
        };
        Ok(SolTok { kind: SolKind::Int(word_be32), span })
    }

    /// Lexes a double-quoted string literal whose opening quote is at `start`.
    ///
    /// The token payload is the raw text between the quotes; no escape sequences are
    /// interpreted, so the literal ends at the first `"` after the opening one. The span
    /// includes both quotes.
    ///
    /// # Errors
    ///
    /// Returns `UNEXPECTED_BYTE` ("unterminated string literal") when the input ends
    /// before a closing quote.
    fn lex_string(&mut self, start: usize) -> Result<SolTok, CompileError> {
        let src = self.src;
        // Skip the opening quote.
        self.pos += 1;
        let content_start = self.pos;
        while self.pos < src.len() && src[self.pos] != b'"' {
            self.pos += 1;
        }
        if self.pos >= src.len() {
            return Err(CompileError::at_code(
                codes::UNEXPECTED_BYTE,
                "unterminated string literal".to_string(),
                Span { start, end: self.pos },
            ));
        }
        let text = std::str::from_utf8(&src[content_start..self.pos])
            .unwrap_or("")
            .to_string();
        // Skip the closing quote.
        self.pos += 1;
        Ok(SolTok { kind: SolKind::Str(text), span: Span { start, end: self.pos } })
    }
}
/// Maps a reserved word to its token kind, or returns `None` for an ordinary identifier.
///
/// `contract` is accepted as an alias of `facet`. The built-in type names all map to
/// [`SolKind::TypeName`] carrying the name as written.
fn keyword(word: &str) -> Option<SolKind> {
    let kind = match word {
        "facet" | "contract" => SolKind::Facet,
        "function" => SolKind::Function,
        "external" => SolKind::External,
        "view" => SolKind::View,
        "pure" => SolKind::Pure,
        "returns" => SolKind::Returns,
        "return" => SolKind::Return,
        "mapping" => SolKind::Mapping,
        "delete" => SolKind::Delete,
        "uint256" | "address" | "bool" | "bytes32" | "string" | "bytes" => {
            SolKind::TypeName(word.to_owned())
        }
        _ => return None,
    };
    Some(kind)
}
/// Parses ASCII decimal `digits` into a 32-byte big-endian unsigned integer.
///
/// The value is accumulated one digit at a time as `word = word * 10 + digit`, using
/// schoolbook multiplication over the bytes from least to most significant. An empty
/// `digits` yields zero.
///
/// # Errors
///
/// Returns `BAD_NUMBER` at `span` when a byte is not `0`-`9`, or when the value exceeds
/// 2^256-1.
fn parse_dec_be32(digits: &str, span: Span) -> Result<[u8; 32], CompileError> {
    let mut word = [0u8; 32];
    for ch in digits.bytes() {
        // Validate like `parse_hex_be32` does: without this, `ch - b'0'` would panic on
        // underflow in debug builds or yield a bogus digit value in release builds.
        if !ch.is_ascii_digit() {
            return Err(CompileError::at_code(
                codes::BAD_NUMBER,
                "invalid decimal digit".to_string(),
                span,
            ));
        }
        // The incoming digit seeds the carry into the least-significant byte.
        let mut carry = u16::from(ch - b'0');
        for byte in word.iter_mut().rev() {
            // At most 255 * 10 + 9, which comfortably fits in a u16.
            let v = u16::from(*byte) * 10 + carry;
            *byte = (v & 0xFF) as u8;
            carry = v >> 8;
        }
        // A carry out of the most-significant byte means the value needs more than 256 bits.
        if carry != 0 {
            return Err(CompileError::at_code(
                codes::BAD_NUMBER,
                "integer literal exceeds uint256 (2^256-1)".to_string(),
                span,
            ));
        }
    }
    Ok(word)
}
/// Parses ASCII hex `digits` (without the `0x` prefix) into a 32-byte big-endian word.
///
/// Shorter inputs are right-aligned, i.e. zero-extended on the left. An empty `digits`
/// yields zero.
///
/// # Errors
///
/// Returns `BAD_NUMBER` at `span` when there are more than 64 digits, or when a byte is
/// not a hex digit.
fn parse_hex_be32(digits: &str, span: Span) -> Result<[u8; 32], CompileError> {
    if digits.len() > 64 {
        return Err(CompileError::at_code(
            codes::BAD_NUMBER,
            "hex literal exceeds 32 bytes (uint256)".to_string(),
            span,
        ));
    }
    let mut word = [0u8; 32];
    // Walk from the last digit backwards: digit `i` from the end is nibble `i` of the
    // value, so it lands in byte `31 - i / 2`, low half first.
    for (i, ch) in digits.bytes().rev().enumerate() {
        let nibble = match ch {
            b'0'..=b'9' => ch - b'0',
            b'a'..=b'f' => ch - b'a' + 10,
            b'A'..=b'F' => ch - b'A' + 10,
            _ => {
                return Err(CompileError::at_code(
                    codes::BAD_NUMBER,
                    "invalid hex digit".to_string(),
                    span,
                ))
            }
        };
        let slot = &mut word[31 - i / 2];
        if i % 2 == 0 {
            *slot |= nibble;
        } else {
            *slot |= nibble << 4;
        }
    }
    Ok(word)
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `src` and returns just the token kinds; panics if lexing fails.
    fn kinds(src: &str) -> Vec<SolKind> {
        lex(src).unwrap().into_iter().map(|t| t.kind).collect()
    }

    /// A minimal facet with one function lexes to the expected leading tokens,
    /// ends in `Eof`, and carries the literal `42` as a 32-byte word.
    #[test]
    fn lexes_the_floor_grammar() {
        let k = kinds("facet C { function get() external view returns (uint256) { return 42; } }");
        assert_eq!(k[0], SolKind::Facet);
        assert_eq!(k[1], SolKind::Ident("C".into()));
        assert_eq!(k[2], SolKind::LBrace);
        assert_eq!(k[3], SolKind::Function);
        assert_eq!(k[4], SolKind::Ident("get".into()));
        assert!(matches!(k.last(), Some(SolKind::Eof)));
        let mut w = [0u8; 32];
        w[31] = 42;
        assert!(k.contains(&SolKind::Int(w)));
    }

    /// Decimal and hex spellings of the same value produce the same big-endian word.
    #[test]
    fn decimal_and_hex_agree() {
        let dec = lex("255").unwrap()[0].kind.clone();
        let hex = lex("0xff").unwrap()[0].kind.clone();
        assert_eq!(dec, hex);
        let mut w = [0u8; 32];
        w[31] = 0xff;
        assert_eq!(dec, SolKind::Int(w));
        // 256 carries into the second-lowest byte.
        let dec = lex("256").unwrap()[0].kind.clone();
        let mut w = [0u8; 32];
        w[30] = 0x01;
        assert_eq!(dec, SolKind::Int(w));
    }

    /// Mapping declarations, indexing, member access and parameter lists all tokenize.
    #[test]
    fn lexes_mapping_index_and_msg_sender_tokens() {
        let k = kinds(
            "mapping(address => uint256) bal; bal[msg.sender] = amt; f(uint256 a, address b)",
        );
        assert!(k.contains(&SolKind::Mapping), "`mapping` keyword");
        assert!(k.contains(&SolKind::FatArrow), "`=>` arrow");
        assert!(k.contains(&SolKind::LBracket), "`[`");
        assert!(k.contains(&SolKind::RBracket), "`]`");
        assert!(k.contains(&SolKind::Dot), "`.` (msg.sender)");
        assert!(k.contains(&SolKind::Comma), "`,` param separator");
        assert!(k.contains(&SolKind::Assign), "standalone `=` is Assign");
        assert!(k.contains(&SolKind::Ident("msg".into())));
        assert!(k.contains(&SolKind::Ident("sender".into())));
    }

    /// Line and block comments produce no tokens.
    #[test]
    fn comments_are_trivia() {
        let k = kinds("// a line\nfacet /* block */ C {}");
        assert_eq!(k[0], SolKind::Facet);
        assert_eq!(k[1], SolKind::Ident("C".into()));
    }

    /// A byte that starts no token is reported with the `UNEXPECTED_BYTE` code.
    #[test]
    fn unexpected_byte_is_a_clean_error() {
        let e = lex("facet C { @ }").unwrap_err();
        assert_eq!(e.code, Some(codes::UNEXPECTED_BYTE));
    }

    /// Every comparison operator, including `!=`, lexes to its own token kind.
    #[test]
    fn lexes_comparison_operators() {
        let k = kinds("a > b < c >= d <= e == f != g");
        assert!(k.contains(&SolKind::Gt), "`>`");
        assert!(k.contains(&SolKind::Lt), "`<`");
        assert!(k.contains(&SolKind::Ge), "`>=`");
        assert!(k.contains(&SolKind::Le), "`<=`");
        assert!(k.contains(&SolKind::EqEq), "`==`");
        assert!(k.contains(&SolKind::BangEq), "`!=`");
        assert!(!k.contains(&SolKind::Assign), "`==` is one EqEq, not two Assigns");
    }

    /// Two-character operators win over their one-character prefixes.
    #[test]
    fn ge_le_eqeq_beat_their_single_char_prefixes() {
        let k = kinds(">=");
        assert_eq!(k[0], SolKind::Ge);
        assert!(matches!(k.get(1), Some(SolKind::Eof)));
        let k = kinds("<=");
        assert_eq!(k[0], SolKind::Le);
        // The case the test name promises: `==` is one EqEq, not Assign + Assign.
        let k = kinds("==");
        assert_eq!(k[0], SolKind::EqEq);
        assert!(matches!(k.get(1), Some(SolKind::Eof)));
        let k = kinds("=>");
        assert_eq!(k[0], SolKind::FatArrow);
    }

    /// String literals yield their contents without the quotes; `""` is the empty string.
    #[test]
    fn lexes_a_string_literal() {
        let k = kinds("require(n > 0, \"zero\")");
        assert!(k.contains(&SolKind::Str("zero".into())), "the message string");
        let k = kinds("\"\"");
        assert_eq!(k[0], SolKind::Str(String::new()));
    }

    /// A string with no closing quote is an error, not a panic.
    #[test]
    fn unterminated_string_is_a_clean_error() {
        let e = lex("\"no closing quote").unwrap_err();
        assert_eq!(e.code, Some(codes::UNEXPECTED_BYTE));
    }

    /// 2^256 is rejected with `BAD_NUMBER`, while 2^256-1 is accepted.
    #[test]
    fn overflow_decimal_is_rejected() {
        let two_256 =
            "115792089237316195423570985008687907853269984665640564039457584007913129639936";
        let e = lex(two_256).unwrap_err();
        assert_eq!(e.code, Some(codes::BAD_NUMBER));
        let max =
            "115792089237316195423570985008687907853269984665640564039457584007913129639935";
        assert!(lex(max).is_ok());
    }
}