use std::num::IntErrorKind;
use logos::{Lexer, Logos};
/// A single lexical token of LC-3 assembly source.
///
/// Runs of spaces and tabs between tokens are skipped, and lexing failures are
/// reported as [`LexErr`].
#[derive(Debug, Logos, PartialEq, Eq)]
#[logos(skip r"[ \t]+", error = LexErr)]
pub enum Token {
/// An unsigned numeric literal: decimal (optionally prefixed with `#`) or
/// hexadecimal (prefixed with `x`/`X`). The parsed value must fit in a `u16`.
#[regex(r"\d\w*", lex_unsigned_dec)]
#[regex(r"#\d?\w*", lex_unsigned_dec)]
#[regex(r"[Xx][\dA-Fa-f]\w*", lex_unsigned_hex)]
Unsigned(u16),
/// A numeric literal written with a `-` sign: decimal (optionally prefixed
/// with `#`) or hexadecimal (prefixed with `x`/`X`). The parsed value must fit
/// in an `i16`.
#[regex(r"-\w*", lex_signed_dec)]
#[regex(r"#-\w*", lex_signed_dec)]
#[regex(r"[Xx]-\w*", lex_signed_hex)]
Signed(i16),
/// A register: `R`/`r` followed by digits. The callback only accepts the
/// numbers 0 through 7.
#[regex(r"[Rr]\d+", lex_reg)]
Reg(u8),
/// An identifier (a letter or underscore followed by word characters), parsed
/// into an [`Ident`].
#[regex(r"[A-Za-z_]\w*", |lx| lx.slice().parse::<Ident>().expect("should be infallible"))]
Ident(Ident),
/// A directive: `.` followed by an identifier. The stored name does not
/// include the leading dot.
#[regex(r"\.[A-Za-z_]\w*", |lx| lx.slice()[1..].to_string())]
Directive(String),
/// A string literal. Only the opening `"` is matched by the token pattern;
/// `lex_str_literal` consumes the rest of the literal and resolves escapes.
#[token(r#"""#, lex_str_literal)]
String(String),
/// The `:` punctuation.
#[token(":")]
Colon,
/// The `,` punctuation.
#[token(",")]
Comma,
/// A comment introduced by `;`.
#[regex(r";.*")]
Comment,
/// A line break (`\n` or `\r\n`).
#[regex(r"\r?\n")]
NewLine
}
impl Token {
    /// Returns `true` only for [`Token::NewLine`], the one whitespace-like
    /// token that is actually emitted (spaces and tabs are skipped by the lexer).
    pub(crate) fn is_whitespace(&self) -> bool {
        *self == Token::NewLine
    }
}
// Declares the `Ident` enum from a comma-separated list of keywords.
//
// Each keyword becomes a unit variant of `Ident`; an additional `Label(String)`
// variant holds every identifier that is not a keyword. The macro also
// generates:
// - `FromStr`, which matches keywords ASCII-case-insensitively and never fails;
// - `Display`, which prints a keyword by its variant name and a label verbatim.
macro_rules! ident_enum {
    ($($instr:ident),+) => {
        /// An identifier: either one of the keywords given to `ident_enum!`
        /// or an arbitrary label.
        #[derive(Debug, PartialEq, Eq, Clone)]
        pub enum Ident {
            $(
                #[allow(missing_docs)]
                $instr
            ),+,
            /// An identifier that is not a keyword, stored exactly as written.
            Label(String)
        }

        impl std::str::FromStr for Ident {
            type Err = std::convert::Infallible;

            /// Parses `s` as a keyword (ignoring ASCII case) or, failing that,
            /// as a label. This conversion cannot fail.
            fn from_str(s: &str) -> Result<Self, Self::Err> {
                // Fold ASCII letters only. Full Unicode uppercasing maps
                // look-alikes such as 'ſ' (U+017F) to 'S' and 'ı' (U+0131) to
                // 'I', which would turn labels like "ſt" into the keyword `ST`.
                match s.to_ascii_uppercase().as_str() {
                    $(stringify!($instr) => Ok(Self::$instr)),*,
                    _ => Ok(Self::Label(s.to_string()))
                }
            }
        }

        impl std::fmt::Display for Ident {
            /// Writes the keyword's variant name, or the label text as stored.
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                match self {
                    $(Self::$instr => f.write_str(stringify!($instr))),*,
                    Self::Label(id) => f.write_str(id)
                }
            }
        }
    };
}
// Instantiates `Ident` with the keywords below. `Ident::from_str` matches them
// case-insensitively; any other identifier is kept as `Ident::Label`.
ident_enum! {
ADD, AND, NOT, BR, BRP, BRZ, BRZP, BRN, BRNP, BRNZ, BRNZP,
JMP, JSR, JSRR, LD, LDI, LDR, LEA, ST, STI, STR, TRAP, NOP,
RET, RTI, GETC, OUT, PUTC, PUTS, IN, PUTSP, HALT
}
/// Errors that can be produced while lexing.
#[derive(Debug, PartialEq, Eq, Clone, Copy, Default)]
pub enum LexErr {
/// A numeric literal overflowed while being parsed as a `u16`.
DoesNotFitU16,
/// A numeric literal overflowed while being parsed as an `i16`.
DoesNotFitI16,
/// A hex literal contains a character that is not a hex digit.
InvalidHex,
/// A decimal literal contains a character that is not a decimal digit.
InvalidNumeric,
/// A hex literal has no digits (nothing, or only a `-`, after the prefix).
InvalidHexEmpty,
/// A decimal literal has no digits (nothing, or only a `-`, after any `#`).
InvalidDecEmpty,
/// Integer parsing failed with an `IntErrorKind` this lexer does not map to a
/// more specific error.
UnknownIntErr,
/// A string literal has no closing quote on its line.
UnclosedStrLit,
/// A string literal's unescaped contents are `u16::MAX` bytes or longer.
StrLitTooBig,
/// A register token whose number is not in 0 through 7.
InvalidReg,
/// The default error. NOTE(review): presumably what logos reports when the
/// input matches no token pattern; confirm against the logos documentation.
#[default]
InvalidSymbol
}
impl std::fmt::Display for LexErr {
    /// Writes a short, lowercase description of the error.
    ///
    /// The "empty" and "invalid digit" variants of each literal kind share the
    /// same message; their `help` text is what tells them apart.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let message = match self {
            LexErr::DoesNotFitU16 => "numeric token does not fit 16-bit unsigned integer",
            LexErr::DoesNotFitI16 => "numeric token does not fit 16-bit signed integer",
            LexErr::InvalidHex | LexErr::InvalidHexEmpty => "invalid hex literal",
            LexErr::InvalidNumeric | LexErr::InvalidDecEmpty => "invalid decimal literal",
            LexErr::UnknownIntErr => "could not parse integer",
            LexErr::UnclosedStrLit => "unclosed string literal",
            LexErr::StrLitTooBig => "string literal is too large",
            LexErr::InvalidReg => "invalid register",
            LexErr::InvalidSymbol => "unrecognized symbol",
        };
        f.write_str(message)
    }
}
// Empty impl: none of the `std::error::Error` methods are overridden.
impl std::error::Error for LexErr {}
impl crate::err::Error for LexErr {
    /// Returns a hint on how to fix the error, or `None` when there is nothing
    /// more specific to say (`UnknownIntErr`).
    fn help(&self) -> Option<std::borrow::Cow<str>> {
        use std::borrow::Cow;

        let hint: Cow<str> = match self {
            // No actionable advice exists for an unclassified integer error.
            LexErr::UnknownIntErr => return None,
            LexErr::DoesNotFitU16 => Cow::Owned(format!(
                "the range for a 16-bit unsigned integer is [{}, {}]",
                u16::MIN,
                u16::MAX
            )),
            LexErr::DoesNotFitI16 => Cow::Owned(format!(
                "the range for a 16-bit signed integer is [{}, {}]",
                i16::MIN,
                i16::MAX
            )),
            LexErr::InvalidHex => {
                Cow::Borrowed("a hex literal starts with 'x' and consists of 0-9, A-F")
            }
            LexErr::InvalidNumeric => {
                Cow::Borrowed("a decimal literal only consists of digits 0-9")
            }
            LexErr::InvalidHexEmpty => {
                Cow::Borrowed("there should be hex digits (0-9, A-F) here")
            }
            LexErr::InvalidDecEmpty => Cow::Borrowed("there should be digits (0-9) here"),
            LexErr::UnclosedStrLit => {
                Cow::Borrowed("add a quote to the end of the string literal")
            }
            LexErr::StrLitTooBig => Cow::Owned(format!(
                "string literals are limited to at most {} characters",
                u16::MAX - 1
            )),
            LexErr::InvalidReg => Cow::Borrowed("this must be R0-R7"),
            LexErr::InvalidSymbol => {
                Cow::Borrowed("this char does not occur in any token in LC-3 assembly")
            }
        };
        Some(hint)
    }
}
/// Maps an integer-parsing failure onto the lexer's own error type.
///
/// - `e`: the kind of failure reported by the standard library parser.
/// - `invalid_digits_err`: returned when the text contains a non-digit.
/// - `empty_err`: returned when there are no digits at all.
/// - `overflow_err`: returned when the value is out of range in either direction.
/// - `src`: the text that was parsed (after any prefix was removed).
///
/// Kinds not handled explicitly map to `LexErr::UnknownIntErr`.
fn convert_int_error(
    e: &std::num::IntErrorKind,
    invalid_digits_err: LexErr,
    empty_err: LexErr,
    overflow_err: LexErr,
    src: &str,
) -> LexErr {
    match e {
        IntErrorKind::Empty => empty_err,
        IntErrorKind::InvalidDigit => {
            // A lone minus sign is reported by the parser as an invalid digit,
            // but from the user's point of view the digits are simply missing.
            if src == "-" {
                empty_err
            } else {
                invalid_digits_err
            }
        }
        IntErrorKind::PosOverflow | IntErrorKind::NegOverflow => overflow_err,
        IntErrorKind::Zero => {
            unreachable!("IntErrorKind::Zero should not be emitted in parsing u16")
        }
        _ => LexErr::UnknownIntErr,
    }
}
/// Parses the current slice as an unsigned decimal literal, ignoring one
/// leading `#` if present.
///
/// Returns `InvalidDecEmpty`, `InvalidNumeric` or `DoesNotFitU16` (via
/// `convert_int_error`) when the digits are missing, malformed or too large.
fn lex_unsigned_dec(lx: &Lexer<'_, Token>) -> Result<u16, LexErr> {
    let slice = lx.slice();
    let digits = slice.strip_prefix('#').unwrap_or(slice);

    match digits.parse::<u16>() {
        Ok(value) => Ok(value),
        Err(e) => Err(convert_int_error(
            e.kind(),
            LexErr::InvalidNumeric,
            LexErr::InvalidDecEmpty,
            LexErr::DoesNotFitU16,
            digits,
        )),
    }
}
/// Parses the current slice as a signed decimal literal, ignoring one leading
/// `#` if present.
///
/// Returns `InvalidDecEmpty`, `InvalidNumeric` or `DoesNotFitI16` (via
/// `convert_int_error`) when the digits are missing, malformed or out of range.
fn lex_signed_dec(lx: &Lexer<'_, Token>) -> Result<i16, LexErr> {
    let slice = lx.slice();
    let digits = slice.strip_prefix('#').unwrap_or(slice);

    match digits.parse::<i16>() {
        Ok(value) => Ok(value),
        Err(e) => Err(convert_int_error(
            e.kind(),
            LexErr::InvalidNumeric,
            LexErr::InvalidDecEmpty,
            LexErr::DoesNotFitI16,
            digits,
        )),
    }
}
/// Parses the current slice as an unsigned hex literal; the slice must start
/// with `x` or `X`.
///
/// Returns `InvalidHexEmpty`, `InvalidHex` or `DoesNotFitU16` (via
/// `convert_int_error`) when the digits are missing, malformed or too large.
fn lex_unsigned_hex(lx: &Lexer<'_, Token>) -> Result<u16, LexErr> {
    let digits = match lx.slice().strip_prefix(['X', 'x']) {
        Some(rest) => rest,
        None => unreachable!("Lexer slice should have contained an X or x"),
    };

    match u16::from_str_radix(digits, 16) {
        Ok(value) => Ok(value),
        Err(e) => Err(convert_int_error(
            e.kind(),
            LexErr::InvalidHex,
            LexErr::InvalidHexEmpty,
            LexErr::DoesNotFitU16,
            digits,
        )),
    }
}
/// Parses the current slice as a signed hex literal; the slice must start with
/// `x` or `X`, and the sign follows that prefix.
///
/// Returns `InvalidHexEmpty`, `InvalidHex` or `DoesNotFitI16` (via
/// `convert_int_error`) when the digits are missing, malformed or out of range.
fn lex_signed_hex(lx: &Lexer<'_, Token>) -> Result<i16, LexErr> {
    let digits = match lx.slice().strip_prefix(['X', 'x']) {
        Some(rest) => rest,
        None => unreachable!("Lexer slice should have contained an X or x"),
    };

    match i16::from_str_radix(digits, 16) {
        Ok(value) => Ok(value),
        Err(e) => Err(convert_int_error(
            e.kind(),
            LexErr::InvalidHex,
            LexErr::InvalidHexEmpty,
            LexErr::DoesNotFitI16,
            digits,
        )),
    }
}
/// Parses the register number that follows the one-character `R`/`r` prefix of
/// the current slice.
///
/// Returns `LexErr::InvalidReg` unless the number parses as a `u8` below 8.
fn lex_reg(lx: &Lexer<'_, Token>) -> Result<u8, LexErr> {
    let number = &lx.slice()[1..];
    match number.parse::<u8>() {
        Ok(reg) if reg < 8 => Ok(reg),
        _ => Err(LexErr::InvalidReg),
    }
}
/// Lexes the rest of a string literal once its opening `"` has been matched.
///
/// The literal must be closed on the same line. On success the lexer is
/// advanced past the closing quote and the unescaped contents are returned.
/// The recognized escapes are `\n`, `\r`, `\t`, `\\`, `\0` and `\"`; any other
/// backslash sequence is kept verbatim (backslash included).
///
/// # Errors
/// - `LexErr::UnclosedStrLit` if the line holds no unescaped closing quote. The
///   lexer is advanced to the end of the line in that case.
/// - `LexErr::StrLitTooBig` if the unescaped contents are `u16::MAX` bytes or
///   longer.
fn lex_str_literal(lx: &mut Lexer<'_, Token>) -> Result<String, LexErr> {
    // Only the remainder of the current line is searched for the closing quote.
    let rem = lx.remainder().lines().next().unwrap_or("");

    let Some(len) = find_closing_quote(rem) else {
        lx.bump(rem.len());
        return Err(LexErr::UnclosedStrLit);
    };
    // Consume the contents plus the closing quote.
    lx.bump(len + 1);

    // The slice now spans both quotes; strip them to get the raw contents.
    let slice = lx.slice();
    let buf = unescape_str_literal(&slice[1..slice.len() - 1]);

    if buf.len() < usize::from(u16::MAX) {
        Ok(buf)
    } else {
        Err(LexErr::StrLitTooBig)
    }
}

/// Returns the byte index of the first `"` in `line` that is not escaped.
///
/// Backslashes are paired left to right with the character that follows them,
/// so `\\"` is an escaped backslash followed by a real closing quote, while
/// `\"` is an escaped quote. Scanning bytes is sound because `\` and `"` are
/// ASCII and never occur inside a multi-byte UTF-8 sequence.
fn find_closing_quote(line: &str) -> Option<usize> {
    let mut escaped = false;
    for (idx, byte) in line.bytes().enumerate() {
        if escaped {
            // This byte is consumed by the preceding backslash.
            escaped = false;
        } else if byte == b'\\' {
            escaped = true;
        } else if byte == b'"' {
            return Some(idx);
        }
    }
    None
}

/// Resolves the backslash escapes in the raw contents of a string literal.
fn unescape_str_literal(inner: &str) -> String {
    let mut buf = String::with_capacity(inner.len());
    let mut remaining = inner;

    while let Some((left, right)) = remaining.split_once('\\') {
        buf.push_str(left);

        // Work on whole characters so a non-ASCII character after the
        // backslash is neither split nor mangled.
        let mut chars = right.chars();
        match chars.next() {
            Some('n') => buf.push('\n'),
            Some('r') => buf.push('\r'),
            Some('t') => buf.push('\t'),
            Some('\\') => buf.push('\\'),
            Some('0') => buf.push('\0'),
            Some('"') => buf.push('"'),
            Some(other) => {
                // Unknown escape: keep it exactly as written.
                buf.push('\\');
                buf.push(other);
            }
            // A trailing lone backslash cannot come out of `find_closing_quote`,
            // but keep it rather than panic if this helper is reused.
            None => buf.push('\\'),
        }
        remaining = chars.as_str();
    }

    buf.push_str(remaining);
    buf
}