use super::span::Span;
use std::fmt;
use thiserror::Error;
/// Convenience alias for a `Result` whose error type is [`LexerError`].
pub type LexerResult<T> = Result<T, LexerError>;
/// A single lexer error: what went wrong, the span it is attached to, and
/// optional help text.
///
/// `Display` is implemented by hand further down in this file rather than
/// through an `#[error(...)]` attribute.
#[derive(Debug, Clone, Error)]
pub struct LexerError {
/// The category of the error; its `Display` output is the primary message.
pub kind: LexerErrorKind,
/// The source span associated with this error.
pub span: Span,
/// Optional help text. When present, `Display` appends it on a `help:` line.
pub help: Option<String>,
}
impl LexerError {
    /// Builds an error of the given `kind` attached to `span`, with no help text.
    pub fn new(kind: LexerErrorKind, span: Span) -> Self {
        Self { kind, span, help: None }
    }

    /// Consumes the error and returns it with `help` attached.
    ///
    /// Any help text that was already set is replaced.
    pub fn with_help(self, help: impl Into<String>) -> Self {
        Self {
            help: Some(help.into()),
            ..self
        }
    }

    /// Returns the primary message, i.e. the `Display` rendering of the error
    /// kind alone (the help text is not included).
    pub fn message(&self) -> String {
        let kind = &self.kind;
        kind.to_string()
    }
}
impl fmt::Display for LexerError {
    /// Writes the kind's message, followed by a `help:` line when help text
    /// has been attached to the error.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "{}", self.kind)?;
        match self.help.as_deref() {
            Some(text) => write!(f, "\n help: {}", text),
            None => Ok(()),
        }
    }
}
/// The category of a lexer error.
///
/// The `#[error(...)]` attribute on each variant supplies its `Display`
/// message; positional payloads (`{0}`, `{1}`) are interpolated into it.
#[derive(Debug, Clone, PartialEq, Eq, Error)]
pub enum LexerErrorKind {
// Unexpected input.
/// Carries the character that is shown in the message.
#[error("unexpected character `{0}`")]
UnexpectedChar(char),
#[error("unexpected end of file")]
UnexpectedEof,
// Unterminated literals.
#[error("unterminated string literal")]
UnterminatedString,
#[error("unterminated character literal")]
UnterminatedChar,
#[error("unterminated byte string literal")]
UnterminatedByteString,
#[error("unterminated raw string literal")]
UnterminatedRawString {
// Not interpolated into the message.
// NOTE(review): presumably the number of `#` characters the closing
// delimiter needs; confirm against the lexer.
expected_hashes: u8,
},
// Character literal contents.
#[error("empty character literal")]
EmptyCharLiteral,
#[error("character literal may only contain one codepoint")]
MultipleCharsInCharLiteral,
// Escape sequences.
/// Carries the character that is printed after the backslash in the message.
#[error("unknown escape sequence `\\{0}`")]
UnknownEscape(char),
#[error("invalid Unicode escape sequence")]
InvalidUnicodeEscape,
#[error("Unicode escape sequence missing opening brace")]
UnicodeEscapeMissingBrace,
#[error("Unicode escape sequence missing closing brace")]
UnicodeEscapeUnclosed,
#[error("Unicode escape sequence has too many digits (max 6)")]
UnicodeEscapeTooLong,
/// Carries the value that is printed in upper-case hex (`{:#X}`).
#[error("invalid Unicode codepoint `{0:#X}`")]
InvalidUnicodeCodepoint(u32),
#[error("invalid hex escape sequence")]
InvalidHexEscape,
/// Carries the value that is printed in upper-case hex (`{:#X}`).
#[error("hex escape value `{0:#X}` is out of range (max 0x7F for characters, 0xFF for bytes)")]
HexEscapeOutOfRange(u32),
#[error("escape character at end of file")]
EscapeAtEof,
// Numeric literals.
#[error("invalid numeric literal")]
InvalidNumber,
#[error("integer literal is too large")]
IntegerOverflow,
#[error("float literal is out of range")]
FloatOverflow,
#[error("expected digits after exponent marker")]
EmptyExponent,
/// Carries the prefix text shown in the message.
#[error("expected digits after `{0}` prefix")]
NoDigitsAfterPrefix(String),
/// Carries the digit (`{0}`) and the base (`{1}`) shown in the message.
#[error("invalid digit `{0}` for base {1}")]
InvalidDigit(char, u32),
/// Carries the base shown in the message.
#[error("float literals cannot use base {0}")]
FloatWithBase(u32),
/// Carries the suffix text shown in the message.
#[error("invalid suffix `{0}` for numeric literal")]
InvalidNumericSuffix(String),
/// Carries the suffix text shown in the message.
#[error("integer suffix `{0}` cannot be used with float literals")]
IntSuffixOnFloat(String),
/// Carries the suffix text shown in the message.
#[error("float suffix `{0}` cannot be used with non-decimal integers")]
FloatSuffixOnNonDecimal(String),
#[error("numeric literal cannot start with underscore")]
LeadingUnderscore,
#[error("numeric literal cannot end with underscore")]
TrailingUnderscore,
#[error("consecutive underscores in numeric literal")]
ConsecutiveUnderscores,
// Comments.
#[error("unterminated block comment")]
UnterminatedBlockComment {
// Not interpolated into the message.
// NOTE(review): presumably the comment nesting depth when input ended;
// confirm against the lexer.
depth: u32,
},
#[error("invalid documentation comment")]
InvalidDocComment,
// Identifiers and lifetimes.
/// Carries the character shown in the message.
#[error("invalid character `{0}` in identifier")]
InvalidIdentChar(char),
#[error("expected identifier after `r#`")]
ExpectedRawIdent,
/// Carries the identifier text shown in the message.
#[error("`{0}` cannot be used as a raw identifier")]
CannotBeRawIdent(String),
#[error("expected lifetime name after `'`")]
ExpectedLifetime,
#[error("invalid lifetime name")]
InvalidLifetime,
// DSL blocks.
#[error("mismatched DSL block delimiters")]
MismatchedDslDelimiters,
#[error("unterminated DSL block")]
UnterminatedDslBlock,
// Format strings. `{{` in a message is an escaped literal `{`.
#[error("unterminated format string literal")]
UnterminatedFormatString,
#[error("unclosed `{{` in format string")]
UnclosedInterpolation,
#[error("empty interpolation expression in format string")]
EmptyInterpolation,
/// Carries the maximum depth shown in the message.
#[error("interpolation nesting too deep (max depth: {0})")]
InterpolationTooDeep(u32),
/// Carries the specifier text shown in the message.
#[error("invalid format specifier `{0}`")]
InvalidFormatSpecifier(String),
// Source-level problems.
#[error("shebang must be at the start of the file")]
ShebangNotAtStart,
#[error("invalid UTF-8 sequence")]
InvalidUtf8,
#[error("NUL character not allowed in source")]
NulInSource,
#[error("non-ASCII character in byte literal")]
NonAsciiInByteLiteral,
/// Carries a free-form description appended to the message.
#[error("internal lexer error: {0}")]
Internal(String),
}
impl LexerErrorKind {
pub fn help(&self) -> Option<&'static str> {
match self {
LexerErrorKind::UnterminatedString => {
Some("string literals must end with a closing `\"`")
}
LexerErrorKind::UnterminatedChar => {
Some("character literals must end with a closing `'`")
}
LexerErrorKind::EmptyCharLiteral => {
Some("use `'\\0'` for a NUL character or `\"\"` for an empty string")
}
LexerErrorKind::MultipleCharsInCharLiteral => {
Some("consider using a string literal instead")
}
LexerErrorKind::UnknownEscape(_) => Some(
"valid escape sequences: \\n, \\r, \\t, \\\\, \\', \\\", \\0, \\xNN, \\u{NNNN}",
),
LexerErrorKind::UnicodeEscapeMissingBrace => {
Some("Unicode escapes use the format \\u{NNNN}")
}
LexerErrorKind::EmptyExponent => {
Some("add digits after the exponent marker (e.g., `1e10` or `1e-5`)")
}
LexerErrorKind::NoDigitsAfterPrefix(_) => Some("add digits after the radix prefix"),
LexerErrorKind::LeadingUnderscore => {
Some("remove the leading underscore or use an identifier")
}
LexerErrorKind::ConsecutiveUnderscores => {
Some("use only a single underscore as a separator")
}
LexerErrorKind::NonAsciiInByteLiteral => {
Some("use a \\xNN escape sequence for non-ASCII bytes")
}
_ => None,
}
}
pub fn is_recoverable(&self) -> bool {
!matches!(
self,
LexerErrorKind::InvalidUtf8 | LexerErrorKind::Internal(_)
)
}
}
/// A collection of [`LexerError`]s, stored in the order they were pushed.
///
/// The derived `Default` yields an empty collection.
#[derive(Debug, Clone, Default)]
pub struct LexerErrors {
// Backing storage; new errors are appended at the end.
errors: Vec<LexerError>,
}
impl LexerErrors {
pub fn new() -> Self {
Self::default()
}
pub fn push(&mut self, error: LexerError) {
self.errors.push(error);
}
pub fn emit(&mut self, kind: LexerErrorKind, span: Span) {
self.push(LexerError::new(kind, span));
}
pub fn is_empty(&self) -> bool {
self.errors.is_empty()
}
pub fn len(&self) -> usize {
self.errors.len()
}
pub fn errors(&self) -> &[LexerError] {
&self.errors
}
pub fn into_errors(self) -> Vec<LexerError> {
self.errors
}
pub fn iter(&self) -> impl Iterator<Item = &LexerError> {
self.errors.iter()
}
}
/// Consuming iteration: yields each recorded error by value, in insertion order.
impl IntoIterator for LexerErrors {
    type Item = LexerError;
    type IntoIter = std::vec::IntoIter<LexerError>;

    fn into_iter(self) -> Self::IntoIter {
        let Self { errors } = self;
        errors.into_iter()
    }
}
/// Borrowing iteration: yields a reference to each recorded error, in
/// insertion order, so `for err in &errors` works.
impl<'errs> IntoIterator for &'errs LexerErrors {
    type Item = &'errs LexerError;
    type IntoIter = std::slice::Iter<'errs, LexerError>;

    fn into_iter(self) -> Self::IntoIter {
        self.errors.as_slice().iter()
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// The rendered error contains the kind's message.
    #[test]
    fn test_error_display() {
        let rendered =
            LexerError::new(LexerErrorKind::UnexpectedChar('$'), Span::dummy()).to_string();
        assert!(rendered.contains("unexpected character"));
    }

    /// Attaching help sets the field and makes it appear in the rendering.
    #[test]
    fn test_error_with_help() {
        let plain = LexerError::new(LexerErrorKind::UnterminatedString, Span::dummy());
        let err = plain.with_help("add a closing quote");
        assert!(err.help.is_some());
        let rendered = err.to_string();
        assert!(rendered.contains("help"));
    }

    /// A new collection is empty and grows by one per emitted error.
    #[test]
    fn test_error_collection() {
        let mut collected = LexerErrors::new();
        assert!(collected.is_empty());
        collected.emit(LexerErrorKind::UnexpectedChar('$'), Span::dummy());
        collected.emit(LexerErrorKind::UnterminatedString, Span::dummy());
        assert_eq!(collected.len(), 2);
    }
}