mod keyword;
use mago_database::file::FileId;
use mago_database::file::HasFileId;
use mago_span::Position;
use mago_syntax_core::float_exponent;
use mago_syntax_core::float_separator;
use mago_syntax_core::input::Input;
use mago_syntax_core::number_sign;
use mago_syntax_core::part_of_identifier;
use mago_syntax_core::start_of_binary_number;
use mago_syntax_core::start_of_float_number;
use mago_syntax_core::start_of_hexadecimal_number;
use mago_syntax_core::start_of_identifier;
use mago_syntax_core::start_of_octal_number;
use mago_syntax_core::start_of_octal_or_float_number;
use mago_syntax_core::utils::read_digits_of_base;
use crate::error::SyntaxError;
use crate::token::TypeToken;
use crate::token::TypeTokenKind;
/// Tokenizer that wraps an [`Input`] and hands out [`TypeToken`]s one at a
/// time through [`TypeLexer::advance`].
#[derive(Debug)]
pub struct TypeLexer<'arena> {
// Byte source the lexer peeks into and consumes from.
input: Input<'arena>,
}
impl<'arena> TypeLexer<'arena> {
#[inline]
#[must_use]
pub fn new(input: Input<'arena>) -> TypeLexer<'arena> {
TypeLexer { input }
}
/// Reports whether the wrapped input has reached its end, as answered by
/// the input's own `has_reached_eof`.
#[inline]
#[must_use]
pub fn has_reached_eof(&self) -> bool {
self.input.has_reached_eof()
}
/// Returns the position the wrapped input currently stands at.
#[inline]
#[must_use]
pub fn current_position(&self) -> Position {
self.input.current_position()
}
/// Returns the bytes of the wrapped input between `from` and `to`.
///
/// This forwards directly to the input's `slice_in_range`.
/// NOTE(review): whether `to` is inclusive or exclusive is decided by `Input` — confirm there.
#[inline]
#[must_use]
pub fn slice_in_range(&self, from: u32, to: u32) -> &'arena [u8] {
self.input.slice_in_range(from, to)
}
/// Produces the next token from the input.
///
/// Returns `None` once the input reports end-of-file. Otherwise yields
/// `Some(Ok(token))`, or `Some(Err(SyntaxError::UnrecognizedToken(..)))` when the
/// next byte cannot begin any token; no token bytes are consumed on that error path.
///
/// A run of whitespace is returned as a single `Whitespace` token.
#[inline]
pub fn advance(&mut self) -> Option<Result<TypeToken<'arena>, SyntaxError>> {
    if self.input.has_reached_eof() {
        return None;
    }

    let start = self.input.current_position();

    let blank = self.input.consume_whitespaces();
    if !blank.is_empty() {
        let end = self.input.current_position();
        return Some(Ok(self.token(TypeTokenKind::Whitespace, blank, start, end)));
    }

    let rest = self.input.read_remaining();
    // SAFETY: the input was not at EOF above and no whitespace was consumed since,
    // so at least one byte is expected to remain (relies on `Input` upholding that).
    let first = unsafe { *rest.get_unchecked(0) };
    let second = rest.get(1).copied();

    // Bytes allowed to open an identifier: ASCII letters, `_`, and any non-ASCII byte.
    let opens_identifier = |byte: u8| byte.is_ascii_alphabetic() || byte == b'_' || byte >= 0x80;

    // `None` means that `first` does not start any known token.
    let scanned = match first {
        b'*' => Some((TypeTokenKind::Asterisk, 1)),
        b':' => Some(if second == Some(b':') { (TypeTokenKind::ColonColon, 2) } else { (TypeTokenKind::Colon, 1) }),
        b'=' => Some((TypeTokenKind::Equals, 1)),
        b'?' => Some((TypeTokenKind::Question, 1)),
        b'!' => Some((TypeTokenKind::Exclamation, 1)),
        b'&' => Some((TypeTokenKind::Ampersand, 1)),
        b'|' => Some((TypeTokenKind::Pipe, 1)),
        b'>' => Some((TypeTokenKind::GreaterThan, 1)),
        b'<' => Some((TypeTokenKind::LessThan, 1)),
        b'(' => Some((TypeTokenKind::LeftParenthesis, 1)),
        b')' => Some((TypeTokenKind::RightParenthesis, 1)),
        b'[' => Some((TypeTokenKind::LeftBracket, 1)),
        b']' => Some((TypeTokenKind::RightBracket, 1)),
        b'{' => Some((TypeTokenKind::LeftBrace, 1)),
        b'}' => Some((TypeTokenKind::RightBrace, 1)),
        b',' => Some((TypeTokenKind::Comma, 1)),
        b'+' => Some((TypeTokenKind::Plus, 1)),
        b'-' => Some((TypeTokenKind::Minus, 1)),
        b'.' => {
            // `...` wins over a dot-leading float; a lone dot is not a token.
            if matches!(rest.get(..3), Some([b'.', b'.', b'.'])) {
                Some((TypeTokenKind::Ellipsis, 3))
            } else if matches!(second, Some(b'0'..=b'9')) {
                Some(self.read_decimal())
            } else {
                None
            }
        }
        b'/' if second == Some(b'/') => Some(self.read_single_line_comment()),
        b'\'' | b'"' => Some(self.read_literal_string(first)),
        b'\\' if second.is_some_and(opens_identifier) => Some(self.read_fully_qualified_identifier()),
        b'$' if second.is_some_and(opens_identifier) => Some(self.read_variable()),
        b'0'..=b'9' => Some(self.read_number()),
        byte if opens_identifier(byte) => Some(self.read_identifier_or_keyword()),
        _ => None,
    };

    let Some((kind, length)) = scanned else {
        return Some(Err(SyntaxError::UnrecognizedToken(self.file_id(), first, self.input.current_position())));
    };

    let buffer = self.input.consume(length);
    let end = self.input.current_position();

    Some(Ok(self.token(kind, buffer, start, end)))
}
/// Measures a variable token (`$name`) without consuming it.
///
/// The caller has already matched the `$` and a valid first name byte, so the
/// scan starts at offset 2 and extends over every following identifier byte.
#[inline]
fn read_variable(&self) -> (TypeTokenKind, usize) {
    let mut consumed = 2;
    loop {
        if !matches!(self.input.peek(consumed, 1), [part_of_identifier!(), ..]) {
            break;
        }

        consumed += 1;
    }

    (TypeTokenKind::Variable, consumed)
}
/// Measures a `//` comment without consuming it.
///
/// The comment runs up to, but not including, the next `\n`, or to the end of
/// the input when no newline follows.
#[inline]
fn read_single_line_comment(&self) -> (TypeTokenKind, usize) {
    // The two leading slashes were matched by the caller.
    let mut consumed = 2;
    while let [byte, ..] = self.input.peek(consumed, 1) {
        if *byte == b'\n' {
            break;
        }

        consumed += 1;
    }

    (TypeTokenKind::SingleLineComment, consumed)
}
/// Measures a float literal that starts with a dot, such as `.5` or `.5e-3`.
///
/// The caller has already verified that the input starts with `.` followed by a
/// decimal digit, so digit scanning resumes at offset 2.
///
/// An exponent marker (with its optional sign) is only made part of the literal
/// when at least one digit follows it; otherwise the literal ends right before
/// the marker. This mirrors the exponent handling in `read_number`, so `.5e` is
/// measured as `.5` rather than swallowing the dangling `e`.
///
/// Returns the token kind together with the byte length of the literal.
#[inline]
fn read_decimal(&self) -> (TypeTokenKind, usize) {
    let mut length = read_digits_of_base(&self.input, 2, 10);

    if let float_exponent!() = self.input.peek(length, 1) {
        // Tentatively step over the marker and an optional sign.
        let mut exponent_start = length + 1;
        if let number_sign!() = self.input.peek(exponent_start, 1) {
            exponent_start += 1;
        }

        // Commit to the exponent only if it actually carries digits.
        let exponent_end = read_digits_of_base(&self.input, exponent_start, 10);
        if exponent_end > exponent_start {
            length = exponent_end;
        }
    }

    (TypeTokenKind::LiteralFloat, length)
}
/// Measures a numeric literal that starts with a digit, without consuming it.
///
/// The first three bytes select the radix and whether the literal can only be
/// an integer (binary, octal and hexadecimal prefixes), is already known to be
/// a float, or may turn out to be either. Returns `LiteralInteger` or
/// `LiteralFloat` together with the literal's byte length.
#[inline]
fn read_number(&self) -> (TypeTokenKind, usize) {
    #[derive(Debug, Clone, Copy, PartialEq, Eq)]
    enum Flavor {
        Integer,
        Float,
        OctalOrFloat,
        IntegerOrFloat,
    }

    // `prefix_len` is how many bytes are already accounted for: two for a
    // radix prefix, one (the leading digit) otherwise.
    let (radix, flavor, prefix_len): (u8, Flavor, usize) = match self.input.read(3) {
        start_of_binary_number!() => (2, Flavor::Integer, 2),
        start_of_octal_number!() => (8, Flavor::Integer, 2),
        start_of_hexadecimal_number!() => (16, Flavor::Integer, 2),
        start_of_octal_or_float_number!() => (10, Flavor::OctalOrFloat, 1),
        start_of_float_number!() => (10, Flavor::Float, 1),
        _ => (10, Flavor::IntegerOrFloat, 1),
    };

    let mut length = match flavor {
        // Prefixed literals are integers and end with their digits.
        Flavor::Integer => {
            return (TypeTokenKind::LiteralInteger, read_digits_of_base(&self.input, prefix_len, radix));
        }
        Flavor::Float => prefix_len,
        Flavor::OctalOrFloat | Flavor::IntegerOrFloat => read_digits_of_base(&self.input, prefix_len, radix),
    };

    // Without a float separator next, what was read so far is an integer.
    if !matches!(self.input.peek(length, 3), float_separator!()) {
        return (TypeTokenKind::LiteralInteger, length);
    }

    // Fractional part.
    if matches!(self.input.peek(length, 1), [b'.']) {
        length = read_digits_of_base(&self.input, length + 1, 10);
    }

    // Exponent: only kept when digits follow the marker and optional sign.
    if matches!(self.input.peek(length, 1), float_exponent!()) {
        let after_marker = length + 1;
        let digits_start =
            if matches!(self.input.peek(after_marker, 1), number_sign!()) { after_marker + 1 } else { after_marker };

        let digits_end = read_digits_of_base(&self.input, digits_start, 10);
        if digits_end > digits_start {
            length = digits_end;
        }
    }

    (TypeTokenKind::LiteralFloat, length)
}
#[inline]
fn read_literal_string(&self, quote: u8) -> (TypeTokenKind, usize) {
let total = self.input.len();
let start = self.input.current_offset();
let mut length = 1;
let mut last_was_backslash = false;
let mut partial = false;
loop {
let pos = start + length;
if pos >= total {
partial = true;
break;
}
let byte = self.input.read_at(pos);
if *byte == b'\\' {
last_was_backslash = !last_was_backslash;
length += 1;
} else {
if byte == "e && !last_was_backslash {
length += 1;
break;
}
length += 1;
last_was_backslash = false;
}
}
if partial { (TypeTokenKind::PartialLiteralString, length) } else { (TypeTokenKind::LiteralString, length) }
}
/// Measures a fully qualified identifier (`\Foo\Bar`) without consuming it.
///
/// The caller has already matched the leading backslash and a valid first
/// name byte, so the scan starts at offset 2. Each further segment must be
/// introduced by a single backslash and begin with an identifier-start byte.
///
/// A separator that is not followed by a new segment — a doubled backslash,
/// a trailing backslash before a non-identifier byte, or one at the end of the
/// input — is left out of the token, so the measured name never ends with `\`.
#[inline]
fn read_fully_qualified_identifier(&self) -> (TypeTokenKind, usize) {
    let mut length = 2;
    let mut last_was_slash = false;

    loop {
        match self.input.peek(length, 1) {
            // First byte of a new segment, right after a separator.
            [start_of_identifier!(), ..] if last_was_slash => {
                length += 1;
                last_was_slash = false;
            }
            // Continuation of the current segment.
            [part_of_identifier!(), ..] if !last_was_slash => {
                length += 1;
            }
            // Separator; tentatively included until a segment follows it.
            [b'\\', ..] if !last_was_slash => {
                length += 1;
                last_was_slash = true;
            }
            _ => break,
        }
    }

    // Give back a separator that never received its segment.
    if last_was_slash {
        length -= 1;
    }

    (TypeTokenKind::FullyQualifiedIdentifier, length)
}
/// Measures an identifier, keyword, or qualified name starting at the current
/// offset, without consuming it.
///
/// The caller has already checked that the first byte can open an identifier.
/// After the leading run of identifier bytes:
///
/// - a backslash followed by an identifier-start byte hands over to
///   `finish_qualified_identifier`;
/// - a hyphen followed by an identifier byte triggers a second scan over the
///   hyphenated word, which is returned only if it is a known keyword;
/// - otherwise (or when the hyphenated word is not a keyword) the leading run
///   is returned as a keyword if the lookup knows it, else as `Identifier`.
#[inline]
fn read_identifier_or_keyword(&self) -> (TypeTokenKind, usize) {
    let bytes = self.input.read_remaining();

    let is_part = |byte: u8| mago_syntax_core::utils::is_part_of_identifier(&byte);
    let is_start = |byte: u8| mago_syntax_core::utils::is_start_of_identifier(&byte);

    // Keyword when the lookup recognises the first `len` bytes, identifier otherwise.
    let classify = |len: usize| match keyword::lookup_keyword(&bytes[..len]) {
        Some(kind) => (kind, len),
        None => (TypeTokenKind::Identifier, len),
    };

    // End of the leading run of identifier bytes (the first byte is taken as given).
    let word_end = bytes.iter().skip(1).position(|&byte| !is_part(byte)).map_or(bytes.len(), |index| index + 1);

    // Decide what to do based on the byte that stopped the run and its successor.
    match (bytes.get(word_end).copied(), bytes.get(word_end + 1).copied()) {
        (Some(b'\\'), Some(next)) if is_start(next) => return self.finish_qualified_identifier(word_end),
        (Some(b'-'), Some(next)) if is_part(next) => {}
        _ => return classify(word_end),
    }

    // Extend over identifier bytes and over hyphens that are directly followed by one.
    let mut hyphenated_end = word_end;
    while let Some(&byte) = bytes.get(hyphenated_end) {
        let joins =
            is_part(byte) || (byte == b'-' && bytes.get(hyphenated_end + 1).is_some_and(|&next| is_part(next)));
        if !joins {
            break;
        }

        hyphenated_end += 1;
    }

    // The hyphenated form only counts when it is a keyword; else fall back to the plain word.
    match keyword::lookup_keyword(&bytes[..hyphenated_end]) {
        Some(kind) => (kind, hyphenated_end),
        None => classify(word_end),
    }
}
/// Continues measuring a name from `start_len`, where the caller found a
/// backslash followed by an identifier-start byte, and decides whether the
/// result is a qualified name.
///
/// Each segment must be introduced by a single backslash and begin with an
/// identifier-start byte. A separator that never receives a segment (doubled
/// backslash, trailing backslash, end of input) is given back exactly once, so
/// it is counted neither in the length nor in the separator tally.
///
/// Returns `QualifiedIdentifier` when at least one separator remains in the
/// measured name, `Identifier` otherwise, together with the byte length.
#[inline]
fn finish_qualified_identifier(&self, start_len: usize) -> (TypeTokenKind, usize) {
    let mut length = start_len;
    let mut slashes: usize = 0;
    let mut last_was_slash = false;

    loop {
        match self.input.peek(length, 1) {
            // First byte of a new segment, right after a separator.
            [start_of_identifier!(), ..] if last_was_slash => {
                length += 1;
                last_was_slash = false;
            }
            // Continuation of the current segment.
            [part_of_identifier!(), ..] if !last_was_slash => {
                length += 1;
            }
            // Separator; tentatively included until a segment follows it.
            [b'\\', ..] if !last_was_slash => {
                length += 1;
                slashes += 1;
                last_was_slash = true;
            }
            // Anything else — including a second backslash in a row — ends the name.
            _ => break,
        }
    }

    // Single place where a dangling separator is undone, so it cannot be
    // subtracted twice. `slashes` is at least 1 here because the flag is only
    // set together with an increment.
    if last_was_slash {
        length -= 1;
        slashes -= 1;
    }

    if slashes > 0 { (TypeTokenKind::QualifiedIdentifier, length) } else { (TypeTokenKind::Identifier, length) }
}
/// Assembles a [`TypeToken`] from its kind, its raw bytes and its start position.
///
/// The end position is accepted but not stored in the token (hence `_end`).
#[inline]
fn token(&self, kind: TypeTokenKind, value: &'arena [u8], start: Position, _end: Position) -> TypeToken<'arena> {
TypeToken { kind, start, value }
}
}
// The lexer reports the file identifier of the input it wraps.
impl HasFileId for TypeLexer<'_> {
/// Returns the `FileId` reported by the wrapped input.
#[inline]
fn file_id(&self) -> FileId {
self.input.file_id()
}
}