#![cfg_attr(coverage_nightly, coverage(off))]
use super::types::{RuchyToken, KEYWORD_MAP, SINGLE_CHAR_TOKEN_MAP};
/// Character-at-a-time lexer that turns source text into `RuchyToken`s.
///
/// The lexer keeps a cursor (`position` / `current_char`) into `input` and
/// tracks the line and column of that cursor as characters are consumed.
pub struct RuchyLexer {
/// The complete source text being tokenized.
input: String,
/// Index of `current_char`, counted in `char`s (it is used with
/// `chars().nth(..)`, not as a byte offset).
position: usize,
/// The character under the cursor, or `None` once the cursor is past the end.
current_char: Option<char>,
/// Line of the cursor; starts at 1 and is incremented when a `'\n'` is consumed.
line: u32,
/// Column of the cursor; starts at 1 and is reset to 1 after a `'\n'`.
column: u32,
}
impl RuchyLexer {
/// Creates a lexer positioned on the first character of `input`.
///
/// The cursor starts at character index 0, line 1, column 1. For empty
/// input `current_char` is `None`, so the first token produced is `Eof`.
#[must_use]
pub fn new(input: String) -> Self {
    // Read the first character before moving `input` into the struct so the
    // source text does not have to be cloned.
    let current_char = input.chars().next();
    Self {
        input,
        position: 0,
        current_char,
        line: 1,
        column: 1,
    }
}
/// Consumes the current character and moves the cursor one character forward.
///
/// Line/column bookkeeping is based on the character being left behind: a
/// `'\n'` starts a new line at column 1, any other character bumps the
/// column, and nothing changes when the cursor is already past the end.
/// `position` is incremented unconditionally.
fn advance(&mut self) {
    match self.current_char {
        Some('\n') => {
            self.line += 1;
            self.column = 1;
        }
        Some(_) => self.column += 1,
        None => {}
    }

    self.position += 1;
    // NOTE: `chars().nth` walks the string from its start on every call, so
    // the cost of this lookup grows with `position`.
    self.current_char = self.input.chars().nth(self.position);
}
/// Returns the character immediately after the current one without
/// consuming anything, or `None` if there is no such character.
fn peek(&self) -> Option<char> {
    let lookahead_index = self.position + 1;
    self.input.chars().nth(lookahead_index)
}
/// Consumes characters for as long as the current one is whitespace
/// (per `char::is_whitespace`, which includes newlines).
fn skip_whitespace(&mut self) {
    while self.current_char.is_some_and(char::is_whitespace) {
        self.advance();
    }
}
/// Skips a single `//` line comment if the cursor is on its first slash.
///
/// Consumption stops at the terminating `'\n'` (which is left in place) or
/// at end of input. Does nothing when the cursor is not at `//`.
fn skip_comment(&mut self) {
    let at_line_comment = self.current_char == Some('/') && self.peek() == Some('/');
    if !at_line_comment {
        return;
    }

    while !matches!(self.current_char, None | Some('\n')) {
        self.advance();
    }
}
/// Consumes and returns the longest run of alphanumeric characters and
/// underscores starting at the cursor.
///
/// Returns an empty string when the current character is not part of an
/// identifier (or the input is exhausted).
fn read_identifier(&mut self) -> String {
    let mut ident = String::new();
    loop {
        match self.current_char {
            Some(c) if c.is_alphanumeric() || c == '_' => {
                ident.push(c);
                self.advance();
            }
            _ => return ident,
        }
    }
}
/// Lexes a numeric literal starting at the cursor.
///
/// Grammar accepted: digits, optionally a `.` followed by a digit, and
/// optionally an exponent (`e`/`E`, optional sign, at least one ASCII
/// digit). A literal containing a fraction or an exponent is returned as
/// `RuchyToken::Float`, anything else as `RuchyToken::Integer`.
///
/// Text that fails to parse (for example an out-of-range integer, or a
/// non-ASCII numeric character accepted by `char::is_numeric`) falls back
/// to `0` / `0.0`.
fn read_number(&mut self) -> RuchyToken {
    let mut num_str = String::new();
    let mut is_float = false;
    let mut has_exponent = false;

    while let Some(ch) = self.current_char {
        if ch.is_numeric() {
            // `is_numeric` is kept (rather than `is_ascii_digit`) so this loop
            // always consumes the character the dispatcher sent us here for.
            num_str.push(ch);
            self.advance();
        } else if ch == '.' && !is_float && self.peek().is_some_and(char::is_numeric) {
            // `!is_float` also rejects a fraction after an exponent (`1e5.3`),
            // because an exponent marks the literal as a float below.
            is_float = true;
            num_str.push(ch);
            self.advance();
        } else if matches!(ch, 'e' | 'E') && !has_exponent && self.exponent_follows() {
            // Only treat `e` as an exponent marker when digits really follow;
            // otherwise it belongs to whatever token comes next.
            has_exponent = true;
            // An exponent makes the literal a float, so `1e5` parses as
            // 100000.0 instead of failing integer parsing.
            is_float = true;
            num_str.push(ch);
            self.advance();
            if let Some(sign @ ('+' | '-')) = self.current_char {
                num_str.push(sign);
                self.advance();
            }
        } else {
            break;
        }
    }

    if is_float {
        RuchyToken::Float(num_str.parse().unwrap_or(0.0))
    } else {
        RuchyToken::Integer(num_str.parse().unwrap_or(0))
    }
}

/// Returns `true` when the characters after the current `e`/`E` form a valid
/// exponent body: an optional `+`/`-` followed by at least one ASCII digit.
fn exponent_follows(&self) -> bool {
    let mut rest = self.input.chars().skip(self.position + 1);
    match rest.next() {
        Some('+' | '-') => rest.next().is_some_and(|c| c.is_ascii_digit()),
        Some(c) => c.is_ascii_digit(),
        None => false,
    }
}
/// Reads a quoted string whose opening `quote` is under the cursor and
/// returns its decoded contents.
///
/// The closing quote is consumed but not included. Recognised escapes are
/// `\n`, `\t`, `\r`, `\\`, `\"` and `\'`; any other escape is kept verbatim
/// (backslash plus the following character). If the input ends before the
/// closing quote, whatever was read so far is returned.
fn read_string(&mut self, quote: char) -> String {
    let mut text = String::new();
    // Step over the opening quote.
    self.advance();

    loop {
        let Some(ch) = self.current_char else { break };

        if ch == quote {
            // Closing quote: consume it and stop.
            self.advance();
            break;
        }

        if ch != '\\' {
            text.push(ch);
            self.advance();
            continue;
        }

        // Escape sequence: look at the character after the backslash.
        self.advance();
        let Some(escaped) = self.current_char else {
            // Trailing backslash at end of input; the next iteration exits.
            continue;
        };
        match escaped {
            'n' => text.push('\n'),
            't' => text.push('\t'),
            'r' => text.push('\r'),
            '\\' | '"' | '\'' => text.push(escaped),
            other => {
                text.push('\\');
                text.push(other);
            }
        }
        self.advance();
    }

    text
}
/// Produces the next token from the input, or `RuchyToken::Eof` when the
/// input is exhausted.
///
/// Leading whitespace and `//` line comments are skipped first, repeatedly,
/// so that the newline ending a comment (and any further comment lines)
/// never reaches the token dispatcher. Dispatch is then by first character:
/// identifier/keyword, number, double-quoted string, char literal, or
/// operator/punctuation.
pub fn next_token(&mut self) -> RuchyToken {
    // Alternate between whitespace and comments until neither is left. Each
    // round consumes at least the two slashes, so the loop terminates.
    loop {
        self.skip_whitespace();
        if self.current_char == Some('/') && self.peek() == Some('/') {
            self.skip_comment();
        } else {
            break;
        }
    }

    match self.current_char {
        None => RuchyToken::Eof,
        Some(ch) if ch.is_alphabetic() || ch == '_' => self.handle_identifier(),
        Some(ch) if ch.is_numeric() => self.read_number(),
        Some('"') => {
            let s = self.read_string('"');
            RuchyToken::String(s)
        }
        Some('\'') => self.handle_char_literal(),
        Some(ch) => self.handle_operator_or_punctuation(ch),
    }
}
/// Reads an identifier and returns the matching keyword token from
/// `KEYWORD_MAP` if there is one, otherwise `RuchyToken::Identifier`.
fn handle_identifier(&mut self) -> RuchyToken {
    let word = self.read_identifier();
    let keyword = KEYWORD_MAP.get(word.as_str()).cloned();
    match keyword {
        Some(token) => token,
        None => RuchyToken::Identifier(word),
    }
}
/// Lexes a character literal whose opening `'` is under the cursor.
///
/// Escape sequences are decoded: `\n`, `\t`, `\r` and `\0` map to their
/// control characters, and any other escaped character (including `\\`,
/// `\'` and `\"`) stands for itself. The closing `'` is consumed when
/// present. If the input ends right after the opening quote the token
/// carries `'\0'`; a lone trailing backslash yields `'\\'`.
fn handle_char_literal(&mut self) -> RuchyToken {
    // Step over the opening quote.
    self.advance();

    let ch = match self.current_char {
        Some('\\') => {
            // Decode the escape so that e.g. `'\n'` is one token instead of
            // `Char('\\')` followed by stray `n` and `'` characters.
            self.advance();
            match self.current_char {
                Some('n') => '\n',
                Some('t') => '\t',
                Some('r') => '\r',
                Some('0') => '\0',
                Some(other) => other,
                None => '\\',
            }
        }
        Some(c) => c,
        None => '\0',
    };

    // Consume the literal's character, then the closing quote if it is there.
    self.advance();
    if self.current_char == Some('\'') {
        self.advance();
    }
    RuchyToken::Char(ch)
}
/// Lexes an operator or punctuation token that starts with `ch`.
///
/// Characters present in `SINGLE_CHAR_TOKEN_MAP` are emitted directly;
/// the remaining operator starters are routed to their dedicated handler.
/// Any other character is consumed and reported as `RuchyToken::Error`.
fn handle_operator_or_punctuation(&mut self, ch: char) -> RuchyToken {
    match SINGLE_CHAR_TOKEN_MAP.get(&ch) {
        Some(token) => self.handle_single_char_token(token.clone()),
        None => match ch {
            '-' => self.handle_dash(),
            '/' => self.handle_slash(),
            '=' => self.handle_equals(),
            '|' => self.handle_pipe(),
            '&' => self.handle_ampersand(),
            '@' => self.handle_annotation(),
            '.' => self.handle_dot(),
            ':' => self.handle_colon(),
            '!' => self.handle_exclamation(),
            '<' => self.handle_less_than(),
            '>' => self.handle_greater_than(),
            _ => {
                // Unknown character: skip it so lexing can make progress.
                self.advance();
                RuchyToken::Error
            }
        },
    }
}
/// Consumes the current character and returns `token` unchanged.
///
/// Called by `handle_operator_or_punctuation` for characters found in
/// `SINGLE_CHAR_TOKEN_MAP`.
fn handle_single_char_token(&mut self, token: RuchyToken) -> RuchyToken {
// Step past the single character that produced `token`.
self.advance();
token
}
/// Lexes a token starting with `-`: `->` becomes `Arrow`, a lone `-`
/// becomes `Minus`.
fn handle_dash(&mut self) -> RuchyToken {
    self.advance();
    match self.current_char {
        Some('>') => {
            self.advance();
            RuchyToken::Arrow
        }
        _ => RuchyToken::Minus,
    }
}
/// Lexes a token starting with `/`.
///
/// A lone `/` becomes `Slash`. If a second `/` follows, the rest of the
/// line is a comment: it is skipped and the token after it is returned.
fn handle_slash(&mut self) -> RuchyToken {
    self.advance();
    if self.current_char != Some('/') {
        return RuchyToken::Slash;
    }

    // The first slash is already consumed, so `skip_comment` (which needs to
    // see both slashes) would not recognise the comment from here. Skip to
    // the end of the line directly; the newline itself is left in place.
    while !matches!(self.current_char, None | Some('\n')) {
        self.advance();
    }
    self.next_token()
}
/// Lexes a token starting with `=`: `==` becomes `EqualEqual`, `=>`
/// becomes `FatArrow`, and a lone `=` becomes `Equal`.
fn handle_equals(&mut self) -> RuchyToken {
    self.advance();
    if self.current_char == Some('=') {
        self.advance();
        RuchyToken::EqualEqual
    } else if self.current_char == Some('>') {
        self.advance();
        RuchyToken::FatArrow
    } else {
        RuchyToken::Equal
    }
}
/// Lexes a token starting with `|`: `|>` becomes `PipeForward`, `||`
/// becomes `Or`, and a lone `|` is returned as `Identifier("|")`.
fn handle_pipe(&mut self) -> RuchyToken {
    self.advance();
    let follower = self.current_char;
    if follower == Some('>') {
        self.advance();
        return RuchyToken::PipeForward;
    }
    if follower == Some('|') {
        self.advance();
        return RuchyToken::Or;
    }
    RuchyToken::Identifier(String::from("|"))
}
/// Lexes a token starting with `&`: `&&` becomes `And`, and a lone `&` is
/// returned as `Identifier("&")`.
fn handle_ampersand(&mut self) -> RuchyToken {
    self.advance();
    if self.current_char != Some('&') {
        return RuchyToken::Identifier(String::from("&"));
    }
    self.advance();
    RuchyToken::And
}
/// Lexes an annotation: consumes the `@`, reads the identifier that
/// follows, and returns `Annotation` carrying the text with the `@` prefix
/// restored (just `"@"` when no identifier follows).
fn handle_annotation(&mut self) -> RuchyToken {
    self.advance();
    let mut label = String::from("@");
    label.push_str(&self.read_identifier());
    RuchyToken::Annotation(label)
}
/// Lexes a token starting with `.`: one dot is `Dot`, two are `DotDot`,
/// three are `DotDotDot`. At most three dots are consumed.
fn handle_dot(&mut self) -> RuchyToken {
    self.advance();
    if self.current_char != Some('.') {
        return RuchyToken::Dot;
    }

    self.advance();
    if self.current_char != Some('.') {
        return RuchyToken::DotDot;
    }

    self.advance();
    RuchyToken::DotDotDot
}
/// Lexes a token starting with `:`: `::` becomes `DoubleColon`, a lone
/// `:` becomes `Colon`.
fn handle_colon(&mut self) -> RuchyToken {
    self.advance();
    match self.current_char {
        Some(':') => {
            self.advance();
            RuchyToken::DoubleColon
        }
        _ => RuchyToken::Colon,
    }
}
/// Lexes a token starting with `!`: `!=` becomes `NotEqual`, a lone `!`
/// becomes `Not`.
fn handle_exclamation(&mut self) -> RuchyToken {
    self.advance();
    match self.current_char {
        Some('=') => {
            self.advance();
            RuchyToken::NotEqual
        }
        _ => RuchyToken::Not,
    }
}
/// Lexes a token starting with `<`: `<=` becomes `LessEqual`, `<<`
/// becomes `LeftShift`, and a lone `<` becomes `Less`.
fn handle_less_than(&mut self) -> RuchyToken {
    self.advance();
    let compound = match self.current_char {
        Some('=') => RuchyToken::LessEqual,
        Some('<') => RuchyToken::LeftShift,
        // Single-character operator: nothing more to consume.
        _ => return RuchyToken::Less,
    };
    // Consume the second character of the two-character operator.
    self.advance();
    compound
}
/// Lexes a token starting with `>`: `>=` becomes `GreaterEqual`, `>>`
/// becomes `RightShift`, and a lone `>` becomes `Greater`.
fn handle_greater_than(&mut self) -> RuchyToken {
    self.advance();
    if self.current_char == Some('=') {
        self.advance();
        RuchyToken::GreaterEqual
    } else if self.current_char == Some('>') {
        self.advance();
        RuchyToken::RightShift
    } else {
        RuchyToken::Greater
    }
}
}