use super::Lexer;
use crate::ast::Token;
impl Lexer {
/// Lexes and returns the token that starts at the current position.
///
/// `skip_hspace` is called first (its result is discarded), then the next
/// byte selects which token to produce. When no byte is left the method
/// returns `Token::Sof`.
///
/// # Errors
///
/// Returns an `UnexpectedToken` error for a lone `&`, `|`, `'` or `$`, and
/// for any character that cannot start a token. Errors raised by
/// `parse_env_path` are passed through unchanged.
pub(super) fn next_token(&mut self) -> crate::error::Result<Token> {
    let _ = self.skip_hspace();
    let Some(byte) = self.peek_byte() else {
        return Ok(Token::Sof);
    };

    let token = match byte {
        // Words and numbers.
        b'a'..=b'z' | b'A'..=b'Z' | b'_' => self.parse_ident_or_keyword(),
        b'0'..=b'9' => self.parse_number(),

        // Tokens that are always exactly one character long.
        b'{' => self.single(Token::BraceOpen),
        b'}' => self.single(Token::BraceClose),
        b'[' => self.single(Token::BrackOpen),
        b']' => self.single(Token::BrackClose),
        b'(' => self.single(Token::ParenOpen),
        b')' => self.single(Token::ParenClose),
        b'@' => self.single(Token::At),
        b':' => self.single(Token::Colon),
        b',' => self.single(Token::Comma),
        b';' => self.single(Token::Semicolon),
        b'?' => self.single(Token::Question),
        b'*' => self.single(Token::Mul),
        b'~' => self.single(Token::Tilde),
        b'"' => self.single(Token::DoubleQuote),

        // `.`, `...`, or a float with a leading dot.
        b'.' => self.parse_dot_token(),

        // One character, optionally extended by a specific second character.
        b'=' => self.try_two_char('=', Token::Equal, Token::Assign),
        b'+' => self.try_two_char('+', Token::Concat, Token::Plus),
        b'-' => self.try_two_char('>', Token::Implies, Token::Minus),
        b'/' => self.try_two_char('/', Token::Update, Token::Div),
        b'!' => self.try_two_char('=', Token::Unequal, Token::Not),
        b'>' => self.try_two_char('=', Token::GreaterEqual, Token::Greater),

        b'<' => {
            // `<` directly followed by an alphanumeric starts an angle-bracket path.
            if self.peek_ahead(1).is_some_and(char::is_alphanumeric) {
                return self.parse_env_path();
            }
            self.advance();
            match self.peek() {
                Some('=') => self.single(Token::LessEqual),
                Some('|') => self.single(Token::PipeBackward),
                _ => Token::Less,
            }
        }

        // Characters that are only valid as part of a two-character token.
        b'&' => {
            if !self.at("&&") {
                return self.err_unexpected(&["'&&'"], "'&'");
            }
            self.advance_by(2);
            Token::And
        }
        b'|' => {
            let pipe = if self.at("||") {
                Token::Or
            } else if self.at("|>") {
                Token::PipeForward
            } else {
                return self.err_unexpected(&["'||'", "'|>'"], "'|'");
            };
            self.advance_by(2);
            pipe
        }
        b'\'' => {
            if !self.at("''") {
                return self.err_unexpected(&["''"], "'");
            }
            self.advance_by(2);
            Token::DoubleSingleQuote
        }
        b'$' => {
            if !self.at("${") {
                return self.err_unexpected(&["'${'"], "'$'");
            }
            self.advance_by(2);
            Token::InterOpen
        }

        // Anything else (including non-ASCII lead bytes) cannot start a token.
        // The full character is re-read so the message shows it, not a raw byte.
        _ => {
            let unexpected = self.peek().unwrap();
            return self.err_unexpected(&[], &format!("'{unexpected}'"));
        }
    };

    Ok(token)
}
/// Consumes an identifier-shaped word and classifies it.
///
/// The word is the run of ASCII alphanumerics, `_`, `-` and `'` taken by
/// `take_ascii_while`. Reserved words map to their dedicated token; every
/// other word becomes `Token::Identifier` carrying the word's text.
fn parse_ident_or_keyword(&mut self) -> Token {
    let consumed = self
        .take_ascii_while(|byte| {
            byte.is_ascii_alphanumeric() || matches!(byte, b'_' | b'-' | b'\'')
        })
        .len();

    // The word is recovered by stepping back `consumed` bytes from `byte_pos`.
    let word_start = self.byte_pos - consumed;
    let word = &self.source[word_start..self.byte_pos];

    match word {
        "assert" => Token::Assert,
        "else" => Token::Else,
        "if" => Token::If,
        "in" => Token::In,
        "inherit" => Token::Inherit,
        "let" => Token::Let,
        "rec" => Token::Rec,
        "then" => Token::Then,
        "with" => Token::With,
        other => Token::Identifier(other.into()),
    }
}
/// Lexes a token that begins with `.`.
///
/// Three outcomes, checked in this order:
/// - `...` yields `Token::Ellipsis`;
/// - `.` followed by an ASCII digit yields `Token::Float`, whose text is the
///   dot, the digits from `consume_digits`, and the exponent from
///   `parse_exponent` when one is returned;
/// - otherwise a plain `Token::Dot`.
fn parse_dot_token(&mut self) -> Token {
    if self.at("...") {
        self.advance_by(3);
        return Token::Ellipsis;
    }

    // Look past the dot before consuming it.
    let digit_follows = self.peek_ahead(1).is_some_and(|c| c.is_ascii_digit());
    self.advance();
    if !digit_follows {
        return Token::Dot;
    }

    let mut literal = String::from(".");
    literal.push_str(&self.consume_digits());
    if let Some(exponent) = self.parse_exponent() {
        literal.push_str(&exponent);
    }
    Token::Float(literal.into())
}
/// Lexes an angle-bracket path such as `<nixpkgs/lib>` into `Token::EnvPath`.
///
/// Must be called with the cursor on the opening `<`, which is consumed
/// without being checked. Characters are collected until the closing `>`;
/// the token's payload contains neither bracket.
///
/// Accepted path characters are alphanumerics plus `.`, `_`, `-`, `+` and `/`.
///
/// # Errors
///
/// - `InvalidSyntax` if any other character appears before the closing `>`.
/// - `UnclosedDelimiter` (pointing back at the `<`) if input ends first.
fn parse_env_path(&mut self) -> crate::error::Result<Token> {
    let opening_span = self.current_pos();
    self.advance(); // step over '<'
    let mut path = String::new();
    while let Some(ch) = self.peek() {
        match ch {
            '>' => {
                self.advance();
                return Ok(Token::EnvPath(path.into()));
            }
            // '+' is part of Nix's PATH_CHAR set, so names like `<gtk+>` must lex.
            _ if ch.is_alphanumeric() || matches!(ch, '_' | '-' | '/' | '.' | '+') => {
                path.push(
                    self.advance()
                        .expect("peek() returned Some, so advance() must yield a character"),
                );
            }
            _ => {
                return Err(crate::error::ParseError {
                    span: self.current_pos(),
                    kind: crate::error::ErrorKind::InvalidSyntax {
                        description: format!("invalid character '{ch}' in path"),
                        hint: Some(
                            "paths can only contain alphanumeric characters, '.', '_', '-', '+', and '/'"
                                .to_string(),
                        ),
                    },
                });
            }
        }
    }
    // Ran out of input before seeing '>'.
    Err(crate::error::ParseError {
        span: self.current_pos(),
        kind: crate::error::ErrorKind::UnclosedDelimiter {
            delimiter: '<',
            opening_span,
        },
    })
}
#[cold]
fn err_unexpected<T>(&self, expected: &[&str], found: &str) -> crate::error::Result<T> {
Err(crate::error::ParseError {
span: self.current_pos(),
kind: crate::error::ErrorKind::UnexpectedToken {
expected: expected
.iter()
.map(std::string::ToString::to_string)
.collect(),
found: found.to_string(),
},
})
}
/// Lexes an operator that is either one or two characters long.
///
/// The first character is consumed unconditionally. If the next character
/// equals `second` it is consumed as well and `if_match` is returned;
/// otherwise nothing more is consumed and `if_single` is returned.
fn try_two_char(&mut self, second: char, if_match: Token, if_single: Token) -> Token {
    self.advance();
    match self.peek() {
        Some(next) if next == second => {
            self.advance();
            if_match
        }
        _ => if_single,
    }
}
/// Calls `advance()` once and hands back `tok` unchanged.
///
/// Used by the dispatch code for tokens whose final character has already
/// been identified by the caller, so no further inspection is needed.
#[inline]
fn single(&mut self, tok: Token) -> Token {
// The value returned by `advance()` is not needed here.
self.advance();
tok
}
}