use std::ops::Range;
/// Suffix character for imaginary numeric literals: when the number scanner
/// meets it, the character is consumed as the last character of the literal.
pub const IMAGINARY_UNIT: char = 'i';
/// A `start..end` pair of offsets locating a piece of text.
///
/// Spans are built from a `Range<usize>`, so `end` follows the same
/// exclusive-upper-bound convention. The type is a plain value: it is `Copy`,
/// comparable with `==`, and hashable so it can be used as a map or set key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct Span {
    /// Offset at which the span begins.
    start: usize,
    /// Offset at which the span ends.
    end: usize,
}
impl Span {
pub fn new() -> Self
{
Self { start: 0, end: 0 }
}
pub fn start(&self) -> usize
{
self.start
}
pub fn end(&self) -> usize
{
self.end
}
}
impl Default for Span {
fn default() -> Self {
Self::new()
}
}
impl From<Range<usize>> for Span {
fn from(value: Range<usize>) -> Self {
Self {
start: value.start,
end: value.end,
}
}
}
impl std::fmt::Display for Span {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{start}..{end}", start = self.start, end = self.end)
}
}
/// A piece of text borrowed for `'a`, paired with the span it was created with.
#[derive(Debug, Clone, PartialEq)]
pub(crate) struct Lexeme<'a> {
/// The lexeme's text.
text: &'a str,
/// Location recorded for `text`; the lexer in this file passes the byte
/// range of the input that `text` was sliced from.
span: Span,
}
impl<'a> Lexeme<'a> {
    /// Creates a lexeme for `text`, recording `span` as its location.
    ///
    /// `span` is stored as given; it is not checked against `text`.
    pub(crate) fn new(text: &'a str, span: Range<usize>) -> Self {
        Self {
            text,
            span: Span::from(span),
        }
    }

    /// Returns the lexeme's text.
    ///
    /// The returned slice borrows from the original source (`'a`), not from
    /// this `Lexeme`, so it remains usable after the lexeme itself is dropped.
    pub(crate) fn text(&self) -> &'a str {
        self.text
    }

    /// Returns a copy of the span recorded for this lexeme.
    pub(crate) fn span(&self) -> Span {
        self.span
    }
}
impl<'a> std::fmt::Display for Lexeme<'a>
{
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
write!(f, "{name} [{start}, {end})", name=self.text, start=self.span.start, end=self.span.end)
}
}
/// The list of lexemes returned by the lexer; every entry borrows from the
/// same input string.
pub(crate) type Lexemes<'a> = Vec<Lexeme<'a>>;
/// Peekable iterator over `(byte index, char)` pairs of the input, handed to
/// the scanning helpers so they can look one character ahead.
type CharIter<'a> = std::iter::Peekable<std::str::CharIndices<'a>>;
/// Scans the remainder of an identifier and returns its exclusive end byte offset.
///
/// `start_idx` is the byte offset of the identifier's first character, which
/// the caller has already taken from `chars`; it is counted as one byte wide.
/// Every following alphanumeric character or `_` is consumed; the first
/// character that is neither is left in the iterator.
fn parse_ident(start_idx: usize, chars: &mut CharIter) -> usize {
    let mut stop = start_idx + 1;
    while let Some((pos, c)) = chars.next_if(|&(_, c)| c == '_' || c.is_alphanumeric()) {
        stop = pos + c.len_utf8();
    }
    stop
}
/// Scans the remainder of a numeric literal and returns its exclusive end byte offset.
///
/// `start_idx` is the byte offset of the literal's first character, which the
/// caller has already taken from `chars`; it is counted as one byte wide.
///
/// After the first character the scanner accepts:
/// - ASCII digits and `.`;
/// - a single `e`/`E` exponent marker;
/// - one `+` or `-`, but only directly after the exponent marker, so that in
///   input such as `1e5-2` the `-` is left to be lexed as an operator;
/// - [`IMAGINARY_UNIT`], which is consumed and always ends the literal.
///
/// The first character that does not fit is left in the iterator. The literal
/// is not validated beyond this (for example, several `.` are accepted).
fn parse_number(start_idx: usize, chars: &mut CharIter<'_>) -> usize {
    let mut end = start_idx + 1;
    let mut seen_exponent = false;
    // True only while the previously consumed character was the exponent marker.
    let mut after_marker = false;
    while let Some(&(idx, ch)) = chars.peek() {
        let mut is_marker = false;
        let accept = match ch {
            d if d.is_ascii_digit() || d == '.' => true,
            'e' | 'E' if !seen_exponent => {
                seen_exponent = true;
                is_marker = true;
                true
            }
            // A sign belongs to the literal only as the exponent's sign.
            '+' | '-' if after_marker => true,
            IMAGINARY_UNIT => {
                chars.next();
                end = idx + ch.len_utf8();
                break;
            }
            _ => false,
        };
        if !accept {
            break;
        }
        chars.next();
        end = idx + ch.len_utf8();
        after_marker = is_marker;
    }
    end
}
/// Splits `input` into lexemes, in source order.
///
/// Whitespace is skipped. A lexeme starting with an ASCII digit or `.` is
/// scanned by `parse_number`; one starting with an ASCII letter or `_` is
/// scanned by `parse_ident`; any other character becomes a lexeme on its own.
/// Each lexeme's span is the byte range of `input` its text was sliced from.
pub(crate) fn from<'a>(input: &'a str) -> Lexemes<'a> {
    let mut tokens = Lexemes::new();
    let mut cursor = input.char_indices().peekable();
    // Not a `for` loop: the scanners below need to advance `cursor` themselves.
    while let Some((begin, first)) = cursor.next() {
        if first.is_whitespace() {
            continue;
        }
        let stop = if first.is_ascii_digit() || first == '.' {
            parse_number(begin, &mut cursor)
        } else if first.is_ascii_alphabetic() || first == '_' {
            parse_ident(begin, &mut cursor)
        } else {
            begin + first.len_utf8()
        };
        if stop > begin {
            let span = begin..stop;
            tokens.push(Lexeme::new(&input[span.clone()], span));
        }
    }
    tokens
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` and keeps only the text of each lexeme.
    fn lex_texts(input: &str) -> Vec<String> {
        let mut texts = Vec::new();
        for lexeme in from(input) {
            texts.push(lexeme.text().to_string());
        }
        texts
    }

    /// Asserts that every `(input, expected texts)` pair lexes as expected.
    fn check(cases: &[(&str, Vec<&str>)]) {
        for (input, expected) in cases {
            assert_eq!(&lex_texts(input), expected, "input: {:?}", input);
        }
    }

    #[test]
    fn test_empty_and_whitespace() {
        for blank in &["", " \t\n "] {
            assert!(from(blank).is_empty());
        }
    }

    #[test]
    fn test_identifiers() {
        check(&[
            ("x", vec!["x"]),
            ("var_1", vec!["var_1"]),
            ("a b_c D1", vec!["a", "b_c", "D1"]),
        ]);
    }

    #[test]
    fn test_numbers() {
        check(&[
            ("123", vec!["123"]),
            ("3.14", vec!["3.14"]),
            ("1e10", vec!["1e10"]),
            ("2E-3", vec!["2E-3"]),
            ("5.0e+2", vec!["5.0e+2"]),
            ("x1 2.0", vec!["x1", "2.0"]),
        ]);
    }

    #[test]
    fn test_imaginary_numbers() {
        check(&[
            ("i", vec!["i"]),
            ("3i", vec!["3i"]),
            ("3.14i", vec!["3.14i"]),
            ("2e10i", vec!["2e10i"]),
        ]);
    }

    #[test]
    fn test_single_char_tokens() {
        check(&[(
            "()+-*/^,",
            vec!["(", ")", "+", "-", "*", "/", "^", ","],
        )]);
    }

    #[test]
    fn test_mixed_expression() {
        check(&[(
            "sin(x) + 3.0i - var_1 / 2e-3",
            vec!["sin", "(", "x", ")", "+", "3.0i", "-", "var_1", "/", "2e-3"],
        )]);
    }

    #[test]
    fn test_boundary_values() {
        check(&[
            ("0001 0.0", vec!["0001", "0.0"]),
            ("1e-100 1e+100", vec!["1e-100", "1e+100"]),
            (".5 0.5", vec![".5", "0.5"]),
        ]);
    }

    #[test]
    fn test_invalid_cases() {
        check(&[
            ("@#%", vec!["@", "#", "%"]),
            ("x$3i", vec!["x", "$", "3i"]),
            ("++--**//", vec!["+", "+", "-", "-", "*", "*", "/", "/"]),
        ]);
    }

    #[test]
    fn test_whitespace_sensitivity() {
        check(&[
            (" x + 3 ", vec!["x", "+", "3"]),
            ("\t\na\tb\n", vec!["a", "b"]),
        ]);
    }

    #[test]
    fn test_identifier_and_number_boundary() {
        check(&[
            ("var123", vec!["var123"]),
            ("123abc", vec!["123", "abc"]),
        ]);
    }

    #[test]
    fn test_signed_numbers_and_operators() {
        check(&[
            ("+ -", vec!["+", "-"]),
            (
                "+3 -4.5 -2e10 +0.1",
                vec!["+", "3", "-", "4.5", "-", "2e10", "+", "0.1"],
            ),
            ("x+3 y-2", vec!["x", "+", "3", "y", "-", "2"]),
            ("x+-y", vec!["x", "+", "-", "y"]),
        ]);
    }
}