use std::str::Chars;
use super::{lexeme::Lexeme, DELIMITERS, WHITESPACE};
/// Cursor over a source text that tracks where the next character sits.
///
/// The cursor keeps both a character iterator and the full original text, so
/// lexemes can be recorded as byte ranges and sliced back out later.
#[derive(Clone, Debug)]
pub struct Source<'source> {
    /// Iterator over the characters that have not been consumed yet.
    chars: Chars<'source>,
    /// The complete, unmodified input text.
    source: &'source str,
    /// Byte offset into `source` of the next unconsumed character.
    current: usize,
    /// Line of the next unconsumed character (starts at 1).
    line: usize,
    /// Column of the next unconsumed character, counted in characters
    /// (starts at 1 and resets to 1 after every `'\n'`).
    column: usize,
}
impl<'source> Source<'source> {
    /// Splits the remaining input into lexemes, consuming it to the end.
    ///
    /// The next character decides what happens, checked in this order:
    /// 1. a character in `DELIMITERS` becomes a one-character lexeme;
    /// 2. a character in `WHITESPACE` starts a run that is skipped;
    /// 3. `"` starts a string lexeme (see `consume_string`);
    /// 4. `;` starts a comment that is skipped up to, but not including,
    ///    the next `'\n'`;
    /// 5. anything else starts a plain lexeme (see `consume_lexeme`).
    ///
    /// # Panics
    /// Each `expect` guards an invariant: the helper is only called after
    /// `peek` has confirmed a character the helper will accept.
    pub fn lex(&mut self) -> Vec<Lexeme> {
        let mut out = Vec::new();
        while let Some(next) = self.peek() {
            // `None` means the branch only skipped input (whitespace/comment).
            let produced = if DELIMITERS.contains(next) {
                Some(
                    self.consume()
                        .expect("There should be a delimiter character here to be consumed"),
                )
            } else if WHITESPACE.contains(next) {
                self.ignore_whitespace();
                None
            } else if next == '"' {
                Some(
                    self.consume_string()
                        .expect("There should be a valid string lexeme here"),
                )
            } else if next == ';' {
                self.consume_while(|c| c != '\n');
                None
            } else {
                Some(
                    self.consume_lexeme()
                        .expect("There should be a valid lexeme here to be consumed"),
                )
            };
            out.extend(produced);
        }
        out
    }

    /// Returns the next character without advancing, or `None` at end of input.
    fn peek(&self) -> Option<char> {
        self.chars.as_str().chars().next()
    }

    /// Advances past one character and returns a lexeme covering it.
    ///
    /// The lexeme carries the byte range of the character and the line/column
    /// the cursor had *before* advancing. A `'\n'` moves the cursor to column 1
    /// of the next line; any other character moves it one column right.
    /// Returns `None` (and changes nothing) at end of input.
    fn consume(&mut self) -> Option<Lexeme> {
        let (start, line, column) = (self.current, self.line, self.column);
        let c = self.chars.next()?;

        // Offsets are in bytes so they can index into `source` directly.
        self.current += c.len_utf8();
        match c {
            '\n' => {
                self.line += 1;
                self.column = 1;
            }
            _ => self.column += 1,
        }

        Some(Lexeme {
            start,
            end: self.current,
            line,
            column,
        })
    }

    /// Consumes characters for as long as `f` accepts the next one.
    ///
    /// Returns a lexeme spanning everything consumed, positioned at the
    /// line/column where the run began, or `None` if nothing was consumed.
    fn consume_while<F: Fn(char) -> bool>(&mut self, f: F) -> Option<Lexeme> {
        let (start, line, column) = (self.current, self.line, self.column);

        while matches!(self.peek(), Some(c) if f(c)) {
            self.consume();
        }

        if self.current == start {
            return None;
        }
        Some(Lexeme {
            start,
            end: self.current,
            line,
            column,
        })
    }

    /// Skips a run of `WHITESPACE` characters, discarding the resulting span.
    fn ignore_whitespace(&mut self) {
        let _ = self.consume_while(|c| WHITESPACE.contains(c));
    }

    /// Consumes a plain lexeme: everything up to the next character that is in
    /// `DELIMITERS` or `WHITESPACE`.
    ///
    /// NOTE(review): `"` and `;` are only special at the start of a token in
    /// `lex`; unless they are listed in `DELIMITERS`/`WHITESPACE`, they do not
    /// end a token here (e.g. `abc;x` would stay one lexeme) — confirm this is
    /// intended.
    fn consume_lexeme(&mut self) -> Option<Lexeme> {
        self.consume_while(|c| !(DELIMITERS.contains(c) || WHITESPACE.contains(c)))
    }

    /// Consumes a double-quoted string, including both quotes.
    ///
    /// Returns `None` without consuming anything if the next character is not
    /// `"`. A backslash consumes the character after it as well, so an escaped
    /// quote does not close the string.
    ///
    /// NOTE(review): if the input ends before a closing quote, the lexeme
    /// simply runs to the end of the input and is still returned as `Some` —
    /// confirm callers can cope with an unterminated string.
    fn consume_string(&mut self) -> Option<Lexeme> {
        let (start, line, column) = (self.current, self.line, self.column);

        if self.peek() != Some('"') {
            return None;
        }
        self.consume(); // opening quote

        while let Some(c) = self.peek() {
            self.consume();
            match c {
                // Closing quote: the string is complete.
                '"' => break,
                // Escape: swallow the escaped character too (no-op at end of input).
                '\\' => {
                    self.consume();
                }
                _ => {}
            }
        }

        Some(Lexeme {
            start,
            end: self.current,
            line,
            column,
        })
    }

    /// Returns the slice of the original text covered by `lexeme`.
    ///
    /// # Panics
    /// Panics if the lexeme's byte range is out of bounds or does not fall on
    /// character boundaries of this source's text.
    pub fn span(&self, lexeme: Lexeme) -> &'source str {
        let range = lexeme.start..lexeme.end;
        &self.source[range]
    }
}
impl<'source> From<&'source str> for Source<'source> {
fn from(value: &'source str) -> Self {
Self {
chars: value.chars(),
source: value,
current: 0,
line: 1,
column: 1,
}
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Lexes `input` to the end and returns the text of every lexeme, in order.
    fn lexed_text(input: &str) -> Vec<&str> {
        let mut source = Source::from(input);
        let lexemes = source.lex();
        lexemes.iter().map(|lexeme| source.span(*lexeme)).collect()
    }

    /// Leading whitespace (which here includes `,`) is skipped and the cursor
    /// stops at the first non-whitespace character.
    #[test]
    fn ignore_whitespace() {
        let mut source = Source::from(" \t\n\r,()[]{}'\"abc");
        source.ignore_whitespace();
        let remaining = &source.source[source.current..];
        assert_eq!(remaining, "()[]{}'\"abc");
    }

    /// Delimiters and bare tokens are split apart; whitespace is dropped.
    #[test]
    fn lexemes_no_string() {
        let expected = vec!["(", "defn", "hello", "123", ")"];
        assert_eq!(lexed_text("(defn hello 123)"), expected);
    }

    /// A quoted string is one lexeme that keeps its quotes and inner spaces.
    #[test]
    fn string_lexemes() {
        let expected = vec!["(", "def", "msg", "\"Hello World\"", ")"];
        assert_eq!(lexed_text("(def msg \"Hello World\")"), expected);
    }

    /// Each lexeme reports the position where it starts; newlines advance the
    /// line and reset the column.
    #[test]
    fn line_count() {
        let mut source = Source::from("(defn\nhello\n123)");
        let positions: Vec<_> = source
            .lex()
            .iter()
            .map(|lexeme| lexeme.position())
            .collect();
        assert_eq!(positions, vec![(1, 1), (1, 2), (2, 1), (3, 1), (3, 4)]);
    }

    /// `;` comments are skipped, whether they fill a line or trail the code.
    #[test]
    fn commentary() {
        let input = ";; A comment\n(defn hello 123) ; Another comment";
        let expected = vec!["(", "defn", "hello", "123", ")"];
        assert_eq!(lexed_text(input), expected);
    }
}