use crate::prelude::*;
use crate::token::Context;
use crate::token::DelimiterKind;
use crate::token::IdentifierKind;
use crate::token::LiteralKind;
use crate::token::Location;
use crate::token::Position;
use crate::token::TokenKind;
/// Cursor state of a byte-oriented lexer.
///
/// The lexer does not own the source; every lexing method receives the
/// source slice and reads it at `index`.
pub struct Lexer {
    /// Offset of the next byte to read from the source slice.
    index: usize,
    /// Column counter; starts at 1 and is reset to 1 on every new line.
    column: usize,
    /// Line counter; starts at 1.
    line: usize,
}

impl Default for Lexer {
    /// Builds a lexer positioned at the start of the input.
    ///
    /// A derived `Default` would start at line 0 / column 0, which disagrees
    /// with `Lexer::new()` (line 1 / column 1) and with the column reset
    /// performed on every newline, so the values are spelled out here.
    fn default() -> Self {
        Lexer {
            index: 0,
            column: 1,
            line: 1,
        }
    }
}
impl Lexer {
fn index(&self) -> usize {
self.index
}
fn column(&self) -> usize {
self.column
}
fn line(&self) -> usize {
self.line
}
/// Moves the cursor one byte forward on the current line.
fn next(&mut self) {
    self.column += 1;
    self.index += 1;
}
/// Moves the cursor one byte forward onto a new line: the line counter is
/// bumped and the column counter restarts at 1.
fn next_line(&mut self) {
    self.index += 1;
    self.column = 1;
    self.line += 1;
}
/// Captures the cursor (line, column, byte index) as a `Position`.
fn position(&self) -> Position {
    let (line, column, index) = (self.line(), self.column(), self.index());
    Position::new(line, column, index)
}
/// Lexes a double-quoted string; the cursor must be on the opening quote.
///
/// The resulting token covers the bytes between the quotes (both quotes
/// excluded). No escape sequences are interpreted: the first `"` found
/// always ends the string. On success the cursor is left just past the
/// closing quote.
///
/// # Errors
///
/// Fails when a newline appears before the closing quote, or when the
/// source ends before a closing quote is found.
fn string<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    // Step over the opening quote so `start` points at the first content byte.
    self.next();
    let start = self.position();
    loop {
        match source.get(self.index()) {
            Some(b'"') => break,
            Some(b'\n') => {
                return Err(Error {
                    description: "cannot use newline character in strings".to_string(),
                });
            }
            Some(_) => self.next(),
            None => {
                return Err(Error {
                    description: format!(
                        "unterminated string ({}:{})",
                        start.line(),
                        start.column()
                    ),
                });
            }
        }
    }
    // `end` sits on the closing quote, which is then consumed.
    let end = self.position();
    self.next();
    let raw = &source[start.index()..end.index()];
    let context = Context::new(Location::new(start, end), raw);
    Ok(TokenKind::Literal(LiteralKind::String(context)))
}
/// Lexes a run of ASCII alphanumerics and underscores starting at the cursor.
///
/// The words `true`, `false` and `null` become the corresponding literal
/// tokens; any other run becomes an `IdentifierKind::String` token. This
/// method currently always returns `Ok`.
fn identifier<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let start = self.position();
    while source
        .get(self.index())
        .map_or(false, |b| b.is_ascii_alphanumeric() || *b == b'_')
    {
        self.next();
    }
    let end = self.position();
    let raw = &source[start.index()..end.index()];
    // Every variant carries the same context; only the wrapper differs.
    let context = Context::new(Location::new(start, end), raw);
    let token = match raw {
        b"true" => TokenKind::Literal(LiteralKind::True(context)),
        b"false" => TokenKind::Literal(LiteralKind::False(context)),
        b"null" => TokenKind::Literal(LiteralKind::Null(context)),
        _ => TokenKind::Identifier(IdentifierKind::String(context)),
    };
    Ok(token)
}
/// Lexes a number starting at the cursor.
///
/// Accepted shape: an optional `+`/`-` sign, at least one digit, and an
/// optional fractional part (`.` followed by at least one digit). When the
/// integer part starts with `0`, no further integer digits are consumed:
/// the token ends after the zero (or after its fractional part) and any
/// following digits are left for the next token. This rule applies with or
/// without a sign.
///
/// # Errors
///
/// Fails when the (optional) sign is not followed by a digit, or when a
/// decimal point is not followed by a digit.
fn number<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let start = self.position();

    // Optional sign. It is part of the token but must not be the whole token.
    match source.get(self.index()) {
        Some(b'+') | Some(b'-') => self.next(),
        _ => {}
    }

    // The first byte after the sign must be a digit; remember whether it is
    // a zero so the leading-zero rule below also holds for signed numbers.
    let mut zero = match source.get(self.index()) {
        Some(b) if b.is_ascii_digit() => *b == b'0',
        Some(b) => {
            return Err(Error {
                description: format!(
                    "number must start with a digit, not '{}' ({}:{})",
                    *b as char,
                    self.line(),
                    self.column(),
                ),
            });
        }
        None => {
            return Err(Error {
                description: "number must start with a digit, but no bytes left".to_string(),
            });
        }
    };
    self.next();

    let mut point = false;
    while let Some(&b) = source.get(self.index()) {
        if b.is_ascii_digit() && !zero {
            self.next();
        } else if b == b'.' && !point {
            point = true;
            // Digits are allowed again once we are in the fractional part.
            zero = false;
            self.next();
            match source.get(self.index()) {
                Some(d) if d.is_ascii_digit() => {}
                Some(d) => {
                    return Err(Error {
                        description: format!(
                            "decimal point must be followed with a digit, not '{}' ({}:{})",
                            *d as char,
                            self.line(),
                            self.column(),
                        ),
                    });
                }
                None => {
                    return Err(Error {
                        description:
                            "decimal point must be followed with a digit, but no bytes left"
                                .to_string(),
                    });
                }
            }
        } else {
            break;
        }
    }

    let end = self.position();
    let raw = &source[start.index()..end.index()];
    Ok(TokenKind::Literal(LiteralKind::Number(Context::new(
        Location::new(start, end),
        raw,
    ))))
}
/// Lexes a backtick-delimited template string; the cursor must be on the
/// opening backtick.
///
/// Unlike double-quoted strings, newlines are allowed and a backslash
/// escapes the byte that follows it (so `` \` `` does not end the string).
/// The token covers the raw bytes between the backticks, escapes included,
/// and is emitted as a `LiteralKind::String`. On success the cursor is left
/// just past the closing backtick.
///
/// # Errors
///
/// Fails when the source ends before the closing backtick.
fn template_string<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    // Step over the opening backtick so `start` points at the first content byte.
    self.next();
    let start = self.position();
    while let Some(&b) = source.get(self.index()) {
        match b {
            b'`' => break,
            b'\n' => self.next_line(),
            b'\\' => {
                // Consume the backslash, then the escaped byte. An escaped
                // newline still starts a new line, so it must go through
                // `next_line` to keep line/column tracking correct.
                self.next();
                match source.get(self.index()) {
                    Some(b'\n') => self.next_line(),
                    Some(_) => self.next(),
                    // Trailing backslash: reported as unterminated below.
                    None => {}
                }
            }
            _ => self.next(),
        }
    }
    if source.get(self.index()).is_none() {
        return Err(Error {
            description: format!(
                "unterminated template string ({}:{})",
                start.line(),
                start.column()
            ),
        });
    }
    // `end` sits on the closing backtick, which is then consumed.
    let end = self.position();
    self.next();
    let raw = &source[start.index()..end.index()];
    Ok(TokenKind::Literal(LiteralKind::String(Context::new(
        Location::new(start, end),
        raw,
    ))))
}
fn ignore_comment(&mut self, source: &[u8]) -> Result<()> {
let start = self.position();
self.next();
loop {
if let Some(&b) = source.get(self.index()) {
if b == b'\n' {
break;
} else {
self.next();
}
} else {
return Err(Error {
description: format!(
"unterminated comment ({}:{})",
start.line(),
start.column()
),
});
}
}
self.next_line();
Ok(())
}
/// Skips a `/* ... */` block comment.
///
/// Called with the cursor on the `*` that opens the comment. Consumes
/// everything up to and including the closing `*/`, tracking newlines
/// along the way.
///
/// # Errors
///
/// Fails when the source ends before `*/` is found.
fn ignore_multiline_comment(&mut self, source: &[u8]) -> Result<()> {
    let start = self.position();
    // Step over the opening '*' so that "/*/" is not taken as a full comment.
    self.next();
    loop {
        match source.get(self.index()) {
            None => {
                return Err(Error {
                    description: format!(
                        "unterminated comment ({}:{})",
                        start.line(),
                        start.column()
                    ),
                });
            }
            Some(b'\n') => self.next_line(),
            Some(b'*') => {
                self.next();
                // Only consume the next byte when it closes the comment;
                // otherwise it is re-examined on the next iteration.
                if source.get(self.index()) == Some(&b'/') {
                    self.next();
                    return Ok(());
                }
            }
            Some(_) => self.next(),
        }
    }
}
/// Skips a comment; the cursor must be on its leading `/`.
///
/// The byte after the `/` selects the comment kind: `/` for a line comment,
/// `*` for a block comment.
///
/// # Errors
///
/// Fails when the `/` is followed by any other byte or by the end of the
/// source, and propagates any error from the comment-skipping helpers.
fn comment(&mut self, source: &[u8]) -> Result<()> {
    let start = self.position();
    // Step over the leading '/'.
    self.next();
    match source.get(self.index()) {
        Some(b'/') => self.ignore_comment(source),
        Some(b'*') => self.ignore_multiline_comment(source),
        Some(c) => Err(Error {
            description: format!(
                "expected '/' or '*' not '{}' ({}:{})",
                *c as char,
                self.line(),
                self.column()
            ),
        }),
        None => Err(Error {
            description: format!(
                "expected '/' or '*' but no bytes left ({}:{})",
                start.line(),
                start.column()
            ),
        }),
    }
}
/// Consumes the single byte at the cursor and emits it as an
/// `ObjectPrec` delimiter token. The byte itself is not inspected here;
/// `tokenize` calls this for `{`.
fn object_preceder_symbol<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let before = self.position();
    self.next();
    let after = self.position();
    let bytes = &source[before.index()..after.index()];
    let context = Context::new(Location::new(before, after), bytes);
    Ok(TokenKind::Delimiter(DelimiterKind::ObjectPrec(context)))
}
/// Consumes the single byte at the cursor and emits it as an
/// `ObjectTerm` delimiter token. The byte itself is not inspected here;
/// `tokenize` calls this for `}`.
fn object_terminal_symbol<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let before = self.position();
    self.next();
    let after = self.position();
    let bytes = &source[before.index()..after.index()];
    let context = Context::new(Location::new(before, after), bytes);
    Ok(TokenKind::Delimiter(DelimiterKind::ObjectTerm(context)))
}
/// Consumes the single byte at the cursor and emits it as an
/// `ArrayPrec` delimiter token. The byte itself is not inspected here;
/// `tokenize` calls this for `[`.
fn array_preceder_symbol<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let before = self.position();
    self.next();
    let after = self.position();
    let bytes = &source[before.index()..after.index()];
    let context = Context::new(Location::new(before, after), bytes);
    Ok(TokenKind::Delimiter(DelimiterKind::ArrayPrec(context)))
}
/// Consumes the single byte at the cursor and emits it as an
/// `ArrayTerm` delimiter token. The byte itself is not inspected here;
/// `tokenize` calls this for `]`.
fn array_terminal_symbol<'a>(&mut self, source: &'a [u8]) -> Result<TokenKind<'a>> {
    let before = self.position();
    self.next();
    let after = self.position();
    let bytes = &source[before.index()..after.index()];
    let context = Context::new(Location::new(before, after), bytes);
    Ok(TokenKind::Delimiter(DelimiterKind::ArrayTerm(context)))
}
pub fn new() -> Self {
Lexer {
index: 0,
column: 1,
line: 1,
}
}
/// Splits `source` into tokens, starting at the lexer's current cursor.
///
/// Whitespace (`' '`, `\t`, `\r`, `\n`) and comments are skipped. Tokens
/// borrow their raw bytes from `source`. The cursor is not reset by this
/// method, so it keeps whatever position a previous call left behind.
///
/// # Errors
///
/// Returns the first error reported by a token-specific lexing method, or
/// an "unrecognized character" error for a byte that starts no known token.
pub fn tokenize<'a>(&mut self, source: &'a [u8]) -> Result<Vec<TokenKind<'a>>> {
    // Let the vector grow on demand: reserving one slot per source byte
    // would allocate far more token storage than any input can need.
    let mut tokens = Vec::new();
    while let Some(&b) = source.get(self.index()) {
        match b {
            b'{' => tokens.push(self.object_preceder_symbol(source)?),
            b'}' => tokens.push(self.object_terminal_symbol(source)?),
            b'[' => tokens.push(self.array_preceder_symbol(source)?),
            b']' => tokens.push(self.array_terminal_symbol(source)?),
            b'\n' => self.next_line(),
            b'\r' | b'\t' | b' ' => self.next(),
            b'a'..=b'z' | b'A'..=b'Z' => tokens.push(self.identifier(source)?),
            b'"' => tokens.push(self.string(source)?),
            b'`' => tokens.push(self.template_string(source)?),
            b'0'..=b'9' | b'+' | b'-' => tokens.push(self.number(source)?),
            b'/' => self.comment(source)?,
            _ => {
                return Err(Error {
                    description: format!(
                        "unrecognized character '{}' ({}:{})",
                        b as char,
                        self.line(),
                        self.column(),
                    ),
                });
            }
        }
    }
    Ok(tokens)
}
}