#![allow(dead_code)]
use cursor::*;
use derive_new::*;
use std::str::FromStr;
derive_debug_partials! {
    /// Category label attached to every lexed token.
    ///
    /// `example1` in this file only constructs `Plus`, `String`, `Number` and `Identifier`;
    /// the remaining variants are not constructed by that function.
    #[derive(Copy, Clone)]
    enum TokenKind {
        // Brackets, separators and single-symbol operators.
        LeftParen,
        RightParen,
        LeftBrace,
        RightBrace,
        Comma,
        Dot,
        Minus,
        Plus,
        Semicolon,
        Slash,
        Star,
        Ampersand,
        VerticalBar,
        Circumflex,

        // Negation, assignment and comparison operators.
        Bang,
        BangEqual,
        Equal,
        EqualEqual,
        Greater,
        GreaterEqual,
        Less,
        LessEqual,

        // Tokens that carry a literal payload.
        Identifier,
        String,
        Number,

        // Keyword names.
        And,
        Class,
        Else,
        False,
        Fun,
        For,
        If,
        Nil,
        Or,
        Print,
        Return,
        Super,
        This,
        True,
        Var,
        While,

        // End of input.
        Eof,
    }

    /// Literal payload stored alongside a token.
    #[derive(Clone)]
    enum Object {
        /// Text of an identifier lexeme.
        Identifier(String),
        /// Contents of a string literal (`example1` stores it without the surrounding quotes).
        String(String),
        /// Value of an integer literal.
        Number(i32),
        /// A boolean value; not constructed by `example1`.
        Boolean(bool),
        /// Not constructed by `example1`; presumably the language-level nil value — confirm.
        Nil,
        /// Used by `example1` for tokens that carry no literal (the `+` token).
        None,
    }
}
derive_debug_partials! {
    /// A position inside the source text.
    ///
    /// Field order matters: the derived ordering compares `pos` first, then `line`, then
    /// `column`, and the derived `new` takes its arguments in the same order.
    #[derive(Clone, Copy, Default, PartialOrd, Ord, new)]
    struct Offset {
        /// Index into the source; `SourceChunk::as_str` uses it to slice the source `&str`.
        pub pos: usize,
        /// Line counter; `LexerExtras::change` bumps it on every `'\n'`.
        pub line: usize,
        /// Column counter; reset to 0 on `'\n'` and incremented for any other character.
        pub column: usize,
    }

    /// A pair of offsets delimiting a region of the source.
    ///
    /// `SourceChunk::as_str` treats `end` as inclusive.
    #[derive(Clone, Copy, Default, new)]
    struct Span {
        /// Offset of the first element of the region.
        pub start: Offset,
        /// Offset of the last element of the region.
        pub end: Offset,
    }
}
impl From<Range<LexerExtras>> for Span {
fn from(v: Range<LexerExtras>) -> Self {
Span {
start: v.start.offset,
end: v.end.offset,
}
}
}
/// A borrowed view of a region of source text: the text plus the span that selects it.
#[derive(Clone, Copy, PartialEq, Eq, new)]
struct SourceChunk<'s> {
    /// String slice that the positions in `span` index into.
    pub source: &'s str,
    /// Region of `source` this chunk denotes.
    span: Span,
}
/// Debug-prints a chunk as the quoted, escaped text it covers, not as a struct.
impl<'s> fmt::Debug for SourceChunk<'s> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `str`'s `Debug` impl ignores formatter flags, so delegating directly
        // produces the same text as `write!(f, "{:?}", ..)`.
        fmt::Debug::fmt(self.as_str(), f)
    }
}
/// Captures a chunk from a cursor: the text comes from `cursor.as_str()` and the span
/// from `cursor.to_range_extras()`.
impl<'s> From<&StrCursor<'s, LexerExtras>> for SourceChunk<'s> {
    fn from(cursor: &StrCursor<'s, LexerExtras>) -> Self {
        let source = cursor.as_str();
        let span = Span::from(cursor.to_range_extras());
        Self { source, span }
    }
}
impl<'s> SourceChunk<'s> {
pub fn as_str(&self) -> &'s str {
let Span { start, end } = self.span;
&self.source[start.pos..end.pos.saturating_add(1)]
}
}
/// One lexed token: its category, the source text it came from, and its literal payload.
#[derive(Clone, Debug, PartialEq, Eq, new)]
struct Token<'s> {
    /// Category of the token.
    pub kind: TokenKind,
    /// Region of source text the token was read from.
    pub lexeme: SourceChunk<'s>,
    /// Literal value carried by the token (`Object::None` when `example1` attaches none).
    pub literal: Object,
}

/// A list of tokens borrowing from the same source text.
type Tokens<'s> = Vec<Token<'s>>;
/// Position bookkeeping carried by the cursor: the current offset plus one saved copy.
#[derive(Debug, Default)]
struct LexerExtras {
    /// Snapshot written by `save` and restored by `load`.
    prev_offset: Offset,
    /// Current position.
    offset: Offset,
}
impl LexerExtras {
    /// Restores the current offset from the saved snapshot.
    pub fn load(&mut self) {
        let snapshot = self.prev_offset;
        self.offset = snapshot;
    }

    /// Overwrites the saved snapshot with the current offset.
    pub fn save(&mut self) {
        let current = self.offset;
        self.prev_offset = current;
    }
}
/// Hooks that keep position, line and column up to date as characters are visited.
impl Extras<char> for LexerExtras {
    /// Starts with both offsets at their default (all-zero) value.
    fn new() -> Self {
        Default::default()
    }

    /// Copies both the current offset and the saved snapshot.
    fn clone(&self) -> Self {
        let LexerExtras { prev_offset, offset } = *self;
        Self { prev_offset, offset }
    }

    /// Puts both offsets back to their default value.
    fn reset(&mut self) {
        *self = LexerExtras::default();
    }

    /// Records a visit to `input` at position `pos`.
    ///
    /// A `pos` behind the current position restores the saved snapshot and does nothing
    /// else. Otherwise the current offset is saved, `pos` becomes the new position, and
    /// the line/column counters advance: `'\n'` starts a new line at column 0, any other
    /// character moves one column right. Only one snapshot is kept.
    fn change(&mut self, input: &char, pos: usize) {
        if pos < self.offset.pos {
            self.load();
            return;
        }

        self.save();
        let current = &mut self.offset;
        current.pos = pos;
        if *input == '\n' {
            current.line += 1;
            current.column = 0;
        } else {
            current.column += 1;
        }
    }
}
/// Entry point: runs the lexer example, then prints an empty line to stdout.
fn main() {
    example1();
    println!();
}
/// Lexes a small hard-coded program and asserts on the pretty-printed token list.
///
/// The loop recognises four lexeme shapes — `+`, double-quoted strings, runs of decimal
/// digits, and identifiers — and silently skips every other character. Comment delimiters
/// are not understood, so the words inside the `/* ... */` line are lexed as identifiers,
/// which the expected output reflects.
///
/// # Panics
///
/// Panics if the rendered tokens differ from the expected text, or if a digit run cannot
/// be parsed as an `i32`.
fn example1() {
    let mut cursor = StrCursor::new_with_extras::<LexerExtras>(
        r#"
print 2 + 1;
print "one";
print true;
if () {}
/* U(Uj#$*()@#)(@!#&%#%NM) */
if (1 != 2) {
print "yes";
}"#,
    );
    let mut tokens = Tokens::new();
    // `while let` rather than `for`: the arms below move the cursor themselves.
    while let Some(c) = cursor.next() {
        match c {
            '+' => {
                cursor.save();
                tokens.push(Token::new(
                    TokenKind::Plus,
                    SourceChunk::from(&cursor),
                    Object::None,
                ));
            }
            '"' => {
                cursor.save();
                cursor.next_to_until(|c| c == '"');
                let s = cursor.as_str_loaded();
                // Drop the opening and closing quote to keep only the string's contents.
                let literal = s[1..s.len().saturating_sub(1)].to_string();
                tokens.push(Token::new(
                    TokenKind::String,
                    SourceChunk::from(&cursor),
                    Object::String(literal),
                ));
            }
            '0'..='9' => {
                cursor.save();
                cursor.next_to_while(|c| c.is_digit(10));
                // NOTE(review): presumably steps back off the first non-digit so it is
                // not swallowed by this lexeme — confirm against the cursor crate.
                cursor.prev();
                let literal = i32::from_str(cursor.as_str_loaded())
                    .expect("digit run should fit in an i32");
                tokens.push(Token::new(
                    TokenKind::Number,
                    SourceChunk::from(&cursor),
                    Object::Number(literal),
                ));
            }
            'a'..='z' | 'A'..='Z' | '_' => {
                cursor.save();
                cursor.next_to_while(|c| matches!(c, 'a'..='z' | 'A'..='Z' | '0'..='9' | '_'));
                // NOTE(review): same step-back as in the number arm — confirm.
                cursor.prev();
                let literal = cursor.as_str_loaded().to_string();
                tokens.push(Token::new(
                    TokenKind::Identifier,
                    SourceChunk::from(&cursor),
                    Object::Identifier(literal),
                ));
            }
            // Whitespace, punctuation and anything else is ignored.
            _ => {}
        }
    }
    let s = format!("{:#?}", tokens);
    // The expected text is the `{:#?}` rendering, so every nesting level is indented by
    // four spaces; without that indentation the comparison can never succeed.
    assert_eq!(
        s,
        r#"[
    Token {
        kind: Identifier,
        lexeme: "print",
        literal: Identifier(
            "print",
        ),
    },
    Token {
        kind: Number,
        lexeme: "2",
        literal: Number(
            2,
        ),
    },
    Token {
        kind: Plus,
        lexeme: "+",
        literal: None,
    },
    Token {
        kind: Number,
        lexeme: "1",
        literal: Number(
            1,
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "print",
        literal: Identifier(
            "print",
        ),
    },
    Token {
        kind: String,
        lexeme: "\"one\"",
        literal: String(
            "one",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "print",
        literal: Identifier(
            "print",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "true",
        literal: Identifier(
            "true",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "if",
        literal: Identifier(
            "if",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "U",
        literal: Identifier(
            "U",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "Uj",
        literal: Identifier(
            "Uj",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "NM",
        literal: Identifier(
            "NM",
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "if",
        literal: Identifier(
            "if",
        ),
    },
    Token {
        kind: Number,
        lexeme: "1",
        literal: Number(
            1,
        ),
    },
    Token {
        kind: Number,
        lexeme: "2",
        literal: Number(
            2,
        ),
    },
    Token {
        kind: Identifier,
        lexeme: "print",
        literal: Identifier(
            "print",
        ),
    },
    Token {
        kind: String,
        lexeme: "\"yes\"",
        literal: String(
            "yes",
        ),
    },
]"#
    )
}
/// Wraps any number of item definitions and prepends
/// `#[derive(Debug, PartialEq, Eq)]` to each of them.
///
/// Attributes already written on an item, including further `derive`s, are kept because
/// they are part of the matched item.
#[macro_export]
macro_rules! derive_debug_partials {
    ($($definition:item)*) => {
        $(
            #[derive(Debug, PartialEq, Eq)]
            $definition
        )*
    };
}