1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
use crate::error::{bail, Result};
/// The kinds of token the lexer can produce.
#[derive(Debug, PartialEq, Eq)]
pub(crate) enum TokenKind {
    /// A bare identifier: a run of ASCII letters and underscores.
    Node(String),
    /// The contents of a single-quoted literal, with escapes already resolved.
    Token(String),
    /// `=`
    Eq,
    /// `*`
    Star,
    /// `|`
    Pipe,
    /// `?`
    QMark,
    /// `:`
    Colon,
    /// `(`
    LParen,
    /// `)`
    RParen,
}
/// A single lexed token together with the position where it starts.
#[derive(Debug)]
pub(crate) struct Token {
    /// What was lexed.
    pub(crate) kind: TokenKind,
    /// Location of the token's first character in the input.
    pub(crate) loc: Location,
}
/// A position in the input text.
///
/// Both fields start at zero (see the derived `Default`) and `column` is
/// measured in `char`s, not bytes.
#[derive(Debug, Default, Clone, Copy)]
pub(crate) struct Location {
    /// Number of `'\n'` characters seen before this position.
    pub(crate) line: usize,
    /// Number of `char`s since the start of the current line.
    pub(crate) column: usize,
}

impl Location {
    /// Moves this location forward past `text`.
    ///
    /// If `text` contains a newline, the line counter grows by the number of
    /// newlines and the column restarts from the text after the last one;
    /// otherwise only the column grows.
    fn advance(&mut self, text: &str) {
        if let Some((_, last_line)) = text.rsplit_once('\n') {
            self.line += text.matches('\n').count();
            self.column = last_line.chars().count();
        } else {
            self.column += text.chars().count();
        }
    }
}
/// Splits `input` into a list of tokens, each tagged with its start location.
///
/// Whitespace and `//` line comments are skipped and produce no tokens.
///
/// # Errors
///
/// Returns the first lexing error reported by `advance`, annotated with the
/// location at which the offending token starts.
pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
    let mut tokens = Vec::new();
    let mut loc = Location::default();
    while !input.is_empty() {
        let before = input;
        skip_ws(&mut input);
        skip_comment(&mut input);
        // Only lex a token when no trivia was consumed this round; otherwise
        // loop again so that runs of whitespace and comments are all skipped.
        if input.len() == before.len() {
            let kind = advance(&mut input).map_err(|err| err.with_location(loc))?;
            tokens.push(Token { kind, loc });
        }
        // Whatever was consumed (trivia or a token) moves the location forward.
        let consumed = before.len() - input.len();
        loc.advance(&before[..consumed]);
    }
    Ok(tokens)
}
/// Drops any leading whitespace (as defined by `is_whitespace`) from `input`.
fn skip_ws(input: &mut &str) {
    let text: &str = *input;
    *input = text.trim_start_matches(is_whitespace);
}
/// If `input` starts with `//`, drops everything up to and including the next
/// newline (or to the end of the input when there is none).
///
/// Leaves `input` untouched when it does not start with a comment.
fn skip_comment(input: &mut &str) {
    if !input.starts_with("//") {
        return;
    }
    let text: &str = *input;
    *input = match text.find('\n') {
        Some(newline) => &text[newline + 1..],
        None => &text[text.len()..],
    };
}
/// Lexes exactly one token from the front of `input` and advances `input`
/// past it.
///
/// The caller must pass non-empty input that starts at a token (no leading
/// whitespace or comment); an empty `input` panics.
///
/// # Errors
///
/// Fails on an unterminated or badly escaped quoted literal, on a `\r`
/// character, and on any other character that cannot start a token. On
/// failure `input` is left unchanged.
fn advance(input: &mut &str) -> Result<TokenKind> {
    let mut rest = input.chars();
    let first = rest.next().unwrap();
    let kind = match first {
        '(' => TokenKind::LParen,
        ')' => TokenKind::RParen,
        '|' => TokenKind::Pipe,
        ':' => TokenKind::Colon,
        '=' => TokenKind::Eq,
        '*' => TokenKind::Star,
        '?' => TokenKind::QMark,
        '\'' => {
            // Quoted literal: collect characters up to the closing quote,
            // resolving backslash escapes along the way.
            let mut literal = String::new();
            loop {
                let ch = match rest.next() {
                    Some(ch) => ch,
                    None => bail!("unclosed token literal"),
                };
                match ch {
                    '\'' => break,
                    '\\' => match rest.next() {
                        Some(escaped) if is_escapable(escaped) => literal.push(escaped),
                        _ => bail!("invalid escape in token literal"),
                    },
                    other => literal.push(other),
                }
            }
            TokenKind::Token(literal)
        }
        head if is_ident_char(head) => {
            // Identifier: take the longest run of identifier characters.
            let tail = rest.as_str();
            let run = tail
                .find(|ch: char| !is_ident_char(ch))
                .unwrap_or(tail.len());
            let mut name = head.to_string();
            name.push_str(&tail[..run]);
            rest = tail[run..].chars();
            TokenKind::Node(name)
        }
        '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
        other => bail!("unexpected character: `{}`", other),
    };
    // Commit the consumed prefix only after the token lexed successfully.
    *input = rest.as_str();
    Ok(kind)
}
/// Returns `true` for the characters that may follow a backslash inside a
/// quoted literal: a backslash or a single quote.
fn is_escapable(c: char) -> bool {
    c == '\\' || c == '\''
}
/// Returns `true` for the whitespace the lexer skips: space, tab and newline.
///
/// Carriage return is deliberately not included.
fn is_whitespace(c: char) -> bool {
    [' ', '\t', '\n'].contains(&c)
}
/// Returns `true` for characters allowed in an identifier: ASCII letters and
/// the underscore. Digits are not accepted.
fn is_ident_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}