1use crate::error::{bail, Result};
3
/// The kinds of lexical tokens that `advance` can produce.
#[derive(Debug, Eq, PartialEq)]
pub(crate) enum TokenKind {
    /// An identifier: a run of ASCII letters and underscores.
    Node(String),
    /// A single-quoted literal, stored without the surrounding quotes and
    /// with `\\` / `\'` escapes already resolved.
    Token(String),
    /// The `=` character.
    Eq,
    /// The `*` character.
    Star,
    /// The `|` character.
    Pipe,
    /// The `?` character.
    QMark,
    /// The `:` character.
    Colon,
    /// The `(` character.
    LParen,
    /// The `)` character.
    RParen,
}
16
/// A single lexed token: what it is and where it begins in the input.
#[derive(Debug)]
pub(crate) struct Token {
    /// What kind of token this is (including any text payload).
    pub(crate) kind: TokenKind,
    /// Position of the token's first character, as recorded by `tokenize`
    /// before the token's text is consumed.
    pub(crate) loc: Location,
}
22
/// A position in the input text.
///
/// Both fields start at zero for `Location::default()` and are counted in
/// `char`s (not bytes) by [`Location::advance`].
#[derive(Copy, Clone, Default, Debug)]
pub(crate) struct Location {
    /// Number of `'\n'` characters seen before this position.
    pub(crate) line: usize,
    /// Number of characters between the start of the current line and this
    /// position.
    pub(crate) column: usize,
}

impl Location {
    /// Moves this location forward past `text`.
    ///
    /// When `text` contains no newline only the column grows. Otherwise the
    /// line grows by the number of newlines and the column is reset to the
    /// number of characters following the last newline.
    fn advance(&mut self, text: &str) {
        let newline_count = text.matches('\n').count();
        if newline_count == 0 {
            // Still on the same line: just extend the column.
            self.column += text.chars().count();
            return;
        }

        self.line += newline_count;
        // `rsplit` yields the piece after the final '\n' first.
        let trailing = text.rsplit('\n').next().unwrap_or("");
        self.column = trailing.chars().count();
    }
}
40
41pub(crate) fn tokenize(mut input: &str) -> Result<Vec<Token>> {
42 let mut res = Vec::new();
43 let mut loc = Location::default();
44 while !input.is_empty() {
45 let old_input = input;
46 skip_ws(&mut input);
47 skip_comment(&mut input);
48 if old_input.len() == input.len() {
49 match advance(&mut input) {
50 Ok(kind) => {
51 res.push(Token { kind, loc });
52 }
53 Err(err) => return Err(err.with_location(loc)),
54 }
55 }
56 let consumed = old_input.len() - input.len();
57 loc.advance(&old_input[..consumed]);
58 }
59
60 Ok(res)
61}
62
63fn skip_ws(input: &mut &str) {
64 *input = input.trim_start_matches(is_whitespace)
65}
/// Advances `input` past one leading `//` comment, if there is one.
///
/// The comment extends through the next `'\n'` (which is consumed too) or to
/// the end of the input when no newline follows.
fn skip_comment(input: &mut &str) {
    if !input.starts_with("//") {
        return;
    }
    let after_comment = match input.find('\n') {
        Some(newline_at) => newline_at + 1,
        None => input.len(),
    };
    *input = &input[after_comment..];
}
72
73fn advance(input: &mut &str) -> Result<TokenKind> {
74 let mut chars = input.chars();
75 let c = chars.next().unwrap();
76 let res = match c {
77 '=' => TokenKind::Eq,
78 '*' => TokenKind::Star,
79 '?' => TokenKind::QMark,
80 '(' => TokenKind::LParen,
81 ')' => TokenKind::RParen,
82 '|' => TokenKind::Pipe,
83 ':' => TokenKind::Colon,
84 '\'' => {
85 let mut buf = String::new();
86 loop {
87 match chars.next() {
88 None => bail!("unclosed token literal"),
89 Some('\\') => match chars.next() {
90 Some(c) if is_escapable(c) => buf.push(c),
91 _ => bail!("invalid escape in token literal"),
92 },
93 Some('\'') => break,
94 Some(c) => buf.push(c),
95 }
96 }
97 TokenKind::Token(buf)
98 }
99 c if is_ident_char(c) => {
100 let mut buf = String::new();
101 buf.push(c);
102 loop {
103 match chars.clone().next() {
104 Some(c) if is_ident_char(c) => {
105 chars.next();
106 buf.push(c);
107 }
108 _ => break,
109 }
110 }
111 TokenKind::Node(buf)
112 }
113 '\r' => bail!("unexpected `\\r`, only Unix-style line endings allowed"),
114 c => bail!("unexpected character: `{}`", c),
115 };
116
117 *input = chars.as_str();
118 Ok(res)
119}
120
/// Returns `true` for the characters that may follow a backslash inside a
/// quoted literal: a backslash or a single quote.
fn is_escapable(c: char) -> bool {
    c == '\\' || c == '\''
}
/// Returns `true` for the characters the lexer skips as whitespace: space,
/// tab and line feed. Carriage return is deliberately not included.
fn is_whitespace(c: char) -> bool {
    [' ', '\t', '\n'].contains(&c)
}
/// Returns `true` if `c` may appear in an identifier: an ASCII letter or an
/// underscore. Digits and non-ASCII letters are not accepted.
fn is_ident_char(c: char) -> bool {
    c == '_' || c.is_ascii_alphabetic()
}