1#[cfg(test)]
4mod tests;
5mod utils;
6
7use utils::*;
8
/// A lexical token produced by [`full_lex`] / [`lex`].
///
/// The lifetime `'a` ties borrowed token data (currently only
/// [`Token::Comment`]) to the input buffer that was lexed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Token<'a> {
    /// A bracket character: one of `{`, `[`, `(`, `}`, `]`, `)`.
    Paren(char),

    /// An operator-like character: one of `^`, `$`, `!`, `&`, `|`.
    Symbol(char),

    /// An integer literal lexed from a run of ASCII digits.
    Number(i64),

    /// A `/pattern/flags` literal: the pattern text followed by the flag
    /// characters (drawn from `i` and `U`) that trail the closing slash.
    Regex(String, String),

    /// The text following a `#`, borrowed directly from the input buffer.
    Comment(&'a str),

    /// A name starting with a letter or `_`, continuing with letters,
    /// digits and `_`.
    Identifier(String),

    /// A quoted string literal: its content, and `true` when it was written
    /// with double quotes or `false` when written with single quotes.
    String(String, bool),

    /// A line terminator (`\n`, `\r` or `;`).
    Newline,

    /// The `,` separator.
    Comma,
}
58
59impl Token<'_> {
60 fn significant(&self) -> bool {
61 match self {
62 Token::Number(_) => true,
63 Token::Paren(_) => true,
64 Token::Regex(_, _) => true,
65 Token::Comment(_) => false,
66 Token::Identifier(_) => true,
67 Token::String(_, _) => true,
68 Token::Symbol(_) => true,
69 Token::Newline => false,
70 Token::Comma => true,
71 }
72 }
73}
74
75pub fn lex(buf: &str) -> Result<Vec<Token>, String> {
81 let tokens = full_lex(buf)?;
82
83 Ok(tokens
84 .into_iter()
85 .filter(|t| t.significant())
86 .collect::<Vec<Token>>())
87}
88
89pub fn full_lex(buf: &str) -> Result<Vec<Token>, String> {
91 let mut tokens = Vec::new();
92 let mut it = buf.chars().enumerate().peekable();
93
94 let lower = 'a'..='z';
95 let upper = 'A'..='Z';
96 let under_score = &['_'];
97 let newline_chars = &['\n', '\r', ';'];
98 let number_chars = '0'..='9';
99 let regexflag_chars = &['i', 'U'];
100 let x = [&lower, &upper, &number_chars];
101 let ident_chars = (Multi(&x), under_score);
102
103 while let Some((start, ch)) = it.peek() {
104 let start = *start;
105 match ch {
106 '0'..='9' => {
107 let end = chomp(&number_chars, &mut it);
108 tokens.push(Token::Number(get_number(&buf[start..end])));
109 }
110
111 '{' | '[' | '(' | '}' | ']' | ')' => {
112 tokens.push(Token::Paren(*ch));
113 it.next();
114 }
115
116 ' ' | '\t' => {
117 it.next();
118 }
119
120 '\n' | '\r' | ';' => {
121 chomp(&newline_chars, &mut it);
122 tokens.push(Token::Newline);
123 }
124
125 '#' => {
126 it.next();
127 let end = chomp_until(&newline_chars, &mut it);
128 tokens.push(Token::Comment(&buf[start + 1..end]));
129 }
130
131 ',' => {
132 it.next();
133 tokens.push(Token::Comma);
134 }
135
136 '/' => {
137 it.next();
138 let chars = chomp_until_escaped(
139 &mut it,
140 '/',
141 &[
142 '{', '}', '[', ']', '.', '^', '$', '*', '+', '?', '|', '(', ')', 'd', 'D',
143 's', 'S', 'w', 'W', 'p', 'P', 'b', 'B', 'A', 'z', 'a', 'f', 't', 'n', 'r',
144 'v', 'x', 'u', 'U', '\\',
145 ],
146 )?;
147 let pattern = chars;
148 if let Some((_, '/')) = it.next() {
149 } else {
150 return Err("expected character: '/'".to_string());
151 }
152
153 let flags = chomp_str(®exflag_chars, &mut it);
154
155 tokens.push(Token::Regex(pattern, flags));
156 }
157
158 '"' => {
159 it.next();
160 let content = chomp_until_escaped(&mut it, '"', &['$'])?;
161 it.next();
162
163 tokens.push(Token::String(content, true));
164 }
165
166 '\'' => {
167 it.next();
168 let content = chomp_until_escaped(&mut it, '\'', &[])?;
169 it.next();
170
171 tokens.push(Token::String(content, false));
172 }
173
174 '_' | 'a'..='z' | 'A'..='Z' => {
175 let content = chomp_str(&ident_chars, &mut it);
176
177 tokens.push(Token::Identifier(content));
178 }
179
180 '^' | '$' | '!' | '&' | '|' => {
181 tokens.push(Token::Symbol(*ch));
182 it.next();
183 }
184
185 a => {
186 return Err(format!("unknown character: '{}'", a));
187 }
188 }
189 }
190
191 Ok(tokens)
192}