1use std::borrow::Cow;
2use std::fmt::Formatter;
3
/// Human-readable names for token kinds, indexed by the `TOKEN_*` constants
/// below (index 0 is an unused "n/a" placeholder so the constants line up).
const TOKEN_NAMES: [&str; 7] = ["n/a", "<EOF>", "LBRACK", "RBRACK", "ALIAS", "PATH", "GLOB"];

// Token kind discriminants. Each value doubles as an index into `TOKEN_NAMES`.
pub const TOKEN_EOF: i32 = 1;
pub const TOKEN_LBRACK: i32 = 2;
pub const TOKEN_RBRACK: i32 = 3;
pub const TOKEN_ALIAS: i32 = 4;
pub const TOKEN_PATH: i32 = 5;
pub const TOKEN_GLOB: i32 = 6;

// End-of-input sentinel. `!0` is inferred as `u8` here, so this is 0xFF,
// i.e. '\u{ff}' ('ÿ') — the same value `is_not_end_line` matches literally.
// NOTE(review): '\u{ff}' is a legal char; input containing it would be
// mistaken for EOF. Confirm inputs are expected to be ASCII-only.
const EOF: char = !0 as char;

// Characters with special meaning to the lexer.
const UNDERSCORE: char = '_';
const HYPHEN: char = '-';
const ASTERISK: char = '*';
18
/// A lexical token: a `TOKEN_*` kind paired with the exact text it matched.
///
/// NOTE(review): `Cow<'a, String>` is unconventional (`Cow<'a, str>` is the
/// idiomatic type), but existing callers rely on `Deref<Target = String>`
/// (e.g. `token.text.as_str()`), so the field type is kept as-is.
/// `Clone` is newly derived so consumers can duplicate tokens; both fields
/// are `Clone`, and no existing behavior changes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct Token<'a> {
    /// Token kind: one of the `TOKEN_*` constants (also an index into
    /// `TOKEN_NAMES` when displayed).
    pub kind: i32,
    /// The text matched from the input for this token.
    pub text: Cow<'a, String>,
}

impl<'a> Token<'a> {
    /// Builds a token from a kind discriminant and its matched text.
    pub fn new(kind: i32, text: Cow<'a, String>) -> Self {
        Self { kind, text }
    }
}
33
34impl<'a> std::fmt::Display for Token<'a> {
35 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
36 write!(f, "<'{}', {}>", self.text, TOKEN_NAMES[self.kind as usize])
37 }
38}
39
40#[derive(Debug)]
42pub struct Cursor {
43 input: String,
45 pointer: usize,
47 current_char: char,
49}
50
51impl Cursor {
52 fn new(input: &str, pointer: usize, c: char) -> Self {
54 Self {
55 input: input.to_string(),
56 pointer,
57 current_char: c,
58 }
59 }
60
61 fn consume(&mut self) {
63 self.pointer += 1;
64 if self.pointer >= self.input.len() {
65 self.current_char = EOF;
66 } else if let Some(c) = self.input.chars().nth(self.pointer) {
67 self.current_char = c
68 }
69 }
70}
71
/// A hand-rolled lexer producing `Token`s for a bracketed-alias/path format,
/// e.g. `[alias]/some/path` or `[*]/some/path`.
#[derive(Debug)]
pub struct Lexer<'a> {
    /// Cursor over the raw input; public so callers can inspect position.
    pub cursor: Cursor,
    /// Per-instance copy of `TOKEN_NAMES` used by the `token_names` method.
    // NOTE(review): redundant with the file-level `TOKEN_NAMES` const; kept
    // here since removing a field is beyond a comment-only pass.
    token_names: Vec<&'a str>,
}
78
79impl<'a> Lexer<'a> {
80 pub fn new(input: &str, pointer: usize, c: char) -> Self {
81 Self {
82 cursor: Cursor::new(input, pointer, c),
83 token_names: Vec::from(TOKEN_NAMES),
84 }
85 }
86
87 pub fn token_names(&self, i: usize) -> String {
88 self.token_names[i].to_string()
89 }
90
91 fn is_not_end_line(&self) -> bool {
92 !matches!(self.cursor.current_char, '\u{ff}' | '\0' | '\n')
93 }
94
95 fn is_alias_name(&self) -> bool {
96 self.cursor.current_char.is_ascii_alphanumeric()
97 || self.cursor.current_char == UNDERSCORE
98 || self.cursor.current_char == HYPHEN
99 }
100
101 fn is_glob_alias(&self) -> bool {
102 self.cursor.current_char == ASTERISK
103 }
104
105 pub fn next_token(&mut self) -> Result<Token<'a>, String> {
106 while self.cursor.current_char != EOF {
107 match self.cursor.current_char {
108 ' ' | '\t' | '\n' | '\r' => {
109 self.whitespace();
110 continue;
111 }
112 '[' => {
113 self.cursor.consume();
114 return Ok(Token::new(TOKEN_LBRACK, Cow::Owned("[".into())));
115 }
116 ']' => {
117 self.cursor.consume();
118 return Ok(Token::new(TOKEN_RBRACK, Cow::Owned("]".into())));
119 }
120 _ => {
121 if self.is_alias_name() {
122 return Ok(self.alias());
123 } else if self.is_glob_alias() {
124 return Ok(self.glob());
125 } else if self.is_not_end_line() {
126 return Ok(self.path());
127 }
128 return Err(format!("invalid character {}", self.cursor.current_char));
129 }
130 }
131 }
132
133 Ok(Token::new(TOKEN_EOF, Cow::Owned("<EOF>".into())))
134 }
135
136 fn whitespace(&mut self) {
137 while self.cursor.current_char.is_whitespace() {
138 self.cursor.consume()
139 }
140 }
141
142 fn alias(&mut self) -> crate::lexer::Token<'a> {
143 let mut a: String = String::new();
144 while self.is_alias_name() {
145 a.push(self.cursor.current_char);
146 self.cursor.consume();
147 }
148 Token::new(TOKEN_ALIAS, Cow::Owned(a))
149 }
150
151 fn glob(&mut self) -> crate::lexer::Token<'a> {
152 let mut a: String = String::new();
153 a.push(self.cursor.current_char);
154 self.cursor.consume();
155 Token::new(TOKEN_GLOB, Cow::Owned(a))
156 }
157
158 fn path(&mut self) -> crate::lexer::Token<'a> {
159 let mut p = String::new();
160 while self.is_not_end_line() {
161 p.push(self.cursor.current_char);
162 self.cursor.consume();
163 }
164 Token::new(TOKEN_PATH, Cow::Owned(p))
165 }
166}
167
#[cfg(test)]
mod tests {
    use super::*;

    // Display formats as <'text', KIND_NAME>.
    #[test]
    fn test_token_display() {
        let tok = Token::new(TOKEN_EOF, Cow::Owned("<EOF>".into()));
        assert_eq!("<'<EOF>', <EOF>>", tok.to_string())
    }

    // Cursor::new stores the input and trusts the caller-supplied position.
    #[test]
    fn test_create_cursor() {
        let cur = Cursor::new("", 0, !0 as char);
        assert_eq!("".to_string(), cur.input);
        assert_eq!(0, cur.pointer);
        assert_eq!(!0 as char, cur.current_char);
    }

    // consume advances the pointer and loads the character at the new index.
    #[test]
    fn test_cursor_consumes_characters() {
        let mut cur = Cursor::new("test", 0, 'e');
        cur.consume();
        assert_eq!("test".to_string(), cur.input);
        assert_eq!(1, cur.pointer);
        assert_eq!('e', cur.current_char);
    }

    // Consuming past the end yields the EOF sentinel ('\u{ff}').
    #[test]
    fn test_cursor_consumes_end_of_file() {
        let mut cur = Cursor::new("test", 4, 't');
        cur.consume();
        assert_eq!("test".to_string(), cur.input);
        assert_eq!(5, cur.pointer);
        assert_eq!(!0 as char, cur.current_char);
    }

    // token_names(i) mirrors the TOKEN_NAMES table.
    #[test]
    fn test_lexer_gets_token_name() {
        let lexer = Lexer::new("test", 0, 't');
        let token_name = lexer.token_names(2);
        assert_eq!(TOKEN_NAMES[2], token_name);
    }

    // NUL counts as end-of-line for path scanning.
    #[test]
    fn test_lexer_detects_line_feed_character() {
        let lexer = Lexer::new("\0", 0, '\0');
        assert!(
            !lexer.is_not_end_line(),
            "current character was not a LINE FEED"
        );
    }

    // An ordinary letter is not an end-of-line character.
    #[test]
    fn test_lexer_does_not_detect_non_line_feed_character() {
        let lexer = Lexer::new("test", 0, 't');
        assert!(lexer.is_not_end_line(), "current character was LINE FEED");
    }

    // whitespace() skips the leading run and stops on the first non-space.
    #[test]
    fn test_lexer_consumes_whitespace() {
        let mut lexer = Lexer::new("  test", 0, ' ');
        lexer.whitespace();
        assert_eq!('t', lexer.cursor.current_char);
    }

    // Alphanumerics qualify as alias-name characters.
    #[test]
    fn test_lexer_can_check_is_alis_name() {
        let lexer = Lexer::new("test0123", 0, 't');
        assert!(lexer.is_alias_name());
    }

    // '*' is a glob marker, not an alias-name character.
    #[test]
    fn test_lexer_can_check_is_alis_name_fails() {
        let lexer = Lexer::new("*", 0, '*');
        assert!(!lexer.is_alias_name());
    }

    // alias() consumes the whole alphanumeric run into one ALIAS token.
    #[test]
    fn test_lexer_creates_alias_token() {
        let mut lexer = Lexer::new("alias", 0, 'a');
        let token = lexer.alias();
        assert_eq!(TOKEN_ALIAS, token.kind);
        assert_eq!("alias", token.text.as_str());
    }

    // path() consumes up to end-of-line into one PATH token.
    #[test]
    fn test_lexer_creates_path_token() {
        let mut lexer = Lexer::new("/some/absolute/path", 0, '/');
        let token = lexer.path();
        assert_eq!(TOKEN_PATH, token.kind);
        assert_eq!("/some/absolute/path", token.text.as_str());
    }

    // Full scan of a two-line section: [alias] header then one path per line.
    #[test]
    fn test_lexer_next_token() {
        let input = r#"[test]/some/absolute/path
        /another/absolute/path
        "#;
        let mut lexer = Lexer::new(input, 0, '[');
        let mut tokens: Vec<Token> = Vec::new();
        while let Ok(t) = lexer.next_token() {
            if t.kind == TOKEN_EOF {
                break;
            }
            tokens.push(t);
        }
        assert_eq!(Token::new(TOKEN_LBRACK, Cow::Owned("[".into())), tokens[0]);
        assert_eq!(
            Token::new(TOKEN_ALIAS, Cow::Owned("test".into())),
            tokens[1]
        );
        assert_eq!(Token::new(TOKEN_RBRACK, Cow::Owned("]".into())), tokens[2]);
        assert_eq!(
            Token::new(TOKEN_PATH, Cow::Owned("/some/absolute/path".into())),
            tokens[3]
        );
        assert_eq!(
            Token::new(TOKEN_PATH, Cow::Owned("/another/absolute/path".into())),
            tokens[4]
        );
    }

    // A path starting with alias characters splits: ALIAS "some" + PATH rest.
    #[test]
    fn test_lexer_parses_path_without_initial_slash() {
        let input = "some/absolute/path";
        let mut lexer = Lexer::new(input, 0, 's');
        let mut tokens: Vec<Token> = Vec::new();
        while let Ok(t) = lexer.next_token() {
            if t.kind == TOKEN_EOF {
                break;
            }
            tokens.push(t);
        }
        assert!(!tokens.is_empty());
        assert_eq!(2, tokens.len())
    }

    // "[*]" produces LBRACK, GLOB, RBRACK, then the path token.
    #[test]
    fn test_lexer_parses_glob() {
        let input = "[*]/some/absolute/path";
        let mut lexer = Lexer::new(input, 0, '[');
        let mut tokens: Vec<Token> = Vec::new();
        while let Ok(t) = lexer.next_token() {
            if t.kind == TOKEN_EOF {
                break;
            }
            tokens.push(t);
        }
        assert_eq!(Token::new(TOKEN_LBRACK, Cow::Owned("[".into())), tokens[0]);
        assert_eq!(Token::new(TOKEN_GLOB, Cow::Owned("*".into())), tokens[1]);
        assert_eq!(Token::new(TOKEN_RBRACK, Cow::Owned("]".into())), tokens[2]);
        assert_eq!(
            Token::new(TOKEN_PATH, Cow::Owned("/some/absolute/path".into())),
            tokens[3]
        );
    }
}