use luthor::token::{Category, Token};
use luthor::{StateFunction, Tokenizer};

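/// Entry state of the lexer: walks plain text, splitting on whitespace,
/// punctuation, and uppercase word boundaries. Everything it consumes is
/// emitted as `Category::Text`.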
fn initial_state(lexer: &mut Tokenizer) -> Option<StateFunction> {
    // Treat a "::" sequence as a single token: flush anything pending,
    // then consume the two-character prefix as its own token.
    if lexer.has_prefix("::") {
        lexer.tokenize(Category::Text);
        lexer.tokenize_next(2, Category::Text);
    }

    match lexer.current_char() {
        Some(c) => {
            match c {
                ' ' | '\n' | '\t' => {
                    // Whitespace ends the current token; switch states to
                    // accumulate the whole whitespace run.
                    lexer.tokenize(Category::Text);
                    lexer.advance();
                    return Some(StateFunction(whitespace));
                }
                '`' | '=' | '_' | '-' | '.' | '(' | ')' | '{' | '}' | ';' | '|' | ',' | ':'
                | '<' | '>' | '\'' | '"' | '?' | '@' | '#' | '/' | '\\' | '[' | ']' => {
                    // Punctuation: flush the pending token, then emit the
                    // punctuation character as a token of its own.
                    lexer.tokenize(Category::Text);
                    lexer.tokenize_next(1, Category::Text);
                    return Some(StateFunction(whitespace));
                }
                _ => {
                    if c.is_uppercase() {
                        // A capital letter marks a word boundary (e.g. the
                        // "C" in "camelCase"); flush what came before and
                        // hand off to the uppercase state.
                        lexer.tokenize(Category::Text);
                        lexer.advance();
                        return Some(StateFunction(uppercase));
                    }

                    lexer.advance()
                }
            }

            Some(StateFunction(initial_state))
        }

        None => {
            // End of input: flush whatever is left.
            lexer.tokenize(Category::Text);
            None
        }
    }
}

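/// Accumulates a run of whitespace characters into a single
/// `Category::Whitespace` token.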
fn whitespace(lexer: &mut Tokenizer) -> Option<StateFunction> {
    match lexer.current_char() {
        Some(c) => match c {
            ' ' | '\n' | '\t' => {
                lexer.advance();
                Some(StateFunction(whitespace))
            }
            _ => {
                // The run has ended; flush it and resume normal lexing.
                // (When nothing is pending, e.g. right after a punctuation
                // token, this emits no token at all.)
                lexer.tokenize(Category::Whitespace);
                Some(StateFunction(initial_state))
            }
        },

        None => {
            lexer.tokenize(Category::Whitespace);
            None
        }
    }
}

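/// Handles a word that began with a capital letter. Consecutive capitals
/// (e.g. "CONSTANT") keep the lexer in this state; a lowercase letter
/// (e.g. the "a" in "Case") is consumed and control returns to the initial
/// state, which lexes the remainder of the word.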
fn uppercase(lexer: &mut Tokenizer) -> Option<StateFunction> {
    match lexer.current_char() {
        Some(c) => {
            if c.is_alphabetic() {
                lexer.advance();

                if c.is_uppercase() {
                    // Still in an all-caps run; keep consuming.
                    Some(StateFunction(uppercase))
                } else {
                    // Lowercase letter: the rest of the word is ordinary text.
                    Some(StateFunction(initial_state))
                }
            } else {
                lexer.tokenize(Category::Text);
                Some(StateFunction(initial_state))
            }
        }
        None => {
            lexer.tokenize(Category::Text);
            None
        }
    }
}

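/// Lexes `data` into a flat list of tokens by running the state machine
/// until the input is exhausted. For example, `lex("hello World")` yields
/// `"hello"` (Text), `" "` (Whitespace), and `"World"` (Text).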
pub fn lex(data: &str) -> Vec<Token> {
    let mut lexer = Tokenizer::new(data);
    let mut state_function = StateFunction(initial_state);
    loop {
        // Unwrap and run the current state function; each state returns the
        // next state, or None once the input is exhausted.
        let StateFunction(actual_function) = state_function;
        match actual_function(&mut lexer) {
            Some(f) => state_function = f,
            None => return lexer.tokens(),
        }
    }
}

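// Walks a mixed sample (snake_case, camelCase, punctuation, and an all-caps
// word) through the lexer and checks the exact token stream it produces.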
#[cfg(test)]
mod tests {
    use super::*;
    use luthor::token::{Category, Token};

    #[test]
    fn it_works() {
        let data =
            "local_variable = camelCase.method(param)\n CamelCaseClass something-else CONSTANT val";
        let tokens = lex(data);
        let expected_tokens = vec![
            Token {
                lexeme: "local".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "_".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "variable".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: " ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "=".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: " ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "camel".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "Case".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: ".".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "method".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "(".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "param".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: ")".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "\n ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "Camel".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "Case".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "Class".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: " ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "something".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "-".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: "else".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: " ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "CONSTANT".to_string(),
                category: Category::Text,
            },
            Token {
                lexeme: " ".to_string(),
                category: Category::Whitespace,
            },
            Token {
                lexeme: "val".to_string(),
                category: Category::Text,
            },
        ];

        // Compare the whole vectors (rather than indexing in a loop) so the
        // test also fails when the two streams differ in length.
        assert_eq!(tokens, expected_tokens);
    }
}