1use super::error::{Error, Result, ErrorType, CodePos};
2
/// The kind of literal, if any, that the lexer is currently accumulating.
#[derive(Debug, PartialEq, Eq, Clone, Copy)]
enum LexerMode {
    /// No literal is in progress.
    None,
    /// Inside a double-quoted string literal.
    String,
    /// Inside an unquoted (raw) literal.
    Raw,
}
9
/// The kinds of token the lexer can emit.
#[derive(Debug, PartialEq, Eq, Clone)]
pub enum TokenType {
    /// Text of a double-quoted string literal (quotes removed).
    StringLiteral(String),
    /// Text of an unquoted literal.
    RawLiteral(String),
    /// `{`
    OpenBrace,
    /// `}`
    CloseBrace,
    /// `(`
    OpenParen,
    /// `)`
    CloseParen,
    /// `;`
    Semicolon,
}
20
21#[derive(Debug, PartialEq, Eq, Clone)]
22pub struct Token {
23 pub token_type: TokenType,
24 pub line: u32,
25 pub col: u16
26}
27
28impl Token {
29 pub fn new(line: u32, col: u16, ty: TokenType) -> Token {
30 Token {
31 line: line,
32 col: col,
33 token_type: ty
34 }
35 }
36}
37
38struct LexerState {
39 line: u32,
40 col: u16,
41 input: Box<Iterator<Item=char>>,
42 mode: LexerMode,
43 escaped: bool,
44 tmp: String,
45 tokens: Vec<Token>,
46 force_next: Option<char>
47}
48
49impl CodePos for LexerState {
50 fn location(&self) -> (u32, u16) {
51 (self.line, self.col)
52 }
53}
54
55fn end_token(state: &mut LexerState) {
56 if state.mode != LexerMode::None {
57 let t = match state.mode {
58 LexerMode::None => unreachable!("Invalid mode when generating token"),
59 LexerMode::String => Token::new(state.line, state.col, TokenType::StringLiteral(state.tmp.clone())),
60 LexerMode::Raw => Token::new(state.line, state.col, TokenType::RawLiteral(state.tmp.clone()))
61 };
62 state.mode = LexerMode::None;
63 state.tokens.push(t);
64 }
65}
66
67fn start_token(state: &mut LexerState, mode: LexerMode) {
68 if state.mode != LexerMode::None {
69 end_token(state);
70 }
71 state.tmp = String::new();
72 state.mode = mode;
73}
74
75fn token(state: &mut LexerState, t: TokenType) {
76 end_token(state);
77 state.tokens.push(Token::new(state.line, state.col, t));
78}
79
80pub fn run(input: Box<Iterator<Item=char>>) -> Result<Vec<Token>> {
81 let mut state = LexerState { line: 1, col: 0, input: input, mode: LexerMode::None, escaped: false, tmp: String::new(), tokens: vec![], force_next: None};
82 loop {
83 let c = { next(&mut state) };
84 let mode = state.mode.clone();
85 let esc = state.escaped;
86 match (c, mode, esc) {
87 (Some('"'), LexerMode::String, false) => {
88 end_token(&mut state);
89 },
90 (Some('"'), LexerMode::None, false) => {
91 start_token(&mut state, LexerMode::String);
92 },
93 (Some('\\'), LexerMode::String, false) => {
94 state.escaped = true;
95 },
96 (Some('\\'), LexerMode::String, true ) => {
97 state.tmp.push('\\');
98 state.escaped = false;
99 }
100 (Some('n'), LexerMode::String, true ) => {
101 state.tmp.push('\n');
102 state.escaped = false;
103 },
104 (Some(x), LexerMode::String, false) => {
105 state.tmp.push(x);
106 },
107 (None, LexerMode::String, _ ) => {
108 return fail(&state, ErrorType::UnexpectedEOF)
109 }
110 (Some(' '), LexerMode::None, false) => {},
111 (Some(' '), LexerMode::Raw, false) => {
112 end_token(&mut state);
113 }
114 (Some('('), LexerMode::Raw, false) => {
115 token(&mut state, TokenType::OpenParen);
116 },
117 (Some(')'), LexerMode::Raw, false) => {
118 token(&mut state, TokenType::CloseParen);
119 },
120 (Some('{'), LexerMode::Raw, false) => {
121 token(&mut state, TokenType::OpenBrace);
122 },
123 (Some('}'), LexerMode::Raw, false) => {
124 token(&mut state, TokenType::CloseBrace);
125 },
126 (Some(';'), LexerMode::Raw, false) => {
127 token(&mut state, TokenType::Semicolon);
128 }
129 (Some('('), LexerMode::None, false) => {
130 token(&mut state, TokenType::OpenParen);
131 },
132 (Some(')'), LexerMode::None, false) => {
133 token(&mut state, TokenType::CloseParen);
134 },
135 (Some('{'), LexerMode::None, false) => {
136 token(&mut state, TokenType::OpenBrace);
137 },
138 (Some('}'), LexerMode::None, false) => {
139 token(&mut state, TokenType::CloseBrace);
140 },
141 (Some(';'), LexerMode::None, false) => {
142 token(&mut state, TokenType::Semicolon);
143 }
144 (Some(x), LexerMode::None, false) => {
145 start_token(&mut state, LexerMode::Raw);
146 state.tmp.push(x);
147 },
148 (Some(x), LexerMode::Raw, false) => {
149 state.tmp.push(x);
150 },
151 (None, LexerMode::Raw, false) => {
152 end_token(&mut state);
153 break;
154 }
155 (None, LexerMode::None, false) => {
156 break;
157 }
158 (c, mode, esc ) => {
159 unreachable!("Invalid Parser State Reached: {:?}, {:?}, {:?}", c, mode, esc);
160 }
161 }
162 }
163 Ok(state.tokens)
164
165}
166
167fn fail<T>(state: &LexerState, error_type: ErrorType) -> Result<T> {
168 Err(Error::from_state(state, error_type, None))
169}
170
/// State of the comment-stripping pre-processor while it scans for the next
/// significant character.
#[derive(Debug, Clone, Copy)]
enum PreProcState {
    /// Not inside a comment.
    Default,
    /// Inside a comment that runs to the end of the line.
    LineComment,
    /// Inside a `/* ... */` comment; the payload is the current nesting depth.
    MultiComment(u8),
}
177
178fn next_char(state: &mut LexerState) -> Option<char> {
179 match state.force_next {
180 Some(c) => {
181 state.force_next = None;
182 Some(c)
183 },
184 None => state.input.next()
185 }
186}
187
188fn lookahead(state: &mut LexerState) -> Option<char> {
189 match state.force_next {
190 Some(c) => Some(c),
191 None => {
192 let c = state.input.next();
193 state.force_next = c;
194 c
195 }
196 }
197}
198
199fn next(state: &mut LexerState) -> Option<char> {
200 let mut line = state.line;
201 let mut col = state.col;
202 let mut result: Option<char> = None;
203 let mut ps = PreProcState::Default;
204 loop {
205 let c = match next_char(state) {
206 Some(c) => c,
207 None => break
208 };
209 match (c, ps) {
210 ('\n', PreProcState::Default) => {
211 line += 1;
212 col = 0;
213 result = Some(' ');
214 break;
215 },
216 ('\r', PreProcState::Default) => {},
217 ('/', PreProcState::Default) => {
218 col += 1;
219 let n = lookahead(state);
220 match n {
221 Some('/') => ps = PreProcState::LineComment,
222 Some('*') => ps = PreProcState::MultiComment(1),
223 _ => {
224 result = Some(c);
225 break;
226 }
227 }
228 },
229 ('#', PreProcState::Default) => {
230 ps = PreProcState::LineComment;
231 col += 1;
232 },
233 (c, PreProcState::Default) if c.is_whitespace() => {
234 col += 1;
235 result = Some(' ');
236 break;
237 },
238 (_, PreProcState::Default) => {
239 result = Some(c);
240 col += 1;
241 break;
242 },
243
244 ('\n', PreProcState::LineComment) => {
245 line += 1;
246 col = 0;
247 result = Some(' ');
248 break;
249 },
250 (_, PreProcState::LineComment) => {
251 col += 1;
252 },
253
254 ('\n', PreProcState::MultiComment(_)) => {
255 line += 1;
256 col = 0;
257 },
258 ('*', PreProcState::MultiComment(level)) => {
259 match lookahead(state) {
260 Some('/') => {
261 if level <= 1 {
262 next(state).unwrap(); ps = PreProcState::Default
264 } else {
265 ps = PreProcState::MultiComment(level - 1)
266 }
267 },
268 _ => {}
269 }
270 },
271 ('/', PreProcState::MultiComment(level)) => {
272 match lookahead(state) {
273 Some('*') => {
274 ps = PreProcState::MultiComment(level + 1);
275 },
276 _ => {}
277 }
278 },
279 (_, PreProcState::MultiComment(_)) => {
280 col += 1;
281 }
282 }
283 }
284 state.line = line;
285 state.col = col;
286 result
287}
288
#[cfg(test)]
mod test {
    use super::*;
    use super::super::error::{ErrorType, Error, Result};

    /// Runs the lexer over `src`, returning full tokens (with positions).
    fn lex(src: &'static str) -> Result<Vec<Token>> {
        run(Box::new(src.chars()))
    }

    /// Strips position information so assertions can compare token kinds only.
    fn unwrap_tokens(tokens: Result<Vec<Token>>) -> Result<Vec<TokenType>> {
        tokens.map(|toks| toks.into_iter().map(|t| t.token_type).collect())
    }

    /// Lexes `src` and keeps only the kind of each token.
    fn kinds(src: &'static str) -> Result<Vec<TokenType>> {
        unwrap_tokens(lex(src))
    }

    /// Shorthand for a raw-literal token kind.
    fn raw(text: &str) -> TokenType {
        TokenType::RawLiteral(String::from(text))
    }

    /// Shorthand for a string-literal token kind.
    fn quoted(text: &str) -> TokenType {
        TokenType::StringLiteral(String::from(text))
    }

    #[test]
    fn successfully_parses_empty_string() {
        assert_eq!(lex(""), Ok(vec![]));
    }

    #[test]
    fn successfully_parses_raw_token() {
        assert_eq!(kinds("test"), Ok(vec![raw("test")]));
    }

    #[test]
    fn successfully_parses_string_token() {
        assert_eq!(kinds("\"test\""), Ok(vec![quoted("test")]));
    }

    #[test]
    fn successfully_parse_basic_tokens() {
        let expected = vec![
            TokenType::OpenParen,
            TokenType::CloseParen,
            TokenType::OpenBrace,
            TokenType::CloseBrace,
            TokenType::Semicolon,
        ];
        assert_eq!(kinds("(){};"), Ok(expected));
    }

    #[test]
    fn successfully_parse_a_typical_example() {
        let expected = vec![
            raw("option"),
            raw("param"),
            TokenType::OpenBrace,
            raw("inner_option"),
            quoted("value"),
            TokenType::Semicolon,
            TokenType::CloseBrace,
            TokenType::Semicolon
        ];
        assert_eq!(kinds("option param { inner_option \"value\"; };"), Ok(expected));
    }

    #[test]
    fn ignores_comments() {
        // One comment of each supported flavour; none may produce a token.
        let source = "/* shit */
 // crap
 # shit";
        assert_eq!(kinds(source), Ok(vec![]));
    }

    #[test]
    fn fails_on_unterminated_string() {
        let expected = Error::new(1, 8, ErrorType::UnexpectedEOF, None);
        assert_eq!(lex("\"yo dawg"), Err(expected));
    }
}