mlem_asm/
lex.rs

1#![allow(dead_code)]
2
/// Lexes a single line of assembly source into tokens.
///
/// Lexing stops at the first `;` (which starts a comment) or at the first
/// `\n` / `\r`, so at most one line of input is consumed. Tokens are the
/// maximal runs of characters between spaces and tabs; the empty runs that
/// consecutive separators produce are discarded.
///
/// The returned slices borrow from `line`; no text is copied.
///
/// # Examples
///
/// ```text
/// lex_line("add r0 r0 ; sum")  ->  ["add", "r0", "r0"]
/// lex_line("   ; only a comment") ->  []
/// ```
pub fn lex_line(line: &str) -> Vec<&str> {
    // Everything from the first comment marker or line terminator onwards is
    // ignored. `find` returns a byte offset that always sits on a character
    // boundary, so the slice below cannot panic on non-ASCII input.
    let end = line
        .find(|c: char| c == ';' || c == '\n' || c == '\r')
        .unwrap_or(line.len());

    // Splitting by separator (rather than tracking a window by hand) keeps
    // every token, including a final token that repeats the previous one.
    line[..end]
        .split(|c: char| c == ' ' || c == '\t')
        .filter(|token| !token.is_empty())
        .collect()
}
62
63pub fn lex(source: &str) -> Vec<Vec<&str>> {
64    let mut v = Vec::new();
65    for line in source.lines() {
66        v.push(lex_line(line));
67    };
68    return v;
69}
70
#[cfg(test)]
mod test_lex {
    use super::*;

    /// A `;` starts a comment: the text after it is dropped, while a space
    /// and a tab both separate tokens and `:` stays inside a token.
    #[test]
    fn test_lex_line_with_comment() {
        let expected = vec!["ident1", "ident2:ident2more", "ident3"];
        assert_eq!(
            lex_line("ident1 ident2:ident2more\tident3; comment"),
            expected
        );
    }

    /// `lex_line` stops at the first newline and ignores what follows it.
    #[test]
    fn test_lex_line_lexes_only_one_line() {
        let expected = vec!["ident1", "ident2"];
        assert_eq!(lex_line("ident1 ident2 \n ident3"), expected);
    }

    /// `lex` produces one token list per source line, comments removed.
    #[test]
    fn test_lex_multiple_lines() {
        let expected: Vec<Vec<&str>> = vec![
            vec!["l1i1", "l1i2"],
            vec!["l2i1", "l2i2", "l2i3"],
        ];
        assert_eq!(
            lex("l1i1 l1i2 ; line 1 comment\nl2i1 l2i2 l2i3 ; line 2 comment"),
            expected
        );
    }
}