1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
use regex::Regex;

/// Regex-driven tokenizer that walks a borrowed source string and hands out
/// one [`Token`] per call to `eat`.
pub struct Tokenizer<'a> {
    /// The complete input text being tokenized.
    source: &'a str,
    /// `(kind, compiled pattern)` pairs, tried in the order they were supplied.
    duos: Vec<(&'a str, Regex)>,
    /// Byte offset into `source` of the next unread input.
    cursor: usize,
    /// Current line counter; starts at 1.
    line: u32,
    /// Current column counter; starts at 1 and advances by token byte length.
    column: u32,
}

/// A single slice of the source text produced by the tokenizer.
#[derive(Debug, Clone)]
pub struct Token<'a> {
    /// Name of the pattern that produced this token, or `None` when no
    /// pattern matched and the tokenizer fell back to emitting raw input.
    pub kind: Option<&'a str>,
    /// The matched text, borrowed from the tokenizer's source string.
    pub value: &'a str,
    /// The tokenizer's line counter after this token was consumed.
    pub line: u32,
    /// The tokenizer's column counter after this token was consumed.
    pub column: u32,
}

impl<'a> Tokenizer<'a> {
    /// Builds a tokenizer over `source` from `(kind, pattern)` pairs.
    ///
    /// Every pattern is compiled once, here. [`Tokenizer::eat`] later tries
    /// the compiled patterns in the order they were given, so earlier pairs
    /// take priority over later ones.
    ///
    /// # Panics
    ///
    /// Panics with `"Invalid regex."` if any pattern fails to compile.
    pub fn new(source: &'a str, duos: &[(&'a str, &str)]) -> Tokenizer<'a> {
        let duos = duos
            .iter()
            .map(|&(k, v)| (k, Regex::new(v).expect("Invalid regex.")))
            .collect();
        Self {
            source,
            duos,
            cursor: 0,
            line: 1,
            column: 1,
        }
    }

    /// Consumes and returns the next token, or `None` once the input is
    /// exhausted.
    ///
    /// The first pattern (in construction order) that matches a non-empty
    /// span starting exactly at the cursor wins and its kind is reported in
    /// [`Token::kind`]. When no pattern qualifies, a single character is
    /// emitted with `kind == None`, so the tokenizer always makes progress.
    ///
    /// The `line` / `column` stored in the returned token are the tokenizer's
    /// position immediately *after* the token. Columns are 1-based and
    /// counted in bytes.
    pub fn eat(&mut self) -> Option<Token<'a>> {
        // Copy the `&'a str` out of `self` so every slice below borrows the
        // source text (lifetime `'a`) rather than `self`.
        let source: &'a str = self.source;
        let rest = source.get(self.cursor..)?;
        // `None` here means end of input.
        let first = rest.chars().next()?;

        let mut kind: Option<&'a str> = None;
        // Fallback token: one whole character. Slicing a fixed single byte
        // would panic in the middle of a multi-byte UTF-8 sequence.
        let mut value: &'a str = &rest[..first.len_utf8()];

        for (duo_kind, duo_regex) in &self.duos {
            // `find` returns the leftmost match, so `start() == 0` is exactly
            // "this pattern matches at the cursor". Matches further ahead
            // must not be consumed now, and empty matches are rejected
            // because they would never advance the cursor.
            let hit = duo_regex
                .find(rest)
                .filter(|m| m.start() == 0 && m.end() > 0);
            if let Some(hit) = hit {
                kind = Some(*duo_kind);
                value = hit.as_str();
                break;
            }
        }

        self.cursor += value.len();
        match value.rfind('\n') {
            Some(last_newline) => {
                self.line += value.matches('\n').count() as u32;
                // 1-based column of the position following the token: one
                // past the number of bytes after the final newline.
                self.column = (value.len() - last_newline) as u32;
            }
            None => self.column += value.len() as u32,
        }

        Some(Token {
            kind,
            value,
            line: self.line,
            column: self.column,
        })
    }
}