jayce/
lib.rs

1pub mod internal;
2use regex::Regex;
3
4pub struct Duo<T> {
5    pub kind: T,
6    pub regex: Regex,
7    pub preserve: bool,
8}
9
10impl<T> Duo<T> {
11    pub fn new(kind: T, regex: &str, preserve: bool) -> Self {
12        Self {
13            kind,
14            regex: Regex::new(regex).unwrap(),
15            preserve,
16        }
17    }
18}
19
20pub struct Tokenizer<'a, T> {
21    source: &'a str,
22    duos: &'a [Duo<T>],
23    pub cursor: usize,
24    pub line: usize,
25    pub column: usize,
26    next: Option<Token<'a, T>>,
27}
28
/// A single lexeme: the rule kind that matched, the matched text, and where
/// the match started.
///
/// A token only borrows its kind and its text, so it is always cheap to copy.
#[derive(Debug, PartialEq)]
pub struct Token<'a, T> {
    /// Kind of the rule that produced this token.
    pub kind: &'a T,
    /// Slice of the source text that was matched.
    pub value: &'a str,
    /// `(line, column)` at which the match started.
    pub pos: (usize, usize),
}

// `Clone` and `Copy` are written by hand rather than derived: the derive
// macros would add `T: Clone` / `T: Copy` bounds even though only a `&T` is
// stored, which would needlessly stop tokens of non-clonable kinds from being
// copied.
impl<'a, T> Clone for Token<'a, T> {
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, T> Copy for Token<'a, T> {}
35
36impl<'a, T> Tokenizer<'a, T> {
37    #[inline]
38    pub fn new(source: &'a str, duos: &'a [Duo<T>]) -> Self {
39        Self {
40            source,
41            duos,
42            cursor: 0,
43            line: 1,
44            column: 1,
45            next: None,
46        }
47    }
48
49    fn advance(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
50        while self.cursor < self.source.len() {
51            let mut matched = false;
52
53            for duo in self.duos.iter() {
54                if let Some(result) = duo.regex.find(&self.source[self.cursor..]) {
55                    let value: &str = result.as_str();
56                    let token_pos = (self.line, self.column);
57                    let len = result.len();
58                    self.cursor += len;
59                    let newlines_count = bytecount::count(value.as_bytes(), b'\n');
60                    if newlines_count > 0 {
61                        self.line += newlines_count;
62                        self.column = len - value.rfind('\n').unwrap_or(1);
63                    } else {
64                        self.column += len;
65                    }
66
67                    if duo.preserve {
68                        return Ok(Some(Token {
69                            kind: &duo.kind,
70                            value,
71                            pos: token_pos,
72                        }));
73                    } else {
74                        matched = true;
75                        break;
76                    }
77                }
78            }
79
80            if !matched {
81                return Err(format!(
82                    "Failed to match at line {}, column {}.",
83                    self.line, self.column
84                ))?;
85            }
86        }
87
88        Ok(None)
89    }
90
91    pub fn peek(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>>
92    where
93        T: Clone,
94    {
95        if self.next.is_none() {
96            self.next = self.advance()?;
97        }
98
99        Ok(self.next.clone())
100    }
101
102    pub fn consume(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
103        if self.next.is_none() {
104            self.next = self.advance()?;
105        }
106
107        let result = Ok(self.next.take());
108        self.next = self.advance()?;
109        result
110    }
111
112    pub fn consume_all(&mut self) -> Result<Vec<Token<'a, T>>, Box<dyn std::error::Error>> {
113        let mut tokens: Vec<Token<'_, T>> = Vec::new();
114        while let Some(token) = self.consume()? {
115            tokens.push(token);
116        }
117        Ok(tokens)
118    }
119}