1pub mod internal;
2use regex::Regex;
3
4pub struct Duo<T> {
5 pub kind: T,
6 pub regex: Regex,
7 pub preserve: bool,
8}
9
10impl<T> Duo<T> {
11 pub fn new(kind: T, regex: &str, preserve: bool) -> Self {
12 Self {
13 kind,
14 regex: Regex::new(regex).unwrap(),
15 preserve,
16 }
17 }
18}
19
20pub struct Tokenizer<'a, T> {
21 source: &'a str,
22 duos: &'a [Duo<T>],
23 pub cursor: usize,
24 pub line: usize,
25 pub column: usize,
26 next: Option<Token<'a, T>>,
27}
28
/// A lexed token: borrowed kind, borrowed text and the position where it starts.
///
/// The token only stores references, so it is `Copy` for every `T`. `Clone`
/// and `Copy` are implemented by hand because `#[derive]` would add
/// `T: Clone` / `T: Copy` bounds that a `&'a T` field does not need.
#[derive(Debug, PartialEq)]
pub struct Token<'a, T> {
    /// Kind of the rule that produced this token.
    pub kind: &'a T,
    /// Matched slice of the source text.
    pub value: &'a str,
    /// `(line, column)` at which the token starts.
    pub pos: (usize, usize),
}

impl<'a, T> Clone for Token<'a, T> {
    #[inline]
    fn clone(&self) -> Self {
        *self
    }
}

impl<'a, T> Copy for Token<'a, T> {}
35
36impl<'a, T> Tokenizer<'a, T> {
37 #[inline]
38 pub fn new(source: &'a str, duos: &'a [Duo<T>]) -> Self {
39 Self {
40 source,
41 duos,
42 cursor: 0,
43 line: 1,
44 column: 1,
45 next: None,
46 }
47 }
48
49 fn advance(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
50 while self.cursor < self.source.len() {
51 let mut matched = false;
52
53 for duo in self.duos.iter() {
54 if let Some(result) = duo.regex.find(&self.source[self.cursor..]) {
55 let value: &str = result.as_str();
56 let token_pos = (self.line, self.column);
57 let len = result.len();
58 self.cursor += len;
59 let newlines_count = bytecount::count(value.as_bytes(), b'\n');
60 if newlines_count > 0 {
61 self.line += newlines_count;
62 self.column = len - value.rfind('\n').unwrap_or(1);
63 } else {
64 self.column += len;
65 }
66
67 if duo.preserve {
68 return Ok(Some(Token {
69 kind: &duo.kind,
70 value,
71 pos: token_pos,
72 }));
73 } else {
74 matched = true;
75 break;
76 }
77 }
78 }
79
80 if !matched {
81 return Err(format!(
82 "Failed to match at line {}, column {}.",
83 self.line, self.column
84 ))?;
85 }
86 }
87
88 Ok(None)
89 }
90
91 pub fn peek(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>>
92 where
93 T: Clone,
94 {
95 if self.next.is_none() {
96 self.next = self.advance()?;
97 }
98
99 Ok(self.next.clone())
100 }
101
102 pub fn consume(&mut self) -> Result<Option<Token<'a, T>>, Box<dyn std::error::Error>> {
103 if self.next.is_none() {
104 self.next = self.advance()?;
105 }
106
107 let result = Ok(self.next.take());
108 self.next = self.advance()?;
109 result
110 }
111
112 pub fn consume_all(&mut self) -> Result<Vec<Token<'a, T>>, Box<dyn std::error::Error>> {
113 let mut tokens: Vec<Token<'_, T>> = Vec::new();
114 while let Some(token) = self.consume()? {
115 tokens.push(token);
116 }
117 Ok(tokens)
118 }
119}