oxihuman_core/lexer_token_stream.rs

#![allow(dead_code)]

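/// A single lexical token: its kind, the original source text, and the
/// 1-based line/column where it starts.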
#[derive(Debug, Clone, PartialEq)]
pub struct LexToken {
    pub kind: LexTokenKind,
    pub text: String,
    pub line: usize,
    pub col: usize,
}

impl LexToken {
    pub fn new(kind: LexTokenKind, text: &str, line: usize, col: usize) -> Self {
        Self {
            kind,
            text: text.to_string(),
            line,
            col,
        }
    }

    pub fn is_eof(&self) -> bool {
        self.kind == LexTokenKind::Eof
    }
}

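/// Coarse classification of a token. `Custom` carries an arbitrary,
/// caller-supplied label.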
34#[derive(Debug, Clone, PartialEq)]
36pub enum LexTokenKind {
37 Word,
38 Number,
39 Punctuation,
40 Whitespace,
41 Newline,
42 Eof,
43 Custom(String),
44}
45
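/// A cursor over a vector of tokens, with peeking, skipping, and a single
/// optional mark for backtracking.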
#[derive(Debug, Clone)]
pub struct LexerStream {
    tokens: Vec<LexToken>,
    pos: usize,
    mark: Option<usize>,
}

impl LexerStream {
    pub fn new(tokens: Vec<LexToken>) -> Self {
        Self {
            tokens,
            pos: 0,
            mark: None,
        }
    }

    pub fn is_empty(&self) -> bool {
        self.pos >= self.tokens.len()
    }

    pub fn remaining(&self) -> usize {
        self.tokens.len().saturating_sub(self.pos)
    }

    pub fn current_pos(&self) -> usize {
        self.pos
    }

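    /// Returns the token at the cursor without advancing; `next_token`
    /// returns it and advances.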
    pub fn peek(&self) -> Option<&LexToken> {
        self.tokens.get(self.pos)
    }

    pub fn peek_nth(&self, n: usize) -> Option<&LexToken> {
        self.tokens.get(self.pos + n)
    }

    pub fn next_token(&mut self) -> Option<&LexToken> {
        let t = self.tokens.get(self.pos);
        if t.is_some() {
            self.pos += 1;
        }
        t
    }

    pub fn skip(&mut self, n: usize) {
        self.pos = (self.pos + n).min(self.tokens.len());
    }

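    /// Records the current position; `restore_mark` rewinds to it and
    /// `clear_mark` discards it. Only one mark is held at a time.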
    pub fn set_mark(&mut self) {
        self.mark = Some(self.pos);
    }

    pub fn restore_mark(&mut self) {
        if let Some(m) = self.mark {
            self.pos = m;
        }
    }

    pub fn clear_mark(&mut self) {
        self.mark = None;
    }

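    /// Consumes tokens while `pred` holds, returning references to the
    /// consumed tokens in order.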
    pub fn consume_while(&mut self, pred: impl Fn(&LexToken) -> bool) -> Vec<&LexToken> {
        let mut result = Vec::new();
        while let Some(t) = self.tokens.get(self.pos) {
            if pred(t) {
                result.push(t);
                self.pos += 1;
            } else {
                break;
            }
        }
        result
    }

    pub fn total(&self) -> usize {
        self.tokens.len()
    }

    pub fn peek_rest(&self) -> &[LexToken] {
        &self.tokens[self.pos..]
    }
}

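/// Splits `text` on whitespace and classifies each word as a number, word,
/// or punctuation token, appending a final `Eof` token.
///
/// A minimal usage sketch (not compiled as a doc-test):
/// ```ignore
/// let mut stream = lex_string("alpha 42");
/// assert_eq!(stream.next_token().map(|t| t.text.as_str()), Some("alpha"));
/// assert_eq!(stream.peek().map(|t| &t.kind), Some(&LexTokenKind::Number));
/// ```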
pub fn lex_string(text: &str) -> LexerStream {
    let mut tokens: Vec<LexToken> = Vec::new();
    let mut line = 1usize;
    let mut col = 1usize;

    // `split_whitespace` never yields a token containing '\n', so track the
    // line number by iterating over lines, and find each word's real column
    // within its line instead of guessing from accumulated word lengths.
    for (idx, line_text) in text.lines().enumerate() {
        line = idx + 1;
        let mut search_from = 0usize;
        for word in line_text.split_whitespace() {
            // Byte offset of this word within its line; columns are 1-based.
            let offset = line_text[search_from..]
                .find(word)
                .map_or(search_from, |o| o + search_from);
            col = offset + 1;
            search_from = offset + word.len();

            let kind = if word.chars().all(|c| c.is_ascii_digit()) {
                LexTokenKind::Number
            } else if word.chars().all(|c| c.is_alphanumeric() || c == '_') {
                LexTokenKind::Word
            } else {
                LexTokenKind::Punctuation
            };
            tokens.push(LexToken::new(kind, word, line, col));
        }
        col = search_from + 1;
    }
    tokens.push(LexToken::new(LexTokenKind::Eof, "", line, col));
    LexerStream::new(tokens)
}

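/// Counts the tokens in `stream` whose kind equals `kind`.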
pub fn count_tokens_of_kind(stream: &LexerStream, kind: &LexTokenKind) -> usize {
    stream.tokens.iter().filter(|t| &t.kind == kind).count()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_lex_produces_eof() {
        let s = lex_string("hello world");
        assert!(s.tokens.last().map(|t| t.is_eof()).unwrap_or(false));
    }

    #[test]
    fn test_lex_word_count() {
        let s = lex_string("one two three");
        assert_eq!(count_tokens_of_kind(&s, &LexTokenKind::Word), 3);
    }

    #[test]
    fn test_peek_does_not_advance() {
        let mut s = lex_string("a b");
        let first = s.peek().expect("should succeed").text.clone();
        let _ = s.peek();
        let next = s.next_token().expect("should succeed").text.clone();
        assert_eq!(first, next);
    }

    #[test]
    fn test_skip_advances() {
        let mut s = lex_string("a b c");
        s.skip(1);
        assert_eq!(s.current_pos(), 1);
    }

    #[test]
    fn test_mark_restore() {
        let mut s = lex_string("a b c");
        s.next_token();
        s.set_mark();
        s.next_token();
        s.restore_mark();
        assert_eq!(s.current_pos(), 1);
    }

    #[test]
    fn test_remaining_decreases() {
        let mut s = lex_string("a b");
        let before = s.remaining();
        s.next_token();
        assert!(s.remaining() < before);
    }

    #[test]
    fn test_peek_nth() {
        let s = lex_string("x y z");
        assert_eq!(s.peek_nth(1).map(|t| t.text.as_str()), Some("y"));
    }

    #[test]
    fn test_consume_while() {
        let mut s = lex_string("1 2 3 word");
        let nums = s.consume_while(|t| t.kind == LexTokenKind::Number);
        assert_eq!(nums.len(), 3);
    }

    #[test]
    fn test_is_empty_after_all_consumed() {
        let mut s = lex_string("a");
        while !s.is_empty() {
            s.next_token();
        }
        assert!(s.is_empty());
    }
}