oak_core/source/
cursor.rs1use crate::source::{Source, TextChunk, simd::SimdScanner};
2use core::range::Range;
3use std::fmt;
4
5pub struct SourceCursor<'s, S: Source + ?Sized> {
7 source: &'s S,
8 offset: usize,
9 chunk: TextChunk<'s>,
10 scratch: String,
11}
12
13impl<'s, S: Source + ?Sized> fmt::Debug for SourceCursor<'s, S> {
14 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
15 f.debug_struct("SourceCursor").field("offset", &self.offset).field("chunk_start", &self.chunk.start).field("chunk_end", &self.chunk.end()).finish()
16 }
17}
18
19impl<'s, S: Source + ?Sized> SourceCursor<'s, S> {
20 pub fn new(source: &'s S) -> Self {
22 Self::new_at(source, 0)
23 }
24
25 pub fn new_at(source: &'s S, offset: usize) -> Self {
27 let end = source.length();
28 let offset = offset.min(end);
29 let chunk = source.chunk_at(offset);
30 Self { source, offset, chunk, scratch: String::new() }
31 }
32
33 #[inline]
35 pub fn position(&self) -> usize {
36 self.offset
37 }
38
39 #[inline]
42 pub fn set_position(&mut self, offset: usize) -> usize {
43 let last = self.offset;
44 self.offset = offset.min(self.source.length());
45 last
46 }
47
48 #[inline]
50 pub fn source(&self) -> &'s S {
51 self.source
52 }
53
54 fn ensure_chunk(&mut self) {
56 let end = self.source.length();
57 if self.offset > end {
58 self.offset = end;
59 }
60 if self.offset < self.chunk.start || self.offset > self.chunk.end() {
61 self.chunk = self.source.chunk_at(self.offset);
62 }
63 }
64
65 pub fn rest(&mut self) -> &str {
67 self.ensure_chunk();
68 self.chunk.slice_from(self.offset)
69 }
70
71 pub fn chunk_end(&mut self) -> usize {
73 self.ensure_chunk();
74 self.chunk.end()
75 }
76
77 pub fn peek_char(&mut self) -> Option<char> {
79 if self.offset >= self.chunk.start {
80 let rel = self.offset - self.chunk.start;
81 if rel < self.chunk.text.len() {
82 let text = unsafe { self.chunk.text.get_unchecked(rel..) };
84 return text.chars().next();
85 }
86 }
87 self.rest().chars().next()
88 }
89
90 pub fn peek_next_n(&mut self, n: usize) -> Option<char> {
92 let target_offset = self.offset + n;
93 if target_offset >= self.source.length() {
94 return None;
95 }
96 if target_offset >= self.chunk.start && target_offset < self.chunk.end() {
97 let rel = target_offset - self.chunk.start;
98 let text = unsafe { self.chunk.text.get_unchecked(rel..) };
99 return text.chars().next();
100 }
101 self.source.get_char_at(target_offset)
102 }
103
104 pub fn skip_ascii_whitespace(&mut self) -> Range<usize> {
106 let start = self.offset;
107 loop {
108 self.ensure_chunk();
109 let rel = self.offset.saturating_sub(self.chunk.start);
110 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
111
112 if bytes.is_empty() {
113 if self.offset >= self.source.length() {
114 break;
115 }
116 self.chunk = self.source.chunk_at(self.offset);
117 continue;
118 }
119
120 let skipped = SimdScanner::skip_ascii_whitespace(bytes);
121 self.offset += skipped;
122
123 if skipped < bytes.len() || self.offset >= self.source.length() {
124 break;
125 }
126 }
127 Range { start, end: self.offset }
128 }
129
130 pub fn skip_ascii_digits(&mut self) -> Range<usize> {
132 let start = self.offset;
133 loop {
134 self.ensure_chunk();
135 let rel = self.offset.saturating_sub(self.chunk.start);
136 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
137
138 if bytes.is_empty() {
139 if self.offset >= self.source.length() {
140 break;
141 }
142 self.chunk = self.source.chunk_at(self.offset);
143 continue;
144 }
145
146 let skipped = SimdScanner::skip_ascii_digits(bytes);
147 self.offset += skipped;
148
149 if skipped < bytes.len() || self.offset >= self.source.length() {
150 break;
151 }
152 }
153 Range { start, end: self.offset }
154 }
155
156 pub fn skip_ascii_ident_continue(&mut self) -> Range<usize> {
158 let start = self.offset;
159 loop {
160 self.ensure_chunk();
161 let rel = self.offset.saturating_sub(self.chunk.start);
162 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
163
164 if bytes.is_empty() {
165 if self.offset >= self.source.length() {
166 break;
167 }
168 self.chunk = self.source.chunk_at(self.offset);
169 continue;
170 }
171
172 let skipped = SimdScanner::skip_ascii_ident_continue(bytes);
173 self.offset += skipped;
174
175 if skipped < bytes.len() || self.offset >= self.source.length() {
176 break;
177 }
178 }
179 Range { start, end: self.offset }
180 }
181
182 pub fn skip_until(&mut self, target: u8) -> Range<usize> {
184 let start = self.offset;
185 loop {
186 self.ensure_chunk();
187 let rel = self.offset.saturating_sub(self.chunk.start);
188 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
189
190 if bytes.is_empty() {
191 if self.offset >= self.source.length() {
192 break;
193 }
194 self.chunk = self.source.chunk_at(self.offset);
195 continue;
196 }
197
198 let skipped = SimdScanner::skip_until(bytes, target);
199 self.offset += skipped;
200
201 if skipped < bytes.len() || self.offset >= self.source.length() {
202 break;
203 }
204 }
205 Range { start, end: self.offset }
206 }
207
208 #[inline(always)]
210 pub fn peek_byte(&mut self) -> Option<u8> {
211 if self.offset >= self.chunk.start {
212 let rel = self.offset - self.chunk.start;
213 let bytes = self.chunk.text.as_bytes();
214 if rel < bytes.len() {
215 return Some(unsafe { *bytes.get_unchecked(rel) });
216 }
217 }
218 self.ensure_chunk();
219 let rel = self.offset - self.chunk.start;
220 let bytes = self.chunk.text.as_bytes();
221 bytes.get(rel).copied()
222 }
223
224 pub fn advance_bytes(&mut self, len: usize) -> usize {
226 self.offset = (self.offset + len).min(self.source.length());
227 self.offset
228 }
229
230 pub fn advance_char(&mut self) -> Option<char> {
232 let ch = self.peek_char()?;
233 self.advance_bytes(ch.len_utf8());
234 Some(ch)
235 }
236
237 #[inline(always)]
239 pub fn advance_byte(&mut self) -> Option<u8> {
240 let b = self.peek_byte()?;
241 self.offset += 1;
242 Some(b)
243 }
244
245 pub fn take_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Range<usize> {
247 let start = self.offset;
248
249 loop {
250 self.ensure_chunk();
252
253 let rel = self.offset.saturating_sub(self.chunk.start);
255 let text = if rel < self.chunk.text.len() { unsafe { self.chunk.text.get_unchecked(rel..) } } else { "" };
256
257 if text.is_empty() {
258 if self.offset >= self.source.length() {
261 break;
262 }
263 self.chunk = self.source.chunk_at(self.offset);
265 continue;
267 }
268
269 let mut advanced = 0;
270 let mut stop = false;
271
272 for (i, ch) in text.char_indices() {
274 if !pred(ch) {
275 advanced = i;
276 stop = true;
277 break;
278 }
279 advanced = i + ch.len_utf8();
280 }
281
282 self.offset += advanced;
283
284 if stop {
285 break;
286 }
287
288 if self.offset >= self.source.length() {
290 break;
291 }
292 }
294
295 Range { start, end: self.offset }
296 }
297
298 #[inline(always)]
300 pub fn take_while_byte(&mut self, mut pred: impl FnMut(u8) -> bool) -> Range<usize> {
301 let start = self.offset;
302
303 loop {
304 self.ensure_chunk();
305 let rel = self.offset.saturating_sub(self.chunk.start);
306 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
307
308 if bytes.is_empty() {
309 if self.offset >= self.source.length() {
310 break;
311 }
312 self.chunk = self.source.chunk_at(self.offset);
313 continue;
314 }
315
316 let mut advanced = 0;
317 let mut stop = false;
318
319 for (i, &b) in bytes.iter().enumerate() {
320 if !pred(b) {
321 advanced = i;
322 stop = true;
323 break;
324 }
325 advanced = i + 1;
326 }
327
328 self.offset += advanced;
329 if stop || self.offset >= self.source.length() {
330 break;
331 }
332 }
333
334 Range { start, end: self.offset }
335 }
336
337 pub fn starts_with(&mut self, pattern: &str) -> bool {
339 self.ensure_chunk();
340 let chunk_text = self.chunk.text;
341 let rest = chunk_text.get(self.offset.saturating_sub(self.chunk.start)..).unwrap_or("");
342 if rest.len() >= pattern.len() {
343 return rest.as_bytes().get(..pattern.len()) == Some(pattern.as_bytes());
344 }
345
346 self.scratch.clear();
347 self.scratch.push_str(rest);
348 let mut next = self.chunk.end();
349 let end = self.source.length();
350 while self.scratch.len() < pattern.len() && next < end {
351 let chunk = self.source.chunk_at(next);
352 self.scratch.push_str(chunk.text);
353 next = chunk.end();
354 }
355 self.scratch.as_bytes().get(..pattern.len()) == Some(pattern.as_bytes())
356 }
357
358 pub fn consume_if_starts_with(&mut self, pattern: &str) -> bool {
360 if !self.starts_with(pattern) {
361 return false;
362 }
363 self.advance_bytes(pattern.len());
364 true
365 }
366
367 pub fn find_str(&mut self, pattern: &str) -> Option<usize> {
369 if pattern.is_empty() {
370 return Some(self.offset);
371 }
372
373 let pat_len = pattern.len();
374 let mut offset = self.offset;
375 let end = self.source.length();
376 while offset < end {
377 self.offset = offset;
378 self.ensure_chunk();
379 let text = self.chunk.slice_from(offset);
380 if let Some(pos) = text.find(pattern) {
381 return Some(offset + pos);
382 }
383 let chunk_end = self.chunk.end();
384 if chunk_end >= end {
385 return None;
386 }
387
388 if pat_len > 1 {
389 let keep = pat_len - 1;
390 self.scratch.clear();
391 let tail = text.get(text.len().saturating_sub(keep)..).unwrap_or("");
392 self.scratch.push_str(tail);
393 let tail_abs_start = chunk_end.saturating_sub(tail.len());
394 let next_chunk = self.source.chunk_at(chunk_end);
395 self.scratch.push_str(next_chunk.text);
396 if let Some(pos) = self.scratch.find(pattern) {
397 return Some(tail_abs_start + pos);
398 }
399 }
400
401 offset = chunk_end;
402 }
403 None
404 }
405}