oak_core/source/
cursor.rs1use crate::source::{Source, TextChunk, simd::SimdScanner};
2use core::range::Range;
3use std::fmt;
4
5pub struct SourceCursor<'s, S: Source + ?Sized> {
7 source: &'s S,
8 offset: usize,
9 chunk: TextChunk<'s>,
10 scratch: String,
11}
12
13impl<'s, S: Source + ?Sized> fmt::Debug for SourceCursor<'s, S> {
14 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
15 f.debug_struct("SourceCursor").field("offset", &self.offset).field("chunk_start", &self.chunk.start).field("chunk_end", &self.chunk.end()).finish()
16 }
17}
18
19impl<'s, S: Source + ?Sized> SourceCursor<'s, S> {
20 pub fn new(source: &'s S) -> Self {
22 Self::new_at(source, 0)
23 }
24
25 pub fn new_at(source: &'s S, offset: usize) -> Self {
27 let end = source.length();
28 let offset = offset.min(end);
29 let chunk = source.chunk_at(offset);
30 Self { source, offset, chunk, scratch: String::new() }
31 }
32
33 #[inline]
35 pub fn position(&self) -> usize {
36 self.offset
37 }
38
39 #[inline]
42 pub fn set_position(&mut self, offset: usize) -> usize {
43 let last = self.offset;
44 self.offset = offset.min(self.source.length());
45 last
46 }
47
48 #[inline]
50 pub fn source(&self) -> &'s S {
51 self.source
52 }
53
54 fn ensure_chunk(&mut self) {
56 let end = self.source.length();
57 if self.offset > end {
58 self.offset = end;
59 }
60 if self.offset < self.chunk.start || self.offset > self.chunk.end() {
61 self.chunk = self.source.chunk_at(self.offset);
62 }
63 }
64
65 pub fn rest(&mut self) -> &str {
67 self.ensure_chunk();
68 self.chunk.slice_from(self.offset)
69 }
70
71 pub fn chunk_end(&mut self) -> usize {
73 self.ensure_chunk();
74 self.chunk.end()
75 }
76
77 pub fn peek_char(&mut self) -> Option<char> {
79 if self.offset >= self.chunk.start {
80 let rel = self.offset - self.chunk.start;
81 if rel < self.chunk.text.len() {
82 let text = unsafe { self.chunk.text.get_unchecked(rel..) };
84 return text.chars().next();
85 }
86 }
87 self.rest().chars().next()
88 }
89
90 pub fn skip_ascii_whitespace(&mut self) -> Range<usize> {
92 let start = self.offset;
93 loop {
94 self.ensure_chunk();
95 let rel = self.offset.saturating_sub(self.chunk.start);
96 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
97
98 if bytes.is_empty() {
99 if self.offset >= self.source.length() {
100 break;
101 }
102 self.chunk = self.source.chunk_at(self.offset);
103 continue;
104 }
105
106 let skipped = SimdScanner::skip_ascii_whitespace(bytes);
107 self.offset += skipped;
108
109 if skipped < bytes.len() || self.offset >= self.source.length() {
110 break;
111 }
112 }
113 Range { start, end: self.offset }
114 }
115
116 pub fn skip_ascii_digits(&mut self) -> Range<usize> {
118 let start = self.offset;
119 loop {
120 self.ensure_chunk();
121 let rel = self.offset.saturating_sub(self.chunk.start);
122 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
123
124 if bytes.is_empty() {
125 if self.offset >= self.source.length() {
126 break;
127 }
128 self.chunk = self.source.chunk_at(self.offset);
129 continue;
130 }
131
132 let skipped = SimdScanner::skip_ascii_digits(bytes);
133 self.offset += skipped;
134
135 if skipped < bytes.len() || self.offset >= self.source.length() {
136 break;
137 }
138 }
139 Range { start, end: self.offset }
140 }
141
142 pub fn skip_ascii_ident_continue(&mut self) -> Range<usize> {
144 let start = self.offset;
145 loop {
146 self.ensure_chunk();
147 let rel = self.offset.saturating_sub(self.chunk.start);
148 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
149
150 if bytes.is_empty() {
151 if self.offset >= self.source.length() {
152 break;
153 }
154 self.chunk = self.source.chunk_at(self.offset);
155 continue;
156 }
157
158 let skipped = SimdScanner::skip_ascii_ident_continue(bytes);
159 self.offset += skipped;
160
161 if skipped < bytes.len() || self.offset >= self.source.length() {
162 break;
163 }
164 }
165 Range { start, end: self.offset }
166 }
167
168 pub fn skip_until(&mut self, target: u8) -> Range<usize> {
170 let start = self.offset;
171 loop {
172 self.ensure_chunk();
173 let rel = self.offset.saturating_sub(self.chunk.start);
174 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
175
176 if bytes.is_empty() {
177 if self.offset >= self.source.length() {
178 break;
179 }
180 self.chunk = self.source.chunk_at(self.offset);
181 continue;
182 }
183
184 let skipped = SimdScanner::skip_until(bytes, target);
185 self.offset += skipped;
186
187 if skipped < bytes.len() || self.offset >= self.source.length() {
188 break;
189 }
190 }
191 Range { start, end: self.offset }
192 }
193
194 #[inline(always)]
196 pub fn peek_byte(&mut self) -> Option<u8> {
197 if self.offset >= self.chunk.start {
198 let rel = self.offset - self.chunk.start;
199 let bytes = self.chunk.text.as_bytes();
200 if rel < bytes.len() {
201 return Some(unsafe { *bytes.get_unchecked(rel) });
202 }
203 }
204 self.ensure_chunk();
205 let rel = self.offset - self.chunk.start;
206 let bytes = self.chunk.text.as_bytes();
207 bytes.get(rel).copied()
208 }
209
210 pub fn advance_bytes(&mut self, len: usize) -> usize {
212 self.offset = (self.offset + len).min(self.source.length());
213 self.offset
214 }
215
216 pub fn advance_char(&mut self) -> Option<char> {
218 let ch = self.peek_char()?;
219 self.advance_bytes(ch.len_utf8());
220 Some(ch)
221 }
222
223 #[inline(always)]
225 pub fn advance_byte(&mut self) -> Option<u8> {
226 let b = self.peek_byte()?;
227 self.offset += 1;
228 Some(b)
229 }
230
231 pub fn take_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Range<usize> {
233 let start = self.offset;
234
235 loop {
236 self.ensure_chunk();
238
239 let rel = self.offset.saturating_sub(self.chunk.start);
241 let text = if rel < self.chunk.text.len() { unsafe { self.chunk.text.get_unchecked(rel..) } } else { "" };
242
243 if text.is_empty() {
244 if self.offset >= self.source.length() {
247 break;
248 }
249 self.chunk = self.source.chunk_at(self.offset);
251 continue;
253 }
254
255 let mut advanced = 0;
256 let mut stop = false;
257
258 for (i, ch) in text.char_indices() {
260 if !pred(ch) {
261 advanced = i;
262 stop = true;
263 break;
264 }
265 advanced = i + ch.len_utf8();
266 }
267
268 self.offset += advanced;
269
270 if stop {
271 break;
272 }
273
274 if self.offset >= self.source.length() {
276 break;
277 }
278 }
280
281 Range { start, end: self.offset }
282 }
283
284 #[inline(always)]
286 pub fn take_while_byte(&mut self, mut pred: impl FnMut(u8) -> bool) -> Range<usize> {
287 let start = self.offset;
288
289 loop {
290 self.ensure_chunk();
291 let rel = self.offset.saturating_sub(self.chunk.start);
292 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
293
294 if bytes.is_empty() {
295 if self.offset >= self.source.length() {
296 break;
297 }
298 self.chunk = self.source.chunk_at(self.offset);
299 continue;
300 }
301
302 let mut advanced = 0;
303 let mut stop = false;
304
305 for (i, &b) in bytes.iter().enumerate() {
306 if !pred(b) {
307 advanced = i;
308 stop = true;
309 break;
310 }
311 advanced = i + 1;
312 }
313
314 self.offset += advanced;
315 if stop || self.offset >= self.source.length() {
316 break;
317 }
318 }
319
320 Range { start, end: self.offset }
321 }
322
323 pub fn starts_with(&mut self, pattern: &str) -> bool {
325 self.ensure_chunk();
326 let chunk_text = self.chunk.text;
327 let rest = chunk_text.get(self.offset.saturating_sub(self.chunk.start)..).unwrap_or("");
328 if rest.len() >= pattern.len() {
329 return rest.as_bytes().get(..pattern.len()) == Some(pattern.as_bytes());
330 }
331
332 self.scratch.clear();
333 self.scratch.push_str(rest);
334 let mut next = self.chunk.end();
335 let end = self.source.length();
336 while self.scratch.len() < pattern.len() && next < end {
337 let chunk = self.source.chunk_at(next);
338 self.scratch.push_str(chunk.text);
339 next = chunk.end();
340 }
341 self.scratch.as_bytes().get(..pattern.len()) == Some(pattern.as_bytes())
342 }
343
344 pub fn consume_if_starts_with(&mut self, pattern: &str) -> bool {
346 if !self.starts_with(pattern) {
347 return false;
348 }
349 self.advance_bytes(pattern.len());
350 true
351 }
352
353 pub fn find_str(&mut self, pattern: &str) -> Option<usize> {
355 if pattern.is_empty() {
356 return Some(self.offset);
357 }
358
359 let pat_len = pattern.len();
360 let mut offset = self.offset;
361 let end = self.source.length();
362 while offset < end {
363 self.offset = offset;
364 self.ensure_chunk();
365 let text = self.chunk.slice_from(offset);
366 if let Some(pos) = text.find(pattern) {
367 return Some(offset + pos);
368 }
369 let chunk_end = self.chunk.end();
370 if chunk_end >= end {
371 return None;
372 }
373
374 if pat_len > 1 {
375 let keep = pat_len - 1;
376 self.scratch.clear();
377 let tail = text.get(text.len().saturating_sub(keep)..).unwrap_or("");
378 self.scratch.push_str(tail);
379 let tail_abs_start = chunk_end.saturating_sub(tail.len());
380 let next_chunk = self.source.chunk_at(chunk_end);
381 self.scratch.push_str(next_chunk.text);
382 if let Some(pos) = self.scratch.find(pattern) {
383 return Some(tail_abs_start + pos);
384 }
385 }
386
387 offset = chunk_end;
388 }
389 None
390 }
391}