oak_core/source/
cursor.rs1use crate::source::{Source, TextChunk, simd::SimdScanner};
2use core::range::Range;
3use std::fmt;
4
5pub struct SourceCursor<'s, S: Source + ?Sized> {
20 source: &'s S,
21 offset: usize,
22 chunk: TextChunk<'s>,
23 scratch: String,
24}
25
26impl<'s, S: Source + ?Sized> fmt::Debug for SourceCursor<'s, S> {
27 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
28 f.debug_struct("SourceCursor").field("offset", &self.offset).field("chunk_start", &self.chunk.start).field("chunk_end", &self.chunk.end()).finish()
29 }
30}
31
32impl<'s, S: Source + ?Sized> SourceCursor<'s, S> {
33 pub fn new(source: &'s S) -> Self {
35 Self::new_at(source, 0)
36 }
37
38 pub fn new_at(source: &'s S, offset: usize) -> Self {
40 let end = source.length();
41 let offset = offset.min(end);
42 let chunk = source.chunk_at(offset);
43 Self { source, offset, chunk, scratch: String::new() }
44 }
45
46 #[inline]
48 pub fn position(&self) -> usize {
49 self.offset
50 }
51
52 #[inline]
55 pub fn set_position(&mut self, offset: usize) -> usize {
56 let last = self.offset;
57 self.offset = offset.min(self.source.length());
58 last
59 }
60
61 #[inline]
63 pub fn source(&self) -> &'s S {
64 self.source
65 }
66
67 fn ensure_chunk(&mut self) {
69 let end = self.source.length();
70 if self.offset > end {
71 self.offset = end
72 }
73 if self.offset < self.chunk.start || self.offset > self.chunk.end() || (self.offset == self.chunk.end() && self.offset < end) {
76 self.chunk = self.source.chunk_at(self.offset)
77 }
78 }
79
80 pub fn rest(&mut self) -> &str {
82 self.ensure_chunk();
83 self.chunk.slice_from(self.offset)
84 }
85
86 pub fn chunk_end(&mut self) -> usize {
88 self.ensure_chunk();
89 self.chunk.end()
90 }
91
92 pub fn peek_char(&mut self) -> Option<char> {
94 if self.offset >= self.chunk.start {
95 let rel = self.offset - self.chunk.start;
96 if rel < self.chunk.text.len() {
97 if self.chunk.text.is_char_boundary(rel) {
99 let text = unsafe { self.chunk.text.get_unchecked(rel..) };
100 return text.chars().next();
101 }
102 else {
103 let mut i = rel;
106 while i < self.chunk.text.len() && !self.chunk.text.is_char_boundary(i) {
107 i += 1
108 }
109 if i < self.chunk.text.len() {
110 let text = unsafe { self.chunk.text.get_unchecked(i..) };
111 return text.chars().next();
112 }
113 }
114 }
115 }
116 self.rest().chars().next()
117 }
118
119 pub fn peek_next_n(&mut self, n: usize) -> Option<char> {
121 let target_offset = self.offset + n;
122 if target_offset >= self.source.length() {
123 return None;
124 }
125 if target_offset >= self.chunk.start && target_offset < self.chunk.end() {
126 let rel = target_offset - self.chunk.start;
127 let text = self.chunk.text.get(rel..).unwrap_or("");
128 return text.chars().next();
129 }
130 self.source.get_char_at(target_offset)
131 }
132
133 pub fn peek_next_char(&mut self) -> Option<char> {
135 let ch = self.peek_char()?;
136 self.peek_next_n(ch.len_utf8())
137 }
138
139 pub fn skip_ascii_whitespace(&mut self) -> Range<usize> {
141 let start = self.offset;
142 loop {
143 self.ensure_chunk();
144 let rel = self.offset.saturating_sub(self.chunk.start);
145 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
146
147 if bytes.is_empty() {
148 if self.offset >= self.source.length() {
149 break;
150 }
151 self.chunk = self.source.chunk_at(self.offset);
152 continue;
153 }
154
155 let skipped = SimdScanner::skip_ascii_whitespace(bytes);
156 self.offset += skipped;
157
158 if skipped < bytes.len() || self.offset >= self.source.length() {
159 break;
160 }
161 }
162 Range { start, end: self.offset }
163 }
164
165 pub fn skip_ascii_digits(&mut self) -> Range<usize> {
167 let start = self.offset;
168 loop {
169 self.ensure_chunk();
170 let rel = self.offset.saturating_sub(self.chunk.start);
171 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
172
173 if bytes.is_empty() {
174 if self.offset >= self.source.length() {
175 break;
176 }
177 self.chunk = self.source.chunk_at(self.offset);
178 continue;
179 }
180
181 let skipped = SimdScanner::skip_ascii_digits(bytes);
182 self.offset += skipped;
183
184 if skipped < bytes.len() || self.offset >= self.source.length() {
185 break;
186 }
187 }
188 Range { start, end: self.offset }
189 }
190
191 pub fn skip_ascii_ident_continue(&mut self) -> Range<usize> {
193 let start = self.offset;
194 loop {
195 self.ensure_chunk();
196 let rel = self.offset.saturating_sub(self.chunk.start);
197 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
198
199 if bytes.is_empty() {
200 if self.offset >= self.source.length() {
201 break;
202 }
203 self.chunk = self.source.chunk_at(self.offset);
204 continue;
205 }
206
207 let skipped = SimdScanner::skip_ascii_ident_continue(bytes);
208 self.offset += skipped;
209
210 if skipped < bytes.len() || self.offset >= self.source.length() {
211 break;
212 }
213 }
214 Range { start, end: self.offset }
215 }
216
217 pub fn skip_until(&mut self, target: u8) -> Range<usize> {
219 let start = self.offset;
220 loop {
221 self.ensure_chunk();
222 let rel = self.offset.saturating_sub(self.chunk.start);
223 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
224
225 if bytes.is_empty() {
226 if self.offset >= self.source.length() {
227 break;
228 }
229 self.chunk = self.source.chunk_at(self.offset);
230 continue;
231 }
232
233 let skipped = SimdScanner::skip_until(bytes, target);
234 self.offset += skipped;
235
236 if skipped < bytes.len() || self.offset >= self.source.length() {
237 break;
238 }
239 }
240 Range { start, end: self.offset }
241 }
242
243 #[inline(always)]
245 pub fn peek_byte(&mut self) -> Option<u8> {
246 if self.offset >= self.chunk.start {
247 let rel = self.offset - self.chunk.start;
248 let bytes = self.chunk.text.as_bytes();
249 if rel < bytes.len() {
250 return Some(unsafe { *bytes.get_unchecked(rel) });
251 }
252 }
253 self.ensure_chunk();
254 let rel = self.offset - self.chunk.start;
255 let bytes = self.chunk.text.as_bytes();
256 bytes.get(rel).copied()
257 }
258
259 pub fn advance_bytes(&mut self, len: usize) -> usize {
261 self.offset = (self.offset + len).min(self.source.length());
262 self.offset
263 }
264
265 pub fn advance_char(&mut self) -> Option<char> {
267 let ch = self.peek_char()?;
268 self.advance_bytes(ch.len_utf8());
269 Some(ch)
270 }
271
272 #[inline(always)]
274 pub fn advance_byte(&mut self) -> Option<u8> {
275 let b = self.peek_byte()?;
276 self.offset += 1;
277 Some(b)
278 }
279
280 pub fn take_while(&mut self, mut pred: impl FnMut(char) -> bool) -> Range<usize> {
282 let start = self.offset;
283
284 loop {
285 self.ensure_chunk();
287
288 let rel = self.offset.saturating_sub(self.chunk.start);
290 let text = if rel < self.chunk.text.len() { unsafe { self.chunk.text.get_unchecked(rel..) } } else { "" };
291
292 if text.is_empty() {
293 if self.offset >= self.source.length() {
296 break;
297 }
298 self.chunk = self.source.chunk_at(self.offset);
300 continue;
302 }
303
304 let mut advanced = 0;
305 let mut stop = false;
306
307 for (i, ch) in text.char_indices() {
309 if !pred(ch) {
310 advanced = i;
311 stop = true;
312 break;
313 }
314 advanced = i + ch.len_utf8()
315 }
316
317 self.offset += advanced;
318
319 if stop {
320 break;
321 }
322
323 if self.offset >= self.source.length() {
325 break;
326 }
327 }
329
330 Range { start, end: self.offset }
331 }
332
333 #[inline(always)]
335 pub fn take_while_byte(&mut self, mut pred: impl FnMut(u8) -> bool) -> Range<usize> {
336 let start = self.offset;
337
338 loop {
339 self.ensure_chunk();
340 let rel = self.offset.saturating_sub(self.chunk.start);
341 let bytes = if rel < self.chunk.text.len() { unsafe { self.chunk.text.as_bytes().get_unchecked(rel..) } } else { &[] };
342
343 if bytes.is_empty() {
344 if self.offset >= self.source.length() {
345 break;
346 }
347 self.chunk = self.source.chunk_at(self.offset);
348 continue;
349 }
350
351 let mut advanced = 0;
352 let mut stop = false;
353
354 for (i, &b) in bytes.iter().enumerate() {
355 if !pred(b) {
356 advanced = i;
357 stop = true;
358 break;
359 }
360 advanced = i + 1
361 }
362
363 self.offset += advanced;
364 if stop || self.offset >= self.source.length() {
365 break;
366 }
367 }
368
369 Range { start, end: self.offset }
370 }
371
372 pub fn starts_with(&mut self, pattern: &str) -> bool {
374 self.ensure_chunk();
375 let chunk_text = self.chunk.text;
376 let offset_in_chunk = self.offset.saturating_sub(self.chunk.start);
377
378 let rest = if chunk_text.is_char_boundary(offset_in_chunk) {
380 chunk_text.get(offset_in_chunk..).unwrap_or("")
381 }
382 else {
383 let mut i = offset_in_chunk;
385 while i < chunk_text.len() && !chunk_text.is_char_boundary(i) {
386 i += 1
387 }
388 chunk_text.get(i..).unwrap_or("")
389 };
390
391 if rest.len() >= pattern.len() {
392 return rest.starts_with(pattern);
393 }
394
395 self.scratch.clear();
396 self.scratch.push_str(rest);
397 let mut next = self.chunk.end();
398 let end = self.source.length();
399 while self.scratch.len() < pattern.len() && next < end {
400 let chunk = self.source.chunk_at(next);
401 self.scratch.push_str(chunk.text);
402 next = chunk.end()
403 }
404 self.scratch.starts_with(pattern)
405 }
406
407 pub fn consume_if_starts_with(&mut self, pattern: &str) -> bool {
409 if !self.starts_with(pattern) {
410 return false;
411 }
412 self.advance_bytes(pattern.len());
413 true
414 }
415
416 pub fn find_str(&mut self, pattern: &str) -> Option<usize> {
418 if pattern.is_empty() {
419 return Some(self.offset);
420 }
421
422 let pat_len = pattern.len();
423 let mut offset = self.offset;
424 let end = self.source.length();
425 while offset < end {
426 self.offset = offset;
427 self.ensure_chunk();
428 let text = self.chunk.slice_from(offset);
429 if let Some(pos) = text.find(pattern) {
430 return Some(offset + pos);
431 }
432 let chunk_end = self.chunk.end();
433 if chunk_end >= end {
434 return None;
435 }
436
437 if pat_len > 1 {
438 let keep = pat_len - 1;
439 self.scratch.clear();
440 let tail = text.get(text.len().saturating_sub(keep)..).unwrap_or("");
441 self.scratch.push_str(tail);
442 let tail_abs_start = chunk_end.saturating_sub(tail.len());
443 let next_chunk = self.source.chunk_at(chunk_end);
444 self.scratch.push_str(next_chunk.text);
445 if let Some(pos) = self.scratch.find(pattern) {
446 return Some(tail_abs_start + pos);
447 }
448 }
449
450 offset = chunk_end
451 }
452 None
453 }
454}