use core::cell::UnsafeCell;
use core::str;
use crate::span::Span;
/// Length in bytes of a UTF-8 sequence, indexed by its first byte.
///
/// Entries are `0` for bytes that cannot start a sequence: continuation bytes
/// (`0x80..=0xBF`) and `0xF8..=0xFF`. Lead bytes are classified purely by their
/// high-bit pattern, so `0xC0`, `0xC1` and `0xF5..=0xF7` still map to 2 and 4.
const CHAR_LENGTHS: [u8; 256] = {
    let mut table = [0_u8; 256];
    let mut lead = 0_usize;
    // `for` loops are not available in const evaluation, hence the manual counter.
    while lead < 256 {
        table[lead] = match lead {
            0x00..=0x7F => 1,
            0xC0..=0xDF => 2,
            0xE0..=0xEF => 3,
            0xF0..=0xF7 => 4,
            _ => 0,
        };
        lead += 1;
    }
    table
};
/// Shifts `base` left by six bits and ORs in the low six bits of `byte`.
///
/// This is one accumulation step of UTF-8 decoding: the two high bits of
/// `byte` (the continuation marker) are discarded and only its payload is kept.
#[inline(always)]
const fn append_continuation_byte(base: u32, byte: u8) -> u32 {
    /// Low six bits of a byte: the payload of a UTF-8 continuation byte.
    const PAYLOAD_MASK: u8 = 0x3F;

    let payload = (byte & PAYLOAD_MASK) as u32;
    (base << 6) | payload
}
/// Character-by-character cursor over a borrowed source string.
///
/// The scanner keeps one decoded character of lookahead. Every piece of cursor
/// state lives in an `UnsafeCell` so that the methods can advance the scanner
/// through a shared `&self` reference.
#[derive(Debug)]
pub struct SourceCodeScanner<'src> {
/// The source text viewed as raw bytes.
source_bytes: &'src [u8],
/// One-past-the-end pointer of the source text.
final_ptr: *const u8,
/// Pointer to the first byte of the character the scanner is currently on;
/// equal to `final_ptr` once the input is exhausted.
cur_char_ptr: UnsafeCell<*const u8>,
/// Pointer to the first byte after the current character.
next_char_ptr: UnsafeCell<*const u8>,
/// Decoded value of the current character. Holds a `' '` placeholder until
/// the first decode, and is left stale by `skip_without_cache`.
cached_char: UnsafeCell<char>,
/// Number of characters consumed so far.
char_index: UnsafeCell<usize>,
}
impl<'src> SourceCodeScanner<'src> {
    /// Creates a scanner positioned on the first character of `source`.
    ///
    /// The first character (if any) is decoded eagerly so that [`Self::peek`]
    /// is valid immediately.
    #[must_use]
    pub fn new(source: &'src str) -> Self {
        let start = source.as_ptr();
        // SAFETY: `start + source.len()` is the one-past-the-end address of the
        // same allocation `source` borrows from, which `add` permits.
        let final_ptr = unsafe { start.add(source.len()) };
        let out = Self {
            source_bytes: source.as_bytes(),
            final_ptr,
            next_char_ptr: UnsafeCell::new(start),
            cur_char_ptr: UnsafeCell::new(start),
            // Placeholder only: `peek`/`next` are gated on `has_next`, and
            // `refresh_cache` below overwrites it for non-empty input.
            cached_char: UnsafeCell::new(' '),
            char_index: UnsafeCell::new(0),
        };
        out.refresh_cache();
        out
    }

    /// Re-decodes the character under the cursor into the lookahead cache.
    ///
    /// At end of input the cached character is left untouched and the
    /// lookahead pointer is set to the cursor. This is the call that makes the
    /// cache valid again after [`Self::skip_without_cache`].
    #[inline]
    pub fn refresh_cache(&self) {
        // SAFETY: the cells are only accessed through short-lived raw-pointer
        // reads/writes in this impl (no references to their contents are kept),
        // and the decode only runs when the cursor is strictly before
        // `final_ptr`, i.e. on a character of the valid UTF-8 source.
        unsafe {
            let cursor = *self.cur_char_ptr.get();
            if cursor < self.final_ptr {
                self.cache_current_char();
            } else {
                *self.next_char_ptr.get() = cursor;
            }
        }
    }

    /// Decodes the character under the cursor and stores it, together with the
    /// pointer just past it, in the lookahead cache.
    ///
    /// # Safety
    ///
    /// Same contract as [`Self::read_next_char`]: the cursor must be on the
    /// first byte of a character, strictly before `final_ptr`.
    #[inline]
    unsafe fn cache_current_char(&self) {
        // SAFETY: the caller upholds `read_next_char`'s contract; the cell
        // writes go through raw pointers with no outstanding references.
        unsafe {
            let (decoded, after) = self.read_next_char();
            *self.next_char_ptr.get() = after;
            *self.cached_char.get() = decoded;
        }
    }

    /// Increments the consumed-character counter by one.
    ///
    /// # Safety
    ///
    /// Must be called at most once per character consumed from the source, so
    /// the counter never exceeds the source length and cannot overflow.
    #[inline]
    unsafe fn bump_char_index(&self) {
        // SAFETY: see the function contract; the cell is accessed through a
        // raw pointer with no outstanding references.
        unsafe {
            let counter = self.char_index.get();
            *counter = (*counter).unchecked_add(1);
        }
    }

    /// Moves the cursor onto the next character, decodes it into the cache if
    /// there is one, and bumps the character counter.
    ///
    /// # Safety
    ///
    /// The caller must have checked [`Self::has_next`], and the lookahead
    /// pointer must be the start of the next character (or `final_ptr`).
    #[inline]
    unsafe fn advance_and_decode(&self) {
        // SAFETY: by the contract the new cursor is a character boundary of
        // the source or `final_ptr`; decoding is guarded by `has_next`.
        unsafe {
            *self.cur_char_ptr.get() = *self.next_char_ptr.get();
            if self.has_next() {
                self.cache_current_char();
            }
            self.bump_char_index();
        }
    }

    /// Reads the byte `*ptr` points at and advances `*ptr` by one byte.
    ///
    /// # Safety
    ///
    /// `ptr` must be valid for reads and writes, and `*ptr` must point to a
    /// readable byte.
    #[inline(always)]
    unsafe fn take_byte(ptr: *mut *const u8) -> u8 {
        // SAFETY: guaranteed by the caller, see the function contract.
        unsafe {
            let result = **ptr;
            *ptr = (*ptr).byte_add(1);
            result
        }
    }

    /// Decodes the UTF-8 sequence that starts at the cursor.
    ///
    /// Returns the decoded character and a pointer to the first byte after it.
    /// The cursor itself is not moved.
    ///
    /// # Safety
    ///
    /// The cursor must point at the first byte of a complete, valid UTF-8
    /// sequence inside the source, i.e. strictly before `final_ptr`.
    unsafe fn read_next_char(&self) -> (char, *const u8) {
        // SAFETY: the source came from a `&str`, so a sequence starting at a
        // character boundary is complete and valid; every byte read below is
        // therefore in bounds and the assembled value is a valid scalar.
        unsafe {
            let mut ptr = *self.cur_char_ptr.get();
            let initial_byte = Self::take_byte(&mut ptr);
            if initial_byte <= 0x7F {
                return (initial_byte as char, ptr);
            }

            // The five-bit mask is exact for two-byte leads (0b110x_xxxx).
            // Three-byte leads (0b1110_xxxx) already have bit 4 clear, and
            // four-byte leads (0b1111_0xxx) keep a stray bit that is removed
            // after the last continuation byte.
            let mut result = append_continuation_byte(
                u32::from(initial_byte & 0b0001_1111),
                Self::take_byte(&mut ptr),
            );
            if initial_byte >= 0xE0 {
                result = append_continuation_byte(result, Self::take_byte(&mut ptr));
                if initial_byte >= 0xF0 {
                    result = append_continuation_byte(result, Self::take_byte(&mut ptr));
                    // After three shifts of six, the stray lead bit sits at bit 22.
                    result &= !(1_u32 << 22);
                }
            }
            (char::from_u32_unchecked(result), ptr)
        }
    }

    /// Returns a pointer just past the character under the cursor without
    /// decoding it, using the lead byte's entry in `CHAR_LENGTHS`.
    ///
    /// # Safety
    ///
    /// The cursor must point at the first byte of a character, strictly before
    /// `final_ptr`.
    unsafe fn skip_next_char(&self) -> *const u8 {
        // SAFETY: the cursor is readable by the contract; a `u8` index is
        // always in bounds of the 256-entry table; the lead byte's length
        // keeps the result within the source or at `final_ptr`.
        unsafe {
            let ptr = *self.cur_char_ptr.get();
            let lead = *ptr;
            let len = *CHAR_LENGTHS.get_unchecked(usize::from(lead));
            ptr.byte_add(usize::from(len))
        }
    }

    /// Returns `true` while the cursor has not reached the end of the source.
    #[inline]
    pub fn has_next(&self) -> bool {
        // SAFETY: plain read of a `Copy` value out of the cell; no reference
        // to the cell contents exists while it happens.
        unsafe { *self.cur_char_ptr.get() < self.final_ptr }
    }

    /// Consumes and returns the current character, or `None` at end of input.
    ///
    /// The returned value is the cached character, so it is stale if
    /// [`Self::skip_without_cache`] was called without a following
    /// [`Self::refresh_cache`].
    #[inline]
    // The explicit `if`/`else` shape is kept on purpose; the lint is expected.
    #[expect(clippy::if_then_some_else_none)]
    pub fn next(&self) -> Option<char> {
        // SAFETY: cell contents are copied out through raw pointers, and the
        // advance only happens after `has_next` confirmed a current character.
        unsafe {
            if self.has_next() {
                let result = *self.cached_char.get();
                self.advance_and_decode();
                Some(result)
            } else {
                None
            }
        }
    }

    /// Consumes the current character without returning it.
    ///
    /// Does nothing at end of input.
    #[inline]
    pub fn skip(&self) {
        if self.has_next() {
            // SAFETY: `has_next` was just checked, and the lookahead pointer
            // is maintained by every advancing method of this impl.
            unsafe { self.advance_and_decode() };
        }
    }

    /// Consumes the current character and only computes the position of the
    /// following one, without decoding it.
    ///
    /// Does nothing at end of input.
    ///
    /// # Safety
    ///
    /// The cached character is not updated, so [`Self::peek`] and
    /// [`Self::next`] report the previous character until
    /// [`Self::refresh_cache`] is called. Callers must call `refresh_cache`
    /// before relying on either of them.
    /// NOTE(review): this stale cache appears to be the reason the function is
    /// marked `unsafe`; confirm the intended contract.
    #[inline]
    pub unsafe fn skip_without_cache(&self) {
        // SAFETY: cell accesses go through raw pointers with no outstanding
        // references; `skip_next_char` only runs while the cursor is on a
        // character of the source.
        unsafe {
            if self.has_next() {
                let lookahead = self.next_char_ptr.get();
                *self.cur_char_ptr.get() = *lookahead;
                if self.has_next() {
                    *lookahead = self.skip_next_char();
                }
                self.bump_char_index();
            }
        }
    }

    /// Returns the current character without consuming it, or `None` at end
    /// of input.
    #[inline]
    pub fn peek(&self) -> Option<char> {
        // SAFETY: copies the cached `char` out of its cell; no reference to
        // the cell contents is retained.
        unsafe { self.has_next().then(|| *self.cached_char.get()) }
    }

    /// Returns the number of characters consumed so far.
    #[inline]
    pub fn char_index(&self) -> usize {
        // SAFETY: copies the counter out of its cell; no reference to the
        // cell contents is retained.
        unsafe { *self.char_index.get() }
    }

    /// Runs `predicate` and returns the source text the cursor moved across
    /// while it ran.
    ///
    /// The closure is expected to advance this scanner (for example through
    /// [`Self::next`] or [`Self::skip`]); if it does not, the result is empty.
    #[inline]
    pub fn consume_range<F: FnOnce()>(&self, predicate: F) -> &'src str {
        // SAFETY: copies the cursor out of its cell.
        let range_start = unsafe { *self.cur_char_ptr.get() };
        predicate();
        // SAFETY: as above.
        let range_end = unsafe { *self.cur_char_ptr.get() };

        let base = self.source_bytes.as_ptr();
        // SAFETY: both pointers were derived from the source and the cursor
        // only moves forward, so the offsets are non-negative, ordered and in
        // bounds. Both lie on character boundaries, so the bytes between them
        // are valid UTF-8.
        unsafe {
            let initial_index = range_start.byte_offset_from(base) as usize;
            let final_index = range_end.byte_offset_from(base) as usize;
            str::from_utf8_unchecked(self.source_bytes.get_unchecked(initial_index..final_index))
        }
    }

    /// Returns a [`Span`] for the current position.
    ///
    /// The span is built from the current character index and a second
    /// argument of `1` when a character remains or `0` at end of input
    /// (presumably the span length; confirm against `Span::new`).
    #[must_use]
    #[inline]
    pub fn span(&self) -> Span {
        // NOTE(review): nothing visible here needs `unsafe` unless `Span::new`
        // is an `unsafe fn`; confirm, and drop this block if it is not.
        unsafe {
            if self.has_next() {
                Span::new(self.char_index(), 1)
            } else {
                Span::new(self.char_index(), 0)
            }
        }
    }
}