use std::{cmp::Ordering, marker::PhantomData, slice, str};
use oxc_data_structures::assert_unchecked;
use crate::{MAX_LEN, UniquePromise};
use super::search::SEARCH_BATCH_SIZE;
/// Cursor over a borrowed source text, tracked with raw pointers.
///
/// `start` and `end` are fixed when the `Source` is created in `new`; `ptr` is the current
/// position and is moved by the `next_*`, `advance_*`, `back` and `set_position` methods.
pub(super) struct Source<'a> {
/// Pointer to the first byte of the source text.
start: *const u8,
/// Pointer one past the last byte of the source text.
end: *const u8,
/// Pointer to the current position. `is_eof` is true when this equals `end`.
ptr: *const u8,
/// Address of `end` minus `SEARCH_BATCH_SIZE`, saturating at 0.
/// Compared against in `SourcePosition::can_read_batch_from`.
end_for_batch_search_addr: usize,
/// Makes the struct carry the lifetime `'a` of the borrowed `&str` without storing it.
_marker: PhantomData<&'a str>,
}
impl<'a> Source<'a> {
/// Create a `Source` positioned at the first byte of `source_text`.
///
/// If `source_text` is longer than `MAX_LEN` bytes, the 1-byte string `"\0"` is used instead.
/// NOTE(review): presumably so that an over-long input is rejected further down the line -
/// confirm with the caller.
///
/// `unique` is not read by this function; it only has to be supplied by the caller.
#[expect(unused_variables, clippy::needless_pass_by_value)]
pub(super) fn new(mut source_text: &'a str, unique: UniquePromise) -> Self {
    if source_text.len() > MAX_LEN {
        source_text = "\0";
    }

    // `as_ptr_range` gives the pointer to the first byte and the pointer one past the last byte.
    let bounds = source_text.as_bytes().as_ptr_range();

    // Saturating, not wrapping: for a short text at a very low address the result is 0,
    // rather than a huge address.
    let end_addr = bounds.end as usize;

    Self {
        start: bounds.start,
        end: bounds.end,
        ptr: bounds.start,
        end_for_batch_search_addr: end_addr.saturating_sub(SEARCH_BATCH_SIZE),
        _marker: PhantomData,
    }
}
/// Get the entire source text as a `&str`, independent of the current position.
#[inline]
pub(super) fn whole(&self) -> &'a str {
    let (first, last) = (self.start(), self.end());
    // SAFETY: `start` and `end` are the bounds of the `&str` this `Source` was created from,
    // so `first <= last` and both are on the edges of a valid UTF-8 string.
    unsafe { self.str_between_positions_unchecked(first, last) }
}
/// Get the part of the source text from the current position to the end as a `&str`.
#[inline]
pub(super) fn remaining(&self) -> &'a str {
    let (current, last) = (self.position(), self.end());
    // SAFETY: relies on the current position lying within the source text, not after `end`,
    // and on a UTF-8 character boundary (checked in debug builds by the callee).
    unsafe { self.str_between_positions_unchecked(current, last) }
}
/// Number of bytes between the current position and the end of the source text.
#[inline]
fn remaining_bytes(&self) -> usize {
    let (current, last) = (self.position(), self.end());
    // SAFETY: relies on both positions pointing into the same source text,
    // with the current position not after `end`.
    unsafe { last.offset_from(current) }
}
/// Get a `SourcePosition` for the first byte of the source text.
#[inline]
fn start(&self) -> SourcePosition<'a> {
// SAFETY: `self.start` is the pointer to the start of the source text stored by `new`.
unsafe { SourcePosition::new(self.start) }
}
/// Get a `SourcePosition` for the end of the source text (one past the last byte).
#[inline]
pub(super) fn end(&self) -> SourcePosition<'a> {
// SAFETY: `self.end` is the one-past-the-end pointer of the source text stored by `new`.
unsafe { SourcePosition::new(self.end) }
}
/// Returns `true` if the current position is at the end of the source text.
#[inline]
pub(super) fn is_eof(&self) -> bool {
// Plain pointer equality; `end` itself is never dereferenced here.
self.ptr == self.end
}
/// Get the current position as a `SourcePosition`.
///
/// The returned value can later be handed back to `set_position`.
#[inline]
pub(super) fn position(&self) -> SourcePosition<'a> {
// SAFETY: `self.ptr` is this `Source`'s own current-position pointer.
unsafe { SourcePosition::new(self.ptr) }
}
/// Move the current position to `pos`.
///
/// In debug builds, asserts that `pos` lies within `start..=end` and is either at the end
/// or not on a UTF-8 continuation byte. Release builds perform no check.
#[inline]
pub(super) fn set_position(&mut self, pos: SourcePosition<'a>) {
debug_assert!(
pos.ptr >= self.start
&& pos.ptr <= self.end
// SAFETY: the bounds checks above short-circuit first, and `pos.ptr == self.end`
// short-circuits the read when `pos` is at the end, so the byte read is in bounds.
&& (pos.ptr == self.end || !is_utf8_cont_byte(unsafe { pos.read() }))
);
self.ptr = pos.ptr;
}
/// Move the current position to the end of the source text, so `is_eof` returns `true`.
#[inline]
pub(super) fn advance_to_end(&mut self) {
self.ptr = self.end;
}
/// Advance by 1 byte if the next byte equals `ascii_byte`.
///
/// Returns `true` if the byte matched (and the position moved), `false` if it did not match
/// or the source is at its end.
///
/// # Safety
/// `ascii_byte` must be an ASCII byte (only checked in debug builds). If a non-ASCII byte
/// were matched, advancing by a single byte would leave the position inside a multi-byte
/// character.
#[inline]
pub(super) unsafe fn advance_if_ascii_eq(&mut self, ascii_byte: u8) -> bool {
    debug_assert!(ascii_byte.is_ascii());
    match self.peek_byte() {
        Some(next) if next == ascii_byte => {
            // SAFETY: `peek_byte` returned `Some`, so there is a byte to step over.
            self.ptr = unsafe { self.ptr.add(1) };
            true
        }
        _ => false,
    }
}
/// Get the text from `pos` up to the current position as a `&str`.
///
/// # Panics
/// Panics if `pos` is after the current position.
#[inline]
pub(super) fn str_from_pos_to_current(&self, pos: SourcePosition<'a>) -> &'a str {
    assert!(pos <= self.position());
    let current = self.position();
    // SAFETY: the assertion above guarantees `pos` is not after `current`.
    // The remaining requirements (in bounds, on UTF-8 boundaries) are relied on as
    // properties of `SourcePosition`s belonging to this `Source`.
    unsafe { self.str_between_positions_unchecked(pos, current) }
}
/// Get the text from `pos` up to the current position as a `&str`, without checking
/// that `pos` is not after the current position.
///
/// # Safety
/// `pos` must not be after the current position, and must satisfy the requirements of
/// `str_between_positions_unchecked` for its `start` argument.
#[inline]
pub(super) unsafe fn str_from_pos_to_current_unchecked(
    &self,
    pos: SourcePosition<'a>,
) -> &'a str {
    let current = self.position();
    // SAFETY: ordering of `pos` and `current` is guaranteed by the caller.
    unsafe { self.str_between_positions_unchecked(pos, current) }
}
/// Get the text from the current position up to `pos` as a `&str`, without checking
/// that `pos` is not before the current position.
///
/// # Safety
/// `pos` must not be before the current position, and must satisfy the requirements of
/// `str_between_positions_unchecked` for its `end` argument.
#[inline]
pub(super) unsafe fn str_from_current_to_pos_unchecked(
    &self,
    pos: SourcePosition<'a>,
) -> &'a str {
    let current = self.position();
    // SAFETY: ordering of `current` and `pos` is guaranteed by the caller.
    unsafe { self.str_between_positions_unchecked(current, pos) }
}
/// Get the text from `pos` to the end of the source text as a `&str`.
#[inline]
pub(super) fn str_from_pos_to_end(&self, pos: SourcePosition<'a>) -> &'a str {
    let last = self.end();
    // SAFETY: relies on `pos` belonging to this `Source`, i.e. being within bounds
    // (so not after `last`) and on a UTF-8 character boundary.
    unsafe { self.str_between_positions_unchecked(pos, last) }
}
/// Get the text between `start` and `end` as a `&str`. Nothing is checked in release builds.
///
/// # Safety
/// As spelled out by the debug assertions, the caller must ensure that:
/// * `start` is not after `end`,
/// * `start` is not before the start of this `Source`, and `end` is not after its end,
/// * `start` and `end` are each either at the end of the source or not on a UTF-8
///   continuation byte.
#[inline]
pub(super) unsafe fn str_between_positions_unchecked(
&self,
start: SourcePosition<'a>,
end: SourcePosition<'a>,
) -> &'a str {
// Debug-only checks of ordering and bounds.
debug_assert!(start.ptr <= end.ptr);
debug_assert!(start.ptr >= self.start);
debug_assert!(end.ptr <= self.end);
// Debug-only checks that both positions are on UTF-8 character boundaries.
// SAFETY: each read is skipped when the position is at the end; otherwise the
// assertions above have established the position is within the source text.
unsafe {
debug_assert!(start.ptr == self.end || !is_utf8_cont_byte(start.read()));
debug_assert!(end.ptr == self.end || !is_utf8_cont_byte(end.read()));
}
// SAFETY: caller guarantees `start <= end`, both within the source text, so the byte range
// is a valid slice; caller guarantees both are on UTF-8 boundaries, so it is valid UTF-8.
unsafe {
let len = end.offset_from(start);
let slice = slice::from_raw_parts(start.ptr, len);
std::str::from_utf8_unchecked(slice)
}
}
/// Get the offset of the current position from the start of the source, as a `u32`.
#[inline]
pub(super) fn offset(&self) -> u32 {
    let current = self.position();
    self.offset_of(current)
}
/// Get the offset of the current position from the start of the source, as a `usize`.
#[inline]
pub(super) fn offset_usize(&self) -> usize {
    let current = self.position();
    self.offset_of_usize(current)
}
/// Get the offset of `pos` from the start of the source, as a `u32`.
///
/// The conversion to `u32` is done by `SourcePosition::offset_from_u32`.
#[inline]
pub(super) fn offset_of(&self, pos: SourcePosition<'a>) -> u32 {
    let origin = self.start();
    // SAFETY: relies on `pos` belonging to this `Source`, so it is not before `origin`.
    unsafe { pos.offset_from_u32(origin) }
}
/// Get the offset of `pos` from the start of the source, as a `usize`.
#[inline]
pub(super) fn offset_of_usize(&self, pos: SourcePosition<'a>) -> usize {
    let origin = self.start();
    // SAFETY: relies on `pos` belonging to this `Source`, so it is not before `origin`.
    unsafe { pos.offset_from(origin) }
}
/// Move the current position back by `n` bytes.
///
/// # Panics
/// * If `n` is 0.
/// * If `n` is greater than the number of bytes consumed so far.
/// * If the byte at the new position is a UTF-8 continuation byte.
#[inline]
pub(super) fn back(&mut self, n: usize) {
// Needed for the read below: with `n == 0` at the end of the source, the new position
// would be `end`, which has no byte to read.
assert!(n > 0, "Cannot call `Source::back` with 0");
// Do not allow going back past the start of the source.
let offset = self.offset_usize();
assert!(n <= offset, "Cannot go back {n} bytes - only {offset} bytes consumed");
// SAFETY: `n <= offset` was asserted above, so `new_pos` is not before `start`.
let new_pos = unsafe { self.position().sub(n) };
// SAFETY: `n > 0`, so `new_pos` is before the current position and therefore before `end`,
// which means there is a byte to read.
let byte = unsafe { new_pos.read() };
assert!(!is_utf8_cont_byte(byte), "Offset is not on a UTF-8 character boundary");
self.ptr = new_pos.ptr;
}
/// Consume and return the next char, or `None` if at the end of the source.
///
/// An ASCII byte is handled inline; anything else is delegated to `next_unicode_char`.
#[inline]
pub(super) fn next_char(&mut self) -> Option<char> {
    match self.peek_byte()? {
        byte if byte.is_ascii() => {
            // SAFETY: `peek_byte` returned a byte, so stepping 1 byte forward stays in bounds.
            // The byte is ASCII, so the next byte starts a new character (or is the end).
            self.ptr = unsafe { self.ptr.add(1) };
            Some(byte as char)
        }
        byte => {
            // The position is expected to be on a character boundary.
            debug_assert!(!is_utf8_cont_byte(byte));
            // SAFETY: source is not at its end, and the next byte is not ASCII.
            unsafe { self.next_unicode_char() }
        }
    }
}
/// Consume and return the next char, for the case where the next byte is not ASCII.
///
/// Always returns `Some`; the `Option` return type is kept on purpose (see the
/// `unnecessary_wraps` lint expectation).
///
/// # Safety
/// The remaining source text must be non-empty and its first byte must not be ASCII.
/// Both conditions are passed to `assert_unchecked!`.
#[expect(clippy::unnecessary_wraps)]
#[cold] unsafe fn next_unicode_char(&mut self) -> Option<char> {
let remaining = self.remaining();
// NOTE(review): `assert_unchecked!` presumably states these conditions to the compiler
// without checking them in release builds - confirm against `oxc_data_structures`.
// SAFETY: caller guarantees both conditions.
unsafe {
assert_unchecked!(!remaining.is_empty());
assert_unchecked!(!remaining.as_bytes()[0].is_ascii());
}
let mut chars = remaining.chars();
// `remaining` is non-empty per the caller's guarantee, so there is a first char.
let c = chars.next().unwrap();
// Move to the start of whatever follows the consumed char.
self.ptr = chars.as_str().as_ptr();
Some(c)
}
/// Consume and return the next 2 chars.
///
/// Returns `None` if fewer than 2 bytes remain. When both of the next 2 bytes are ASCII
/// they are handled inline; otherwise the work is delegated to `next_2_unicode_chars`.
#[inline]
pub(super) fn next_2_chars(&mut self) -> Option<[char; 2]> {
    match self.peek_2_bytes()? {
        [byte1, byte2] if byte1.is_ascii() && byte2.is_ascii() => {
            // SAFETY: `peek_2_bytes` returned 2 bytes, so stepping 2 bytes forward stays in
            // bounds. Both bytes are ASCII, so the new position is on a character boundary.
            unsafe { self.ptr = self.ptr.add(2) };
            Some([byte1 as char, byte2 as char])
        }
        [byte1, _] => {
            // The position is expected to be on a character boundary.
            debug_assert!(!is_utf8_cont_byte(byte1));
            // SAFETY: at least 2 bytes remain, so the source is not at its end.
            unsafe { self.next_2_unicode_chars() }
        }
    }
}
/// Consume and return the next 2 chars, decoding them from the remaining text.
///
/// Returns `None`, without moving the position, if only 1 char remains.
///
/// # Safety
/// The remaining source text must be non-empty; this condition is passed to
/// `assert_unchecked!`.
#[cold] unsafe fn next_2_unicode_chars(&mut self) -> Option<[char; 2]> {
let remaining = self.remaining();
// SAFETY: caller guarantees the source is not at its end.
unsafe { assert_unchecked!(!remaining.is_empty()) };
let mut chars = remaining.chars();
// `remaining` is non-empty per the caller's guarantee, so there is a first char.
let c1 = chars.next().unwrap();
// Early return here leaves `self.ptr` untouched.
let c2 = chars.next()?;
// Move to the start of whatever follows the 2 consumed chars.
self.ptr = chars.as_str().as_ptr();
Some([c1, c2])
}
/// Consume and return the next byte, or `None` if at the end of the source.
///
/// Currently unused (see the `dead_code` lint expectation).
///
/// # Safety
/// This moves the position forward by exactly 1 byte without looking at the byte's value.
/// If the byte is part of a multi-byte character, the position ends up off a UTF-8 character
/// boundary; the caller is responsible for not leaving it there.
#[expect(dead_code)]
#[inline]
unsafe fn next_byte(&mut self) -> Option<u8> {
// The not-at-end branch is intentionally written first (hence the lint expectation).
#[expect(clippy::if_not_else)] if !self.is_eof() {
// SAFETY: just checked the source is not at its end, so there is a byte to consume.
Some(unsafe { self.next_byte_unchecked() })
} else {
None
}
}
/// Consume and return the next byte, without checking for the end of the source.
///
/// # Safety
/// * The source must not be at its end.
/// * The position moves forward by exactly 1 byte whatever the byte's value, so the caller
///   must make sure the position does not stay inside a multi-byte character.
#[inline]
pub(super) unsafe fn next_byte_unchecked(&mut self) -> u8 {
    // SAFETY: caller guarantees there is a byte to read.
    let byte = unsafe { self.peek_byte_unchecked() };
    // SAFETY: a byte exists at `self.ptr`, so `self.ptr + 1` is at most `end`.
    self.ptr = unsafe { self.ptr.add(1) };
    byte
}
/// Return the next char without consuming it, or `None` if at the end of the source.
///
/// An ASCII byte is handled inline; anything else is delegated to `peek_unicode_char`.
#[inline]
pub(super) fn peek_char(&self) -> Option<char> {
    match self.peek_byte()? {
        byte if byte.is_ascii() => Some(byte as char),
        byte => {
            // The position is expected to be on a character boundary.
            debug_assert!(!is_utf8_cont_byte(byte));
            // SAFETY: source is not at its end, and the next byte is not ASCII.
            unsafe { self.peek_unicode_char() }
        }
    }
}
/// Return the next char without consuming it, for the case where the next byte is not ASCII.
///
/// Always returns `Some`; the `Option` return type is kept on purpose (see the
/// `unnecessary_wraps` lint expectation).
///
/// # Safety
/// The remaining source text must be non-empty and its first byte must not be ASCII.
/// Both conditions are passed to `assert_unchecked!`.
#[expect(clippy::unnecessary_wraps)]
#[cold] unsafe fn peek_unicode_char(&self) -> Option<char> {
let remaining = self.remaining();
// NOTE(review): `assert_unchecked!` presumably states these conditions to the compiler
// without checking them in release builds - confirm against `oxc_data_structures`.
// SAFETY: caller guarantees both conditions.
unsafe {
assert_unchecked!(!remaining.is_empty());
assert_unchecked!(!remaining.as_bytes()[0].is_ascii());
}
let mut chars = remaining.chars();
// `remaining` is non-empty per the caller's guarantee, so there is a first char.
let c = chars.next().unwrap();
Some(c)
}
/// Return the next byte without consuming it, or `None` if at the end of the source.
#[inline]
pub(super) fn peek_byte(&self) -> Option<u8> {
// The not-at-end branch is intentionally written first (hence the lint expectation).
#[expect(clippy::if_not_else)] if !self.is_eof() {
// SAFETY: just checked the source is not at its end, so there is a byte to read.
Some(unsafe { self.peek_byte_unchecked() })
} else {
None
}
}
/// Return the next 2 bytes without consuming them, or `None` if fewer than 2 bytes remain.
#[inline]
pub(super) fn peek_2_bytes(&self) -> Option<[u8; 2]> {
    if self.remaining_bytes() < 2 {
        return None;
    }
    // SAFETY: at least 2 bytes remain, so reading 2 bytes does not go past `end`.
    Some(unsafe { self.position().read2() })
}
/// Return the next byte without consuming it, and without checking for the end of the source.
///
/// # Safety
/// The source must not be at its end (only checked in debug builds).
#[inline]
pub(super) unsafe fn peek_byte_unchecked(&self) -> u8 {
// Debug-only check that the current position is within `start..end`.
debug_assert!(self.ptr >= self.start && self.ptr < self.end);
// SAFETY: caller guarantees the current position is before `end`, so a byte can be read.
unsafe { self.position().read() }
}
}
/// A position within a source text, stored as a raw pointer.
///
/// Cheap to copy. Equality is derived, so two positions are equal when their pointers are.
/// Ordering is implemented by hand in the `Ord` / `PartialOrd` impls.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct SourcePosition<'a> {
/// Pointer to the byte at this position.
ptr: *const u8,
/// Makes the struct carry the lifetime `'a` without storing a reference.
_marker: PhantomData<&'a u8>,
}
impl<'a> SourcePosition<'a> {
/// Create a `SourcePosition` from a raw pointer.
///
/// # Safety
/// The body only stores `ptr`, but other methods dereference it or do pointer arithmetic
/// with it. NOTE(review): presumably `ptr` must point into (or one past the end of) the
/// source text that lifetime `'a` refers to, on a UTF-8 character boundary - confirm.
#[inline]
pub(super) unsafe fn new(ptr: *const u8) -> Self {
Self { ptr, _marker: PhantomData }
}
#[inline]
pub(super) unsafe fn add(self, n: usize) -> Self {
unsafe { Self::new(self.ptr.add(n)) }
}
#[inline]
pub(super) unsafe fn sub(self, n: usize) -> Self {
unsafe { Self::new(self.ptr.sub(n)) }
}
/// Get the distance in bytes from `origin` to this position, as a `usize`.
///
/// # Safety
/// Must satisfy the requirements of `<*const u8>::offset_from_unsigned`: both positions
/// belong to the same allocation, and `self` is not before `origin`.
#[inline]
pub(super) unsafe fn offset_from(self, origin: Self) -> usize {
// SAFETY: requirements are passed on to the caller.
unsafe { self.ptr.offset_from_unsigned(origin.ptr) }
}
/// Get the distance in bytes from `origin` to this position, as a `u32`.
///
/// The `usize` distance is converted with `as u32`, which truncates if it does not fit.
/// NOTE(review): presumably the source length is capped so that this cannot happen -
/// confirm against `MAX_LEN`.
///
/// # Safety
/// Same requirements as `offset_from`.
#[inline]
pub(super) unsafe fn offset_from_u32(self, origin: Self) -> u32 {
// SAFETY: requirements are passed on to the caller.
let offset = unsafe { self.offset_from(origin) };
// The cast is done in its own `let` so that the lint expectation can be attached to it.
#[expect(clippy::cast_possible_truncation)]
let offset = offset as u32;
offset
}
/// Get the signed distance in bytes from `other` to this position.
///
/// Negative if this position is before `other`, 0 if they are equal, positive otherwise.
/// Used by the `Ord` / `PartialOrd` impls.
#[inline]
fn offset_from_signed(self, other: Self) -> isize {
// SAFETY: relies on both positions pointing into the same source text, as
// `<*const u8>::offset_from` requires. This is not checked here.
unsafe { self.ptr.offset_from(other.ptr) }
}
/// Returns `true` if this position is at the end of `source`.
#[inline]
pub(super) fn is_end_of(self, source: &Source<'a>) -> bool {
// Plain pointer equality; nothing is dereferenced.
self.ptr == source.end
}
/// Returns `true` if this position is not at the end of `source`.
#[inline]
pub(super) fn is_not_end_of(self, source: &Source<'a>) -> bool {
    // Plain pointer inequality; nothing is dereferenced.
    self.ptr != source.end
}
/// Returns `true` if at least `SEARCH_BATCH_SIZE` bytes lie between this position and
/// the end of `source`.
///
/// `source.end_for_batch_search_addr` is the end address minus `SEARCH_BATCH_SIZE`
/// (saturating at 0), so the test is a single address comparison.
#[inline]
pub(super) fn can_read_batch_from(&self, source: &Source<'a>) -> bool {
    let addr = self.ptr as usize;
    addr <= source.end_for_batch_search_addr
}
/// Read the byte at this position.
///
/// # Safety
/// This position must point at a readable byte, so in particular it must not be at the
/// end of the source text. The pointer must not be null (only checked in debug builds).
#[inline]
pub(super) unsafe fn read(self) -> u8 {
debug_assert!(!self.ptr.is_null());
// SAFETY: caller guarantees the pointer is non-null and points at a readable byte.
// `u8` has no alignment requirement.
unsafe { *self.ptr.as_ref().unwrap_unchecked() }
}
/// Read the 2 bytes starting at this position.
///
/// # Safety
/// There must be at least 2 readable bytes starting at this position. The pointer must
/// not be null (only checked in debug builds).
#[inline]
pub(super) unsafe fn read2(self) -> [u8; 2] {
debug_assert!(!self.ptr.is_null());
// SAFETY: caller guarantees the pointer is non-null and that 2 bytes can be read from it.
// `[u8; 2]` has the same alignment as `u8`, so the cast does not add an alignment requirement.
unsafe {
let p = self.ptr.cast::<[u8; 2]>();
*p.as_ref().unwrap_unchecked()
}
}
/// Get a slice of `len` bytes starting at this position.
///
/// # Safety
/// Must satisfy the requirements of `slice::from_raw_parts`; in particular `len` bytes
/// starting at this position must be readable for the lifetime `'a`.
pub(super) unsafe fn slice(self, len: usize) -> &'a [u8] {
// SAFETY: requirements are passed on to the caller.
unsafe { slice::from_raw_parts(self.ptr, len) }
}
}
/// Positions are ordered by the sign of the distance between their pointers:
/// a position earlier in the source text is `Less` than a later one.
impl Ord for SourcePosition<'_> {
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        // Negative distance => `Less`, zero => `Equal`, positive => `Greater`.
        self.offset_from_signed(*other).cmp(&0)
    }
}
/// Ordering is total, so `partial_cmp` always returns `Some`.
///
/// The comparison operators are overridden to test the sign of the pointer distance directly.
impl PartialOrd for SourcePosition<'_> {
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.cmp(other))
    }

    /// `self < other`: distance from `other` is below zero.
    #[inline]
    fn lt(&self, other: &Self) -> bool {
        self.offset_from_signed(*other).is_negative()
    }

    /// `self <= other`: distance from `other` is not above zero.
    #[inline]
    fn le(&self, other: &Self) -> bool {
        !self.offset_from_signed(*other).is_positive()
    }

    /// `self > other`: distance from `other` is above zero.
    #[inline]
    fn gt(&self, other: &Self) -> bool {
        self.offset_from_signed(*other).is_positive()
    }

    /// `self >= other`: distance from `other` is not below zero.
    #[inline]
    fn ge(&self, other: &Self) -> bool {
        !self.offset_from_signed(*other).is_negative()
    }
}
/// Returns `true` if `byte` is a UTF-8 continuation byte, i.e. in the range `0x80..=0xBF`.
///
/// Such a byte is never the first byte of a UTF-8 encoded character.
#[inline]
const fn is_utf8_cont_byte(byte: u8) -> bool {
    matches!(byte, 0x80..=0xBF)
}