use super::BytesNewtype;
use crate::error::InvalidString;
/// A cursor over a byte string that tracks which part of it is still unread.
#[derive(Clone, Copy, Debug)]
pub struct Splitter<T> {
/// The backing byte string; `range` indexes into its bytes.
string: T,
/// Bounds of the unread bytes within `string`, plus the cached encoding tag for them.
range: Range,
}
/// What is currently known about the encoding of a range of bytes.
///
/// The derived ordering follows declaration order (`Unknown < Utf8 < Ascii`), and code in
/// this file compares tags with `<` and `>=`, so the variants must not be reordered.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Default)]
enum Encoding {
/// Nothing has been established about the bytes.
#[default]
Unknown,
/// The bytes have been accepted as valid UTF-8.
Utf8,
/// No non-ASCII byte is present.
Ascii,
}
/// Half-open window `start..end` into a byte slice, with a cached encoding tag.
#[derive(Clone, Copy, Debug)]
struct Range {
/// Index of the first byte inside the window.
pub start: usize,
/// Index one past the last byte inside the window.
pub end: usize,
/// What is known about the encoding of the bytes inside the window.
pub encoding: Encoding,
}
/// Guard that mutably borrows a `Splitter` and writes selected bounds of a saved range
/// back into it when dropped.
#[derive(Debug)]
pub struct SavedIndices<'a, T> {
/// The guarded splitter; reachable through `Deref`/`DerefMut`.
splitter: &'a mut Splitter<T>,
/// Copy of the splitter's range taken when the guard was created.
saved_range: Range,
/// `(start, end)` flags selecting which bounds of `saved_range` are written back on drop.
preserve: (bool, bool),
}
impl<'a, T> std::ops::Deref for SavedIndices<'a, T> {
    type Target = Splitter<T>;

    /// Gives shared access to the guarded splitter so its `&self` methods can be
    /// called directly on the guard.
    fn deref(&self) -> &Self::Target {
        &*self.splitter
    }
}
impl<'a, T> std::ops::DerefMut for SavedIndices<'a, T> {
    /// Gives exclusive access to the guarded splitter so its `&mut self` methods can be
    /// called directly on the guard.
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut *self.splitter
    }
}
impl<'a, T> Drop for SavedIndices<'a, T> {
    /// Writes the bounds selected by `preserve` from the saved range back into the
    /// guarded splitter.
    fn drop(&mut self) {
        let snapshot = self.saved_range;
        let (restore_start, restore_end) = self.preserve;
        self.splitter.range.restore(snapshot, restore_start, restore_end);
    }
}
impl Range {
    /// Number of bytes between `start` and `end`.
    pub fn len(&self) -> usize {
        self.end - self.start
    }

    /// Shrinks the window and returns how many bytes the request was clamped to.
    ///
    /// `count` is first clamped to the current length. With `end == false` that many
    /// bytes are dropped from the front. With `end == true` the window is truncated so
    /// that only its first `count` bytes remain.
    ///
    /// A `Utf8` tag is downgraded to `Unknown` whenever the clamped count is non-zero,
    /// because the new boundary is not known to fall between characters. An `Ascii` tag
    /// is left alone.
    pub fn consume(&mut self, count: usize, end: bool) -> usize {
        let count = std::cmp::min(count, self.len());
        if end {
            self.end = self.start + count;
        } else {
            self.start += count;
        }
        if count > 0 && self.encoding == Encoding::Utf8 {
            self.encoding = Encoding::Unknown;
        }
        count
    }

    /// Writes the selected bounds of `saved` back into this range.
    ///
    /// `start` / `end` choose which bound is restored. When both are restored the saved
    /// encoding tag is restored as well.
    ///
    /// When only one bound is restored and that bound actually moves, the window now
    /// covers bytes that the current tag was never established for (the tag may have
    /// been computed while the window was narrower). In that case only the weaker of the
    /// current and the saved tag is kept, so the range never claims more than both the
    /// snapshot and the narrowed window can vouch for. If no bound moves, the current
    /// tag still describes exactly the same bytes and is kept.
    ///
    /// NOTE(review): this assumes the current window lies inside `saved`, which holds
    /// for every mutation visible in this file (they only shrink the window). An empty
    /// window carries no information about character boundaries, so a tag computed on
    /// an empty window can still be too optimistic after a one-sided restore.
    pub fn restore(&mut self, saved: Range, start: bool, end: bool) {
        let bound_moves =
            (start && self.start != saved.start) || (end && self.end != saved.end);
        if start {
            self.start = saved.start;
        }
        if end {
            self.end = saved.end;
        }
        if start && end {
            self.encoding = saved.encoding;
        } else if bound_moves {
            self.encoding = self.encoding.min(saved.encoding);
        }
    }

    /// Returns the part of `slice` selected by this range.
    ///
    /// Panics if `start..end` is not a valid index range for `slice`.
    pub fn constrain<'a, T>(&self, slice: &'a [T]) -> &'a [T] {
        &slice[self.start..self.end]
    }
}
impl<T> Splitter<T> {
    /// Reports whether the cached encoding tag is `Utf8` or `Ascii`.
    ///
    /// Only the tag is consulted; no bytes are inspected.
    pub fn is_utf8_lazy(&self) -> bool {
        match self.range.encoding {
            Encoding::Unknown => false,
            Encoding::Utf8 | Encoding::Ascii => true,
        }
    }

    /// Snapshots the current range and returns a guard that borrows `self`.
    ///
    /// When the guard is dropped, the bounds selected by `start` and `end` are written
    /// back from the snapshot.
    pub fn save(&mut self, start: bool, end: bool) -> SavedIndices<'_, T> {
        let snapshot = self.range;
        SavedIndices { splitter: self, saved_range: snapshot, preserve: (start, end) }
    }

    /// Shorthand for `save(false, true)`: only the end bound is written back on drop.
    pub fn save_end(&mut self) -> SavedIndices<'_, T> {
        let (restore_start, restore_end) = (false, true);
        self.save(restore_start, restore_end)
    }

    /// Returns `true` when no bytes remain in the current range.
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Number of bytes remaining in the current range.
    pub fn len(&self) -> usize {
        self.range.len()
    }
}
impl<T: AsRef<[u8]>> Splitter<T> {
    /// Classifies the remaining bytes unless the cached tag already ranks at least
    /// `accept`.
    ///
    /// The bytes are scanned for the first non-ASCII byte. If there is none the tag
    /// becomes `Ascii`. Otherwise everything from that byte onward must pass
    /// `std::str::from_utf8` for the tag to become `Utf8`. On failure the error is
    /// returned and the tag is left unchanged.
    fn check_encoding_inner(&mut self, accept: Encoding) -> Result<(), std::str::Utf8Error> {
        if self.range.encoding >= accept {
            return Ok(());
        }
        let remaining = self.as_slice();
        let detected = match remaining.iter().position(|byte| !byte.is_ascii()) {
            None => Encoding::Ascii,
            Some(first_non_ascii) => {
                std::str::from_utf8(&remaining[first_non_ascii..])?;
                Encoding::Utf8
            }
        };
        self.range.encoding = detected;
        Ok(())
    }

    /// Validates the remaining bytes and caches the result in the encoding tag.
    ///
    /// The scan is skipped only when the tag is already `Ascii`.
    ///
    /// # Errors
    ///
    /// Returns the `Utf8Error` produced for the bytes starting at the first non-ASCII
    /// byte when they are not valid UTF-8.
    pub fn check_encoding(&mut self) -> Result<(), std::str::Utf8Error> {
        self.check_encoding_inner(Encoding::Ascii)
    }

    /// The bytes that have not been consumed yet.
    pub fn as_slice(&self) -> &[u8] {
        let whole = self.string.as_ref();
        self.range.constrain(whole)
    }

    /// Returns the first remaining byte without consuming it.
    pub fn peek_byte(&self) -> Option<u8> {
        self.as_slice().iter().copied().next()
    }

    /// Consumes and returns the first remaining byte.
    ///
    /// Consuming a non-ASCII byte resets the encoding tag to `Unknown`.
    pub fn next_byte(&mut self) -> Option<u8> {
        let byte = self.peek_byte()?;
        self.range.start += 1;
        if !byte.is_ascii() {
            self.range.encoding = Encoding::Unknown;
        }
        Some(byte)
    }

    /// Returns the last remaining byte without consuming it.
    pub fn rpeek_byte(&self) -> Option<u8> {
        self.as_slice().iter().copied().next_back()
    }

    /// Consumes and returns the last remaining byte.
    ///
    /// Consuming a non-ASCII byte resets the encoding tag to `Unknown`.
    pub fn rnext_byte(&mut self) -> Option<u8> {
        let byte = self.rpeek_byte()?;
        self.range.end -= 1;
        if !byte.is_ascii() {
            self.range.encoding = Encoding::Unknown;
        }
        Some(byte)
    }

    /// Skips leading bytes for which `U::is_invalid` returns `true`.
    ///
    /// The encoding tag is not touched.
    pub fn consume_invalid<'a, U: BytesNewtype<'a>>(&mut self) {
        let remaining = self.as_slice();
        let skipped = remaining
            .iter()
            .position(|byte| !U::is_invalid(byte))
            .unwrap_or(remaining.len());
        self.range.start += skipped;
    }

    /// Skips leading ASCII whitespace.
    ///
    /// The encoding tag is not touched.
    pub fn consume_whitespace(&mut self) {
        let skipped = self.as_slice().iter().take_while(|byte| byte.is_ascii_whitespace()).count();
        self.range.start += skipped;
    }

    /// Truncates the range so that it ends just before the first byte matching `f`.
    ///
    /// Does nothing when no byte matches. Returns `self` for chaining.
    pub fn until_byte<F: FnMut(&u8) -> bool>(&mut self, f: F) -> &mut Self {
        let found = self.as_slice().iter().position(f);
        if let Some(keep) = found {
            self.range.consume(keep, true);
        }
        self
    }

    /// Truncates the range so that it ends just before the first occurrence of `byte`.
    ///
    /// Does nothing when `byte` does not occur. A `Utf8` tag is reset to `Unknown` only
    /// when `byte` is non-ASCII. Returns `self` for chaining.
    pub fn until_byte_eq(&mut self, byte: u8) -> &mut Self {
        let found = self.as_slice().iter().position(|candidate| *candidate == byte);
        if let Some(offset) = found {
            self.range.end = self.range.start + offset;
            let tag_is_utf8 = self.range.encoding == Encoding::Utf8;
            if tag_is_utf8 && !byte.is_ascii() {
                self.range.encoding = Encoding::Unknown;
            }
        }
        self
    }

    /// Truncates the range to at most its first `len` bytes. Returns `self` for chaining.
    pub fn until_count(&mut self, len: usize) -> &mut Self {
        self.range.consume(len, true);
        self
    }
}
impl<'a, T: BytesNewtype<'a>> Splitter<T> {
    /// Creates a splitter covering all of `string`.
    ///
    /// The encoding tag starts as `Utf8` when `string.is_utf8_lazy()` returns `true`
    /// and as `Unknown` otherwise.
    pub fn new(string: T) -> Splitter<T> {
        let utf8 = string.is_utf8_lazy();
        let end = string.as_ref().len();
        let encoding = if utf8 { Encoding::Utf8 } else { Encoding::Unknown };
        Self { string, range: Range { start: 0, end, encoding } }
    }

    /// Forwards to `is_secret` on the backing string.
    pub fn is_secret(&self) -> bool {
        self.string.is_secret()
    }

    /// Returns the remaining bytes with the backing string's lifetime `'a` rather than
    /// the lifetime of `&self`.
    ///
    /// # Safety
    ///
    /// NOTE(review): the caller obligations are those of `as_bytes_unsafe` on the
    /// backing string, whose contract is not visible in this file. Confirm there.
    unsafe fn as_slice_unsafe(&self) -> &'a [u8] {
        self.range.constrain(self.string.as_bytes_unsafe())
    }

    /// Builds a `U` from all remaining bytes without consuming them.
    ///
    /// # Errors
    ///
    /// Returns the error reported by `U::check_others` for the remaining bytes.
    pub fn peek_rest<U: BytesNewtype<'a> + From<T>>(&self) -> Result<U, InvalidString> {
        if let Some(e) = U::check_others(self.as_slice()) {
            return Err(e);
        }
        // SAFETY: NOTE(review) the bytes passed `U::check_others` above. Whether that is
        // sufficient for `from_unchecked` depends on the `BytesNewtype` contract, which
        // is defined outside this file.
        unsafe {
            let bytes =
                self.string.using_value(self.as_slice_unsafe(), self.is_utf8_lazy()).into_bytes();
            Ok(U::from_unchecked(bytes))
        }
    }

    /// Builds a `U` from the leading remaining bytes without consuming them.
    ///
    /// The candidate bytes run up to, but not including, the first byte for which
    /// `U::is_invalid` returns `true`. If `require_rest` is `true`, the presence of such
    /// a byte is an error instead.
    ///
    /// # Errors
    ///
    /// Returns `InvalidString::Byte` for the first rejected byte when `require_rest` is
    /// `true`, or the error reported by `U::check_others` for the candidate bytes.
    pub fn peek_string<U: BytesNewtype<'a>>(&self, require_rest: bool) -> Result<U, InvalidString> {
        // SAFETY: NOTE(review) the candidate bytes contain no byte rejected by
        // `U::is_invalid` and passed `U::check_others`. Whether that is sufficient for
        // `from_unchecked` depends on the `BytesNewtype` contract, which is defined
        // outside this file.
        unsafe {
            let mut slice = self.as_slice_unsafe();
            slice = if let Some(idx) = slice.iter().position(U::is_invalid) {
                if !require_rest {
                    &slice[..idx]
                } else {
                    return Err(InvalidString::Byte(slice[idx]));
                }
            } else {
                slice
            };
            if let Some(e) = U::check_others(slice) {
                return Err(e);
            }
            let bytes = self.string.using_value(slice, self.is_utf8_lazy()).into_bytes();
            Ok(U::from_unchecked(bytes))
        }
    }

    /// Like [`peek_rest`](Self::peek_rest), but consumes the returned bytes on success.
    ///
    /// On error nothing is consumed.
    pub fn rest<U: BytesNewtype<'a> + From<T>>(&mut self) -> Result<U, InvalidString> {
        let rest = self.peek_rest::<U>()?;
        self.range.start += rest.as_ref().len();
        Ok(rest)
    }

    /// Like [`rest`](Self::rest), but returns `U::default()` when extraction fails.
    ///
    /// The cursor only moves when extraction succeeds. It must not be advanced by the
    /// length of the fallback value: a non-empty default would skip bytes that were
    /// never parsed, or push `start` past `end`.
    pub fn rest_or_default<U: BytesNewtype<'a> + From<T> + Default>(&mut self) -> U {
        self.rest::<U>().unwrap_or_default()
    }

    /// Like [`peek_string`](Self::peek_string), but consumes the returned bytes on
    /// success.
    ///
    /// On error nothing is consumed.
    pub fn string<U: BytesNewtype<'a>>(&mut self, require_rest: bool) -> Result<U, InvalidString> {
        let next = self.peek_string::<U>(require_rest)?;
        self.range.start += next.as_ref().len();
        Ok(next)
    }

    /// Like [`string`](Self::string), but returns `U::default()` when extraction fails.
    ///
    /// The cursor only moves when extraction succeeds. It must not be advanced by the
    /// length of the fallback value: a non-empty default would skip bytes that were
    /// never parsed, or push `start` past `end`.
    pub fn string_or_default<U: BytesNewtype<'a> + Default>(&mut self, require_rest: bool) -> U {
        self.string::<U>(require_rest).unwrap_or_default()
    }
}
impl<T: AsRef<[u8]>> AsRef<[u8]> for Splitter<T> {
    /// Returns every byte of the backing string, ignoring the current range.
    fn as_ref(&self) -> &[u8] {
        <T as AsRef<[u8]>>::as_ref(&self.string)
    }
}
impl<'a, T: AsRef<[u8]>> AsRef<[u8]> for SavedIndices<'a, T> {
fn as_ref(&self) -> &[u8] {
self.splitter.as_ref()
}
}
impl<T: AsRef<[u8]>> Iterator for Splitter<T> {
    type Item = u8;

    /// Yields the remaining bytes front to back by delegating to `next_byte`.
    fn next(&mut self) -> Option<u8> {
        Self::next_byte(self)
    }
}
impl<'a, T: AsRef<[u8]>> Iterator for SavedIndices<'a, T> {
    type Item = u8;

    /// Yields the guarded splitter's remaining bytes front to back by delegating to
    /// its `next_byte`.
    fn next(&mut self) -> Option<u8> {
        self.splitter.next_byte()
    }
}