use core::ops::RangeInclusive;
use alloc::vec::Vec;
/// Mask that keeps the low six bits of a byte (the payload bits of a UTF-8 continuation byte).
const CONT_MASK: u8 = 0b0011_1111;
/// Returns the payload bits of the first byte of a UTF-8 sequence.
///
/// The 7-bit mask `0x7F` is shifted right by `width`, so only the low
/// `7 - width` bits of `byte` survive; the result is widened to `u32`.
#[inline]
const fn utf8_first_byte(byte: u8, width: u32) -> u32 {
    let payload_mask: u8 = 0x7F >> width;
    (byte & payload_mask) as u32
}
/// Appends the six payload bits of continuation byte `byte` to the
/// partially decoded value `ch`.
///
/// `ch` is shifted left by six bits to make room, then the masked byte is
/// OR-ed into the freed low bits.
#[inline]
const fn utf8_acc_cont_byte(ch: u32, byte: u8) -> u32 {
    let payload = (byte & CONT_MASK) as u32;
    let shifted = ch << 6;
    shifted | payload
}
/// Decodes the next UTF-8 encoded code point from a byte iterator.
///
/// Returns the decoded scalar value together with the number of bytes that
/// were consumed (1 to 4), or `None` when the iterator is already exhausted.
///
/// # Safety
///
/// Once a non-ASCII first byte has been read, the continuation bytes are
/// fetched with `unwrap_unchecked`. The caller must therefore guarantee that
/// `bytes` yields a well-formed UTF-8 sequence: every multi-byte lead byte
/// must be followed by all of its continuation bytes.
#[inline]
pub unsafe fn next_code_point<'a, I: Iterator<Item = &'a u8>>(
    bytes: &mut I,
) -> Option<(u32, usize)> {
    let first = *bytes.next()?;
    if first < 0x80 {
        // Single-byte (ASCII) sequence.
        return Some((u32::from(first), 1));
    }

    // Payload of the lead byte, masked as for a two-byte sequence.
    let lead = utf8_first_byte(first, 2);
    // SAFETY: the caller guarantees a continuation byte follows a lead byte.
    let second = unsafe { *bytes.next().unwrap_unchecked() };
    if first < 0xE0 {
        return Some((utf8_acc_cont_byte(lead, second), 2));
    }

    // SAFETY: a lead byte >= 0xE0 is followed by at least two continuation bytes.
    let third = unsafe { *bytes.next().unwrap_unchecked() };
    let tail = utf8_acc_cont_byte((second & CONT_MASK) as u32, third);
    if first < 0xF0 {
        return Some((lead << 12 | tail, 3));
    }

    // SAFETY: a lead byte >= 0xF0 is followed by three continuation bytes.
    let fourth = unsafe { *bytes.next().unwrap_unchecked() };
    // Only the low three bits of the lead byte carry payload in a four-byte sequence.
    Some(((lead & 7) << 18 | utf8_acc_cont_byte(tail, fourth), 4))
}
/// A decoded character together with the size of its encoding.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct Character {
    /// The decoded character.
    pub ch: char,
    /// Number of bytes the character occupies in the underlying source.
    pub length: usize,
}
/// A cursor over character input that can be inspected and advanced
/// asynchronously.
#[allow(async_fn_in_trait)]
pub trait Source {
    /// Returns the current cursor position.
    fn current_position(&self) -> usize;

    /// Moves the cursor to `pos`.
    fn set_position(&mut self, pos: usize);

    /// Returns the character at the cursor without advancing, or `None`
    /// when no character is available.
    async fn peek(&mut self) -> Option<Character>;

    /// Tries to match `string` at the cursor and reports whether it matched.
    async fn match_str(&mut self, string: &str) -> bool;

    /// Consumes the character at the cursor if it equals `ch`.
    ///
    /// Returns `true` and advances the cursor by the character's length on a
    /// match; otherwise returns `false` and leaves the cursor untouched.
    async fn match_char(&mut self, ch: char) -> bool {
        match self.peek().await {
            Some(found) if found.ch == ch => {
                let next = self.current_position() + found.length;
                self.set_position(next);
                true
            }
            _ => false,
        }
    }

    /// Consumes the character at the cursor if it lies within `r`.
    ///
    /// Returns the matched character and advances the cursor by its length;
    /// returns `None` and leaves the cursor untouched otherwise.
    async fn match_char_range(&mut self, r: RangeInclusive<char>) -> Option<char> {
        let found = self.peek().await?;
        if !r.contains(&found.ch) {
            return None;
        }
        let next = self.current_position() + found.length;
        self.set_position(next);
        Some(found.ch)
    }
}
/// Conversion of a value into a [`Source`].
pub trait IntoSource {
    /// The concrete source type produced by the conversion.
    type Source: Source;
    /// Consumes `self` and returns the corresponding source.
    fn into(self) -> Self::Source;
}
/// A source that reads from a borrowed string slice.
pub struct StrSource<'a> {
    /// The text being read.
    s: &'a str,
    /// Current byte offset into `s`.
    pos: usize,
}
/// A `StrSource` already is a source, so the conversion is the identity.
impl IntoSource for StrSource<'_> {
    type Source = Self;

    fn into(self) -> Self {
        self
    }
}
/// Borrows the string view of any `AsRef<str>` value and reads from it.
impl<'a, T> IntoSource for &'a T
where
    T: AsRef<str>,
{
    type Source = StrSource<'a>;

    fn into(self) -> StrSource<'a> {
        let text: &'a str = self.as_ref();
        StrSource::new(text)
    }
}
/// Wraps a string slice in a [`StrSource`] positioned at its start.
impl<'a> IntoSource for &'a str {
    type Source = StrSource<'a>;

    fn into(self) -> StrSource<'a> {
        StrSource::new(self)
    }
}
impl<'a> StrSource<'a> {
    /// Creates a source over `s` with the cursor at byte offset 0.
    pub const fn new(s: &'a str) -> Self {
        Self { s, pos: 0 }
    }
}
/// [`Source`] implementation backed by an in-memory string slice.
///
/// Positions are byte offsets into the string.
impl<'a> Source for StrSource<'a> {
    /// Returns the current byte offset.
    #[inline]
    fn current_position(&self) -> usize {
        self.pos
    }

    /// Sets the current byte offset.
    ///
    /// The offset is stored as given; `peek` and `match_str` cope with
    /// offsets that are out of range or not on a character boundary.
    #[inline]
    fn set_position(&mut self, pos: usize) {
        self.pos = pos;
    }

    /// Returns the character at the current offset without advancing.
    ///
    /// Returns `None` at the end of the string, and also when the offset is
    /// past the end or falls inside a multi-byte character.
    #[inline]
    async fn peek(&mut self) -> Option<Character> {
        // `set_position` is a safe API that accepts any offset, so the offset
        // must be validated here: `str::get` yields `None` for out-of-range
        // or non-boundary offsets instead of panicking or decoding garbage.
        let ch = self.s.get(self.pos..)?.chars().next()?;
        Some(Character {
            ch,
            length: ch.len_utf8(),
        })
    }

    /// Consumes `string` if the remaining input starts with it.
    ///
    /// An empty `string` always matches. The comparison is byte-wise; on a
    /// match the offset advances by `string.len()` bytes.
    #[inline]
    async fn match_str(&mut self, string: &str) -> bool {
        if string.is_empty() {
            return true;
        }
        // Checked slicing: an offset past the end simply fails to match
        // rather than underflowing `len - pos` or panicking on the slice.
        let matched = self
            .s
            .as_bytes()
            .get(self.pos..)
            .is_some_and(|rest| rest.starts_with(string.as_bytes()));
        if matched {
            self.pos += string.len();
        }
        matched
    }
}
/// A source that pulls its input on demand from an asynchronous byte reader.
#[cfg(feature = "futures")]
pub struct AsyncStrSource<T: futures::AsyncRead + Unpin> {
    /// The underlying asynchronous reader.
    reader: T,
    /// Bytes obtained from `reader` so far; appended to as more input is needed.
    pub buffer: Vec<u8>,
    /// Current byte offset into `buffer`.
    pos: usize,
    /// Set once reading from `reader` has stopped for good; no further reads
    /// are attempted afterwards.
    is_eof: bool,
}
/// An `AsyncStrSource` already is a source, so the conversion is the identity.
// Gated on the same feature as the type it names; without this attribute the
// crate does not compile when the `futures` feature is disabled.
#[cfg(feature = "futures")]
impl<T: futures::AsyncRead + Unpin> IntoSource for AsyncStrSource<T> {
    type Source = Self;

    fn into(self) -> Self::Source {
        self
    }
}
#[cfg(feature = "futures")]
impl<T: futures::AsyncRead + Unpin> AsyncStrSource<T> {
    /// Creates a source over `reader` with an empty buffer and the cursor at
    /// offset 0. Nothing is read until input is first requested.
    pub fn new(reader: T) -> Self {
        Self {
            reader,
            buffer: Vec::new(),
            pos: 0,
            is_eof: false,
        }
    }

    /// Decodes the UTF-8 sequence starting at the current position without
    /// moving the cursor.
    ///
    /// Returns the decoded value and the number of bytes it spans (1 to 4),
    /// or `None` if the input ends before the sequence is complete. The bytes
    /// are not validated, so the returned value is not guaranteed to be a
    /// valid Unicode scalar value.
    #[inline]
    async fn next_code_point(&mut self) -> Option<(u32, usize)> {
        let x = self.next_u8(0).await?;
        if x < 128 {
            // Single-byte (ASCII) sequence.
            return Some((u32::from(x), 1));
        }

        // Payload of the lead byte, masked as for a two-byte sequence.
        let init = utf8_first_byte(x, 2);
        let y = self.next_u8(1).await?;
        if x < 0xE0 {
            return Some((utf8_acc_cont_byte(init, y), 2));
        }

        let z = self.next_u8(2).await?;
        let y_z = utf8_acc_cont_byte((y & CONT_MASK) as u32, z);
        if x < 0xF0 {
            return Some((init << 12 | y_z, 3));
        }

        let w = self.next_u8(3).await?;
        // Only the low three bits of the lead byte carry payload here.
        Some(((init & 7) << 18 | utf8_acc_cont_byte(y_z, w), 4))
    }

    /// Returns the byte `offset` bytes after the current position, reading
    /// more input as needed.
    ///
    /// Returns `None` when the input ends before that byte becomes available.
    #[inline]
    async fn next_u8(&mut self, offset: usize) -> Option<u8> {
        let pos = self.pos + offset;
        loop {
            if let Some(&byte) = self.buffer.get(pos) {
                return Some(byte);
            }
            // `read_buf` yields `None` once no more input can arrive, which
            // ends the loop instead of polling an exhausted reader forever.
            self.read_buf().await?;
        }
    }

    /// Reads one chunk (up to 128 bytes) from the reader and appends it to
    /// `buffer`.
    ///
    /// Returns the number of bytes appended, which is always non-zero.
    /// Returns `None` when the end of input was reached, either now or on an
    /// earlier call, or when the read failed. The `Source` API has no error
    /// channel, so a read error is treated like end of input.
    #[inline]
    async fn read_buf(&mut self) -> Option<usize> {
        use futures::AsyncReadExt;

        if self.is_eof {
            return None;
        }
        // Zero-initialised on purpose: handing uninitialised memory to the
        // reader as `&mut [u8]` is undefined behaviour.
        let mut chunk = [0u8; 128];
        match self.reader.read(&mut chunk).await {
            // A zero-length read signals end of input.
            Ok(0) | Err(_) => {
                self.is_eof = true;
                None
            }
            Ok(read) => {
                self.buffer.extend_from_slice(&chunk[..read]);
                Some(read)
            }
        }
    }
}
/// [`Source`] implementation that reads from an asynchronous byte reader.
///
/// Positions are byte offsets into the internal buffer.
#[cfg(feature = "futures")]
impl<T: futures::AsyncRead + Unpin> Source for AsyncStrSource<T> {
    /// Returns the current byte offset.
    fn current_position(&self) -> usize {
        self.pos
    }

    /// Sets the current byte offset.
    fn set_position(&mut self, pos: usize) {
        self.pos = pos;
    }

    /// Returns the character at the current offset without advancing.
    ///
    /// Returns `None` when the input ends before a complete sequence is
    /// available, or when the decoded value is not a valid Unicode scalar
    /// value.
    async fn peek(&mut self) -> Option<Character> {
        let (code, length) = self.next_code_point().await?;
        // The reader's bytes are unvalidated, so the decoded value may be a
        // surrogate or exceed 0x10FFFF; building a `char` from it unchecked
        // would be undefined behaviour.
        let ch = char::from_u32(code)?;
        Some(Character { ch, length })
    }

    /// Consumes `string` if the input at the current offset starts with it.
    ///
    /// An empty `string` always matches. More input is read until enough
    /// bytes are buffered; if the input ends first the result is `false`.
    /// The comparison is byte-wise; on a match the offset advances by
    /// `string.len()` bytes.
    async fn match_str(&mut self, string: &str) -> bool {
        if string.is_empty() {
            return true;
        }
        let needle = string.as_bytes();
        // `saturating_sub` keeps an offset beyond the buffered data from
        // underflowing; it just means more input is required.
        while self.buffer.len().saturating_sub(self.pos) < needle.len() {
            match self.read_buf().await {
                Some(read) if read > 0 => {}
                // No bytes arrived (end of input or read failure), so the
                // match can never complete; stop instead of looping forever.
                _ => return false,
            }
        }
        // The loop guarantees at least `needle.len()` bytes follow `pos`.
        if self.buffer[self.pos..].starts_with(needle) {
            self.pos += needle.len();
            true
        } else {
            false
        }
    }
}