#![cfg_attr(not(feature = "std"), no_std)]
#![forbid(unsafe_code)]
#![warn(rust_2018_idioms)]
#![warn(clippy::missing_docs_in_private_items)]
#![warn(clippy::unwrap_used)]
extern crate alloc;
mod iter;
mod range;
mod utf8;
use core::cmp::Ordering;
use core::fmt;
use core::hash;
use core::mem;
use core::num::NonZeroUsize;
use core::ops::{Bound, RangeBounds};
use core::str;
use alloc::collections::VecDeque;
pub use crate::iter::{Chars, Fragment, Fragments, IntoChars};
pub use crate::range::{BytesRange, CharsRange, CharsRangePoppedLine};
use crate::utf8::REPLACEMENT_CHAR_STR;
/// Queue of bytes that is consumed as text.
///
/// All bytes live in a [`VecDeque`]. The last `len_incomplete` bytes of that
/// buffer are the bytes of a character that has not been completely received
/// yet; everything before them is treated as complete content.
#[derive(Default, Debug, Clone)]
pub struct StrQueue {
    /// Raw bytes currently held by the queue.
    inner: VecDeque<u8>,
    /// Number of bytes at the end of `inner` that belong to an incomplete
    /// character.
    len_incomplete: u8,
}

/// Constructors.
impl StrQueue {
    /// Creates an empty queue with no pending incomplete bytes.
    #[inline]
    #[must_use]
    pub fn new() -> Self {
        Self {
            inner: VecDeque::new(),
            len_incomplete: 0,
        }
    }

    /// Creates an empty queue whose byte buffer is allocated with
    /// [`VecDeque::with_capacity`] for `capacity` bytes.
    #[inline]
    #[must_use]
    pub fn with_capacity(capacity: usize) -> Self {
        let inner = VecDeque::with_capacity(capacity);
        Self {
            inner,
            len_incomplete: 0,
        }
    }
}
impl StrQueue {
#[inline]
#[must_use]
pub fn capacity(&self) -> usize {
self.inner.capacity()
}
#[inline]
pub fn reserve(&mut self, additional: usize) {
self.inner.reserve(additional);
}
#[inline]
pub fn reserve_exact(&mut self, additional: usize) {
self.inner.reserve_exact(additional);
}
#[inline]
pub fn shrink_to_fit(&mut self) {
self.inner.shrink_to_fit();
}
}
/// Subrange access.
impl StrQueue {
    /// Creates a [`BytesRange`] over this queue for the given byte index
    /// range.
    ///
    /// The range is handed to `BytesRange::new` unchanged; how out-of-range
    /// bounds are treated is decided by that constructor.
    #[inline]
    #[must_use]
    pub fn bytes_range<R: RangeBounds<usize>>(&self, range: R) -> BytesRange<'_> {
        BytesRange::new(self, range)
    }

    /// Creates a [`CharsRange`] over this queue for the given byte index
    /// range.
    ///
    /// The range is handed to `CharsRange::new` unchanged; any validation of
    /// the bounds is done by that constructor.
    #[inline]
    #[must_use]
    pub fn chars_range<R: RangeBounds<usize>>(&self, range: R) -> CharsRange<'_> {
        CharsRange::new(self, range)
    }
}
/// Length and completeness queries.
impl StrQueue {
    /// Returns the total number of bytes in the queue, including the bytes of
    /// a trailing incomplete character.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        self.inner.len()
    }

    /// Returns `true` if the queue holds no bytes at all.
    #[inline]
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.inner.is_empty()
    }

    /// Returns the number of bytes that precede the trailing incomplete
    /// character.
    #[inline]
    #[must_use]
    pub fn len_complete(&self) -> usize {
        self.inner.len() - self.len_incomplete()
    }

    /// Returns the number of trailing bytes that belong to an incomplete
    /// character.
    #[inline]
    #[must_use]
    pub fn len_incomplete(&self) -> usize {
        usize::from(self.len_incomplete)
    }

    /// Returns `true` if every byte in the queue (if any) belongs to the
    /// trailing incomplete character, i.e. there is no complete content.
    #[inline]
    #[must_use]
    pub fn is_empty_complete(&self) -> bool {
        // Lossless widening via the accessor instead of an `as` cast, like the
        // other getters in this block.
        self.inner.len() == self.len_incomplete()
    }

    /// Returns `true` if the queue does not end with an incomplete character.
    // `#[inline]` and `#[must_use]` added for consistency with the sibling
    // getters; ignoring the result of a pure query is always a mistake.
    #[inline]
    #[must_use]
    pub fn is_complete(&self) -> bool {
        self.len_incomplete == 0
    }
}
impl StrQueue {
/// Removes every byte from the queue, including the bytes of a trailing
/// incomplete character.
#[inline]
pub fn clear(&mut self) {
    self.inner.clear();
    // `len_incomplete` counts bytes at the tail of `inner`. Once the buffer
    // is empty the counter must be zero as well, otherwise `len_complete()`
    // would subtract a stale count from an empty length.
    self.len_incomplete = 0;
}
/// Discards the bytes of the trailing incomplete character, if any.
///
/// Returns the number of bytes that were removed (`0` when the queue was
/// already complete).
pub fn trim_last_incomplete_char(&mut self) -> usize {
    let complete_len = self.len_complete();
    self.inner.truncate(complete_len);
    // Reset the counter and report how many bytes it covered.
    let trimmed = mem::take(&mut self.len_incomplete);
    usize::from(trimmed)
}
/// Appends a string to the queue.
///
/// If the queue currently ends with an incomplete character, those bytes can
/// no longer be completed, so they are replaced with `REPLACEMENT_CHAR_STR`
/// before `s` is appended.
pub fn push_str(&mut self, s: &str) {
    let pending = self.len_incomplete();
    if pending > 0 {
        // Cut off the dangling bytes and substitute the replacement string.
        let complete_len = self.inner.len() - pending;
        self.inner.truncate(complete_len);
        self.inner.extend(REPLACEMENT_CHAR_STR.as_bytes());
        self.len_incomplete = 0;
    }
    self.inner.extend(s.as_bytes());
}
/// Appends a single character to the queue.
///
/// The character is UTF-8 encoded into a stack buffer and pushed through
/// `push_str`, so a pending incomplete character is handled the same way as
/// there.
pub fn push_char(&mut self, c: char) {
    let mut utf8_buf = [0_u8; 4];
    let encoded: &str = c.encode_utf8(&mut utf8_buf);
    self.push_str(encoded);
}
/// Appends arbitrary bytes to the queue.
///
/// Pending incomplete bytes are resolved first by `process_incomplete_bytes`.
/// The remaining input is then validated chunk by chunk: valid UTF-8 is
/// stored as is, each invalid sequence is replaced with
/// `REPLACEMENT_CHAR_STR`, and a trailing incomplete sequence is stored and
/// recorded in `len_incomplete`.
///
/// # Panics
///
/// Panics if incomplete bytes remain pending although input is still
/// available (an internal consistency violation).
pub fn push_bytes(&mut self, mut bytes: &[u8]) {
    // Keep feeding the input to the pending bytes while that makes progress.
    while let (rest, true) = self.process_incomplete_bytes(bytes) {
        bytes = rest;
    }
    if self.len_incomplete != 0 {
        assert!(
            bytes.is_empty(),
            "[consistency] it is not expected to give up processing \
             incomplete bytes before consuming all the inputs"
        );
        return;
    }

    let mut remaining = bytes;
    while !remaining.is_empty() {
        match str::from_utf8(remaining) {
            Ok(valid) => {
                // Everything left is valid: store it and finish.
                self.inner.extend(valid.as_bytes());
                break;
            }
            Err(err) => {
                let (valid, after_valid) = remaining.split_at(err.valid_up_to());
                match err.error_len() {
                    Some(invalid_len) => {
                        // Invalid sequence: keep the valid prefix, substitute
                        // the invalid bytes, and continue after them.
                        self.inner.extend(valid);
                        self.inner.extend(REPLACEMENT_CHAR_STR.as_bytes());
                        remaining = &after_valid[invalid_len..];
                    }
                    None => {
                        // The input ends in the middle of a character: store
                        // everything and remember the incomplete tail.
                        self.inner.extend(remaining);
                        self.len_incomplete = after_valid.len() as u8;
                        break;
                    }
                }
            }
        }
    }
}
/// Tries to resolve the pending bytes at the end of the queue using the
/// beginning of `bytes`.
///
/// Returns the part of `bytes` that has not been consumed and whether any
/// progress was made. One call performs one step, so the caller is expected
/// to call it again until no progress is reported:
///
/// * the pending bytes (plus input) form a complete character: it becomes
///   part of the complete content;
/// * they form an invalid sequence: the invalid bytes are replaced with
///   `REPLACEMENT_CHAR_STR` and the bytes after them stay pending;
/// * they are still an incomplete prefix: the consumed input is stored and
///   stays pending until more input arrives.
///
/// Compared to the previous implementation this version
/// * never slices `bytes` beyond its length (a multi-byte character fed in
///   several short pushes used to panic),
/// * stores the consumed input when the character is still incomplete, and
/// * keeps all pending bytes that follow an invalid sequence instead of only
///   those up to the expected character length.
///
/// # Panics
///
/// Panics on internal consistency violations (see the assertion messages).
fn process_incomplete_bytes<'a>(&mut self, bytes: &'a [u8]) -> (&'a [u8], bool) {
    let len_incomplete = usize::from(self.len_incomplete);
    if len_incomplete == 0 {
        // Nothing is pending.
        return (bytes, false);
    }

    // Copy the pending bytes into a scratch buffer.
    let mut buf = [0_u8; 4];
    self.inner
        .range(self.len_complete()..)
        .zip(&mut buf[..])
        .for_each(|(src, dest)| *dest = *src);

    // NOTE(review): `expected_char_len` is assumed to return the character
    // length implied by the leading byte -- confirm against `utf8`.
    let len_expected = usize::from(utf8::expected_char_len(buf[0]));
    // Take only as many input bytes as are both missing and available.
    let len_from_input = len_expected
        .saturating_sub(len_incomplete)
        .min(bytes.len());
    let (consumed, rest) = bytes.split_at(len_from_input);
    // Number of meaningful bytes in `buf`.
    let len_available = len_incomplete + len_from_input;
    buf[len_incomplete..len_available].copy_from_slice(consumed);
    // Only the first character candidate is validated in this step.
    let len_to_check = len_available.min(len_expected);

    let e = match str::from_utf8(&buf[..len_to_check]) {
        Ok(_) => {
            // The leading pending bytes are a complete character now. Bytes
            // after it (if any) remain pending for the next step.
            self.inner.extend(consumed);
            self.len_incomplete = (len_available - len_to_check) as u8;
            return (rest, true);
        }
        Err(e) => e,
    };
    assert_eq!(
        e.valid_up_to(),
        0,
        "[consistency] the buffer must have no valid leading characters here"
    );
    let error_len = match e.error_len() {
        Some(len) => len,
        None => {
            // Still an incomplete prefix, which is only possible when the
            // input ran out before the character could be completed.
            assert_eq!(
                len_from_input,
                bytes.len(),
                "[consistency] if more inputs are available, bytes must be \
                 able to be completed or discarded"
            );
            self.inner.extend(consumed);
            self.len_incomplete = len_available as u8;
            return (rest, len_from_input != 0);
        }
    };

    // Invalid sequence: drop the pending bytes, emit the replacement, and
    // re-queue every byte after the invalid part as pending.
    self.inner.truncate(self.len_complete());
    self.inner.extend(REPLACEMENT_CHAR_STR.as_bytes());
    self.inner.extend(&buf[error_len..len_available]);
    self.len_incomplete = (len_available - error_len) as u8;
    (rest, true)
}
/// Removes the first character from the queue and returns it.
///
/// Returns `None` (leaving the queue untouched) when `first_char_and_len`
/// finds no character at the front of the queue.
pub fn pop_char(&mut self) -> Option<char> {
    self.first_char_and_len().map(|(c, len)| {
        // Drop the bytes that encoded the character.
        self.inner.drain(..usize::from(len));
        c
    })
}
/// Removes the first character from the queue and returns it, substituting
/// U+FFFD when only undecodable bytes are left.
///
/// * A character at the front is popped and returned as is.
/// * If no character can be popped but the queue is not empty, all remaining
///   bytes are discarded and `'\u{FFFD}'` is returned.
/// * If the queue is empty, `None` is returned.
pub fn pop_char_replaced(&mut self) -> Option<char> {
    self.pop_char().or_else(|| {
        if self.is_empty() {
            return None;
        }
        // The leftover bytes are discarded as a whole, so the incomplete-byte
        // counter must be reset together with the buffer; otherwise it would
        // describe bytes that no longer exist.
        self.inner.clear();
        self.len_incomplete = 0;
        Some('\u{FFFD}')
    })
}
/// Pops the first line of the queue, if `first_line` reports one.
///
/// The returned [`PoppedLine`] borrows the queue mutably and removes the line
/// from the queue when it is dropped.
///
/// # Panics
///
/// Panics if the reported first line has zero length.
#[inline]
pub fn pop_line(&mut self) -> Option<PoppedLine<'_>> {
    let first_line_len = match self.first_line() {
        Some(line) => line.len(),
        None => return None,
    };
    let line_len = NonZeroUsize::new(first_line_len)
        .expect("[validity] a complete line must not be empty since it has a line break");
    Some(PoppedLine {
        queue: self,
        line_len,
    })
}
/// Pops the first fragment of the queue.
///
/// The queue's bytes are stored in up to two slices (see
/// [`VecDeque::as_slices`]). A fragment is, in order of preference:
///
/// * the longest prefix of the first slice that ends on a character boundary
///   (`Former`),
/// * the single character that straddles both slices (`Char`),
/// * the analogous prefix of the second slice when the first is empty
///   (`Latter`),
/// * or everything, when only an incomplete character is left
///   (`AllConsumed`).
///
/// Returns `None` for an empty queue. The returned [`PoppedFragment`]
/// removes the fragment from the queue when dropped.
///
/// A first slice ending with a complete character is now reported as
/// `Former { rest: 0 }`; it used to be reported as `Char`, which is reserved
/// for a character split across the two slices and made the fragment content
/// disagree with the number of bytes removed on drop.
///
/// # Panics
///
/// Panics if `utf8::last_char_len_in_last_4bytes` cannot classify the end of
/// a non-empty slice.
pub fn pop_fragment(&mut self) -> Option<PoppedFragment<'_>> {
    if self.inner.is_empty() {
        return None;
    }

    let (former, latter) = self.inner.as_slices();
    let after_state = if !former.is_empty() {
        let last_len = utf8::last_char_len_in_last_4bytes(former)
            .expect("[validity] the queue content must start with valid UTF-8 string");
        if last_len.is_complete() {
            // The whole first slice ends on a character boundary.
            StateAfterPoppingFragment::Former { rest: 0 }
        } else if usize::from(last_len.available) != former.len() {
            // Complete content followed by the beginning of a character.
            StateAfterPoppingFragment::Former {
                rest: last_len.available,
            }
        } else {
            // The first slice is nothing but the beginning of one character.
            let len_missing = last_len.len_missing();
            if len_missing <= latter.len() {
                // The second slice has enough bytes to finish the character.
                StateAfterPoppingFragment::Char {
                    latter_consumed: len_missing as u8,
                }
            } else {
                // The queue holds only an incomplete character.
                StateAfterPoppingFragment::AllConsumed
            }
        }
    } else {
        // NOTE(review): with the current `VecDeque` the first slice is only
        // empty when the deque is empty, so this branch is defensive.
        let last_len = utf8::last_char_len_in_last_4bytes(latter)
            .expect("[validity] the queue content must start with valid UTF-8 string");
        if last_len.is_complete() {
            StateAfterPoppingFragment::Latter { rest: 0 }
        } else if usize::from(last_len.available) != latter.len() {
            // Mirror of the first-slice case: keep the incomplete tail in the
            // queue instead of discarding the valid prefix with it.
            StateAfterPoppingFragment::Latter {
                rest: last_len.available,
            }
        } else {
            StateAfterPoppingFragment::AllConsumed
        }
    };

    Some(PoppedFragment {
        queue: self,
        after_state,
    })
}
}
/// Peeking at the front of the queue.
impl StrQueue {
    /// Decodes the character at the front of the queue with `utf8::take_char`,
    /// looking at no more than the first four bytes.
    ///
    /// Returns the character together with the length value reported by
    /// `utf8::take_char` (used by `pop_char` as the number of bytes to
    /// remove), or `None` when no character can be taken.
    #[must_use]
    fn first_char_and_len(&self) -> Option<(char, u8)> {
        utf8::take_char(self.inner.iter().take(4).copied())
    }

    /// Returns the first character of the queue without removing it.
    #[inline]
    #[must_use]
    pub fn first_char(&self) -> Option<char> {
        let (c, _len) = self.first_char_and_len()?;
        Some(c)
    }

    /// Returns the first line of the queue without removing it, as reported
    /// by `CharsRange::first_line` for the range covering the whole queue.
    #[inline]
    #[must_use]
    pub fn first_line(&self) -> Option<CharsRange<'_>> {
        let whole = self.chars_range(..);
        whole.first_line()
    }
}
/// Iterators and display helpers.
///
/// Every method takes a [`PartialHandling`] that is forwarded to the created
/// helper.
impl StrQueue {
    /// Consumes the queue and returns an iterator of its characters.
    #[inline]
    #[must_use]
    pub fn into_chars(self, partial_handling: PartialHandling) -> IntoChars {
        IntoChars::new(self, partial_handling)
    }

    /// Returns an iterator of the characters of the whole queue.
    #[inline]
    #[must_use]
    pub fn chars(&self, partial_handling: PartialHandling) -> Chars<'_> {
        self.range_chars(.., partial_handling)
    }

    /// Returns an iterator of the characters in the given byte index range.
    #[must_use]
    pub fn range_chars<R: RangeBounds<usize>>(
        &self,
        range: R,
        partial_handling: PartialHandling,
    ) -> Chars<'_> {
        Chars::new(self, range, partial_handling)
    }

    /// Returns an iterator of the fragments of the whole queue.
    #[inline]
    #[must_use]
    pub fn fragments(&self, partial_handling: PartialHandling) -> Fragments<'_> {
        Fragments::new(self, .., partial_handling)
    }

    /// Returns an iterator of the fragments in the given byte index range.
    #[inline]
    #[must_use]
    pub fn range_fragments<R: RangeBounds<usize>>(
        &self,
        range: R,
        partial_handling: PartialHandling,
    ) -> Fragments<'_> {
        Fragments::new(self, range, partial_handling)
    }

    /// Returns a helper that implements [`fmt::Display`] for the queue
    /// content.
    #[inline]
    #[must_use]
    pub fn display(&self, partial_handling: PartialHandling) -> Display<'_> {
        Display::new(self, partial_handling)
    }
}
impl From<&str> for StrQueue {
#[inline]
fn from(s: &str) -> Self {
let mut this = Self::with_capacity(s.len());
this.push_str(s);
this
}
}
impl From<&[u8]> for StrQueue {
#[inline]
fn from(s: &[u8]) -> Self {
let mut this = Self::with_capacity(s.len());
this.push_bytes(s);
this
}
}
impl<const N: usize> From<&[u8; N]> for StrQueue {
#[inline]
fn from(s: &[u8; N]) -> Self {
Self::from(&s[..])
}
}
/// Hashes the queue through its byte buffer, so every stored byte (including
/// those of a trailing incomplete character) contributes to the hash.
impl hash::Hash for StrQueue {
    fn hash<H: hash::Hasher>(&self, state: &mut H) {
        hash::Hash::hash(&self.inner, state);
    }
}
/// Selects how the trailing incomplete character of a queue is treated by
/// iterators and display helpers.
///
/// NOTE(review): the exact effect of each variant is implemented by the
/// consumers (`Chars`, `Fragments`, ...), which are defined elsewhere.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum PartialHandling {
    /// Presumably: leave the incomplete character out of the output.
    Ignore,
    /// Presumably: include the incomplete character in the output.
    Emit,
}

impl PartialHandling {
    /// Returns `true` for [`PartialHandling::Emit`].
    #[inline]
    #[must_use]
    fn is_emit(self) -> bool {
        matches!(self, Self::Emit)
    }

    /// Returns `true` for [`PartialHandling::Ignore`].
    #[inline]
    #[must_use]
    fn is_ignore(self) -> bool {
        matches!(self, Self::Ignore)
    }
}
/// Helper that formats the content of a [`StrQueue`] through
/// [`fmt::Display`].
///
/// Created by `StrQueue::display`.
#[derive(Debug)]
pub struct Display<'a> {
    /// Queue whose content is written.
    queue: &'a StrQueue,
    /// How the trailing incomplete character is treated while formatting.
    partial_handling: PartialHandling,
}

impl<'a> Display<'a> {
    /// Bundles the queue with the requested partial-character handling.
    #[inline]
    #[must_use]
    fn new(queue: &'a StrQueue, partial_handling: PartialHandling) -> Self {
        Display {
            partial_handling,
            queue,
        }
    }
}
/// Writes the queue fragment by fragment, stopping at the first formatting
/// error.
impl fmt::Display for Display<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        for frag in self.queue.fragments(self.partial_handling) {
            frag.fmt(f)?;
        }
        Ok(())
    }
}
/// A line popped from a [`StrQueue`].
///
/// The line stays inside the queue while this value is alive and is removed
/// from the queue when the value is dropped.
#[derive(Debug)]
pub struct PoppedLine<'a> {
    /// Queue the line is taken from.
    queue: &'a mut StrQueue,
    /// Length of the line in bytes; never zero.
    line_len: NonZeroUsize,
}

impl<'a> PoppedLine<'a> {
    /// Returns the line as a [`CharsRange`] covering the first `len()` bytes
    /// of the queue.
    #[inline]
    #[must_use]
    pub fn to_chars_range(&self) -> CharsRange<'_> {
        let end = self.line_len.get();
        self.queue.chars_range(..end)
    }

    /// Returns the line as a [`BytesRange`] covering the first `len()` bytes
    /// of the queue.
    #[inline]
    #[must_use]
    pub fn to_bytes_range(&self) -> BytesRange<'_> {
        let end = self.line_len.get();
        self.queue.bytes_range(..end)
    }

    /// Returns the length of the line in bytes.
    #[inline]
    #[must_use]
    pub fn len(&self) -> usize {
        usize::from(self.line_len)
    }

    /// Always returns `false`: the stored length is non-zero.
    #[inline]
    #[must_use]
    pub const fn is_empty(&self) -> bool {
        false
    }
}
/// Formats the line by delegating to the formatting of its characters range.
impl fmt::Display for PoppedLine<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let line = self.to_chars_range();
        line.fmt(f)
    }
}
/// Removes the popped line from the front of the queue.
impl core::ops::Drop for PoppedLine<'_> {
    #[inline]
    fn drop(&mut self) {
        let consumed = self.line_len.get();
        // Dropping the drain iterator removes the bytes from the queue.
        drop(self.queue.inner.drain(..consumed));
    }
}
/// Equality of popped lines is delegated to the equality of their byte
/// ranges.
impl PartialEq for PoppedLine<'_> {
#[inline]
fn eq(&self, other: &PoppedLine<'_>) -> bool {
self.to_bytes_range().eq(&other.to_bytes_range())
}
}
impl Eq for PoppedLine<'_> {}
/// Partial ordering of popped lines is delegated to their byte ranges.
impl PartialOrd for PoppedLine<'_> {
#[inline]
fn partial_cmp(&self, rhs: &PoppedLine<'_>) -> Option<Ordering> {
self.to_bytes_range().partial_cmp(&rhs.to_bytes_range())
}
}
/// Total ordering of popped lines is delegated to their byte ranges.
impl Ord for PoppedLine<'_> {
#[inline]
fn cmp(&self, rhs: &PoppedLine<'_>) -> Ordering {
self.to_bytes_range().cmp(&rhs.to_bytes_range())
}
}
/// Compares the line's byte range with the bytes of the string.
impl PartialEq<str> for PoppedLine<'_> {
#[inline]
fn eq(&self, other: &str) -> bool {
self.to_bytes_range().eq(other.as_bytes())
}
}
/// Orders the line's byte range against the bytes of the string.
impl PartialOrd<str> for PoppedLine<'_> {
#[inline]
fn partial_cmp(&self, rhs: &str) -> Option<Ordering> {
self.to_bytes_range().partial_cmp(rhs.as_bytes())
}
}
/// Symmetric counterpart of `PartialEq<str> for PoppedLine`.
impl PartialEq<PoppedLine<'_>> for str {
#[inline]
fn eq(&self, other: &PoppedLine<'_>) -> bool {
other.to_bytes_range().eq(self.as_bytes())
}
}
/// Symmetric counterpart of `PartialOrd<str> for PoppedLine`.
impl PartialOrd<PoppedLine<'_>> for str {
#[inline]
fn partial_cmp(&self, rhs: &PoppedLine<'_>) -> Option<Ordering> {
// The comparison is evaluated as "line vs. string", so the result is
// reversed to obtain "string vs. line".
rhs.to_bytes_range()
.partial_cmp(self.as_bytes())
.map(Ordering::reverse)
}
}
/// Forwards to the comparison with `str`.
impl PartialEq<&str> for PoppedLine<'_> {
#[inline]
fn eq(&self, other: &&str) -> bool {
self.eq(*other)
}
}
/// Forwards to the ordering against `str`.
impl PartialOrd<&str> for PoppedLine<'_> {
#[inline]
fn partial_cmp(&self, rhs: &&str) -> Option<Ordering> {
self.partial_cmp(*rhs)
}
}
/// Symmetric counterpart of `PartialEq<&str> for PoppedLine`.
impl PartialEq<PoppedLine<'_>> for &str {
#[inline]
fn eq(&self, other: &PoppedLine<'_>) -> bool {
other.eq(*self)
}
}
/// Symmetric counterpart of `PartialOrd<&str> for PoppedLine`.
impl PartialOrd<PoppedLine<'_>> for &str {
#[inline]
fn partial_cmp(&self, rhs: &PoppedLine<'_>) -> Option<Ordering> {
// Evaluated as "line vs. string" and then reversed.
rhs.partial_cmp(*self).map(Ordering::reverse)
}
}
/// Describes which part of the queue a `PoppedFragment` covers.
///
/// "Former" and "latter" refer to the first and second slice returned by
/// `VecDeque::as_slices` for the queue's byte buffer.
#[derive(Debug, Clone, Copy)]
enum StateAfterPoppingFragment {
/// The fragment is the former slice without its last `rest` bytes.
Former {
/// Number of bytes at the end of the former slice that stay in the queue.
rest: u8,
},
/// The fragment is a single character made of the whole former slice
/// followed by the first `latter_consumed` bytes of the latter slice.
Char {
/// Number of bytes taken from the beginning of the latter slice.
latter_consumed: u8,
},
/// The fragment is the latter slice without its last `rest` bytes; the
/// former slice is expected to be empty.
Latter {
/// Number of bytes at the end of the latter slice that stay in the queue.
rest: u8,
},
/// The fragment covers every byte of the queue and is reported as an
/// incomplete fragment.
AllConsumed,
}
/// A fragment popped from a `StrQueue`.
///
/// The fragment stays inside the queue while this value is alive and is
/// removed from the queue when the value is dropped.
#[derive(Debug)]
pub struct PoppedFragment<'a> {
/// Queue the fragment is taken from.
queue: &'a mut StrQueue,
/// Which part of the queue the fragment covers.
after_state: StateAfterPoppingFragment,
}
impl<'a> PoppedFragment<'a> {
#[inline]
#[must_use]
pub fn to_fragment(&self) -> Fragment<'_> {
use StateAfterPoppingFragment::*;
match self.after_state {
Former { rest } => {
let (former, _) = self.queue.inner.as_slices();
let len = former.len() - usize::from(rest);
let s = str::from_utf8(&former[..len])
.expect("[consistency] the range must be valid UTF-8 string");
Fragment::Str(s)
}
Char { latter_consumed } => {
let (former, latter) = self.queue.inner.as_slices();
let (c, len) = utf8::take_char(
former
.iter()
.chain(&latter[..usize::from(latter_consumed)])
.copied(),
)
.expect(
"[consistency] it should be already validated \
that a complete character is available",
);
debug_assert_eq!(
usize::from(len),
former.len() + usize::from(latter_consumed),
"[consistency] a character lay on the specified area in the buffers"
);
Fragment::Char(c)
}
Latter { rest } => {
let (_, latter) = self.queue.inner.as_slices();
let len = latter.len() - usize::from(rest);
let s = str::from_utf8(&latter[..len])
.expect("[consistency] the range must be valid UTF-8 string");
Fragment::Str(s)
}
AllConsumed => Fragment::Incomplete,
}
}
#[inline]
#[must_use]
pub fn to_chars_range(&self) -> CharsRange<'_> {
use StateAfterPoppingFragment::*;
match self.after_state {
Former { rest } => {
let (former, _) = self.queue.inner.as_slices();
let len = former.len() - usize::from(rest);
self.queue.chars_range(..len)
}
Char { latter_consumed } => {
let (former, _) = self.queue.inner.as_slices();
debug_assert!(
former.len() < 4,
"[consistency] the former buffer must not have a complete character"
);
let end = former.len() + usize::from(latter_consumed);
self.queue.chars_range(..end)
}
Latter { rest } => {
let (former, latter) = self.queue.inner.as_slices();
debug_assert!(
former.is_empty(),
"[consistency] the former buffer must have been completely consumed"
);
let len = latter.len() - usize::from(rest);
self.queue.chars_range(..len)
}
AllConsumed => self.queue.chars_range(..),
}
}
#[inline]
#[must_use]
pub fn to_bytes_range(&self) -> BytesRange<'_> {
self.to_chars_range().into()
}
#[inline]
#[must_use]
pub fn len(&self) -> usize {
use StateAfterPoppingFragment::*;
match self.after_state {
Former { rest } => {
let (former, _) = self.queue.inner.as_slices();
former.len() - usize::from(rest)
}
Char { latter_consumed } => {
let (former, _) = self.queue.inner.as_slices();
debug_assert!(
former.len() < 4,
"[consistency] the former buffer must not have a complete character"
);
former.len() + usize::from(latter_consumed)
}
Latter { rest } => {
let (former, latter) = self.queue.inner.as_slices();
debug_assert!(
former.is_empty(),
"[consistency] the former buffer must have been completely consumed"
);
latter.len() - usize::from(rest)
}
AllConsumed => self.queue.len(),
}
}
#[inline]
#[must_use]
pub const fn is_empty(&self) -> bool {
false
}
}
/// Formats the popped content by delegating to the formatting of its
/// [`Fragment`].
impl fmt::Display for PoppedFragment<'_> {
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let fragment = self.to_fragment();
        fragment.fmt(f)
    }
}
impl core::ops::Drop for PoppedFragment<'_> {
#[inline]
fn drop(&mut self) {
self.queue.inner.drain(..self.len());
}
}
/// Equality of popped fragments is delegated to the equality of their byte
/// ranges.
impl PartialEq for PoppedFragment<'_> {
#[inline]
fn eq(&self, other: &PoppedFragment<'_>) -> bool {
self.to_bytes_range().eq(&other.to_bytes_range())
}
}
impl Eq for PoppedFragment<'_> {}
/// Partial ordering of popped fragments is delegated to their byte ranges.
impl PartialOrd for PoppedFragment<'_> {
#[inline]
fn partial_cmp(&self, rhs: &PoppedFragment<'_>) -> Option<Ordering> {
self.to_bytes_range().partial_cmp(&rhs.to_bytes_range())
}
}
/// Total ordering of popped fragments is delegated to their byte ranges.
impl Ord for PoppedFragment<'_> {
#[inline]
fn cmp(&self, rhs: &PoppedFragment<'_>) -> Ordering {
self.to_bytes_range().cmp(&rhs.to_bytes_range())
}
}
/// Compares the fragment's byte range with the bytes of the string.
impl PartialEq<str> for PoppedFragment<'_> {
#[inline]
fn eq(&self, other: &str) -> bool {
self.to_bytes_range().eq(other.as_bytes())
}
}
/// Orders the fragment's byte range against the bytes of the string.
impl PartialOrd<str> for PoppedFragment<'_> {
#[inline]
fn partial_cmp(&self, rhs: &str) -> Option<Ordering> {
self.to_bytes_range().partial_cmp(rhs.as_bytes())
}
}
/// Symmetric counterpart of `PartialEq<str> for PoppedFragment`.
impl PartialEq<PoppedFragment<'_>> for str {
#[inline]
fn eq(&self, other: &PoppedFragment<'_>) -> bool {
other.to_bytes_range().eq(self.as_bytes())
}
}
/// Symmetric counterpart of `PartialOrd<str> for PoppedFragment`.
impl PartialOrd<PoppedFragment<'_>> for str {
#[inline]
fn partial_cmp(&self, rhs: &PoppedFragment<'_>) -> Option<Ordering> {
// The comparison is evaluated as "fragment vs. string", so the result is
// reversed to obtain "string vs. fragment".
rhs.to_bytes_range()
.partial_cmp(self.as_bytes())
.map(Ordering::reverse)
}
}
/// Forwards to the comparison with `str`.
impl PartialEq<&str> for PoppedFragment<'_> {
#[inline]
fn eq(&self, other: &&str) -> bool {
self.eq(*other)
}
}
/// Forwards to the ordering against `str`.
impl PartialOrd<&str> for PoppedFragment<'_> {
#[inline]
fn partial_cmp(&self, rhs: &&str) -> Option<Ordering> {
self.partial_cmp(*rhs)
}
}
/// Symmetric counterpart of `PartialEq<&str> for PoppedFragment`.
impl PartialEq<PoppedFragment<'_>> for &str {
#[inline]
fn eq(&self, other: &PoppedFragment<'_>) -> bool {
other.eq(*self)
}
}
/// Symmetric counterpart of `PartialOrd<&str> for PoppedFragment`.
impl PartialOrd<PoppedFragment<'_>> for &str {
#[inline]
fn partial_cmp(&self, rhs: &PoppedFragment<'_>) -> Option<Ordering> {
// Evaluated as "fragment vs. string" and then reversed.
rhs.partial_cmp(*self).map(Ordering::reverse)
}
}
/// Extension trait that turns a bound over a reference into a bound over an
/// owned value.
trait BoundExt<T> {
    /// Clones the value inside the bound, keeping the kind of bound.
    fn cloned(self) -> Bound<T>;
}

impl<T: Clone> BoundExt<T> for Bound<&'_ T> {
    fn cloned(self) -> Bound<T> {
        use Bound::{Excluded, Included, Unbounded};

        match self {
            Unbounded => Unbounded,
            Included(value) => Included(T::clone(value)),
            Excluded(value) => Excluded(T::clone(value)),
        }
    }
}