use core::mem;
use crate::{Result, decoder::Utf8Decoder};
/// Iterator adapter that feeds bytes from `I` through a [`Utf8Decoder`] and
/// yields `Result<(usize, char)>` items.
///
/// The `usize` in each item is the position reported by the decoder;
/// presumably the offset of the character in the input, counted from the
/// `offset` handed to the decoder at construction (confirm against
/// `Utf8Decoder`).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Utf8CharIndices<I> {
/// Underlying source of raw bytes.
bytes: I,
/// Incremental decoder state carried across calls to `next`.
state: Utf8Decoder,
}
impl<I> Utf8CharIndices<I>
where
I: Iterator<Item = u8>,
{
pub fn new(bytes: I, offset: usize) -> Self {
Self {
bytes,
state: Utf8Decoder::new(offset),
}
}
pub fn into_chars(self) -> Utf8Chars<I> {
Utf8Chars { inner: self }
}
}
impl<I> Iterator for Utf8CharIndices<I>
where
    I: Iterator<Item = u8>,
{
    type Item = Result<(usize, char)>;

    /// Pulls bytes from the source until the decoder emits an item.
    ///
    /// When the source runs dry, the decoder is swapped out for a default one
    /// and finished; an error from `finish` is yielded as the last item,
    /// otherwise `None` is returned.
    fn next(&mut self) -> Option<Self::Item> {
        for byte in self.bytes.by_ref() {
            let emitted = self.state.push(byte);
            if emitted.is_some() {
                return emitted;
            }
        }

        // Source exhausted: take the decoder out (leaving a default in its
        // place) so that any pending error is reported at most once.
        let decoder = mem::take(&mut self.state);
        decoder.finish().err().map(Err)
    }

    /// Derives bounds from the byte source's own hint.
    ///
    /// The lower bound is a quarter of the source's lower bound because a
    /// UTF-8 encoded character occupies at most four bytes; the upper bound is
    /// forwarded unchanged.
    ///
    /// NOTE(review): the upper bound does not count a possible trailing error
    /// from `finish`. If `Utf8Decoder::push` can return an item while still
    /// holding buffered bytes, the bound may be one too low; confirm against
    /// the decoder.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let (min_bytes, max_bytes) = self.bytes.size_hint();
        (min_bytes / 4, max_bytes)
    }
}
impl<I> From<I> for Utf8CharIndices<I>
where
I: Iterator<Item = u8>,
{
fn from(value: I) -> Self {
Self::new(value, 0)
}
}
impl<I> From<Utf8CharIndices<I>> for Utf8Chars<I>
where
I: Iterator<Item = u8>,
{
fn from(value: Utf8CharIndices<I>) -> Self {
value.into_chars()
}
}
/// Declares the adapter fused whenever its byte source is fused.
///
/// NOTE(review): this relies on `finish` of a default-constructed decoder
/// reporting no error, so that calls after exhaustion keep returning `None`;
/// confirm against `Utf8Decoder`.
impl<I: core::iter::FusedIterator<Item = u8>> core::iter::FusedIterator for Utf8CharIndices<I> {}
/// Iterator adapter that yields `Result<char>` items by delegating to a
/// wrapped [`Utf8CharIndices`] and discarding the position of each character.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Utf8Chars<I> {
/// The position-tracking iterator that performs the actual decoding.
inner: Utf8CharIndices<I>,
}
impl<I> Utf8Chars<I>
where
I: Iterator<Item = u8>,
{
pub fn new(bytes: I, offset: usize) -> Self {
Self {
inner: Utf8CharIndices::new(bytes, offset),
}
}
pub fn into_char_indices(self) -> Utf8CharIndices<I> {
self.inner
}
}
impl<I> Iterator for Utf8Chars<I>
where
    I: Iterator<Item = u8>,
{
    type Item = Result<char>;

    /// Advances the wrapped iterator and drops the position from a successful
    /// item; errors and end-of-stream pass through untouched.
    fn next(&mut self) -> Option<Self::Item> {
        let item = self.inner.next()?;
        Some(item.map(|(_, ch)| ch))
    }

    /// Reports exactly the bounds of the wrapped iterator, since items map
    /// one-to-one.
    fn size_hint(&self) -> (usize, Option<usize>) {
        Iterator::size_hint(&self.inner)
    }
}
impl<I> From<I> for Utf8Chars<I>
where
I: Iterator<Item = u8>,
{
fn from(value: I) -> Self {
Self::new(value, 0)
}
}
impl<I> From<Utf8Chars<I>> for Utf8CharIndices<I>
where
I: Iterator<Item = u8>,
{
fn from(value: Utf8Chars<I>) -> Self {
value.into_char_indices()
}
}
/// Declares the adapter fused whenever its byte source is fused; `next` only
/// delegates to the wrapped iterator.
impl<I: core::iter::FusedIterator<Item = u8>> core::iter::FusedIterator for Utf8Chars<I> {}
#[cfg(test)]
mod tests {
    extern crate alloc;
    use super::*;

    /// UTF-8 encoding of U+1F980 (🦀), a four-byte sequence.
    const CRAB: &[u8] = b"\xF0\x9F\xA6\x80";

    /// A lone 0xC3 byte followed by end of input must surface as an
    /// `UnfinishedSequence` error covering that single byte.
    #[test]
    fn unterminated_sequence_yields_eof_error() {
        let truncated = [0xC3u8];
        let chars = Utf8Chars::new(truncated.iter().copied(), 0);
        let outcome = chars.collect::<crate::Result<alloc::string::String>>();
        assert_eq!(
            outcome.unwrap_err(),
            crate::Error {
                range: 0..1,
                kind: crate::ErrorKind::UnfinishedSequence,
            }
        );
    }

    /// `From<Utf8CharIndices>` produces a `Utf8Chars` that decodes the same input.
    #[test]
    fn indices_into_chars() {
        let source = Utf8CharIndices::new(CRAB.iter().copied(), 0);
        let converted = Utf8Chars::from(source);
        let decoded = converted.collect::<crate::Result<alloc::string::String>>();
        assert_eq!(decoded.unwrap(), "🦀");
    }

    /// `From<Utf8Chars>` produces a `Utf8CharIndices` that decodes the same input.
    #[test]
    fn chars_into_indices() {
        let source = Utf8Chars::new(CRAB.iter().copied(), 0);
        let converted = Utf8CharIndices::from(source);
        let decoded = converted.collect::<crate::Result<alloc::vec::Vec<(usize, char)>>>();
        assert_eq!(decoded.unwrap(), [(0, '🦀')]);
    }

    /// The inherent `into_char_indices` method behaves like the `From` impl.
    #[test]
    fn chars_dot_into_char_indices() {
        let indices = Utf8Chars::new(CRAB.iter().copied(), 0).into_char_indices();
        let decoded = indices.collect::<crate::Result<alloc::vec::Vec<(usize, char)>>>();
        assert_eq!(decoded.unwrap(), [(0, '🦀')]);
    }

    /// The inherent `into_chars` method behaves like the `From` impl.
    #[test]
    fn indices_dot_into_chars() {
        let source = Utf8CharIndices::new(CRAB.iter().copied(), 0);
        let chars: Utf8Chars<_> = source.into_chars();
        let decoded = chars.collect::<crate::Result<alloc::string::String>>();
        assert_eq!(decoded.unwrap(), "🦀");
    }
}