use BytesBuf;
use StrBuf;
use std::mem;
use utf8::{self, Incomplete, DecodeError};
/// Incremental, lossy UTF-8 decoder that works on one `BytesBuf` chunk at a time.
///
/// A chunk is supplied with `feed`, decoded pieces are pulled out through the
/// `Iterator` implementation, and `end` reports a partial sequence that was
/// never completed. Malformed input is reported as
/// `utf8::REPLACEMENT_CHARACTER` rather than as an error.
pub struct Utf8Decoder {
/// Bytes of the current chunk that have not been handed out yet.
input_chunk: BytesBuf,
/// Partial sequence found at the end of a chunk, kept until a later chunk
/// completes it or `end` is called.
incomplete_char: Incomplete,
/// Set when a valid prefix has just been yielded and the invalid sequence
/// that followed it still has to be reported: the next call to `next`
/// yields a replacement character and clears the flag.
yield_replacement_character_next: bool,
}
impl Utf8Decoder {
    /// Creates a decoder with an empty input chunk, no partial sequence and
    /// no replacement character queued.
    pub fn new() -> Self {
        Utf8Decoder {
            input_chunk: BytesBuf::new(),
            incomplete_char: Incomplete::empty(),
            yield_replacement_character_next: false,
        }
    }

    /// Tells whether everything fed so far has been handed out: no
    /// replacement character is queued and the current chunk is empty.
    fn exhausted(&self) -> bool {
        if self.yield_replacement_character_next {
            return false;
        }
        self.input_chunk.is_empty()
    }

    /// Installs `next_input_chunk` as the chunk to decode and returns `self`
    /// so that the call can be chained.
    ///
    /// # Panics
    ///
    /// Panics if the decoder is not exhausted, i.e. the previous chunk still
    /// has output that has not been pulled out by iterating.
    pub fn feed(&mut self, next_input_chunk: BytesBuf) -> &mut Self {
        let ready = self.exhausted();
        assert!(ready, "feeding Utf8Decoder before exhausting the previous input chunk");
        self.input_chunk = next_input_chunk;
        self
    }

    /// Signals the end of the input.
    ///
    /// Returns a replacement character when a partial sequence was still
    /// waiting for more bytes (and forgets that sequence), `None` otherwise.
    ///
    /// # Panics
    ///
    /// Panics if the decoder is not exhausted.
    pub fn end(&mut self) -> Option<StrBuf> {
        let ready = self.exhausted();
        assert!(ready, "ending Utf8Decoder before exhausting the previous input chunk");
        if self.incomplete_char.is_empty() {
            return None;
        }
        // The dangling partial sequence can never be completed now.
        self.incomplete_char = Incomplete::empty();
        Some(replacement_character())
    }

    /// Moves the current chunk out of the decoder, leaving an empty one behind.
    fn take_input(&mut self) -> BytesBuf {
        let empty = BytesBuf::new();
        mem::replace(&mut self.input_chunk, empty)
    }

    /// Tries to finish the partial sequence carried over from an earlier
    /// chunk using the bytes at the front of the current chunk.
    ///
    /// Returns the completed text, or a replacement character if the
    /// sequence turned out to be invalid. Returns `None`, after discarding
    /// the current chunk, when `Incomplete::try_complete` reports no result yet.
    #[cold]
    fn try_complete(&mut self) -> Option<StrBuf> {
        // Reduce the borrowed result to owned values first, so that
        // `self.input_chunk` can be mutated afterwards.
        let outcome = match self.incomplete_char.try_complete(&self.input_chunk) {
            None => None,
            Some((result, remaining_input)) => {
                let consumed = self.input_chunk.len() - remaining_input.len();
                let decoded = match result {
                    Ok(text) => StrBuf::from(text),
                    Err(_) => replacement_character(),
                };
                Some((consumed, decoded))
            }
        };

        if let Some((consumed_prefix_len, decoded)) = outcome {
            self.input_chunk.pop_front(consumed_prefix_len);
            Some(decoded)
        } else {
            self.input_chunk = BytesBuf::new();
            None
        }
    }
}
/// Builds a `StrBuf` holding `utf8::REPLACEMENT_CHARACTER`, the stand-in
/// emitted wherever the input is not valid UTF-8.
#[inline]
fn replacement_character() -> StrBuf {
    let marker = utf8::REPLACEMENT_CHARACTER;
    StrBuf::from(marker)
}
// Pulls decoded pieces out of the chunk most recently passed to `feed`.
// `None` means the current chunk has nothing more to give; it does not mean
// the decoder is finished, since another chunk can be fed afterwards.
impl Iterator for Utf8Decoder {
type Item = StrBuf;
// Returns the next decoded piece: either a run of valid UTF-8 taken from the
// chunk or a single replacement character standing for an invalid sequence.
fn next(&mut self) -> Option<StrBuf> {
// A previous call yielded a valid prefix and left the invalid sequence
// after it to be reported now.
if self.yield_replacement_character_next {
self.yield_replacement_character_next = false;
return Some(replacement_character())
}
if self.input_chunk.is_empty() {
return None
}
// A partial sequence was carried over from an earlier chunk: it has to be
// resolved with the start of this chunk before anything else is decoded.
if !self.incomplete_char.is_empty() {
return self.try_complete()
}
// Marker used only to make the nested `Result` below self-describing.
struct IsIncomplete;
// Decode, then reduce the outcome to plain lengths so that nothing borrows
// `self.input_chunk` any more and it can be mutated in the `match` below.
// Shape: Ok(()) = whole chunk valid;
// Err((valid_prefix_len, Ok(IsIncomplete))) = chunk ends in a partial sequence;
// Err((valid_prefix_len, Err(resume_at))) = invalid sequence after the prefix.
let unborrowed = match utf8::decode(&self.input_chunk) {
Ok(_) => Ok(()),
Err(DecodeError::Incomplete { valid_prefix, incomplete_suffix }) => {
// Stash the partial sequence; a later chunk or `end` deals with it.
self.incomplete_char = incomplete_suffix;
Err((valid_prefix.len(), Ok(IsIncomplete)))
}
Err(DecodeError::Invalid { valid_prefix, invalid_sequence, remaining_input }) => {
// `None` records that the invalid sequence reaches the end of the chunk;
// otherwise `resume_at` is the offset of the first byte after it.
let resume_at = if remaining_input.is_empty() {
None
} else {
Some(valid_prefix.len() + invalid_sequence.len())
};
Err((valid_prefix.len(), Err(resume_at)))
}
};
let mut bytes;
// NOTE: arm order matters. The arms with a literal `0` prefix length must
// stay ahead of the arms that bind `valid_prefix_len`, which match any length.
match unborrowed {
// The whole chunk is valid: hand it out as is.
Ok(()) => {
bytes = self.take_input()
}
// Nothing but a partial sequence, already stashed above: drop the chunk
// and report that there is no output for now.
Err((0, Ok(IsIncomplete))) => {
self.input_chunk = BytesBuf::new();
return None
}
// Valid prefix followed by a partial sequence: yield the prefix.
Err((valid_prefix_len, Ok(IsIncomplete))) => {
bytes = self.take_input();
bytes.truncate(valid_prefix_len)
}
// Invalid sequence at the very start, with nothing after it.
Err((0, Err(None))) => {
self.input_chunk = BytesBuf::new();
return Some(replacement_character())
}
// Invalid sequence at the very start, with more input after it: skip it.
Err((0, Err(Some(resume_at)))) => {
self.input_chunk.pop_front(resume_at);
return Some(replacement_character())
}
// Valid prefix, then an invalid sequence that ends the chunk: yield the
// prefix now and the replacement character on the next call.
Err((valid_prefix_len, Err(None))) => {
self.yield_replacement_character_next = true;
bytes = self.take_input();
bytes.truncate(valid_prefix_len);
}
// Valid prefix, invalid sequence, then more input: yield the prefix from a
// clone so that `self.input_chunk` can keep the bytes after the invalid
// sequence, and queue the replacement character for the next call.
Err((valid_prefix_len, Err(Some(resume_at)))) => {
self.yield_replacement_character_next = true;
bytes = self.input_chunk.clone();
bytes.truncate(valid_prefix_len);
self.input_chunk.pop_front(resume_at);
}
}
// SAFETY: every path reaching this point leaves `bytes` as either the whole
// chunk that `utf8::decode` accepted, or that chunk truncated to the
// `valid_prefix` length `utf8::decode` reported. This relies on the `utf8`
// crate's contract that `valid_prefix` is well-formed UTF-8, and on
// `truncate` keeping the leading bytes (presumably; confirm against `BytesBuf`).
unsafe {
Some(StrBuf::from_utf8_unchecked(bytes))
}
}
}