use crate::{datum_error, DatumOffset, DatumPipe, DatumResult};
/// Capacity, in bytes, of the decoder's pending-sequence buffer.
/// Four bytes is the longest UTF-8 encoding of a single `char`.
const UTF8_DECODE_BUFFER: usize = 4;
/// State of an incremental UTF-8 decoder that is fed one byte at a time
/// and emits `char`s.
///
/// The derived `Default` yields `buffer_len == 0`, i.e. no multi-byte
/// sequence is pending.
#[derive(Clone, Copy, PartialEq, Eq, Debug, Default)]
pub struct DatumUTF8Decoder {
/// Offset recorded when the first byte of the multi-byte sequence currently
/// being collected arrived; it is the offset reported with the decoded `char`.
start: DatumOffset,
/// Bytes of the multi-byte sequence collected so far; only the first
/// `buffer_len` entries are meaningful.
buffer: [u8; UTF8_DECODE_BUFFER],
/// Number of bytes currently held in `buffer`; `0` means no sequence is
/// in progress.
buffer_len: u8,
}
/// Byte-to-`char` pipe: bytes go in one at a time, and a `char` comes out as
/// soon as a complete UTF-8 sequence has been collected.
impl DatumPipe for DatumUTF8Decoder {
    type Input = u8;
    type Output = char;

    /// Consumes one input byte, or `None` once the input has ended.
    ///
    /// * An ASCII byte is handed to `f` immediately, tagged with `at`.
    /// * Bytes of a multi-byte sequence are buffered. When the sequence is
    ///   complete, the decoded `char` is handed to `f`, tagged with the offset
    ///   of the sequence's first byte.
    ///
    /// # Errors
    ///
    /// * `Interrupted` ("utf8: interrupted") if `byte` is `None` while a
    ///   multi-byte sequence is still incomplete.
    /// * `BadData` ("utf8: continuation at start") if a continuation byte
    ///   (`0x80..=0xBF`) arrives while no sequence is in progress.
    /// * `BadData` ("utf8: mid-sequence start") if a non-continuation byte
    ///   arrives while a sequence is in progress.
    /// * `BadData` ("utf8: invalid sequence") as soon as the buffered bytes can
    ///   no longer be the prefix of any valid UTF-8 sequence: an invalid lead
    ///   byte, or an overlong, surrogate or out-of-range encoding.
    /// * `BadData` ("utf8: overlong") if the buffer is already full.
    /// * Any error returned by `f` is passed through unchanged.
    ///
    /// No decoder state is rolled back when an error is returned.
    fn feed<F: FnMut(DatumOffset, char) -> DatumResult<()>>(
        &mut self,
        at: DatumOffset,
        byte: Option<u8>,
        f: &mut F,
    ) -> DatumResult<()> {
        let byte = match byte {
            Some(byte) => byte,
            None => {
                // The input ended: that is only acceptable between characters.
                return if self.buffer_len == 0 {
                    Ok(())
                } else {
                    Err(datum_error!(Interrupted, at, "utf8: interrupted"))
                };
            }
        };

        let len = usize::from(self.buffer_len);
        if len >= UTF8_DECODE_BUFFER {
            // Only reachable when an earlier call already failed and left a
            // full, invalid buffer behind.
            return Err(datum_error!(BadData, at, "utf8: overlong"));
        }

        let is_continuation = (0x80..=0xBF).contains(&byte);
        if len == 0 {
            if byte.is_ascii() {
                // Single-byte character: nothing to buffer.
                return f(at, char::from(byte));
            }
            if is_continuation {
                return Err(datum_error!(BadData, at, "utf8: continuation at start"));
            }
            // Lead byte: remember where this character begins.
            self.start = at;
        } else if !is_continuation {
            return Err(datum_error!(BadData, at, "utf8: mid-sequence start"));
        }

        self.buffer[len] = byte;
        self.buffer_len += 1;

        match core::str::from_utf8(&self.buffer[..=len]) {
            Ok(decoded) => {
                self.buffer_len = 0;
                match decoded.chars().next() {
                    Some(c) => f(self.start, c),
                    // At least one byte was validated, so a char must exist.
                    None => unreachable!("validated non-empty UTF-8 yields a char"),
                }
            }
            // `error_len() == None` means the input merely ended too early:
            // the sequence is a valid prefix and more bytes are needed.
            Err(e) if e.error_len().is_none() => Ok(()),
            // Otherwise no further bytes can make the buffer valid, so reject
            // it at the offending byte instead of buffering more input.
            Err(_) => Err(datum_error!(BadData, at, "utf8: invalid sequence")),
        }
    }
}