use std::borrow::Borrow;
use std::marker::PhantomData;
use std::panic::Location;
use crate::error::DecodingProblem::*;
use crate::helper::{ensure, ParseStringEncoding, ProblemLocation};
use crate::jis_x_0201::Decoder as JisX0201Decoder;
use crate::Result;
// Lookup tables pulled in at compile time from a file located in Cargo's
// `OUT_DIR`. The file is presumably emitted by the crate's build script, which
// is not visible from here — confirm. The decoder in this file reads the
// `SJIS_1997_UTF8_T` and `SJIS_1997_UTF8_S` tables from this module.
mod internal {
include!(concat!(env!("OUT_DIR"), "/shift_jis_1997.rs"));
}
/// Namespace type for the Shift JIS 1997 decoding entry points.
///
/// It carries no data; the decoding operations in this file are associated
/// functions that take the byte source as an argument.
pub struct ShiftJis1997 {}
/// Outcome of a single successful decoding step.
enum Next {
/// The underlying byte source has no more bytes.
EndOfInput,
/// Exactly one character was decoded.
One(char),
}
/// Iterator adapter that decodes a byte source into `char`s.
///
/// `I` is any `IntoIterator` whose items can be borrowed as `u8`, so sources
/// yielding either `u8` or `&u8` are accepted.
pub struct Decoder<'x, I>
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
// The byte iterator obtained from the source passed to the constructor.
iter: <I as IntoIterator>::IntoIter,
// Only ties the `'x` lifetime parameter to the type; no data is stored.
_marker: PhantomData<&'x ()>,
}
impl<I> Decoder<'_, I>
where
    I: IntoIterator,
    I::Item: Borrow<u8> + Sized,
{
    /// Creates a decoder that reads bytes from `iter`.
    fn new<'x>(iter: I) -> Decoder<'x, I> {
        Decoder {
            iter: iter.into_iter(),
            _marker: PhantomData,
        }
    }

    /// Decodes the next character from `iter`.
    ///
    /// Consumes one byte, or two bytes when the first one is a lead byte
    /// (`0x81..=0x9F` or `0xE0..=0xFC`).
    ///
    /// Returns `Next::EndOfInput` when `iter` is already exhausted, otherwise
    /// `Next::One` with the decoded character.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEndOfData` if the input ends right after a lead byte.
    /// * `InvalidByte` carrying the trail byte if it is outside the range the
    ///   table allows for the lead byte, or if the table entry is not a
    ///   usable character.
    /// * `InvalidByte` carrying the first byte if it is neither accepted by
    ///   the JIS X 0201 decoder nor a lead byte.
    fn decode_next(iter: &mut <I as IntoIterator>::IntoIter) -> Result<Next> {
        let byte = match iter.next() {
            Some(item) => item,
            None => return Ok(Next::EndOfInput),
        };
        let byte = *byte.borrow();

        // Bytes the JIS X 0201 decoder recognises are complete characters on
        // their own.
        if let Some(c) = JisX0201Decoder::<I>::decode_byte(byte) {
            return Ok(Next::One(c));
        }

        match byte {
            0x81..=0x9F | 0xE0..=0xFC => {
                let next = iter
                    .next()
                    .ok_or_else(|| UnexpectedEndOfData(Location::current()))?;
                let next = *next.borrow();

                // Per lead byte: the accepted trail-byte range and the offset
                // of that range's first entry in the character table.
                let (first, last, offset) = internal::SJIS_1997_UTF8_T[byte as usize];
                ensure!(
                    next >= first && next <= last,
                    InvalidByte(next, Location::current())
                );
                let relative = (next - first) as usize;
                let index = offset + relative;
                let value = internal::SJIS_1997_UTF8_S[index];

                // A zero entry is treated as "no mapping".
                ensure!(value != 0, InvalidByte(next, Location::current()));
                // Entries with the top bit set are rejected here; presumably
                // the table generator uses that bit as a marker — confirm
                // against the generator.
                ensure!(
                    (value & 0x8000_0000) == 0,
                    InvalidByte(next, Location::current())
                );

                // Checked conversion: the two guards above do not rule out
                // surrogates or values above 0x10FFFF, so an unchecked
                // conversion would be undefined behaviour if the generated
                // table ever contained one.
                let c = char::from_u32(value)
                    .ok_or_else(|| InvalidByte(next, Location::current()))?;
                Ok(Next::One(c))
            },
            _ => Err(InvalidByte(byte, Location::current()).into()),
        }
    }
}
impl<I> Iterator for Decoder<'_, I>
where
    I: IntoIterator,
    I::Item: Borrow<u8> + Sized,
{
    type Item = Result<char>;

    /// Yields the next decoded character, a decoding error, or `None` once
    /// the byte source is exhausted.
    ///
    /// An error does not end the iteration by itself: the following call
    /// simply resumes decoding at whatever byte the source yields next.
    fn next(&mut self) -> Option<Self::Item> {
        let step = match Self::decode_next(&mut self.iter) {
            Ok(step) => step,
            Err(problem) => return Some(Err(problem)),
        };
        match step {
            Next::One(ch) => Some(Ok(ch)),
            Next::EndOfInput => None,
        }
    }
}
impl ShiftJis1997 {
    /// Wraps `iter` in a lazy [`Decoder`] that yields one `Result<char>` per
    /// decoded character.
    pub fn iter<'iter, I>(iter: I) -> Decoder<'iter, I>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        Decoder::new(iter)
    }

    /// Decodes the whole input into a `String`.
    ///
    /// # Errors
    ///
    /// Returns the first decoding error encountered.
    pub fn all<I>(iter: I) -> Result<String>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        let decoder = Self::iter(iter);
        decoder.collect()
    }

    /// Decodes the input up to, but not including, the first NUL character.
    ///
    /// If the input contains no NUL, the whole input is decoded.
    ///
    /// # Errors
    ///
    /// Returns the first decoding error encountered before a NUL character.
    pub fn first<I>(iter: I) -> Result<String>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        // Only a successfully decoded NUL ends the string; errors are let
        // through so that `collect` can report them.
        let until_nul = Self::iter(iter).take_while(|decoded| !matches!(decoded, Ok('\0')));
        until_nul.collect()
    }
}
/// Extension trait that adds a `sjis1997()` decoding adapter to byte sources.
///
/// It applies to any sized `IntoIterator` whose items can be borrowed as `u8`.
pub trait IteratorExt
where
Self: IntoIterator + Sized,
Self::Item: Borrow<u8> + Sized,
{
/// Consumes `self` and returns a [`Decoder`] reading from it.
fn sjis1997<'b>(self) -> Decoder<'b, Self> { Decoder::new(self) }
}
// Blanket implementation: every `IntoIterator` whose items can be borrowed as
// `u8` gets `sjis1997()` through the trait's default method body.
impl<I> IteratorExt for I
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
}
// `ParseStringEncoding` implementation for this encoding.
impl ParseStringEncoding for ShiftJis1997 {
/// Decodes `iter` by delegating to [`ShiftJis1997::first`], so decoding
/// stops before the first NUL character and any decoding error met before
/// that point is returned.
fn parse_str<I>(iter: I) -> Result<String>
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
Self::first(iter)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    // A two-byte sequence is decoded, and the text after the NUL is ignored.
    #[test]
    fn parse_str() {
        let decoded = ShiftJis1997::parse_str(b"abc\x88\x9f\0def").unwrap();
        assert_eq!(decoded, "abc亜");
    }
}