use std::borrow::Borrow;
use std::marker::PhantomData;
use std::panic::Location;
use crate::error::DecodingProblem::*;
use crate::helper::{ensure, ParseStringEncoding, ProblemLocation};
use crate::jis_x_0201::Decoder as JisX0201Decoder;
use crate::Result;
/// Entry point for decoding Shift_JIS-2004 encoded bytes into Unicode text.
///
/// The type carries no data; decoding is exposed through its associated
/// functions `iter`, `all` and `first`.
pub struct ShiftJis2004 {}
// Lookup tables pulled in from a file under Cargo's `OUT_DIR`
// (presumably written by the crate's build script; confirm in build.rs).
// The decoder in this file reads `SJIS_2004_UTF8_T`, `SJIS_2004_UTF8_S`
// and `SJIS_2004_UTF8_D` from this module.
mod internal {
include!(concat!(env!("OUT_DIR"), "/shift_jis_2004.rs"));
}
/// Outcome of a single decoding step over the underlying byte iterator.
enum Next {
/// The byte iterator was already exhausted when the step started.
EndOfInput,
/// The consumed byte(s) decoded to exactly one character.
One(char),
/// The consumed bytes decoded to a pair of characters, which must be
/// emitted in this order.
Two(char, char),
}
/// Iterator adaptor that decodes a stream of bytes into `Result<char>` items.
///
/// `I` is any `IntoIterator` whose items can be borrowed as `u8`
/// (for example `u8` or `&u8`).
pub struct Decoder<'x, I>
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
// The underlying byte source.
iter: <I as IntoIterator>::IntoIter,
// Second character of a two-character result, held back until the next
// call to `next`.
buffered: Option<char>,
// Ties the otherwise unused lifetime parameter `'x` to the struct.
_marker: PhantomData<&'x ()>,
}
impl<I> Decoder<'_, I>
where
    I: IntoIterator,
    I::Item: Borrow<u8> + Sized,
{
    /// Builds a decoder over `iter` with no character pending in the buffer.
    fn new<'x>(iter: I) -> Decoder<'x, I> {
        let source = iter.into_iter();
        Decoder {
            iter: source,
            buffered: None,
            _marker: PhantomData,
        }
    }

    /// Performs one decoding step, consuming one or two bytes from `iter`.
    ///
    /// Returns `Next::EndOfInput` when `iter` is already exhausted,
    /// `Next::One` for a single decoded character and `Next::Two` when the
    /// double-byte sequence maps to a pair of characters.
    ///
    /// # Errors
    ///
    /// * `UnexpectedEndOfData` if the input ends right after a lead byte.
    /// * `InvalidByte` if the first byte is neither decodable on its own nor
    ///   a lead byte, if the trail byte is outside the range allowed for the
    ///   lead byte, or if the table has no mapping for the sequence.
    fn decode_next(iter: &mut <I as IntoIterator>::IntoIter) -> Result<Next> {
        let lead: u8 = match iter.next() {
            Some(item) => *item.borrow(),
            None => return Ok(Next::EndOfInput),
        };

        // Bytes that the JIS X 0201 decoder understands stand on their own.
        if let Some(single) = JisX0201Decoder::<I>::decode_byte(lead) {
            return Ok(Next::One(single));
        }

        // Everything else must be the lead byte of a double-byte sequence.
        if !matches!(lead, 0x81..=0x9F | 0xE0..=0xFC) {
            return Err(InvalidByte(lead, Location::current()).into());
        }

        let trail: u8 = match iter.next() {
            Some(item) => *item.borrow(),
            None => return Err(UnexpectedEndOfData(Location::current()).into()),
        };

        // Per lead byte: the accepted trail-byte range and the base offset of
        // that range inside the scalar table.
        let (low, high, base) = internal::SJIS_2004_UTF8_T[lead as usize];
        ensure!(
            (low..=high).contains(&trail),
            InvalidByte(trail, Location::current())
        );

        let entry = internal::SJIS_2004_UTF8_S[base + (trail - low) as usize];
        // A zero entry marks a sequence without a mapping.
        ensure!(entry != 0, InvalidByte(trail, Location::current()));

        let decoded = if entry & 0x8000_0000 != 0 {
            // High bit set: the remaining bits index the table of pairs.
            let pair_index = (entry & 0x7fff_ffff) as usize;
            let (head, tail) = internal::SJIS_2004_UTF8_D[pair_index];
            // SAFETY: relies on the generated pair table holding only valid
            // Unicode scalar values. NOTE(review): confirm in the generator.
            Next::Two(unsafe { char::from_u32_unchecked(head) }, unsafe {
                char::from_u32_unchecked(tail)
            })
        } else {
            // SAFETY: relies on the generated scalar table holding only valid
            // Unicode scalar values. NOTE(review): confirm in the generator.
            Next::One(unsafe { char::from_u32_unchecked(entry) })
        };
        Ok(decoded)
    }
}
impl<I> Iterator for Decoder<'_, I>
where
    I: IntoIterator,
    I::Item: Borrow<u8> + Sized,
{
    type Item = Result<char>;

    /// Yields the next decoded character, or the decoding error for the
    /// bytes just consumed; returns `None` once the input is exhausted.
    fn next(&mut self) -> Option<Self::Item> {
        // Hand out the held-back half of a two-character result first.
        if let Some(pending) = self.buffered.take() {
            return Some(Ok(pending));
        }

        let step = match Self::decode_next(&mut self.iter) {
            Ok(step) => step,
            Err(e) => return Some(Err(e)),
        };

        match step {
            Next::EndOfInput => None,
            Next::One(c) => Some(Ok(c)),
            Next::Two(head, tail) => {
                // Emit the first character now and keep the second for the
                // following call.
                self.buffered = Some(tail);
                Some(Ok(head))
            },
        }
    }
}
impl ShiftJis2004 {
    /// Returns a lazy decoder that yields one `Result<char>` per decoded
    /// character of `iter`.
    pub fn iter<'iter, I>(iter: I) -> Decoder<'iter, I>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        Decoder::new(iter)
    }

    /// Decodes every byte of `iter` into a `String`.
    ///
    /// # Errors
    ///
    /// Returns the first decoding error encountered.
    pub fn all<I>(iter: I) -> Result<String>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        Self::iter(iter).collect::<Result<String>>()
    }

    /// Decodes `iter` up to, but not including, the first NUL character.
    ///
    /// # Errors
    ///
    /// Returns the first decoding error encountered before a NUL character.
    pub fn first<I>(iter: I) -> Result<String>
    where
        I: IntoIterator,
        I::Item: Borrow<u8> + Sized,
    {
        Self::iter(iter)
            // Stop only on a successfully decoded NUL; errors are let through
            // so that `collect` can report them.
            .take_while(|item| !matches!(item, Ok('\0')))
            .collect()
    }
}
/// Extension trait that adds a `sjis2004` adaptor to byte sources.
pub trait IteratorExt
where
Self: IntoIterator + Sized,
Self::Item: Borrow<u8> + Sized,
{
/// Consumes `self` and wraps it in a `Decoder`, which yields
/// `Result<char>` items.
fn sjis2004<'b>(self) -> Decoder<'b, Self> { Decoder::new(self) }
}
// Blanket implementation: every `IntoIterator` whose items can be borrowed
// as `u8` gets the default `sjis2004` method.
impl<I> IteratorExt for I
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
}
impl ParseStringEncoding for ShiftJis2004 {
/// Decodes `iter` by delegating to `ShiftJis2004::first`, so decoding
/// stops before the first NUL character and decoding errors are returned
/// to the caller.
fn parse_str<I>(iter: I) -> Result<String>
where
I: IntoIterator,
I::Item: Borrow<u8> + Sized,
{
Self::first(iter)
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Mixed single-byte and double-byte input: decoding must stop at the
    /// NUL, so the trailing `def` is not part of the result.
    #[test]
    fn parse_str() {
        let input = b"abc\x88\x9f\0def";
        let decoded = ShiftJis2004::parse_str(input).unwrap();
        assert_eq!(decoded, String::from("abc亜"));
    }
}