use std::cell::Cell;
include!(concat!(env!("OUT_DIR"), "/sjis.rs"));
/// The byte sequence produced for a single encoded character.
///
/// A value holds either one or two bytes; the variant records which.
/// Values compare and hash by variant first and then by their bytes, as
/// generated by the derives below.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum EncodedChar {
    /// A character stored in a single byte.
    One([u8; 1]),
    /// A character stored in two bytes, in stream order.
    Two([u8; 2]),
}

impl EncodedChar {
    /// Two-byte sequence `0x81 0x45`, emitted by lossy encoding in place of
    /// characters that cannot be encoded.
    pub const REPLACEMENT: Self = Self::Two([0x81, 0x45]);
}
impl std::ops::Deref for EncodedChar {
type Target = [u8];
fn deref(&self) -> &Self::Target {
match self {
EncodedChar::One(a) => a,
EncodedChar::Two(a) => a,
}
}
}
impl std::ops::DerefMut for EncodedChar {
fn deref_mut(&mut self) -> &mut Self::Target {
match self {
EncodedChar::One(a) => a,
EncodedChar::Two(a) => a,
}
}
}
impl IntoIterator for EncodedChar {
type Item = u8;
type IntoIter = std::array::IntoIter<u8, 2>;
fn into_iter(self) -> Self::IntoIter {
match self {
EncodedChar::One([a]) => {
let mut it = [0, a].into_iter();
it.next();
it
}
EncodedChar::Two([a, b]) => [a, b].into_iter(),
}
}
}
/// Encodes a single character, or returns `None` if it has no encoding.
///
/// * ASCII characters become one byte with the same value.
/// * Characters in `'。'..='゚'` become one byte, starting at `0xA1` for `'。'`.
/// * Any other character that fits in a `u16` is looked up in `UTF8_SJIS`;
///   a hit yields the two bytes stored there.
pub fn encode_char(char: char) -> Option<EncodedChar> {
    if char.is_ascii() {
        return Some(EncodedChar::One([char as u8]));
    }
    if matches!(char, '。'..='゚') {
        // Offset within the range, rebased onto 0xA1.
        let offset = char as u32 - '。' as u32;
        return Some(EncodedChar::One([offset as u8 + 0xA1]));
    }
    // Characters that do not fit in 16 bits are never looked up.
    let unit = u16::try_from(char).ok()?;
    match UTF8_SJIS.get(&unit) {
        Some(&[k1, k2]) => Some(EncodedChar::Two([k1, k2])),
        _ => None,
    }
}
/// Decodes the next character from a byte iterator.
///
/// Returns `None` when the iterator is already exhausted. Otherwise the first
/// byte is taken from `iter` and handed to [`decode_char_from`], which pulls a
/// second byte from the same iterator if it needs one; its result is returned
/// wrapped in `Some`.
pub fn decode_char(iter: &mut impl Iterator<Item = u8>) -> Option<Result<char, EncodedChar>> {
    let lead = iter.next()?;
    Some(decode_char_from(lead, || iter.next()))
}
/// Decodes one character whose first byte is `b1`, calling `b2` to obtain the
/// second byte only when one is required.
///
/// Bytes `0x00..=0x7F` decode to the character with the same value, and bytes
/// `0xA1..=0xDF` decode to the characters starting at `'。'`; neither case calls
/// `b2`. First bytes in `0x81..=0x9F`, `0xE0..=0xEF` and `0xFA..=0xFC` select a
/// row of `SJIS_UTF8`, and a second byte in `0x40..=0x7E` or `0x80..=0xFC`
/// selects the column.
///
/// # Errors
///
/// The error carries the bytes that were consumed:
/// * `One([b1])` if `b1` is `0x80` or `0xA0`, or if a second byte was needed
///   but `b2` returned `None`;
/// * `Two([b1, second])` if `b1` is in `0xF0..=0xF9` or `0xFD..=0xFF`, if the
///   second byte is outside the accepted ranges, or if the table entry is `'�'`.
///
/// # Panics
///
/// Panics if the selected `SJIS_UTF8` entry is not a valid `char` value.
pub fn decode_char_from(b1: u8, b2: impl FnOnce() -> Option<u8>) -> Result<char, EncodedChar> {
// Tracks the bytes consumed so far so that every error path can report them.
let enc = Cell::new(EncodedChar::One([b1]));
// Wraps the caller's `b2`: a missing byte becomes an error holding only `b1`;
// otherwise `enc` is upgraded to the two-byte form before the byte is returned.
let b2 = || {
let b2 = b2().ok_or(enc.get())?;
enc.set(EncodedChar::Two([b1, b2]));
Ok(b2)
};
// `a` is the row index into `SJIS_UTF8`; the three ranges of first bytes are
// packed back to back (0x00.., 0x1F.., 0x2F..).
let a = match b1 {
a @ 0x00..=0x7F => return Ok(char::from(a)),
a @ 0xA1..=0xDF => return Ok(char::from_u32('。' as u32 + (a - 0xA1) as u32).unwrap()),
a @ 0x81..=0x9F => a - 0x81,
a @ 0xE0..=0xEF => a - 0xE0 + 0x1F,
a @ 0xFA..=0xFC => a - 0xFA + 0x2F,
0x80 | 0xA0 => return Err(enc.get()),
0xF0..=0xF9 | 0xFD.. => {
// Still consume the second byte so the error reports both bytes.
b2()?;
return Err(enc.get());
}
} as usize;
// `b` is the column index; 0x7F is skipped, so the upper range starts at 0x3F.
let b = match b2()? {
b @ 0x40..=0x7E => b - 0x40,
b @ 0x80..=0xFC => b - 0x80 + 0x3F,
..=0x3F | 0x7F | 0xFD.. => return Err(enc.get()),
} as usize;
let ch = char::from_u32(SJIS_UTF8[a][b] as u32).unwrap();
// A table entry of '�' is treated as "no mapping".
if ch == '�' {
return Err(enc.get());
}
Ok(ch)
}
#[test]
fn encode_replacement() {
    // The replacement constant must be exactly what encoding '・' produces.
    let encoded = encode_char('・').unwrap();
    assert_eq!(EncodedChar::REPLACEMENT, encoded);
}
#[test]
fn encode_then_decode() {
    // Every character up to U+FFFF that can be encoded must decode back to itself.
    let encodable = (0..=0xFFFF)
        .filter_map(char::from_u32)
        .filter_map(|ch| encode_char(ch).map(|enc| (ch, enc)));
    for (ch, enc) in encodable {
        assert_eq!(decode_char(&mut enc.into_iter()), Some(Ok(ch)));
    }
}
#[test]
fn decode_then_encode() {
    // Byte pairs that are allowed to re-encode to something else.
    let duplicates = [
        [0x87, 0x90],
        [0x87, 0x91],
        [0x87, 0x92],
        [0x87, 0x95],
        [0x87, 0x96],
        [0x87, 0x97],
        [0x87, 0x9A],
        [0x87, 0x9B],
        [0x87, 0x9C],
        [0xEE, 0xF9],
    ];
    let mut any_mismatch = false;
    // Try every possible two-byte array; whenever its front decodes, encoding
    // the result should reproduce exactly the bytes that were consumed.
    for array in (0..=0xFFFF).map(u16::to_le_bytes) {
        let mut rest = array.into_iter();
        let Some(Ok(dec)) = decode_char(&mut rest) else {
            continue;
        };
        let consumed = &array[..2 - rest.as_slice().len()];
        let enc: Vec<u8> = encode_char(dec).unwrap().into_iter().collect();
        // Listed duplicates and arrays starting at 0xFA or above are exempt.
        let exempt = duplicates.contains(&array) || array[0] >= 0xFA;
        if enc != consumed && !exempt {
            println!("{:02X?}, // {:?} {:02X?}", consumed, dec, enc);
            any_mismatch = true;
        }
    }
    assert!(!any_mismatch);
}
/// Encodes a whole string, stopping at the first character that cannot be
/// encoded.
///
/// # Errors
///
/// Returns the byte offset within `str` of the first character for which
/// [`encode_char`] returns `None`.
pub fn encode(str: &str) -> Result<Vec<u8>, usize> {
    let mut bytes = Vec::new();
    for (offset, ch) in str.char_indices() {
        let encoded = encode_char(ch).ok_or(offset)?;
        bytes.extend(encoded);
    }
    Ok(bytes)
}
/// Encodes a whole string, substituting [`EncodedChar::REPLACEMENT`] for every
/// character that [`encode_char`] cannot encode.
pub fn encode_lossy(str: &str) -> Vec<u8> {
    str.chars()
        .flat_map(|ch| encode_char(ch).unwrap_or(EncodedChar::REPLACEMENT))
        .collect()
}
#[rustfmt::skip]
#[test]
fn test_encode() {
    // A fully encodable string yields the expected byte sequence.
    let expected: &[u8] = &[0x93, 0xFA, 0x96, 0x7b, 0x83, 0x74, 0x83, 0x40, 0x83, 0x8B, 0x83, 0x52, 0x83, 0x80];
    assert_eq!(encode("日本ファルコム").as_deref(), Ok(expected));

    // Strict encoding reports the byte offset of the first unencodable character.
    let failure_offset = "日本2=".len();
    assert_eq!(encode("日本2=₂"), Err(failure_offset));

    // Lossy encoding swaps that character for the replacement sequence.
    let round_trip = decode_lossy(&encode_lossy("日本2=₂"));
    assert_eq!(round_trip, "日本2=・");
}
/// Decodes a whole byte slice, stopping at the first sequence that cannot be
/// decoded.
///
/// # Errors
///
/// Returns the offending bytes as an [`EncodedChar`] together with the index in
/// `input` at which they start.
pub fn decode(input: &[u8]) -> Result<String, (usize, EncodedChar)> {
    let mut text = String::new();
    let mut bytes = input.iter();
    while let Some(&lead) = bytes.next() {
        let ch = decode_char_from(lead, || bytes.next().copied()).map_err(|bad| {
            // Everything not left in the iterator has been consumed; the
            // failing sequence is the tail end of that consumed prefix.
            let consumed = input.len() - bytes.as_slice().len();
            (consumed - bad.len(), bad)
        })?;
        text.push(ch);
    }
    Ok(text)
}
/// Decodes a whole byte slice, emitting `'�'` for every sequence that
/// [`decode_char_from`] rejects.
pub fn decode_lossy(input: &[u8]) -> String {
    let mut bytes = input.iter().copied();
    std::iter::from_fn(|| {
        let lead = bytes.next()?;
        Some(decode_char_from(lead, || bytes.next()).unwrap_or('�'))
    })
    .collect()
}
#[rustfmt::skip]
#[test]
fn test_decode() {
    // Well-formed input decodes completely.
    let clean: &[u8] = &[0x93, 0xFA, 0x96, 0x7b, 0x83, 0x74, 0x83, 0x40, 0x83, 0x8B, 0x83, 0x52, 0x83, 0x80];
    assert_eq!(decode(clean).as_deref(), Ok("日本ファルコム"));

    // With an extra 0x83 inserted, the bytes pair up differently but still decode.
    let shifted: &[u8] = &[0x93, 0xFA, 0x96, 0x7b, 0x83, 0x74, 0x83, 0x81, 0x40, 0x83, 0x8B, 0x83, 0x52, 0x83, 0x80];
    assert_eq!(decode_lossy(shifted), "日本フメ@ルコム");

    // Strict decoding reports where the undecodable pair starts and what it was.
    let broken: &[u8] = &[0x93, 0xFA, 0x96, 0x7B, 0x32, 0x3D, 0x96, 0x7B, 0xEE, 0xEE, 0x83, 0x40];
    assert_eq!(decode(broken), Err((8, EncodedChar::Two([0xEE, 0xEE]))));
}