use std::ops::{ Index, IndexMut, Range, RangeInclusive };
use strum_macros::{ EnumIter, EnumString };
use enum_assoc::Assoc;
/// Fixed-capacity, inline byte string holding at most seven bytes.
///
/// Only the first `count` entries of `bytes` are live. The accessors
/// implemented below (`AsRef`, `Index`, `IndexMut`) expose nothing past that
/// live prefix and panic when asked to.
#[derive(Clone, Copy)]
pub struct Bytes {
    /// Number of live bytes at the front of `bytes`.
    pub count: u8,
    /// Backing storage; the `From` conversions leave the unused tail zeroed.
    pub bytes: [u8; 7],
}

impl AsRef<[u8]> for Bytes {
    /// Borrows the live prefix as a slice.
    fn as_ref(&self) -> &[u8] {
        let live = self.count as usize;
        &self.bytes[..live]
    }
}

impl Index<usize> for Bytes {
    type Output = u8;

    /// Returns the byte at `index`.
    ///
    /// # Panics
    /// Panics if `index` is not below `count`.
    fn index(&self, index: usize) -> &u8 {
        assert!(index < self.count as usize);
        &self.bytes[index]
    }
}

impl IndexMut<usize> for Bytes {
    /// Mutable counterpart of the `usize` index; same bounds rule.
    fn index_mut(&mut self, index: usize) -> &mut u8 {
        assert!(index < self.count as usize);
        &mut self.bytes[index]
    }
}

impl Index<Range<usize>> for Bytes {
    type Output = [u8];

    /// Returns the half-open sub-slice `range`.
    ///
    /// # Panics
    /// Panics if the range is reversed or ends past `count`.
    fn index(&self, range: Range<usize>) -> &[u8] {
        assert!(range.start <= range.end);
        assert!(range.end <= self.count as usize);
        &self.bytes[range]
    }
}

impl IndexMut<Range<usize>> for Bytes {
    /// Mutable counterpart of the half-open range index; same bounds rule.
    fn index_mut(&mut self, range: Range<usize>) -> &mut [u8] {
        assert!(range.start <= range.end);
        assert!(range.end <= self.count as usize);
        &mut self.bytes[range]
    }
}

impl Index<RangeInclusive<usize>> for Bytes {
    type Output = [u8];

    /// Returns the closed sub-slice `range`.
    ///
    /// # Panics
    /// Panics if the range is reversed or its last index is not below `count`.
    fn index(&self, range: RangeInclusive<usize>) -> &[u8] {
        assert!(range.start() <= range.end());
        assert!(*range.end() < self.count as usize);
        &self.bytes[range]
    }
}

impl IndexMut<RangeInclusive<usize>> for Bytes {
    /// Mutable counterpart of the closed range index; same bounds rule.
    fn index_mut(&mut self, range: RangeInclusive<usize>) -> &mut [u8] {
        assert!(range.start() <= range.end());
        assert!(*range.end() < self.count as usize);
        &mut self.bytes[range]
    }
}

impl<const N: usize> From<[u8; N]> for Bytes {
    /// Copies an array of up to seven bytes into a new buffer.
    ///
    /// # Panics
    /// Panics if `N` exceeds seven.
    fn from(value: [u8; N]) -> Self {
        let mut storage = [0u8; 7];
        assert!(N <= 7);
        storage[..N].copy_from_slice(&value);
        Self { count: N as u8, bytes: storage }
    }
}

impl From<&[u8]> for Bytes {
    /// Copies a slice of up to seven bytes into a new buffer.
    ///
    /// # Panics
    /// Panics if the slice is longer than seven bytes.
    fn from(value: &[u8]) -> Self {
        let mut storage = [0u8; 7];
        let len = value.len();
        assert!(value.len() <= 7);
        storage[..len].copy_from_slice(value);
        Self { count: len as u8, bytes: storage }
    }
}
/// By-value iterator over the live bytes of a [`Bytes`]-style buffer.
///
/// The cursor and the length share one byte: the high nibble of
/// `index_and_count` is the position of the next byte to yield and the low
/// nibble is the total number of live bytes.
#[derive(Clone, Copy)]
pub struct BytesIter {
    /// Packed state: `(next_index << 4) | count`.
    pub index_and_count: u8,
    /// Backing storage the bytes are read from.
    pub bytes: [u8; 7],
}

impl Iterator for BytesIter {
    type Item = u8;

    /// Yields the next live byte, or `None` once the cursor reaches the count.
    fn next(&mut self) -> Option<Self::Item> {
        let index = usize::from(self.index_and_count >> 4);
        let count = usize::from(self.index_and_count & 0x0F);
        if index < count {
            let byte = self.bytes[index];
            // Adding 0x10 bumps the index nibble and leaves the count nibble intact.
            self.index_and_count += 0x10;
            Some(byte)
        } else {
            None
        }
    }

    /// Reports the exact number of bytes still to be yielded.
    ///
    /// This must shrink as the iterator advances; `ExactSizeIterator::len`
    /// is derived from it.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let index = usize::from(self.index_and_count >> 4);
        let count = usize::from(self.index_and_count & 0x0F);
        let remaining = count.saturating_sub(index);
        (remaining, Some(remaining))
    }
}

impl ExactSizeIterator for BytesIter {}
impl<'a> IntoIterator for &'a Bytes {
type Item = u8;
type IntoIter = BytesIter;
fn into_iter(self) -> Self::IntoIter {
BytesIter { index_and_count: self.count, bytes: self.bytes }
}
}
/// A character encoding: converts single `char`s to short byte sequences and back.
pub trait Encoding {
/// Encodes one character.
///
/// Returns the encoded bytes, or `None` when this encoding has no
/// representation for `c`.
fn encode(&self, c: char) -> Option<Bytes>;
/// Decodes one character from the front of `b`.
///
/// Returns `(consumed, decoded)`. As used by `Decoder` in this file,
/// `consumed` is the number of leading bytes of `b` the call accounts for
/// (the decoder advances its input by exactly that many), and `decoded` is
/// `None` when those bytes do not form a character in this encoding.
fn decode(&self, b: Bytes) -> (u8, Option<char>);
}
/// Iterator that encodes a string one `char` at a time with a borrowed encoding.
#[derive(Clone, Copy)]
pub struct Encoder<'a, E> where E: Encoding + ?Sized {
    /// Encoding applied to each character.
    encoding: &'a E,
    /// Text that has not been consumed yet.
    text: &'a str,
}

impl<'a, E> Encoder<'a, E> where E: Encoding + ?Sized {
    /// Creates an encoder positioned at the start of `text`.
    pub fn new(encoding: &'a E, text: &'a str) -> Self {
        Encoder { encoding, text }
    }

    /// Returns the encoding this iterator applies.
    pub fn encoding(&self) -> &'a E {
        self.encoding
    }

    /// Returns the part of the text that has not been consumed yet.
    pub fn remainder(&self) -> &'a str {
        self.text
    }
}

impl<'a, E> Iterator for Encoder<'a, E> where E: Encoding + ?Sized {
    /// `Ok` with the encoded bytes, or `Err` with the character the encoding rejected.
    type Item = Result<Bytes, char>;

    /// Consumes the next character and encodes it.
    ///
    /// The character is removed from the remaining text before the encoding
    /// is consulted, so a rejected character is skipped rather than retried.
    fn next(&mut self) -> Option<Self::Item> {
        let mut rest = self.text.chars();
        let ch = rest.next()?;
        self.text = rest.as_str();
        let outcome = match self.encoding.encode(ch) {
            Some(bytes) => Ok(bytes),
            None => Err(ch),
        };
        Some(outcome)
    }
}
/// Iterator that decodes a byte slice one character at a time with a borrowed encoding.
#[derive(Clone, Copy)]
pub struct Decoder<'a, E> where E: Encoding + ?Sized {
    /// Encoding used to interpret the input.
    encoding: &'a E,
    /// Input that has not been consumed yet.
    bytes: &'a [u8],
}

impl<'a, E> Decoder<'a, E> where E: Encoding + ?Sized {
    /// Creates a decoder positioned at the start of `bytes`.
    pub fn new(encoding: &'a E, bytes: &'a [u8]) -> Self {
        Decoder { encoding, bytes }
    }

    /// Returns the encoding this iterator applies.
    pub fn encoding(&self) -> &'a E {
        self.encoding
    }

    /// Returns the part of the input that has not been consumed yet.
    pub fn remainder(&self) -> &'a [u8] {
        self.bytes
    }
}

impl<'a, E> Iterator for Decoder<'a, E> where E: Encoding + ?Sized {
    /// `Ok` with the decoded character, or `Err` with the bytes that were consumed without
    /// producing one.
    type Item = Result<char, Bytes>;

    /// Decodes the next character from the front of the remaining input.
    ///
    /// The encoding is shown a window of at most seven bytes and reports how
    /// many of them it consumed; the input advances by exactly that amount.
    fn next(&mut self) -> Option<Self::Item> {
        if self.bytes.is_empty() {
            return None;
        }
        let window_len = self.bytes.len().min(7);
        let window: Bytes = self.bytes[0..window_len].into();
        let (consumed, decoded) = self.encoding.decode(window);
        let consumed = consumed as usize;
        self.bytes = &self.bytes[consumed..];
        // Built unconditionally, so the range check on `window` runs even when decoding succeeded.
        let rejected: Bytes = window[0..consumed].into();
        Some(decoded.ok_or(rejected))
    }
}
/// Type-erased encoding, for callers that select an encoding at run time.
pub type Any = dyn Encoding;
/// Encoder driven by a type-erased encoding.
pub type AnyEncoder<'a> = Encoder<'a, Any>;
/// Decoder driven by a type-erased encoding.
pub type AnyDecoder<'a> = Decoder<'a, Any>;

impl Any {
    /// Returns an iterator that encodes `text` with this encoding.
    pub fn encoder<'a>(&'a self, text: &'a str) -> AnyEncoder<'a> {
        AnyEncoder::new(self, text)
    }

    /// Returns an iterator that decodes `bytes` with this encoding.
    pub fn decoder<'a>(&'a self, bytes: &'a [u8]) -> AnyDecoder<'a> {
        AnyDecoder::new(self, bytes)
    }
}
/// The Windows-1252 single-byte encoding.
#[derive(Clone, Copy)]
pub struct Windows1252;

impl Windows1252 {
    /// Characters for bytes `0x80..=0xFF`, indexed by `byte - 0x80`.
    ///
    /// `'\u{0000}'` marks a byte that has no character assigned; `decode`
    /// reports such bytes as undecodable.
    pub const HIGH_PLANE: [char; 128] = [
        '\u{20AC}', '\u{0000}', '\u{201A}', '\u{0192}', '\u{201E}', '\u{2026}', '\u{2020}', '\u{2021}',
        '\u{02C6}', '\u{2030}', '\u{0160}', '\u{2039}', '\u{0152}', '\u{0000}', '\u{017D}', '\u{0000}',
        '\u{0000}', '\u{2018}', '\u{2019}', '\u{201C}', '\u{201D}', '\u{2022}', '\u{2013}', '\u{2014}',
        '\u{02DC}', '\u{2122}', '\u{0161}', '\u{203A}', '\u{0153}', '\u{0000}', '\u{017E}', '\u{0178}',
        '\u{00A0}', '\u{00A1}', '\u{00A2}', '\u{00A3}', '\u{00A4}', '\u{00A5}', '\u{00A6}', '\u{00A7}',
        '\u{00A8}', '\u{00A9}', '\u{00AA}', '\u{00AB}', '\u{00AC}', '\u{00AD}', '\u{00AE}', '\u{00AF}',
        '\u{00B0}', '\u{00B1}', '\u{00B2}', '\u{00B3}', '\u{00B4}', '\u{00B5}', '\u{00B6}', '\u{00B7}',
        '\u{00B8}', '\u{00B9}', '\u{00BA}', '\u{00BB}', '\u{00BC}', '\u{00BD}', '\u{00BE}', '\u{00BF}',
        '\u{00C0}', '\u{00C1}', '\u{00C2}', '\u{00C3}', '\u{00C4}', '\u{00C5}', '\u{00C6}', '\u{00C7}',
        '\u{00C8}', '\u{00C9}', '\u{00CA}', '\u{00CB}', '\u{00CC}', '\u{00CD}', '\u{00CE}', '\u{00CF}',
        '\u{00D0}', '\u{00D1}', '\u{00D2}', '\u{00D3}', '\u{00D4}', '\u{00D5}', '\u{00D6}', '\u{00D7}',
        '\u{00D8}', '\u{00D9}', '\u{00DA}', '\u{00DB}', '\u{00DC}', '\u{00DD}', '\u{00DE}', '\u{00DF}',
        '\u{00E0}', '\u{00E1}', '\u{00E2}', '\u{00E3}', '\u{00E4}', '\u{00E5}', '\u{00E6}', '\u{00E7}',
        '\u{00E8}', '\u{00E9}', '\u{00EA}', '\u{00EB}', '\u{00EC}', '\u{00ED}', '\u{00EE}', '\u{00EF}',
        '\u{00F0}', '\u{00F1}', '\u{00F2}', '\u{00F3}', '\u{00F4}', '\u{00F5}', '\u{00F6}', '\u{00F7}',
        '\u{00F8}', '\u{00F9}', '\u{00FA}', '\u{00FB}', '\u{00FC}', '\u{00FD}', '\u{00FE}', '\u{00FF}',
    ];
}

impl Encoding for Windows1252 {
    /// Encodes `ch` as a single byte.
    ///
    /// ASCII maps to itself; any other character is looked up in
    /// [`Windows1252::HIGH_PLANE`]. Returns `None` when the table has no entry for it.
    fn encode(&self, ch: char) -> Option<Bytes> {
        if ch.is_ascii() {
            return Some(Bytes::from([ch as u8]));
        }
        let slot = Self::HIGH_PLANE.iter().position(|&entry| entry == ch)?;
        Some(Bytes::from([slot as u8 + 128]))
    }

    /// Decodes the first byte of `b`; always consumes exactly one byte.
    ///
    /// Returns `None` as the character for bytes whose table entry is the
    /// `'\u{0000}'` placeholder.
    fn decode(&self, b: Bytes) -> (u8, Option<char>) {
        let byte = b[0];
        let decoded = if byte.is_ascii() {
            Some(char::from(byte))
        } else {
            match Self::HIGH_PLANE[usize::from(byte) - 128] {
                '\u{0000}' => None,
                mapped => Some(mapped),
            }
        };
        (1, decoded)
    }
}
/// The Mac OS Roman single-byte encoding.
#[derive(Clone, Copy)]
pub struct MacRoman;

impl MacRoman {
    /// Characters for bytes `0x80..=0xFF`, indexed by `byte - 0x80`.
    pub const HIGH_PLANE: [char; 128] = [
        '\u{00C4}', '\u{00C5}', '\u{00C7}', '\u{00C9}', '\u{00D1}', '\u{00D6}', '\u{00DC}', '\u{00E1}',
        '\u{00E0}', '\u{00E2}', '\u{00E4}', '\u{00E3}', '\u{00E5}', '\u{00E7}', '\u{00E9}', '\u{00E8}',
        '\u{00EA}', '\u{00EB}', '\u{00ED}', '\u{00EC}', '\u{00EE}', '\u{00EF}', '\u{00F1}', '\u{00F3}',
        '\u{00F2}', '\u{00F4}', '\u{00F6}', '\u{00F5}', '\u{00FA}', '\u{00F9}', '\u{00FB}', '\u{00FC}',
        '\u{2020}', '\u{00B0}', '\u{00A2}', '\u{00A3}', '\u{00A7}', '\u{2022}', '\u{00B6}', '\u{00DF}',
        '\u{00AE}', '\u{00A9}', '\u{2122}', '\u{00B4}', '\u{00A8}', '\u{2260}', '\u{00C6}', '\u{00D8}',
        '\u{221E}', '\u{00B1}', '\u{2264}', '\u{2265}', '\u{00A5}', '\u{00B5}', '\u{2202}', '\u{2211}',
        '\u{220F}', '\u{03C0}', '\u{222B}', '\u{00AA}', '\u{00BA}', '\u{03A9}', '\u{00E6}', '\u{00F8}',
        '\u{00BF}', '\u{00A1}', '\u{00AC}', '\u{221A}', '\u{0192}', '\u{2248}', '\u{2206}', '\u{00AB}',
        '\u{00BB}', '\u{2026}', '\u{00A0}', '\u{00C0}', '\u{00C3}', '\u{00D5}', '\u{0152}', '\u{0153}',
        '\u{2013}', '\u{2014}', '\u{201C}', '\u{201D}', '\u{2018}', '\u{2019}', '\u{00F7}', '\u{25CA}',
        '\u{00FF}', '\u{0178}', '\u{2044}', '\u{20AC}', '\u{2039}', '\u{203A}', '\u{FB01}', '\u{FB02}',
        '\u{2021}', '\u{00B7}', '\u{201A}', '\u{201E}', '\u{2030}', '\u{00C2}', '\u{00CA}', '\u{00C1}',
        '\u{00CB}', '\u{00C8}', '\u{00CD}', '\u{00CE}', '\u{00CF}', '\u{00CC}', '\u{00D3}', '\u{00D4}',
        '\u{F8FF}', '\u{00D2}', '\u{00DA}', '\u{00DB}', '\u{00D9}', '\u{0131}', '\u{02C6}', '\u{02DC}',
        '\u{00AF}', '\u{02D8}', '\u{02D9}', '\u{02DA}', '\u{00B8}', '\u{02DD}', '\u{02DB}', '\u{02C7}',
    ];
}

impl Encoding for MacRoman {
    /// Encodes `ch` as a single byte.
    ///
    /// ASCII maps to itself; any other character is looked up in
    /// [`MacRoman::HIGH_PLANE`]. Returns `None` when the table has no entry for it.
    fn encode(&self, ch: char) -> Option<Bytes> {
        if ch.is_ascii() {
            return Some(Bytes::from([ch as u8]));
        }
        let slot = Self::HIGH_PLANE.iter().position(|&entry| entry == ch)?;
        Some(Bytes::from([slot as u8 + 128]))
    }

    /// Decodes the first byte of `b`; always consumes one byte and always yields a character.
    fn decode(&self, b: Bytes) -> (u8, Option<char>) {
        let byte = b[0];
        let ch = if byte.is_ascii() {
            char::from(byte)
        } else {
            Self::HIGH_PLANE[usize::from(byte) - 128]
        };
        (1, Some(ch))
    }
}
/// UTF-16 with little-endian byte order: each 16-bit code unit is stored low byte first.
#[derive(Clone, Copy)]
pub struct Utf16LE;

impl Encoding for Utf16LE {
    /// Encodes `ch` as one code unit (two bytes) or a surrogate pair (four bytes).
    ///
    /// Always returns `Some`: a `char` is a Unicode scalar value, so it is
    /// never a surrogate and always has a UTF-16 form.
    fn encode(&self, ch: char) -> Option<Bytes> {
        match ch as u32 {
            bmp @ 0x0000..=0xFFFF => Some(Bytes::from((bmp as u16).to_le_bytes())),
            supplementary => {
                // A surrogate pair carries the 20-bit offset from U+10000, not
                // the raw scalar value: ten bits in each code unit.
                let offset = supplementary - 0x1_0000;
                let high = 0xD800 | (offset >> 10) as u16;
                let low = 0xDC00 | (offset & 0x03FF) as u16;
                let [h0, h1] = high.to_le_bytes();
                let [l0, l1] = low.to_le_bytes();
                Some(Bytes::from([h0, h1, l0, l1]))
            }
        }
    }

    /// Decodes one character from the front of `b`.
    ///
    /// Returns `(consumed, decoded)`:
    /// * a non-surrogate code unit consumes 2 bytes and yields its character;
    /// * a high surrogate followed by a low surrogate consumes 4 bytes and
    ///   yields the supplementary-plane character;
    /// * a high surrogate followed by two bytes that are not a low surrogate
    ///   consumes all 4 bytes and yields `None`;
    /// * any other two-byte unit (lone low surrogate, or a high surrogate
    ///   with fewer than four bytes available) consumes 2 bytes and yields `None`;
    /// * a single trailing byte consumes 1 byte and yields `None`.
    ///
    /// # Panics
    /// Panics if `b` is empty.
    fn decode(&self, b: Bytes) -> (u8, Option<char>) {
        match b.as_ref() {
            [] => unreachable!(),
            &[lo, hi @ 0x00..=0xD7, ..] | &[lo, hi @ 0xE0..=0xFF, ..] => {
                // The whole high byte is significant for a BMP code unit.
                let unit = u16::from_le_bytes([lo, hi]);
                (2, char::from_u32(u32::from(unit)))
            },
            &[lo1, hi1 @ 0xD8..=0xDB, lo2, hi2 @ 0xDC..=0xDF, ..] => {
                let high = u32::from(u16::from_le_bytes([lo1, hi1]) & 0x03FF);
                let low = u32::from(u16::from_le_bytes([lo2, hi2]) & 0x03FF);
                // Undo the U+10000 offset applied when the pair was formed.
                (4, char::from_u32(0x1_0000 + ((high << 10) | low)))
            },
            &[_, 0xD8..=0xDB, _, _, ..] => (4, None),
            &[_, _, ..] => (2, None),
            &[_] => (1, None),
        }
    }
}
/// UTF-16 with big-endian byte order: each 16-bit code unit is stored high byte first.
#[derive(Clone, Copy)]
pub struct Utf16BE;

impl Encoding for Utf16BE {
    /// Encodes `ch` as one code unit (two bytes) or a surrogate pair (four bytes).
    ///
    /// Always returns `Some`: a `char` is a Unicode scalar value, so it is
    /// never a surrogate and always has a UTF-16 form.
    fn encode(&self, ch: char) -> Option<Bytes> {
        match ch as u32 {
            bmp @ 0x0000..=0xFFFF => Some(Bytes::from((bmp as u16).to_be_bytes())),
            supplementary => {
                // A surrogate pair carries the 20-bit offset from U+10000, not
                // the raw scalar value: ten bits in each code unit.
                let offset = supplementary - 0x1_0000;
                let high = 0xD800 | (offset >> 10) as u16;
                let low = 0xDC00 | (offset & 0x03FF) as u16;
                let [h0, h1] = high.to_be_bytes();
                let [l0, l1] = low.to_be_bytes();
                Some(Bytes::from([h0, h1, l0, l1]))
            }
        }
    }

    /// Decodes one character from the front of `b`.
    ///
    /// Returns `(consumed, decoded)`:
    /// * a non-surrogate code unit consumes 2 bytes and yields its character;
    /// * a high surrogate followed by a low surrogate consumes 4 bytes and
    ///   yields the supplementary-plane character;
    /// * a high surrogate followed by two bytes that are not a low surrogate
    ///   consumes all 4 bytes and yields `None`;
    /// * any other two-byte unit (lone low surrogate, or a high surrogate
    ///   with fewer than four bytes available) consumes 2 bytes and yields `None`;
    /// * a single trailing byte consumes 1 byte and yields `None`.
    ///
    /// # Panics
    /// Panics if `b` is empty.
    fn decode(&self, b: Bytes) -> (u8, Option<char>) {
        match b.as_ref() {
            [] => unreachable!(),
            &[hi @ 0x00..=0xD7, lo, ..] | &[hi @ 0xE0..=0xFF, lo, ..] => {
                // The whole high byte is significant for a BMP code unit.
                let unit = u16::from_be_bytes([hi, lo]);
                (2, char::from_u32(u32::from(unit)))
            },
            &[hi1 @ 0xD8..=0xDB, lo1, hi2 @ 0xDC..=0xDF, lo2, ..] => {
                let high = u32::from(u16::from_be_bytes([hi1, lo1]) & 0x03FF);
                let low = u32::from(u16::from_be_bytes([hi2, lo2]) & 0x03FF);
                // Undo the U+10000 offset applied when the pair was formed.
                (4, char::from_u32(0x1_0000 + ((high << 10) | low)))
            },
            &[0xD8..=0xDB, _, _, _, ..] => (4, None),
            &[_, _, ..] => (2, None),
            &[_] => (1, None),
        }
    }
}
/// The encodings defined in this file, as a plain copyable enum.
///
/// The `Assoc` derive generates one inherent function per `#[func(...)]`
/// line below; each variant supplies the body through its `#[assoc(...)]`
/// attributes:
/// * `as_any` returns the variant's encoding as a `&'static Any` trait object;
/// * `as_const` returns a `&'static Std` referring to a constant equal to the variant;
/// * `encode` / `decode` forward to the matching unit struct's `Encoding` impl.
///
/// The `#[strum(...)]` attributes set the name each variant is parsed from
/// (presumably matched ASCII-case-insensitively, per `ascii_case_insensitive`;
/// confirm against the `strum` documentation).
#[derive(Clone, Copy, EnumIter, EnumString, Assoc)]
#[func(pub const fn as_any(self) -> &'static Any)]
#[func(pub const fn as_const(self) -> &'static Std)]
#[func(pub fn encode(&self, c: char) -> (Option<Bytes>))]
#[func(pub fn decode(&self, b: Bytes) -> (u8, Option<char>))]
pub enum Std {
/// Windows-1252, backed by [`Windows1252`]; parsed from `"Windows-1252"`.
#[strum(serialize = "Windows-1252", ascii_case_insensitive)]
#[assoc(as_any = &Windows1252)]
#[assoc(as_const = { const CNST: Std = Std::Windows1252; &CNST })]
#[assoc(encode = Windows1252.encode(c))]
#[assoc(decode = Windows1252.decode(b))]
Windows1252,
/// Mac OS Roman, backed by [`MacRoman`]; parsed from `"MacRoman"`.
#[strum(serialize = "MacRoman", ascii_case_insensitive)]
#[assoc(as_any = &MacRoman)]
#[assoc(as_const = { const CNST: Std = Std::MacRoman; &CNST })]
#[assoc(encode = MacRoman.encode(c))]
#[assoc(decode = MacRoman.decode(b))]
MacRoman,
/// Little-endian UTF-16, backed by [`Utf16LE`]; parsed from `"UTF-16LE"`.
#[strum(serialize = "UTF-16LE", ascii_case_insensitive)]
#[assoc(as_any = &Utf16LE)]
#[assoc(as_const = { const CNST: Std = Std::Utf16LE; &CNST })]
#[assoc(encode = Utf16LE.encode(c))]
#[assoc(decode = Utf16LE.decode(b))]
Utf16LE,
/// Big-endian UTF-16, backed by [`Utf16BE`]; parsed from `"UTF-16BE"`.
#[strum(serialize = "UTF-16BE", ascii_case_insensitive)]
#[assoc(as_any = &Utf16BE)]
#[assoc(as_const = { const CNST: Std = Std::Utf16BE; &CNST })]
#[assoc(encode = Utf16BE.encode(c))]
#[assoc(decode = Utf16BE.decode(b))]
Utf16BE,
}
/// Encoder driven by one of the [`Std`] encodings.
pub type StdEncoder<'a> = Encoder<'a, Std>;
/// Decoder driven by one of the [`Std`] encodings.
pub type StdDecoder<'a> = Decoder<'a, Std>;
impl Std {
/// Returns an iterator that encodes `text` with this encoding.
///
/// `as_const` supplies the `&'static Std` the encoder borrows, so the
/// result is tied only to the lifetime of `text`.
pub fn encoder(self, text: &str) -> StdEncoder<'_> { Encoder::new(self.as_const(), text) }
/// Returns an iterator that decodes `bytes` with this encoding.
///
/// `as_const` supplies the `&'static Std` the decoder borrows, so the
/// result is tied only to the lifetime of `bytes`.
pub fn decoder(self, bytes: &[u8]) -> StdDecoder<'_> { Decoder::new(self.as_const(), bytes) }
}
// Lets `Std` be used wherever an `Encoding` is expected.
// NOTE(review): the `Std::encode` / `Std::decode` paths are expected to
// resolve to the inherent functions generated by the `Assoc` derive rather
// than to these trait methods (which would recurse); keep the path form.
impl Encoding for Std {
fn encode(&self, c: char) -> Option<Bytes> { Std::encode(self, c) }
fn decode(&self, b: Bytes) -> (u8, Option<char>) { Std::decode(self, b) }
}