use encoding::Encoding as EncodingTrait;
use encoding::{all::ASCII as ASCIIBase, DecoderTrap};
use encoding_rs::{
mem::decode_latin1, Encoding, ISO_8859_3, ISO_8859_6, ISO_8859_7, ISO_8859_8, WINDOWS_1250,
WINDOWS_1251, WINDOWS_1252 as WINDOWS_1252Base, WINDOWS_1253, WINDOWS_1254, WINDOWS_1255,
WINDOWS_1256, WINDOWS_1257, WINDOWS_1258,
};
use oem_cp::code_table::DECODING_TABLE_CP437;
use oem_cp::code_table::ENCODING_TABLE_CP437;
use oem_cp::decode_string_complete_table;
use oem_cp::encode_string_checked;
use rustc_hash::FxHashMap;
/// The Unicode replacement character (U+FFFD), used when building sloppy codec tables.
static REPLACEMENT_CHAR: char = '\u{FFFD}';
/// Identifies which codec a [`Codec`] implementation represents.
///
/// The value is a plain, copyable tag that can be compared, hashed and
/// debug-printed.
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
pub enum CodecType {
    // "Sloppy" single-byte Windows code pages.
    SloppyWindows1250,
    SloppyWindows1251,
    SloppyWindows1252,
    SloppyWindows1253,
    SloppyWindows1254,
    SloppyWindows1255,
    SloppyWindows1256,
    SloppyWindows1257,
    SloppyWindows1258,

    // "Sloppy" ISO-8859 variants.
    SloppyIso88593,
    SloppyIso88596,
    SloppyIso88597,
    SloppyIso88598,

    // Remaining single-byte codecs.
    Latin1,
    Windows1252,
    Iso88592,
    MacRoman,
    Ascii,

    // UTF-8 and its variant form.
    Utf8,
    Utf8Variant,

    // The CP437 OEM code page.
    Cp437,
}
/// Common interface implemented by the codecs defined in this file.
///
/// The `Sync` supertrait requires every implementor to be shareable between
/// threads.
pub trait Codec: Sync {
/// Returns the codec's name as a static string.
fn name(&self) -> &'static str;
/// Returns the [`CodecType`] tag identifying this codec.
fn codec_type(&self) -> CodecType;
/// Decodes `bytes` into an owned `String`.
fn decode(&self, bytes: &[u8]) -> String;
/// Encodes `string` into bytes, or returns a static error message on failure.
fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str>;
}
/// Table-driven single-byte codec, built by `make_sloppy_codec`.
pub struct SloppyCodec {
/// Name reported through `Codec::name`.
name: &'static str,
/// Tag reported through `Codec::codec_type`.
codec_type: CodecType,
/// Decoding table; `decode` indexes it with each input byte's value.
decoded_chars: Vec<char>,
/// Encoding table mapping a character to the byte that encodes it.
encoded_bytes: FxHashMap<char, u8>,
}
impl Codec for SloppyCodec {
    /// Returns the name this codec was built with.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the tag this codec was built with.
    fn codec_type(&self) -> CodecType {
        self.codec_type
    }

    /// Decodes `bytes` by looking each byte up in the decoding table.
    ///
    /// # Panics
    ///
    /// Panics if a byte value has no entry in the table (i.e. the table holds
    /// fewer entries than the byte's value + 1).
    fn decode(&self, bytes: &[u8]) -> String {
        let mut text = String::with_capacity(bytes.len());
        for &byte in bytes {
            text.push(self.decoded_chars[usize::from(byte)]);
        }
        text
    }

    /// Encodes `string` one character at a time through the encoding table.
    ///
    /// Characters missing from the table are encoded as byte `0x1A`, so this
    /// method never returns `Err`.
    fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str> {
        let encoded: Vec<u8> = string
            .chars()
            .map(|ch| self.encoded_bytes.get(&ch).copied().unwrap_or(0x1A))
            .collect();
        Ok(encoded)
    }
}
/// Codec that delegates decoding and encoding to an `encoding_rs` [`Encoding`].
#[derive(Eq, Hash, PartialEq)]
pub struct StandardCodec {
/// Name reported through `Codec::name`.
name: &'static str,
/// Tag reported through `Codec::codec_type`.
codec_type: CodecType,
/// The `encoding_rs` encoding that performs the actual conversion.
encoding: &'static Encoding,
}
impl Codec for StandardCodec {
    /// Returns the name this codec was declared with.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the tag this codec was declared with.
    fn codec_type(&self) -> CodecType {
        self.codec_type
    }

    /// Decodes `bytes` strictly as this codec's own encoding.
    ///
    /// `Encoding::decode` sniffs for a byte-order mark and silently switches
    /// to UTF-8 / UTF-16 when the input happens to start with one. A
    /// single-byte codec must treat those bytes as ordinary characters, so BOM
    /// handling is bypassed here. Malformed input is still replaced by
    /// U+FFFD; the "had errors" flag is discarded.
    fn decode(&self, bytes: &[u8]) -> String {
        let (text, _had_errors) = self.encoding.decode_without_bom_handling(bytes);
        text.into_owned()
    }

    /// Encodes `string` with the wrapped encoding.
    ///
    /// Only the byte output of `Encoding::encode` is kept; the remaining
    /// tuple fields are discarded, so this method always returns `Ok`.
    ///
    /// NOTE(review): per the encoding_rs documentation, unmappable characters
    /// are replaced with numeric character references rather than reported —
    /// confirm callers are fine with that.
    fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str> {
        Ok(self.encoding.encode(string).0.into_owned())
    }
}
/// ASCII codec; its `Codec` implementation uses the `encoding` crate's ASCII encoding.
pub struct AsciiCodec {
/// Name reported through `Codec::name`.
name: &'static str,
/// Tag reported through `Codec::codec_type`.
codec_type: CodecType,
}
impl Codec for AsciiCodec {
    /// Returns the name this codec was declared with.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the tag this codec was declared with.
    fn codec_type(&self) -> CodecType {
        self.codec_type
    }

    /// Decodes `bytes` as ASCII using the `Ignore` decoder trap.
    ///
    /// # Panics
    ///
    /// Panics if the underlying decoder reports an error.
    /// NOTE(review): with `DecoderTrap::Ignore` that is presumably
    /// unreachable — confirm against the `encoding` crate docs.
    fn decode(&self, bytes: &[u8]) -> String {
        let decoded = ASCIIBase.decode(bytes, DecoderTrap::Ignore);
        decoded.unwrap()
    }

    /// Encodes `string` as ASCII using the `Ignore` encoder trap.
    ///
    /// Any error from the underlying encoder is mapped to a fixed message.
    fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str> {
        match ASCIIBase.encode(string, encoding::EncoderTrap::Ignore) {
            Ok(bytes) => Ok(bytes),
            Err(_) => Err("Error encoding ASCII"),
        }
    }
}
/// CP437 codec; its `Codec` implementation uses the `oem_cp` CP437 tables.
pub struct Cp437Codec {
/// Name reported through `Codec::name`.
name: &'static str,
/// Tag reported through `Codec::codec_type`.
codec_type: CodecType,
}
impl Codec for Cp437Codec {
    /// Returns the name this codec was declared with.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the tag this codec was declared with.
    fn codec_type(&self) -> CodecType {
        self.codec_type
    }

    /// Decodes `bytes` through the CP437 decoding table.
    fn decode(&self, bytes: &[u8]) -> String {
        let table = &DECODING_TABLE_CP437;
        decode_string_complete_table(bytes, table)
    }

    /// Encodes `string` through the CP437 encoding table.
    ///
    /// # Errors
    ///
    /// Returns an error message when the checked encoder yields `None`.
    fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str> {
        match encode_string_checked(string, &ENCODING_TABLE_CP437) {
            Some(bytes) => Ok(bytes),
            None => Err("Character not in CP437"),
        }
    }
}
/// Latin-1 codec: bytes and code points U+0000..=U+00FF map one-to-one.
pub struct Latin1Codec {
/// Name reported through `Codec::name`.
name: &'static str,
/// Tag reported through `Codec::codec_type`.
codec_type: CodecType,
}
impl Codec for Latin1Codec {
    /// Returns the name this codec was declared with.
    fn name(&self) -> &'static str {
        self.name
    }

    /// Returns the tag this codec was declared with.
    fn codec_type(&self) -> CodecType {
        self.codec_type
    }

    /// Decodes `bytes` by mapping every byte to the code point with the same value.
    fn decode(&self, bytes: &[u8]) -> String {
        let mut text = String::with_capacity(bytes.len());
        text.extend(bytes.iter().map(|&byte| char::from(byte)));
        text
    }

    /// Encodes `string` by mapping every code point up to U+00FF to the byte
    /// with the same value.
    ///
    /// # Errors
    ///
    /// Fails on the first character whose code point is above U+00FF.
    fn encode(&self, string: &str) -> Result<Vec<u8>, &'static str> {
        let mut encoded = Vec::with_capacity(string.len());
        for ch in string.chars() {
            match ch as u32 {
                // The pattern guarantees the value fits in a byte.
                code @ 0..=0xFF => encoded.push(code as u8),
                _ => return Err("Character out of latin1 range encountered"),
            }
        }
        Ok(encoded)
    }
}
/// Builds a [`SloppyCodec`] on top of `base_encoding`.
///
/// The decoding table has one entry per byte value:
///
/// * a byte that `base_encoding` assigns decodes to that character;
/// * a byte that `base_encoding` leaves unassigned keeps its Latin-1 value
///   (the code point equal to the byte), so the table never loses information;
/// * byte `0x1A` is overridden to decode to U+FFFD.
///
/// The encoding table is the inverse of the decoding table (built before the
/// `0x1A` override, so U+001A still encodes to `0x1A`), plus an explicit
/// U+FFFD -> `0x1A` entry so that both tables agree on that pair.
///
/// # Parameters
///
/// * `name` - name reported by the resulting codec.
/// * `codec_type` - tag reported by the resulting codec.
/// * `base_encoding` - the `encoding_rs` encoding supplying the assigned bytes.
fn make_sloppy_codec(
    name: &'static str,
    codec_type: CodecType,
    base_encoding: &'static Encoding,
) -> SloppyCodec {
    // Every possible byte value, in order.
    let all_bytes: Vec<u8> = (0..=255).collect();

    // Seed the table with what each byte decodes to in Latin-1.
    let mut sloppy_chars: Vec<char> = decode_latin1(&all_bytes).chars().collect();
    let mut encoded_bytes: FxHashMap<char, u8> = FxHashMap::default();

    for (&byte, sloppy_char) in all_bytes.iter().zip(sloppy_chars.iter_mut()) {
        // The base decoder reports an unassigned byte as U+FFFD; only a real
        // assignment may replace the Latin-1 seed. Overwriting unconditionally
        // would turn every unassigned byte into U+FFFD and make the seed
        // pointless.
        let decoded = base_encoding.decode(&[byte]).0.chars().next();
        match decoded {
            Some(c) if c != REPLACEMENT_CHAR => *sloppy_char = c,
            _ => {}
        }
        encoded_bytes.insert(*sloppy_char, byte);
    }

    // Byte 0x1A (the "Substitute" control code) stands in for U+FFFD in both
    // directions.
    sloppy_chars[0x1A] = REPLACEMENT_CHAR;
    encoded_bytes.insert(REPLACEMENT_CHAR, 0x1A);

    SloppyCodec {
        name,
        codec_type,
        decoded_chars: sloppy_chars,
        encoded_bytes,
    }
}
lazy_static! {
    /// Sloppy codec built on windows-1250.
    pub static ref SLOPPY_WINDOWS_1250: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1250", CodecType::SloppyWindows1250, WINDOWS_1250);

    /// Sloppy codec built on windows-1251.
    pub static ref SLOPPY_WINDOWS_1251: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1251", CodecType::SloppyWindows1251, WINDOWS_1251);

    /// Sloppy codec built on windows-1252.
    pub static ref SLOPPY_WINDOWS_1252: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1252", CodecType::SloppyWindows1252, WINDOWS_1252Base);

    /// Sloppy codec built on windows-1253.
    pub static ref SLOPPY_WINDOWS_1253: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1253", CodecType::SloppyWindows1253, WINDOWS_1253);

    /// Sloppy codec built on windows-1254.
    pub static ref SLOPPY_WINDOWS_1254: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1254", CodecType::SloppyWindows1254, WINDOWS_1254);

    /// Sloppy codec built on windows-1255.
    pub static ref SLOPPY_WINDOWS_1255: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1255", CodecType::SloppyWindows1255, WINDOWS_1255);

    /// Sloppy codec built on windows-1256.
    pub static ref SLOPPY_WINDOWS_1256: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1256", CodecType::SloppyWindows1256, WINDOWS_1256);

    /// Sloppy codec built on windows-1257.
    pub static ref SLOPPY_WINDOWS_1257: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1257", CodecType::SloppyWindows1257, WINDOWS_1257);

    /// Sloppy codec built on windows-1258.
    pub static ref SLOPPY_WINDOWS_1258: SloppyCodec =
        make_sloppy_codec("sloppy-windows-1258", CodecType::SloppyWindows1258, WINDOWS_1258);

    /// Sloppy codec built on ISO-8859-3.
    pub static ref SLOPPY_ISO_8859_3: SloppyCodec =
        make_sloppy_codec("sloppy-iso-8859-3", CodecType::SloppyIso88593, ISO_8859_3);

    /// Sloppy codec built on ISO-8859-6.
    pub static ref SLOPPY_ISO_8859_6: SloppyCodec =
        make_sloppy_codec("sloppy-iso-8859-6", CodecType::SloppyIso88596, ISO_8859_6);

    /// Sloppy codec built on ISO-8859-7.
    pub static ref SLOPPY_ISO_8859_7: SloppyCodec =
        make_sloppy_codec("sloppy-iso-8859-7", CodecType::SloppyIso88597, ISO_8859_7);

    /// Sloppy codec built on ISO-8859-8.
    pub static ref SLOPPY_ISO_8859_8: SloppyCodec =
        make_sloppy_codec("sloppy-iso-8859-8", CodecType::SloppyIso88598, ISO_8859_8);

    /// windows-1252 as implemented by `encoding_rs`.
    pub static ref WINDOWS_1252: StandardCodec = StandardCodec {
        name: "windows-1252",
        codec_type: CodecType::Windows1252,
        encoding: encoding_rs::WINDOWS_1252,
    };

    /// ISO-8859-2 as implemented by `encoding_rs`.
    pub static ref ISO_8859_2: StandardCodec = StandardCodec {
        name: "iso-8859-2",
        codec_type: CodecType::Iso88592,
        encoding: encoding_rs::ISO_8859_2,
    };

    /// Mac Roman, backed by the `encoding_rs` `MACINTOSH` encoding.
    pub static ref MACROMAN: StandardCodec = StandardCodec {
        name: "macroman",
        codec_type: CodecType::MacRoman,
        encoding: encoding_rs::MACINTOSH,
    };

    /// ASCII codec.
    pub static ref ASCII: AsciiCodec = AsciiCodec {
        name: "ascii",
        codec_type: CodecType::Ascii,
    };

    /// Latin-1 codec.
    pub static ref LATIN_1: Latin1Codec = Latin1Codec {
        name: "latin-1",
        codec_type: CodecType::Latin1,
    };

    /// CP437 codec.
    pub static ref CP437: Cp437Codec = Cp437Codec {
        name: "cp437",
        codec_type: CodecType::Cp437,
    };
}