#![doc(html_root_url = "https://docs.rs/codepage-strings/1.0.2")]
use std::borrow::Cow;
/// Errors reported when constructing a `Coding` or converting text with it.
///
/// The enum is `#[non_exhaustive]`, so downstream `match` expressions need a
/// wildcard arm.
#[non_exhaustive]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ConvertError {
/// The input string could not be encoded into the target code page.
StringEncoding,
/// The input bytes could not be decoded from the source code page.
StringDecoding,
/// No conversion table is known for the requested code page number.
UnknownCodepage,
/// The code page number is recognised but cannot be transcoded.
UnsupportedCodepage,
}
impl std::fmt::Display for ConvertError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
let msg = match self {
ConvertError::StringEncoding => "string codepage encoding error",
ConvertError::StringDecoding => "string decoding error",
ConvertError::UnknownCodepage => "invalid / unknown Windows code page",
ConvertError::UnsupportedCodepage => "cannot transcode this Windows code page",
};
write!(f, "{}", msg)
}
}
// `ConvertError` carries no underlying cause, so the default `Error` methods
// are sufficient and the impl body is intentionally empty.
impl std::error::Error for ConvertError {}
/// Byte order of the 16-bit code units used by the UTF-16 codings.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum Endian {
/// Little-endian: the low byte of each code unit comes first.
Le,
/// Big-endian: the high byte of each code unit comes first.
Be,
}
/// Internal representation of the conversion strategy selected for a code page.
#[derive(Debug, Clone)]
enum Codings {
/// Conversion is delegated to an `encoding_rs` encoding.
Ers(&'static encoding_rs::Encoding),
/// Conversion uses a pair of lookup tables from the `oem_cp` crate.
OemCp {
// Table consulted when encoding (maps `char` to a single byte).
encode: &'static oem_cp::OEMCPHashMap<char, u8>,
// Table consulted when decoding bytes back into text.
decode: &'static oem_cp::code_table_type::TableType,
},
/// Bytes are treated as UTF-8 directly (selected for code page 65001).
Identity,
/// UTF-16 with the given byte order (selected for code pages 1200 and 1201).
UTF16(Endian),
}
/// A converter between Rust strings and byte strings in one Windows code page.
///
/// Build one with `Coding::new`, then call `encode`, `decode` or
/// `decode_lossy` on it. The wrapped strategy is private.
#[derive(Debug, Clone)]
pub struct Coding(Codings);
impl Coding {
pub fn new(cp: u16) -> Result<Self, ConvertError> {
if cp == 65001 {
return Ok(Coding(Codings::Identity));
}
if cp == 1200 {
return Ok(Coding(Codings::UTF16(Endian::Le)));
}
if cp == 1201 {
return Ok(Coding(Codings::UTF16(Endian::Be)));
}
if [12000, 12001, 65000].contains(&cp) {
return Err(ConvertError::UnsupportedCodepage);
}
if let Some(c) = codepage::to_encoding(cp) {
return Ok(Coding(Codings::Ers(c)));
}
let encode = match (*oem_cp::code_table::ENCODING_TABLE_CP_MAP).get(&cp) {
Some(e) => e,
None => return Err(ConvertError::UnknownCodepage),
};
let decode = match (*oem_cp::code_table::DECODING_TABLE_CP_MAP).get(&cp) {
Some(e) => e,
None => return Err(ConvertError::UnknownCodepage),
};
Ok(Coding(Codings::OemCp { encode, decode }))
}
pub fn encode<'a, S>(&self, src: S) -> Result<Vec<u8>, ConvertError>
where
S: Into<Cow<'a, str>>,
{
match self.0 {
Codings::Ers(c) => {
let src = src.into();
let oe = c.output_encoding();
let (out, _, fail) = oe.encode(src.as_ref());
if fail {
Err(ConvertError::StringEncoding)
} else {
Ok(out.to_owned().to_vec())
}
}
Codings::OemCp { encode: et, .. } => match oem_cp::encode_string_checked(src, et) {
Some(out) => Ok(out),
None => Err(ConvertError::StringEncoding),
},
Codings::Identity => Ok(src.into().as_ref().as_bytes().to_vec()),
Codings::UTF16(e) => {
let encoded = src
.into()
.as_ref()
.encode_utf16()
.flat_map(|w| {
let lo = (w & 0xff) as u8;
let hi = (w >> 8) as u8;
let bs: Vec<u8> = match e {
Endian::Le => vec![lo, hi],
Endian::Be => vec![hi, lo],
};
bs.into_iter()
})
.collect();
Ok(encoded)
}
}
}
pub fn decode<'a>(&self, src: &'a [u8]) -> Result<Cow<'a, str>, ConvertError> {
match self.0 {
Codings::Ers(c) => {
let (out, _, fail) = c.decode(src.as_ref());
if fail {
Err(ConvertError::StringDecoding)
} else {
Ok(out)
}
}
Codings::OemCp { decode: dt, .. } => match dt.decode_string_checked(src) {
Some(s) => Ok(Cow::from(s)),
None => Err(ConvertError::StringDecoding),
},
Codings::Identity => match std::str::from_utf8(src) {
Ok(s) => Ok(Cow::from(s)),
Err(_) => Err(ConvertError::StringDecoding),
},
Codings::UTF16(e) => {
let ws = src
.chunks(2)
.map(|bs| {
if bs.len() < 2 {
return Err(ConvertError::StringDecoding);
}
let (hi, lo) = (bs[0] as u16, bs[1] as u16);
match e {
Endian::Le => Ok((lo << 8) | hi),
Endian::Be => Ok((hi << 8) | lo),
}
})
.collect::<Result<Vec<u16>, ConvertError>>()?;
match String::from_utf16(&ws) {
Ok(s) => Ok(Cow::from(s)),
Err(_) => Err(ConvertError::StringDecoding),
}
}
}
}
pub fn decode_lossy<'a>(&self, src: &'a [u8]) -> Cow<'a, str> {
match self.0 {
Codings::Ers(c) => {
let (out, _, _) = c.decode(src.as_ref());
out
}
Codings::OemCp { decode: dt, .. } => Cow::from(dt.decode_string_lossy(src)),
Codings::Identity => match std::str::from_utf8(src) {
Ok(s) => Cow::from(s),
Err(_) => String::from_utf8_lossy(src),
},
Codings::UTF16(e) => {
let ws: Vec<u16> = src
.chunks(2)
.map(|bs| {
let (hi, lo) = if bs.len() == 1 {
(0xff, 0xfd)
} else {
(bs[0] as u16, bs[1] as u16)
};
match e {
Endian::Le => (lo << 8) | hi,
Endian::Be => (hi << 8) | lo,
}
})
.collect();
Cow::from(String::from_utf16_lossy(&ws))
}
}
}
}