use std::str::FromStr;
use crate::error::Error;
/// Text encodings that this module can decode from and encode to.
///
/// Values can be parsed from a name via `FromStr` and rendered back to their
/// canonical name via `as_str` / `Display`.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Encoding {
/// UTF-8. This is the `Default` variant.
#[default]
Utf8,
/// ISO-8859-1 (Latin-1), canonical name `"iso-8859-1"`.
///
/// NOTE(review): `rs()` maps this variant to the same `encoding_rs` codec as
/// `Windows1252`, so the two currently differ only in their reported name.
Iso8859_1,
/// Windows-1252, canonical name `"windows-1252"`.
Windows1252,
}
impl Encoding {
    /// Returns the `encoding_rs` codec that backs this variant.
    ///
    /// `Iso8859_1` and `Windows1252` share the `WINDOWS_1252` codec. `encoding_rs`
    /// implements the WHATWG Encoding Standard, in which "iso-8859-1" is a label
    /// for windows-1252 rather than a separate encoding.
    fn rs(self) -> &'static encoding_rs::Encoding {
        match self {
            Encoding::Utf8 => encoding_rs::UTF_8,
            // NOTE(review): with this mapping, bytes 0x80..=0x9F are handled as
            // windows-1252 (punctuation such as the em dash) instead of the C1
            // control characters of strict ISO-8859-1. Confirm this is intended.
            Encoding::Iso8859_1 | Encoding::Windows1252 => encoding_rs::WINDOWS_1252,
        }
    }

    /// Returns the canonical lowercase name of this encoding.
    pub fn as_str(self) -> &'static str {
        match self {
            Encoding::Utf8 => "utf-8",
            Encoding::Iso8859_1 => "iso-8859-1",
            Encoding::Windows1252 => "windows-1252",
        }
    }

    /// Decodes `bytes` as this encoding into an owned `String`.
    ///
    /// A leading byte-order mark belonging to this encoding itself (in practice
    /// the UTF-8 BOM for `Utf8`) is stripped. A BOM of any *other* encoding is
    /// treated as ordinary input bytes and never changes which codec is used.
    ///
    /// # Errors
    ///
    /// Returns `Error::Encoding` when the decoder reports malformed input.
    pub fn decode(self, bytes: &[u8]) -> Result<String, Error> {
        // Plain `decode` sniffs the BOM and silently switches to the BOM's
        // encoding (e.g. Latin-1 data starting with FF FE would be read as
        // UTF-16LE). `decode_with_bom_removal` always honours `self`.
        let (text, had_errors) = self.rs().decode_with_bom_removal(bytes);
        if had_errors {
            return Err(Error::Encoding(format!(
                "input is not valid {}",
                self.as_str()
            )));
        }
        Ok(text.into_owned())
    }

    /// Encodes `s` into bytes of this encoding.
    ///
    /// # Errors
    ///
    /// Returns `Error::Encoding` when the encoder reports that `s` contains
    /// characters that cannot be represented in this encoding.
    pub fn encode(self, s: &str) -> Result<Vec<u8>, Error> {
        let (encoded, _used, had_errors) = self.rs().encode(s);
        if had_errors {
            return Err(Error::Encoding(format!(
                "output contains characters that cannot be represented in {}",
                self.as_str()
            )));
        }
        Ok(encoded.into_owned())
    }
}
impl FromStr for Encoding {
type Err = Error;
fn from_str(s: &str) -> Result<Self, Self::Err> {
match s.to_ascii_lowercase().replace('_', "-").as_str() {
"utf-8" | "utf8" => Ok(Encoding::Utf8),
"iso-8859-1" | "latin1" | "latin-1" | "iso8859-1" => Ok(Encoding::Iso8859_1),
"windows-1252" | "cp1252" | "win1252" => Ok(Encoding::Windows1252),
other => Err(Error::UnknownEncoding(other.to_string())),
}
}
}
impl std::fmt::Display for Encoding {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
f.write_str(self.as_str())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encoding then decoding with `Iso8859_1` yields the original text.
    ///
    /// The em dash is not part of strict ISO-8859-1; it round-trips here because
    /// `Iso8859_1` is backed by the windows-1252 codec.
    #[test]
    fn latin1_roundtrip() {
        let original = "café — naïve";
        let codec = Encoding::Iso8859_1;

        let encoded = codec.encode(original).expect("encode");
        assert!(encoded.contains(&0xE9), "expected 0xE9 for é");

        let round_tripped = codec.decode(&encoded).expect("decode");
        assert_eq!(round_tripped, original);
    }

    /// A lone 0xE9 byte is malformed UTF-8 but a valid Latin-1 `é`.
    #[test]
    fn utf8_decoder_rejects_latin1_bytes() {
        let latin1_bytes = b"caf\xE9";

        let as_utf8 = Encoding::Utf8.decode(latin1_bytes);
        assert!(as_utf8.is_err());

        let as_latin1 = Encoding::Iso8859_1.decode(latin1_bytes).unwrap();
        assert_eq!(as_latin1, "café");
    }

    /// Characters outside the target repertoire make `encode` fail.
    #[test]
    fn unrepresentable_char_errors_on_encode() {
        let result = Encoding::Iso8859_1.encode("hello 😀");
        assert!(result.is_err());
    }

    /// Aliases resolve to the expected variant regardless of ASCII case.
    #[test]
    fn parses_aliases() {
        let cases = [
            ("utf8", Encoding::Utf8),
            ("UTF-8", Encoding::Utf8),
            ("latin1", Encoding::Iso8859_1),
            ("ISO-8859-1", Encoding::Iso8859_1),
            ("cp1252", Encoding::Windows1252),
        ];
        for (alias, expected) in cases {
            assert_eq!(
                alias.parse::<Encoding>().unwrap(),
                expected,
                "alias {:?}",
                alias
            );
        }
    }
}