use encoding_rs::Encoding;
pub fn encoding_from_code_page(code_page: &str) -> Option<&'static Encoding> {
match code_page.to_ascii_lowercase().as_str() {
"gb2312" | "ansi_936" => Some(encoding_rs::GBK),
"big5" | "ansi_950" => Some(encoding_rs::BIG5),
"korean" | "ansi_949" | "johab" => Some(encoding_rs::EUC_KR),
"ansi_932" => Some(encoding_rs::SHIFT_JIS),
"dos437" => Some(encoding_rs::IBM866), "dos850" => Some(encoding_rs::WINDOWS_1252), "dos852" => Some(encoding_rs::WINDOWS_1250), "dos855" | "dos866" => Some(encoding_rs::IBM866), "dos857" => Some(encoding_rs::WINDOWS_1254), "dos860" => Some(encoding_rs::WINDOWS_1252), "dos861" => Some(encoding_rs::WINDOWS_1252), "dos863" => Some(encoding_rs::WINDOWS_1252), "dos865" => Some(encoding_rs::WINDOWS_1252), "dos869" => Some(encoding_rs::WINDOWS_1253),
"ansi_874" => Some(encoding_rs::WINDOWS_874),
"ansi_1250" => Some(encoding_rs::WINDOWS_1250),
"ansi_1251" => Some(encoding_rs::WINDOWS_1251),
"ansi_1252" => Some(encoding_rs::WINDOWS_1252),
"ansi_1253" => Some(encoding_rs::WINDOWS_1253),
"ansi_1254" => Some(encoding_rs::WINDOWS_1254),
"ansi_1255" => Some(encoding_rs::WINDOWS_1255),
"ansi_1256" => Some(encoding_rs::WINDOWS_1256),
"ansi_1257" => Some(encoding_rs::WINDOWS_1257),
"ansi_1258" => Some(encoding_rs::WINDOWS_1258),
"iso8859-1" | "iso_8859-1" => Some(encoding_rs::WINDOWS_1252),
"iso8859-2" | "iso_8859-2" => Some(encoding_rs::ISO_8859_2),
"iso8859-3" | "iso_8859-3" => Some(encoding_rs::ISO_8859_3),
"iso8859-4" | "iso_8859-4" => Some(encoding_rs::ISO_8859_4),
"iso8859-5" | "iso_8859-5" => Some(encoding_rs::ISO_8859_5),
"iso8859-6" | "iso_8859-6" => Some(encoding_rs::ISO_8859_6),
"iso8859-7" | "iso_8859-7" => Some(encoding_rs::ISO_8859_7),
"iso8859-8" | "iso_8859-8" => Some(encoding_rs::ISO_8859_8),
"iso8859-9" | "iso_8859-9" => Some(encoding_rs::WINDOWS_1254),
"iso8859-10" | "iso_8859-10" => Some(encoding_rs::ISO_8859_10),
"iso8859-13" | "iso_8859-13" => Some(encoding_rs::ISO_8859_13),
"iso8859-14" | "iso_8859-14" => Some(encoding_rs::ISO_8859_14),
"iso8859-15" | "iso_8859-15" => Some(encoding_rs::ISO_8859_15),
"koi8-r" => Some(encoding_rs::KOI8_R),
"koi8-u" => Some(encoding_rs::KOI8_U),
"ascii" | "utf-8" | "utf8" | "unicode" => None,
_ => Some(encoding_rs::WINDOWS_1252),
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// An upper-case ANSI name resolves to the matching Windows code page.
    #[test]
    fn test_ansi_1252() {
        assert_eq!(
            encoding_from_code_page("ANSI_1252"),
            Some(encoding_rs::WINDOWS_1252)
        );
    }

    /// Lower- and upper-case spellings of the same name yield the same result.
    #[test]
    fn test_case_insensitive() {
        let lower = encoding_from_code_page("ansi_1251");
        let upper = encoding_from_code_page("ANSI_1251");
        assert_eq!(lower, upper);
    }

    /// "ASCII" yields no encoding.
    #[test]
    fn test_ascii_returns_none() {
        assert!(encoding_from_code_page("ASCII").is_none());
    }

    /// "UTF-8" yields no encoding.
    #[test]
    fn test_utf8_returns_none() {
        assert!(encoding_from_code_page("UTF-8").is_none());
    }

    /// Names that match no arm fall back to Windows-1252.
    #[test]
    fn test_unknown_returns_windows1252() {
        assert_eq!(
            encoding_from_code_page("SOMETHING_UNKNOWN"),
            Some(encoding_rs::WINDOWS_1252)
        );
    }

    /// The CJK code-page names resolve to their multi-byte encodings.
    #[test]
    fn test_asian_encodings() {
        let cases = [
            ("GB2312", encoding_rs::GBK),
            ("BIG5", encoding_rs::BIG5),
            ("ANSI_932", encoding_rs::SHIFT_JIS),
            ("KOREAN", encoding_rs::EUC_KR),
        ];
        for &(name, expected) in cases.iter() {
            assert_eq!(
                encoding_from_code_page(name),
                Some(expected),
                "code page {}",
                name
            );
        }
    }
}