/// Converts `text` to Unicode according to the numeric `charset` identifier.
///
/// * `0..=12` - decoded with `cp850_to_unicode`, then passed through
///   `apply_charset_substitutions` for that charset.
/// * `13` - decoded with `cp850_to_unicode` only.
/// * `27` - decoded with `windows1252_to_unicode`.
/// * any other value - returned as an unchanged copy.
///
/// # Errors
///
/// No branch produces an `Err` at present; every path returns `Ok`.
pub fn to_unicode_text(text: &str, charset: i32) -> Result<String, String> {
    let converted = match charset {
        0..=12 => {
            let decoded = cp850_to_unicode(text);
            // The arm pins `charset` to 0..=12, so the cast to usize is lossless.
            apply_charset_substitutions(&decoded, charset as usize)
        }
        13 => cp850_to_unicode(text),
        27 => windows1252_to_unicode(text),
        _ => text.to_string(),
    };
    Ok(converted)
}
/// Decodes `text` as code page 850, treating each `char` whose code point is
/// in `U+0080..=U+00FF` as a raw byte value and translating it with
/// `cp850_char`.
///
/// ASCII characters are kept as they are. Characters above `U+00FF` cannot
/// represent a single byte, so they are also kept as they are; casting them
/// to `u8` would silently drop the high bits and translate an unrelated byte.
fn cp850_to_unicode(text: &str) -> String {
    text.chars()
        .map(|c| match c as u32 {
            // The pattern bounds the value to one byte, so the cast is lossless.
            code @ 0x80..=0xFF => cp850_char(code as u8),
            _ => c,
        })
        .collect()
}
/// Maps a single code page 850 byte to its Unicode character.
///
/// Bytes below `0x80` are ASCII and map to the same code point. Bytes
/// `0x80..=0xFF` are looked up in the CP850 upper-half table, so every byte
/// has a defined result and nothing falls back to a Latin-1 interpretation.
fn cp850_char(b: u8) -> char {
    // CP850 upper half, indexed by `b - 0x80`; eight entries per line.
    const HIGH_HALF: [char; 128] = [
        // 0x80
        '\u{00C7}', '\u{00FC}', '\u{00E9}', '\u{00E2}', '\u{00E4}', '\u{00E0}', '\u{00E5}', '\u{00E7}',
        // 0x88
        '\u{00EA}', '\u{00EB}', '\u{00E8}', '\u{00EF}', '\u{00EE}', '\u{00EC}', '\u{00C4}', '\u{00C5}',
        // 0x90
        '\u{00C9}', '\u{00E6}', '\u{00C6}', '\u{00F4}', '\u{00F6}', '\u{00F2}', '\u{00FB}', '\u{00F9}',
        // 0x98
        '\u{00FF}', '\u{00D6}', '\u{00DC}', '\u{00F8}', '\u{00A3}', '\u{00D8}', '\u{00D7}', '\u{0192}',
        // 0xA0
        '\u{00E1}', '\u{00ED}', '\u{00F3}', '\u{00FA}', '\u{00F1}', '\u{00D1}', '\u{00AA}', '\u{00BA}',
        // 0xA8
        '\u{00BF}', '\u{00AE}', '\u{00AC}', '\u{00BD}', '\u{00BC}', '\u{00A1}', '\u{00AB}', '\u{00BB}',
        // 0xB0
        '\u{2591}', '\u{2592}', '\u{2593}', '\u{2502}', '\u{2524}', '\u{00C1}', '\u{00C2}', '\u{00C0}',
        // 0xB8
        '\u{00A9}', '\u{2563}', '\u{2551}', '\u{2557}', '\u{255D}', '\u{00A2}', '\u{00A5}', '\u{2510}',
        // 0xC0
        '\u{2514}', '\u{2534}', '\u{252C}', '\u{251C}', '\u{2500}', '\u{253C}', '\u{00E3}', '\u{00C3}',
        // 0xC8
        '\u{255A}', '\u{2554}', '\u{2569}', '\u{2566}', '\u{2560}', '\u{2550}', '\u{256C}', '\u{00A4}',
        // 0xD0
        '\u{00F0}', '\u{00D0}', '\u{00CA}', '\u{00CB}', '\u{00C8}', '\u{0131}', '\u{00CD}', '\u{00CE}',
        // 0xD8
        '\u{00CF}', '\u{2518}', '\u{250C}', '\u{2588}', '\u{2584}', '\u{00A6}', '\u{00CC}', '\u{2580}',
        // 0xE0
        '\u{00D3}', '\u{00DF}', '\u{00D4}', '\u{00D2}', '\u{00F5}', '\u{00D5}', '\u{00B5}', '\u{00FE}',
        // 0xE8
        '\u{00DE}', '\u{00DA}', '\u{00DB}', '\u{00D9}', '\u{00FD}', '\u{00DD}', '\u{00AF}', '\u{00B4}',
        // 0xF0
        '\u{00AD}', '\u{00B1}', '\u{2017}', '\u{00BE}', '\u{00B6}', '\u{00A7}', '\u{00F7}', '\u{00B8}',
        // 0xF8
        '\u{00B0}', '\u{00A8}', '\u{00B7}', '\u{00B9}', '\u{00B3}', '\u{00B2}', '\u{25A0}', '\u{00A0}',
    ];

    match b.checked_sub(0x80) {
        // `offset` is at most 0x7F, so it always indexes inside the table.
        Some(offset) => HIGH_HALF[usize::from(offset)],
        // Below 0x80 the byte is plain ASCII.
        None => char::from(b),
    }
}
/// Substitution table for charsets 0 through 13: one row per charset ID, each
/// with the same 11 positions.
///
/// `apply_charset_substitutions` searches for the strings in row 13 and
/// replaces each with the string at the same position in the selected row.
static CHARACTER_SETS_013: [[&str; 11]; 14] = [
    // charset 0
    ["#", "0", "@", "[", "\u{A2}", "]", "^", "`", "{", "|", "}"],
    // charset 1
    ["#", "0", "@", "\u{2153}", "\u{A2}", "\u{2154}", "^", "`", "\u{BC}", "\u{BD}", "\u{BE}"],
    // charset 2
    ["\u{A3}", "0", "@", "[", "\u{A2}", "]", "^", "`", "{", "|", "}"],
    // charset 3 (two positions expand to two-letter strings)
    ["\u{192}", "0", "\u{A7}", "[", "IJ", "]", "^", "`", "{", "ij", "}"],
    // charset 4
    ["#", "0", "@", "\u{C6}", "\u{D8}", "\u{C5}", "^", "`", "\u{E6}", "\u{F8}", "\u{E5}"],
    // charset 5
    [
        "\u{DC}", "0", "\u{C9}", "\u{C4}", "\u{D6}", "\u{C5}", "\u{DC}", "\u{E9}", "\u{E4}",
        "\u{F6}", "\u{E5}",
    ],
    // charset 6
    ["#", "0", "\u{A7}", "\u{C4}", "\u{D6}", "\u{DC}", "^", "`", "\u{E4}", "\u{F6}", "\u{FC}"],
    // charset 7
    ["\u{A3}", "0", "\u{E0}", "[", "\u{E7}", "]", "^", "`", "\u{E9}", "|", "\u{F9}"],
    // charset 8
    [
        "#", "0", "\u{E0}", "\u{E2}", "\u{E7}", "\u{EA}", "\u{EE}", "\u{F4}", "\u{E9}", "\u{F9}",
        "\u{E8}",
    ],
    // charset 9
    [
        "\u{A3}", "0", "\u{A7}", "[", "\u{E7}", "\u{E9}", "^", "\u{F9}", "\u{E0}", "\u{F2}",
        "\u{E8}",
    ],
    // charset 10
    ["#", "0", "\u{A7}", "\u{A1}", "\u{D1}", "\u{BF}", "^", "`", "{", "\u{F1}", "\u{E7}"],
    // charset 11
    [
        "\u{A3}", "0", "\u{C9}", "\u{C4}", "\u{D6}", "\u{DC}", "^", "\u{E4}", "\u{EB}", "\u{EF}",
        "\u{F6}",
    ],
    // charset 12
    ["#", "0", "@", "[", "\u{A5}", "]", "^", "`", "{", "|", "}"],
    // charset 13: the plain ASCII row used as the search keys
    ["#", "0", "@", "[", "\\", "]", "^", "`", "{", "|", "}"],
];
/// Rewrites `text` for the given `charset` row of `CHARACTER_SETS_013`.
///
/// Row 13 supplies the strings to look for; the row at index `charset`
/// supplies the replacement at the same position. Replacements are applied
/// one position after another, each over the whole intermediate string.
///
/// # Panics
///
/// Panics if `charset` is not a valid row index of `CHARACTER_SETS_013`.
fn apply_charset_substitutions(text: &str, charset: usize) -> String {
    let needles = &CHARACTER_SETS_013[13];
    let substitutes = &CHARACTER_SETS_013[charset];

    needles
        .iter()
        .zip(substitutes.iter())
        .fold(text.to_string(), |acc, (from, to)| acc.replace(*from, to))
}
/// Decodes `text` as Windows-1252, treating each `char` in `U+0080..=U+009F`
/// as a raw byte value and replacing it with the character Windows-1252
/// assigns to that byte.
///
/// Windows-1252 agrees with Unicode's first 256 code points everywhere except
/// `0x80..=0x9F`, so every other character is copied through unchanged. The
/// five bytes Windows-1252 leaves undefined (`0x81`, `0x8D`, `0x8F`, `0x90`,
/// `0x9D`) are also copied through unchanged.
///
/// NOTE(review): this assumes callers pass byte values as chars, the same
/// convention `cp850_to_unicode` uses - confirm against the call sites.
fn windows1252_to_unicode(text: &str) -> String {
    text.chars()
        .map(|c| match c {
            '\u{80}' => '\u{20AC}',
            '\u{82}' => '\u{201A}',
            '\u{83}' => '\u{0192}',
            '\u{84}' => '\u{201E}',
            '\u{85}' => '\u{2026}',
            '\u{86}' => '\u{2020}',
            '\u{87}' => '\u{2021}',
            '\u{88}' => '\u{02C6}',
            '\u{89}' => '\u{2030}',
            '\u{8A}' => '\u{0160}',
            '\u{8B}' => '\u{2039}',
            '\u{8C}' => '\u{0152}',
            '\u{8E}' => '\u{017D}',
            '\u{91}' => '\u{2018}',
            '\u{92}' => '\u{2019}',
            '\u{93}' => '\u{201C}',
            '\u{94}' => '\u{201D}',
            '\u{95}' => '\u{2022}',
            '\u{96}' => '\u{2013}',
            '\u{97}' => '\u{2014}',
            '\u{98}' => '\u{02DC}',
            '\u{99}' => '\u{2122}',
            '\u{9A}' => '\u{0161}',
            '\u{9B}' => '\u{203A}',
            '\u{9C}' => '\u{0153}',
            '\u{9E}' => '\u{017E}',
            '\u{9F}' => '\u{0178}',
            other => other,
        })
        .collect()
}