1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
use std::fmt;
/// Text encodings that this module knows how to decode into a Rust `String`.
///
/// The `Display` implementation renders each variant as a lowercase label
/// (`utf-8`, `cp-437`, `shift-jis`).
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Encoding {
/// UTF-8; decoding validates the bytes with `std::str::from_utf8`.
Utf8,
/// Code page 437; decoding goes through the `oem_cp` CP437 decoding table.
Cp437,
/// Shift JIS; decoding goes through `encoding_rs::SHIFT_JIS`.
ShiftJis,
}
impl fmt::Display for Encoding {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
use Encoding as T;
match self {
T::Utf8 => write!(f, "utf-8"),
T::Cp437 => write!(f, "cp-437"),
T::ShiftJis => write!(f, "shift-jis"),
}
}
}
/// Errors produced when converting raw bytes into a `String` fails.
#[derive(Debug, thiserror::Error)]
pub enum DecodingError {
/// The input was not valid UTF-8; wraps the error reported by
/// `std::str::from_utf8`.
#[error("invalid utf-8: {0}")]
Utf8Error(std::str::Utf8Error),
/// The decoder could not provide an output buffer length for the input
/// (`max_utf8_buffer_length` returned `None`).
#[error("text too large to be converted")]
StringTooLarge,
/// The decoder reported errors while decoding the input; the payload is
/// the name of the encoding that was being decoded.
#[error("encoding error: {0}")]
EncodingError(&'static str),
}
impl From<std::str::Utf8Error> for DecodingError {
fn from(e: std::str::Utf8Error) -> Self {
DecodingError::Utf8Error(e)
}
}
impl Encoding {
    /// Decodes the bytes in `i` into an owned `String` according to `self`.
    ///
    /// # Errors
    ///
    /// * `DecodingError::Utf8Error` when `self` is `Utf8` and `i` is not
    ///   valid UTF-8.
    /// * Any error returned by `decode_as` when `self` is `ShiftJis`.
    ///
    /// The `Cp437` arm always returns `Ok`: the result of the `oem_cp` table
    /// lookup is wrapped unconditionally.
    pub(crate) fn decode(&self, i: &[u8]) -> Result<String, DecodingError> {
        match self {
            Encoding::Utf8 => Ok(std::str::from_utf8(i)?.to_owned()),
            Encoding::Cp437 => {
                let text = oem_cp::decode_string_complete_table(
                    i,
                    &oem_cp::code_table::DECODING_TABLE_CP437,
                );
                Ok(text)
            }
            Encoding::ShiftJis => self.decode_as(i, encoding_rs::SHIFT_JIS),
        }
    }

    /// Decodes `i` with the given `encoding_rs` encoding in a single pass.
    ///
    /// # Errors
    ///
    /// * `DecodingError::StringTooLarge` when the decoder cannot report an
    ///   output buffer length for `i.len()` input bytes.
    /// * `DecodingError::EncodingError` (carrying `encoding.name()`) when the
    ///   decoder reports errors in the input.
    fn decode_as(
        &self,
        i: &[u8],
        encoding: &'static encoding_rs::Encoding,
    ) -> Result<String, DecodingError> {
        let mut decoder = encoding.new_decoder();

        // Size the output buffer for the worst case up front.
        let capacity = match decoder.max_utf8_buffer_length(i.len()) {
            Some(n) => n,
            None => return Err(DecodingError::StringTooLarge),
        };
        let mut buf = vec![0u8; capacity];

        // The final `true` marks `i` as the last (and only) chunk of input.
        let (_status, _bytes_read, bytes_written, had_errors) =
            decoder.decode_to_utf8(i, &mut buf, true);
        if had_errors {
            return Err(DecodingError::EncodingError(encoding.name()));
        }

        // Drop the unused tail; `bytes_written` counts bytes placed in `buf`,
        // so this only ever shrinks the vector.
        buf.truncate(bytes_written);

        // SAFETY: relies on `decode_to_utf8` having written only valid UTF-8
        // into the first `bytes_written` bytes of `buf`.
        // NOTE(review): this guarantee comes from encoding_rs, not from code
        // visible here — confirm against the encoding_rs documentation.
        Ok(unsafe { String::from_utf8_unchecked(buf) })
    }
}
/// Checks whether `input` is valid UTF-8 and whether it contains characters
/// outside a restricted ASCII subset.
///
/// Returns `(valid, require)`:
///
/// * `valid` is `false` when `input` is not well-formed UTF-8; `require` is
///   then `false` as well.
/// * `require` is `true` when at least one character is below U+0020, above
///   U+007D (so `~`, DEL and every non-ASCII character), or is a backslash
///   (U+005C).
///
/// NOTE(review): presumably `require` tells the caller that the text must be
/// explicitly treated as UTF-8 because such characters are not read the same
/// way under every legacy code page — confirm against the callers.
pub(crate) fn detect_utf8(input: &[u8]) -> (bool, bool) {
    if let Ok(text) = std::str::from_utf8(input) {
        // Anything other than ' '..='}' minus the backslash trips the flag.
        let outside_safe_ascii = |ch: char| !(' '..='}').contains(&ch) || ch == '\\';
        (true, text.chars().any(outside_safe_ascii))
    } else {
        (false, false)
    }
}