1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
pub use chardet::*;
pub use encoding_rs::*;

/// Default candidate encodings, listed in the order they are tried:
/// UTF-8, then GBK, then GB18030.
pub static DEFAULT_ENCODE_LABEL_LIST: [&Encoding; 3] = [UTF_8, GBK, GB18030];

/// Decodes `content` with the first candidate encoding that yields clean text.
///
/// The candidates in `decode_labels` are tried in order. A candidate is rejected
/// when the decoder reports malformed input, or when the decoded text contains
/// U+FFFD (the replacement character). As a consequence, input that legitimately
/// encodes U+FFFD is rejected too.
///
/// Returns `None` when every candidate is rejected, or when `decode_labels` is empty.
///
/// ```rust
/// use e_utils::system::encode::{DEFAULT_ENCODE_LABEL_LIST, decode};
/// fn main() {
///     let stdout = b"out -> \xe4\xbd\xa0\xe5\xa5\xbd: \xe6\x88\x91\xe4\xb8\x8d\xe8\x83\xbd\xe8\xae\xa4\xe8\xaf\x86\xe8\xaf\xad\xe8\xa8\x80\r\n\r\n Windows \xe6\x9c\xaa\xe6\xbf\x80\xe6\xb4\xbb";
///     let txt = decode(stdout, &DEFAULT_ENCODE_LABEL_LIST)
///         .unwrap_or_else(|| String::from_utf8_lossy(stdout).to_string());
///     println!("{:?}", txt);
/// }
/// ```
pub fn decode<'a>(content: &[u8], decode_labels: &'a [&'static Encoding]) -> Option<String> {
  decode_labels.iter().find_map(|candidate| {
    // The middle tuple element (the encoding actually used) is not needed here.
    let (text, _, malformed) = candidate.decode(content);
    if malformed || text.contains('\u{FFFD}') {
      // Unclean result: move on to the next candidate.
      None
    } else {
      Some(text.into_owned())
    }
  })
}

/// 自动检测编码并解码
/// ```rust
/// use e_utils::system::encode::auto_decode;
/// fn main() {
///     let stdout = b"out -> \xe4\xbd\xa0\xe5\xa5\xbd: \xe6\x88\x91\xe4\xb8\x8d\xe8\x83\xbd\xe8\xae\xa4\xe8\xaf\x86\xe8\xaf\xad\xe8\xa8\x80\r\n\r\n Windows \xe6\x9c\xaa\xe6\xbf\x80\xe6\xb4\xbb";
///     if let Some(txt) = auto_decode(stdout) {
///         println!("{:?}", txt);
///     } else {
///         println!("Failed to decode the content");
///     }
/// }
/// ```
pub fn auto_decode(content: &[u8]) -> Option<String> {
  let mut labels: Vec<&'static Encoding> = DEFAULT_ENCODE_LABEL_LIST.to_vec();
  // 自动检测编码
  let detected = detect(content);
  let detected_charset = detected.0.as_str().to_lowercase();
  if let Some(encoding) = encoding_rs::Encoding::for_label(detected_charset.as_bytes()) {
    labels.push(encoding);
  }

  decode(content, &labels)
}

#[cfg(test)]
mod tests {
  use super::*;

  /// Plain UTF-8 input must decode through the default candidates.
  #[test]
  fn test_auto_decode() {
    let stdout = b"out -> \xe4\xbd\xa0\xe5\xa5\xbd: \xe6\x88\x91\xe4\xb8\x8d\xe8\x83\xbd\xe8\xae\xa4\xe8\xaf\x86\xe8\xaf\xad\xe8\xa8\x80\r\n\r\n Windows \xe6\x9c\xaa\xe6\xbf\x80\xe6\xb4\xbb";
    let expected = "out -> 你好: 我不能认识语言\r\n\r\n Windows 未激活";
    // Assert on the Option itself: a lossy-UTF-8 fallback would hide a `None`.
    assert_eq!(auto_decode(stdout).as_deref(), Some(expected));
  }

  /// Log output that is valid UTF-8 (including a control byte and private-use
  /// code points) must come back unchanged rather than being re-interpreted.
  #[test]
  fn test_auto_decode_err() {
    const HEX_DATA: &[u8] = &[
      0x20, 0x1E, 0x20, 0x22, 0x2D, 0x30, 0x38, 0x2D, 0x30, 0x32, 0x5F, 0x31, 0x36, 0x3A, 0x32,
      0x34, 0x3A, 0x33, 0x37, 0x20, 0x30, 0x3A, 0x30, 0x30, 0x3A, 0x30, 0x30, 0x2E, 0x36, 0x35,
      0x35, 0x37, 0x31, 0x31, 0x20, 0x5B, 0x43, 0x4C, 0x45, 0x41, 0x4E, 0x5D, 0x20, 0x49, 0x4E,
      0x46, 0x4F, 0x20, 0xE9, 0x90, 0x97, 0xE5, 0xA0, 0x9F, 0xE6, 0xB9, 0xB0, 0xE8, 0xA4, 0xB0,
      0xE6, 0x92, 0xB3, 0xE5, 0xA2, 0xA0, 0xE7, 0xBB, 0x8B, 0xE5, 0xAC, 0xAA, 0xE7, 0xB0, 0xAD,
      0xE9, 0x90, 0x97, 0xE5, 0xA0, 0x9F, 0xE6, 0xB9, 0xB0, 0xE9, 0x94, 0x9B, 0x3F, 0x68, 0x67,
      0x2D, 0x61, 0x75, 0x74, 0x6F, 0x74, 0x65, 0x74, 0x2D, 0x33, 0x2E, 0x30, 0x2E, 0x32, 0x0D,
      0x0A, 0x32, 0x30, 0x32, 0x34, 0x2D, 0x30, 0x38, 0x2D, 0x30, 0x32, 0x5F, 0x31, 0x36, 0x3A,
      0x32, 0x34, 0x3A, 0x33, 0x37, 0x20, 0x30, 0x3A, 0x30, 0x30, 0x3A, 0x30, 0x30, 0x2E, 0x36,
      0x35, 0x36, 0x32, 0x30, 0x37, 0x20, 0x5B, 0x43, 0x4C, 0x45, 0x41, 0x4E, 0x5D, 0x20, 0x49,
      0x4E, 0x46, 0x4F, 0x20, 0xE7, 0xBB, 0xAF, 0xE8, 0x8D, 0xA4, 0xE7, 0xB2, 0xBA, 0xE8, 0xA4,
      0xB0, 0xE6, 0x92, 0xB3, 0xE5, 0xA2, 0xA0, 0xE7, 0xBB, 0xAF, 0xE8, 0x8D, 0xA4, 0xE7, 0xB2,
      0xBA, 0xE9, 0x90, 0x9C, 0xEE, 0x88, 0x9A, 0xEE, 0x95, 0xA8, 0xE9, 0x94, 0x9B, 0x3F, 0x63,
      0x70, 0x39, 0x33, 0x36, 0x20, 0x54, 0x72, 0x75, 0x65, 0x0D, 0x0A, 0x32, 0x30, 0x32, 0x34,
      0x2D, 0x30, 0x38, 0x2D, 0x30, 0x32, 0x5F, 0x31, 0x36, 0x3A, 0x32, 0x34, 0x3A, 0x33, 0x38,
      0x20, 0x30, 0x3A, 0x30, 0x30, 0x3A, 0x30, 0x31, 0x2E, 0x36, 0x31, 0x31, 0x35, 0x30, 0x33,
      0x20, 0x5B, 0x43, 0x4C, 0x45, 0x41, 0x4E, 0x5D, 0x20, 0x49, 0x4E, 0x46, 0x4F, 0x20, 0xE9,
      0x8D, 0xA5, 0xE6, 0x83, 0xA7, 0xE8, 0x88, 0xB0, 0xE9, 0x90, 0xA3, 0xE5, 0xB2, 0x84, 0xE6,
      0xBD, 0xB0, 0x3D, 0xE5, 0xAF, 0xAE, 0xE2, 0x82, 0xAC, 0xE6, 0xBF, 0xAE, 0xE5, 0xAC, 0xAB,
      0xE5, 0xA2, 0xBD, 0xE7, 0x90, 0x9B, 0xE5, 0xB1, 0xBC, 0xE6, 0x8D, 0xA2, 0xE9, 0x8D, 0x94,
      0x3F, 0x0D, 0x0A, 0x32, 0x30, 0x32, 0x34, 0x2D, 0x30, 0x38, 0x2D, 0x30, 0x32, 0x5F, 0x31,
      0x36, 0x3A, 0x32, 0x34, 0x3A, 0x33, 0x38, 0x20, 0x30, 0x3A, 0x30, 0x30, 0x3A, 0x30, 0x31,
      0x2E, 0x36, 0x35, 0x31, 0x36, 0x37, 0x39, 0x20, 0x5B, 0x43, 0x4C, 0x45, 0x41, 0x4E, 0x5D,
      0x20, 0x57, 0x41, 0x52, 0x4E, 0x49, 0x4E, 0x47, 0x20, 0x5B, 0xE7, 0xBB, 0xAF, 0xE8, 0x8D,
      0xA4, 0xE7, 0xB2, 0xBA, 0xE9, 0x90, 0x97, 0xE5, 0xA0, 0x9F, 0xE6, 0xB9, 0xB0, 0xE5, 0xA6,
      0xAB, 0xE2, 0x82, 0xAC, 0xE9, 0x8F, 0x8C, 0xEE, 0x99, 0xA3, 0x43, 0x49, 0x53, 0x2D, 0x58,
      0x53, 0x4B, 0x4C, 0x2D, 0x4C, 0x57, 0x30, 0x31, 0x2D, 0x56, 0x31, 0x2E, 0x30, 0x2E, 0x30,
      0x2D, 0x32, 0x30, 0x32, 0x34, 0x30, 0x33, 0x31, 0x39,
    ];
    // The fixture is well-formed UTF-8, so UTF-8 (the first default) must win
    // and the text must round-trip byte-for-byte.
    let expected = std::str::from_utf8(HEX_DATA).expect("fixture is valid UTF-8");
    assert_eq!(auto_decode(HEX_DATA).as_deref(), Some(expected));
  }

  /// Bytes that are invalid UTF-8 but valid GBK fall through to the GBK candidate.
  #[test]
  fn test_decode_gbk_fallback() {
    // "你好" encoded as GBK; 0xC4 followed by 0xE3 is malformed in UTF-8.
    let gbk = b"\xc4\xe3\xba\xc3";
    assert_eq!(decode(gbk, &DEFAULT_ENCODE_LABEL_LIST).as_deref(), Some("你好"));
    assert_eq!(auto_decode(gbk).as_deref(), Some("你好"));
  }

  /// With no candidates there is nothing to try, so the result is `None`.
  #[test]
  fn test_decode_no_labels() {
    assert_eq!(decode(b"abc", &[]), None);
  }
}