csv_lib/decoders/
decoders.rs1use crate::encoders::big5::encode_big5;
2use crate::encoders::gbk::encode_gbk;
3use crate::encoders::shiftjis::encode_shiftjis;
4use crate::encoders::koi8r::encode_koi8r;
5use crate::encoders::windows1252::encode_windows1252;
6use crate::encoders::windows1251::encode_windows1251;
7use crate::encoders::latin9::encode_latin9;
8use crate::encoders::latin2::encode_latin2;
9use crate::encoders::latin1::encode_latin1;
10use crate::encoders::utf8::encode_utf8;
11use std::borrow::Cow;
12
13use crate::decoders::{
14 utf8::{decode_utf8},
15 latin1::{decode_latin1},
16 latin2::{decode_latin2},
17 latin9::{decode_latin9},
18 windows1251::{decode_windows1251},
19 windows1252::{decode_windows1252},
20 koi8r::{decode_koi8r},
21 shiftjis::{decode_shiftjis},
22 gbk::{decode_gbk},
23 big5::{decode_big5},
24};
25
/// Text encodings that this module can decode from and encode to.
///
/// Each variant selects one codec pair in the `decode` / `encode` methods
/// implemented on this type. The enum is a plain field-less tag, so it is
/// cheap to copy, compare, and use as a key in hashed collections.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
// NOTE(review): presumably kept because not every variant is constructed
// inside this crate — confirm before removing.
#[allow(dead_code)]
pub enum Encoding {
    /// UTF-8.
    Utf8,
    /// Windows-1252.
    Windows1252,
    /// ISO-8859-1; handled by the `latin1` codec.
    ISO8859_1,
    /// ISO-8859-15; handled by the `latin9` codec.
    ISO8859_15,
    /// Windows-1251.
    Windows1251,
    /// KOI8-R.
    KOI8R,
    /// Shift JIS.
    ShiftJIS,
    /// GBK.
    GBK,
    /// GB2312; decoded and encoded through the same codec as [`Encoding::GBK`].
    GB2312,
    /// Big5.
    Big5,
    /// ISO-8859-2; handled by the `latin2` codec.
    ISO8859_2,
}
45
46impl Encoding {
47 #[allow(dead_code)]
54 pub fn decode (self, input: &[u8]) -> Cow<'_, str> {
55 match self {
56 Encoding::Utf8 => decode_utf8(input),
57 Encoding::ISO8859_1 => decode_latin1(input),
58 Encoding::ISO8859_2 => decode_latin2(input),
59 Encoding::ISO8859_15 => decode_latin9(input),
60 Encoding::Windows1251 => decode_windows1251(input),
61 Encoding::Windows1252 => decode_windows1252(input),
62 Encoding::KOI8R => decode_koi8r(input),
63 Encoding::ShiftJIS => decode_shiftjis(input),
64 Encoding::GBK | Encoding::GB2312 => decode_gbk(input),
65 Encoding::Big5 => decode_big5(input),
66 }
67 }
68
69 #[allow(dead_code)]
80 pub fn decode_with_bom_removal(self, input: &[u8]) -> (Cow<'_, str>, bool) {
81 let mut sliced_input = input;
82 let mut bom_removed = false;
83 if input.starts_with(&[0xEF, 0xBB, 0xBF]) {
85 sliced_input = &input[3..];
86 bom_removed = true;
87 }
88
89 let decoded = self.decode(sliced_input);
90
91 if !bom_removed && decoded.starts_with('\u{FEFF}') {
92 return match decoded {
93 Cow::Borrowed(s) => (Cow::Borrowed(&s[1..]), true),
94 Cow::Owned(mut s) => {
95 s.remove(0);
96 (Cow::Owned(s), true)
97 }
98 }
99 }
100
101 (decoded, bom_removed)
102 }
103
104 #[allow(dead_code)]
109 pub fn encode (self, input: &str) -> Vec<u8> {
110 match self {
111 Encoding::Utf8 => encode_utf8(input),
112 Encoding::ISO8859_1 => encode_latin1(input),
113 Encoding::ISO8859_2 => encode_latin2(input),
114 Encoding::ISO8859_15 => encode_latin9(input),
115 Encoding::Windows1251 => encode_windows1251(input),
116 Encoding::Windows1252 => encode_windows1252(input),
117 Encoding::KOI8R => encode_koi8r(input),
118 Encoding::ShiftJIS => encode_shiftjis(input),
119 Encoding::GBK | Encoding::GB2312 => encode_gbk(input),
120 Encoding::Big5 => encode_big5(input),
121 }
122 }
123}