rsfbclient_core/
charset.rs

//! Charset definitions and functions
//!
//! [Reference](http://www.destructor.de/firebird/charsets.htm)
4
5use encoding::{all, types::EncodingRef, DecoderTrap, EncoderTrap};
6use std::{borrow::Cow, fmt, str, str::FromStr};
7
8use crate::FbError;
9
10/// Charset definition. Used to encode/decode the
11/// strings.
12pub struct Charset {
13    /// Charset used in firebird
14    pub on_firebird: &'static str,
15
16    /// Equivalent charset used on rust code
17    pub on_rust: Option<EncodingRef>,
18}
19
20impl Charset {
21    /// Decode the bytes using the current charset
22    pub fn decode<'a, B>(&self, bytes: B) -> Result<String, FbError>
23    where
24        B: Into<Cow<'a, [u8]>>,
25    {
26        let bytes = bytes.into();
27
28        if let Some(charset) = self.on_rust {
29            charset.decode(&bytes, DecoderTrap::Strict).map_err(|e| {
30                format!(
31                    "Found column with an invalid {} string: {}",
32                    charset.name(),
33                    e
34                )
35                .into()
36            })
37        } else {
38            String::from_utf8(bytes.into_owned()).map_err(|e| e.into())
39        }
40    }
41
42    // Encode the string into bytes using the current charset
43    pub fn encode<'a, S>(&self, s: S) -> Result<Cow<'a, [u8]>, FbError>
44    where
45        S: Into<Cow<'a, str>>,
46    {
47        let s = s.into();
48
49        if let Some(charset) = self.on_rust {
50            let enc = charset.encode(&s, EncoderTrap::Strict).map_err(|e| {
51                FbError::Other(format!(
52                    "Found param with an invalid {} string: {}",
53                    charset.name(),
54                    e
55                ))
56            })?;
57
58            Ok(enc.into())
59        } else {
60            Ok(match s {
61                Cow::Owned(s) => Cow::Owned(s.into_bytes()),
62                Cow::Borrowed(s) => Cow::Borrowed(s.as_bytes()),
63            })
64        }
65    }
66}
67
68impl Clone for Charset {
69    fn clone(&self) -> Self {
70        Self {
71            on_firebird: self.on_firebird,
72            on_rust: self.on_rust,
73        }
74    }
75}
76
77impl FromStr for Charset {
78    type Err = FbError;
79
80    fn from_str(s: &str) -> Result<Self, Self::Err> {
81        match s.to_lowercase().trim().replace(['_', '-'], "").as_str() {
82            "utf8" => Ok(UTF_8),
83            "iso88591" => Ok(ISO_8859_1),
84            "iso88592" => Ok(ISO_8859_2),
85            "iso88593" => Ok(ISO_8859_3),
86            "iso88594" => Ok(ISO_8859_4),
87            "iso88595" => Ok(ISO_8859_5),
88            "iso88596" => Ok(ISO_8859_6),
89            "iso88597" => Ok(ISO_8859_7),
90            "iso885913" => Ok(ISO_8859_13),
91            "win1250" => Ok(WIN_1250),
92            "win1251" => Ok(WIN_1251),
93            "win1252" => Ok(WIN_1252),
94            "win1253" => Ok(WIN_1253),
95            "win1254" => Ok(WIN_1254),
96            "win1256" => Ok(WIN_1256),
97            "win1257" => Ok(WIN_1257),
98            "win1258" => Ok(WIN_1258),
99            "ascii" => Ok(ASCII),
100            "koi8r" => Ok(KOI8_R),
101            "koi8u" => Ok(KOI8_U),
102            "eucjp" => Ok(EUC_JP),
103            "big52003" => Ok(BIG5_2003),
104            _ => Err(FbError::from(format!(
105                "'{}' doesn't represent any charset",
106                s
107            ))),
108        }
109    }
110}
111
112impl fmt::Debug for Charset {
113    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
114        f.debug_struct("Charset")
115            .field("on_firebird", &self.on_firebird)
116            .finish()
117    }
118}
119
120impl PartialEq for Charset {
121    fn eq(&self, other: &Self) -> bool {
122        self.on_firebird == other.on_firebird
123    }
124}
125
126/// The default charset. Works in most cases
127pub const UTF_8: Charset = Charset {
128    on_firebird: "UTF8",
129    on_rust: None, // Will use the std from_utf8
130};
131
132/// Western Europe. Latin 1
133pub const ISO_8859_1: Charset = Charset {
134    on_firebird: "ISO8859_1",
135    on_rust: Some(all::ISO_8859_1),
136};
137
138/// Central Europe
139pub const ISO_8859_2: Charset = Charset {
140    on_firebird: "ISO8859_2",
141    on_rust: Some(all::ISO_8859_2),
142};
143
144/// Southern Europe
145pub const ISO_8859_3: Charset = Charset {
146    on_firebird: "ISO8859_3",
147    on_rust: Some(all::ISO_8859_3),
148};
149
150/// North European
151pub const ISO_8859_4: Charset = Charset {
152    on_firebird: "ISO8859_4",
153    on_rust: Some(all::ISO_8859_4),
154};
155
156/// Cyrillic
157pub const ISO_8859_5: Charset = Charset {
158    on_firebird: "ISO8859_5",
159    on_rust: Some(all::ISO_8859_5),
160};
161
162/// Arabic
163pub const ISO_8859_6: Charset = Charset {
164    on_firebird: "ISO8859_6",
165    on_rust: Some(all::ISO_8859_6),
166};
167
168/// Modern Greek
169pub const ISO_8859_7: Charset = Charset {
170    on_firebird: "ISO8859_7",
171    on_rust: Some(all::ISO_8859_7),
172};
173
174/// Baltic
175pub const ISO_8859_13: Charset = Charset {
176    on_firebird: "ISO8859_13",
177    on_rust: Some(all::ISO_8859_13),
178};
179
180/// Central Europe
181pub const WIN_1250: Charset = Charset {
182    on_firebird: "WIN1250",
183    on_rust: Some(all::WINDOWS_1250),
184};
185
186/// Cyrillic
187pub const WIN_1251: Charset = Charset {
188    on_firebird: "WIN1251",
189    on_rust: Some(all::WINDOWS_1251),
190};
191
192/// Western Europe, America. Latin-1 with Windows extensions. Brazilian Portuguese
193pub const WIN_1252: Charset = Charset {
194    on_firebird: "WIN1252",
195    on_rust: Some(all::WINDOWS_1252),
196};
197
198/// Modern Greek
199pub const WIN_1253: Charset = Charset {
200    on_firebird: "WIN1253",
201    on_rust: Some(all::WINDOWS_1253),
202};
203
204/// Turkish
205pub const WIN_1254: Charset = Charset {
206    on_firebird: "WIN1254",
207    on_rust: Some(all::WINDOWS_1254),
208};
209
210/// Arabic
211pub const WIN_1256: Charset = Charset {
212    on_firebird: "WIN1256",
213    on_rust: Some(all::WINDOWS_1256),
214};
215
216/// Baltic
217pub const WIN_1257: Charset = Charset {
218    on_firebird: "WIN1257",
219    on_rust: Some(all::WINDOWS_1257),
220};
221
222/// Vietnamese
223pub const WIN_1258: Charset = Charset {
224    on_firebird: "WIN1258",
225    on_rust: Some(all::WINDOWS_1258),
226};
227
228/// English
229pub const ASCII: Charset = Charset {
230    on_firebird: "ASCII",
231    on_rust: Some(all::ASCII),
232};
233
234/// Russian
235pub const KOI8_R: Charset = Charset {
236    on_firebird: "KOI8R",
237    on_rust: Some(all::KOI8_R),
238};
239
240/// Ukrainian
241pub const KOI8_U: Charset = Charset {
242    on_firebird: "KOI8U",
243    on_rust: Some(all::KOI8_U),
244};
245
246/// Japanese
247pub const EUC_JP: Charset = Charset {
248    on_firebird: "EUCJ_0208",
249    on_rust: Some(all::EUC_JP),
250};
251
252/// Chinese
253pub const BIG5_2003: Charset = Charset {
254    on_firebird: "BIG_5",
255    on_rust: Some(all::BIG5_2003),
256};