local_encoding_ng/
windows.rs

1//! 8-bit string converters for Windows systems.
2use std::ptr;
3use std::io::{Error, ErrorKind, Result};
4use std::ffi::OsStr;
5use std::os::windows::ffi::OsStrExt;
6use winapi::shared::minwindef::{BOOL, DWORD};
7use super::Encoder;
8#[cfg(test)]
9use winapi::um::winnls::{CP_ACP, CP_UTF8};
10use winapi::um::winnt::LPSTR;
11use winapi::um::stringapiset::{MultiByteToWideChar, WideCharToMultiByte};
12
/// `MultiByteToWideChar` flag: always use precomposed characters, that is, characters having
/// a single character value for a base or nonspacing character combination.
pub const MB_PRECOMPOSED: DWORD = 0x00000001;
/// `MultiByteToWideChar` flag: always use decomposed characters, that is, characters in which
/// a base character and one or more nonspacing characters each have distinct code point values.
pub const MB_COMPOSITE: DWORD = 0x00000002;
/// `MultiByteToWideChar` flag: use glyph characters instead of control characters.
pub const MB_USEGLYPHCHARS: DWORD = 0x00000004;
/// `MultiByteToWideChar` flag: fail if an invalid input character is encountered.
///
/// This is the flag the `Encoder` implementation in this module passes when decoding bytes.
pub const MB_ERR_INVALID_CHARS: DWORD = 0x00000008;
/// `WideCharToMultiByte` flag: convert composite characters, consisting of a base character and
/// a nonspacing character, each with different character values.
///
/// This is the flag `string_to_multibyte` passes when encoding a string.
pub const WC_COMPOSITECHECK: DWORD = 0x00000200;
/// `WideCharToMultiByte` flag: discard nonspacing characters during conversion.
pub const WC_DISCARDNS: DWORD = 0x00000010;
/// `WideCharToMultiByte` flag: generate separate characters during conversion (the default).
pub const WC_SEPCHARS: DWORD = 0x00000020;
/// `WideCharToMultiByte` flag: replace exceptions with the default character during conversion.
pub const WC_DEFAULTCHAR: DWORD = 0x00000040;
/// `WideCharToMultiByte` flag: fail if an invalid input character is encountered.
pub const WC_ERR_INVALID_CHARS: DWORD = 0x00000080;
/// `WideCharToMultiByte` flag: translate any Unicode characters that do not translate directly
/// to multibyte equivalents to the default character specified by `lpDefaultChar`.
pub const WC_NO_BEST_FIT_CHARS: DWORD = 0x00000400;
37
/// Encoder backed by the WinAPI calls `MultiByteToWideChar` and `WideCharToMultiByte`.
///
/// The wrapped `u32` is the code page identifier handed to those calls.
pub struct EncoderCodePage(pub u32);
40
41impl Encoder for EncoderCodePage {
42    ///     Convert from bytes to string.
43    fn to_string(&self, data: &[u8]) -> Result<String> {
44        multi_byte_to_wide_char(self.0, MB_ERR_INVALID_CHARS, data)
45    }
46
47    /// Convert from string to bytes.
48    fn to_bytes(&self, data: &str) -> Result<Vec<u8>> {
49        string_to_multibyte(self.0, data, None)
50    }
51}
52
53/// Convert String to 8-bit string.
54///
55/// * `codepage`     - Code page to use in performing the conversion. This parameter can be set to
56///                    the value of any code page that is installed or available in the operating
57///                    system.
58/// * `data`         - Source string.
59/// * `default_char` - Optional character for replace to use if a character cannot be represented
60///                    in the specified code page.
61///
62/// Returns `Err` if an invalid input character is encountered and `default_char` is `None`.
63pub fn string_to_multibyte(codepage: DWORD,
64                           data: &str,
65                           default_char: Option<u8>)
66                           -> Result<Vec<u8>> {
67    let wstr: Vec<u16> = OsStr::new(data).encode_wide().collect();
68    wide_char_to_multi_byte(codepage,
69                            WC_COMPOSITECHECK,
70                            &wstr,
71                            default_char,
72                            default_char.is_none())
73        .and_then(|(data, invalid)| if invalid {
74            Err(Error::new(ErrorKind::InvalidInput,
75                           "Can't convert some characters to multibyte charset"))
76        } else {
77            Ok(data)
78        })
79}
80
81/// Wrapper for MultiByteToWideChar.
82///
83/// See https://msdn.microsoft.com/en-us/library/windows/desktop/dd319072(v=vs.85).aspx
84/// for more details.
85pub fn multi_byte_to_wide_char(codepage: DWORD,
86                               flags: DWORD,
87                               multi_byte_str: &[u8])
88                               -> Result<String> {
89    // Empty string
90    if multi_byte_str.is_empty() {
91        return Ok(String::new());
92    }
93    unsafe {
94        // Get length of UTF-16 string
95        let len = MultiByteToWideChar(codepage,
96                                                flags,
97                                                multi_byte_str.as_ptr() as LPSTR,
98                                                multi_byte_str.len() as i32,
99                                                ptr::null_mut(),
100                                                0);
101        if len > 0 {
102            // Convert to UTF-16
103            let mut wstr: Vec<u16> = Vec::with_capacity(len as usize);
104            wstr.set_len(len as usize);
105            let len = MultiByteToWideChar(codepage,
106                                                    flags,
107                                                    multi_byte_str.as_ptr() as LPSTR,
108                                                    multi_byte_str.len() as i32,
109                                                    wstr.as_mut_ptr(),
110                                                    len);
111            if len > 0 {
112                return String::from_utf16(&wstr[0..(len as usize)])
113                    .map_err(|e| Error::new(ErrorKind::InvalidInput, e));
114            }
115        }
116        Err(Error::last_os_error())
117    }
118
119}
120
121/// Wrapper for WideCharToMultiByte.
122///
123/// See https://msdn.microsoft.com/ru-ru/library/windows/desktop/dd374130(v=vs.85).aspx
124/// for more details.
125pub fn wide_char_to_multi_byte(codepage: DWORD,
126                               flags: DWORD,
127                               wide_char_str: &[u16],
128                               default_char: Option<u8>,
129                               use_default_char_flag: bool)
130                               -> Result<(Vec<u8>, bool)> {
131    // Empty string
132    if wide_char_str.is_empty() {
133        return Ok((Vec::new(), false));
134    }
135    unsafe {
136        // Get length of multibyte string
137        let len = WideCharToMultiByte(codepage,
138                                                flags,
139                                                wide_char_str.as_ptr(),
140                                                wide_char_str.len() as i32,
141                                                ptr::null_mut(),
142                                                0,
143                                                ptr::null(),
144                                                ptr::null_mut());
145
146        if len > 0 {
147            // Convert from UTF-16 to multibyte
148            let mut astr: Vec<u8> = Vec::with_capacity(len as usize);
149            astr.set_len(len as usize);
150            let default_char_ref: [i8; 1] = match default_char {
151                Some(c) => [c as i8],
152                None => [0],
153            };
154            let mut use_char_ref: [BOOL; 1] = [0];
155            let len = WideCharToMultiByte(codepage,
156                                                    flags,
157                                                    wide_char_str.as_ptr(),
158                                                    wide_char_str.len() as i32,
159                                                    astr.as_mut_ptr() as LPSTR,
160                                                    len,
161                                                    match default_char {
162                                                        Some(_) => default_char_ref.as_ptr(),
163                                                        None => ptr::null(),
164                                                    },
165                                                    match use_default_char_flag {
166                                                        true => use_char_ref.as_mut_ptr(),
167                                                        false => ptr::null_mut(),
168                                                    });
169            if (len as usize) == astr.len() {
170                return Ok((astr, use_char_ref[0] != 0));
171            }
172            if len > 0 {
173                return Ok((astr[0..(len as usize)].to_vec(), use_char_ref[0] != 0));
174            }
175        }
176        Err(Error::last_os_error())
177    }
178}
179
/// Empty input decodes to an empty string.
#[test]
fn multi_byte_to_wide_char_empty() {
    let decoded = multi_byte_to_wide_char(CP_ACP, MB_ERR_INVALID_CHARS, b"").unwrap();
    assert_eq!(decoded, "");
}
185
/// Plain ASCII bytes decode unchanged under the system ANSI code page.
#[test]
fn multi_byte_to_wide_char_ascii() {
    let decoded = multi_byte_to_wide_char(CP_ACP, MB_ERR_INVALID_CHARS, b"Test").unwrap();
    assert_eq!(decoded, "Test");
}
191
/// A UTF-8 encoded Cyrillic word decodes to the matching string.
#[test]
fn multi_byte_to_wide_char_utf8() {
    let utf8_bytes = b"\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82";
    let decoded = multi_byte_to_wide_char(CP_UTF8, MB_ERR_INVALID_CHARS, utf8_bytes).unwrap();
    assert_eq!(decoded, "Тест");
}
200
/// A malformed UTF-8 sequence is rejected when `MB_ERR_INVALID_CHARS` is set.
#[test]
fn multi_byte_to_wide_char_invalid() {
    let result = multi_byte_to_wide_char(CP_UTF8, MB_ERR_INVALID_CHARS, b"Test\xC0");
    assert!(result.is_err());
}
205
/// Empty input encodes to no bytes and reports no substitution.
#[test]
fn wide_char_to_multi_byte_empty() {
    let converted = wide_char_to_multi_byte(CP_UTF8, WC_ERR_INVALID_CHARS, &[], None, false)
        .unwrap();
    let expected = (b"".to_vec(), false);
    assert_eq!(converted, expected);
}
212
/// ASCII code units encode unchanged under the system ANSI code page.
#[test]
fn wide_char_to_multi_byte_ascii() {
    // UTF-16 code units of "Test".
    let input: [u16; 4] = [0x0054, 0x0065, 0x0073, 0x0074];
    let converted = wide_char_to_multi_byte(CP_ACP, WC_COMPOSITECHECK, &input, None, true)
        .unwrap();
    assert_eq!(converted, (b"Test".to_vec(), false));
}
223
/// A single CJK code unit encodes to its three-byte UTF-8 form.
#[test]
fn wide_char_to_multi_byte_utf8() {
    let input: [u16; 1] = [0x6F22];
    let converted = wide_char_to_multi_byte(CP_UTF8, WC_ERR_INVALID_CHARS, &input, None, false)
        .unwrap();
    assert_eq!(converted, (b"\xE6\xBC\xA2".to_vec(), false));
}
234
/// An unrepresentable code unit is replaced by the supplied default byte and reported.
#[test]
fn wide_char_to_multi_byte_replace() {
    // "Test", one CJK code unit, then ")".
    let input: [u16; 6] = [0x0054, 0x0065, 0x0073, 0x0074, 0x6F22, 0x0029];
    let flags = WC_DEFAULTCHAR | WC_COMPOSITECHECK;
    let converted = wide_char_to_multi_byte(CP_ACP, flags, &input, Some(b':'), true).unwrap();
    assert_eq!(converted, (b"Test:)".to_vec(), true));
}
245
/// The substitution flag is set only when the default byte was actually used.
#[test]
fn wide_char_to_multi_byte_invalid() {
    // Encode one code unit with ':' as the replacement byte and reporting enabled.
    let convert_unit = |unit: u16| {
        wide_char_to_multi_byte(CP_ACP, WC_COMPOSITECHECK, &[unit], Some(b':'), true).unwrap()
    };

    // The CJK code unit gets replaced, and the replacement is reported.
    assert_eq!(convert_unit(0x6F22), (b":".to_vec(), true));
    // A plain space is representable, so nothing is reported.
    assert_eq!(convert_unit(0x0020), (b" ".to_vec(), false));
}
263
#[cfg(test)]
mod tests {
    extern crate winapi;

    use super::*;
    use super::super::Encoder;

    /// Windows-1251 bytes decode to the Cyrillic test word.
    #[test]
    fn cp1251_to_string_test() {
        let encoder = EncoderCodePage(1251);
        let decoded = encoder.to_string(b"\xD2\xE5\xF1\xF2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    /// The Cyrillic test word encodes to its Windows-1251 bytes.
    #[test]
    fn string_to_cp1251_test() {
        let encoder = EncoderCodePage(1251);
        let encoded = encoder.to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\xD2\xE5\xF1\xF2");
    }

    /// Code page 866 bytes decode to the Cyrillic test word.
    #[test]
    fn cp866_to_string_test() {
        let encoder = EncoderCodePage(866);
        let decoded = encoder.to_string(b"\x92\xA5\xE1\xE2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    /// The Cyrillic test word encodes to its code page 866 bytes.
    #[test]
    fn string_to_cp866_test() {
        let encoder = EncoderCodePage(866);
        let encoded = encoder.to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\x92\xA5\xE1\xE2");
    }
}