1use std::ptr;
3use std::io::{Error, ErrorKind, Result};
4use std::ffi::OsStr;
5use std::os::windows::ffi::OsStrExt;
6use winapi::shared::minwindef::{BOOL, DWORD};
7use super::Encoder;
8#[cfg(test)]
9use winapi::um::winnls::{CP_ACP, CP_UTF8};
10use winapi::um::winnt::LPSTR;
11use winapi::um::stringapiset::{MultiByteToWideChar, WideCharToMultiByte};
12
// Conversion flags for the Win32 `MultiByteToWideChar` (`MB_*`) and
// `WideCharToMultiByte` (`WC_*`) functions. Descriptions follow the Win32
// documentation for those functions; confirm there before relying on details.

/// `MultiByteToWideChar` flag: use precomposed characters (a single code point
/// for a base/nonspacing character combination).
pub const MB_PRECOMPOSED: DWORD = 0x00000001;
/// `MultiByteToWideChar` flag: use composite (decomposed) characters, i.e. a
/// base character followed by its nonspacing characters.
pub const MB_COMPOSITE: DWORD = 0x00000002;
/// `MultiByteToWideChar` flag: use glyph characters instead of control characters.
pub const MB_USEGLYPHCHARS: DWORD = 0x00000004;
/// `MultiByteToWideChar` flag: fail when the input contains an invalid character.
pub const MB_ERR_INVALID_CHARS: DWORD = 0x00000008;
/// `WideCharToMultiByte` flag: convert composite characters to precomposed ones.
pub const WC_COMPOSITECHECK: DWORD = 0x00000200;
/// `WideCharToMultiByte` flag: discard nonspacing characters during conversion
/// (used together with `WC_COMPOSITECHECK`).
pub const WC_DISCARDNS: DWORD = 0x00000010;
/// `WideCharToMultiByte` flag: generate separate characters during conversion
/// (used together with `WC_COMPOSITECHECK`).
pub const WC_SEPCHARS: DWORD = 0x00000020;
/// `WideCharToMultiByte` flag: replace exceptions with the default character
/// during conversion (used together with `WC_COMPOSITECHECK`).
pub const WC_DEFAULTCHAR: DWORD = 0x00000040;
/// `WideCharToMultiByte` flag: fail when the input contains an invalid character.
pub const WC_ERR_INVALID_CHARS: DWORD = 0x00000080;
/// `WideCharToMultiByte` flag: map characters without a direct multibyte
/// equivalent to the default character instead of a "best fit" look-alike.
pub const WC_NO_BEST_FIT_CHARS: DWORD = 0x00000400;
37
/// Text encoder backed by a Windows code page.
///
/// The wrapped value is the code page identifier (for example `1251` or `866`)
/// that is handed to the conversion functions as their `codepage` argument.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EncoderCodePage(pub u32);
40
41impl Encoder for EncoderCodePage {
42 fn to_string(&self, data: &[u8]) -> Result<String> {
44 multi_byte_to_wide_char(self.0, MB_ERR_INVALID_CHARS, data)
45 }
46
47 fn to_bytes(&self, data: &str) -> Result<Vec<u8>> {
49 string_to_multibyte(self.0, data, None)
50 }
51}
52
53pub fn string_to_multibyte(codepage: DWORD,
64 data: &str,
65 default_char: Option<u8>)
66 -> Result<Vec<u8>> {
67 let wstr: Vec<u16> = OsStr::new(data).encode_wide().collect();
68 wide_char_to_multi_byte(codepage,
69 WC_COMPOSITECHECK,
70 &wstr,
71 default_char,
72 default_char.is_none())
73 .and_then(|(data, invalid)| if invalid {
74 Err(Error::new(ErrorKind::InvalidInput,
75 "Can't convert some characters to multibyte charset"))
76 } else {
77 Ok(data)
78 })
79}
80
81pub fn multi_byte_to_wide_char(codepage: DWORD,
86 flags: DWORD,
87 multi_byte_str: &[u8])
88 -> Result<String> {
89 if multi_byte_str.is_empty() {
91 return Ok(String::new());
92 }
93 unsafe {
94 let len = MultiByteToWideChar(codepage,
96 flags,
97 multi_byte_str.as_ptr() as LPSTR,
98 multi_byte_str.len() as i32,
99 ptr::null_mut(),
100 0);
101 if len > 0 {
102 let mut wstr: Vec<u16> = Vec::with_capacity(len as usize);
104 wstr.set_len(len as usize);
105 let len = MultiByteToWideChar(codepage,
106 flags,
107 multi_byte_str.as_ptr() as LPSTR,
108 multi_byte_str.len() as i32,
109 wstr.as_mut_ptr(),
110 len);
111 if len > 0 {
112 return String::from_utf16(&wstr[0..(len as usize)])
113 .map_err(|e| Error::new(ErrorKind::InvalidInput, e));
114 }
115 }
116 Err(Error::last_os_error())
117 }
118
119}
120
121pub fn wide_char_to_multi_byte(codepage: DWORD,
126 flags: DWORD,
127 wide_char_str: &[u16],
128 default_char: Option<u8>,
129 use_default_char_flag: bool)
130 -> Result<(Vec<u8>, bool)> {
131 if wide_char_str.is_empty() {
133 return Ok((Vec::new(), false));
134 }
135 unsafe {
136 let len = WideCharToMultiByte(codepage,
138 flags,
139 wide_char_str.as_ptr(),
140 wide_char_str.len() as i32,
141 ptr::null_mut(),
142 0,
143 ptr::null(),
144 ptr::null_mut());
145
146 if len > 0 {
147 let mut astr: Vec<u8> = Vec::with_capacity(len as usize);
149 astr.set_len(len as usize);
150 let default_char_ref: [i8; 1] = match default_char {
151 Some(c) => [c as i8],
152 None => [0],
153 };
154 let mut use_char_ref: [BOOL; 1] = [0];
155 let len = WideCharToMultiByte(codepage,
156 flags,
157 wide_char_str.as_ptr(),
158 wide_char_str.len() as i32,
159 astr.as_mut_ptr() as LPSTR,
160 len,
161 match default_char {
162 Some(_) => default_char_ref.as_ptr(),
163 None => ptr::null(),
164 },
165 match use_default_char_flag {
166 true => use_char_ref.as_mut_ptr(),
167 false => ptr::null_mut(),
168 });
169 if (len as usize) == astr.len() {
170 return Ok((astr, use_char_ref[0] != 0));
171 }
172 if len > 0 {
173 return Ok((astr[0..(len as usize)].to_vec(), use_char_ref[0] != 0));
174 }
175 }
176 Err(Error::last_os_error())
177 }
178}
179
#[test]
fn multi_byte_to_wide_char_empty() {
    // Empty input must decode to an empty string.
    let decoded = multi_byte_to_wide_char(CP_ACP, MB_ERR_INVALID_CHARS, b"").unwrap();
    assert_eq!(decoded, "");
}
185
#[test]
fn multi_byte_to_wide_char_ascii() {
    // Plain ASCII is expected to round-trip unchanged through the ANSI code page.
    let decoded = multi_byte_to_wide_char(CP_ACP, MB_ERR_INVALID_CHARS, b"Test").unwrap();
    assert_eq!(decoded, "Test");
}
191
#[test]
fn multi_byte_to_wide_char_utf8() {
    // UTF-8 encoded Cyrillic text decodes to the matching string.
    let utf8_bytes = b"\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82";
    let decoded = multi_byte_to_wide_char(CP_UTF8, MB_ERR_INVALID_CHARS, utf8_bytes).unwrap();
    assert_eq!(decoded, "Тест");
}
200
#[test]
fn multi_byte_to_wide_char_invalid() {
    // A malformed trailing UTF-8 byte must be reported as an error.
    let outcome = multi_byte_to_wide_char(CP_UTF8, MB_ERR_INVALID_CHARS, b"Test\xC0");
    assert!(outcome.is_err());
}
205
#[test]
fn wide_char_to_multi_byte_empty() {
    // Empty input yields no bytes and no "default character used" report.
    let converted =
        wide_char_to_multi_byte(CP_UTF8, WC_ERR_INVALID_CHARS, &[], None, false).unwrap();
    let expected = (b"".to_vec(), false);
    assert_eq!(converted, expected);
}
212
#[test]
fn wide_char_to_multi_byte_ascii() {
    // UTF-16 code units for "Test".
    let input: [u16; 4] = [0x0054, 0x0065, 0x0073, 0x0074];
    let converted =
        wide_char_to_multi_byte(CP_ACP, WC_COMPOSITECHECK, &input, None, true).unwrap();
    assert_eq!(converted, (b"Test".to_vec(), false));
}
223
#[test]
fn wide_char_to_multi_byte_utf8() {
    // U+6F22 is expected to encode to a three-byte UTF-8 sequence.
    let input: [u16; 1] = [0x6F22];
    let converted =
        wide_char_to_multi_byte(CP_UTF8, WC_ERR_INVALID_CHARS, &input, None, false).unwrap();
    assert_eq!(converted, (b"\xE6\xBC\xA2".to_vec(), false));
}
234
#[test]
fn wide_char_to_multi_byte_replace() {
    // "Test" + U+6F22 + ")": the test expects U+6F22 to be replaced by ':'
    // and the substitution to be reported.
    let input: [u16; 6] = [0x0054, 0x0065, 0x0073, 0x0074, 0x6F22, 0x0029];
    let conversion_flags = WC_DEFAULTCHAR | WC_COMPOSITECHECK;
    let converted =
        wide_char_to_multi_byte(CP_ACP, conversion_flags, &input, Some(b':'), true).unwrap();
    assert_eq!(converted, (b"Test:)".to_vec(), true));
}
245
#[test]
fn wide_char_to_multi_byte_invalid() {
    // U+6F22 is expected to be substituted with ':' and reported as such.
    let substituted =
        wide_char_to_multi_byte(CP_ACP, WC_COMPOSITECHECK, &[0x6F22], Some(b':'), true).unwrap();
    assert_eq!(substituted, (b":".to_vec(), true));

    // A plain space converts directly, so no substitution is reported.
    let direct =
        wide_char_to_multi_byte(CP_ACP, WC_COMPOSITECHECK, &[0x0020], Some(b':'), true).unwrap();
    assert_eq!(direct, (b" ".to_vec(), false));
}
263
#[cfg(test)]
mod tests {
    extern crate winapi;

    use super::*;
    use super::super::Encoder;

    // Round-trip checks for `EncoderCodePage` using the word "Тест" in the
    // Windows-1251 and CP866 code pages.

    #[test]
    fn cp1251_to_string_test() {
        let decoded = EncoderCodePage(1251).to_string(b"\xD2\xE5\xF1\xF2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    #[test]
    fn string_to_cp1251_test() {
        let encoded = EncoderCodePage(1251).to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\xD2\xE5\xF1\xF2");
    }

    #[test]
    fn cp866_to_string_test() {
        let decoded = EncoderCodePage(866).to_string(b"\x92\xA5\xE1\xE2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    #[test]
    fn string_to_cp866_test() {
        let encoded = EncoderCodePage(866).to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\x92\xA5\xE1\xE2");
    }
}