1extern crate winapi;
3extern crate kernel32;
4
5use std::ptr;
6use std::io::{Error, ErrorKind, Result};
7use std::ffi::OsStr;
8use std::os::windows::ffi::OsStrExt;
9use self::winapi::{BOOL, DWORD};
10use super::Encoder;
11
12pub const MB_PRECOMPOSED: DWORD = 0x00000001;
15pub const MB_COMPOSITE: DWORD = 0x00000002;
18pub const MB_USEGLYPHCHARS: DWORD = 0x00000004;
20pub const MB_ERR_INVALID_CHARS: DWORD = 0x00000008;
22pub const WC_COMPOSITECHECK: DWORD = 0x00000200;
25pub const WC_DISCARDNS: DWORD = 0x00000010;
27pub const WC_SEPCHARS: DWORD = 0x00000020;
29pub const WC_DEFAULTCHAR: DWORD = 0x00000040;
31pub const WC_ERR_INVALID_CHARS: DWORD = 0x00000080;
33pub const WC_NO_BEST_FIT_CHARS: DWORD = 0x00000400;
36
/// Text encoder backed by a Windows code page.
///
/// The wrapped `u32` is the code page identifier that is handed to the
/// conversion functions of this module (for example `1251` or `866`).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub struct EncoderCodePage(pub u32);
39
40impl Encoder for EncoderCodePage {
41 fn to_string(self: &Self, data: &[u8]) -> Result<String> {
43 multi_byte_to_wide_char(self.0, MB_ERR_INVALID_CHARS, data)
44 }
45
46 fn to_bytes(self: &Self, data: &str) -> Result<Vec<u8>> {
48 string_to_multibyte(self.0, data, None)
49 }
50}
51
52pub fn string_to_multibyte(codepage: DWORD,
63 data: &str,
64 default_char: Option<u8>)
65 -> Result<Vec<u8>> {
66 let wstr: Vec<u16> = OsStr::new(data).encode_wide().collect();
67 wide_char_to_multi_byte(codepage,
68 WC_COMPOSITECHECK,
69 &wstr,
70 default_char,
71 default_char.is_none())
72 .and_then(|(data, invalid)| if invalid {
73 Err(Error::new(ErrorKind::InvalidInput,
74 "Can't convert some characters to multibyte charset"))
75 } else {
76 Ok(data)
77 })
78}
79
80pub fn multi_byte_to_wide_char(codepage: DWORD,
85 flags: DWORD,
86 multi_byte_str: &[u8])
87 -> Result<String> {
88 if multi_byte_str.len() == 0 {
90 return Ok(String::new());
91 }
92 unsafe {
93 let len = kernel32::MultiByteToWideChar(codepage,
95 flags,
96 multi_byte_str.as_ptr() as winapi::LPSTR,
97 multi_byte_str.len() as i32,
98 ptr::null_mut(),
99 0);
100 if len > 0 {
101 let mut wstr: Vec<u16> = Vec::with_capacity(len as usize);
103 wstr.set_len(len as usize);
104 let len = kernel32::MultiByteToWideChar(codepage,
105 flags,
106 multi_byte_str.as_ptr() as winapi::LPSTR,
107 multi_byte_str.len() as i32,
108 wstr.as_mut_ptr(),
109 len);
110 if len > 0 {
111 return String::from_utf16(&wstr[0..(len as usize)])
112 .map_err(|e| Error::new(ErrorKind::InvalidInput, e));
113 }
114 }
115 Err(Error::last_os_error())
116 }
117
118}
119
120pub fn wide_char_to_multi_byte(codepage: DWORD,
125 flags: DWORD,
126 wide_char_str: &[u16],
127 default_char: Option<u8>,
128 use_default_char_flag: bool)
129 -> Result<(Vec<u8>, bool)> {
130 if wide_char_str.len() == 0 {
132 return Ok((Vec::new(), false));
133 }
134 unsafe {
135 let len = kernel32::WideCharToMultiByte(codepage,
137 flags,
138 wide_char_str.as_ptr(),
139 wide_char_str.len() as i32,
140 ptr::null_mut(),
141 0,
142 ptr::null(),
143 ptr::null_mut());
144
145 if len > 0 {
146 let mut astr: Vec<u8> = Vec::with_capacity(len as usize);
148 astr.set_len(len as usize);
149 let default_char_ref: [i8; 1] = match default_char {
150 Some(c) => [c as i8],
151 None => [0],
152 };
153 let mut use_char_ref: [BOOL; 1] = [0];
154 let len = kernel32::WideCharToMultiByte(codepage,
155 flags,
156 wide_char_str.as_ptr(),
157 wide_char_str.len() as i32,
158 astr.as_mut_ptr() as winapi::LPSTR,
159 len,
160 match default_char {
161 Some(_) => default_char_ref.as_ptr(),
162 None => ptr::null(),
163 },
164 match use_default_char_flag {
165 true => use_char_ref.as_mut_ptr(),
166 false => ptr::null_mut(),
167 });
168 if (len as usize) == astr.len() {
169 return Ok((astr, use_char_ref[0] != 0));
170 }
171 if len > 0 {
172 return Ok((astr[0..(len as usize)].to_vec(), use_char_ref[0] != 0));
173 }
174 }
175 Err(Error::last_os_error())
176 }
177}
178
/// Empty input decodes to an empty string.
#[test]
fn multi_byte_to_wide_char_empty() {
    let decoded = multi_byte_to_wide_char(winapi::CP_ACP, MB_ERR_INVALID_CHARS, b"").unwrap();
    assert_eq!(decoded, "");
}
184
/// Plain ASCII decodes unchanged through the system ANSI code page.
#[test]
fn multi_byte_to_wide_char_ascii() {
    let decoded = multi_byte_to_wide_char(winapi::CP_ACP, MB_ERR_INVALID_CHARS, b"Test").unwrap();
    assert_eq!(decoded, "Test");
}
190
/// A Cyrillic word encoded as UTF-8 decodes to the matching `String`.
#[test]
fn multi_byte_to_wide_char_utf8() {
    let encoded = b"\xD0\xA2\xD0\xB5\xD1\x81\xD1\x82";
    let decoded = multi_byte_to_wide_char(winapi::CP_UTF8, MB_ERR_INVALID_CHARS, encoded).unwrap();
    assert_eq!(decoded, "Тест");
}
199
/// A malformed UTF-8 sequence is rejected when `MB_ERR_INVALID_CHARS` is set.
#[test]
fn multi_byte_to_wide_char_invalid() {
    let outcome = multi_byte_to_wide_char(winapi::CP_UTF8, MB_ERR_INVALID_CHARS, b"Test\xC0");
    assert!(outcome.is_err());
}
204
/// Empty input encodes to no bytes and never reports a substitution.
#[test]
fn wide_char_to_multi_byte_empty() {
    let result = wide_char_to_multi_byte(winapi::CP_UTF8, WC_ERR_INVALID_CHARS, &[], None, false)
        .unwrap();
    let expected = (b"".to_vec(), false);
    assert_eq!(result, expected);
}
211
/// ASCII text encodes unchanged through the system ANSI code page without
/// using the default character.
#[test]
fn wide_char_to_multi_byte_ascii() {
    let wide: [u16; 4] = [0x0054, 0x0065, 0x0073, 0x0074];
    let result = wide_char_to_multi_byte(winapi::CP_ACP, WC_COMPOSITECHECK, &wide, None, true)
        .unwrap();
    let expected = (b"Test".to_vec(), false);
    assert_eq!(result, expected);
}
222
/// A single CJK code unit encodes to its three-byte UTF-8 form.
#[test]
fn wide_char_to_multi_byte_utf8() {
    let wide: [u16; 1] = [0x6F22];
    let result = wide_char_to_multi_byte(winapi::CP_UTF8, WC_ERR_INVALID_CHARS, &wide, None, false)
        .unwrap();
    let expected = (b"\xE6\xBC\xA2".to_vec(), false);
    assert_eq!(result, expected);
}
233
/// A character with no mapping in the target code page is replaced by the
/// supplied default character, and the substitution is reported.
#[test]
fn wide_char_to_multi_byte_replace() {
    // Code page 1251 is pinned on purpose: with `CP_ACP` the outcome depends
    // on the machine's ANSI code page, and on systems whose code page can
    // represent U+6F22 no replacement would take place.
    assert_eq!(wide_char_to_multi_byte(1251,
                                       WC_DEFAULTCHAR | WC_COMPOSITECHECK,
                                       &[0x0054, 0x0065, 0x0073, 0x0074, 0x6F22, 0x0029],
                                       Some(b':'),
                                       true)
                   .unwrap(),
               (b"Test:)".to_vec(), true));
}
244
/// The "default character used" indicator is set only when a replacement
/// actually happened.
#[test]
fn wide_char_to_multi_byte_invalid() {
    // Code page 1251 is pinned on purpose: with `CP_ACP` the outcome depends
    // on the machine's ANSI code page, and on systems whose code page can
    // represent U+6F22 no replacement would take place.
    assert_eq!(wide_char_to_multi_byte(1251,
                                       WC_COMPOSITECHECK,
                                       &[0x6F22],
                                       Some(b':'),
                                       true)
                   .unwrap(),
               (b":".to_vec(), true));
    // A representable character passes through and leaves the indicator clear.
    assert_eq!(wide_char_to_multi_byte(1251,
                                       WC_COMPOSITECHECK,
                                       &[0x0020],
                                       Some(b':'),
                                       true)
                   .unwrap(),
               (b" ".to_vec(), false));
}
262
/// Round-trip checks for `EncoderCodePage` through the `Encoder` trait, using
/// the same Cyrillic word in code pages 1251 and 866.
#[cfg(test)]
mod tests {
    extern crate winapi;

    use super::*;
    use super::super::Encoder;

    /// Bytes in code page 1251 decode to the expected text.
    #[test]
    fn cp1251_to_string_test() {
        let encoder = EncoderCodePage(1251);
        let decoded = encoder.to_string(b"\xD2\xE5\xF1\xF2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    /// Text encodes to the expected bytes in code page 1251.
    #[test]
    fn string_to_cp1251_test() {
        let encoder = EncoderCodePage(1251);
        let encoded = encoder.to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\xD2\xE5\xF1\xF2");
    }

    /// Bytes in code page 866 decode to the expected text.
    #[test]
    fn cp866_to_string_test() {
        let encoder = EncoderCodePage(866);
        let decoded = encoder.to_string(b"\x92\xA5\xE1\xE2").unwrap();
        assert_eq!(decoded, "Тест");
    }

    /// Text encodes to the expected bytes in code page 866.
    #[test]
    fn string_to_cp866_test() {
        let encoder = EncoderCodePage(866);
        let encoded = encoder.to_bytes("Тест").unwrap();
        assert_eq!(encoded, b"\x92\xA5\xE1\xE2");
    }
}