jni/wrapper/java_vm/init_args/
char_encoding_windows.rs1use super::{char_encoding_generic::*, JvmError};
2use std::{
3 borrow::Cow,
4 convert::TryInto,
5 ffi::{c_int, c_uint, CStr},
6 io,
7 mem::MaybeUninit,
8 ptr,
9};
10use windows_sys::Win32::Globalization as winnls;
11
/// Integer type used for the length arguments handed to the Win32 transcoding
/// functions in this file (`WideCharToMultiByte` / `MultiByteToWideChar`).
12type WSize = c_int;
14
/// Integer type used for Windows code page identifiers, e.g. the value
/// returned by `GetACP`.
15type WCodepage = c_uint;
17
/// Maximum accepted length, in UTF-8 bytes, of a string passed to
/// `str_to_cstr_win32` (1048576 = 1 MiB). Longer inputs are rejected with
/// `JvmError::OptStringTooLong` before any transcoding is attempted.
// NOTE(review): presumably chosen so that neither the UTF-16 form nor the
// transcoded output can overflow `WSize` — confirm against the original
// rationale.
18const MAX_INPUT_LEN: usize = 1048576;
30
31pub(super) fn str_to_cstr_win32<'a>(
33 s: Cow<'a, str>,
34 needed_codepage: WCodepage,
35) -> Result<Cow<'static, CStr>, JvmError> {
36 if s.len() > MAX_INPUT_LEN {
38 return Err(JvmError::OptStringTooLong {
39 opt_string: s.into_owned(),
40 });
41 }
42
43 fn convert_error(s: Cow<str>) -> JvmError {
45 JvmError::OptStringTranscodeFailure {
46 opt_string: s.into_owned(),
47 error: io::Error::last_os_error(),
48 }
49 }
50
51 let s_utf16: Vec<u16> = s.encode_utf16().collect();
53
54 let s_utf16_len: WSize = s_utf16
57 .len()
58 .try_into()
59 .expect("UTF-16 form of input string is too long");
60
61 let conversion_flags = match needed_codepage {
63 42
66 | 50220
67 | 50221
68 | 50222
69 | 50225
70 | 50227
71 | 50229
72 | 54936
73 | 57002..=57011
74 | 65000
75 | 65001 => 0,
76
77 _ => winnls::WC_COMPOSITECHECK | winnls::WC_NO_BEST_FIT_CHARS,
78 };
79
80 let mut is_non_representable: Option<MaybeUninit<_>> = match needed_codepage {
83 winnls::CP_UTF7 | winnls::CP_UTF8 => None,
87 _ => Some(MaybeUninit::uninit()),
88 };
89
90 let required_buffer_space = unsafe {
94 winnls::WideCharToMultiByte(
95 needed_codepage,
96 conversion_flags,
97 s_utf16.as_ptr(),
98 s_utf16_len,
99 ptr::null_mut(),
100 0,
101 ptr::null(),
102 match &mut is_non_representable {
103 Some(x) => x.as_mut_ptr(),
104 None => ptr::null_mut(),
105 },
106 )
107 };
108
109 if required_buffer_space == 0 {
111 drop(s_utf16);
112
113 return Err(convert_error(s));
114 }
115
116 if let Some(is_non_representable) = is_non_representable {
118 let is_non_representable = unsafe { is_non_representable.assume_init() };
120
121 if is_non_representable != 0 {
122 drop(s_utf16);
123
124 return Err(JvmError::OptStringNotRepresentable {
125 opt_string: s.into_owned(),
126 });
127 }
128 }
129
130 let required_buffer_space_usize: usize = required_buffer_space as _;
136 let required_buffer_space_usize_with_nul: usize = required_buffer_space_usize + 1;
137
138 let mut output = Vec::<u8>::with_capacity(required_buffer_space_usize_with_nul);
140
141 let used_buffer_space = unsafe {
147 winnls::WideCharToMultiByte(
148 needed_codepage,
149 conversion_flags,
150 s_utf16.as_ptr(),
151 s_utf16_len,
152 output.as_mut_ptr(),
153 required_buffer_space,
154 ptr::null(),
155 ptr::null_mut(),
156 )
157 };
158
159 drop(s_utf16);
160
161 if used_buffer_space == 0 {
163 drop(output);
164
165 return Err(convert_error(s));
166 }
167
168 let used_buffer_space_usize: usize = used_buffer_space as usize;
169
170 unsafe {
177 output.set_len(used_buffer_space_usize);
178 }
179
180 unsafe { bytes_to_cstr(Cow::Owned(output), Some(s.into())) }
183}
184
185pub(super) fn str_to_cstr_win32_default_codepage<'a>(
187 s: Cow<'a, str>,
188) -> Result<Cow<'a, CStr>, JvmError> {
189 let needed_codepage = unsafe { winnls::GetACP() };
195
196 if needed_codepage == winnls::CP_UTF8 {
197 return utf8_to_cstr(s);
199 }
200
201 str_to_cstr_win32(s, needed_codepage)
203}
204
/// Test-only helper: decodes `codepage_string`, which is encoded in the
/// Windows code page `codepage`, into a `String` using `MultiByteToWideChar`.
///
/// `max_expected_utf16_len` is the size, in UTF-16 code units, of the buffer
/// offered to `MultiByteToWideChar`. It must be greater than zero: a zero-sized
/// buffer puts the API into size-query mode, where it writes nothing and only
/// returns the required length.
///
/// # Errors
///
/// Returns an `InvalidInput` error if `max_expected_utf16_len` is zero, or the
/// last OS error if `MultiByteToWideChar` reports failure (returns zero).
///
/// # Panics
///
/// Panics if the input length does not fit in `WSize`, if
/// `max_expected_utf16_len` is negative, if the API reports a length that does
/// not fit the buffer, or if the decoded data is not valid UTF-16.
#[cfg(test)]
fn codepage_to_string_win32(
    codepage_string: impl AsRef<[u8]>,
    codepage: WCodepage,
    max_expected_utf16_len: WSize,
) -> io::Result<String> {
    let codepage_string_slice = codepage_string.as_ref();

    let codepage_string_slice_len: WSize = codepage_string_slice
        .len()
        .try_into()
        .expect("`codepage_string`'s length is too large to transcode with Win32");

    let buf_capacity: usize = max_expected_utf16_len
        .try_into()
        .expect("expected_utf16_len is negative or exceeds address space");

    // With a zero-sized buffer the API would return the required length without
    // writing anything, and `set_len` below would then expose uninitialized
    // memory past the allocation. Refuse that configuration up front.
    if buf_capacity == 0 {
        return Err(io::Error::new(
            io::ErrorKind::InvalidInput,
            "`max_expected_utf16_len` must be greater than zero",
        ));
    }

    let mut buf = Vec::<u16>::with_capacity(buf_capacity);

    // SAFETY: the input pointer/length describe `codepage_string_slice`, and
    // `buf` has capacity for `max_expected_utf16_len` UTF-16 code units, which
    // is the output size passed to the call.
    let utf16_units_transcoded = unsafe {
        winnls::MultiByteToWideChar(
            codepage,
            0,
            codepage_string_slice.as_ptr() as *const _,
            codepage_string_slice_len,
            buf.as_mut_ptr(),
            max_expected_utf16_len,
        )
    };

    if utf16_units_transcoded == 0 {
        return Err(io::Error::last_os_error());
    }

    let utf16_units_transcoded: usize = utf16_units_transcoded
        .try_into()
        .expect("`MultiByteToWideChar` reported a negative length");

    assert!(
        utf16_units_transcoded <= buf.capacity(),
        "`MultiByteToWideChar` reported more code units than the buffer can hold"
    );

    // SAFETY: the API reported initializing `utf16_units_transcoded` code
    // units, and that count was just checked against the buffer's capacity.
    unsafe {
        buf.set_len(utf16_units_transcoded);
    }

    // The input is no longer needed; release it before building the `String`.
    drop(codepage_string);

    let string =
        String::from_utf16(buf.as_slice()).expect("`MultiByteToWideChar` generated invalid UTF-16");

    Ok(string)
}
254
/// Checks `str_to_cstr_win32` against small, hand-computed inputs for the
/// UTF-8 and Windows-1252 code pages.
#[test]
fn test() {
    use assert_matches::assert_matches;

    // UTF-8 target: the bytes come back unchanged, plus a null terminator,
    // in an owned buffer.
    {
        let result = str_to_cstr_win32("Hello, world 😎".into(), winnls::CP_UTF8).unwrap();
        assert_eq!(
            result.to_bytes_with_nul(),
            b"Hello, world \xf0\x9f\x98\x8e\0"
        );
        assert_matches!(result, Cow::Owned(_));
    }

    // Input that already ends in a null: the output still has exactly one
    // terminator.
    {
        let result = str_to_cstr_win32("Hello, world 😎\0".into(), winnls::CP_UTF8).unwrap();
        assert_eq!(
            result.to_bytes_with_nul(),
            b"Hello, world \xf0\x9f\x98\x8e\0"
        );
    }

    // Windows-1252 has no emoji, so the conversion must be refused and the
    // original string handed back inside the error.
    {
        let result = str_to_cstr_win32("Hello, world 😎".into(), 1252).unwrap_err();
        let error_string = assert_matches!(result, JvmError::OptStringNotRepresentable { opt_string } => opt_string);
        assert_eq!(error_string, "Hello, world 😎");
    }

    // The trade mark sign (U+2122) is byte 0x99 in Windows-1252.
    {
        let result = str_to_cstr_win32("Hello, world™".into(), 1252).unwrap();
        assert_eq!(result.to_bytes_with_nul(), b"Hello, world\x99\0");
        assert_matches!(result, Cow::Owned(_));
    }
}
288
/// Exercises `str_to_cstr_win32` with inputs at and just beyond
/// `MAX_INPUT_LEN`, for the UTF-8, UTF-7 and Windows-1252 code pages.
#[test]
fn test_overflow() {
    use assert_matches::assert_matches;

    // If `error` carries the option string, verify it still equals
    // `expected_opt_string`, then replace it with an empty string so that a
    // later panic message does not print the whole (very large) input.
    #[track_caller]
    fn check_and_clear_error_opt_string(expected_opt_string: &str, error: &mut JvmError) {
        if let Some(carried) = error.opt_string_mut() {
            assert!(
                carried.as_str() == expected_opt_string,
                "opt_string was mangled in moving it to an error"
            );

            *carried = String::new();
        }
    }

    // Unwraps a successful result, or panics with the (cleared) error.
    #[track_caller]
    fn expect_success(
        expected_opt_string: &str,
        result: Result<Cow<'static, CStr>, JvmError>,
    ) -> Cow<'static, CStr> {
        let mut failure = match result {
            Ok(transcoded) => return transcoded,
            Err(failure) => failure,
        };

        check_and_clear_error_opt_string(expected_opt_string, &mut failure);
        panic!("unexpected transcoding failure: {}", failure)
    }

    // Like `expect_success`, and additionally requires the output bytes to be
    // identical to the input bytes.
    #[track_caller]
    fn expect_successful_roundtrip(
        expected_opt_string: &str,
        result: Result<Cow<'static, CStr>, JvmError>,
    ) -> Cow<'static, CStr> {
        let transcoded = expect_success(expected_opt_string, result);

        if transcoded.to_bytes() != expected_opt_string.as_bytes() {
            panic!("opt_string was transcoded successfully but mangled");
        }

        transcoded
    }

    // Requires the result to be an `OptStringTooLong` error that still carries
    // the original string.
    #[track_caller]
    fn expect_opt_string_too_long(
        expected_opt_string: &str,
        result: Result<Cow<'static, CStr>, JvmError>,
    ) {
        match result {
            Ok(transcoded) => {
                assert!(
                    expected_opt_string.as_bytes() == transcoded.to_bytes(),
                    "transcoding unexpectedly succeeded and resulted in mangled output"
                );
                panic!("transcoding unexpectedly succeeded")
            }
            Err(mut failure) => {
                check_and_clear_error_opt_string(expected_opt_string, &mut failure);

                assert_matches!(failure, JvmError::OptStringTooLong { .. });
            }
        }
    }

    // ASCII input: one byte over the limit is rejected; exactly at the limit
    // it round-trips through UTF-8 unchanged.
    {
        let mut ascii = "H".repeat(MAX_INPUT_LEN.checked_add(1).unwrap());

        let over_limit = str_to_cstr_win32(ascii.as_str().into(), winnls::CP_UTF8);
        expect_opt_string_too_long(&ascii, over_limit);

        assert_eq!(ascii.pop(), Some('H'));

        let at_limit = str_to_cstr_win32(ascii.as_str().into(), winnls::CP_UTF8);
        expect_successful_roundtrip(&ascii, at_limit);
    }

    // `MAX_INPUT_LEN` bytes made of the two-byte UTF-8 sequence DF BF (U+07FF),
    // built as byte pairs and reinterpreted as a string.
    {
        let pair_count = MAX_INPUT_LEN / 2;
        let byte_pairs = vec![u16::from_be(0xdfbf); pair_count];

        let text: &str = std::str::from_utf8(bytemuck::cast_slice(byte_pairs.as_slice())).unwrap();

        // UTF-8 → UTF-8 must be byte-identical.
        expect_successful_roundtrip(text, str_to_cstr_win32(text.into(), winnls::CP_UTF8));

        // UTF-7 output differs from the input, so decode it back with Win32
        // and compare the decoded text instead.
        {
            let utf7 = expect_success(text, str_to_cstr_win32(text.into(), winnls::CP_UTF7));

            let decoded: String = codepage_to_string_win32(
                utf7.to_bytes(),
                winnls::CP_UTF7,
                (text.len() / 2).try_into().unwrap(),
            )
            .unwrap();

            assert!(decoded == text, "didn't roundtrip via UTF-7");
        }
    }

    // `MAX_INPUT_LEN` bytes made of the UTF-8 sequence C2 AE (U+00AE); every
    // character must become the single byte 0xAE in Windows-1252.
    {
        let pair_count = MAX_INPUT_LEN / 2;
        let byte_pairs = vec![u16::from_be(0xc2ae); pair_count];

        let text: &str = std::str::from_utf8(bytemuck::cast_slice(byte_pairs.as_slice())).unwrap();

        let transcoded = expect_success(text, str_to_cstr_win32(text.into(), 1252));

        assert!(
            transcoded.to_bytes().iter().all(|byte| *byte == 0xae),
            "string didn't transcode to Windows-1252 properly"
        );
    }
}