use crate::{Error, util::string_from_utf16};
use std::sync::OnceLock;
use windows_sys::Win32::Globalization::MultiByteToWideChar;
pub(crate) fn multi_byte_to_string(codepage: u32, data: &[u8]) -> Result<String, Error> {
unsafe {
let len = MultiByteToWideChar(
codepage,
0,
data.as_ptr(),
data.len() as i32,
std::ptr::null_mut(),
0,
);
if len <= 0 {
return Err(Error::decode(format!(
"MultiByteToWideChar failed for code page {codepage}"
)));
}
let mut buf: Vec<u16> = Vec::with_capacity(len as usize);
let written = MultiByteToWideChar(
codepage,
0,
data.as_ptr(),
data.len() as i32,
buf.as_mut_ptr(),
len,
);
if written <= 0 {
return Err(Error::decode(format!(
"MultiByteToWideChar failed for code page {codepage}"
)));
}
buf.set_len(written as usize);
string_from_utf16(&buf, "UTF-16 conversion failed")
}
}
/// Error-code value used to initialise ICU status out-parameters in this file; any code
/// greater than it is treated as a failure by `icu_decode`.
const U_ZERO_ERROR: i32 = 0;
/// Signature this file assumes for the `ucnv_open` export: a pointer to the converter name
/// (passed NUL-terminated by `icu_decode`) and a status out-parameter, returning an opaque
/// converter handle.
type UcnvOpenFn = unsafe extern "C" fn(name: *const u8, err: *mut i32) -> *mut core::ffi::c_void;
/// Signature this file assumes for the `ucnv_toUChars` export: converts `src_length` bytes
/// at `src` into UTF-16 units written to `dest` (room for `dest_capacity` units), reporting
/// status through `err`. `icu_decode` uses the return value as the output length.
type UcnvToUCharsFn = unsafe extern "C" fn(
cnv: *mut core::ffi::c_void,
dest: *mut u16,
dest_capacity: i32,
src: *const u8,
src_length: i32,
err: *mut i32,
) -> i32;
/// Signature this file assumes for the `ucnv_close` export, called to release a converter
/// handle obtained through `UcnvOpenFn`.
type UcnvCloseFn = unsafe extern "C" fn(cnv: *mut core::ffi::c_void);
/// The ICU converter entry points resolved from `icu.dll` by `load_icu`.
struct IcuFunctions {
// Resolved from the `ucnv_open` export.
ucnv_open: UcnvOpenFn,
// Resolved from the `ucnv_toUChars` export.
ucnv_to_u_chars: UcnvToUCharsFn,
// Resolved from the `ucnv_close` export.
ucnv_close: UcnvCloseFn,
}
/// Cached outcome of `load_icu`, filled on first use via `get_or_init`; holds `None` when
/// the library or one of its exports could not be resolved.
static ICU: OnceLock<Option<IcuFunctions>> = OnceLock::new();
/// Tries to load `icu.dll` and resolve the three converter exports this file calls.
///
/// Returns `None` if the library cannot be loaded or if any of `ucnv_open`,
/// `ucnv_toUChars` or `ucnv_close` is missing from it.
fn load_icu() -> Option<IcuFunctions> {
    use windows_sys::Win32::System::LibraryLoader::LoadLibraryW;

    // Untyped symbol as handed back by `get_proc`.
    type RawProc = unsafe extern "system" fn() -> isize;

    // "icu.dll" widened to a NUL-terminated UTF-16 string for the wide-char loader API.
    let ascii_name = *b"icu.dll\0";
    let wide_name: [u16; 8] = ascii_name.map(u16::from);

    let module = unsafe { LoadLibraryW(wide_name.as_ptr()) };
    if module.is_null() {
        return None;
    }

    // Resolve every export first; a single missing symbol makes ICU unusable here.
    let raw_open: RawProc = get_proc(module, b"ucnv_open\0")?;
    let raw_to_u_chars: RawProc = get_proc(module, b"ucnv_toUChars\0")?;
    let raw_close: RawProc = get_proc(module, b"ucnv_close\0")?;

    // NOTE(review): the transmutes assume each export really has the signature declared
    // by the corresponding type alias — confirm against the ICU headers.
    let ucnv_open = unsafe { std::mem::transmute::<RawProc, UcnvOpenFn>(raw_open) };
    let ucnv_to_u_chars =
        unsafe { std::mem::transmute::<RawProc, UcnvToUCharsFn>(raw_to_u_chars) };
    let ucnv_close = unsafe { std::mem::transmute::<RawProc, UcnvCloseFn>(raw_close) };

    Some(IcuFunctions {
        ucnv_open,
        ucnv_to_u_chars,
        ucnv_close,
    })
}
/// Looks up the export called `name` in the loaded module `h`.
///
/// `name` must be a NUL-terminated byte string without interior NUL bytes, for example
/// `b"ucnv_open\0"`. Returns `None` when `name` is not in that form or when
/// `GetProcAddress` does not find the export.
fn get_proc(
    h: *mut core::ffi::c_void,
    name: &[u8],
) -> Option<unsafe extern "system" fn() -> isize> {
    use windows_sys::Win32::System::LibraryLoader::GetProcAddress;

    // `GetProcAddress` reads the name up to its NUL terminator. Validate the slice so a
    // caller that forgets the trailing `\0` gets `None` instead of an out-of-bounds read.
    let c_name = std::ffi::CStr::from_bytes_with_nul(name).ok()?;

    // SAFETY: `c_name` is a valid NUL-terminated string that outlives the call.
    // NOTE(review): `h` is assumed to be a module handle from `LoadLibraryW`, as in
    // `load_icu`; this function cannot verify that itself.
    unsafe { GetProcAddress(h, c_name.as_ptr().cast::<u8>()) }
}
/// Test helper: reports whether the ICU entry points are available, running `load_icu`
/// through the shared `ICU` cell if nothing has initialised it yet.
#[cfg(test)]
pub(crate) fn is_icu_available() -> bool {
    let cached = ICU.get_or_init(load_icu);
    cached.is_some()
}
pub(crate) fn icu_decode(converter_name: &str, data: &[u8]) -> Result<String, Error> {
if data.is_empty() {
return Ok(String::new());
}
let icu = ICU.get_or_init(load_icu).as_ref().ok_or_else(|| {
Error::decode(format!("charset \"{converter_name}\" requires icu.dll (Windows 10 1903+)"))
})?;
let mut name_buf = Vec::with_capacity(converter_name.len() + 1);
name_buf.extend_from_slice(converter_name.as_bytes());
name_buf.push(0);
let mut open_err = U_ZERO_ERROR;
let cnv = unsafe { (icu.ucnv_open)(name_buf.as_ptr(), &mut open_err) };
if open_err > U_ZERO_ERROR || cnv.is_null() {
return Err(Error::decode(format!(
"ICU cannot open converter \"{converter_name}\" (error code {open_err})"
)));
}
struct CnvGuard {
cnv: *mut core::ffi::c_void,
close: UcnvCloseFn,
}
impl Drop for CnvGuard {
fn drop(&mut self) {
unsafe { (self.close)(self.cnv) };
}
}
let _guard = CnvGuard {
cnv,
close: icu.ucnv_close,
};
let capacity = data.len() + 1;
let mut buf: Vec<u16> = vec![0u16; capacity];
let mut conv_err = U_ZERO_ERROR;
let written = unsafe {
(icu.ucnv_to_u_chars)(
cnv,
buf.as_mut_ptr(),
capacity as i32,
data.as_ptr(),
data.len() as i32,
&mut conv_err,
)
};
if conv_err > U_ZERO_ERROR {
return Err(Error::decode(format!(
"ICU conversion failed for \"{converter_name}\" (error code {conv_err})"
)));
}
let len = written.max(0) as usize;
buf.truncate(len);
string_from_utf16(&buf, "ICU produced invalid UTF-16")
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Logs a skip notice and returns `true` when `icu.dll` could not be loaded.
    fn skip_without_icu() -> bool {
        if is_icu_available() {
            return false;
        }
        eprintln!("skipping: icu.dll not available");
        true
    }

    /// Known-good inputs decode to the expected text via `MultiByteToWideChar`.
    #[test]
    fn multi_byte_to_string_table() {
        // (code page, input bytes, expected text, label)
        let cases: &[(u32, &[u8], &str, &str)] = &[
            (65001, b"hello world", "hello world", "UTF-8 ASCII"),
            (1252, &[0xE9], "\u{e9}", "Windows-1252 e-acute"),
        ];
        for (code_page, input, want, label) in cases.iter().copied() {
            match multi_byte_to_string(code_page, input) {
                Ok(got) => assert_eq!(got, want, "{label}"),
                Err(e) => panic!("multi_byte_to_string({label}): {e}"),
            }
        }
    }

    /// An unknown code page and empty input are both reported as errors.
    #[test]
    fn multi_byte_to_string_errors_table() {
        // (code page, input bytes, label)
        let cases: &[(u32, &[u8], &str)] =
            &[(99999, b"hello", "invalid code page"), (65001, b"", "empty input")];
        for (code_page, input, label) in cases.iter().copied() {
            let outcome = multi_byte_to_string(code_page, input);
            assert!(outcome.is_err(), "{label}: should fail");
        }
    }

    /// Loading ICU must return normally whether or not the library exists.
    #[test]
    fn load_icu_does_not_panic() {
        let _ = load_icu();
    }

    /// ICU converters decode representative byte sequences to the expected text.
    #[test]
    fn icu_decode_table() {
        if skip_without_icu() {
            return;
        }
        // (converter, input bytes, expected text, label)
        let cases: &[(&str, &[u8], &str, &str)] = &[
            ("ISO-8859-10", &[0xA1, 0xA2], "\u{0104}\u{0112}", "ISO-8859-10 Ą Ē"),
            ("ISO-8859-14", &[0xA1, 0xD0], "\u{1E02}\u{0174}", "ISO-8859-14 Ḃ Ŵ"),
            ("EUC-JP", &[0xA4, 0xA2], "\u{3042}", "EUC-JP あ"),
            (
                "EUC-JP",
                &[0xC6, 0xFC, 0xCB, 0xDC, 0xB8, 0xEC],
                "\u{65E5}\u{672C}\u{8A9E}",
                "EUC-JP 日本語",
            ),
            ("EUC-JP", &[0x41, 0xA4, 0xA2], "A\u{3042}", "EUC-JP mixed ASCII"),
        ];
        for (converter, input, want, label) in cases.iter().copied() {
            match icu_decode(converter, input) {
                Ok(got) => assert_eq!(got, want, "{label}"),
                Err(e) => panic!("icu_decode({label}): {e}"),
            }
        }
    }

    /// Converters that cannot be opened produce an error rather than text.
    #[test]
    fn icu_decode_errors_table() {
        if skip_without_icu() {
            return;
        }
        // (converter, input bytes, label)
        let cases: &[(&str, &[u8], &str)] = &[
            ("ISO-8859-16", &[0xA1], "ISO-8859-16 data stripped from icu.dll"),
            ("totally-bogus-encoding", &[0x41], "bogus converter name"),
        ];
        for (converter, input, label) in cases.iter().copied() {
            let outcome = icu_decode(converter, input);
            assert!(outcome.is_err(), "{label}: should fail");
        }
    }

    /// Empty input decodes to an empty string.
    #[test]
    fn icu_decode_empty_input() {
        let decoded = icu_decode("ISO-8859-10", &[]).expect("empty");
        assert_eq!(decoded, "");
    }
}