use crate::error::{Error, Result};
use std::os::windows::ffi::OsStrExt;
use std::path::Path;
/// Encodes `s` as UTF-16 and appends a terminating NUL code unit.
///
/// The returned vector always ends with `0`. Any NUL characters inside `s`
/// are copied through unchanged, so the result may contain interior zeros.
#[inline]
pub fn to_wide(s: &str) -> Vec<u16> {
    // A UTF-8 string never produces more UTF-16 code units than it has bytes,
    // so the byte length plus the terminator is enough room up front.
    let mut units: Vec<u16> = Vec::with_capacity(s.len() + 1);
    units.extend(s.encode_utf16().chain(std::iter::once(0)));
    units
}
/// Converts `path` into a NUL-terminated sequence of wide code units.
///
/// The path's OS string is re-encoded with `encode_wide`, so the data never
/// passes through `&str`; a terminating `0` is appended at the end.
#[inline]
pub fn path_to_wide(path: &Path) -> Vec<u16> {
    let raw = path.as_os_str();
    // `raw.len()` is used as a size estimate for the code units, plus one
    // slot for the terminator.
    let mut units: Vec<u16> = Vec::with_capacity(raw.len() + 1);
    units.extend(raw.encode_wide().chain(std::iter::once(0)));
    units
}
#[inline]
pub fn from_wide(wide: &[u16]) -> Result<String> {
let len = wide.iter().position(|&c| c == 0).unwrap_or(wide.len());
String::from_utf16(&wide[..len])
.map_err(|_| Error::string_conversion("Invalid UTF-16 sequence"))
}
/// Decodes the NUL-terminated UTF-16 string that `ptr` points to.
///
/// # Errors
///
/// Returns `Error::null_pointer` when `ptr` is null, and whatever
/// [`from_wide`] returns when the code units are not valid UTF-16.
///
/// # Safety
///
/// If `ptr` is non-null it must be properly aligned and point to readable
/// `u16` values that end with a `0` terminator. That memory must remain valid
/// and must not be written to for the duration of the call.
pub unsafe fn from_wide_ptr(ptr: *const u16) -> Result<String> {
    if ptr.is_null() {
        return Err(Error::null_pointer("from_wide_ptr received null pointer"));
    }

    // Walk forward until the terminator to learn the string length.
    let mut len: usize = 0;
    // SAFETY: the caller guarantees a terminator exists, so every offset up to
    // and including the terminator is inside the same readable allocation.
    while unsafe { *ptr.add(len) } != 0 {
        len += 1;
    }

    // SAFETY: `ptr` is non-null and aligned, and the loop above just read
    // `len` initialised units starting at it; the caller keeps them alive and
    // unmodified while this borrow exists.
    let slice = unsafe { std::slice::from_raw_parts(ptr, len) };
    from_wide(slice)
}
#[inline]
pub fn from_wide_with_len(wide: &[u16], len: usize) -> Result<String> {
let actual_len = len.min(wide.len());
String::from_utf16(&wide[..actual_len])
.map_err(|_| Error::string_conversion("Invalid UTF-16 sequence"))
}
#[inline]
pub fn from_wide_buffer(buffer: &[u16]) -> Result<String> {
let len = buffer.iter().position(|&c| c == 0).unwrap_or(buffer.len());
String::from_utf16(&buffer[..len])
.map_err(|_| Error::string_conversion("Invalid UTF-16 sequence"))
}
/// Accumulates UTF-16 code units and hands out a NUL-terminated buffer on demand.
#[derive(Default)]
pub struct WideStringBuilder {
    /// Code units collected so far; the terminator is only added when building.
    buffer: Vec<u16>,
}

impl WideStringBuilder {
    /// Creates a builder with an empty buffer.
    #[inline]
    pub fn new() -> Self {
        Self { buffer: Vec::new() }
    }

    /// Creates a builder whose buffer has room for at least `capacity` code units.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        let buffer = Vec::with_capacity(capacity);
        Self { buffer }
    }

    /// Appends `s`, re-encoded as UTF-16, and returns the builder for chaining.
    #[inline]
    pub fn push(&mut self, s: &str) -> &mut Self {
        let units = s.encode_utf16();
        self.buffer.extend(units);
        self
    }

    /// Appends one raw UTF-16 code unit and returns the builder for chaining.
    ///
    /// The unit is stored as given; no check is made that it forms valid UTF-16.
    #[inline]
    pub fn push_char(&mut self, c: u16) -> &mut Self {
        self.buffer.push(c);
        self
    }

    /// Consumes the builder and returns its contents followed by a NUL terminator.
    #[inline]
    pub fn build(self) -> Vec<u16> {
        let Self { buffer: mut units } = self;
        units.push(0);
        units
    }

    /// Discards everything pushed so far.
    #[inline]
    pub fn clear(&mut self) {
        self.buffer.truncate(0);
    }

    /// Returns the contents followed by a NUL terminator and leaves the builder empty.
    ///
    /// The internal allocation moves into the returned vector, so the builder
    /// starts again from a fresh, unallocated buffer.
    #[inline]
    pub fn build_and_clear(&mut self) -> Vec<u16> {
        let mut finished = std::mem::take(&mut self.buffer);
        finished.push(0);
        finished
    }

    /// Number of code units pushed so far (no terminator is counted).
    #[inline]
    pub fn len(&self) -> usize {
        self.buffer.len()
    }

    /// Returns `true` when nothing has been pushed since creation or the last reset.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    /// Number of code units the current allocation can hold without growing.
    #[inline]
    pub fn capacity(&self) -> usize {
        self.buffer.capacity()
    }
}
/// Number of `u16` slots in the inline buffer; the terminating NUL occupies one of them.
const INLINE_CAP: usize = 23;

/// NUL-terminated UTF-16 string that stores short values inline and longer ones on the heap.
///
/// Every constructor stores the terminating `0`, so [`WideString::as_ptr`]
/// always points at a terminated sequence.
#[derive(Clone)]
pub struct WideString {
    repr: WideStringRepr,
}

/// Storage backing a [`WideString`].
#[derive(Clone)]
enum WideStringRepr {
    /// Code units held in place. `len` is the number of slots in use,
    /// terminator included; unused slots are zero.
    Inline { buf: [u16; INLINE_CAP], len: u8 },
    /// Heap buffer holding the code units followed by the terminator.
    Heap(Vec<u16>),
}

impl WideString {
    /// Copies `units` (terminator included, at most `INLINE_CAP` long) into inline storage.
    #[inline]
    fn inline_from_units(units: &[u16]) -> Self {
        debug_assert!(units.len() <= INLINE_CAP);
        let mut buf = [0u16; INLINE_CAP];
        buf[..units.len()].copy_from_slice(units);
        Self {
            repr: WideStringRepr::Inline {
                buf,
                // Cannot truncate: the length is bounded by INLINE_CAP (23).
                len: units.len() as u8,
            },
        }
    }

    /// Encodes `s` as NUL-terminated UTF-16.
    ///
    /// The value is stored inline when the encoded text plus terminator fits
    /// in `INLINE_CAP` code units, and on the heap otherwise.
    #[inline]
    pub fn new(s: &str) -> Self {
        let total_len = s.encode_utf16().count() + 1;
        if total_len > INLINE_CAP {
            return Self {
                repr: WideStringRepr::Heap(to_wide(s)),
            };
        }

        // The buffer starts zeroed, so the slot after the last written unit
        // already holds the terminator.
        let mut buf = [0u16; INLINE_CAP];
        for (slot, unit) in buf.iter_mut().zip(s.encode_utf16()) {
            *slot = unit;
        }
        Self {
            repr: WideStringRepr::Inline {
                buf,
                // Cannot truncate: total_len <= INLINE_CAP (23) on this path.
                len: total_len as u8,
            },
        }
    }

    /// Creates an empty string, using heap storage when `capacity` exceeds `INLINE_CAP`.
    ///
    /// Both storage kinds contain just the terminator, so the result is a
    /// valid empty NUL-terminated string either way.
    #[inline]
    pub fn with_capacity(capacity: usize) -> Self {
        if capacity <= INLINE_CAP {
            return Self {
                repr: WideStringRepr::Inline {
                    buf: [0u16; INLINE_CAP],
                    len: 1,
                },
            };
        }

        // An empty Vec would leave `as_ptr` pointing at uninitialised,
        // unterminated memory, so store the terminator right away.
        let mut units = Vec::with_capacity(capacity);
        units.push(0);
        Self {
            repr: WideStringRepr::Heap(units),
        }
    }

    /// Converts `path` to a NUL-terminated wide string via [`path_to_wide`].
    #[inline]
    pub fn from_path(path: &Path) -> Self {
        Self::from_vec(path_to_wide(path))
    }

    /// Wraps already-encoded UTF-16 code units.
    ///
    /// If `vec` does not end with `0`, a terminator is appended so that the
    /// pointer accessors always expose a terminated string and [`Self::len`]
    /// counts every unit. Input that already ends with `0` is stored as given.
    #[inline]
    pub fn from_vec(mut vec: Vec<u16>) -> Self {
        if vec.last() != Some(&0) {
            vec.push(0);
        }
        if vec.len() <= INLINE_CAP {
            Self::inline_from_units(&vec)
        } else {
            Self {
                repr: WideStringRepr::Heap(vec),
            }
        }
    }

    /// Pointer to the first code unit of the NUL-terminated string.
    ///
    /// For inline storage the pointer refers to memory inside `self`, so it
    /// must not be used after `self` has been moved or dropped.
    #[inline]
    pub fn as_ptr(&self) -> *const u16 {
        match &self.repr {
            WideStringRepr::Inline { buf, .. } => buf.as_ptr(),
            WideStringRepr::Heap(vec) => vec.as_ptr(),
        }
    }

    /// Wraps [`Self::as_ptr`] in a `PCWSTR`; the same lifetime caveat applies.
    #[inline]
    pub fn as_pcwstr(&self) -> windows::core::PCWSTR {
        windows::core::PCWSTR::from_raw(self.as_ptr())
    }

    /// Number of stored code units, not counting the final terminator.
    #[inline]
    pub fn len(&self) -> usize {
        match &self.repr {
            WideStringRepr::Inline { len, .. } => (*len as usize).saturating_sub(1),
            WideStringRepr::Heap(vec) => vec.len().saturating_sub(1),
        }
    }

    /// Returns `true` when the string holds no code units besides the terminator.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// Returns `true` when the value lives in the inline buffer rather than on the heap.
    #[inline]
    pub fn is_inline(&self) -> bool {
        matches!(self.repr, WideStringRepr::Inline { .. })
    }

    /// Decodes the text before the first NUL into a `String`.
    ///
    /// Code units that are not valid UTF-16 (unpaired surrogates) are replaced
    /// with U+FFFD; the valid parts of the string are kept.
    #[inline]
    pub fn to_string_lossy(&self) -> String {
        let units = self.as_slice();
        let end = units
            .iter()
            .position(|&unit| unit == 0)
            .unwrap_or(units.len());
        String::from_utf16_lossy(&units[..end])
    }

    /// The stored code units, including the terminator.
    #[inline]
    pub fn as_slice(&self) -> &[u16] {
        match &self.repr {
            WideStringRepr::Inline { buf, len } => &buf[..*len as usize],
            WideStringRepr::Heap(vec) => vec,
        }
    }
}
impl From<&str> for WideString {
fn from(s: &str) -> Self {
Self::new(s)
}
}
impl From<&Path> for WideString {
fn from(path: &Path) -> Self {
Self::from_path(path)
}
}
impl From<String> for WideString {
fn from(s: String) -> Self {
Self::new(&s)
}
}
impl From<Vec<u16>> for WideString {
fn from(vec: Vec<u16>) -> Self {
Self::from_vec(vec)
}
}
/// Keeps idle `Vec<u16>` buffers around so wide strings can reuse their allocations.
pub struct WideStringPool {
    /// Idle buffers available for reuse.
    pool: Vec<Vec<u16>>,
    /// Maximum number of idle buffers retained.
    max_size: usize,
    /// Returned buffers with a larger capacity than this are dropped instead of kept.
    max_capacity: usize,
}

impl WideStringPool {
    /// Creates an empty pool that keeps up to 16 buffers of at most 4096 code units each.
    #[inline]
    pub fn new() -> Self {
        Self {
            pool: Vec::new(),
            max_size: 16,
            max_capacity: 4096,
        }
    }

    /// Creates an empty pool with the given limits on buffer count and per-buffer capacity.
    #[inline]
    pub fn with_limits(max_size: usize, max_capacity: usize) -> Self {
        let pool = Vec::with_capacity(max_size);
        Self {
            pool,
            max_size,
            max_capacity,
        }
    }

    /// Creates a pool that already holds `count` buffers of `capacity` code units each.
    ///
    /// The pool keeps at most `count` buffers, and its per-buffer capacity
    /// limit is `capacity` or 4096, whichever is larger.
    pub fn with_preallocated(count: usize, capacity: usize) -> Self {
        let mut this = Self::with_limits(count, capacity.max(4096));
        this.pool
            .extend((0..count).map(|_| Vec::with_capacity(capacity)));
        this
    }

    /// Removes the first idle buffer with room for `required` units, or allocates a new one.
    #[inline]
    fn checkout(&mut self, required: usize) -> Vec<u16> {
        match self.pool.iter().position(|idle| idle.capacity() >= required) {
            // Order of idle buffers is irrelevant, so the O(1) removal is fine.
            Some(idx) => self.pool.swap_remove(idx),
            None => Vec::with_capacity(required),
        }
    }

    /// Encodes `s` as NUL-terminated UTF-16, reusing an idle buffer when one is large enough.
    #[inline]
    pub fn get(&mut self, s: &str) -> PooledWideString {
        let required = s.encode_utf16().count() + 1;
        let mut buffer = self.checkout(required);
        buffer.clear();
        buffer.extend(s.encode_utf16());
        buffer.push(0);
        PooledWideString { buffer }
    }

    /// Encodes `path` as a NUL-terminated wide string, reusing an idle buffer when one is large enough.
    #[inline]
    pub fn get_path(&mut self, path: &Path) -> PooledWideString {
        let raw = path.as_os_str();
        // `raw.len()` serves as the size estimate; one more slot for the terminator.
        let required = raw.len() + 1;
        let mut buffer = self.checkout(required);
        buffer.clear();
        buffer.extend(raw.encode_wide());
        buffer.push(0);
        PooledWideString { buffer }
    }

    /// Hands a buffer back to the pool.
    ///
    /// The buffer is dropped instead when the pool is already full or the
    /// buffer's capacity exceeds the configured limit.
    #[inline]
    pub fn put(&mut self, pooled: PooledWideString) {
        let PooledWideString { mut buffer } = pooled;
        if self.pool.len() >= self.max_size || buffer.capacity() > self.max_capacity {
            return;
        }
        buffer.clear();
        self.pool.push(buffer);
    }

    /// Number of idle buffers currently held.
    #[inline]
    pub fn len(&self) -> usize {
        self.pool.len()
    }

    /// Returns `true` when no idle buffers are held.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.pool.is_empty()
    }

    /// Drops every idle buffer.
    #[inline]
    pub fn clear(&mut self) {
        self.pool.clear();
    }

    /// Drops idle buffers until at most `size` remain.
    pub fn shrink_to(&mut self, size: usize) {
        self.pool.truncate(size);
    }
}
impl Default for WideStringPool {
fn default() -> Self {
Self::new()
}
}
/// NUL-terminated UTF-16 string whose buffer can be handed back to a `WideStringPool`.
pub struct PooledWideString {
    /// Code units followed by the terminator.
    buffer: Vec<u16>,
}

impl PooledWideString {
    /// Pointer to the first code unit; valid while `self` is alive.
    #[inline]
    pub fn as_ptr(&self) -> *const u16 {
        self.buffer.as_ptr()
    }

    /// Wraps the buffer pointer in a `PCWSTR`; valid while `self` is alive.
    #[inline]
    pub fn as_pcwstr(&self) -> windows::core::PCWSTR {
        windows::core::PCWSTR::from_raw(self.buffer.as_ptr())
    }

    /// Number of stored code units, not counting the final terminator.
    #[inline]
    pub fn len(&self) -> usize {
        self.buffer.len().saturating_sub(1)
    }

    /// Returns `true` when the string holds no code units besides the terminator.
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.len() == 0
    }

    /// The stored code units, including the terminator.
    #[inline]
    pub fn as_slice(&self) -> &[u16] {
        &self.buffer
    }

    /// Decodes the text before the first NUL into a `String`.
    ///
    /// Code units that are not valid UTF-16 (unpaired surrogates) are replaced
    /// with U+FFFD; the valid parts of the string are kept.
    #[inline]
    pub fn to_string_lossy(&self) -> String {
        let end = self
            .buffer
            .iter()
            .position(|&unit| unit == 0)
            .unwrap_or(self.buffer.len());
        String::from_utf16_lossy(&self.buffer[..end])
    }

    /// Consumes the string and returns its buffer, terminator included.
    #[inline]
    pub fn into_vec(self) -> Vec<u16> {
        self.buffer
    }

    /// Converts into a [`WideString`] by passing the buffer to [`WideString::from_vec`].
    #[inline]
    pub fn into_wide_string(self) -> WideString {
        WideString::from_vec(self.buffer)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `text` with `to_wide` and decodes it again with `from_wide`.
    fn roundtrip(text: &str) -> String {
        from_wide(&to_wide(text)).unwrap()
    }

    // ---- free-function round trips -------------------------------------

    /// Plain text plus a surrogate-pair character survives the round trip.
    #[test]
    fn test_roundtrip() {
        let original = "Hello, World! 🌍";
        let back = roundtrip(original);
        assert_eq!(original, back);
    }

    /// A trailing unit-separator control character is preserved.
    #[test]
    fn test_roundtrip_control_char() {
        let original = "He\x1f";
        let encoded = to_wide(original);
        println!("Wide for 'He\\x1f': {:?}", encoded);
        let decoded = from_wide(&encoded).unwrap();
        assert_eq!(original, decoded, "Roundtrip with control char failed");
    }

    /// A trailing SOH control character is preserved.
    #[test]
    fn test_roundtrip_soh() {
        let original = "test\x01";
        println!("Original: {:?} (len={})", original, original.len());
        println!("Original bytes: {:?}", original.as_bytes());

        let encoded = to_wide(original);
        println!("Wide: {:?} (len={})", encoded, encoded.len());

        let decoded = from_wide(&encoded).unwrap();
        println!("Back: {:?} (len={})", decoded, decoded.len());
        println!("Back bytes: {:?}", decoded.as_bytes());

        assert_eq!(original, decoded, "Roundtrip with SOH failed");
    }

    /// A path-like string mixed with CR, LF and SOH is preserved.
    #[test]
    fn test_roundtrip_windows_path_with_control() {
        let original = "C:\\Windows\\Sy{e\r\n\x01";
        println!("Original bytes: {:?}", original.as_bytes());

        let encoded = to_wide(original);
        println!("Wide: {:?}", encoded);

        let decoded = from_wide(&encoded).unwrap();
        println!("Back bytes: {:?}", decoded.as_bytes());

        assert_eq!(original, decoded, "Roundtrip with path failed");
    }

    /// The empty string encodes to just the terminator.
    #[test]
    fn test_empty_string() {
        let encoded = to_wide("");
        assert_eq!(encoded, vec![0]);
        let decoded = from_wide(&encoded).unwrap();
        assert_eq!(decoded, "");
    }

    // ---- WideStringBuilder ---------------------------------------------

    /// Chained pushes are concatenated in order.
    #[test]
    fn test_wide_string_builder() {
        let mut builder = WideStringBuilder::new();
        builder.push("Hello").push(", ").push("World!");
        let encoded = builder.build();
        let decoded = from_wide(&encoded).unwrap();
        assert_eq!(decoded, "Hello, World!");
    }

    // ---- WideString inline / heap storage ------------------------------

    /// Short text is stored inline.
    #[test]
    fn test_wide_string_sso_short() {
        let ws = WideString::new("Hello");
        assert!(ws.is_inline());
        assert_eq!(ws.len(), 5);
        assert_eq!(ws.to_string_lossy(), "Hello");
    }

    /// Text that fills the inline buffer together with its terminator stays inline.
    #[test]
    fn test_wide_string_sso_exact_boundary() {
        let text = "a".repeat(INLINE_CAP - 1);
        let ws = WideString::new(&text);
        assert!(ws.is_inline());
        assert_eq!(ws.len(), INLINE_CAP - 1);
    }

    /// One more unit than the inline buffer can hold moves the value to the heap.
    #[test]
    fn test_wide_string_sso_over_boundary() {
        let text = "a".repeat(INLINE_CAP);
        let ws = WideString::new(&text);
        assert!(!ws.is_inline());
        assert_eq!(ws.len(), INLINE_CAP);
    }

    /// The empty string is inline and reports zero length.
    #[test]
    fn test_wide_string_sso_empty() {
        let ws = WideString::new("");
        assert!(ws.is_inline());
        assert_eq!(ws.len(), 0);
        assert!(ws.is_empty());
    }

    /// Short text containing a surrogate-pair character is stored inline.
    #[test]
    fn test_wide_string_sso_unicode() {
        let ws = WideString::new("Hello 🌍");
        assert!(ws.is_inline());
        assert_eq!(ws.to_string_lossy(), "Hello 🌍");
    }

    /// Cloning keeps both the contents and the storage kind.
    #[test]
    fn test_wide_string_clone() {
        let ws1 = WideString::new("Hello");
        let ws2 = ws1.clone();
        assert_eq!(ws1.to_string_lossy(), ws2.to_string_lossy());
        assert!(ws1.is_inline());
        assert!(ws2.is_inline());

        let ws3 = WideString::new(&"a".repeat(100));
        let ws4 = ws3.clone();
        assert_eq!(ws3.to_string_lossy(), ws4.to_string_lossy());
        assert!(!ws3.is_inline());
        assert!(!ws4.is_inline());
    }

    // ---- WideStringPool ------------------------------------------------

    /// A returned buffer is handed out again on the next request.
    #[test]
    fn test_wide_string_pool_basic() {
        let mut pool = WideStringPool::new();
        assert!(pool.is_empty());

        let s1 = pool.get("Hello");
        assert_eq!(s1.len(), 5);
        assert_eq!(s1.to_string_lossy(), "Hello");
        pool.put(s1);
        assert_eq!(pool.len(), 1);

        let s2 = pool.get("Hi");
        assert_eq!(s2.len(), 2);
        assert_eq!(pool.len(), 0);
        pool.put(s2);
        assert_eq!(pool.len(), 1);
    }

    /// Preallocated buffers are counted, taken and returned.
    #[test]
    fn test_wide_string_pool_preallocated() {
        let mut pool = WideStringPool::with_preallocated(4, 256);
        assert_eq!(pool.len(), 4);

        let s1 = pool.get("Test");
        assert_eq!(pool.len(), 3);
        pool.put(s1);
        assert_eq!(pool.len(), 4);
    }

    /// Buffers returned beyond the configured maximum are dropped.
    #[test]
    fn test_wide_string_pool_max_size() {
        let mut pool = WideStringPool::with_limits(2, 1024);
        let s1 = pool.get("A");
        let s2 = pool.get("B");
        let s3 = pool.get("C");
        pool.put(s1);
        pool.put(s2);
        pool.put(s3);
        assert_eq!(pool.len(), 2);
    }

    /// A pooled string converts into a `WideString` with the same text.
    #[test]
    fn test_wide_string_pool_convert_to_wide_string() {
        let mut pool = WideStringPool::new();
        let pooled = pool.get("Hello");
        let ws = pooled.into_wide_string();
        assert_eq!(ws.to_string_lossy(), "Hello");
    }

    // ---- Unicode coverage ----------------------------------------------

    /// A single emoji encodes to a surrogate pair followed by the terminator.
    #[test]
    fn test_unicode_surrogate_pairs() {
        let emoji = "🎉";
        let encoded = to_wide(emoji);
        assert_eq!(encoded.len(), 3);
        assert_eq!(encoded[0], 0xD83C);
        assert_eq!(encoded[1], 0xDF89);
        assert_eq!(encoded[2], 0);
        let decoded = from_wide(&encoded).unwrap();
        assert_eq!(decoded, emoji);
    }

    /// Several consecutive surrogate pairs survive the round trip.
    #[test]
    fn test_unicode_multiple_surrogate_pairs() {
        let text = "Hello 🌍🌎🌏!";
        assert_eq!(roundtrip(text), text);
    }

    /// A leading byte-order mark is kept as an ordinary code unit.
    #[test]
    fn test_unicode_bom() {
        let with_bom = "\u{FEFF}Hello";
        let encoded = to_wide(with_bom);
        assert_eq!(encoded[0], 0xFEFF);
        let decoded = from_wide(&encoded).unwrap();
        assert_eq!(decoded, with_bom);
    }

    /// Text in a range of scripts survives the round trip.
    #[test]
    fn test_unicode_various_scripts() {
        let texts = [
            "ASCII only",
            "日本語テスト",
            "한국어 테스트",
            "中文测试",
            "Тест на русском",
            "Ελληνικά",
            "עברית",
            "العربية",
            "हिन्दी",
            "ไทย",
        ];
        for text in texts {
            let back = roundtrip(text);
            assert_eq!(back, text, "Failed roundtrip for: {}", text);
        }
    }

    /// Zero-width joiner and non-joiner characters are preserved.
    #[test]
    fn test_unicode_zero_width_chars() {
        let text = "a\u{200D}b\u{200C}c";
        assert_eq!(roundtrip(text), text);
    }

    /// A base letter followed by a combining accent is preserved.
    #[test]
    fn test_unicode_combining_characters() {
        let text = "e\u{0301}";
        assert_eq!(roundtrip(text), text);
    }

    /// A ZWJ-joined emoji sequence is preserved.
    #[test]
    fn test_unicode_emoji_sequences() {
        let text = "👨\u{200D}👩\u{200D}👧";
        assert_eq!(roundtrip(text), text);
    }

    // ---- invalid UTF-16 ------------------------------------------------

    /// A high surrogate without its partner is rejected.
    #[test]
    fn test_invalid_utf16_lone_high_surrogate() {
        let invalid: Vec<u16> = vec![0xD800, 0];
        assert!(from_wide(&invalid).is_err());
    }

    /// A low surrogate without its partner is rejected.
    #[test]
    fn test_invalid_utf16_lone_low_surrogate() {
        let invalid: Vec<u16> = vec![0xDC00, 0];
        assert!(from_wide(&invalid).is_err());
    }

    /// A surrogate pair in the wrong order is rejected.
    #[test]
    fn test_invalid_utf16_reversed_surrogates() {
        let invalid: Vec<u16> = vec![0xDC00, 0xD800, 0];
        assert!(from_wide(&invalid).is_err());
    }

    /// Length is reported in code units, so one emoji counts as two.
    #[test]
    fn test_wide_string_sso_with_surrogate_pairs() {
        let ws = WideString::new("🎉");
        assert!(ws.is_inline());
        assert_eq!(ws.len(), 2);
        assert_eq!(ws.to_string_lossy(), "🎉");
    }
}