1use std::ffi::CStr;
2use std::ops::Range;
3use std::slice;
4use widestring::U32String;
5
/// Records what kind of source data a [`StringBox`] was created from.
///
/// The byte and wide variants keep a copy of the original data next to the
/// converted UTF-8 text stored in the box.
#[derive(Debug, Clone)]
#[repr(C)]
pub enum StringBoxOrigin {
    /// Created from a byte string; holds the original bytes.
    Byte(Vec<u8>),
    /// Created from (or replaced by) a Rust `String` or UTF-8 data; no extra copy is kept.
    String,
    /// Created from a wide (`u32` code unit) string; holds the original code units.
    Wide(U32String),
}
13
/// Payload-free tag naming the kind of data a string box was created from.
///
/// The enum is `#[repr(u8)]`, so the variants have the stable discriminants
/// `Byte = 0`, `UTF8 = 1` and `Wide = 2`. It is a plain tag, hence cheap to copy,
/// compare and hash.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum StringBoxOriginType {
    /// The source was a byte string.
    Byte,
    /// The source was a Rust `String` or UTF-8 data.
    UTF8,
    /// The source was a wide (`u32` code unit) string.
    Wide,
}
21
22impl From<StringBoxOrigin> for StringBoxOriginType {
23 fn from(origin: StringBoxOrigin) -> StringBoxOriginType {
24 match origin {
25 StringBoxOrigin::Byte(_) => StringBoxOriginType::Byte,
26 StringBoxOrigin::String => StringBoxOriginType::UTF8,
27 StringBoxOrigin::Wide(_) => StringBoxOriginType::Wide,
28 }
29 }
30}
31
/// A UTF-8 string paired with a record of the data it was created from.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct StringBox {
    /// What the contents were created from (including the original data for byte and
    /// wide sources).
    origin: StringBoxOrigin,
    /// The contents as a Rust string; every length and index query operates on this.
    string: String,
}
38
39impl StringBox {
40 pub fn new() -> Self {
41 Self::from_string(String::new())
42 }
43
44 pub fn from_string(string: String) -> Self {
46 Self {
47 origin: StringBoxOrigin::String,
48 string,
49 }
50 }
51
52 pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
58 let wide_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
59 Self::from_wide_string(wide_string)
60 }
61
62 pub fn from_wide_string(data: Vec<u32>) -> Self {
64 let wide_string = U32String::from_vec(data);
65 let string = wide_string.to_string_lossy();
66 Self {
67 origin: StringBoxOrigin::Wide(wide_string),
68 string,
69 }
70 }
71
72 pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
78 let byte_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
79 Self::from_byte_string(byte_string)
80 }
81
82 pub fn from_byte_string(data: Vec<u8>) -> Self {
84 let string = data.iter().map(|&c| c as char).collect::<String>();
85 Self {
86 origin: StringBoxOrigin::Byte(data),
87 string,
88 }
89 }
90
91 pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
98 Self::from_utf8_string(unsafe { slice::from_raw_parts(data, length + 1) })
102 }
103
104 pub fn from_utf8_string(data: &[u8]) -> Self {
107 let string = unsafe {
108 CStr::from_bytes_with_nul_unchecked(data)
109 .to_string_lossy()
110 .into_owned()
111 };
112 Self {
113 origin: StringBoxOrigin::String,
114 string,
115 }
116 }
117
118 pub fn set_string(&mut self, string: String) {
120 self.origin = StringBoxOrigin::String;
121 self.string = string;
122 }
123
124 pub fn len(&self) -> usize {
128 self.string.len()
129 }
130
131 pub fn is_empty(&self) -> bool {
132 self.string.is_empty()
133 }
134
135 pub fn char_count(&self) -> usize {
137 self.string.chars().count()
138 }
139
140 pub fn as_str(&self) -> &str {
141 self.string.as_str()
142 }
143
144 pub fn as_bytes(&self) -> &[u8] {
145 self.string.as_bytes()
146 }
147
148 pub fn as_ptr(&self) -> *const u8 {
149 self.string.as_ptr()
150 }
151
152 pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
153 let mut previous_byte_offset = 0_usize;
154
155 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
156 if current_char_index == (index + 1) {
157 return previous_byte_offset..current_byte_offset;
158 }
159 previous_byte_offset = current_byte_offset;
160 }
161 previous_byte_offset..self.len()
162 }
163
164 pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
165 let mut previous_byte_offset = 0_usize;
166 let mut previous_utf16_offset = 0_usize;
167
168 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
169 let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
170 if current_char_index == (index + 1) {
171 return previous_utf16_offset..(previous_utf16_offset + delta);
172 }
173 previous_byte_offset = current_byte_offset;
174 previous_utf16_offset += delta;
175 }
176 let delta = (self.len() - previous_byte_offset).div_ceil(2);
177 previous_utf16_offset..(previous_utf16_offset + delta)
178 }
179
180 pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
181 let mut previous_byte_offset = 0_usize;
182 let mut previous_utf16_offset = 0_usize;
183
184 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
185 let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
186 let current_utf16_offset = previous_utf16_offset + delta;
187
188 if current_utf16_offset >= index {
189 return current_char_index;
190 }
191
192 previous_byte_offset = current_byte_offset;
193 previous_utf16_offset = current_utf16_offset;
194 }
195 self.string.chars().count()
196 }
197}
198
199impl Default for StringBox {
200 fn default() -> Self {
201 Self::new()
202 }
203}
204
205impl std::fmt::Display for StringBox {
206 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207 f.write_str(&self.string)
208 }
209}
210
#[cfg(test)]
mod test {
    use super::*;

    /// Wide code units are converted into the equivalent UTF-8 text.
    #[test]
    fn test_from_wide_string() {
        let wide_string = vec![1087u32, 1088, 1080, 1074, 1077, 1090];
        let string = StringBox::from_wide_string(wide_string);

        assert_eq!(string.to_string(), String::from("привет"));
    }

    /// ASCII bytes map one-to-one onto characters.
    #[test]
    fn test_from_byte_string() {
        let byte_string = vec![104u8, 101, 108, 108, 111];
        let string = StringBox::from_byte_string(byte_string);

        assert_eq!(string.to_string(), String::from("hello"));
    }

    /// The trailing nul terminator is not part of the resulting text.
    #[test]
    fn test_from_utf8_string() {
        let utf8_string = vec![104u8, 101, 108, 108, 111, 0];
        let string = StringBox::from_utf8_string(utf8_string.as_slice());

        assert_eq!(string.to_string(), String::from("hello"));
    }

    /// A single character outside the basic multilingual plane: four UTF-8 bytes, two
    /// UTF-16 code units, one char.
    #[test]
    fn sparkle() {
        let sparkle = String::from("💖");
        let string_box = StringBox::from_string(sparkle.clone());

        assert_eq!(sparkle.len(), 4);
        assert_eq!(string_box.len(), 4);
        assert_eq!(string_box.char_count(), 1);

        assert_eq!(string_box.char_index_to_byte_range(0), 0..4);
        assert_eq!(string_box.char_index_to_utf16_range(0), 0..2);
        assert_eq!(string_box.utf16_position_to_char_index(0), 0);
    }
}