1use std::ffi::CStr;
2use std::ops::Range;
3use std::slice;
4use widestring::U32String;
5
6#[derive(Debug, Clone)]
7#[repr(C)]
8pub enum StringBoxOrigin {
9 Byte(Vec<u8>),
10 String,
11 Wide(U32String),
12}
13
/// Payload-free tag naming the kind of input a string box was created from.
///
/// The enum is `repr(u8)` with implicit discriminants, so the variants map to
/// `Byte = 0`, `UTF8 = 1` and `Wide = 2` in declaration order.
///
/// Being a plain tag, it is `Copy` and comparable so callers can pass it by
/// value and test it with `==` instead of pattern matching.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum StringBoxOriginType {
    /// The string box was created from raw bytes.
    Byte,
    /// The string box was created from a Rust string or UTF-8 data.
    UTF8,
    /// The string box was created from 32-bit code units.
    Wide,
}
21
22impl From<StringBoxOrigin> for StringBoxOriginType {
23 fn from(origin: StringBoxOrigin) -> StringBoxOriginType {
24 match origin {
25 StringBoxOrigin::Byte(_) => StringBoxOriginType::Byte,
26 StringBoxOrigin::String => StringBoxOriginType::UTF8,
27 StringBoxOrigin::Wide(_) => StringBoxOriginType::Wide,
28 }
29 }
30}
31
32#[derive(Debug, Clone)]
33#[repr(C)]
34pub struct StringBox {
35 origin: StringBoxOrigin,
36 string: String,
37}
38
39impl StringBox {
40 pub fn new() -> Self {
41 Self::from_string(String::new())
42 }
43
44 pub fn from_string(string: String) -> Self {
46 Self {
47 origin: StringBoxOrigin::String,
48 string,
49 }
50 }
51
52 pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
58 let wide_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
59 Self::from_wide_string(wide_string)
60 }
61
62 pub fn from_wide_string(data: Vec<u32>) -> Self {
64 let wide_string = U32String::from_vec(data);
65 let string = wide_string.to_string_lossy();
66 Self {
67 origin: StringBoxOrigin::Wide(wide_string),
68 string,
69 }
70 }
71
72 pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
78 let byte_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
79 Self::from_byte_string(byte_string)
80 }
81
82 pub fn from_byte_string(data: Vec<u8>) -> Self {
84 let string = data.iter().map(|&c| c as char).collect::<String>();
85 Self {
86 origin: StringBoxOrigin::Byte(data),
87 string,
88 }
89 }
90
91 pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
98 Self::from_utf8_string(unsafe { slice::from_raw_parts(data, length + 1) })
102 }
103
104 pub fn from_utf8_string(data: &[u8]) -> Self {
107 let string = unsafe {
108 CStr::from_bytes_with_nul_unchecked(data)
109 .to_string_lossy()
110 .into_owned()
111 };
112 Self {
113 origin: StringBoxOrigin::String,
114 string,
115 }
116 }
117
118 pub fn set_string(&mut self, string: String) {
120 self.origin = StringBoxOrigin::String;
121 self.string = string;
122 }
123
124 pub fn len(&self) -> usize {
128 self.string.len()
129 }
130
131 pub fn is_empty(&self) -> bool {
132 self.string.is_empty()
133 }
134
135 pub fn char_count(&self) -> usize {
137 self.string.chars().count()
138 }
139
140 pub fn as_str(&self) -> &str {
141 self.string.as_str()
142 }
143
144 pub fn as_bytes(&self) -> &[u8] {
145 self.string.as_bytes()
146 }
147
148 pub fn as_ptr(&self) -> *const u8 {
149 self.string.as_ptr()
150 }
151
152 pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
153 let mut previous_byte_offset = 0_usize;
154
155 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
156 {
157 if current_char_index == (index + 1) {
158 return previous_byte_offset..current_byte_offset;
159 }
160 previous_byte_offset = current_byte_offset;
161 }
162 previous_byte_offset..self.len()
163 }
164
165 pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
166 let mut previous_byte_offset = 0_usize;
167 let mut previous_utf16_offset = 0_usize;
168
169 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
170 {
171 let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
172 if current_char_index == (index + 1) {
173 return previous_utf16_offset..(previous_utf16_offset + delta);
174 }
175 previous_byte_offset = current_byte_offset;
176 previous_utf16_offset += delta;
177 }
178 let delta = (self.len() - previous_byte_offset).div_ceil(2);
179 previous_utf16_offset..(previous_utf16_offset + delta)
180 }
181
182 pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
183 let mut previous_byte_offset = 0_usize;
184 let mut previous_utf16_offset = 0_usize;
185
186 for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
187 {
188 let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
189 let current_utf16_offset = previous_utf16_offset + delta;
190
191 if current_utf16_offset >= index {
192 return current_char_index;
193 }
194
195 previous_byte_offset = current_byte_offset;
196 previous_utf16_offset = current_utf16_offset;
197 }
198 self.string.chars().count()
199 }
200}
201
202impl Default for StringBox {
203 fn default() -> Self {
204 Self::new()
205 }
206}
207
208impl std::fmt::Display for StringBox {
209 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210 f.write_str(&self.string)
211 }
212}
213
#[cfg(test)]
mod test {
    use super::*;

    /// 32-bit code units are decoded into the matching Cyrillic text.
    #[test]
    pub fn test_from_wide_string() {
        let code_units: Vec<u32> = vec![1087, 1088, 1080, 1074, 1077, 1090];
        let boxed = StringBox::from_wide_string(code_units);

        assert_eq!(boxed.to_string(), "привет".to_string());
    }

    /// ASCII bytes are mapped one-to-one onto characters.
    #[test]
    pub fn test_from_byte_string() {
        let ascii: Vec<u8> = vec![104, 101, 108, 108, 111];
        let boxed = StringBox::from_byte_string(ascii);

        assert_eq!(boxed.to_string(), "hello".to_string());
    }

    /// The trailing nul terminator is not part of the resulting text.
    #[test]
    pub fn test_from_utf8_string() {
        let terminated: Vec<u8> = vec![104, 101, 108, 108, 111, 0];
        let boxed = StringBox::from_utf8_string(&terminated[..]);

        assert_eq!(boxed.to_string(), "hello".to_string());
    }

    /// A single emoji is four bytes long but counts as one character.
    #[test]
    pub fn sparkle() {
        let emoji = "💖".to_string();
        let boxed = StringBox::from_string(emoji.clone());

        assert_eq!(emoji.len(), 4);
        assert_eq!(boxed.len(), 4);
        assert_eq!(boxed.char_count(), 1);

        // Diagnostic output only; visible when tests run with `--nocapture`.
        emoji
            .char_indices()
            .for_each(|entry| println!("{:?}", entry));
        println!("{:?}", emoji.bytes());
    }
}