Skip to main content

string_box/
string_box.rs

1use std::ffi::CStr;
2use std::ops::Range;
3use std::slice;
4use widestring::U32String;
5
/// Records how a [`StringBox`] was created, keeping the source data for the
/// byte and wide variants.
#[derive(Debug, Clone)]
#[repr(C)]
pub enum StringBoxOrigin {
    /// Created from a byte string; holds the original bytes.
    Byte(Vec<u8>),
    /// Created from (or replaced by) a Rust string or UTF-8 data; no extra
    /// copy of the source is kept.
    String,
    /// Created from a wide (`u32`) string; holds the original wide string.
    Wide(U32String),
}
13
/// Payload-free tag naming the kind of source a string box was created from.
///
/// `#[repr(u8)]` with implicit discriminants gives `Byte = 0`, `UTF8 = 1`
/// and `Wide = 2`. Being a plain tag, it is `Copy` and comparable so callers
/// can pass it around and match on it without cloning.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
#[repr(u8)]
pub enum StringBoxOriginType {
    /// The source was a byte string.
    Byte,
    /// The source was a Rust string / UTF-8 data.
    UTF8,
    /// The source was a wide (`u32`) string.
    Wide,
}
21
22impl From<StringBoxOrigin> for StringBoxOriginType {
23    fn from(origin: StringBoxOrigin) -> StringBoxOriginType {
24        match origin {
25            StringBoxOrigin::Byte(_) => StringBoxOriginType::Byte,
26            StringBoxOrigin::String => StringBoxOriginType::UTF8,
27            StringBoxOrigin::Wide(_) => StringBoxOriginType::Wide,
28        }
29    }
30}
31
/// A Rust string paired with a record of the representation it was built
/// from.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct StringBox {
    /// How this box was created (and the source data, where it is kept).
    origin: StringBoxOrigin,
    /// The contents as a Rust string; the accessors read from this field.
    string: String,
}
38
39impl StringBox {
40    pub fn new() -> Self {
41        Self::from_string(String::new())
42    }
43
44    /// Create from Rust string
45    pub fn from_string(string: String) -> Self {
46        Self {
47            origin: StringBoxOrigin::String,
48            string,
49        }
50    }
51
52    /// Create from a wide string by copying the data
53    ///
54    /// # Safety
55    ///
56    /// `data` must be valid for reads of `length` `u32` values.
57    pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
58        let wide_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
59        Self::from_wide_string(wide_string)
60    }
61
62    /// Create from a wide string vector
63    pub fn from_wide_string(data: Vec<u32>) -> Self {
64        let wide_string = U32String::from_vec(data);
65        let string = wide_string.to_string_lossy();
66        Self {
67            origin: StringBoxOrigin::Wide(wide_string),
68            string,
69        }
70    }
71
72    /// Create from a wide string by copying the data
73    ///
74    /// # Safety
75    ///
76    /// `data` must be valid for reads of `length` `u8` values.
77    pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
78        let byte_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
79        Self::from_byte_string(byte_string)
80    }
81
82    /// Create from a byte string vector
83    pub fn from_byte_string(data: Vec<u8>) -> Self {
84        let string = data.iter().map(|&c| c as char).collect::<String>();
85        Self {
86            origin: StringBoxOrigin::Byte(data),
87            string,
88        }
89    }
90
91    /// data must be nul terminated
92    /// length does not take nul into account
93    ///
94    /// # Safety
95    ///
96    /// `data` must be valid for reads of `length + 1` bytes and end with a nul byte.
97    pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
98        // we are not using CString::from_raw because it retakes the ownership
99        // and will drop the data. Instead we create a borrowed CStr which
100        // we later copy into a String
101        Self::from_utf8_string(unsafe { slice::from_raw_parts(data, length + 1) })
102    }
103
104    /// data must be nul terminated
105    /// length does not take nul into account
106    pub fn from_utf8_string(data: &[u8]) -> Self {
107        let string = unsafe {
108            CStr::from_bytes_with_nul_unchecked(data)
109                .to_string_lossy()
110                .into_owned()
111        };
112        Self {
113            origin: StringBoxOrigin::String,
114            string,
115        }
116    }
117
118    /// Replace the string with a given instance
119    pub fn set_string(&mut self, string: String) {
120        self.origin = StringBoxOrigin::String;
121        self.string = string;
122    }
123
124    /// Returns the length of this `String`, in bytes, not [`char`]s or
125    /// graphemes. In other words, it may not be what a human considers the
126    /// length of the string.
127    pub fn len(&self) -> usize {
128        self.string.len()
129    }
130
131    pub fn is_empty(&self) -> bool {
132        self.string.is_empty()
133    }
134
135    /// Returns the amount of [`char`]
136    pub fn char_count(&self) -> usize {
137        self.string.chars().count()
138    }
139
140    pub fn as_str(&self) -> &str {
141        self.string.as_str()
142    }
143
144    pub fn as_bytes(&self) -> &[u8] {
145        self.string.as_bytes()
146    }
147
148    pub fn as_ptr(&self) -> *const u8 {
149        self.string.as_ptr()
150    }
151
152    pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
153        let mut previous_byte_offset = 0_usize;
154
155        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
156            if current_char_index == (index + 1) {
157                return previous_byte_offset..current_byte_offset;
158            }
159            previous_byte_offset = current_byte_offset;
160        }
161        previous_byte_offset..self.len()
162    }
163
164    pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
165        let mut previous_byte_offset = 0_usize;
166        let mut previous_utf16_offset = 0_usize;
167
168        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
169            let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
170            if current_char_index == (index + 1) {
171                return previous_utf16_offset..(previous_utf16_offset + delta);
172            }
173            previous_byte_offset = current_byte_offset;
174            previous_utf16_offset += delta;
175        }
176        let delta = (self.len() - previous_byte_offset).div_ceil(2);
177        previous_utf16_offset..(previous_utf16_offset + delta)
178    }
179
180    pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
181        let mut previous_byte_offset = 0_usize;
182        let mut previous_utf16_offset = 0_usize;
183
184        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate() {
185            let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
186            let current_utf16_offset = previous_utf16_offset + delta;
187
188            if current_utf16_offset >= index {
189                return current_char_index;
190            }
191
192            previous_byte_offset = current_byte_offset;
193            previous_utf16_offset = current_utf16_offset;
194        }
195        self.string.chars().count()
196    }
197}
198
199impl Default for StringBox {
200    fn default() -> Self {
201        Self::new()
202    }
203}
204
205impl std::fmt::Display for StringBox {
206    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
207        f.write_str(&self.string)
208    }
209}
210
#[cfg(test)]
mod test {
    use super::*;

    /// Wide characters are converted to the matching Rust string.
    #[test]
    pub fn test_from_wide_string() {
        let code_points: Vec<u32> = vec![1087, 1088, 1080, 1074, 1077, 1090];
        let boxed = StringBox::from_wide_string(code_points);

        assert_eq!(boxed.to_string(), "привет");
    }

    /// ASCII bytes map one-to-one onto characters.
    #[test]
    pub fn test_from_byte_string() {
        let boxed = StringBox::from_byte_string(b"hello".to_vec());

        assert_eq!(boxed.to_string(), "hello");
    }

    /// The trailing nul terminator is not part of the resulting string.
    #[test]
    pub fn test_from_utf8_string() {
        let terminated: Vec<u8> = b"hello\0".to_vec();
        let boxed = StringBox::from_utf8_string(&terminated);

        assert_eq!(boxed.to_string(), "hello");
    }

    /// A single 4-byte character has byte length 4 but char count 1.
    #[test]
    pub fn sparkle() {
        let sparkle = "💖".to_string();
        let string_box = StringBox::from_string(sparkle.clone());

        assert_eq!(sparkle.len(), 4);
        assert_eq!(string_box.len(), 4);
        assert_eq!(string_box.char_count(), 1);

        // Diagnostic output only; shown when tests run with `--nocapture`.
        sparkle
            .char_indices()
            .for_each(|entry| println!("{:?}", entry));
        println!("{:?}", sparkle.bytes());
    }
}