Skip to main content

string_box/
string_box.rs

1use std::ffi::CStr;
2use std::ops::Range;
3use std::slice;
4use widestring::U32String;
5
6#[derive(Debug, Clone)]
7#[repr(C)]
8pub enum StringBoxOrigin {
9    Byte(Vec<u8>),
10    String,
11    Wide(U32String),
12}
13
/// Payload-free tag naming the kind of input a string box was created from.
///
/// The enum is `#[repr(u8)]`, so variants have the discriminants `Byte = 0`,
/// `UTF8 = 1` and `Wide = 2` (declaration order).
///
/// Being a plain tag it is cheap to copy and can be compared directly.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum StringBoxOriginType {
    /// The box was created from a byte string.
    Byte,
    /// The box was created from a Rust / UTF-8 string.
    UTF8,
    /// The box was created from a wide (`u32`) string.
    Wide,
}
21
22impl From<StringBoxOrigin> for StringBoxOriginType {
23    fn from(origin: StringBoxOrigin) -> StringBoxOriginType {
24        match origin {
25            StringBoxOrigin::Byte(_) => StringBoxOriginType::Byte,
26            StringBoxOrigin::String => StringBoxOriginType::UTF8,
27            StringBoxOrigin::Wide(_) => StringBoxOriginType::Wide,
28        }
29    }
30}
31
32#[derive(Debug, Clone)]
33#[repr(C)]
34pub struct StringBox {
35    origin: StringBoxOrigin,
36    string: String,
37}
38
39impl StringBox {
40    pub fn new() -> Self {
41        Self::from_string(String::new())
42    }
43
44    /// Create from Rust string
45    pub fn from_string(string: String) -> Self {
46        Self {
47            origin: StringBoxOrigin::String,
48            string,
49        }
50    }
51
52    /// Create from a wide string by copying the data
53    ///
54    /// # Safety
55    ///
56    /// `data` must be valid for reads of `length` `u32` values.
57    pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
58        let wide_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
59        Self::from_wide_string(wide_string)
60    }
61
62    /// Create from a wide string vector
63    pub fn from_wide_string(data: Vec<u32>) -> Self {
64        let wide_string = U32String::from_vec(data);
65        let string = wide_string.to_string_lossy();
66        Self {
67            origin: StringBoxOrigin::Wide(wide_string),
68            string,
69        }
70    }
71
72    /// Create from a wide string by copying the data
73    ///
74    /// # Safety
75    ///
76    /// `data` must be valid for reads of `length` `u8` values.
77    pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
78        let byte_string = unsafe { slice::from_raw_parts(data, length) }.to_vec();
79        Self::from_byte_string(byte_string)
80    }
81
82    /// Create from a byte string vector
83    pub fn from_byte_string(data: Vec<u8>) -> Self {
84        let string = data.iter().map(|&c| c as char).collect::<String>();
85        Self {
86            origin: StringBoxOrigin::Byte(data),
87            string,
88        }
89    }
90
91    /// data must be nul terminated
92    /// length does not take nul into account
93    ///
94    /// # Safety
95    ///
96    /// `data` must be valid for reads of `length + 1` bytes and end with a nul byte.
97    pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
98        // we are not using CString::from_raw because it retakes the ownership
99        // and will drop the data. Instead we create a borrowed CStr which
100        // we later copy into a String
101        Self::from_utf8_string(unsafe { slice::from_raw_parts(data, length + 1) })
102    }
103
104    /// data must be nul terminated
105    /// length does not take nul into account
106    pub fn from_utf8_string(data: &[u8]) -> Self {
107        let string = unsafe {
108            CStr::from_bytes_with_nul_unchecked(data)
109                .to_string_lossy()
110                .into_owned()
111        };
112        Self {
113            origin: StringBoxOrigin::String,
114            string,
115        }
116    }
117
118    /// Replace the string with a given instance
119    pub fn set_string(&mut self, string: String) {
120        self.origin = StringBoxOrigin::String;
121        self.string = string;
122    }
123
124    /// Returns the length of this `String`, in bytes, not [`char`]s or
125    /// graphemes. In other words, it may not be what a human considers the
126    /// length of the string.
127    pub fn len(&self) -> usize {
128        self.string.len()
129    }
130
131    pub fn is_empty(&self) -> bool {
132        self.string.is_empty()
133    }
134
135    /// Returns the amount of [`char`]
136    pub fn char_count(&self) -> usize {
137        self.string.chars().count()
138    }
139
140    pub fn as_str(&self) -> &str {
141        self.string.as_str()
142    }
143
144    pub fn as_bytes(&self) -> &[u8] {
145        self.string.as_bytes()
146    }
147
148    pub fn as_ptr(&self) -> *const u8 {
149        self.string.as_ptr()
150    }
151
152    pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
153        let mut previous_byte_offset = 0_usize;
154
155        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
156        {
157            if current_char_index == (index + 1) {
158                return previous_byte_offset..current_byte_offset;
159            }
160            previous_byte_offset = current_byte_offset;
161        }
162        previous_byte_offset..self.len()
163    }
164
165    pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
166        let mut previous_byte_offset = 0_usize;
167        let mut previous_utf16_offset = 0_usize;
168
169        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
170        {
171            let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
172            if current_char_index == (index + 1) {
173                return previous_utf16_offset..(previous_utf16_offset + delta);
174            }
175            previous_byte_offset = current_byte_offset;
176            previous_utf16_offset += delta;
177        }
178        let delta = (self.len() - previous_byte_offset).div_ceil(2);
179        previous_utf16_offset..(previous_utf16_offset + delta)
180    }
181
182    pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
183        let mut previous_byte_offset = 0_usize;
184        let mut previous_utf16_offset = 0_usize;
185
186        for (current_char_index, (current_byte_offset, _)) in self.string.char_indices().enumerate()
187        {
188            let delta = (current_byte_offset - previous_byte_offset).div_ceil(2);
189            let current_utf16_offset = previous_utf16_offset + delta;
190
191            if current_utf16_offset >= index {
192                return current_char_index;
193            }
194
195            previous_byte_offset = current_byte_offset;
196            previous_utf16_offset = current_utf16_offset;
197        }
198        self.string.chars().count()
199    }
200}
201
202impl Default for StringBox {
203    fn default() -> Self {
204        Self::new()
205    }
206}
207
208impl std::fmt::Display for StringBox {
209    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
210        f.write_str(&self.string)
211    }
212}
213
#[cfg(test)]
mod test {
    use super::*;

    /// Wide input is decoded from its `u32` code points.
    #[test]
    pub fn test_from_wide_string() {
        let code_points: Vec<u32> = "привет".chars().map(u32::from).collect();
        let boxed = StringBox::from_wide_string(code_points);

        assert_eq!(boxed.to_string(), "привет");
    }

    /// Byte input maps each byte to the character with the same value.
    #[test]
    pub fn test_from_byte_string() {
        let boxed = StringBox::from_byte_string(b"hello".to_vec());

        assert_eq!(boxed.to_string(), "hello");
    }

    /// UTF-8 input is read up to (and excluding) its nul terminator.
    #[test]
    pub fn test_from_utf8_string() {
        let boxed = StringBox::from_utf8_string(b"hello\0");

        assert_eq!(boxed.to_string(), "hello");
    }

    /// A single four-byte character: byte length and char count differ.
    #[test]
    pub fn sparkle() {
        let sparkle = String::from("💖");
        let string_box = StringBox::from_string(sparkle.clone());

        assert_eq!(sparkle.len(), 4);
        assert_eq!(string_box.len(), 4);
        assert_eq!(string_box.char_count(), 1);

        sparkle
            .char_indices()
            .for_each(|entry| println!("{:?}", entry));
        println!("{:?}", sparkle.bytes());
    }
}