string_box/
string_box.rs

1use std::ffi::CStr;
2use std::ops::Range;
3use std::slice;
4use widestring::U32String;
5
/// Records which kind of input a [`StringBox`] was created from, keeping the
/// original data alongside for the byte and wide cases.
#[derive(Debug, Clone)]
#[repr(C)]
pub enum StringBoxOrigin {
    /// Created from a byte string; holds the original bytes.
    Byte(Vec<u8>),
    /// Created from (or replaced with) a Rust `String` or UTF-8 data;
    /// no additional data is kept.
    String,
    /// Created from a wide string; holds the original `u32` units.
    Wide(U32String),
}
13
/// Payload-free tag naming the variant of a [`StringBoxOrigin`].
///
/// The enum is `#[repr(u8)]`; the discriminants are spelled out so the
/// numeric values cannot shift if variants are ever reordered.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
#[repr(u8)]
pub enum StringBoxOriginType {
    /// Corresponds to `StringBoxOrigin::Byte`.
    Byte = 0,
    /// Corresponds to `StringBoxOrigin::String`.
    UTF8 = 1,
    /// Corresponds to `StringBoxOrigin::Wide`.
    Wide = 2,
}
21
22impl From<StringBoxOrigin> for StringBoxOriginType {
23    fn from(origin: StringBoxOrigin) -> StringBoxOriginType {
24        match origin {
25            StringBoxOrigin::Byte(_) => StringBoxOriginType::Byte,
26            StringBoxOrigin::String => StringBoxOriginType::UTF8,
27            StringBoxOrigin::Wide(_) => StringBoxOriginType::Wide,
28        }
29    }
30}
31
/// A Rust `String` paired with a record of the input it was created from.
#[derive(Debug, Clone)]
#[repr(C)]
pub struct StringBox {
    /// How this box was created, including the original byte or wide data
    /// where applicable.
    origin: StringBoxOrigin,
    /// The Rust `String` form of the contents; the accessors in
    /// `impl StringBox` read from this field.
    string: String,
}
38
39impl StringBox {
40    pub fn new() -> Self {
41        Self::from_string(String::new())
42    }
43
44    /// Create from Rust string
45    pub fn from_string(string: String) -> Self {
46        Self {
47            origin: StringBoxOrigin::String,
48            string,
49        }
50    }
51
52    /// Create from a wide string by copying the data
53    pub unsafe fn from_wide_string_data(data: *const u32, length: usize) -> Self {
54        let wide_string = slice::from_raw_parts(data, length).to_vec();
55        Self::from_wide_string(wide_string)
56    }
57
58    /// Create from a wide string vector
59    pub fn from_wide_string(data: Vec<u32>) -> Self {
60        let wide_string = U32String::from_vec(data);
61        let string = wide_string.to_string_lossy();
62        Self {
63            origin: StringBoxOrigin::Wide(wide_string),
64            string,
65        }
66    }
67
68    /// Create from a wide string by copying the data
69    pub unsafe fn from_byte_string_data(data: *const u8, length: usize) -> Self {
70        let byte_string = slice::from_raw_parts(data, length).to_vec();
71        Self::from_byte_string(byte_string)
72    }
73
74    /// Create from a byte string vector
75    pub fn from_byte_string(data: Vec<u8>) -> Self {
76        let string = data.iter().map(|&c| c as char).collect::<String>();
77        Self {
78            origin: StringBoxOrigin::Byte(data),
79            string,
80        }
81    }
82
83    /// data must be nul terminated
84    /// length does not take nul into account
85    pub unsafe fn from_utf8_string_data(data: *const u8, length: usize) -> Self {
86        // we are not using CString::from_raw because it retakes the ownership
87        // and will drop the data. Instead we create a borrowed CStr which
88        // we later copy into a String
89        Self::from_utf8_string(slice::from_raw_parts(data, length + 1))
90    }
91
92    /// data must be nul terminated
93    /// length does not take nul into account
94    pub fn from_utf8_string(data: &[u8]) -> Self {
95        let string = unsafe {
96            CStr::from_bytes_with_nul_unchecked(data)
97                .to_string_lossy()
98                .into_owned()
99        };
100        Self {
101            origin: StringBoxOrigin::String,
102            string,
103        }
104    }
105
106    /// Replace the string with a given instance
107    pub fn set_string(&mut self, string: String) {
108        self.origin = StringBoxOrigin::String;
109        self.string = string;
110    }
111
112    /// Returns the length of this `String`, in bytes, not [`char`]s or
113    /// graphemes. In other words, it may not be what a human considers the
114    /// length of the string.
115    pub fn len(&self) -> usize {
116        self.string.len()
117    }
118
119    /// Returns the amount of [`char`]
120    pub fn char_count(&self) -> usize {
121        self.string.chars().count()
122    }
123
124    pub fn to_string(&self) -> String {
125        self.string.clone()
126    }
127
128    pub fn as_str(&self) -> &str {
129        self.string.as_str()
130    }
131
132    pub fn as_bytes(&self) -> &[u8] {
133        self.string.as_bytes()
134    }
135
136    pub fn as_ptr(&self) -> *const u8 {
137        self.string.as_ptr()
138    }
139
140    pub fn char_index_to_byte_range(&self, index: usize) -> Range<usize> {
141        let mut current_char_index = 0 as usize;
142        let mut previous_byte_offset = 0 as usize;
143
144        for (current_byte_offset, _) in self.string.char_indices() {
145            if current_char_index == (index + 1) {
146                return previous_byte_offset..current_byte_offset;
147            }
148            current_char_index = current_char_index + 1;
149            previous_byte_offset = current_byte_offset;
150        }
151        previous_byte_offset..self.len()
152    }
153
154    pub fn char_index_to_utf16_range(&self, index: usize) -> Range<usize> {
155        let mut current_char_index = 0 as usize;
156        let mut previous_byte_offset = 0 as usize;
157        let mut previous_utf16_offset = 0 as usize;
158
159        for (current_byte_offset, _) in self.string.char_indices() {
160            let delta = ((current_byte_offset - previous_byte_offset) + 1) / 2;
161            if current_char_index == (index + 1) {
162                return previous_utf16_offset..(previous_utf16_offset + delta);
163            }
164            current_char_index = current_char_index + 1;
165            previous_byte_offset = current_byte_offset;
166            previous_utf16_offset = previous_utf16_offset + delta;
167        }
168        let delta = ((self.len() - previous_byte_offset) + 1) / 2;
169        previous_utf16_offset..(previous_utf16_offset + delta)
170    }
171
172    pub fn utf16_position_to_char_index(&self, index: usize) -> usize {
173        let mut current_char_index = 0 as usize;
174        let mut previous_byte_offset = 0 as usize;
175        let mut previous_utf16_offset = 0 as usize;
176
177        for (current_byte_offset, _) in self.string.char_indices() {
178            let delta = ((current_byte_offset - previous_byte_offset) + 1) / 2;
179            let current_utf16_offset = previous_utf16_offset + delta;
180
181            if current_utf16_offset >= index {
182                return current_char_index;
183            }
184
185            current_char_index = current_char_index + 1;
186            previous_byte_offset = current_byte_offset;
187            previous_utf16_offset = current_utf16_offset;
188        }
189        current_char_index
190    }
191}
192
#[cfg(test)]
mod test {
    use super::*;

    /// UTF-32 code points for a Cyrillic word decode to the matching string.
    #[test]
    pub fn test_from_wide_string() {
        let code_points: Vec<u32> = [0x43F, 0x440, 0x438, 0x432, 0x435, 0x442].to_vec();
        let boxed = StringBox::from_wide_string(code_points);

        assert_eq!(boxed.to_string(), "привет".to_owned());
    }

    /// ASCII bytes map one-to-one onto chars.
    #[test]
    pub fn test_from_byte_string() {
        let boxed = StringBox::from_byte_string(b"hello".to_vec());

        assert_eq!(boxed.to_string(), "hello".to_owned());
    }

    /// The trailing nul terminator is not part of the resulting string.
    #[test]
    pub fn test_from_utf8_string() {
        let terminated: &[u8] = b"hello\0";
        let boxed = StringBox::from_utf8_string(terminated);

        assert_eq!(boxed.to_string(), "hello".to_owned());
    }

    /// A single emoji is four UTF-8 bytes but only one char.
    #[test]
    pub fn sparkle() {
        let sparkle = "💖".to_owned();
        let string_box = StringBox::from_string(sparkle.clone());

        assert_eq!(sparkle.len(), 4);
        assert_eq!(string_box.len(), 4);
        assert_eq!(string_box.char_count(), 1);

        // Diagnostic output, visible with `cargo test -- --nocapture`.
        sparkle
            .char_indices()
            .for_each(|entry| println!("{:?}", entry));
        println!("{:?}", sparkle.bytes());
    }
}