utf8_rune/
rune.rs

1use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
2use std::fmt::{Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::Deref;
5
6use crate::pointer::{self, get_byte_slice_of};
7use crate::{
8    display_error, format_bytes, get_rune_cutoff_at_index, unwrap_indent, Result,
9    DEFAULT_INDENT,
10};
11
/// A Rune represents a single user-perceived character: one glyph as a reader
/// sees it, stored as the UTF-8 bytes that encode it. A rune may therefore span
/// several bytes (see the examples below, where one emoji takes up to 15 bytes).
/// To handle contiguous bytes as multiple runes consider using
/// [Runes](crate::Runes).
///
/// A `Rune` is only a raw pointer plus a byte count. It is `Copy`, so every copy
/// refers to the same bytes.
///
/// # Examples
///
/// ```
/// use utf8_rune::Rune;
/// let rune = Rune::new("❤️");
/// assert_eq!(rune.len(), 6);
/// assert_eq!(rune.as_str(), "❤️");
/// assert_eq!(rune.as_bytes(), "❤️".as_bytes());
/// ```
///
/// ```
/// use utf8_rune::Rune;
/// let rune = Rune::new("👌🏽");
/// assert_eq!(rune.len(), 8);
/// assert_eq!(rune.as_str(), "👌🏽");
/// assert_eq!(rune.as_bytes(), "👌🏽".as_bytes());
/// ```
///
/// ```
/// use utf8_rune::Rune;
/// let rune = Rune::new("👩🏻‍🚒");
/// assert_eq!(rune.len(), 15);
/// assert_eq!(rune.as_str(), "👩🏻‍🚒");
/// assert_eq!(rune.as_bytes(), "👩🏻‍🚒".as_bytes());
/// ```
#[derive(Clone, Copy)]
pub struct Rune {
    /// Address of the first byte of the rune.
    pub(crate) ptr: *const u8,
    /// Number of bytes that make up the rune.
    pub(crate) length: usize,
}
45
46impl Default for Rune {
47    fn default() -> Rune {
48        Rune::empty().expect("memory allocation")
49    }
50}
51impl Rune {
52    pub fn from_raw_parts(ptr: *const u8, length: usize) -> Rune {
53        Rune { ptr, length }
54    }
55
56    pub fn new<T: Display>(input: T) -> Rune {
57        Rune::allocate(&input)
58            .expect(format!("allocate memory for Rune from {input}").as_str())
59    }
60
61    pub fn allocate<T: Display>(input: T) -> Result<Rune> {
62        let (input_ptr, input_length) = pointer::from_display(input)?;
63        match get_rune_cutoff_at_index(input_ptr, input_length, 0) {
64            Ok(length) => {
65                let ptr = pointer::create(length)?;
66                for offset in 0..length {
67                    unsafe {
68                        ptr.add(offset)
69                            .write(input_ptr.add(offset).read());
70                    }
71                }
72                pointer::destroy(input_ptr, input_length)?;
73                Ok(Rune::from_raw_parts(ptr, length))
74            },
75            Err(error) => {
76                display_error(error, input_ptr, input_length);
77                Ok(Rune::default())
78            },
79        }
80    }
81
82    pub fn empty() -> Result<Rune> {
83        let length = 0;
84        let ptr = pointer::create(length)?;
85        Ok(Rune::from_raw_parts(ptr, length))
86    }
87
88    pub fn from_ptr_cutoff(
89        input_ptr: *const u8,
90        input_length: usize,
91        index: usize,
92    ) -> Result<Rune> {
93        let cutoff = get_rune_cutoff_at_index(input_ptr, input_length, index)?;
94        let length = cutoff - index;
95        let ptr = pointer::create(length)?;
96
97        for (index, byte) in get_byte_slice_of(input_ptr, index, length)
98            .into_iter()
99            .enumerate()
100        {
101            unsafe { ptr.add(index).write(*byte) }
102        }
103        Ok(Rune { ptr, length })
104    }
105
106    pub fn as_str<'g>(&self) -> &'g str {
107        let mut offset = self.length;
108        loop {
109            if let Ok(slice) = std::str::from_utf8(unsafe {
110                std::slice::from_raw_parts(self.ptr, offset)
111            }) {
112                break slice;
113            }
114            if offset > 0 {
115                offset -= 1;
116            } else {
117                break "";
118            }
119        }
120    }
121
122    pub fn as_bytes<'g>(&self) -> &'g [u8] {
123        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
124    }
125
126    pub fn as_debug(&self, indent: Option<usize>) -> String {
127        let indent = unwrap_indent(indent);
128        format!(
129            "Rune{{{}}}{}",
130            self.as_str(),
131            format_bytes(self.as_bytes(), Some(indent + DEFAULT_INDENT)),
132        )
133    }
134}
135
136impl From<&str> for Rune {
137    fn from(s: &str) -> Rune {
138        Rune::new(s)
139    }
140}
141
142impl From<String> for Rune {
143    fn from(s: String) -> Rune {
144        Rune::new(s)
145    }
146}
147
148impl From<&String> for Rune {
149    fn from(s: &String) -> Rune {
150        Rune::new(s)
151    }
152}
153
154impl Display for Rune {
155    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
156        write!(f, "{}", self.as_str())
157    }
158}
159impl Debug for Rune {
160    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
161        write!(f, "{}", self.as_debug(None))
162    }
163}
164
165impl Deref for Rune {
166    type Target = [u8];
167
168    fn deref(&self) -> &[u8] {
169        self.as_bytes()
170    }
171}
172
173// impl Drop for Rune {
174//     fn drop(&mut self) {
175//         pointer::destroy(self.ptr, self.length)
176//     }
177// }
178
179impl PartialEq<Rune> for Rune {
180    fn eq(&self, other: &Rune) -> bool {
181        self.as_bytes().eq(other.as_bytes())
182    }
183}
184impl Eq for Rune {}
185
186impl PartialOrd<Rune> for Rune {
187    fn partial_cmp(&self, other: &Rune) -> Option<Ordering> {
188        self.as_bytes().partial_cmp(&other.as_bytes())
189    }
190}
191impl<'g> PartialOrd<&'g str> for Rune {
192    fn partial_cmp(&self, other: &&'g str) -> Option<Ordering> {
193        self.as_str().partial_cmp(other)
194    }
195}
196impl<'g> PartialOrd<&'g [u8]> for Rune {
197    fn partial_cmp(&self, other: &&'g [u8]) -> Option<Ordering> {
198        self.as_bytes().partial_cmp(other)
199    }
200}
201impl<'g> PartialOrd<Vec<u8>> for Rune {
202    fn partial_cmp(&self, other: &Vec<u8>) -> Option<Ordering> {
203        self.as_bytes().to_vec().partial_cmp(other)
204    }
205}
206impl<'g> PartialOrd<&Vec<u8>> for Rune {
207    fn partial_cmp(&self, other: &&Vec<u8>) -> Option<Ordering> {
208        self.as_bytes().to_vec().partial_cmp(other)
209    }
210}
211
212impl<'g> PartialEq<&'g str> for Rune {
213    fn eq(&self, other: &&'g str) -> bool {
214        self.as_str().eq(*other)
215    }
216}
217
218impl<'g> PartialEq<&'g [u8]> for Rune {
219    fn eq(&self, other: &&'g [u8]) -> bool {
220        self.as_bytes().eq(*other)
221    }
222}
223impl<'g> PartialEq<Vec<u8>> for Rune {
224    fn eq(&self, other: &Vec<u8>) -> bool {
225        self.as_bytes().to_vec().eq(other)
226    }
227}
228impl<'g> PartialEq<&Vec<u8>> for Rune {
229    fn eq(&self, other: &&Vec<u8>) -> bool {
230        self.as_bytes().to_vec().eq(*other)
231    }
232}
233
234impl Ord for Rune {
235    fn cmp(&self, other: &Self) -> Ordering {
236        self.as_bytes()
237            .to_vec()
238            .cmp(&other.as_bytes().to_vec())
239    }
240}
241
242impl Hash for Rune {
243    fn hash<H: Hasher>(&self, state: &mut H) {
244        self.as_bytes().hash(state);
245    }
246}
247
#[cfg(test)]
mod test_rune {
    use crate::Rune;

    /// Builds a rune from `input` and checks that it is exactly `expected`,
    /// which must be `length` bytes long.
    fn assert_rune(input: &str, expected: &str, length: usize) {
        let rune = Rune::new(input);
        assert_eq!(rune.len(), length);
        assert_eq!(rune.as_str(), expected);
        assert_eq!(rune.as_bytes(), expected.as_bytes());
    }

    /// An input holding exactly one rune yields that rune unchanged.
    #[test]
    fn test_single_rune() {
        let cases: &[(&str, usize)] = &[
            ("❤️", 6),
            ("👌", 4),
            ("👌🏻", 8),
            ("👌🏼", 8),
            ("👌🏽", 8),
            ("👌🏾", 8),
            ("👌🏿", 8),
        ];
        for &(input, length) in cases {
            assert_rune(input, input, length);
        }
    }

    /// An input holding several runes yields only the first one.
    #[test]
    fn test_from_multiple_to_vec() {
        let cases: &[(&str, &str, usize)] = &[
            ("👌👌🏻👌🏼👌🏽👌🏾👌🏿", "👌", 4),
            ("👌🏻👌🏼👌🏽👌🏾👌🏿", "👌🏻", 8),
            ("👌🏼👌🏽👌🏾👌🏿", "👌🏼", 8),
            ("👌🏽👌🏾👌🏿", "👌🏽", 8),
            ("👌🏾👌🏿", "👌🏾", 8),
            ("👌🏿", "👌🏿", 8),
        ];
        for &(input, expected, length) in cases {
            assert_rune(input, expected, length);
        }
    }
}