utf8_rune/
rune.rs

1use std::cmp::{Eq, Ord, Ordering, PartialEq, PartialOrd};
2use std::fmt::{Debug, Display, Formatter};
3use std::hash::{Hash, Hasher};
4use std::ops::Deref;
5
6use crate::{
7    dealloc_ptr, display_error, format_bytes, get_byte_slice_of,
8    get_rune_cutoff_at_index, new_ptr, slice_ptr_and_length_from_display,
9    unwrap_indent, Result, DEFAULT_INDENT,
10};
11
/// A `Rune` represents a single visible UTF-8 character, stored as a raw
/// pointer to its bytes plus a byte count.
///
/// To handle contiguous bytes as multiple runes consider using
/// [Runes](crate::Runes).
///
/// # Examples
///
///```
/// use utf8_rune::Rune;
/// let rune = Rune::new("❤️");
/// assert_eq!(rune.len(), 6);
/// assert_eq!(rune.as_str(), "❤️");
/// assert_eq!(rune.as_bytes(), "❤️".as_bytes());
///```
///
///```
/// use utf8_rune::Rune;
/// let rune = Rune::new("👌🏽");
/// assert_eq!(rune.len(), 8);
/// assert_eq!(rune.as_str(), "👌🏽");
/// assert_eq!(rune.as_bytes(), "👌🏽".as_bytes());
///```
#[derive(Copy, Clone)]
pub struct Rune {
    /// Address of the first byte of the rune.
    pub(crate) ptr: *const u8,
    /// Number of bytes that belong to the rune, starting at `ptr`.
    pub(crate) length: usize,
}
37
38impl Default for Rune {
39    fn default() -> Rune {
40        let (ptr, length) = slice_ptr_and_length_from_display("");
41        Rune { ptr, length }
42    }
43}
44impl Rune {
45    pub fn new<T: Display>(input: T) -> Rune {
46        let (input_ptr, input_length) = slice_ptr_and_length_from_display(input);
47        match get_rune_cutoff_at_index(input_ptr, input_length, 0) {
48            Ok(length) => {
49                let ptr = new_ptr(length);
50                for offset in 0..length {
51                    unsafe {
52                        ptr.add(offset)
53                            .write(input_ptr.add(offset).read());
54                    }
55                }
56                dealloc_ptr(input_ptr, input_length);
57                Rune { ptr, length }
58            },
59            Err(error) => {
60                display_error(error, input_ptr, input_length);
61                Rune::default()
62            },
63        }
64    }
65
66    pub fn from_ptr_cutoff(
67        input_ptr: *const u8,
68        input_length: usize,
69        index: usize,
70    ) -> Result<Rune> {
71        let cutoff = get_rune_cutoff_at_index(input_ptr, input_length, index)?;
72        let length = cutoff - index;
73        let ptr = new_ptr(length);
74
75        for (index, byte) in get_byte_slice_of(input_ptr, index, length)
76            .into_iter()
77            .enumerate()
78        {
79            unsafe { ptr.add(index).write(*byte) }
80        }
81        Ok(Rune { ptr, length })
82    }
83
84    pub fn as_str<'g>(&self) -> &'g str {
85        let mut offset = self.length;
86        loop {
87            if let Ok(slice) = std::str::from_utf8(unsafe {
88                std::slice::from_raw_parts(self.ptr, offset)
89            }) {
90                break slice;
91            }
92            if offset > 0 {
93                offset -= 1;
94            } else {
95                break "";
96            }
97        }
98    }
99
100    pub fn as_bytes<'g>(&self) -> &'g [u8] {
101        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
102    }
103
104    pub fn as_debug(&self, indent: Option<usize>) -> String {
105        let indent = unwrap_indent(indent);
106        format!(
107            "Rune{{{}}}{}",
108            self.as_str(),
109            format_bytes(self.as_bytes(), Some(indent + DEFAULT_INDENT)),
110        )
111    }
112}
113
114impl From<&str> for Rune {
115    fn from(s: &str) -> Rune {
116        Rune::new(s)
117    }
118}
119
120impl From<String> for Rune {
121    fn from(s: String) -> Rune {
122        Rune::new(s)
123    }
124}
125
126impl From<&String> for Rune {
127    fn from(s: &String) -> Rune {
128        Rune::new(s)
129    }
130}
131
132impl Display for Rune {
133    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
134        write!(f, "{}", self.as_str())
135    }
136}
137impl Debug for Rune {
138    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
139        write!(f, "{}", self.as_debug(None))
140    }
141}
142
143impl Deref for Rune {
144    type Target = [u8];
145
146    fn deref(&self) -> &[u8] {
147        self.as_bytes()
148    }
149}
150
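// Disabled: `Rune` derives `Copy`, and the compiler rejects a `Drop` impl on a
// `Copy` type (error E0184), so this destructor cannot be enabled as written.
// impl Drop for Rune {
//     fn drop(&mut self) {
//         dealloc_ptr(self.ptr, self.length)
//     }
// }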
156
157impl PartialEq<Rune> for Rune {
158    fn eq(&self, other: &Rune) -> bool {
159        self.as_bytes().eq(other.as_bytes())
160    }
161}
162impl Eq for Rune {}
163
164impl PartialOrd<Rune> for Rune {
165    fn partial_cmp(&self, other: &Rune) -> Option<Ordering> {
166        self.as_bytes().partial_cmp(&other.as_bytes())
167    }
168}
169impl<'g> PartialOrd<&'g str> for Rune {
170    fn partial_cmp(&self, other: &&'g str) -> Option<Ordering> {
171        self.as_str().partial_cmp(other)
172    }
173}
174impl<'g> PartialOrd<&'g [u8]> for Rune {
175    fn partial_cmp(&self, other: &&'g [u8]) -> Option<Ordering> {
176        self.as_bytes().partial_cmp(other)
177    }
178}
179impl<'g> PartialOrd<Vec<u8>> for Rune {
180    fn partial_cmp(&self, other: &Vec<u8>) -> Option<Ordering> {
181        self.as_bytes().to_vec().partial_cmp(other)
182    }
183}
184impl<'g> PartialOrd<&Vec<u8>> for Rune {
185    fn partial_cmp(&self, other: &&Vec<u8>) -> Option<Ordering> {
186        self.as_bytes().to_vec().partial_cmp(other)
187    }
188}
189
190impl<'g> PartialEq<&'g str> for Rune {
191    fn eq(&self, other: &&'g str) -> bool {
192        self.as_str().eq(*other)
193    }
194}
195
196impl<'g> PartialEq<&'g [u8]> for Rune {
197    fn eq(&self, other: &&'g [u8]) -> bool {
198        self.as_bytes().eq(*other)
199    }
200}
201impl<'g> PartialEq<Vec<u8>> for Rune {
202    fn eq(&self, other: &Vec<u8>) -> bool {
203        self.as_bytes().to_vec().eq(other)
204    }
205}
206impl<'g> PartialEq<&Vec<u8>> for Rune {
207    fn eq(&self, other: &&Vec<u8>) -> bool {
208        self.as_bytes().to_vec().eq(*other)
209    }
210}
211
212impl Ord for Rune {
213    fn cmp(&self, other: &Self) -> Ordering {
214        self.as_bytes()
215            .to_vec()
216            .cmp(&other.as_bytes().to_vec())
217    }
218}
219
220impl Hash for Rune {
221    fn hash<H: Hasher>(&self, state: &mut H) {
222        self.as_bytes().hash(state);
223    }
224}