utf8_rune/
runes.rs

1use std::fmt::{Debug, Display, Formatter};
2use std::marker::PhantomData;
3use std::ops::Index;
4
5use crate::pointer;
6use crate::{get_rune_cutoff_at_index, unwrap_indent, Result, Rune};
7
/// Represents a slice of bytes which can be automatically parsed into
/// a sequence of [Rune(s)](crate::Rune)
///
/// # Examples
///
/// ```
/// use utf8_rune::Runes;
/// let parts = Runes::new("👩🏻‍🚒👌🏿🧑🏽‍🚒👨‍🚒🌶️🎹💔🔥❤️‍🔥❤️‍🩹");
/// assert_eq!(
///     parts
///         .to_vec()
///         .iter()
///         .map(|rune| rune.to_string())
///         .collect::<Vec<String>>(),
///     vec![
///         "👩🏻‍🚒",
///         "👌🏿",
///         "🧑🏽‍🚒",
///         "👨‍🚒",
///         "🌶️",
///         "🎹",
///         "💔",
///         "🔥",
///         "❤️‍🔥",
///         "❤️‍🩹",
///     ]
/// );
/// ```
///
/// ```
/// use utf8_rune::Runes;
/// let runes = Runes::new("👌👌🏻👌🏼👌🏽👌🏾👌🏿");
///
/// assert_eq!(runes.rune_indexes(), vec![
///     (0, 4),
///     (4, 8),
///     (12, 8),
///     (20, 8),
///     (28, 8),
///     (36, 8),
/// ]);
/// assert_eq!(runes.len(), 6);
/// assert_eq!(runes[0], "👌");
/// assert_eq!(runes[1], "👌🏻");
/// assert_eq!(runes[2], "👌🏼");
/// assert_eq!(runes[3], "👌🏽");
/// assert_eq!(runes[4], "👌🏾");
/// assert_eq!(runes[5], "👌🏿");
/// ```
#[derive(Clone)]
pub struct Runes<'g> {
    /// Start of the byte buffer; read together with `length` through
    /// `std::slice::from_raw_parts`.
    pub(crate) ptr: *const u8,
    /// Byte offsets of the rune boundaries inside the buffer. When built by
    /// `allocate` the first entry is `0` and every following entry is the
    /// cutoff of the rune that starts at the previous entry.
    pub(crate) indexes: &'g [usize],
    /// Number of bytes readable from `ptr`.
    pub(crate) length: usize,
    /// Zero-sized marker carrying the `'g` lifetime.
    pub(crate) _marker: PhantomData<&'g usize>,
}
65impl<'g> Default for Runes<'g> {
66    fn default() -> Runes<'g> {
67        Runes::empty().expect("memory allocation")
68    }
69}
70impl<'g> Runes<'g> {
71    pub fn new<T: Display>(input: T) -> Runes<'g> {
72        Runes::allocate(&input)
73            .expect(format!("allocate memory for Runes from {input}").as_str())
74    }
75
76    pub fn allocate<T: Display>(input: T) -> Result<Runes<'g>> {
77        let input = input.to_string();
78        let (ptr, length) = pointer::from_display(&input)?;
79        let mut cutoff: usize = 0;
80        let mut indexes = vec![cutoff];
81        while cutoff < length {
82            match get_rune_cutoff_at_index(ptr, length, cutoff) {
83                Ok(next) => {
84                    indexes.push(next);
85                    cutoff = next;
86                },
87                Err(_) => break,
88            }
89        }
90        Ok(Runes {
91            ptr,
92            indexes: indexes.leak(),
93            length,
94            _marker: PhantomData,
95        })
96    }
97
98    pub fn empty() -> Result<Runes<'g>> {
99        let length = 0;
100        let ptr = pointer::create(length)?;
101        Ok(Runes {
102            ptr,
103            length,
104            indexes: &[],
105            _marker: PhantomData,
106        })
107    }
108
109    pub fn as_str(&self) -> &'g str {
110        let mut offset = self.length;
111        loop {
112            if let Ok(slice) = std::str::from_utf8(unsafe {
113                std::slice::from_raw_parts(self.ptr, offset)
114            }) {
115                break slice;
116            }
117            if offset > 0 {
118                offset -= 1;
119            } else {
120                break "";
121            }
122        }
123    }
124
125    pub fn as_bytes(&self) -> &'g [u8] {
126        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
127    }
128
129    pub fn is_empty(&self) -> bool {
130        self.length == 0
131    }
132
133    pub fn len(&self) -> usize {
134        self.indexes.len() -1
135    }
136
137    pub fn as_debug(&self, indent: Option<usize>) -> String {
138        let indent = unwrap_indent(indent);
139        let length = self.len();
140        format!(
141            "Runes{{{}}}",
142            [format!("length: {length}"),]
143                .iter()
144                .map(|c| {
145                    let padding = " ".repeat(indent);
146                    format!("{padding}{c}")
147                })
148                .collect::<Vec<String>>()
149                .join("\n")
150        )
151    }
152
153    pub fn indexes(&self) -> Vec<usize> {
154        let mut indexes = self.indexes.to_vec();
155        if indexes.len() > 0 {
156            indexes.pop();
157        }
158        indexes
159    }
160
161    pub fn rune_indexes(&self) -> Vec<(usize, usize)> {
162        self.indexes()
163            .into_iter()
164            .map(|index| {
165                let next = get_rune_cutoff_at_index(self.ptr, self.length, index)
166                    .unwrap_or_default();
167                let length = if next >= index {
168                    next - index
169                } else {
170                    0
171                };
172                (index, length)
173            })
174            .filter(|(_, length)| *length > 0)
175            .collect()
176    }
177
178    pub fn get(&self, index: usize) -> Option<Rune> {
179        let indexes = self.rune_indexes();
180        if index >= indexes.len() {
181            None
182        } else {
183            let (index, length) = indexes[index];
184            Some(Rune::from_raw_parts(unsafe { self.ptr.add(index) }, length))
185        }
186    }
187
188    pub fn to_vec(&self) -> Vec<Rune> {
189        let mut runes = Vec::<Rune>::new();
190        for (index, length) in self.rune_indexes().into_iter() {
191            runes.push(Rune::from_raw_parts(unsafe { self.ptr.add(index) }, length));
192        }
193        runes
194    }
195}
196impl<'g> From<&str> for Runes<'g> {
197    fn from(s: &str) -> Runes<'g> {
198        Runes::new(s)
199    }
200}
201
202impl<'g> From<String> for Runes<'g> {
203    fn from(s: String) -> Runes<'g> {
204        Runes::new(s)
205    }
206}
207
208impl<'g> From<&String> for Runes<'g> {
209    fn from(s: &String) -> Runes<'g> {
210        Runes::new(s)
211    }
212}
213
214impl<'g> Display for Runes<'g> {
215    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
216        write!(f, "{}", self.as_str())
217    }
218}
219impl<'g> Debug for Runes<'g> {
220    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
221        write!(f, "{}", self.as_debug(None))
222    }
223}
224impl<'g> Index<usize> for Runes<'g> {
225    type Output = &'g str;
226
227    fn index(&self, index: usize) -> &&'g str {
228        if let Some(rune) = self.get(index) {
229            unsafe { std::mem::transmute::<&&str, &&'g str>(&rune.as_str()) }
230        } else {
231            &""
232        }
233    }
234}
235
#[cfg(test)]
mod test_runes {
    use crate::{Result, Runes};

    /// Splitting a string of multi-codepoint emoji yields one rune per
    /// visible glyph (skin-tone modifiers, ZWJ sequences and variation
    /// selectors stay attached to their base).
    #[test]
    fn test_to_vec() -> Result<()> {
        let parts = Runes::new("👩🏻‍🚒👌🏿🧑🏽‍🚒👨‍🚒🌶️🎹💔🔥❤️‍🔥❤️‍🩹");
        assert_eq!(
            parts
                .to_vec()
                .iter()
                .map(|rune| rune.to_string())
                .collect::<Vec<String>>(),
            vec![
                "👩🏻‍🚒",
                "👌🏿",
                "🧑🏽‍🚒",
                "👨‍🚒",
                "🌶️",
                "🎹",
                "💔",
                "🔥",
                "❤️‍🔥",
                "❤️‍🩹",
            ]
        );
        Ok(())
    }

    /// The bare emoji is 4 bytes and each skin-tone variant is 8 bytes, which
    /// fixes the expected `(offset, length)` pairs below.
    #[test]
    fn test_length() -> Result<()> {
        let runes = Runes::new("👌👌🏻👌🏼👌🏽👌🏾👌🏿");
        let vec = runes.to_vec();

        assert_eq!(vec.len(), 6);
        assert_eq!(
            runes.rune_indexes(),
            vec![
                (0, 4),
                (4, 8),
                (12, 8),
                (20, 8),
                (28, 8),
                (36, 8),
            ]
        );
        assert_eq!(runes[0], "👌");
        assert_eq!(runes[1], "👌🏻");
        assert_eq!(runes[2], "👌🏼");
        assert_eq!(runes[3], "👌🏽");
        assert_eq!(runes[4], "👌🏾");
        assert_eq!(runes[5], "👌🏿");

        Ok(())
    }
}