utf8_rune/
runes.rs

1use std::fmt::{Debug, Display, Formatter};
2use std::marker::PhantomData;
3use std::ops::Deref;
4
5use crate::{
6    get_rune_cutoff_at_index, get_valid_utf8_str_of, slice_ptr_and_length_from_display,
7    unwrap_indent, Result, Rune,
8};
9
/// Represents a slice of bytes which can be automatically parsed into
/// a sequence of [Rune(s)](crate::Rune)
///
/// Cloning a `Runes` copies the raw pointer and the cutoff table; it does
/// not duplicate the bytes the pointer refers to.
///
/// # Examples
///
/// ```
/// use utf8_rune::{Runes};
/// let parts = Runes::new("👩🏻‍🚒👌🏿🧑🏽‍🚒👨‍🚒🌶️🎹💔🔥❤️‍🔥❤️‍🩹");
/// assert_eq!(
///     parts
///         .runes().unwrap_or_default()
///         .iter()
///         .map(|rune| rune.to_string())
///         .collect::<Vec<String>>(),
///     vec![
///         "👩🏻‍🚒",
///         "👌🏿",
///         "🧑🏽‍🚒",
///         "👨‍🚒",
///         "🌶️",
///         "🎹",
///         "💔",
///         "🔥",
///         "❤️‍🔥",
///         "❤️‍🩹",
///     ]
/// );
/// ```
#[derive(Clone)]
pub struct Runes<'g> {
    /// Raw pointer to the first byte of the text being parsed.
    pub(crate) ptr: *const u8,
    /// Byte offsets recorded while scanning: seeded with `0`, followed by
    /// the cutoff reported after each rune that was parsed successfully.
    pub(crate) indexes: Vec<usize>,
    /// Number of bytes reachable through `ptr`.
    pub(crate) length: usize,
    /// Zero-sized marker that lets the struct carry the `'g` lifetime even
    /// though it only stores a raw pointer.
    pub(crate) _marker: PhantomData<&'g usize>,
}
45impl<'g> Runes<'g> {
46    pub fn new<T: Display>(input: T) -> Runes<'g> {
47        let input = input.to_string();
48        let (ptr, length) = slice_ptr_and_length_from_display(&input);
49        let mut cutoff: usize = 0;
50        let mut indexes = vec![cutoff];
51        while cutoff < length {
52            match get_rune_cutoff_at_index(ptr, length, cutoff) {
53                Ok(next) => {
54                    indexes.push(next);
55                    cutoff = next;
56                },
57                Err(_) => break,
58            }
59        }
60
61        Runes {
62            ptr,
63            indexes,
64            length,
65            _marker: PhantomData,
66        }
67    }
68
69    pub fn len(&self) -> usize {
70        self.indexes.len()
71    }
72
73    pub fn as_str(&self) -> &'g str {
74        let mut offset = self.length;
75        loop {
76            if let Ok(slice) = std::str::from_utf8(unsafe {
77                std::slice::from_raw_parts(self.ptr, offset)
78            }) {
79                break slice;
80            }
81            if offset > 0 {
82                offset -= 1;
83            } else {
84                break "";
85            }
86        }
87    }
88
89    pub fn as_bytes(&self) -> &'g [u8] {
90        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
91    }
92
93    pub fn is_empty(&self) -> bool {
94        self.length == 0
95    }
96
97    pub fn as_debug(&self, indent: Option<usize>) -> String {
98        let indent = unwrap_indent(indent);
99        let length = self.len();
100        format!(
101            "Runes{{{}}}",
102            [format!("length: {length}"),]
103                .iter()
104                .map(|c| {
105                    let padding = " ".repeat(indent);
106                    format!("{padding}{c}")
107                })
108                .collect::<Vec<String>>()
109                .join("\n")
110        )
111    }
112
113    pub fn indexes(&self) -> Vec<usize> {
114        let mut indexes = self.indexes.clone();
115        if indexes.len() > 0 {
116            indexes.pop();
117        }
118        indexes
119    }
120
121    pub fn slice_indexes(&self) -> Vec<(usize, usize)> {
122        self.indexes()
123            .into_iter()
124            .map(|index| {
125                let next = get_rune_cutoff_at_index(self.ptr, self.length, index)
126                    .unwrap_or_default();
127                let length = if next >= index {
128                    next - index
129                } else {
130                    0
131                };
132                (index, length)
133            })
134            .filter(|(_, length)| *length > 0)
135            .collect()
136    }
137
138    pub fn runes(&self) -> Result<Vec<Rune>> {
139        let mut runes = Vec::<Rune>::new();
140        for cutoff in self.indexes().into_iter() {
141            runes.push(Rune::from_ptr_cutoff(self.ptr, self.length, cutoff)?);
142        }
143        Ok(runes)
144    }
145}
146impl<'g> From<&str> for Runes<'g> {
147    fn from(s: &str) -> Runes<'g> {
148        Runes::new(s)
149    }
150}
151
152impl<'g> From<String> for Runes<'g> {
153    fn from(s: String) -> Runes<'g> {
154        Runes::new(s)
155    }
156}
157
158impl<'g> From<&String> for Runes<'g> {
159    fn from(s: &String) -> Runes<'g> {
160        Runes::new(s)
161    }
162}
163
164impl<'g> Display for Runes<'g> {
165    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
166        write!(f, "{}", self.as_str())
167    }
168}
169impl<'g> Debug for Runes<'g> {
170    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
171        write!(f, "{}", self.as_debug(None))
172    }
173}
174impl<'g> Deref for Runes<'g> {
175    type Target = [&'g str];
176
177    fn deref(&self) -> &[&'g str] {
178        let runes = self
179            .slice_indexes()
180            .into_iter()
181            .map(|(index, count)| get_valid_utf8_str_of(self.ptr, index, count))
182            .filter(|c| c.is_some())
183            .map(|c| c.unwrap())
184            .collect::<Vec<&'g str>>();
185        unsafe { std::mem::transmute::<&[&str], &'g [&'g str]>(&runes) }
186    }
187}