utf8_rune/
parts.rs

1use std::fmt::{Debug, Display, Formatter};
2use std::marker::PhantomData;
3use crate::pointer::{
4    self,
5};
6use crate::{
7    display_error, format_bytes, get_rune_cutoff_at_index, unwrap_indent, Result, Rune, Runes,
8    DEFAULT_INDENT,
9};
10
11
/// A raw view over a memory area with contiguous bytes: a start pointer
/// plus a byte count.
///
/// Serves as building block for [Runes](crate::Runes) and [Rune](crate::Rune).
///
/// The type is `Copy` and stores nothing but the pointer and the length, so
/// copying a value copies those two fields and not the bytes they refer to.
///
/// # Examples
///
/// ```
/// use utf8_rune::{RuneParts, Rune, Runes};
/// let parts = RuneParts::new("👌👌🏻👌🏼👌🏽👌🏾👌🏿");
/// assert_eq!(parts.len(), 44);
/// assert_eq!(parts.as_str(), "👌👌🏻👌🏼👌🏽👌🏾👌🏿");
/// assert_eq!(parts.as_bytes(), "👌👌🏻👌🏼👌🏽👌🏾👌🏿".as_bytes());
///
/// let runes = parts.into_runes();
/// assert_eq!(runes.len(), 6);
/// assert_eq!(runes[0], "👌");
/// assert_eq!(runes[1], "👌🏻");
/// assert_eq!(runes[2], "👌🏼");
/// assert_eq!(runes[3], "👌🏽");
/// assert_eq!(runes[4], "👌🏾");
/// assert_eq!(runes[5], "👌🏿");
/// ```
#[derive(Clone, Copy)]
pub struct RuneParts {
    /// Address of the first byte of the memory area.
    pub ptr: *const u8,
    /// Number of bytes in the memory area that starts at `ptr`.
    pub length: usize,
}
39impl RuneParts {
40    pub fn from_raw_parts(ptr: *const u8, length: usize) -> RuneParts {
41        RuneParts { ptr, length }
42    }
43
44    pub fn new<T: Display>(input: T) -> RuneParts {
45        RuneParts::allocate(&input)
46            .expect(format!("allocate memory for RuneParts from {input}").as_str())
47    }
48
49    pub fn into_runes<'g>(self) -> Runes<'g> {
50        let ptr = self.ptr;
51        let length = self.length;
52        let indexes = self.indexes().leak();
53        Runes {
54            ptr,
55            length,
56            indexes,
57            _marker: PhantomData,
58        }
59    }
60
61    pub fn allocate<T: Display>(input: T) -> Result<RuneParts> {
62        let input = input.to_string();
63        let (ptr, length) = pointer::from_display(&input)?;
64        Ok(RuneParts { ptr, length })
65    }
66
67    pub fn rune(&self) -> Option<Rune> {
68        match self.rune_at_index(0) {
69            Ok(rune) => Some(rune),
70            Err(error) => {
71                display_error(error, self.ptr, self.length);
72                None
73            },
74        }
75    }
76
77    pub fn indexes(&self) -> Vec<usize> {
78        let mut cutoff = 0usize;
79        let mut indexes = vec![cutoff];
80        while cutoff < self.length {
81            match get_rune_cutoff_at_index(self.ptr, self.length, cutoff) {
82                Ok(next) => {
83                    indexes.push(next);
84                    cutoff = next;
85                },
86                Err(_) => break,
87            }
88        }
89        indexes
90    }
91
92    pub fn rune_at_index(&self, index: usize) -> Result<Rune> {
93        let cutoff = get_rune_cutoff_at_index(self.ptr, self.length, index)?;
94        let length = cutoff - index;
95        let ptr = pointer::create(length)?;
96        for offset in index..cutoff {
97            unsafe {
98                ptr.add(offset - index)
99                    .write(self.ptr.add(offset).read());
100            }
101        }
102        Ok(Rune { ptr, length })
103    }
104
105    pub fn runes(&self) -> Result<Vec<Rune>> {
106        let mut runes = Vec::<Rune>::new();
107        let mut index = 0;
108        while index < self.length {
109            let cutoff = match get_rune_cutoff_at_index(self.ptr, self.length, index) {
110                Ok(cutoff) => cutoff,
111                #[allow(unused_variables)]
112                Err(e) => {
113                    #[cfg(feature = "debug")]
114                    {
115                        eprintln!();
116                        dbg!(e);
117                    }
118                    break;
119                },
120            };
121            let length = cutoff - index;
122            let ptr = pointer::create(length)?;
123            for offset in 0..length {
124                unsafe {
125                    ptr.add(offset)
126                        .write(self.ptr.add(index + offset).read());
127                }
128            }
129            runes.push(Rune { ptr, length });
130            index = cutoff;
131        }
132        Ok(runes)
133    }
134
135    pub fn len(&self) -> usize {
136        self.length
137    }
138
139    pub fn as_str<'g>(&self) -> &'g str {
140        let mut offset = self.length;
141        loop {
142            if let Ok(slice) = std::str::from_utf8(unsafe {
143                std::slice::from_raw_parts(self.ptr, offset)
144            }) {
145                break slice;
146            }
147            if offset > 0 {
148                offset -= 1;
149            } else {
150                break "";
151            }
152        }
153    }
154
155    pub fn as_bytes<'g>(&self) -> &'g [u8] {
156        unsafe { std::slice::from_raw_parts(self.ptr, self.length) }
157    }
158
159    pub fn is_empty(&self) -> bool {
160        self.length == 0
161    }
162
163    pub fn as_debug(&self, indent: Option<usize>) -> String {
164        let indent = unwrap_indent(indent);
165        let length = self.length;
166        format!(
167            "RuneParts{{{}}}",
168            [
169                if let Some(rune) = self.rune() {
170                    format!("rune: {:#?}", rune.as_debug(Some(indent + DEFAULT_INDENT)))
171                } else {
172                    String::new()
173                },
174                format!("length: {length}"),
175                format!(
176                    "remaining_bytes: {}",
177                    format_bytes(self.as_bytes(), Some(indent + DEFAULT_INDENT))
178                ),
179            ]
180            .into_iter()
181            .filter(|c| !c.is_empty())
182            .map(|c| {
183                let padding = " ".repeat(indent);
184                format!("{padding}{c}")
185            })
186            .collect::<Vec<String>>()
187            .join("\n")
188        )
189    }
190}
191impl From<&str> for RuneParts {
192    fn from(s: &str) -> RuneParts {
193        RuneParts::new(s)
194    }
195}
196
197impl From<String> for RuneParts {
198    fn from(s: String) -> RuneParts {
199        RuneParts::new(s)
200    }
201}
202
203impl From<&String> for RuneParts {
204    fn from(s: &String) -> RuneParts {
205        RuneParts::new(s)
206    }
207}
208
209impl Display for RuneParts {
210    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
211        write!(f, "{}", self.as_str())
212    }
213}
214impl Debug for RuneParts {
215    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
216        write!(f, "{}", self.as_debug(None))
217    }
218}
219
#[cfg(test)]
mod test_parts {
    use crate::{Rune, RuneParts};

    /// Six runes: the bare emoji (4 bytes) followed by five skin-tone
    /// variants (8 bytes each), 44 bytes in total.
    const OK_HANDS: &str = "👌👌🏻👌🏼👌🏽👌🏾👌🏿";

    /// Offsets that do not start a rune must be rejected, and the error must
    /// report the valid cutoffs on either side of the offset.
    #[test]
    fn test_rune_at_index_error() {
        let parts = RuneParts::new(OK_HANDS);
        // (offset inside a rune, expected previous cutoff, expected next cutoff)
        let cases = [
            (1, 0, 4),    // inside "👌"
            (5, 4, 8),    // inside "👌🏻"
            (13, 12, 16), // inside "👌🏼"
            (21, 20, 24), // inside "👌🏽"
            (29, 28, 32), // inside "👌🏾"
            (37, 36, 40), // inside "👌🏿"
        ];
        for (index, previous, next) in cases {
            let result = parts.rune_at_index(index);
            assert!(result.is_err(), "index {index} should not start a rune");
            let err = result.err().unwrap();
            assert_eq!(err.previous_valid_cutoff(), Some(previous));
            assert_eq!(err.next_valid_cutoff(), Some(next));
        }
    }

    #[test]
    fn test_new_single_rune() {
        let parts = RuneParts::new("❤️");
        assert_eq!(parts.len(), 6);
        assert_eq!(parts.as_str(), "❤️");
        assert_eq!(parts.as_bytes(), "❤️".as_bytes());
    }

    #[test]
    fn test_new_multiple_to_vec() {
        let parts = RuneParts::new(OK_HANDS);
        assert_eq!(parts.len(), 44);
        assert_eq!(parts.as_str(), OK_HANDS);
        assert_eq!(parts.as_bytes(), OK_HANDS.as_bytes());
    }

    #[test]
    fn test_rune_indexes() {
        let parts = RuneParts::new(OK_HANDS);
        assert_eq!(parts.indexes(), vec![0, 4, 12, 20, 28, 36, 44]);
    }

    #[test]
    fn test_rune_at_index() {
        let parts = RuneParts::new(OK_HANDS);
        assert_eq!(parts.rune_at_index(0), Ok(Rune::new("👌")));
        assert_eq!(parts.rune_at_index(4), Ok(Rune::new("👌🏻")));
        assert_eq!(parts.rune_at_index(12), Ok(Rune::new("👌🏼")));
        assert_eq!(parts.rune_at_index(20), Ok(Rune::new("👌🏽")));
        assert_eq!(parts.rune_at_index(28), Ok(Rune::new("👌🏾")));
        assert_eq!(parts.rune_at_index(36), Ok(Rune::new("👌🏿")));
    }
}