utf8_stream/
lib.rs

1use std::alloc::Layout;
2use std::fmt::{Debug, Display, Formatter};
3use std::iter::{Extend, FromIterator, IntoIterator, Iterator};
4use std::marker::PhantomData;
5use std::ops::Deref;
6
/// An owned, heap-allocated byte buffer holding UTF-8 text together with a
/// read cursor (`index`) used when the stream is iterated.
///
/// ## Example
///
/// ```
/// use utf8_stream::Utf8Stream;
/// let stream = Utf8Stream::new("fire👩🏽‍🚒fighter");
/// assert_eq!(stream.as_str(), "fire👩🏽‍🚒fighter");
/// ```
///
/// ## Notes
///
/// * The derived comparison traits (`PartialEq`, `Eq`, `PartialOrd`, `Ord`)
///   compare the fields one by one, i.e. the buffer *address*, the cursor and
///   the length — not the text. Compare `as_str()` results to compare text.
/// * `Clone` is implemented by hand (see below) because the buffer is owned
///   through a raw pointer.
#[derive(Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct Utf8Stream<'a> {
    /// Start of the owned allocation; its size is `max(length, 1)` bytes.
    ptr: *mut u8,
    /// Byte offset of the next item the iterator will look at.
    index: usize,
    /// Number of meaningful bytes stored behind `ptr`.
    length: usize,
    /// Ties the `'a` parameter to the type without storing a reference.
    _marker: PhantomData<&'a u8>,
}

impl<'a> Clone for Utf8Stream<'a> {
    /// Deep-copies the buffer.
    ///
    /// A derived `Clone` would duplicate only the raw pointer, leaving two
    /// values that both free the same allocation when dropped.
    fn clone(&self) -> Self {
        // Keep the "never smaller than one byte" sizing used everywhere else
        // in this file so that deallocation sees a matching layout.
        let capacity = self.length.max(1);
        let layout = Layout::array::<u8>(capacity)
            .expect("an existing buffer always has a representable layout");
        // SAFETY: `layout` has a non-zero size because `capacity >= 1`.
        let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
        if ptr.is_null() {
            std::alloc::handle_alloc_error(layout);
        }
        // SAFETY: `self.ptr` is valid for `self.length` reads, `ptr` is a
        // fresh allocation of `capacity >= self.length` bytes, and the two
        // allocations cannot overlap.
        unsafe {
            std::ptr::copy_nonoverlapping(self.ptr, ptr, self.length);
        }
        Utf8Stream {
            ptr,
            index: self.index,
            length: self.length,
            _marker: PhantomData,
        }
    }
}
23
24impl<'a> Default for Utf8Stream<'a> {
25    fn default() -> Utf8Stream<'a> {
26        Utf8Stream {
27            ptr: to_slice_ptr_from_display(""),
28            index: 0,
29            length: 0,
30            _marker: PhantomData,
31        }
32    }
33}
34impl<'a> Drop for Utf8Stream<'a> {
35    fn drop(&mut self) {
36        if !self.ptr.is_null() {
37            let layout = Layout::array::<u8>(self.length).unwrap();
38            unsafe {
39                std::alloc::dealloc(self.ptr, layout);
40            }
41        }
42    }
43}
44impl<'a> Utf8Stream<'a> {
45    /// Creates a new [Utf8Stream](Self) from any implementor of [`Display`](std::fmt::Display)
46    ///
47    /// ```
48    /// use utf8_stream::Utf8Stream;
49    /// let stream = Utf8Stream::new("red❤️heart");
50    /// assert_eq!(stream.as_str(), "red❤️heart");
51    /// ```
52    pub fn new<T: Display>(input: T) -> Utf8Stream<'a> {
53        let input = input.to_string();
54        let ptr = to_slice_ptr_from_display(&input);
55        let length = input.len();
56        Utf8Stream {
57            index: 0,
58            ptr,
59            length,
60            _marker: PhantomData,
61        }
62    }
63
64    /// Pushes more string-like data into an [Utf8Stream](Self)
65    ///
66    /// ```
67    /// use utf8_stream::Utf8Stream;
68    /// let mut stream = Utf8Stream::new("red❤️");
69    ///
70    /// stream.push("heart");
71    ///
72    /// assert_eq!(stream.as_str(), "red❤️heart");
73    /// ```
74    pub fn push<T: Display>(&mut self, input: T) {
75        let new_chars = input.to_string().as_bytes().to_vec();
76        let new_chars_length = new_chars.len();
77        let old_length = self.length;
78        let new_length = old_length + new_chars_length;
79        grow_ptr(self.ptr, new_length);
80        self.length = new_length;
81        for (a, c) in (old_length..new_length).zip(new_chars.iter()) {
82            unsafe {
83                self.ptr.add(a).write(*c);
84            }
85        }
86    }
87
88    pub fn contains<T: Display>(&mut self, input: T) -> bool {
89        self.as_str().contains(&input.to_string())
90    }
91
92    pub fn clear(&mut self) {
93        shrink_ptr(self.ptr, 1);
94        self.length = 0;
95        self.index = 0;
96    }
97
98    pub fn rewind(&mut self) {
99        self.index = 0;
100    }
101
102    pub fn len(&self) -> usize {
103        self.length
104    }
105    fn esoteric_utf8_offset(&mut self, index: usize) -> Option<&'a str> {
106        let mut max = self.length - index;
107        #[allow(unused_assignments)]
108        let mut offset_byte = 0;
109        let mut delta = 0;
110        for offset in 0..max {
111            if index + offset >= self.length {
112                continue;
113            }
114            let offset = max - if offset == 0 { 0 } else { offset % max };
115            offset_byte = unsafe { self.ptr.add(index + offset - 1).read() };
116            if offset_byte < 127 {
117                max = offset;
118                continue;
119            }
120            delta += 1;
121            let bytes = unsafe { std::slice::from_raw_parts(self.ptr.add(index), offset) };
122            match std::str::from_utf8(bytes) {
123                Ok(c) => {
124                    if offset < max && is_not_ascii_byte(offset_byte) {
125                        self.index = index + max - delta + 1;
126                        let max_delta = delta - 1;
127                        return Some(unsafe {
128                            std::str::from_utf8_unchecked(std::slice::from_raw_parts(
129                                self.ptr.add(index),
130                                max - max_delta,
131                            ))
132                        });
133                    } else {
134                    }
135                    self.index = index + offset;
136                    return Some(c);
137                }
138                Err(_e) => {}
139            }
140        }
141        None
142    }
143    pub fn as_str(&self) -> &str {
144        let mut offset = self.length;
145        loop {
146            if let Ok(slice) =
147                std::str::from_utf8(unsafe { std::slice::from_raw_parts(self.ptr, offset) })
148            {
149                break slice;
150            }
151            if offset > 0 {
152                offset -= 1;
153            } else {
154                break "";
155            }
156        }
157    }
158}
159impl<'a> Iterator for Utf8Stream<'a> {
160    type Item = &'a str;
161
162    fn next(&mut self) -> Option<Self::Item> {
163        if self.index < self.length {
164            let index = self.index;
165            self.index += 1;
166            let byte = unsafe { self.ptr.add(index).read() };
167            if is_not_ascii_byte(byte) {
168                return self.esoteric_utf8_offset(index);
169            }
170            let bytes = unsafe { std::mem::transmute::<&[u8], &'a [u8]>(&[byte]) };
171            if let Ok(c) = std::str::from_utf8(bytes) {
172                Some(c)
173            } else {
174                None
175            }
176        } else {
177            None
178        }
179    }
180}
181
182fn to_slice_ptr_from_display<T: Display>(input: T) -> *mut u8 {
183    let bytes = input.to_string().as_bytes().to_vec();
184
185    let ptr = new_ptr(bytes.len());
186    let length = bytes.len();
187    if length == 0 {
188        return ptr;
189    }
190    for (i, c) in bytes.iter().enumerate() {
191        unsafe {
192            ptr.add(i).write(*c);
193        }
194    }
195    ptr
196}
197
198impl<'a> Extend<char> for Utf8Stream<'a> {
199    fn extend<T: IntoIterator<Item = char>>(&mut self, iter: T) {
200        for string in iter {
201            self.push(string);
202        }
203    }
204}
205impl<'a> Extend<String> for Utf8Stream<'a> {
206    fn extend<T: IntoIterator<Item = String>>(&mut self, iter: T) {
207        for string in iter {
208            self.push(string);
209        }
210    }
211}
212impl<'a> Extend<&'a str> for Utf8Stream<'a> {
213    fn extend<T: IntoIterator<Item = &'a str>>(&mut self, iter: T) {
214        for string in iter {
215            self.push(string);
216        }
217    }
218}
219impl<'a> Extend<u8> for Utf8Stream<'a> {
220    fn extend<T: IntoIterator<Item = u8>>(&mut self, iter: T) {
221        for u in iter {
222            self.push(char::from(u));
223        }
224    }
225}
226
227impl<'a> FromIterator<String> for Utf8Stream<'a> {
228    fn from_iter<I: IntoIterator<Item = String>>(iter: I) -> Utf8Stream<'a> {
229        let mut buf = Utf8Stream::default();
230        buf.extend(iter);
231        buf
232    }
233}
234impl<'a> FromIterator<u8> for Utf8Stream<'a> {
235    fn from_iter<I: IntoIterator<Item = u8>>(iter: I) -> Utf8Stream<'a> {
236        let mut buf = Utf8Stream::default();
237        buf.extend(iter);
238        buf
239    }
240}
241
242impl<'a> FromIterator<char> for Utf8Stream<'a> {
243    fn from_iter<I: IntoIterator<Item = char>>(iter: I) -> Utf8Stream<'a> {
244        let mut buf = Utf8Stream::default();
245        buf.extend(iter);
246        buf
247    }
248}
249
250impl<'a> FromIterator<&'a str> for Utf8Stream<'a> {
251    fn from_iter<I: IntoIterator<Item = &'a str>>(iter: I) -> Utf8Stream<'a> {
252        let mut buf = Utf8Stream::default();
253        buf.extend(iter);
254        buf
255    }
256}
257
258impl<'a> From<&str> for Utf8Stream<'a> {
259    fn from(s: &str) -> Utf8Stream<'a> {
260        Utf8Stream::new(s)
261    }
262}
263
264impl<'a> From<String> for Utf8Stream<'a> {
265    fn from(s: String) -> Utf8Stream<'a> {
266        Utf8Stream::new(s)
267    }
268}
269
270impl<'a> From<&String> for Utf8Stream<'a> {
271    fn from(s: &String) -> Utf8Stream<'a> {
272        Utf8Stream::new(s)
273    }
274}
275
276impl<'a> Display for Utf8Stream<'a> {
277    fn fmt(&self, f: &mut Formatter) -> std::fmt::Result {
278        write!(f, "{}", self.as_str())
279    }
280}
281
282impl<'a> Deref for Utf8Stream<'a> {
283    type Target = str;
284
285    fn deref(&self) -> &str {
286        self.as_str()
287    }
288}
289
/// Allocates a zero-filled buffer of `size` bytes and returns its address.
///
/// A request for zero bytes still allocates one byte, so the returned pointer
/// always refers to a real allocation of `max(size, 1)` bytes. Allocation
/// failure is routed to `std::alloc::handle_alloc_error`.
fn new_ptr(size: usize) -> *mut u8 {
    let layout = Layout::array::<u8>(size.max(1)).unwrap();
    // SAFETY: `layout` describes at least one byte, so its size is non-zero.
    let ptr = unsafe { std::alloc::alloc_zeroed(layout) };
    if ptr.is_null() {
        std::alloc::handle_alloc_error(layout);
    }
    // `alloc_zeroed` already hands back zeroed memory, so no further
    // initialisation is required.
    ptr
}
/// Reallocates the block at `ptr` to `new_size` bytes (at least one) and
/// returns the address of the resized block.
///
/// The block may move: after this call `ptr` must be considered invalid and
/// only the returned pointer may be used, hence `#[must_use]`. Allocation
/// failure is routed to `std::alloc::handle_alloc_error`.
///
/// NOTE(review): the layout passed to `realloc` is built from `new_size`,
/// whereas the allocator contract asks for the layout the block currently
/// has. The signature does not carry the current size, so this cannot be
/// corrected here without changing the callers.
#[must_use = "realloc may move the block; the old pointer is invalid afterwards"]
fn grow_ptr(ptr: *mut u8, new_size: usize) -> *mut u8 {
    let new_size = new_size.max(1);
    let layout = Layout::array::<u8>(new_size).unwrap();
    // SAFETY: relies on the caller passing a live pointer obtained from the
    // global allocator; see the NOTE above regarding `layout`.
    let resized = unsafe { std::alloc::realloc(ptr, layout, new_size) };
    if resized.is_null() {
        std::alloc::handle_alloc_error(layout);
    }
    resized
}
319
/// Reallocates the block at `ptr` down to `new_size` bytes (at least one) and
/// returns the address of the resized block.
///
/// The block may move even when shrinking: after this call `ptr` must be
/// considered invalid and only the returned pointer may be used, hence
/// `#[must_use]`. Allocation failure is routed to
/// `std::alloc::handle_alloc_error`.
///
/// NOTE(review): the layout passed to `realloc` is built from `new_size`,
/// whereas the allocator contract asks for the layout the block currently
/// has. The signature does not carry the current size, so this cannot be
/// corrected here without changing the callers.
#[must_use = "realloc may move the block; the old pointer is invalid afterwards"]
fn shrink_ptr(ptr: *mut u8, new_size: usize) -> *mut u8 {
    let new_size = new_size.max(1);
    let layout = Layout::array::<u8>(new_size).unwrap();
    // SAFETY: relies on the caller passing a live pointer obtained from the
    // global allocator; see the NOTE above regarding `layout`.
    let resized = unsafe { std::alloc::realloc(ptr, layout, new_size) };
    if resized.is_null() {
        std::alloc::handle_alloc_error(layout);
    }
    resized
}
332
333fn is_not_ascii_byte(byte: u8) -> bool {
334    !is_ascii_printable_byte(byte) || byte > 127
335}
336
/// Returns `true` for the bytes this crate treats as printable ASCII:
/// 9 through 12 and 32 through 125, both ends included.
///
/// Carriage return (13) and `~` (126) fall outside these ranges and are
/// reported as not printable.
fn is_ascii_printable_byte(byte: u8) -> bool {
    matches!(byte, 9..=12 | 32..=125)
}