gluon_parser/
str_suffix.rs

1#![allow(unused)]
2
3use std::{mem, str};
4
5/// Str-like type where the first 0-2 bytes may point into a UTF-8 characters but all bytes
6/// following those are guaranteed to represent a valid UTF-8 string (`str`). Relying on this
7/// property we can iterate over the `StrSuffix` byte-by-byte as we would on a `[u8]` without
8/// needing an expensive validation when going back to a `str` as checking any part of a
9/// `StrSuffix` for UTF-8-ness only requires a char boundary check (same as a slicing a `str`).
10#[repr(transparent)]
11pub struct StrSuffix([u8]);
12
13impl StrSuffix {
14    pub fn new(s: &str) -> &Self {
15        unsafe { mem::transmute(s.as_bytes()) }
16    }
17
18    pub fn len(&self) -> usize {
19        self.0.len()
20    }
21
22    pub fn is_empty(&self) -> bool {
23        self.0.is_empty()
24    }
25
26    pub fn first(&self) -> Option<u8> {
27        self.0.first().cloned()
28    }
29
30    pub fn split_first(&self) -> Option<(u8, &Self)> {
31        if self.is_empty() {
32            None
33        } else {
34            Some((self.0[0], self.suffix(1)))
35        }
36    }
37
38    pub fn try_as_str(&self) -> Option<&str> {
39        self.get(0)
40    }
41
42    fn get(&self, index: usize) -> Option<&str> {
43        if self.is_char_boundary(index) {
44            Some(unsafe { str::from_utf8_unchecked(&self.0) })
45        } else {
46            None
47        }
48    }
49
50    #[inline(always)]
51    fn is_char_boundary_byte(b: u8) -> bool {
52        // This is bit magic equivalent to: b < 128 || b >= 192
53        (b as i8) >= -0x40
54    }
55
56    fn is_char_boundary(&self, index: usize) -> bool {
57        // From std::str::is_char_boundary
58        if index == 0 || index == self.len() {
59            return true;
60        }
61        match self.as_bytes().get(index) {
62            None => false,
63            Some(&b) => Self::is_char_boundary_byte(b),
64        }
65    }
66
67    fn bytes_prefix(&self) -> &[u8] {
68        for i in 0..(self.len().min(3)) {
69            if Self::is_char_boundary_byte(self.0[i]) {
70                return &self.0[..i];
71            }
72        }
73        &self.0[..0]
74    }
75
76    pub fn restore_char(&self, prefix: &[u8]) -> char {
77        assert!(prefix.len() <= 4);
78        let mut buf = [0; 4];
79        buf[..prefix.len()].copy_from_slice(prefix);
80        let suffix = self.bytes_prefix();
81        buf[prefix.len()..(prefix.len() + suffix.len())].copy_from_slice(suffix);
82        str::from_utf8(&buf)
83            .expect("UTF-8 string")
84            .chars()
85            .next()
86            .expect("char")
87    }
88
89    fn suffix(&self, index: usize) -> &Self {
90        // Any suffix of a StrSuffix is a valid StrSuffix
91        unsafe { mem::transmute(&self.0[index..]) }
92    }
93
94    pub fn as_bytes(&self) -> &[u8] {
95        &self.0
96    }
97
98    pub fn iter(&self) -> Iter {
99        Iter(self)
100    }
101}
102
103pub struct Iter<'a>(&'a StrSuffix);
104
105impl<'a> Iterator for Iter<'a> {
106    type Item = u8;
107
108    fn next(&mut self) -> Option<u8> {
109        if let Some((b, rest)) = self.0.split_first() {
110            self.0 = rest;
111            Some(b)
112        } else {
113            None
114        }
115    }
116}
117
118impl<'a> Iter<'a> {
119    pub fn as_str_suffix(&self) -> &'a StrSuffix {
120        self.0
121    }
122}