// ib_matcher/matcher/encoding.rs
use std::{ops::RangeFrom, slice::SliceIndex};
2
3pub trait EncodedStr: Sealed {
9 type CHAR;
10 type SLICE: ?Sized;
11
12 const ELEMENT_LEN_BYTE: usize = core::mem::size_of::<Self::CHAR>();
13 const CHAR: usize = Self::ELEMENT_LEN_BYTE;
14 const UTF8: bool = false;
15
16 fn is_ascii(&self) -> bool;
17 fn as_bytes(&self) -> &[u8];
18
19 unsafe fn get_unchecked<I: SliceIndex<Self::SLICE, Output = Self::SLICE>>(&self, i: I)
20 -> &Self;
21 unsafe fn get_unchecked_from(&self, range: RangeFrom<usize>) -> &Self;
22
23 fn char_index_strs(&self) -> impl Iterator<Item = (usize, char, &Self)>;
24 fn char_len_next_strs(&self) -> impl Iterator<Item = (char, usize, &Self)>;
25 fn chars_count(&self) -> usize {
26 self.char_index_strs().count()
27 }
28}
29
/// Private home of the sealing marker: because this module is not `pub`,
/// code outside this module tree cannot name `Sealed` and therefore cannot
/// implement any trait that requires it as a supertrait.
mod private {
    /// Marker supertrait restricting which types may implement `EncodedStr`.
    pub trait Sealed {}
}
use private::Sealed;

// The complete list of string types allowed to implement `EncodedStr`.
impl Sealed for str {}
#[cfg(feature = "encoding")]
impl Sealed for widestring::U16Str {}
#[cfg(feature = "encoding")]
impl Sealed for widestring::U32Str {}
40
41impl EncodedStr for str {
42 type CHAR = u8;
43 type SLICE = str;
44
45 const UTF8: bool = true;
46
47 fn is_ascii(&self) -> bool {
48 self.is_ascii()
49 }
50
51 fn as_bytes(&self) -> &[u8] {
52 self.as_bytes()
53 }
54
55 unsafe fn get_unchecked<I: SliceIndex<Self::SLICE, Output = Self::SLICE>>(
56 &self,
57 i: I,
58 ) -> &Self {
59 self.get_unchecked(i)
60 }
61
62 unsafe fn get_unchecked_from(&self, range: RangeFrom<usize>) -> &Self {
63 self.get_unchecked(range)
64 }
65
66 fn char_index_strs(&self) -> impl Iterator<Item = (usize, char, &Self)> {
67 self.char_indices().map(|(i, c)| (i, c, &self[i..]))
68 }
69
70 fn char_len_next_strs(&self) -> impl Iterator<Item = (char, usize, &Self)> {
71 self.char_indices().map(|(i, c)| {
72 let len = c.len_utf8();
73 (c, len, &self[i + len..])
74 })
75 }
76
77 fn chars_count(&self) -> usize {
78 self.chars().count()
79 }
80}
81
/// UTF-16 implementation: elements are `u16` code units and every index is a
/// code-unit offset. Characters are decoded lossily by the `*_lossy`
/// iterators of `widestring`.
#[cfg(feature = "encoding")]
impl EncodedStr for widestring::U16Str {
    type CHAR = u16;
    type SLICE = [u16];

    /// Returns `true` if the string contains only ASCII characters.
    fn is_ascii(&self) -> bool {
        // A code unit below 0x80 always decodes to that ASCII character, and
        // any other unit (including surrogates) decodes to something
        // non-ASCII, so checking units is equivalent to decoding every char.
        self.as_slice().iter().all(|&unit| unit < 0x80)
    }

    /// Views the code units as raw bytes in their in-memory layout.
    fn as_bytes(&self) -> &[u8] {
        let units = self.as_slice();
        // SAFETY: `units` is a valid, initialized `[u16]`; `u8` has alignment 1
        // and no invalid bit patterns; the byte length is exactly the size of
        // the slice; and the result borrows from `self` for the same lifetime.
        unsafe {
            core::slice::from_raw_parts(units.as_ptr().cast::<u8>(), core::mem::size_of_val(units))
        }
    }

    /// Unchecked sub-slicing; forwards to the inherent `U16Str::get_unchecked`.
    ///
    /// # Safety
    ///
    /// `i` must be in bounds for the string, measured in `u16` code units.
    unsafe fn get_unchecked<I: SliceIndex<Self::SLICE, Output = Self::SLICE>>(
        &self,
        i: I,
    ) -> &Self {
        // SAFETY: the caller guarantees `i` is in bounds. Method resolution
        // picks the inherent method, so this does not recurse.
        unsafe { self.get_unchecked(i) }
    }

    /// Unchecked suffix starting at code-unit offset `range.start`.
    ///
    /// # Safety
    ///
    /// `range.start` must be at most `self.len()`.
    unsafe fn get_unchecked_from(&self, range: RangeFrom<usize>) -> &Self {
        // SAFETY: guaranteed by the caller, see the `# Safety` section above.
        unsafe { self.get_unchecked(range) }
    }

    /// Yields `(code_unit_offset, char, suffix starting at that char)`.
    fn char_index_strs(&self) -> impl Iterator<Item = (usize, char, &Self)> {
        // `move` copies the reference into the closure so the returned
        // iterator borrows the string itself rather than the local `self`.
        self.char_indices_lossy()
            .map(move |(start, ch)| (start, ch, &self[start..]))
    }

    /// Yields `(char, UTF-16 length in code units, suffix after that char)`.
    fn char_len_next_strs(&self) -> impl Iterator<Item = (char, usize, &Self)> {
        self.char_indices_lossy().map(move |(start, ch)| {
            let width = ch.len_utf16();
            (ch, width, &self[start + width..])
        })
    }
}
123
/// UTF-32 implementation: elements are `u32` values, every character occupies
/// exactly one element, and every index is an element offset. Characters are
/// decoded lossily by the `*_lossy` iterators of `widestring`.
#[cfg(feature = "encoding")]
impl EncodedStr for widestring::U32Str {
    type CHAR = u32;
    type SLICE = [u32];

    /// Returns `true` if the string contains only ASCII characters.
    fn is_ascii(&self) -> bool {
        // An element below 0x80 is that ASCII character; anything else decodes
        // to a non-ASCII character, so checking the raw elements is equivalent
        // to decoding every char.
        self.as_slice().iter().all(|&unit| unit < 0x80)
    }

    /// Views the elements as raw bytes in their in-memory layout.
    fn as_bytes(&self) -> &[u8] {
        let units = self.as_slice();
        // SAFETY: `units` is a valid, initialized `[u32]`; `u8` has alignment 1
        // and no invalid bit patterns; the byte length is exactly the size of
        // the slice; and the result borrows from `self` for the same lifetime.
        unsafe {
            core::slice::from_raw_parts(units.as_ptr().cast::<u8>(), core::mem::size_of_val(units))
        }
    }

    /// Unchecked sub-slicing; forwards to the inherent `U32Str::get_unchecked`.
    ///
    /// # Safety
    ///
    /// `i` must be in bounds for the string, measured in `u32` elements.
    unsafe fn get_unchecked<I: SliceIndex<Self::SLICE, Output = Self::SLICE>>(
        &self,
        i: I,
    ) -> &Self {
        // SAFETY: the caller guarantees `i` is in bounds. Method resolution
        // picks the inherent method, so this does not recurse.
        unsafe { self.get_unchecked(i) }
    }

    /// Unchecked suffix starting at element offset `range.start`.
    ///
    /// # Safety
    ///
    /// `range.start` must be at most `self.len()`.
    unsafe fn get_unchecked_from(&self, range: RangeFrom<usize>) -> &Self {
        // SAFETY: guaranteed by the caller, see the `# Safety` section above.
        unsafe { self.get_unchecked(range) }
    }

    /// Yields `(element_offset, char, suffix starting at that char)`.
    fn char_index_strs(&self) -> impl Iterator<Item = (usize, char, &Self)> {
        // `move` copies the reference into the closure so the returned
        // iterator borrows the string itself rather than the local `self`.
        self.char_indices_lossy()
            .map(move |(start, ch)| (start, ch, &self[start..]))
    }

    /// Yields `(char, 1, suffix after that char)`; the length is always one
    /// element in this encoding.
    fn char_len_next_strs(&self) -> impl Iterator<Item = (char, usize, &Self)> {
        self.char_indices_lossy()
            .map(move |(start, ch)| (ch, 1, &self[start + 1..]))
    }
}
163
#[cfg(test)]
mod tests {
    // Every test below is gated on the `encoding` feature, so without that
    // feature this glob import would otherwise be reported as unused.
    #[allow(unused_imports)]
    use super::*;

    /// `is_ascii` on UTF-16 strings: empty and plain ASCII are ASCII, CJK is not.
    #[cfg(feature = "encoding")]
    #[test]
    fn u16_is_ascii() {
        use widestring::u16str;

        assert!(u16str!("").is_ascii());
        assert!(u16str!("abc").is_ascii());
        assert!(!u16str!("协作").is_ascii());
    }

    /// `is_ascii` on UTF-32 strings: empty and plain ASCII are ASCII, CJK is not.
    #[cfg(feature = "encoding")]
    #[test]
    fn u32_is_ascii() {
        use widestring::u32str;

        assert!(u32str!("").is_ascii());
        assert!(u32str!("abc").is_ascii());
        assert!(!u32str!("协作").is_ascii());
    }
}