str_utils/
remove_all_invisible_characters.rs1use alloc::{borrow::Cow, str::from_utf8_unchecked, string::String};
2
3use crate::to_substring_in_place;
4
/// Extension trait that strips invisible and non-printable characters from string-like values.
pub trait RemoveInvisibleCharacters<'a> {
    /// Removes every invisible character from the string and returns what is left.
    ///
    /// The following code points are removed:
    ///
    /// - `U+0000..=U+0008`, `U+000B..=U+001F` and `U+007F`: ASCII control characters.
    ///   Tab (`U+0009`) and line feed (`U+000A`) are kept; carriage return (`U+000D`) is removed.
    /// - `U+200B..=U+200F`: zero-width space, zero-width (non-)joiner and the
    ///   left-to-right / right-to-left marks.
    /// - `U+202A..=U+202E`: bidirectional embedding and override controls.
    /// - `U+2060`, `U+2062..=U+2064` and `U+2066..=U+2069`: word joiner, invisible
    ///   operators and bidirectional isolate controls.
    /// - `U+FEFF`: byte order mark / zero-width no-break space.
    ///
    /// Every other character, including the ordinary space, is left untouched.
    ///
    /// # Returns
    ///
    /// The visible characters in their original order. When they form one contiguous
    /// piece of the input (or nothing is left at all) the result borrows from the input;
    /// otherwise a new `String` is allocated.
    fn remove_all_invisible_characters(self) -> Cow<'a, str>;
}

impl<'a> RemoveInvisibleCharacters<'a> for &'a str {
    fn remove_all_invisible_characters(self) -> Cow<'a, str> {
        /// Returns `true` for the code points this trait strips.
        fn is_invisible(c: char) -> bool {
            matches!(
                c,
                // ASCII control characters except tab (U+0009) and line feed (U+000A), plus DEL
                '\u{0}'..='\u{8}' | '\u{B}'..='\u{1F}' | '\u{7F}'
                // zero-width space / joiners and directional marks
                | '\u{200B}'..='\u{200F}'
                // bidirectional embedding and override controls
                | '\u{202A}'..='\u{202E}'
                // word joiner, invisible operators, bidirectional isolates
                | '\u{2060}' | '\u{2062}'..='\u{2064}' | '\u{2066}'..='\u{2069}'
                // byte order mark
                | '\u{FEFF}'
            )
        }

        let s = self;

        // Cut the input at every invisible character; what remains are the runs of visible
        // text, each one a sub-slice of `s`. Empty pieces (produced by adjacent, leading or
        // trailing invisible characters) carry no text and are skipped.
        let mut visible_runs = s.split(is_invisible).filter(|run| !run.is_empty());

        let first_run = match visible_runs.next() {
            Some(run) => run,
            // Nothing visible at all. Return an empty slice *of `s`* rather than a literal
            // `""`, so that a borrowed result always points into the input: the `Cow` impl
            // hands borrowed results to `to_substring_in_place!` together with the original
            // `String` (presumably it relies on that relationship - confirm in the macro).
            None => return Cow::Borrowed(&s[..0]),
        };

        let second_run = match visible_runs.next() {
            Some(run) => run,
            // A single visible run can be handed back without allocating.
            None => return Cow::Borrowed(first_run),
        };

        // Two or more runs have to be stitched together, starting with the text that
        // precedes the first invisible character. The input length is an upper bound for
        // the output length, so the buffer never needs to grow.
        let mut cleaned = String::with_capacity(s.len());

        cleaned.push_str(first_run);
        cleaned.push_str(second_run);
        cleaned.extend(visible_runs);

        Cow::Owned(cleaned)
    }
}
203
204impl<'a> RemoveInvisibleCharacters<'a> for Cow<'a, str> {
205 #[inline]
206 fn remove_all_invisible_characters(self) -> Cow<'a, str> {
207 match self {
208 Cow::Borrowed(s) => s.remove_all_invisible_characters(),
209 Cow::Owned(s) => match s.remove_all_invisible_characters() {
210 Cow::Borrowed(ss) => Cow::Owned(unsafe { to_substring_in_place!(s, ss) }),
211 Cow::Owned(s) => Cow::Owned(s),
212 },
213 }
214 }
215}