1use crate::extension;
32use std::borrow::Cow;
33
/// A byte range that knows how to slice a string without cutting a character in half.
///
/// This file implements it for `std::ops::Range<usize>`, `std::ops::RangeTo<usize>` and
/// `std::ops::RangeFrom<usize>`.
pub trait Range {
    /// Returns the part of `obj` selected by this range.
    ///
    /// Range endpoints are byte offsets. The implementations in this file move an end offset
    /// that falls inside a multi-byte UTF-8 sequence back to the start of that sequence, and
    /// a start offset forward past the remaining continuation bytes. The returned slice
    /// borrows from `obj`.
    fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str;
}
40
41extension! {
42 pub extension StrTools: str {
44 fn sub_nearest(&self, range: impl Range) -> &str;
52
53 fn capitalise(&self) -> Cow<str>;
55
56 fn decapitalise(&self) -> Cow<str>;
59 }
60
61 pub extension BufTools: [u8] {
63 fn capitalise_ascii(&self) -> Cow<[u8]>;
65
66 fn decapitalise_ascii(&self) -> Cow<[u8]>;
69 }
70}
71
/// Returns the longest prefix of `buf` that is at most `max` bytes long and does not end in
/// the middle of a UTF-8 sequence.
///
/// `max` is clamped to `buf.len()`, so any value (including `0`) is accepted. If the last
/// sequence starting before the cut is incomplete, the prefix stops right before its lead
/// byte. Stray continuation bytes after a complete sequence are dropped as well, and a
/// prefix made only of continuation bytes yields an empty slice.
fn utf8_max(buf: &[u8], max: usize) -> &[u8] {
    let end = max.min(buf.len());

    // Lead byte of the last sequence that starts before `end`: the right-most byte that is
    // not a continuation byte (`10xxxxxx`).
    let Some(lead) = buf[..end].iter().rposition(|&b| b & 0xC0 != 0x80) else {
        return &buf[..0];
    };

    // Encoded length announced by the lead byte. Bytes that can never start a sequence
    // (0xF8..=0xFF) get a width of 0 so the cut lands right before them.
    let width = match buf[lead] {
        0x00..=0x7F => 1,
        0xC0..=0xDF => 2,
        0xE0..=0xEF => 3,
        0xF0..=0xF7 => 4,
        _ => 0,
    };

    let complete = lead + width;
    if complete <= end {
        // The whole sequence fits inside the requested prefix.
        &buf[..complete]
    } else {
        // The sequence would be cut in half: drop it entirely.
        &buf[..lead]
    }
}
94
/// Returns the suffix of `buf` that begins at the first sequence start at or after `start`.
///
/// `start` is clamped to `buf.len()`, so an offset at or past the end yields an empty slice
/// instead of reading out of bounds. When `start` points into the middle of a multi-byte
/// UTF-8 sequence, the remaining continuation bytes (`10xxxxxx`) are skipped.
fn utf8_min(buf: &[u8], start: usize) -> &[u8] {
    let start = start.min(buf.len());
    let skipped = buf[start..]
        .iter()
        .take_while(|&&b| b & 0xC0 == 0x80)
        .count();
    &buf[start + skipped..]
}
109
110impl Range for std::ops::Range<usize> {
111 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
112 let bytes = obj.as_bytes();
113 let bytes = utf8_max(bytes, self.end);
114 if bytes.is_empty() {
115 return "";
116 }
117 let bytes = utf8_min(bytes, self.start);
118 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
119 }
120}
121
122impl Range for std::ops::RangeTo<usize> {
123 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
124 let bytes = obj.as_bytes();
125 let bytes = utf8_max(bytes, self.end);
126 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
127 }
128}
129
130impl Range for std::ops::RangeFrom<usize> {
131 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
132 let bytes = obj.as_bytes();
133 let bytes = utf8_min(bytes, self.start);
134 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
135 }
136}
137
138impl StrTools for str {
139 fn sub_nearest(&self, range: impl Range) -> &str {
140 range.sub_nearest(self)
141 }
142
143 fn capitalise(&self) -> Cow<str> {
144 if self.is_empty() {
145 return self.into();
146 }
147 let first = unsafe { self.chars().next().unwrap_unchecked() };
148 if first.is_uppercase() {
149 self.into()
150 } else {
151 (self.sub_nearest(..1).to_uppercase() + self.sub_nearest(1..)).into()
152 }
153 }
154
155 fn decapitalise(&self) -> Cow<str> {
156 if self.is_empty() {
157 return self.into();
158 }
159 let first = unsafe { self.chars().next().unwrap_unchecked() };
160 if first.is_uppercase() {
161 (self.sub_nearest(..1).to_lowercase() + self.sub_nearest(1..)).into()
162 } else {
163 self.into()
164 }
165 }
166}
167
168impl BufTools for [u8] {
169 fn capitalise_ascii(&self) -> Cow<[u8]> {
170 if self.is_empty() {
171 return self.into();
172 }
173 if self[0] >= b'A' && self[0] <= b'Z' {
174 self.into()
175 } else {
176 let mut v: Vec<u8> = self.into();
177 v[0] = v[0].to_ascii_uppercase();
178 v.into()
179 }
180 }
181
182 fn decapitalise_ascii(&self) -> Cow<[u8]> {
183 if self.is_empty() {
184 return self.into();
185 }
186 if self[0] >= b'A' && self[0] <= b'Z' {
187 let mut v: Vec<u8> = self.into();
188 v[0] = v[0].to_ascii_lowercase();
189 v.into()
190 } else {
191 self.into()
192 }
193 }
194}
195
#[cfg(test)]
mod tests {
    use crate::string::{BufTools, StrTools};
    use std::borrow::Cow;

    // Plain ASCII: byte offsets and character offsets coincide.
    #[test]
    fn sub_basic() {
        let text = "Hello";
        assert_eq!(text.sub_nearest(..1), "H");
        assert_eq!(text.sub_nearest(1..), "ello");
    }

    // `sub_nearest` must agree with regular slicing on ASCII input.
    #[test]
    fn truncate_ascii() {
        let text = "this is a test";
        assert_eq!(text.sub_nearest(..4), "this");
        assert_eq!(&text[4..7], " is");
        assert_eq!(text.sub_nearest(4..7), " is");
    }

    // A single three-byte character: any cut inside it removes the whole character.
    #[test]
    fn truncate_utf8() {
        let text = "我";
        assert_eq!(text.sub_nearest(..3), "我");
        assert_eq!(text.sub_nearest(..1), "");
        assert_eq!(text.sub_nearest(1..), "");
    }

    // Two three-byte characters: cuts inside one of them leave the other intact.
    #[test]
    fn truncate_utf82() {
        let text = "我是";
        assert_eq!(text.sub_nearest(..6), "我是");
        assert_eq!(text.sub_nearest(..5), "我");
        assert_eq!(text.sub_nearest(1..), "是");
    }

    // Mixed input: a three-byte character followed by ASCII.
    #[test]
    fn truncate_utf83() {
        let text = "我abcd";
        assert_eq!(text.sub_nearest(..6), "我abc");
        assert_eq!(text.sub_nearest(1..), "abcd");

        // Bounded ranges whose start lies inside the first character.
        let len = text.len();
        let cases = [
            (1..2, ""),
            (1..4, "a"),
            (1..5, "ab"),
            (1..len, "abcd"),
            (1..len - 1, "abc"),
        ];
        for (range, expected) in cases {
            assert_eq!(text.sub_nearest(range), expected);
        }
    }

    #[test]
    fn basic_capitalize() {
        let lower = "abc";
        let upper = "Abc";
        assert_eq!(lower.capitalise(), "Abc");
        assert_eq!(upper.capitalise(), "Abc");
        // Already capitalised input must not allocate.
        assert!(matches!(upper.capitalise(), Cow::Borrowed(_)));
        assert_eq!(upper.decapitalise(), "abc");
    }

    #[test]
    fn ascii_capitalize() {
        let lower = "abc".as_bytes();
        let upper = "Abc".as_bytes();
        assert_eq!(&*lower.capitalise_ascii(), b"Abc");
        assert_eq!(&*upper.capitalise_ascii(), b"Abc");
        // Already capitalised input must not allocate.
        assert!(matches!(upper.capitalise_ascii(), Cow::Borrowed(_)));
        assert_eq!(&*upper.decapitalise_ascii(), b"abc");
    }
}