1use crate::extension;
32use std::borrow::Cow;
33
34pub trait Range {
36 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str;
39}
40
41extension! {
42 pub extension StrTools: str {
44 fn sub_nearest(&self, range: impl Range) -> &str;
52
53 fn capitalise(&self) -> Cow<str>;
55
56 fn decapitalise(&self) -> Cow<str>;
59 }
60
61 pub extension BufTools: [u8] {
63 fn capitalise_ascii(&self) -> Cow<[u8]>;
65
66 fn decapitalise_ascii(&self) -> Cow<[u8]>;
69 }
70}
71
72fn utf8_max(buf: &[u8], max: usize) -> &[u8] {
73 if unsafe { buf.get_unchecked(max.unchecked_sub(1)) } & 0x80 == 0x00 {
74 &buf[..max]
75 } else {
76 let start = unsafe { max.unchecked_sub(1) };
77 let mut i = start;
78 unsafe {
79 while buf.get_unchecked(i) & 0xC0 == 0x80 {
80 i = i.unchecked_sub(1);
81 }
82 let n = start.unchecked_sub(i);
83 if (buf.get_unchecked(i) & 0xF0 == 0xF0 && n == 4)
84 || (buf.get_unchecked(i) & 0xE0 == 0xE0 && n == 3)
85 || (buf.get_unchecked(i) & 0xC0 == 0xC0 && n == 2)
86 {
87 &buf[..max]
88 } else {
89 &buf[..i]
90 }
91 }
92 }
93}
94
95fn utf8_min(buf: &[u8], start: usize) -> &[u8] {
96 if unsafe { buf.get_unchecked(start) } & 0x80 == 0x00 {
97 &buf[start..]
98 } else {
99 let mut i = start;
100 unsafe {
101 while i < buf.len() && buf.get_unchecked(i) & 0xC0 == 0x80 {
102 i = i.unchecked_add(1);
103 }
104 &buf[i..]
105 }
106 }
107}
108
109impl Range for std::ops::Range<usize> {
110 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
111 let bytes = obj.as_bytes();
112 let bytes = utf8_max(bytes, self.end);
113 if bytes.is_empty() {
114 return "";
115 }
116 let bytes = utf8_min(bytes, self.start);
117 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
118 }
119}
120
121impl Range for std::ops::RangeTo<usize> {
122 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
123 let bytes = obj.as_bytes();
124 let bytes = utf8_max(bytes, self.end);
125 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
126 }
127}
128
129impl Range for std::ops::RangeFrom<usize> {
130 fn sub_nearest<'a>(&self, obj: &'a str) -> &'a str {
131 let bytes = obj.as_bytes();
132 let bytes = utf8_min(bytes, self.start);
133 unsafe { std::str::from_utf8(bytes).unwrap_unchecked() }
134 }
135}
136
137impl StrTools for str {
138 fn sub_nearest(&self, range: impl Range) -> &str {
139 range.sub_nearest(self)
140 }
141
142 fn capitalise(&self) -> Cow<str> {
143 if self.is_empty() {
144 return self.into();
145 }
146 let first = unsafe { self.chars().next().unwrap_unchecked() };
147 if first.is_uppercase() {
148 self.into()
149 } else {
150 (self.sub_nearest(..1).to_uppercase() + self.sub_nearest(1..)).into()
151 }
152 }
153
154 fn decapitalise(&self) -> Cow<str> {
155 if self.is_empty() {
156 return self.into();
157 }
158 let first = unsafe { self.chars().next().unwrap_unchecked() };
159 if first.is_uppercase() {
160 (self.sub_nearest(..1).to_lowercase() + self.sub_nearest(1..)).into()
161 } else {
162 self.into()
163 }
164 }
165}
166
167impl BufTools for [u8] {
168 fn capitalise_ascii(&self) -> Cow<[u8]> {
169 if self.is_empty() {
170 return self.into();
171 }
172 if self[0] >= b'A' && self[0] <= b'Z' {
173 self.into()
174 } else {
175 let mut v: Vec<u8> = self.into();
176 v[0] = v[0].to_ascii_uppercase();
177 v.into()
178 }
179 }
180
181 fn decapitalise_ascii(&self) -> Cow<[u8]> {
182 if self.is_empty() {
183 return self.into();
184 }
185 if self[0] >= b'A' && self[0] <= b'Z' {
186 let mut v: Vec<u8> = self.into();
187 v[0] = v[0].to_ascii_lowercase();
188 v.into()
189 } else {
190 self.into()
191 }
192 }
193}
194
195#[cfg(test)]
196mod tests {
197 use crate::string::{BufTools, StrTools};
198 use std::borrow::Cow;
199
200 #[test]
201 fn sub_basic() {
202 let str = "Hello";
203 assert_eq!(str.sub_nearest(..1), "H");
204 assert_eq!(str.sub_nearest(1..), "ello");
205 }
206
207 #[test]
208 fn truncate_ascii() {
209 let s = "this is a test";
210 assert_eq!(s.sub_nearest(..4), "this");
211 assert_eq!(&s[4..7], " is");
212 assert_eq!(s.sub_nearest(4..7), " is");
213 }
214
215 #[test]
216 fn truncate_utf8() {
217 let msg = "我";
218 assert_eq!(msg.sub_nearest(..3), "我");
219 assert_eq!(msg.sub_nearest(..1), "");
220 assert_eq!(msg.sub_nearest(1..), "");
221 }
222
223 #[test]
224 fn truncate_utf82() {
225 let msg = "我是";
226 assert_eq!(msg.sub_nearest(..6), "我是");
227 assert_eq!(msg.sub_nearest(..5), "我");
228 assert_eq!(msg.sub_nearest(1..), "是");
229 }
230
231 #[test]
232 fn truncate_utf83() {
233 let msg = "我abcd";
234 assert_eq!(msg.sub_nearest(..6), "我abc");
235 assert_eq!(msg.sub_nearest(1..), "abcd");
236 assert_eq!(msg.sub_nearest(1..2), "");
237 assert_eq!(msg.sub_nearest(1..4), "a");
238 assert_eq!(msg.sub_nearest(1..5), "ab");
239 assert_eq!(msg.sub_nearest(1..msg.len()), "abcd");
240 assert_eq!(msg.sub_nearest(1..msg.len() - 1), "abc");
241 }
242
243 #[test]
244 fn basic_capitalize() {
245 let msg = "abc";
246 let msg1 = "Abc";
247 assert_eq!(msg.capitalise(), "Abc");
248 assert_eq!(msg1.capitalise(), "Abc");
249 assert!(matches!(msg1.capitalise(), Cow::Borrowed(_)));
250 assert_eq!(msg1.decapitalise(), "abc");
251 }
252
253 #[test]
254 fn ascii_capitalize() {
255 let msg = "abc";
256 let msg1 = "Abc";
257 assert_eq!(&*msg.as_bytes().capitalise_ascii(), b"Abc");
258 assert_eq!(&*msg1.as_bytes().capitalise_ascii(), b"Abc");
259 assert!(matches!(
260 msg1.as_bytes().capitalise_ascii(),
261 Cow::Borrowed(_)
262 ));
263 assert_eq!(&*msg1.as_bytes().decapitalise_ascii(), b"abc");
264 }
265}