1use crate::slice::advance;
2use crate::slice::subslice;
3use crate::utf8::CharEncodeUtf8;
4
5use core::str;
6
7struct SplitImpl<'input, 'pat> {
8 input: &'input str,
9 pattern: &'pat str,
10 inclusive: bool,
11}
12
13impl<'input> SplitImpl<'input, '_> {
14 const fn output_len(&self) -> usize {
15 let mut input = self.input;
16 let pat = self.pattern;
17
18 if pat.is_empty() {
19 crate::utf8::str_count_chars(input) + 2
20 } else {
21 let mut ans = 0;
22 while let Some((_, remain)) = crate::str::next_match(input, pat) {
23 ans += 1;
24 input = remain
25 }
26 if self.inclusive {
27 if !input.is_empty() {
28 ans += 1;
29 }
30 } else {
31 ans += 1;
32 }
33 ans
34 }
35 }
36
37 #[allow(unsafe_code)]
38 const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
39 let mut input = self.input;
40 let pat = self.pattern;
41
42 let mut buf: [&str; N] = [""; N];
43 let mut pos = 0;
44
45 if pat.is_empty() {
46 let mut input = input.as_bytes();
47
48 {
49 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
50 pos += 1;
51 }
52
53 while let Some((_, count)) = crate::utf8::next_char(input) {
54 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..count)) };
55 pos += 1;
56 input = advance(input, count);
57 }
58
59 {
60 buf[pos] = unsafe { str::from_utf8_unchecked(subslice(input, 0..0)) };
61 pos += 1;
62 }
63 } else {
64 while let Some((m, remain)) = crate::str::next_match(input, pat) {
65 let substr = if self.inclusive {
66 subslice(input.as_bytes(), 0..m + pat.len())
67 } else {
68 subslice(input.as_bytes(), 0..m)
69 };
70 buf[pos] = unsafe { str::from_utf8_unchecked(substr) };
71 pos += 1;
72 input = remain;
73 }
74 if self.inclusive {
75 if !input.is_empty() {
76 buf[pos] = input;
77 pos += 1;
78 }
79 } else {
80 buf[pos] = input;
81 pos += 1;
82 }
83 }
84 assert!(pos == N);
85 buf
86 }
87}
88
89pub struct Split<T, P>(pub T, pub P);
90
91impl<'input, 'pat> Split<&'input str, &'pat str> {
92 const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
93 SplitImpl {
94 input: self.0,
95 pattern: self.1,
96 inclusive: false,
97 }
98 }
99
100 pub const fn output_len(&self) -> usize {
101 self.to_impl().output_len()
102 }
103
104 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
105 self.to_impl().const_eval()
106 }
107}
108
109impl<'input> Split<&'input str, char> {
110 const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
111 SplitImpl {
112 input: self.0,
113 pattern: ch.as_str(),
114 inclusive: false,
115 }
116 }
117
118 pub const fn output_len(&self) -> usize {
119 let ch = CharEncodeUtf8::new(self.1);
120 self.to_impl(&ch).output_len()
121 }
122
123 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
124 let ch = CharEncodeUtf8::new(self.1);
125 self.to_impl(&ch).const_eval()
126 }
127}
128
/// Splits a constant string around a separator at compile time.
///
/// Expands to a block that evaluates to a `[&str; N]` array, where `N` is
/// computed from the input in a first const pass and the pieces are produced
/// in a second one. Both arguments are used inside `const` items, so they must
/// be constant expressions; the input must be a `&str`.
#[macro_export]
macro_rules! split {
    ($input: expr, $pattern: expr) => {{
        const INPUT: &str = $input;
        const OUTPUT_LEN: usize = $crate::__ctfe::Split(INPUT, $pattern).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::Split(INPUT, $pattern).const_eval();
        OUTPUT_BUF
    }};
}
161
162pub struct SplitInclusive<T, P>(pub T, pub P);
163
164impl<'input, 'pat> SplitInclusive<&'input str, &'pat str> {
165 const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
166 SplitImpl {
167 input: self.0,
168 pattern: self.1,
169 inclusive: true,
170 }
171 }
172
173 pub const fn output_len(&self) -> usize {
174 self.to_impl().output_len()
175 }
176
177 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
178 self.to_impl().const_eval()
179 }
180}
181
182impl<'input> SplitInclusive<&'input str, char> {
183 const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
184 SplitImpl {
185 input: self.0,
186 pattern: ch.as_str(),
187 inclusive: true,
188 }
189 }
190
191 pub const fn output_len(&self) -> usize {
192 let ch = CharEncodeUtf8::new(self.1);
193 self.to_impl(&ch).output_len()
194 }
195
196 pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
197 let ch = CharEncodeUtf8::new(self.1);
198 self.to_impl(&ch).const_eval()
199 }
200}
201
/// Splits a constant string after every occurrence of a separator at compile
/// time, leaving each separator attached to the piece it terminates.
///
/// Expands to a block that evaluates to a `[&str; N]` array, where `N` is
/// computed from the input in a first const pass and the pieces are produced
/// in a second one. Both arguments are used inside `const` items, so they must
/// be constant expressions; the input must be a `&str`.
#[macro_export]
macro_rules! split_inclusive {
    ($input: expr, $pattern: expr) => {{
        const INPUT: &str = $input;
        const OUTPUT_LEN: usize = $crate::__ctfe::SplitInclusive(INPUT, $pattern).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::SplitInclusive(INPUT, $pattern).const_eval();
        OUTPUT_BUF
    }};
}
241
242pub struct SplitAsciiWhitespace<T>(pub T);
243
244impl SplitAsciiWhitespace<&'_ str> {
245 pub const fn output_len(&self) -> usize {
246 let bytes = self.0.as_bytes();
247 let mut count = 0;
248 let mut i = 0;
249 let mut in_word = false;
250
251 while i < bytes.len() {
252 if bytes[i].is_ascii_whitespace() {
253 if in_word {
254 count += 1;
255 in_word = false;
256 }
257 } else {
258 in_word = true;
259 }
260 i += 1;
261 }
262
263 if in_word {
264 count += 1;
265 }
266
267 count
268 }
269
270 #[allow(unsafe_code)]
271 pub const fn const_eval<const N: usize>(&self) -> [&'_ str; N] {
272 let bytes = self.0.as_bytes();
273 let mut buf: [&str; N] = [""; N];
274 let mut pos = 0;
275 let mut i = 0;
276
277 while i < bytes.len() {
278 while i < bytes.len() && bytes[i].is_ascii_whitespace() {
280 i += 1;
281 }
282
283 if i >= bytes.len() {
284 break;
285 }
286
287 let start = i;
289
290 while i < bytes.len() && !bytes[i].is_ascii_whitespace() {
292 i += 1;
293 }
294
295 let word_bytes = subslice(bytes, start..i);
297 buf[pos] = unsafe { core::str::from_utf8_unchecked(word_bytes) };
298 pos += 1;
299 }
300
301 assert!(pos == N);
302 buf
303 }
304}
305
306pub const fn map_lines<const N: usize>(mut lines: [&str; N]) -> [&str; N] {
307 let mut i = 0;
308 while i < N {
309 let s = lines[i];
310 match crate::str::strip_suffix(s, "\r\n") {
311 Some(s) => lines[i] = s,
312 None => match crate::str::strip_suffix(s, "\n") {
313 Some(s) => lines[i] = s,
314 None => lines[i] = s,
315 },
316 }
317 i += 1;
318 }
319 lines
320}
321
/// Splits a constant string into lines at compile time.
///
/// The input is first split after every `"\n"` with `split_inclusive!`, then
/// the trailing `"\r\n"` or `"\n"` of each piece is removed by `map_lines`.
/// Expands to an expression that evaluates to an array of `&str`.
#[macro_export]
macro_rules! split_lines {
    ($input: expr) => {{
        $crate::__ctfe::map_lines($crate::split_inclusive!($input, "\n"))
    }};
}
361
/// Splits a constant string into its ASCII-whitespace-separated words at
/// compile time.
///
/// Expands to a block that evaluates to a `[&str; N]` array, where `N` is
/// computed from the input in a first const pass and the words are produced in
/// a second one. The argument is used inside a `const` item, so it must be a
/// constant `&str` expression.
#[macro_export]
macro_rules! split_ascii_whitespace {
    ($input: expr) => {{
        const INPUT: &str = $input;
        const OUTPUT_LEN: usize = $crate::__ctfe::SplitAsciiWhitespace(INPUT).output_len();
        const OUTPUT_BUF: [&str; OUTPUT_LEN] =
            $crate::__ctfe::SplitAsciiWhitespace(INPUT).const_eval();
        OUTPUT_BUF
    }};
}
400
#[cfg(test)]
mod tests {
    use super::*;

    /// `split!` must agree with `str::split` for `&str` and `char` patterns.
    #[test]
    fn test_split() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split!($input, $pat);

                let ans = $input.split($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        // String patterns, including the empty pattern and overlapping matches.
        testcase!("", "");
        testcase!("a中1😂1!", "");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");

        // Char patterns.
        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    /// `split_inclusive!` must agree with `str::split_inclusive` for
    /// non-empty `&str` patterns and for `char` patterns.
    #[test]
    fn test_split_inclusive() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_inclusive!($input, $pat);

                let ans = $input.split_inclusive($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        // String patterns, with and without a trailing separator.
        testcase!("", "\n");
        testcase!("a\nb\nc", "\n");
        testcase!("a\nb\nc\n", "\n");
        testcase!("\n\n", "\n");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "!");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");

        // Char patterns.
        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    /// `split_lines!` must agree with `str::lines`.
    #[test]
    fn test_split_lines() {
        macro_rules! testcase {
            ($input: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_lines!($input);

                let ans = $input.lines().collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        testcase!("");
        testcase!("foo");
        testcase!("foo\nbar");
        testcase!("foo\r\nbar\n");
        testcase!("\n\nfoo\r\n\r\nbar");
        // A '\r' that is not followed by '\n' is ordinary line content.
        testcase!("foo\rbar\n");
    }

    /// `split_ascii_whitespace!` must agree with `str::split_ascii_whitespace`.
    #[test]
    fn test_split_ascii_whitespace() {
        macro_rules! testcase {
            ($input: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_ascii_whitespace!($input);

                let ans = $input.split_ascii_whitespace().collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}",
                    $input
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, expected: {:?}",
                    $input, ans
                );
            }};
        }

        // Empty and whitespace-only inputs.
        testcase!("");
        testcase!(" ");
        testcase!("   ");

        // Single and multiple words with single and repeated spaces.
        testcase!("hello");
        testcase!(" hello ");
        testcase!("  hello  ");
        testcase!("hello world");
        testcase!(" hello world ");
        testcase!("  hello   world  ");

        // Every kind of ASCII whitespace.
        testcase!("a\tb\nc\rd\x0Cf");
        testcase!(" \t\n\r\x0C ");
        testcase!("word1\t\t\tword2\n\n\nword3");

        testcase!("foo bar baz");
        testcase!("\tfoo\nbar\rbaz\x0C");
        testcase!(" a b c ");
        testcase!("\t\n\r\x0C");

        // Short inputs.
        testcase!("single");
        testcase!("a");
        testcase!("a b");
        testcase!(" a b ");
    }
}