use crate::slice::advance;
use crate::slice::subslice;
use crate::utf8::CharEncodeUtf8;
use core::str;
/// Shared splitting engine used by the `&str` and `char` pattern front-ends
/// in this file (their `to_impl` helpers construct it).
struct SplitImpl<'input, 'pat> {
/// The string being split; every produced piece borrows from it.
input: &'input str,
/// The separator to search for. An empty pattern is handled as a special
/// case by the methods of this type.
pattern: &'pat str,
/// When `true`, each piece keeps the separator that terminated it.
inclusive: bool,
}
impl<'input> SplitImpl<'input, '_> {
    /// Returns how many pieces [`Self::const_eval`] will write.
    ///
    /// * Empty pattern: the number of chars in the input plus two (a leading
    ///   and a trailing empty piece), independent of `inclusive`.
    /// * Otherwise: one piece per match plus the text after the last match;
    ///   in inclusive mode that remainder is only counted when non-empty.
    const fn output_len(&self) -> usize {
        if self.pattern.is_empty() {
            return crate::utf8::str_count_chars(self.input) + 2;
        }

        let mut rest = self.input;
        let mut pieces = 0;
        while let Some((_, tail)) = crate::str::next_match(rest, self.pattern) {
            pieces += 1;
            rest = tail;
        }

        // The remainder is a piece of its own, except for an empty remainder
        // in inclusive mode.
        if !(self.inclusive && rest.is_empty()) {
            pieces += 1;
        }
        pieces
    }

    /// Splits the input and returns the pieces as a fixed-size array.
    ///
    /// # Panics
    ///
    /// Panics unless `N` equals [`Self::output_len`]: a too-small `N` fails on
    /// an out-of-bounds write, any other mismatch on the final assertion.
    #[allow(unsafe_code)]
    const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let mut out: [&str; N] = [""; N];
        let mut pos = 0;

        if self.pattern.is_empty() {
            let mut bytes = self.input.as_bytes();

            // Leading empty piece.
            // SAFETY: an empty byte range is valid UTF-8.
            out[pos] = unsafe { str::from_utf8_unchecked(subslice(bytes, 0..0)) };
            pos += 1;

            // One piece per char.
            while let Some((_, width)) = crate::utf8::next_char(bytes) {
                // SAFETY: `bytes` is a suffix of a valid `&str`; this assumes
                // `next_char` reports the encoded width of its first char and
                // `subslice` returns exactly the requested range.
                out[pos] = unsafe { str::from_utf8_unchecked(subslice(bytes, 0..width)) };
                pos += 1;
                bytes = advance(bytes, width);
            }

            // Trailing empty piece.
            // SAFETY: an empty byte range is valid UTF-8.
            out[pos] = unsafe { str::from_utf8_unchecked(subslice(bytes, 0..0)) };
            pos += 1;
        } else {
            let mut rest = self.input;
            while let Some((at, tail)) = crate::str::next_match(rest, self.pattern) {
                // Inclusive pieces extend over the separator itself.
                let end = if self.inclusive {
                    at + self.pattern.len()
                } else {
                    at
                };
                // SAFETY: assumes `next_match` yields the byte offset of a real
                // occurrence of `pattern` in `rest`, so `0..end` ends on a char
                // boundary of a valid `&str`.
                out[pos] = unsafe { str::from_utf8_unchecked(subslice(rest.as_bytes(), 0..end)) };
                pos += 1;
                rest = tail;
            }

            // Same remainder rule as in `output_len`.
            if !(self.inclusive && rest.is_empty()) {
                out[pos] = rest;
                pos += 1;
            }
        }

        assert!(pos == N);
        out
    }
}
/// Compile-time splitter pairing an input (`.0`) with a pattern (`.1`).
///
/// The `split!` macro constructs this type; the `impl` blocks below provide
/// `output_len` / `const_eval` for `&str`, `char` and `&[char]` patterns.
pub struct Split<T, P>(pub T, pub P);
impl<'input, 'pat> Split<&'input str, &'pat str> {
const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
SplitImpl {
input: self.0,
pattern: self.1,
inclusive: false,
}
}
pub const fn output_len(&self) -> usize {
self.to_impl().output_len()
}
pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
self.to_impl().const_eval()
}
}
impl<'input> Split<&'input str, char> {
    /// Builds the shared splitting engine in exclusive mode, using the UTF-8
    /// encoding of the separator char held by `ch` as the string pattern.
    const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
        let pattern = ch.as_str();
        SplitImpl {
            input: self.0,
            pattern,
            inclusive: false,
        }
    }

    /// Number of pieces produced by splitting the input on the char.
    pub const fn output_len(&self) -> usize {
        // The encoded char must stay alive while the engine borrows it.
        let encoded = CharEncodeUtf8::new(self.1);
        let splitter = self.to_impl(&encoded);
        splitter.output_len()
    }

    /// Splits the input on the char into exactly `N` pieces.
    ///
    /// `N` is expected to be the value returned by [`Self::output_len`]; the
    /// underlying engine panics otherwise.
    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let encoded = CharEncodeUtf8::new(self.1);
        let splitter = self.to_impl(&encoded);
        splitter.const_eval::<N>()
    }
}
impl<'input> Split<&'input str, &[char]> {
    /// Returns `true` when `ch` is one of the separator chars in `self.1`.
    const fn char_in_slice(&self, ch: char) -> bool {
        let separators = self.1;
        let mut remaining = separators.len();
        while remaining > 0 {
            remaining -= 1;
            if separators[remaining] == ch {
                return true;
            }
        }
        false
    }

    /// Number of pieces produced by splitting the input on any of the chars:
    /// one more than the number of separator chars found in the input.
    pub const fn output_len(&self) -> usize {
        // Without separators the whole input is the single piece.
        if self.1.is_empty() {
            return 1;
        }

        let mut rest = self.0.as_bytes();
        // Start at one: the piece after the last separator always exists.
        let mut pieces = 1;
        while let Some((ch, width)) = crate::utf8::next_char(rest) {
            if self.char_in_slice(ch) {
                pieces += 1;
            }
            rest = advance(rest, width);
        }
        pieces
    }

    /// Splits the input on any of the chars into exactly `N` pieces; the
    /// separator chars themselves are dropped.
    ///
    /// # Panics
    ///
    /// Panics unless `N` equals [`Self::output_len`].
    #[allow(unsafe_code)]
    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let whole = self.0;
        let mut out: [&str; N] = [""; N];

        if self.1.is_empty() {
            out[0] = whole;
            assert!(1 == N);
            return out;
        }

        let all_bytes = whole.as_bytes();
        let mut rest = all_bytes;
        let mut pos = 0;
        // Byte offset where the piece currently being scanned begins.
        let mut piece_start = 0;
        // Byte offset of the char currently being examined.
        let mut cursor = 0;

        while let Some((ch, width)) = crate::utf8::next_char(rest) {
            let after_char = cursor + width;
            if self.char_in_slice(ch) {
                // SAFETY: both offsets are sums of char widths reported by
                // `next_char`, so (assuming it reports encoded widths and
                // `subslice` returns the requested range) the piece consists
                // of whole chars of a valid `&str`.
                let piece = subslice(all_bytes, piece_start..cursor);
                out[pos] = unsafe { core::str::from_utf8_unchecked(piece) };
                pos += 1;
                // The next piece begins right after the separator.
                piece_start = after_char;
            }
            cursor = after_char;
            rest = advance(rest, width);
        }

        // Whatever follows the last separator (possibly empty) is the final piece.
        // SAFETY: same reasoning as above; the range ends at the end of the input.
        let tail = subslice(all_bytes, piece_start..all_bytes.len());
        out[pos] = unsafe { core::str::from_utf8_unchecked(tail) };
        pos += 1;

        assert!(pos == N);
        out
    }
}
/// Splits a constant string by a pattern at compile time and evaluates to a
/// `[&str; N]` array of the pieces.
///
/// `$s` must be usable as the initializer of a `const` of type `&str`.
/// `$pat` is passed unchanged to `$crate::__ctfe::Split`, which this file
/// implements for `&str`, `char` and `&[char]` patterns.
#[macro_export]
macro_rules! split {
($s: expr, $pat: expr) => {{
const INPUT: &str = $s;
// First pass: how many pieces there are, so the array type can be named.
const OUTPUT_LEN: usize = $crate::__ctfe::Split(INPUT, $pat).output_len();
// Second pass: fill the array (`$pat` is expanded a second time here).
const OUTPUT_BUF: [&str; OUTPUT_LEN] = $crate::__ctfe::Split(INPUT, $pat).const_eval();
OUTPUT_BUF
}};
}
/// Compile-time splitter pairing an input (`.0`) with a pattern (`.1`), where
/// each piece keeps the separator that terminated it.
///
/// The `split_inclusive!` macro constructs this type; the `impl` blocks below
/// provide `output_len` / `const_eval` for `&str`, `char` and `&[char]` patterns.
pub struct SplitInclusive<T, P>(pub T, pub P);
impl<'input, 'pat> SplitInclusive<&'input str, &'pat str> {
const fn to_impl(&self) -> SplitImpl<'input, 'pat> {
SplitImpl {
input: self.0,
pattern: self.1,
inclusive: true,
}
}
pub const fn output_len(&self) -> usize {
self.to_impl().output_len()
}
pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
self.to_impl().const_eval()
}
}
impl<'input> SplitInclusive<&'input str, char> {
    /// Builds the shared splitting engine in inclusive mode, using the UTF-8
    /// encoding of the separator char held by `ch` as the string pattern.
    const fn to_impl<'pat>(&self, ch: &'pat CharEncodeUtf8) -> SplitImpl<'input, 'pat> {
        let pattern = ch.as_str();
        SplitImpl {
            input: self.0,
            pattern,
            inclusive: true,
        }
    }

    /// Number of pieces produced by inclusively splitting the input on the char.
    pub const fn output_len(&self) -> usize {
        // The encoded char must stay alive while the engine borrows it.
        let encoded = CharEncodeUtf8::new(self.1);
        let splitter = self.to_impl(&encoded);
        splitter.output_len()
    }

    /// Inclusively splits the input on the char into exactly `N` pieces.
    ///
    /// `N` is expected to be the value returned by [`Self::output_len`]; the
    /// underlying engine panics otherwise.
    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let encoded = CharEncodeUtf8::new(self.1);
        let splitter = self.to_impl(&encoded);
        splitter.const_eval::<N>()
    }
}
impl<'input> SplitInclusive<&'input str, &[char]> {
    /// Returns `true` when `ch` is one of the separator chars in `self.1`.
    const fn char_in_slice(&self, ch: char) -> bool {
        let chars = self.1;
        let mut i = 0;
        while i < chars.len() {
            if chars[i] == ch {
                return true;
            }
            i += 1;
        }
        false
    }

    /// Number of pieces produced by inclusively splitting the input on any of
    /// the separator chars.
    ///
    /// Every separator char terminates one piece. Text after the last
    /// separator forms one more piece, but only when it is non-empty, so an
    /// empty input yields `0` and an input without separators yields `1`.
    pub const fn output_len(&self) -> usize {
        let mut input = self.0.as_bytes();
        let mut terminated = 0;
        // `true` while the most recently seen char belongs to a piece that has
        // not been closed by a separator yet.
        let mut open_tail = false;
        while let Some((ch, count)) = crate::utf8::next_char(input) {
            if self.char_in_slice(ch) {
                terminated += 1;
                open_tail = false;
            } else {
                open_tail = true;
            }
            input = advance(input, count);
        }
        if open_tail {
            terminated + 1
        } else {
            terminated
        }
    }

    /// Inclusively splits the input on any of the separator chars into exactly
    /// `N` pieces; each piece keeps the separator that terminated it.
    ///
    /// # Panics
    ///
    /// Panics unless `N` equals [`Self::output_len`]: a too-small `N` fails on
    /// an out-of-bounds write, any other mismatch on the final assertion.
    #[allow(unsafe_code)]
    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let bytes = self.0.as_bytes();
        let mut rest = bytes;
        let mut buf: [&str; N] = [""; N];
        let mut pos = 0;
        // Byte offset where the piece currently being scanned begins.
        let mut start = 0;
        // Byte offset just past the char currently being examined.
        let mut end = 0;

        while let Some((ch, count)) = crate::utf8::next_char(rest) {
            end += count;
            if self.char_in_slice(ch) {
                // SAFETY: `start` and `end` are sums of char widths reported by
                // `next_char`, so (assuming it reports encoded widths and
                // `subslice` returns the requested range) the piece consists
                // of whole chars of a valid `&str`.
                let piece = subslice(bytes, start..end);
                buf[pos] = unsafe { core::str::from_utf8_unchecked(piece) };
                pos += 1;
                start = end;
            }
            rest = advance(rest, count);
        }

        // Unterminated text after the last separator is the final piece; an
        // empty tail is dropped, matching `output_len`.
        if start < bytes.len() {
            // SAFETY: same reasoning as above; the range ends at the end of the input.
            let tail = subslice(bytes, start..bytes.len());
            buf[pos] = unsafe { core::str::from_utf8_unchecked(tail) };
            pos += 1;
        }

        assert!(pos == N);
        buf
    }
}
/// Splits a constant string by a pattern at compile time, keeping each
/// separator at the end of the piece it terminates, and evaluates to a
/// `[&str; N]` array of the pieces.
///
/// `$s` must be usable as the initializer of a `const` of type `&str`.
/// `$pat` is passed unchanged to `$crate::__ctfe::SplitInclusive`, which this
/// file implements for `&str`, `char` and `&[char]` patterns.
#[macro_export]
macro_rules! split_inclusive {
($s: expr, $pat: expr) => {{
const INPUT: &str = $s;
// First pass: how many pieces there are, so the array type can be named.
const OUTPUT_LEN: usize = $crate::__ctfe::SplitInclusive(INPUT, $pat).output_len();
// Second pass: fill the array (`$pat` is expanded a second time here).
const OUTPUT_BUF: [&str; OUTPUT_LEN] =
$crate::__ctfe::SplitInclusive(INPUT, $pat).const_eval();
OUTPUT_BUF
}};
}
/// Compile-time splitter that breaks its input (`.0`) into the words
/// separated by runs of ASCII whitespace.
///
/// The `split_ascii_whitespace!` macro constructs this type.
pub struct SplitAsciiWhitespace<T>(pub T);

impl<'input> SplitAsciiWhitespace<&'input str> {
    /// Number of words in the input, i.e. maximal runs of bytes for which
    /// `u8::is_ascii_whitespace` is `false`.
    pub const fn output_len(&self) -> usize {
        let bytes = self.0.as_bytes();
        let mut count = 0;
        let mut i = 0;
        let mut in_word = false;
        while i < bytes.len() {
            if bytes[i].is_ascii_whitespace() {
                // A word ends at the first whitespace byte that follows it.
                if in_word {
                    count += 1;
                    in_word = false;
                }
            } else {
                in_word = true;
            }
            i += 1;
        }
        // A word running up to the end of the input has no closing whitespace.
        if in_word {
            count += 1;
        }
        count
    }

    /// Returns the words of the input as a fixed-size array.
    ///
    /// The words borrow from the input string (`'input`), not from `self`,
    /// so they remain usable after the splitter value is gone.
    ///
    /// # Panics
    ///
    /// Panics unless `N` equals [`Self::output_len`]: a too-small `N` fails on
    /// an out-of-bounds write, a too-large `N` on the final assertion.
    #[allow(unsafe_code)]
    pub const fn const_eval<const N: usize>(&self) -> [&'input str; N] {
        let bytes = self.0.as_bytes();
        let mut buf: [&str; N] = [""; N];
        let mut pos = 0;
        let mut i = 0;
        while i < bytes.len() {
            // Skip the whitespace run in front of the next word.
            while i < bytes.len() && bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            if i >= bytes.len() {
                break;
            }
            let start = i;
            while i < bytes.len() && !bytes[i].is_ascii_whitespace() {
                i += 1;
            }
            // SAFETY: `start..i` is delimited on each side by an ASCII byte or
            // by an end of the input. ASCII bytes never occur inside a
            // multi-byte UTF-8 sequence, so both ends are char boundaries of a
            // valid `&str` (assuming `subslice` returns exactly that range).
            let word_bytes = subslice(bytes, start..i);
            buf[pos] = unsafe { core::str::from_utf8_unchecked(word_bytes) };
            pos += 1;
        }
        assert!(pos == N);
        buf
    }
}
/// Removes one trailing line terminator from every element of `lines`.
///
/// A trailing `"\r\n"` is tried first, then a trailing `"\n"`; elements with
/// neither suffix are left untouched. Used by `split_lines!` on the pieces
/// produced by an inclusive split on `"\n"`.
pub const fn map_lines<const N: usize>(mut lines: [&str; N]) -> [&str; N] {
    let mut idx = 0;
    while idx < N {
        let line = lines[idx];
        if let Some(stripped) = crate::str::strip_suffix(line, "\r\n") {
            lines[idx] = stripped;
        } else if let Some(stripped) = crate::str::strip_suffix(line, "\n") {
            lines[idx] = stripped;
        }
        idx += 1;
    }
    lines
}
/// Splits a constant string into lines at compile time and evaluates to a
/// `[&str; N]` array.
///
/// The input is first split inclusively on `"\n"` via `split_inclusive!`;
/// `map_lines` then strips the trailing `"\r\n"` or `"\n"` from each piece.
#[macro_export]
macro_rules! split_lines {
($s: expr) => {{
$crate::__ctfe::map_lines($crate::split_inclusive!($s, "\n"))
}};
}
/// Splits a constant string on runs of ASCII whitespace at compile time and
/// evaluates to a `[&str; N]` array of the words.
///
/// `$s` must be usable as the initializer of a `const` of type `&str`.
#[macro_export]
macro_rules! split_ascii_whitespace {
($s: expr) => {{
const INPUT: &str = $s;
// First pass: how many words there are, so the array type can be named.
const OUTPUT_LEN: usize = $crate::__ctfe::SplitAsciiWhitespace(INPUT).output_len();
// Second pass: fill the array.
const OUTPUT_BUF: [&str; OUTPUT_LEN] =
$crate::__ctfe::SplitAsciiWhitespace(INPUT).const_eval();
OUTPUT_BUF
}};
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `split!` with `&str` and `char` patterns must agree with `str::split`.
    #[test]
    fn test_split() {
        macro_rules! testcase {
            ($input: expr, $pat: expr) => {{
                const OUTPUT: &[&str] = &$crate::split!($input, $pat);
                let ans = $input.split($pat).collect::<Vec<_>>();
                assert_eq!(OUTPUT.len(), ans.len());
                assert_eq!(OUTPUT, &*ans, "ans = {:?}", ans);
            }};
        }

        // String patterns, including the empty pattern and multi-byte chars.
        testcase!("", "");
        testcase!("a中1😂1!", "");
        testcase!("a中1😂1!", "a");
        testcase!("a中1😂1!", "中");
        testcase!("a中1😂1!", "1");
        testcase!("a中1😂1!", "😂");
        testcase!("a中1😂1!", "!");
        testcase!("11111", "1");
        testcase!("222", "22");
        testcase!("啊哈哈哈", "哈哈");
        testcase!("some string:another string", ":");

        // Char patterns.
        testcase!("11111", '1');
        testcase!("a中1😂1!", 'a');
        testcase!("a中1😂1!", '中');
        testcase!("a中1😂1!", '1');
        testcase!("a中1😂1!", '😂');
        testcase!("a中1😂1!", '!');
    }

    /// `split!` with a `&[char]` pattern must agree with `str::split`.
    #[test]
    fn test_split_char_slice() {
        macro_rules! testcase_char_slice {
            ($input: expr, $chars: expr) => {{
                const CHARS: &[char] = $chars;
                const OUTPUT: &[&str] = &$crate::split!($input, CHARS);
                let ans = $input.split(CHARS).collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}, chars: {:?}",
                    $input,
                    CHARS
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, chars: {:?}, expected: {:?}",
                    $input, CHARS, ans
                );
            }};
        }

        testcase_char_slice!("a,b,c", &[',']);
        testcase_char_slice!("hello", &[]);
        testcase_char_slice!("", &[]);
        testcase_char_slice!("", &[',']);
        testcase_char_slice!("hello,world;test", &[',', ';']);
        testcase_char_slice!("hello", &['x', 'y', 'z']);
        testcase_char_slice!("a,b,,c,", &[',']);
        testcase_char_slice!(";;;", &[';']);
        testcase_char_slice!("a中1😂1!", &['中', '😂']);
        testcase_char_slice!("hello世界test", &['世', '界']);
        testcase_char_slice!("one:two;three,four", &[':', ';', ',']);
    }

    /// `split_inclusive!` with a `&[char]` pattern must agree with
    /// `str::split_inclusive`.
    #[test]
    fn test_split_inclusive_char_slice() {
        macro_rules! testcase_inclusive_char_slice {
            ($input: expr, $chars: expr) => {{
                const CHARS: &[char] = $chars;
                const OUTPUT: &[&str] = &$crate::split_inclusive!($input, CHARS);
                let ans = $input.split_inclusive(CHARS).collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}, chars: {:?}",
                    $input,
                    CHARS
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, chars: {:?}, expected: {:?}",
                    $input, CHARS, ans
                );
            }};
        }

        testcase_inclusive_char_slice!("a,b,c", &[',']);
        testcase_inclusive_char_slice!("hello", &[]);
        testcase_inclusive_char_slice!("", &[]);
        testcase_inclusive_char_slice!("hello,world;test", &[',', ';']);
        testcase_inclusive_char_slice!("hello", &['x', 'y', 'z']);
        testcase_inclusive_char_slice!("a,b,,c,", &[',']);
        testcase_inclusive_char_slice!(";;;", &[';']);
        testcase_inclusive_char_slice!("a中1😂1!", &['中', '😂']);
        testcase_inclusive_char_slice!("hello世界test", &['世', '界']);
        testcase_inclusive_char_slice!("one:two;three,four", &[':', ';', ',']);
    }

    /// `split_ascii_whitespace!` must agree with `str::split_ascii_whitespace`.
    #[test]
    fn test_split_ascii_whitespace() {
        macro_rules! testcase {
            ($input: expr) => {{
                const OUTPUT: &[&str] = &$crate::split_ascii_whitespace!($input);
                let ans = $input.split_ascii_whitespace().collect::<Vec<_>>();
                assert_eq!(
                    OUTPUT.len(),
                    ans.len(),
                    "Length mismatch for input: {:?}",
                    $input
                );
                assert_eq!(
                    OUTPUT, &*ans,
                    "Content mismatch for input: {:?}, expected: {:?}",
                    $input, ans
                );
            }};
        }

        // Empty and whitespace-only inputs.
        testcase!("");
        testcase!(" ");
        testcase!("   ");

        // Single and multiple words with varying amounts of padding.
        testcase!("hello");
        testcase!(" hello ");
        testcase!("  hello  ");
        testcase!("hello world");
        testcase!(" hello world ");
        testcase!("  hello   world  ");

        // Every kind of ASCII whitespace.
        testcase!("a\tb\nc\rd\x0Cf");
        testcase!(" \t\n\r\x0C ");
        testcase!("word1\t\t\tword2\n\n\nword3");
        testcase!("foo bar baz");
        testcase!("\tfoo\nbar\rbaz\x0C");
        testcase!(" a  b  c ");
        testcase!("\t\n\r\x0C");

        // Minimal inputs.
        testcase!("single");
        testcase!("a");
        testcase!("a b");
        testcase!(" a b ");
    }

    /// Exercises the splitter types directly at runtime instead of through
    /// the macros.
    #[test]
    fn test_split_runtime() {
        // `&str` pattern.
        let split1 = Split("a,b,c", ",");
        assert_eq!(split1.output_len(), 3);
        let result1: [&str; 3] = split1.const_eval();
        assert_eq!(result1, ["a", "b", "c"]);

        let split2 = Split("hello", ",");
        assert_eq!(split2.output_len(), 1);
        let result2: [&str; 1] = split2.const_eval();
        assert_eq!(result2, ["hello"]);

        let split_empty_pat = Split("ab", "");
        assert_eq!(split_empty_pat.output_len(), 4);
        let result_empty: [&str; 4] = split_empty_pat.const_eval();
        assert_eq!(result_empty, ["", "a", "b", ""]);

        // `char` pattern.
        let split_char = Split("a,b,c", ',');
        assert_eq!(split_char.output_len(), 3);
        let result_char: [&str; 3] = split_char.const_eval();
        assert_eq!(result_char, ["a", "b", "c"]);

        let split_char2 = Split("hello", 'x');
        assert_eq!(split_char2.output_len(), 1);
        let result_char2: [&str; 1] = split_char2.const_eval();
        assert_eq!(result_char2, ["hello"]);

        // `&[char]` pattern.
        const CHARS: &[char] = &[',', ';'];
        let split_chars = Split("a,b;c", CHARS);
        assert_eq!(split_chars.output_len(), 3);
        let result_chars: [&str; 3] = split_chars.const_eval();
        assert_eq!(result_chars, ["a", "b", "c"]);

        const CHARS2: &[char] = &['x', 'y'];
        let split_chars2 = Split("hello", CHARS2);
        assert_eq!(split_chars2.output_len(), 1);
        let result_chars2: [&str; 1] = split_chars2.const_eval();
        assert_eq!(result_chars2, ["hello"]);

        // Inclusive variants.
        let split_inc = SplitInclusive("a,b,c", ",");
        assert_eq!(split_inc.output_len(), 3);
        let result_inc: [&str; 3] = split_inc.const_eval();
        assert_eq!(result_inc, ["a,", "b,", "c"]);

        // NOTE(review): this pins the crate's current behaviour for an empty
        // pattern. `str::split_inclusive("")` does not yield the trailing
        // empty piece, so this case is not comparable with std.
        let split_inc_empty = SplitInclusive("xy", "");
        assert_eq!(split_inc_empty.output_len(), 4);
        let result_inc_empty: [&str; 4] = split_inc_empty.const_eval();
        assert_eq!(result_inc_empty, ["", "x", "y", ""]);

        let split_inc_char = SplitInclusive("a,b,c", ',');
        assert_eq!(split_inc_char.output_len(), 3);
        let result_inc_char: [&str; 3] = split_inc_char.const_eval();
        assert_eq!(result_inc_char, ["a,", "b,", "c"]);

        let split_inc_chars = SplitInclusive("a,b;c", CHARS);
        assert_eq!(split_inc_chars.output_len(), 3);
        let result_inc_chars: [&str; 3] = split_inc_chars.const_eval();
        assert_eq!(result_inc_chars, ["a,", "b;", "c"]);

        // ASCII whitespace.
        let split_ws = SplitAsciiWhitespace(" hello world ");
        assert_eq!(split_ws.output_len(), 2);
        let result_ws: [&str; 2] = split_ws.const_eval();
        assert_eq!(result_ws, ["hello", "world"]);
    }

    /// Empty separator slices, empty inputs and inputs without any separator.
    #[test]
    fn test_split_edge_cases() {
        const EMPTY_CHARS: &[char] = &[];
        let split_empty_chars = Split("hello", EMPTY_CHARS);
        assert_eq!(split_empty_chars.output_len(), 1);
        let result_empty_chars: [&str; 1] = split_empty_chars.const_eval();
        assert_eq!(result_empty_chars, ["hello"]);

        const SPLIT_CHARS: &[char] = &[','];
        let split_inc_empty_input = SplitInclusive("", SPLIT_CHARS);
        assert_eq!(split_inc_empty_input.output_len(), 0);
        let result_empty_input: [&str; 0] = split_inc_empty_input.const_eval();
        assert_eq!(result_empty_input, [] as [&str; 0]);

        let split_inc_empty_chars = SplitInclusive("hello", EMPTY_CHARS);
        assert_eq!(split_inc_empty_chars.output_len(), 1);
        let result_inc_empty: [&str; 1] = split_inc_empty_chars.const_eval();
        assert_eq!(result_inc_empty, ["hello"]);

        const NO_MATCH_CHARS: &[char] = &['x', 'y', 'z'];
        let split_inc_no_match = SplitInclusive("hello", NO_MATCH_CHARS);
        assert_eq!(split_inc_no_match.output_len(), 1);
        let result_no_match: [&str; 1] = split_inc_no_match.const_eval();
        assert_eq!(result_no_match, ["hello"]);
    }
}