use stfu8;
use unicode_segmentation::UnicodeSegmentation;
/// Splits `subject` into its individual characters, each returned as a
/// sub-slice of the input.
///
/// An empty `subject` produces a vector holding one empty string slice.
pub fn chars(subject: &str) -> Vec<&str> {
    if subject.is_empty() {
        vec![""]
    } else {
        // Slice out each character by its byte offset and encoded width so
        // the results borrow from `subject` instead of allocating.
        subject
            .char_indices()
            .map(|(start, ch)| &subject[start..start + ch.len_utf8()])
            .collect()
    }
}
/// Splits `subject` on every occurrence of `pattern`.
///
/// * An empty `subject` yields a vector holding one empty string slice.
/// * An empty `pattern` yields a vector holding `subject` unchanged.
/// * Otherwise the pieces between matches are returned; a trailing empty
///   piece (when `subject` ends with `pattern`) is omitted.
pub fn split<'a>(subject: &'a str, pattern: &str) -> Vec<&'a str> {
    match (subject.is_empty(), pattern.is_empty()) {
        (true, _) => vec![""],
        (false, true) => vec![subject],
        (false, false) => subject.split_terminator(pattern).collect(),
    }
}
/// Splits `subject` into words.
///
/// The text is first segmented with `unicode_words()`, each segment is then
/// cut at `-` and `_`, empty pieces are dropped, and every remaining piece is
/// finally broken at its camel-case boundaries.
pub fn words(subject: &str) -> Vec<&str> {
    // Breaks a single separator-free token at lowercase -> uppercase
    // transitions and before the last capital of an uppercase run that is
    // followed by a lowercase letter.
    fn camel_case_pieces(token: &str) -> Vec<&str> {
        #[derive(Clone, Copy, PartialEq)]
        enum Case {
            Neutral,
            Lower,
            Upper,
        }

        let mut pieces = Vec::new();
        let mut piece_start = 0;
        let mut state = Case::Neutral;
        let mut cursor = token.char_indices().peekable();

        while let Some((idx, current)) = cursor.next() {
            // The final character has no successor to compare against; the
            // remaining tail is flushed after the loop.
            let (peek_idx, upcoming) = match cursor.peek() {
                Some(&pair) => pair,
                None => break,
            };

            // Characters that are neither lowercase nor uppercase keep the
            // state that was already in effect.
            let after = if current.is_lowercase() {
                Case::Lower
            } else if current.is_uppercase() {
                Case::Upper
            } else {
                state
            };

            // Decide where (if anywhere) a word boundary falls at this step.
            let cut_at = if after == Case::Lower && upcoming.is_uppercase() {
                // lower|Upper: the boundary sits before the upcoming char.
                Some(peek_idx)
            } else if state == Case::Upper && current.is_uppercase() && upcoming.is_lowercase() {
                // UPPER|Upper-lower: the boundary sits before the current char.
                Some(idx)
            } else {
                None
            };

            match cut_at {
                Some(boundary) => {
                    pieces.push(&token[piece_start..boundary]);
                    piece_start = boundary;
                    state = Case::Neutral;
                }
                None => state = after,
            }
        }

        pieces.push(&token[piece_start..]);
        pieces
    }

    // Punctuation that separates words inside a single segment.
    fn is_separator(c: char) -> bool {
        c == '-' || c == '_'
    }

    subject
        .unicode_words()
        .flat_map(|segment| segment.split_terminator(is_separator))
        .filter(|piece| !piece.is_empty())
        .flat_map(camel_case_pieces)
        .collect()
}
pub fn graphemes(subject: &str) -> Vec<&str> {
if subject.is_empty() {
return vec![""];
}
UnicodeSegmentation::graphemes(subject, true).collect::<Vec<&str>>()
}
/// Returns the UTF-16 code units of `subject`.
///
/// Despite the name, the values are 16-bit code units rather than Unicode
/// scalar values: characters outside the Basic Multilingual Plane are
/// returned as a surrogate pair. An empty `subject` yields an empty vector.
///
/// The input is taken literally. Backslashes are ordinary characters and are
/// never interpreted as escape sequences, so this function cannot panic on
/// any `&str`.
pub fn code_points(subject: &str) -> Vec<u16> {
    // `encode_utf16` is infallible for valid `str` data, unlike decoding the
    // text as an escaped format and unwrapping the result.
    subject.encode_utf16().collect()
}