use unicode_segmentation::UnicodeSegmentation;
use alloc::vec::Vec;
/// Reports whether the grapheme `c` consists solely of ASCII digits (`0`-`9`).
///
/// A grapheme that contains no characters at all is vacuously accepted.
fn grapheme_is_digit(c: &&str) -> bool {
    // "All digits" is expressed as "no character that is not a digit".
    !c.chars().any(|ch| !ch.is_ascii_digit())
}
/// Reports whether the grapheme `c` is an uppercase letter.
///
/// The grapheme must be cased (its uppercase and lowercase forms differ) and
/// must already be equal to its uppercase form. Digits, punctuation and other
/// caseless graphemes are therefore never uppercase.
fn grapheme_is_uppercase(c: &&str) -> bool {
    let upper = c.to_uppercase();
    let is_cased = upper != c.to_lowercase();
    is_cased && *c == upper
}
/// Reports whether the grapheme `c` is a lowercase letter.
///
/// The grapheme must be cased (its uppercase and lowercase forms differ) and
/// must already be equal to its lowercase form. Digits, punctuation and other
/// caseless graphemes are therefore never lowercase.
fn grapheme_is_lowercase(c: &&str) -> bool {
    let lower = c.to_lowercase();
    let is_cased = c.to_uppercase() != lower;
    is_cased && *c == lower
}
/// A rule describing where a string is cut into words.
///
/// `matches` tests a boundary against the graphemes that start at a given
/// position. `start` and `len` then tell `split` where, relative to that
/// position, the preceding word ends and how many graphemes are dropped
/// before the next word begins.
#[derive(Debug, Clone, Copy)]
pub enum Boundary {
/// A user-defined boundary.
Custom {
/// Predicate that `matches` calls with the graphemes starting at the
/// position under inspection.
condition: fn(&[&str]) -> bool,
/// Value returned by `start()`: the grapheme offset from the matched
/// position at which the boundary begins.
start: usize,
/// Value returned by `len()`: the number of graphemes belonging to the
/// boundary itself, which `split` leaves out of every word.
len: usize,
},
/// Matches when the first grapheme is `-`; that grapheme is removed.
Hyphen,
/// Matches when the first grapheme is `_`; that grapheme is removed.
Underscore,
/// Matches when the first grapheme is a single space; that grapheme is removed.
Space,
/// Matches an uppercase grapheme followed by a lowercase one and splits
/// between them. Not included in `Boundary::defaults()`.
UpperLower,
/// Matches a lowercase grapheme followed by an uppercase one and splits
/// between them.
LowerUpper,
/// Matches an ASCII digit followed by an uppercase grapheme and splits
/// between them.
DigitUpper,
/// Matches an uppercase grapheme followed by an ASCII digit and splits
/// between them.
UpperDigit,
/// Matches an ASCII digit followed by a lowercase grapheme and splits
/// between them.
DigitLower,
/// Matches a lowercase grapheme followed by an ASCII digit and splits
/// between them.
LowerDigit,
/// Matches two uppercase graphemes followed by a lowercase one and splits
/// between the two uppercase graphemes.
Acronym,
}
impl Boundary {
pub fn matches(self, s: &[&str]) -> bool {
use Boundary::*;
match self {
Underscore => s.first() == Some(&"_"),
Hyphen => s.first() == Some(&"-"),
Space => s.first() == Some(&" "),
Acronym => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
&& s.get(2).map(grapheme_is_lowercase) == Some(true)
}
LowerUpper => {
s.first().map(grapheme_is_lowercase) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
}
UpperLower => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_lowercase) == Some(true)
}
LowerDigit => {
s.first().map(grapheme_is_lowercase) == Some(true)
&& s.get(1).map(grapheme_is_digit) == Some(true)
}
UpperDigit => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_digit) == Some(true)
}
DigitLower => {
s.first().map(grapheme_is_digit) == Some(true)
&& s.get(1).map(grapheme_is_lowercase) == Some(true)
}
DigitUpper => {
s.first().map(grapheme_is_digit) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
}
Custom { condition, .. } => condition(s),
}
}
pub fn len(self) -> usize {
use Boundary::*;
match self {
Underscore | Hyphen | Space => 1,
LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
| Acronym => 0,
Custom { len, .. } => len,
}
}
pub fn is_empty(self) -> bool {
self.len() == 0
}
pub fn start(self) -> usize {
use Boundary::*;
match self {
Underscore | Hyphen | Space => 0,
LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
| Acronym => 1,
Custom { start, .. } => start,
}
}
pub const fn defaults() -> [Boundary; 9] {
[
Boundary::Underscore,
Boundary::Hyphen,
Boundary::Space,
Boundary::LowerUpper,
Boundary::LowerDigit,
Boundary::UpperDigit,
Boundary::DigitLower,
Boundary::DigitUpper,
Boundary::Acronym,
]
}
pub const fn digits() -> [Boundary; 4] {
[
Boundary::LowerDigit,
Boundary::UpperDigit,
Boundary::DigitLower,
Boundary::DigitUpper,
]
}
pub const fn letter_digit() -> [Boundary; 2] {
[Boundary::LowerDigit, Boundary::UpperDigit]
}
pub const fn digit_letter() -> [Boundary; 2] {
[Boundary::DigitLower, Boundary::DigitUpper]
}
pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
let mut boundaries = Vec::new();
for boundary in Boundary::defaults() {
let parts = split(&pattern, &[boundary]);
if parts.len() > 1 || parts.is_empty() || parts[0] != pattern {
boundaries.push(boundary);
}
}
boundaries
}
}
impl PartialEq for Boundary {
fn eq(&self, other: &Self) -> bool {
match (self, other) {
(Self::Hyphen, Self::Hyphen) => true,
(Self::Underscore, Self::Underscore) => true,
(Self::Space, Self::Space) => true,
(Self::UpperLower, Self::UpperLower) => true,
(Self::LowerUpper, Self::LowerUpper) => true,
(Self::DigitUpper, Self::DigitUpper) => true,
(Self::UpperDigit, Self::UpperDigit) => true,
(Self::DigitLower, Self::DigitLower) => true,
(Self::LowerDigit, Self::LowerDigit) => true,
(Self::Acronym, Self::Acronym) => true,
(Self::Custom { .. }, Self::Custom { .. }) => false,
_ => false,
}
}
}
// Marker impl so `Boundary` can be used where `Eq` is required.
// NOTE(review): `PartialEq::eq` returns `false` for every pair of `Custom`
// boundaries, including a value compared with itself, so equality is not
// reflexive for that variant.
impl Eq for Boundary {}
/// Hashes a boundary by its variant only.
///
/// The fields of `Custom` do not contribute, so every `Custom` boundary
/// produces the same hash.
impl core::hash::Hash for Boundary {
    fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
        let variant = core::mem::discriminant(self);
        core::hash::Hash::hash(&variant, state);
    }
}
/// Splits `s` into words at every position where one of `boundaries` matches.
///
/// The string is walked grapheme by grapheme. At each position the
/// boundaries are tried in the order given and the first usable match wins.
/// The word ends `Boundary::start()` graphemes after the matched position and
/// the next word begins `Boundary::len()` graphemes after that, so delimiter
/// graphemes are left out while zero-width boundaries lose no text.
///
/// Empty words are never returned: leading, trailing and consecutive
/// delimiters do not produce `""` entries, and an empty `s` yields an empty
/// vector. A match that begins inside text already consumed by the previous
/// boundary (possible with multi-grapheme `Custom` boundaries) is ignored
/// rather than causing an out-of-order slice.
///
/// The returned slices borrow from `s`.
pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
where
    T: AsRef<str>,
{
    let s = s.as_ref();
    if s.is_empty() {
        return Vec::new();
    }

    let (indices, graphemes): (Vec<usize>, Vec<&str>) = s.grapheme_indices(true).unzip();
    let byte_len = s.len();
    // Byte offset of a grapheme position; positions past the last grapheme
    // map to the end of the string.
    let byte_offset = |grapheme: usize| indices.get(grapheme).copied().unwrap_or(byte_len);

    let mut words = Vec::new();
    let mut last_boundary_end = 0;
    for i in 0..graphemes.len() {
        for boundary in boundaries {
            if !boundary.matches(&graphemes[i..]) {
                continue;
            }

            let first_grapheme = i.saturating_add(boundary.start());
            let boundary_byte_start = byte_offset(first_grapheme);
            if boundary_byte_start < last_boundary_end {
                // This match overlaps text the previous boundary already
                // consumed; slicing here would run backwards, so skip it.
                continue;
            }
            let boundary_byte_end = byte_offset(first_grapheme.saturating_add(boundary.len()));

            // Adjacent boundaries enclose nothing; do not emit an empty word.
            if boundary_byte_start > last_boundary_end {
                words.push(&s[last_boundary_end..boundary_byte_start]);
            }
            last_boundary_end = boundary_byte_end;
            break;
        }
    }

    // Whatever follows the final boundary is the last word, unless the
    // string ended on a boundary.
    if last_boundary_end < byte_len {
        words.push(&s[last_boundary_end..]);
    }
    words
}
/// Builds a `Boundary::Custom` that splits on the literal delimiter `$delim`.
///
/// The boundary matches wherever the remaining text starts with `$delim`,
/// begins at the matched position (`start` is 0) and removes the delimiter
/// from the output.
///
/// `$delim` is evaluated twice and must be usable inside a non-capturing
/// closure, so pass a string literal or constant.
///
/// The removed length is counted in characters rather than bytes, because the
/// splitter measures boundaries in graphemes. This is exact for delimiters
/// whose graphemes are single code points; a delimiter containing a
/// multi-code-point grapheme is still over-counted.
#[macro_export]
macro_rules! separator {
    ($delim:expr) => {
        // `$crate` keeps the path valid inside this crate and when the
        // dependency is renamed by the user.
        $crate::Boundary::Custom {
            condition: |s| s.join("").starts_with($delim),
            start: 0,
            len: $delim.chars().count(),
        }
    };
}
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    /// A `Custom` boundary never compares equal, not even to its own copy.
    #[test]
    fn custom_boundary_inequality() {
        let original = Boundary::Custom {
            condition: |_| true,
            start: 0,
            len: 0,
        };
        let duplicate = original;
        assert_ne!(original, duplicate);
    }

    /// Built-in boundaries compare equal exactly when they are the same variant.
    #[test]
    fn default_boundary_equality() {
        assert_eq!(Boundary::Hyphen, Boundary::Hyphen);
        assert_eq!(Boundary::Space, Boundary::Space);
        assert_ne!(Boundary::Hyphen, Boundary::Space);
    }

    /// Each built-in boundary, used on its own, splits a representative input.
    #[rstest]
    #[case(Boundary::Hyphen, "a-b-c", vec!["a", "b", "c"])]
    #[case(Boundary::Underscore, "a_b_c", vec!["a", "b", "c"])]
    #[case(Boundary::Space, "a b c", vec!["a", "b", "c"])]
    #[case(Boundary::LowerUpper, "lowerUpperUpper", vec!["lower", "Upper", "Upper"])]
    #[case(Boundary::UpperLower, "ABc", vec!["AB", "c"])]
    #[case(Boundary::Acronym, "XMLRequest", vec!["XML", "Request"])]
    #[case(Boundary::LowerDigit, "abc123", vec!["abc", "123"])]
    #[case(Boundary::UpperDigit, "ABC123", vec!["ABC", "123"])]
    #[case(Boundary::DigitLower, "123abc", vec!["123", "abc"])]
    #[case(Boundary::DigitUpper, "123ABC", vec!["123", "ABC"])]
    fn split_on_boundary(
        #[case] boundary: Boundary,
        #[case] input: &str,
        #[case] expected: Vec<&str>,
    ) {
        let words = split(&input, &[boundary]);
        assert_eq!(words, expected);
    }

    /// Several delimiters can be active in the same pass.
    #[test]
    fn split_on_multiple_delimiters() {
        let delimiters = [Boundary::Space, Boundary::Underscore, Boundary::Hyphen];
        let words = split(&"aaa-bbb_ccc ddd ddd-eee", &delimiters);
        assert_eq!(words, ["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    /// `defaults_from` reports only the default boundaries present in the input.
    #[test]
    fn boundaries_found_in_string() {
        assert!(Boundary::defaults_from(".Aaaa").is_empty());
        assert_eq!(
            Boundary::defaults_from("a8.Aa.aA"),
            [Boundary::LowerUpper, Boundary::LowerDigit]
        );
        assert_eq!(Boundary::defaults_from("b1B1b"), Boundary::digits());
        assert_eq!(
            Boundary::defaults_from("AAa -_"),
            [
                Boundary::Underscore,
                Boundary::Hyphen,
                Boundary::Space,
                Boundary::Acronym,
            ]
        );
    }
}