use unicode_segmentation::UnicodeSegmentation;
use alloc::vec::Vec;
/// Returns `true` when every byte of the grapheme is an ASCII digit (`0`-`9`).
///
/// Any non-ASCII character is encoded with bytes outside the ASCII range, so
/// scanning bytes gives the same answer as scanning `char`s. An empty string
/// is vacuously accepted.
fn grapheme_is_digit(c: &&str) -> bool {
    c.bytes().all(|byte| byte.is_ascii_digit())
}
/// Returns `true` when the grapheme is cased and already in its uppercase form.
///
/// A grapheme counts as cased only if its uppercase and lowercase mappings
/// differ, so digits, punctuation and caseless scripts are rejected.
fn grapheme_is_uppercase(c: &&str) -> bool {
    let grapheme: &str = c;
    let upper = grapheme.to_uppercase();
    // Not equal to its own uppercase mapping: cannot be uppercase.
    if upper != grapheme {
        return false;
    }
    // Equal to its uppercase mapping; require that lowercasing changes it.
    grapheme.to_lowercase() != upper
}
/// Returns `true` when the grapheme is cased and already in its lowercase form.
///
/// A grapheme counts as cased only if its uppercase and lowercase mappings
/// differ, so digits, punctuation and caseless scripts are rejected.
fn grapheme_is_lowercase(c: &&str) -> bool {
    let grapheme: &str = c;
    let lower = grapheme.to_lowercase();
    // Not equal to its own lowercase mapping: cannot be lowercase.
    if lower != grapheme {
        return false;
    }
    // Equal to its lowercase mapping; require that uppercasing changes it.
    grapheme.to_uppercase() != lower
}
/// A rule describing where a string is split into words.
///
/// Each boundary has a condition that is tested against the graphemes
/// remaining at a position in the string, plus a `start` and a `len`
/// (both counted in graphemes) that say where the boundary sits relative
/// to that position and how many graphemes it removes from the output.
#[derive(Debug, PartialEq, Eq, Hash, Clone, Copy)]
pub enum Boundary {
/// A user-defined boundary.
Custom {
/// Predicate called with the graphemes from the current position to the
/// end of the string; returns `true` when the boundary is present there.
condition: fn(&[&str]) -> bool,
/// Offset, in graphemes from the matched position, at which the
/// preceding word ends.
start: usize,
/// Number of graphemes, beginning at `start`, that belong to the
/// boundary itself and are left out of both neighbouring words.
len: usize,
},
/// Matches when the first grapheme is `-`; the hyphen is removed.
Hyphen,
/// Matches when the first grapheme is `_`; the underscore is removed.
Underscore,
/// Matches when the first grapheme is a single space; the space is removed.
Space,
/// An uppercase grapheme followed by a lowercase one; splits between them.
/// Not part of `Boundary::defaults()`.
UpperLower,
/// A lowercase grapheme followed by an uppercase one; splits between them.
LowerUpper,
/// An ASCII digit followed by an uppercase grapheme; splits between them.
DigitUpper,
/// An uppercase grapheme followed by an ASCII digit; splits between them.
UpperDigit,
/// An ASCII digit followed by a lowercase grapheme; splits between them.
DigitLower,
/// A lowercase grapheme followed by an ASCII digit; splits between them.
LowerDigit,
/// Two uppercase graphemes followed by a lowercase one; splits after the
/// first uppercase grapheme (e.g. `XMLRequest` -> `XML`, `Request`).
Acronym,
}
impl Boundary {
pub fn matches(self, s: &[&str]) -> bool {
use Boundary::*;
match self {
Underscore => s.first() == Some(&"_"),
Hyphen => s.first() == Some(&"-"),
Space => s.first() == Some(&" "),
Acronym => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
&& s.get(2).map(grapheme_is_lowercase) == Some(true)
}
LowerUpper => {
s.first().map(grapheme_is_lowercase) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
}
UpperLower => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_lowercase) == Some(true)
}
LowerDigit => {
s.first().map(grapheme_is_lowercase) == Some(true)
&& s.get(1).map(grapheme_is_digit) == Some(true)
}
UpperDigit => {
s.first().map(grapheme_is_uppercase) == Some(true)
&& s.get(1).map(grapheme_is_digit) == Some(true)
}
DigitLower => {
s.first().map(grapheme_is_digit) == Some(true)
&& s.get(1).map(grapheme_is_lowercase) == Some(true)
}
DigitUpper => {
s.first().map(grapheme_is_digit) == Some(true)
&& s.get(1).map(grapheme_is_uppercase) == Some(true)
}
Custom { condition, .. } => condition(s),
}
}
pub fn len(self) -> usize {
use Boundary::*;
match self {
Underscore | Hyphen | Space => 1,
LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
| Acronym => 0,
Custom { len, .. } => len,
}
}
pub fn start(self) -> usize {
use Boundary::*;
match self {
Underscore | Hyphen | Space => 0,
LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
| Acronym => 1,
Custom { start, .. } => start,
}
}
pub const fn defaults() -> [Boundary; 9] {
[
Boundary::Underscore,
Boundary::Hyphen,
Boundary::Space,
Boundary::LowerUpper,
Boundary::LowerDigit,
Boundary::UpperDigit,
Boundary::DigitLower,
Boundary::DigitUpper,
Boundary::Acronym,
]
}
pub const fn digits() -> [Boundary; 4] {
[
Boundary::LowerDigit,
Boundary::UpperDigit,
Boundary::DigitLower,
Boundary::DigitUpper,
]
}
pub const fn letter_digit() -> [Boundary; 2] {
[Boundary::LowerDigit, Boundary::UpperDigit]
}
pub const fn digit_letter() -> [Boundary; 2] {
[Boundary::DigitLower, Boundary::DigitUpper]
}
pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
let mut boundaries = Vec::new();
for boundary in Boundary::defaults() {
let parts = split(&pattern, &[boundary]);
if parts.len() > 1 || parts.is_empty() || parts[0] != pattern {
boundaries.push(boundary);
}
}
boundaries
}
}
/// Splits `s` into words wherever one of `boundaries` matches.
///
/// The string is walked grapheme by grapheme. At each position the boundaries
/// are tried in the order given and the first applicable one wins. The matched
/// boundary's `start()` and `len()` (both in graphemes) decide where the
/// previous word ends and where the next one begins; the graphemes in between
/// are discarded.
///
/// Empty words are never returned, so leading, trailing or repeated
/// delimiters do not produce `""` entries. An empty input, or an input made
/// up solely of delimiters, yields an empty vector.
///
/// The returned slices borrow from `s`.
pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
where
    T: AsRef<str>,
{
    let s = s.as_ref();
    if s.is_empty() {
        return Vec::new();
    }

    let (indices, graphemes): (Vec<usize>, Vec<&str>) = s.grapheme_indices(true).unzip();
    let total_bytes = s.len();
    // Byte offset of the grapheme at `idx`; any position past the last
    // grapheme maps to the end of the string.
    let byte_offset = |idx: usize| indices.get(idx).copied().unwrap_or(total_bytes);

    let mut words = Vec::new();
    let mut last_boundary_end = 0;

    for i in 0..graphemes.len() {
        for boundary in boundaries {
            if !boundary.matches(&graphemes[i..]) {
                continue;
            }
            let boundary_byte_start = byte_offset(i + boundary.start());
            // A multi-grapheme boundary can match again inside the span the
            // previous match already consumed (e.g. "::" on ":::"). Slicing
            // with start > end would panic, so such a match is skipped.
            if boundary_byte_start < last_boundary_end {
                continue;
            }
            let boundary_byte_end = byte_offset(i + boundary.start() + boundary.len());
            words.push(&s[last_boundary_end..boundary_byte_start]);
            last_boundary_end = boundary_byte_end;
            break;
        }
    }
    words.push(&s[last_boundary_end..]);

    // Adjacent, leading or trailing boundaries leave empty slices behind;
    // they are not words.
    words.retain(|word| !word.is_empty());
    words
}
/// Builds a `Boundary::Custom` that matches the literal delimiter `$delim`
/// and removes it from the output.
///
/// The generated condition concatenates the remaining graphemes and checks
/// whether the result starts with `$delim`. The boundary begins at the
/// matched position (`start: 0`).
///
/// `$delim` is evaluated in two places and must be usable inside a
/// non-capturing closure, so pass a string literal or a constant.
///
/// NOTE(review): `len` is taken from `$delim.len()`, which for a string is a
/// byte count, while the splitter interprets `len` as a number of graphemes.
/// The two agree for ASCII delimiters only — confirm before using a
/// non-ASCII delimiter.
///
/// The expansion refers to the type through `$crate`, so the macro also works
/// inside the defining crate and when the crate is imported under another
/// name.
#[macro_export]
macro_rules! delim_boundary {
    ($delim:expr) => {
        $crate::Boundary::Custom {
            condition: |s| s.join("").starts_with($delim),
            start: 0,
            len: $delim.len(),
        }
    };
}
// Unit tests for `Boundary` and `split`.
#[cfg(test)]
mod tests {
use super::*;
// A `Custom` boundary is `Copy` and compares equal to its own copy.
#[test]
fn boundary_equality() {
let a = Boundary::Custom {
condition: |_| true,
start: 0,
len: 0,
};
let b = a;
assert_eq!(a, b)
}
// Hyphens separate words and are removed from the output.
#[test]
fn hyphen() {
let s = "a-b-c";
let v = split(&s, &[Boundary::Hyphen]);
assert_eq!(v, vec!["a", "b", "c"]);
}
// Underscores separate words and are removed from the output.
#[test]
fn underscore() {
let s = "a_b_c";
let v = split(&s, &[Boundary::Underscore]);
assert_eq!(v, vec!["a", "b", "c"]);
}
// Spaces separate words and are removed from the output.
#[test]
fn space() {
let s = "a b c";
let v = split(&s, &[Boundary::Space]);
assert_eq!(v, vec!["a", "b", "c"]);
}
// Several delimiter boundaries can be active in the same call.
#[test]
fn delimiters() {
let s = "aaa-bbb_ccc ddd ddd-eee";
let v = split(
&s,
&[Boundary::Space, Boundary::Underscore, Boundary::Hyphen],
);
assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
}
// A lower-to-upper transition splits without removing any grapheme.
#[test]
fn lower_upper() {
let s = "lowerUpperUpper";
let v = split(&s, &[Boundary::LowerUpper]);
assert_eq!(v, vec!["lower", "Upper", "Upper"]);
}
// The acronym boundary splits before the last capital of an uppercase run.
#[test]
fn acronym() {
let s = "XMLRequest";
let v = split(&s, &[Boundary::Acronym]);
assert_eq!(v, vec!["XML", "Request"]);
}
// `defaults_from` reports only the default boundaries present in the input,
// in the order used by `Boundary::defaults()`.
#[test]
fn boundaries_found_in_string() {
// `UpperLower` is not a default boundary, so "Aa" on its own finds nothing.
assert_eq!(Vec::<Boundary>::new(), Boundary::defaults_from(".Aaaa"));
assert_eq!(
vec![Boundary::LowerUpper, Boundary::LowerDigit],
Boundary::defaults_from("a8.Aa.aA")
);
// All four letter/digit transitions occur in "b1B1b".
assert_eq!(
Boundary::digits().to_vec(),
Boundary::defaults_from("b1B1b")
);
assert_eq!(
vec![
Boundary::Underscore,
Boundary::Hyphen,
Boundary::Space,
Boundary::Acronym,
],
Boundary::defaults_from("AAa -_")
);
}
// Fieldless variants compare equal to themselves.
#[test]
fn boundary_consts_same() {
assert_eq!(Boundary::Space, Boundary::Space);
}
}