// convert_case/boundary.rs
use unicode_segmentation::UnicodeSegmentation;

use alloc::vec::Vec;
4
/// Returns `true` when every character of the grapheme is an ASCII digit (`0`-`9`).
///
/// The argument is `&&str` so the function can be handed straight to
/// `Option::map` on the `Option<&&str>` produced by `<[&str]>::first`/`get`.
fn grapheme_is_digit(c: &&str) -> bool {
    // Every byte of a multi-byte UTF-8 sequence is >= 0x80, so a byte-wise
    // ASCII-digit check accepts exactly the same strings as a char-wise one.
    c.bytes().all(|byte| byte.is_ascii_digit())
}
8
/// Returns `true` when the grapheme is cased and equal to its own uppercase form.
///
/// Graphemes whose uppercase and lowercase forms coincide (digits, punctuation,
/// caseless scripts) are never reported as uppercase.
fn grapheme_is_uppercase(c: &&str) -> bool {
    // Build the uppercase form once; both comparisons need it.
    let upper = c.to_uppercase();
    upper != c.to_lowercase() && *c == upper
}
12
/// Returns `true` when the grapheme is cased and equal to its own lowercase form.
///
/// Graphemes whose uppercase and lowercase forms coincide (digits, punctuation,
/// caseless scripts) are never reported as lowercase.
fn grapheme_is_lowercase(c: &&str) -> bool {
    // Build the lowercase form once; both comparisons need it.
    let lower = c.to_lowercase();
    c.to_uppercase() != lower && *c == lower
}
16
/// Conditions for splitting an identifier into words.
///
/// Some boundaries, [`Hyphen`](Boundary::Hyphen), [`Underscore`](Boundary::Underscore), and [`Space`](Boundary::Space),
/// consume the character they split on, whereas the other boundaries do not.
///
/// `Boundary` includes methods that return useful groups of boundaries. It also
/// contains the [`defaults_from`](Boundary::defaults_from) method which will generate a subset
/// of default boundaries based on the boundaries present in a string.
///
/// You can also create custom delimiter boundaries using the [`separator`](crate::separator)
/// macro or directly instantiate `Boundary` for complex boundary conditions.
///
/// ```
/// use convert_case::{Boundary, Case, Casing, Converter};
///
/// assert_eq!(
///     "TransformationsIn3D"
///         .from_case(Case::Camel)
///         .remove_boundaries(&Boundary::digit_letter())
///         .to_case(Case::Snake),
///     "transformations_in_3d",
/// );
///
/// let conv = Converter::new()
///     .set_boundaries(&Boundary::defaults_from("aA "))
///     .to_case(Case::Title);
/// assert_eq!(conv.convert("myVariable Name"), "My Variable Name");
/// ```
///
/// ## Example
///
/// For more complex boundaries, such as splitting based on the first character being a certain
/// symbol and the second is lowercase, you can instantiate a boundary directly.
///
/// ```
/// # use convert_case::{Boundary, Case, Casing};
/// let at_then_letter = Boundary::Custom {
///     condition: |s| {
///         s.get(0).map(|c| *c == "@") == Some(true)
///             && s.get(1).map(|c| *c == c.to_lowercase()) == Some(true)
///     },
///     start: 1,
///     len: 0,
/// };
/// assert_eq!(
///     "name@domain"
///         .set_boundaries(&[at_then_letter])
///         .to_case(Case::Title),
///     "Name@ Domain",
/// )
/// ```
#[derive(Debug, Clone, Copy)]
pub enum Boundary {
    /// A user-defined boundary described by a matching function plus the
    /// position and size of the text to cut out.
    Custom {
        /// A function that determines if this boundary is present at the start
        /// of the given slice of graphemes.
        condition: fn(&[&str]) -> bool,
        /// Where the beginning of the boundary is, as an offset in graphemes
        /// from the position at which `condition` matched.
        start: usize,
        /// The length of the boundary. This is the number of graphemes that
        /// are removed when splitting.
        len: usize,
    },

    /// Splits on `-`, consuming the character on segmentation.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("-"),
    ///     vec![Boundary::Hyphen],
    /// );
    /// ```
    Hyphen,

    /// Splits on `_`, consuming the character on segmentation.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("_"),
    ///     vec![Boundary::Underscore],
    /// );
    /// ```
    Underscore,

    /// Splits on space, consuming the character on segmentation.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from(" "),
    ///     vec![Boundary::Space],
    /// );
    /// ```
    Space,

    /// Splits where an uppercase letter is followed by a lowercase letter. This is seldom used,
    /// and is **not** included in the [defaults](Boundary::defaults).
    /// ```
    /// # use convert_case::Boundary;
    /// assert!(Boundary::defaults_from("Aa").is_empty());
    /// ```
    UpperLower,

    /// Splits where a lowercase letter is followed by an uppercase letter.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("aA"),
    ///     vec![Boundary::LowerUpper],
    /// );
    /// ```
    LowerUpper,

    /// Splits where a digit is followed by an uppercase letter.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("1A"),
    ///     vec![Boundary::DigitUpper],
    /// );
    /// ```
    DigitUpper,

    /// Splits where an uppercase letter is followed by a digit.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("A1"),
    ///     vec![Boundary::UpperDigit],
    /// );
    /// ```
    UpperDigit,

    /// Splits where a digit is followed by a lowercase letter.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("1a"),
    ///     vec![Boundary::DigitLower],
    /// );
    /// ```
    DigitLower,

    /// Splits where a lowercase letter is followed by a digit.
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("a1"),
    ///     vec![Boundary::LowerDigit],
    /// );
    /// ```
    LowerDigit,

    /// Acronyms are identified by two uppercase letters followed by a lowercase letter.
    /// The word boundary is between the two uppercase letters. For example, "HTTPRequest"
    /// would have an acronym boundary identified at "PRe" and split into "HTTP" and "Request".
    /// ```
    /// # use convert_case::Boundary;
    /// assert_eq!(
    ///     Boundary::defaults_from("AAa"),
    ///     vec![Boundary::Acronym],
    /// );
    /// ```
    Acronym,
}
180
181impl Boundary {
182 pub fn matches(self, s: &[&str]) -> bool {
183 use Boundary::*;
184 match self {
185 Underscore => s.first() == Some(&"_"),
186 Hyphen => s.first() == Some(&"-"),
187 Space => s.first() == Some(&" "),
188 Acronym => {
189 s.first().map(grapheme_is_uppercase) == Some(true)
190 && s.get(1).map(grapheme_is_uppercase) == Some(true)
191 && s.get(2).map(grapheme_is_lowercase) == Some(true)
192 }
193 LowerUpper => {
194 s.first().map(grapheme_is_lowercase) == Some(true)
195 && s.get(1).map(grapheme_is_uppercase) == Some(true)
196 }
197 UpperLower => {
198 s.first().map(grapheme_is_uppercase) == Some(true)
199 && s.get(1).map(grapheme_is_lowercase) == Some(true)
200 }
201 LowerDigit => {
202 s.first().map(grapheme_is_lowercase) == Some(true)
203 && s.get(1).map(grapheme_is_digit) == Some(true)
204 }
205 UpperDigit => {
206 s.first().map(grapheme_is_uppercase) == Some(true)
207 && s.get(1).map(grapheme_is_digit) == Some(true)
208 }
209 DigitLower => {
210 s.first().map(grapheme_is_digit) == Some(true)
211 && s.get(1).map(grapheme_is_lowercase) == Some(true)
212 }
213 DigitUpper => {
214 s.first().map(grapheme_is_digit) == Some(true)
215 && s.get(1).map(grapheme_is_uppercase) == Some(true)
216 }
217 Custom { condition, .. } => condition(s),
218 }
219 }
220
221 /// The number of graphemes consumed when splitting at the boundary.
222 pub fn len(self) -> usize {
223 use Boundary::*;
224 match self {
225 Underscore | Hyphen | Space => 1,
226 LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
227 | Acronym => 0,
228 Custom { len, .. } => len,
229 }
230 }
231
232 /// Returns true if this boundary consumes no graphemes when splitting.
233 pub fn is_empty(self) -> bool {
234 self.len() == 0
235 }
236
237 /// The index of the character to split at.
238 pub fn start(self) -> usize {
239 use Boundary::*;
240 match self {
241 Underscore | Hyphen | Space => 0,
242 LowerUpper | UpperLower | LowerDigit | UpperDigit | DigitLower | DigitUpper
243 | Acronym => 1,
244 Custom { start, .. } => start,
245 }
246 }
247
248 /// The default list of boundaries used when `Casing::to_case` is called directly
249 /// and in a `Converter` generated from `Converter::new()`.
250 /// ```
251 /// # use convert_case::Boundary;
252 /// assert_eq!(
253 /// Boundary::defaults(),
254 /// [
255 /// Boundary::Underscore,
256 /// Boundary::Hyphen,
257 /// Boundary::Space,
258 /// Boundary::LowerUpper,
259 /// Boundary::LowerDigit,
260 /// Boundary::UpperDigit,
261 /// Boundary::DigitLower,
262 /// Boundary::DigitUpper,
263 /// Boundary::Acronym,
264 /// ],
265 /// );
266 /// ```
267 pub const fn defaults() -> [Boundary; 9] {
268 [
269 Boundary::Underscore,
270 Boundary::Hyphen,
271 Boundary::Space,
272 Boundary::LowerUpper,
273 Boundary::LowerDigit,
274 Boundary::UpperDigit,
275 Boundary::DigitLower,
276 Boundary::DigitUpper,
277 Boundary::Acronym,
278 ]
279 }
280
281 /// Returns the boundaries that involve digits.
282 /// ```
283 /// # use convert_case::Boundary;
284 /// assert_eq!(
285 /// Boundary::digits(),
286 /// [
287 /// Boundary::LowerDigit,
288 /// Boundary::UpperDigit,
289 /// Boundary::DigitLower,
290 /// Boundary::DigitUpper,
291 /// ],
292 /// );
293 /// ```
294 pub const fn digits() -> [Boundary; 4] {
295 [
296 Boundary::LowerDigit,
297 Boundary::UpperDigit,
298 Boundary::DigitLower,
299 Boundary::DigitUpper,
300 ]
301 }
302
303 /// Returns the boundaries that are letters followed by digits.
304 /// ```
305 /// # use convert_case::Boundary;
306 /// assert_eq!(
307 /// Boundary::letter_digit(),
308 /// [
309 /// Boundary::LowerDigit,
310 /// Boundary::UpperDigit,
311 /// ],
312 /// );
313 /// ```
314 pub const fn letter_digit() -> [Boundary; 2] {
315 [Boundary::LowerDigit, Boundary::UpperDigit]
316 }
317
318 /// Returns the boundaries that are digits followed by letters.
319 /// ```
320 /// # use convert_case::Boundary;
321 /// assert_eq!(
322 /// Boundary::digit_letter(),
323 /// [
324 /// Boundary::DigitLower,
325 /// Boundary::DigitUpper
326 /// ],
327 /// );
328 /// ```
329 pub const fn digit_letter() -> [Boundary; 2] {
330 [Boundary::DigitLower, Boundary::DigitUpper]
331 }
332
333 /// Returns a list of all boundaries that are identified within the given string.
334 /// Could be a short of writing out all the boundaries in a list directly. This will not
335 /// identify boundary `UpperLower` if it also used as part of `Acronym`.
336 ///
337 /// If you want to be very explicit and not overlap boundaries, it is recommended to use a colon
338 /// character.
339 /// ```
340 /// # use convert_case::Boundary;
341 /// assert_eq!(
342 /// Boundary::defaults_from("aA8a -"),
343 /// vec![
344 /// Boundary::Hyphen,
345 /// Boundary::Space,
346 /// Boundary::LowerUpper,
347 /// Boundary::UpperDigit,
348 /// Boundary::DigitLower,
349 /// ],
350 /// );
351 /// assert_eq!(
352 /// Boundary::defaults_from("bD:0B:_:AAa"),
353 /// vec![
354 /// Boundary::Underscore,
355 /// Boundary::LowerUpper,
356 /// Boundary::DigitUpper,
357 /// Boundary::Acronym,
358 /// ],
359 /// );
360 /// ```
361 pub fn defaults_from(pattern: &str) -> Vec<Boundary> {
362 let mut boundaries = Vec::new();
363 for boundary in Boundary::defaults() {
364 let parts = split(&pattern, &[boundary]);
365 if parts.len() > 1 || parts.is_empty() || parts[0] != pattern {
366 boundaries.push(boundary);
367 }
368 }
369 boundaries
370 }
371}
372
373impl PartialEq for Boundary {
374 fn eq(&self, other: &Self) -> bool {
375 match (self, other) {
376 (Self::Hyphen, Self::Hyphen) => true,
377 (Self::Underscore, Self::Underscore) => true,
378 (Self::Space, Self::Space) => true,
379 (Self::UpperLower, Self::UpperLower) => true,
380 (Self::LowerUpper, Self::LowerUpper) => true,
381 (Self::DigitUpper, Self::DigitUpper) => true,
382 (Self::UpperDigit, Self::UpperDigit) => true,
383 (Self::DigitLower, Self::DigitLower) => true,
384 (Self::LowerDigit, Self::LowerDigit) => true,
385 (Self::Acronym, Self::Acronym) => true,
386 // Custom boundaries are never equal because they contain function pointers,
387 // which cannot be reliably compared.
388 (Self::Custom { .. }, Self::Custom { .. }) => false,
389 _ => false,
390 }
391 }
392}
393
394impl Eq for Boundary {}
395
396impl core::hash::Hash for Boundary {
397 fn hash<H: core::hash::Hasher>(&self, state: &mut H) {
398 // Hash only the discriminant. Custom variants can't be meaningfully
399 // compared or hashed by their function pointer, so all Custom variants
400 // hash to the same value (their discriminant).
401 core::mem::discriminant(self).hash(state);
402 }
403}
404
405/// Split an identifier into a list of words using the list of boundaries.
406///
407/// This is used internally for splitting an identifier before mutating by
408/// a pattern and joining again with a delimiter.
409/// ```
410/// use convert_case::{Boundary, split};
411/// assert_eq!(
412/// split(&"one_two-three.four", &[Boundary::Underscore, Boundary::Hyphen]),
413/// vec!["one", "two", "three.four"],
414/// )
415/// ```
416pub fn split<'s, T>(s: &'s T, boundaries: &[Boundary]) -> Vec<&'s str>
417where
418 T: AsRef<str>,
419{
420 let s = s.as_ref();
421
422 if s.is_empty() {
423 return Vec::new();
424 }
425
426 let mut words = Vec::new();
427 let mut last_boundary_end = 0;
428
429 let (indices, graphemes): (Vec<_>, Vec<_>) = s.grapheme_indices(true).unzip();
430 let grapheme_length = indices[graphemes.len() - 1] + graphemes[graphemes.len() - 1].len();
431
432 // TODO:
433 // swapping the order of these would be faster
434 // end the loop sooner if any boundary condition is met
435 // could also hit a bitvector and do the splitting at the end? May or may not be faster
436 for i in 0..graphemes.len() {
437 for boundary in boundaries {
438 //let byte_index = indices[i];
439
440 if boundary.matches(&graphemes[i..]) {
441 // What if we find a condition at the end of the array?
442 // Maybe we can stop early based on length
443 // To do this, need to switch the loops
444 // TODO
445 let boundary_byte_start: usize = *indices
446 .get(i + boundary.start())
447 .unwrap_or(&grapheme_length);
448 let boundary_byte_end: usize = *indices
449 .get(i + boundary.start() + boundary.len())
450 .unwrap_or(&grapheme_length);
451
452 // todo clean this up a bit
453 words.push(&s[last_boundary_end..boundary_byte_start]);
454 last_boundary_end = boundary_byte_end;
455 break;
456 }
457 }
458 }
459 words.push(&s[last_boundary_end..]);
460 //words.into_iter().filter(|s| !s.is_empty()).collect()
461 words.into_iter().collect()
462}
463
/// Create a new boundary based on a string.
///
/// This is shorthand for creating a boundary that splits on a specific string, and
/// omits that string from the list of words. For more information, see [`Boundary`].
///
/// The delimiter must be a string literal (or other constant `&str` expression),
/// because the generated condition is a plain function pointer and cannot capture
/// variables. The boundary length is the number of `char`s in the delimiter, which
/// equals its grapheme count unless the delimiter contains graphemes made of
/// several code points.
/// ```
/// # use convert_case::{Case, Converter, separator};
/// let conv = Converter::new()
///     .set_boundaries(&[separator!("::")])
///     .to_case(Case::Camel);
///
/// assert_eq!(
///     conv.convert("my::var::name"),
///     "myVarName",
/// )
/// ```
#[macro_export]
macro_rules! separator {
    ($delim:expr) => {
        // `$crate` keeps the path valid inside this crate and when the
        // dependency is renamed by the user.
        $crate::Boundary::Custom {
            // Equivalent to "the concatenated graphemes start with the delimiter",
            // checked grapheme by grapheme so nothing is allocated.
            condition: |s| {
                let mut rest: &str = $delim;
                for grapheme in s.iter() {
                    // This grapheme covers everything still to be matched.
                    if grapheme.len() >= rest.len() {
                        return grapheme.starts_with(rest);
                    }
                    match rest.strip_prefix(*grapheme) {
                        Some(tail) => rest = tail,
                        None => return false,
                    }
                }
                rest.is_empty()
            },
            start: 0,
            // Counted in characters, not bytes: a byte count over-consumes for
            // non-ASCII delimiters.
            len: $delim.chars().count(),
        }
    };
}
489
/// Unit tests for boundary equality, `split`, and `Boundary::defaults_from`.
#[cfg(test)]
mod tests {
    use super::*;
    use rstest::rstest;

    #[test]
    fn custom_boundary_inequality() {
        // Custom boundaries are never equal because they contain function pointers;
        // this holds even for a bitwise copy of the same value.
        let a = Boundary::Custom {
            condition: |_| true,
            start: 0,
            len: 0,
        };
        let b = a;

        assert_ne!(a, b);
    }

    #[test]
    fn default_boundary_equality() {
        assert_eq!(Boundary::Hyphen, Boundary::Hyphen);
        assert_eq!(Boundary::Space, Boundary::Space);
        assert_ne!(Boundary::Hyphen, Boundary::Space);
    }

    // Each built-in boundary, applied on its own, splits a representative input.
    #[rstest]
    #[case(Boundary::Hyphen, "a-b-c", vec!["a", "b", "c"])]
    #[case(Boundary::Underscore, "a_b_c", vec!["a", "b", "c"])]
    #[case(Boundary::Space, "a b c", vec!["a", "b", "c"])]
    #[case(Boundary::LowerUpper, "lowerUpperUpper", vec!["lower", "Upper", "Upper"])]
    #[case(Boundary::UpperLower, "ABc", vec!["AB", "c"])]
    #[case(Boundary::Acronym, "XMLRequest", vec!["XML", "Request"])]
    #[case(Boundary::LowerDigit, "abc123", vec!["abc", "123"])]
    #[case(Boundary::UpperDigit, "ABC123", vec!["ABC", "123"])]
    #[case(Boundary::DigitLower, "123abc", vec!["123", "abc"])]
    #[case(Boundary::DigitUpper, "123ABC", vec!["123", "ABC"])]
    fn split_on_boundary(
        #[case] boundary: Boundary,
        #[case] input: &str,
        #[case] expected: Vec<&str>,
    ) {
        assert_eq!(split(&input, &[boundary]), expected);
    }

    #[test]
    fn split_on_multiple_delimiters() {
        let s = "aaa-bbb_ccc ddd ddd-eee";
        let v = split(
            &s,
            &[Boundary::Space, Boundary::Underscore, Boundary::Hyphen],
        );
        assert_eq!(v, vec!["aaa", "bbb", "ccc", "ddd", "ddd", "eee"]);
    }

    #[test]
    fn boundaries_found_in_string() {
        // upper lower is no longer a default
        assert_eq!(Boundary::defaults_from(".Aaaa"), Vec::<Boundary>::new());
        assert_eq!(
            Boundary::defaults_from("a8.Aa.aA"),
            vec![Boundary::LowerUpper, Boundary::LowerDigit]
        );
        assert_eq!(
            Boundary::defaults_from("b1B1b"),
            Boundary::digits().to_vec()
        );
        assert_eq!(
            Boundary::defaults_from("AAa -_"),
            vec![
                Boundary::Underscore,
                Boundary::Hyphen,
                Boundary::Space,
                Boundary::Acronym,
            ]
        );
    }
}