unicode_casing/
lib.rs

1#![no_std]
2extern crate alloc;
3
4mod bool_trie;
5mod tables;
6
7use tables::{conversions, general_category};
8
9pub trait CharExt {
10    /// Indicates whether a character is titlecased.
11    ///
12    /// 'Titlecase' is defined in terms of the Unicode General Category
13    /// 'Lt'.
14    fn is_titlecase(self) -> bool;
15
16    /// Converts a character to its titlecase equivalent.
17    ///
18    /// This performs complex unconditional mappings with no tailoring.
19    /// See `to_uppercase()` for references and more information.
20    ///
21    /// This differs from `to_uppercase()` since Unicode contains
22    /// digraphs and ligature characters.
23    /// For example, U+01F3 “dz” and U+FB01 “fi”
24    /// map to U+01F1 “DZ” and U+0046 U+0069 “Fi”, respectively.
25    ///
26    /// # Return value
27    ///
28    /// Returns an iterator which yields the characters corresponding to the
29    /// titlecase equivalent of the character. If no conversion is possible then
30    /// an iterator with just the input character is returned.
31    ///
32    /// Note that `is_titlecase` will not necessarily return `true` for the
33    /// yielded characters.
34    fn to_titlecase(self) -> ToTitlecase;
35}
36
37impl CharExt for char {
38    #[inline]
39    fn is_titlecase(self) -> bool {
40        general_category::Lt(self)
41    }
42
43    #[inline]
44    fn to_titlecase(self) -> ToTitlecase {
45        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
46    }
47}
48
/// Iterator over the one to three characters of a single case mapping.
///
/// The variant records how many characters are still pending. Each call to
/// `next` yields the first pending character and steps down to the next
/// smaller variant, ending in `Zero`, which yields `None` forever.
#[derive(Clone, Debug)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl Iterator for CaseMappingIter {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    /// Reports the exact number of pending characters, which lets consumers
    /// such as `collect` size their buffer up front.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(..) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}

// `size_hint` is exact, so the default `len()` is correct.
impl ExactSizeIterator for CaseMappingIter {}

// `Zero` never leaves the `Zero` state, so `None` is sticky.
impl core::iter::FusedIterator for CaseMappingIter {}

impl CaseMappingIter {
    /// Builds the iterator from a mapping table entry.
    ///
    /// `chars` holds the mapped characters left-aligned, with unused trailing
    /// slots set to `'\0'`. The first slot is always yielded, even when it is
    /// itself `'\0'` (the mapping of U+0000). A `'\0'` in the middle slot is
    /// only treated as padding when the last slot is `'\0'` as well.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        match chars {
            [first, '\0', '\0'] => CaseMappingIter::One(first),
            [first, second, '\0'] => CaseMappingIter::Two(first, second),
            [first, second, third] => CaseMappingIter::Three(first, second, third),
        }
    }
}
90
91pub struct ToTitlecase(CaseMappingIter);
92
93impl Iterator for ToTitlecase {
94    type Item = char;
95    fn next(&mut self) -> Option<char> {
96        self.0.next()
97    }
98}
99
#[cfg(test)]
mod tests {
    use super::*;
    use alloc::vec::Vec;

    /// A plain uppercase letter is not titlecase; 'Lt' characters are.
    #[test]
    fn test_is_titlecase() {
        let plain_uppercase = 'A';
        assert!(!plain_uppercase.is_titlecase());

        for &titlecased in &['\u{1FFC}', 'ǅ'] {
            assert!(titlecased.is_titlecase());
        }
    }

    /// Each input must yield exactly the listed character sequence.
    #[test]
    fn test_to_titlecase() {
        let cases: &[(char, &[char])] = &[
            ('a', &['A']),
            ('ö', &['Ö']),
            ('ß', &['S', 's']), // not ẞ: Latin capital letter sharp s
            ('ü', &['Ü']),
            ('💩', &['💩']),
            ('σ', &['Σ']),
            ('τ', &['Τ']),
            ('ι', &['Ι']),
            ('γ', &['Γ']),
            ('μ', &['Μ']),
            ('α', &['Α']),
            ('ς', &['Σ']),
            ('Ǆ', &['ǅ']),
            ('ﬁ', &['F', 'i']),
            ('ᾀ', &['ᾈ']),
        ];

        for &(input, expected) in cases {
            let actual: Vec<char> = input.to_titlecase().collect();
            assert_eq!(actual, expected);
        }
    }
}