unicode_casing/
lib.rs

1mod bool_trie;
2mod tables;
3
4use tables::{conversions, general_category};
5
6pub trait CharExt {
7    /// Indicates whether a character is titlecased.
8    ///
9    /// 'Titlecase' is defined in terms of the Unicode General Category
10    /// 'Lt'.
11    fn is_titlecase(self) -> bool;
12
13    /// Converts a character to its titlecase equivalent.
14    ///
15    /// This performs complex unconditional mappings with no tailoring.
16    /// See `to_uppercase()` for references and more information.
17    ///
18    /// This differs from `to_uppercase()` since Unicode contains
19    /// digraphs and ligature characters.
20    /// For example, U+01F3 “dz” and U+FB01 “fi”
21    /// map to U+01F1 “DZ” and U+0046 U+0069 “Fi”, respectively.
22    ///
23    /// # Return value
24    ///
25    /// Returns an iterator which yields the characters corresponding to the
26    /// titlecase equivalent of the character. If no conversion is possible then
27    /// an iterator with just the input character is returned.
28    ///
29    /// Note that `is_titlecase` will not necessarily return `true` for the
30    /// yielded characters.
31    fn to_titlecase(self) -> ToTitlecase;
32}
33
34impl CharExt for char {
35    #[inline]
36    fn is_titlecase(self) -> bool {
37        general_category::Lt(self)
38    }
39
40    #[inline]
41    fn to_titlecase(self) -> ToTitlecase {
42        ToTitlecase(CaseMappingIter::new(conversions::to_title(self)))
43    }
44}
45
/// Iterator over the (at most three) characters of a single case mapping.
///
/// Each variant stores the characters that have not been yielded yet, in
/// the order they will be produced; `Zero` is the exhausted state.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl Iterator for CaseMappingIter {
    type Item = char;

    /// Yields the first pending character and steps down to the next
    /// smaller variant. Once `Zero` is reached it keeps returning `None`.
    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    /// Reports the exact number of pending characters, which is fully
    /// determined by the current variant. This lets `collect`/`extend`
    /// reserve the right capacity instead of seeing the default `(0, None)`.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(_) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}

impl CaseMappingIter {
    /// Builds the iterator from a three-slot mapping in which unused
    /// trailing slots are `'\0'`.
    ///
    /// The length is decided from the back: a non-NUL third slot means three
    /// characters, otherwise a non-NUL second slot means two, otherwise one.
    /// The first slot is always yielded, even when it is `'\0'` itself.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        if chars[2] != '\0' {
            CaseMappingIter::Three(chars[0], chars[1], chars[2])
        } else if chars[1] != '\0' {
            CaseMappingIter::Two(chars[0], chars[1])
        } else {
            CaseMappingIter::One(chars[0]) // Including if chars[0] == '\0'
        }
    }
}
87
88pub struct ToTitlecase(CaseMappingIter);
89
90impl Iterator for ToTitlecase {
91    type Item = char;
92    fn next(&mut self) -> Option<char> {
93        self.0.next()
94    }
95}
96
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `is_titlecase` against one non-titlecase and two titlecase
    /// characters.
    #[test]
    fn test_is_titlecase() {
        assert!(!'A'.is_titlecase());
        assert!('\u{1FFC}'.is_titlecase());
        // U+01C5 LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON.
        // Written as an escape so the literal is guaranteed to be a single
        // code point rather than the two-letter sequence "D" + "ž".
        assert!('\u{01C5}'.is_titlecase());
    }

    /// Checks `to_titlecase` on simple letters, on a character with a
    /// multi-character mapping, and on digraph / ligature characters.
    #[test]
    fn test_to_titlecase() {
        fn title(c: char) -> Vec<char> {
            c.to_titlecase().collect()
        }
        assert_eq!(title('a'), ['A']);
        assert_eq!(title('ö'), ['Ö']);
        assert_eq!(title('ß'), ['S', 's']); // not ẞ: Latin capital letter sharp s
        assert_eq!(title('ü'), ['Ü']);
        assert_eq!(title('💩'), ['💩']);

        assert_eq!(title('σ'), ['Σ']);
        assert_eq!(title('τ'), ['Τ']);
        assert_eq!(title('ι'), ['Ι']);
        assert_eq!(title('γ'), ['Γ']);
        assert_eq!(title('μ'), ['Μ']);
        assert_eq!(title('α'), ['Α']);
        assert_eq!(title('ς'), ['Σ']);
        // U+01C4 (capital digraph DZ with caron) -> U+01C5 (its titlecase form).
        assert_eq!(title('\u{01C4}'), ['\u{01C5}']);
        // U+FB01 LATIN SMALL LIGATURE FI expands to the two letters "Fi".
        assert_eq!(title('\u{FB01}'), ['F', 'i']);
        assert_eq!(title('ᾀ'), ['ᾈ']);
    }
}