1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
mod bool_trie;
mod tables;

use tables::{conversions, general_category};

/// Titlecase-related extension methods; implemented in this file for `char`.
pub trait CharExt {
    /// Indicates whether a character is titlecased.
    ///
    /// 'Titlecase' is defined in terms of the Unicode General Category
    /// 'Lt'.
    fn is_titlecase(self) -> bool;

    /// Converts a character to its titlecase equivalent.
    ///
    /// This performs complex unconditional mappings with no tailoring.
    /// See `to_uppercase()` for references and more information.
    ///
    /// This differs from `to_uppercase()` since Unicode contains
    /// digraphs and ligature characters.
    /// For example, U+01F3 “dz” and U+FB01 “fi”
    /// map to U+01F2 “Dz” and U+0046 U+0069 “Fi”, respectively
    /// (whereas uppercasing U+01F3 gives U+01F1 “DZ”).
    ///
    /// # Return value
    ///
    /// Returns an iterator which yields the characters corresponding to the
    /// titlecase equivalent of the character. If no conversion is possible then
    /// an iterator with just the input character is returned.
    ///
    /// Note that `is_titlecase` will not necessarily return `true` for the
    /// yielded characters.
    fn to_titlecase(self) -> ToTitlecase;
}

impl CharExt for char {
    /// Returns whether `self` is in the generated `general_category::Lt` table.
    #[inline]
    fn is_titlecase(self) -> bool {
        let ch = self;
        general_category::Lt(ch)
    }

    /// Looks up the titlecase mapping in the generated conversion table and
    /// wraps it in the public `ToTitlecase` iterator.
    #[inline]
    fn to_titlecase(self) -> ToTitlecase {
        // The table lookup returns the fixed three-slot array that
        // `CaseMappingIter::new` takes.
        let mapping = conversions::to_title(self);
        let remaining = CaseMappingIter::new(mapping);
        ToTitlecase(remaining)
    }
}

/// The not-yet-yielded characters of a case mapping, which is at most three
/// characters long.
///
/// Every call to `next` hands out the first remaining character and moves to
/// the next-smaller variant, ending at `Zero`, which yields `None` forever.
#[derive(Debug, Clone)]
enum CaseMappingIter {
    Three(char, char, char),
    Two(char, char),
    One(char),
    Zero,
}

impl Iterator for CaseMappingIter {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        match *self {
            CaseMappingIter::Three(a, b, c) => {
                *self = CaseMappingIter::Two(b, c);
                Some(a)
            }
            CaseMappingIter::Two(b, c) => {
                *self = CaseMappingIter::One(c);
                Some(b)
            }
            CaseMappingIter::One(c) => {
                *self = CaseMappingIter::Zero;
                Some(c)
            }
            CaseMappingIter::Zero => None,
        }
    }

    /// Exact bounds: the current variant encodes how many characters are left,
    /// so consumers such as `collect()` can reserve the right capacity.
    fn size_hint(&self) -> (usize, Option<usize>) {
        let remaining = match *self {
            CaseMappingIter::Three(..) => 3,
            CaseMappingIter::Two(..) => 2,
            CaseMappingIter::One(..) => 1,
            CaseMappingIter::Zero => 0,
        };
        (remaining, Some(remaining))
    }
}

// Sound because `size_hint` above always reports the exact remaining count.
impl ExactSizeIterator for CaseMappingIter {}

impl CaseMappingIter {
    /// Builds the iterator from a fixed three-slot mapping in which unused
    /// trailing slots hold `'\0'`.
    ///
    /// Only the trailing slots are inspected: the first slot is always
    /// yielded, and a `'\0'` in the middle slot is kept when the last slot is
    /// in use.
    fn new(chars: [char; 3]) -> CaseMappingIter {
        match (chars[1], chars[2]) {
            // Including if chars[0] == '\0'
            ('\0', '\0') => CaseMappingIter::One(chars[0]),
            (_, '\0') => CaseMappingIter::Two(chars[0], chars[1]),
            _ => CaseMappingIter::Three(chars[0], chars[1], chars[2]),
        }
    }
}

/// Iterator over the characters of a titlecase mapping.
///
/// This is the type returned by `CharExt::to_titlecase`; it yields between
/// one and three `char`s and reports its exact remaining length.
#[derive(Debug, Clone)]
pub struct ToTitlecase(CaseMappingIter);

impl Iterator for ToTitlecase {
    type Item = char;

    fn next(&mut self) -> Option<char> {
        self.0.next()
    }

    /// Forwards the exact bounds of the wrapped iterator.
    fn size_hint(&self) -> (usize, Option<usize>) {
        self.0.size_hint()
    }
}

// Sound because the wrapped `CaseMappingIter` is itself exact-sized.
impl ExactSizeIterator for ToTitlecase {}

#[cfg(test)]
mod tests {
    use super::*;

    /// `is_titlecase` must accept the titlecase samples and reject a plain
    /// uppercase letter.
    #[test]
    fn test_is_titlecase() {
        // Plain uppercase is not titlecase.
        assert!(!'A'.is_titlecase());

        let titlecased = ['\u{1FFC}', 'Dž'];
        for &c in titlecased.iter() {
            assert!(c.is_titlecase());
        }
    }

    /// `to_titlecase` must yield the expected sequence for single-character,
    /// multi-character and identity mappings.
    #[test]
    fn test_to_titlecase() {
        // Collect into a `String` so expectations can be written as literals.
        fn title(c: char) -> String {
            c.to_titlecase().collect()
        }

        // Latin letters, including one that expands to two characters.
        assert_eq!(title('a'), "A");
        assert_eq!(title('ö'), "Ö");
        assert_eq!(title('ß'), "Ss"); // not ẞ: Latin capital letter sharp s
        assert_eq!(title('ü'), "Ü");

        // A character with no case mapping comes back unchanged.
        assert_eq!(title('💩'), "💩");

        // Greek letters; final sigma maps to the same capital as sigma.
        assert_eq!(title('σ'), "Σ");
        assert_eq!(title('τ'), "Τ");
        assert_eq!(title('ι'), "Ι");
        assert_eq!(title('γ'), "Γ");
        assert_eq!(title('μ'), "Μ");
        assert_eq!(title('α'), "Α");
        assert_eq!(title('ς'), "Σ");

        // Digraphs and ligatures, where titlecase differs from uppercase.
        assert_eq!(title('DŽ'), "Dž");
        assert_eq!(title('fi'), "Fi");
        assert_eq!(title('ᾀ'), "ᾈ");
    }
}