1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
//! Utility library for handling strings with German umlauts "äöüÄÖÜß".

/// In-place string processing functions.
///
/// `UmlautsOwned` adds in-place case conversion for the German umlauts
/// 'ä', 'ö', 'ü' and their uppercase variants 'Ä', 'Ö', 'Ü', alongside the
/// ASCII letters. 'ß' is left unchanged by both methods.
/// Because these functions don't resize their containers or shift the contained data,
/// they are limited and should only be used if the higher performance
/// is absolutely needed.
pub trait UmlautsOwned {
    /// Lowercases alphabetic ASCII chars and UTF-8 umlauts.
    ///
    /// Like [`make_ascii_lowercase`], but it will also make UTF-8 umlauts lowercase:
    /// - 'Ä' -> 'ä'
    /// - 'Ö' -> 'ö'
    /// - 'Ü' -> 'ü'
    ///
    /// # Examples
    ///
    /// ```rust
    /// extern crate umlauts;
    /// use umlauts::UmlautsOwned;
    ///
    /// let mut s = "Öl Ärmel Übermut".as_bytes().to_vec();
    /// s.make_utf8_umlauts_lowercase();
    /// assert_eq!("öl ärmel übermut".as_bytes(), s);
    /// ```
    ///
    /// [`make_ascii_lowercase`]: https://doc.rust-lang.org/std/primitive.slice.html#method.make_ascii_lowercase
    fn make_utf8_umlauts_lowercase(&mut self);

    /// Uppercases alphabetic ASCII chars and UTF-8 umlauts.
    ///
    /// Like [`make_ascii_uppercase`], but it will also make UTF-8 umlauts uppercase:
    /// - 'ä' -> 'Ä'
    /// - 'ö' -> 'Ö'
    /// - 'ü' -> 'Ü'
    ///
    /// # Examples
    ///
    /// ```rust
    /// extern crate umlauts;
    /// use umlauts::UmlautsOwned;
    ///
    /// let mut s = "Öl Ärmel Übermut".as_bytes().to_vec();
    /// s.make_utf8_umlauts_uppercase();
    /// assert_eq!("ÖL ÄRMEL ÜBERMUT".as_bytes(), s);
    /// ```
    ///
    /// [`make_ascii_uppercase`]: https://doc.rust-lang.org/std/primitive.slice.html#method.make_ascii_uppercase
    fn make_utf8_umlauts_uppercase(&mut self);
}

/// Byte-level implementation working directly on UTF-8 encoded data.
///
/// The umlauts 'Ä', 'Ö', 'Ü', 'ä', 'ö', 'ü' are all encoded as the lead byte
/// `0xc3` followed by a single continuation byte, so a case change only has to
/// rewrite that second byte. The slice is not validated as UTF-8: every
/// `0xc3` byte that is directly followed by one of the matching continuation
/// bytes is treated as an umlaut. Empty slices are accepted and left as they are.
impl UmlautsOwned for [u8] {
    /// Lowercases ASCII letters and the umlauts 'Ä', 'Ö', 'Ü' in place.
    ///
    /// The length of the slice never changes; all other bytes are untouched.
    fn make_utf8_umlauts_lowercase(&mut self) {
        // True while the previously visited byte was the `0xc3` lead byte.
        let mut after_lead = false;
        for byte in self.iter_mut() {
            if after_lead {
                match *byte {
                    // Ä -> ä
                    0x84 => *byte = 0xa4,
                    // Ö -> ö
                    0x96 => *byte = 0xb6,
                    // Ü -> ü
                    0x9c => *byte = 0xbc,
                    _ => {}
                }
            }
            // None of the replacement bytes equals 0xc3, so checking after the
            // rewrite is equivalent to checking the original byte.
            after_lead = *byte == 0xc3;
            // No-op for anything that is not an ASCII uppercase letter.
            byte.make_ascii_lowercase();
        }
    }

    /// Uppercases ASCII letters and the umlauts 'ä', 'ö', 'ü' in place.
    ///
    /// The length of the slice never changes; all other bytes are untouched.
    fn make_utf8_umlauts_uppercase(&mut self) {
        // True while the previously visited byte was the `0xc3` lead byte.
        let mut after_lead = false;
        for byte in self.iter_mut() {
            if after_lead {
                match *byte {
                    // ä -> Ä
                    0xa4 => *byte = 0x84,
                    // ö -> Ö
                    0xb6 => *byte = 0x96,
                    // ü -> Ü
                    0xbc => *byte = 0x9c,
                    _ => {}
                }
            }
            // None of the replacement bytes equals 0xc3, so checking after the
            // rewrite is equivalent to checking the original byte.
            after_lead = *byte == 0xc3;
            // No-op for anything that is not an ASCII lowercase letter.
            byte.make_ascii_uppercase();
        }
    }
}

/// String-slice implementation that forwards to the byte-level one for `[u8]`.
impl UmlautsOwned for str {
    /// Lowercases ASCII letters and the umlauts 'Ä', 'Ö', 'Ü' in place.
    fn make_utf8_umlauts_lowercase(&mut self) {
        // SAFETY: the `[u8]` implementation only swaps an ASCII letter for
        // another ASCII letter, or the continuation byte following `0xc3` for
        // another continuation byte, so the buffer remains valid UTF-8.
        let raw = unsafe { self.as_bytes_mut() };
        raw.make_utf8_umlauts_lowercase();
    }

    /// Uppercases ASCII letters and the umlauts 'ä', 'ö', 'ü' in place.
    fn make_utf8_umlauts_uppercase(&mut self) {
        // SAFETY: the `[u8]` implementation only swaps an ASCII letter for
        // another ASCII letter, or the continuation byte following `0xc3` for
        // another continuation byte, so the buffer remains valid UTF-8.
        let raw = unsafe { self.as_bytes_mut() };
        raw.make_utf8_umlauts_uppercase();
    }
}

#[cfg(test)]
mod tests {
    use crate::UmlautsOwned;

    /// The characters named in the crate docs, each as its own string slice.
    const UMLAUTS: [&str; 7] = ["ä", "ö", "ü", "Ä", "Ö", "Ü", "ß"];

    /// Mixed-case input shared by the conversion tests.
    const MIXED: &str = "ÄÖÜäöüABCDabcd";
    /// `MIXED` after lowercasing.
    const LOWER: &str = "äöüäöüabcdabcd";
    /// `MIXED` after uppercasing.
    const UPPER: &str = "ÄÖÜÄÖÜABCDABCD";

    /// Every listed character occupies exactly two bytes in UTF-8.
    #[test]
    fn char_length() {
        for umlaut in UMLAUTS.iter() {
            assert_eq!(umlaut.len(), 2);
        }
    }

    /// Every listed character starts with the lead byte `0xc3`.
    #[test]
    fn char_start() {
        for umlaut in UMLAUTS.iter() {
            assert_eq!(umlaut.as_bytes()[0], 0xc3);
        }
    }

    /// Lowercasing followed by uppercasing on a byte vector.
    #[test]
    fn make_utf8_bytes() {
        let mut bytes = MIXED.as_bytes().to_vec();
        bytes.make_utf8_umlauts_lowercase();
        assert_eq!(bytes, LOWER.as_bytes());
        bytes.make_utf8_umlauts_uppercase();
        assert_eq!(bytes, UPPER.as_bytes());
    }

    /// Lowercasing followed by uppercasing on an owned `String`.
    #[test]
    fn make_utf8_string() {
        let mut owned = String::from(MIXED);
        owned.make_utf8_umlauts_lowercase();
        assert_eq!(owned, LOWER);
        owned.make_utf8_umlauts_uppercase();
        assert_eq!(owned, UPPER);
    }
}