1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
//! Functionality for keeping strings in a format the client expects.
//!
//! # Background
//!
//! The client uppercases both the username and password before hashing them. The username sent to
//! the server is also an uppercased version. This means that to the client, there's no difference
//! between logging in as `alice`, `ALICE`, or anything in between. This is no problem for ASCII
//! characters as they have well defined upper- and lowercase letters.
//!
//! Unicode characters, however, act differently and without any real pattern.
//!
//! The letter `ń`, Unicode code point `U+0144`, name `LATIN SMALL LETTER N WITH ACUTE` for example,
//! appears as a capital `N` in the client, and sends the byte `0x4E` which is ASCII N. This is
//! despite the letter `Ń`, Unicode code point `U+0143`, name `LATIN CAPITAL LETTER N WITH ACUTE`
//! existing.
//!
//! The letter `ž`, Unicode code point `U+017E`, name `LATIN SMALL LETTER Z WITH CARON` appears as
//! the literal letter `ž` and gets sent over the network as the bytes `0xC5 0xBE` which is UTF-8
//! for that same letter.
//!
//! The letter `Ž`, Unicode code point `U+017D`, name `LATIN CAPITAL LETTER Z WITH CARON` appears as
//! the literal letter `Ž` in the client and gets sent over the network as the bytes `0xC5 0xBD`
//! which is UTF-8 for that same letter.
//!
//! The letter `ƒ`, Unicode code point `U+0192`, name `LATIN SMALL LETTER F WITH HOOK`, appears as
//! the literal letter `ƒ` and gets sent over the network as the bytes `0xC6 0x92` which is UTF-8
//! for that same letter.
//!
//! The letter `Ƒ`, Unicode code point `U+0191`, name `LATIN CAPITAL LETTER F WITH HOOK` appears as
//! the lower case version in the client and gets sent over the network as the lowercase version.
//!
//! None of the Cyrillic letters show in the client; each one gets transmitted as a question mark (byte `0x3F`).
//!
//! These wildly varying rules for transforming the username and password means that the only way to
//! really be sure how a specific character is represented on the client and gets sent over the network
//! is to test every single unicode character. The behavior is also not guaranteed to be the same across
//! different versions, or even different localizations of the same version.
//!
//! The user is able to enter up to 16 characters in the client, each of which will be sent over
//! the network as one or more UTF-8 bytes.
//!
//! # Problems
//!
//! The user will need to register their account outside of the client. They might name their account
//! `Ƒast` and get through registration because the web service does not know that the letter `Ƒ` can not
//! be represented in the client and is instead shown and sent as `ƒ`. The user is unable to log in, instead
//! getting an "Account does not exist" message.
//!
//! Another user creates an account named `ńacho` and gets through registration. Since the letter `ń` is
//! represented as the letter `N` in the client, the sign up service
//! makes this transformation in order to stay in sync with the client.
//! This might allow the user to log into the account named `Nacho`, depending on which
//! verifier/salt pair is fetched from the database.
//!
//! Authentication relies on the signup service, server and client having the exact
//! same behavior, otherwise vulnerabilities will appear or users might be unable to log in.
//!
//! # Solution
//!
//! The only manageable solution is to stick to only the ASCII character set and reject all other
//! characters.
//! This greatly reduces the complexity of every link in the chain and decreases possible vulnerabilities.
//!
//! This also provides the benefit of knowing exactly how large an account name can be.
//!

use crate::error::NormalizedStringError;
use std::fmt;
use std::fmt::{Display, Formatter};

/// Represents usernames and passwords containing only allowed characters.
///
/// Values produced by [`NormalizedString::new`] hold at most
/// [`MAXIMUM_STRING_LENGTH_IN_BYTES`] bytes of ASCII text without any ASCII
/// control characters, with every ASCII letter stored in uppercase.
///
/// Ownership is always taken by the function requiring it in order to prevent any cloning and reallocation.
///
/// See [`normalized_string`](`crate::normalized_string`) for more information.
#[derive(Debug)]
pub struct NormalizedString {
    // The wrapped string. Not public, so code outside this module cannot
    // construct or modify it directly.
    s: String,
}

/// The largest size, in bytes, that [`NormalizedString::new`] accepts.
///
/// This mirrors the highest amount of letters that the client will allow in
/// both the username and password fields. Since only ASCII characters are
/// accepted, one letter always occupies exactly one byte.
/// Always 16.
pub const MAXIMUM_STRING_LENGTH_IN_BYTES: u8 = 16;

impl NormalizedString {
    /// Validates `s` and stores it with all ASCII letters uppercased.
    ///
    /// The input is converted into an owned `String` once and then uppercased
    /// in place, so no second allocation is made for the normalized value.
    ///
    /// Allowed characters are all ASCII characters except for ASCII control characters.
    /// The string may be empty and may be at most
    /// [`MAXIMUM_STRING_LENGTH_IN_BYTES`] bytes long.
    ///
    /// # Errors
    ///
    /// * [`NormalizedStringError::StringTooLong`] if `s` is longer than
    ///   [`MAXIMUM_STRING_LENGTH_IN_BYTES`] bytes. This is checked first, so an
    ///   over-long string is reported as too long even when it also contains
    ///   characters that are not allowed.
    /// * [`NormalizedStringError::CharacterNotAllowed`] carrying the first
    ///   character that is either not ASCII or is an ASCII control character.
    ///
    /// See the [module level docs](crate::normalized_string) for explanation of allowed values.
    pub fn new(s: impl Into<String>) -> Result<Self, NormalizedStringError> {
        let mut s = s.into();

        // `len()` counts bytes, not characters, which is exactly what the limit is expressed in.
        if s.len() > usize::from(MAXIMUM_STRING_LENGTH_IN_BYTES) {
            return Err(NormalizedStringError::StringTooLong);
        }

        if let Some(c) = s
            .chars()
            .find(|c| !c.is_ascii() || c.is_ascii_control())
        {
            return Err(NormalizedStringError::CharacterNotAllowed(c));
        }

        // Everything left is ASCII, so the in-place ASCII uppercase covers every letter.
        s.make_ascii_uppercase();

        Ok(Self { s })
    }
}

impl Display for NormalizedString {
    /// Writes the wrapped string to `f` exactly as stored.
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        f.write_str(&self.s)
    }
}

impl AsRef<str> for NormalizedString {
    /// Borrows the wrapped string as a `&str` without copying it.
    fn as_ref(&self) -> &str {
        self.s.as_str()
    }
}

#[cfg(test)]
mod test {
    use crate::error::NormalizedStringError;
    use crate::normalized_string::{NormalizedString, MAXIMUM_STRING_LENGTH_IN_BYTES};

    /// Every printable ASCII character, space (`0x20`) through `~` (`0x7E`),
    /// must be accepted and come back with ASCII letters uppercased.
    #[test]
    fn allows_all_ascii_chars() {
        for c in (0x20_u8..=0x7E).map(char::from) {
            let s = NormalizedString::new(c)
                .unwrap_or_else(|e| panic!("Char should be allowed: {:?} ({:?})", c, e));
            let normalized: &str = s.as_ref();
            assert_eq!(normalized, c.to_ascii_uppercase().to_string());
        }
    }

    #[test]
    // Acts as a sentinel to ensure that the ascii check doesn't disappear
    fn only_allows_ascii_chars() {
        // All ASCII control codes: 0x00 through 0x1F, plus DEL (0x7F).
        let ascii_control_codes = (0x00_u8..=0x1F).chain(std::iter::once(0x7F)).map(char::from);
        for c in ascii_control_codes {
            let s = NormalizedString::new(c);
            assert!(s.is_err(), "Char should not be allowed: {:?}", c);
        }

        // Arbitrarily picked non-ASCII characters: Latin-1, Latin Extended, Cyrillic and Greek
        let not_allowed_chars = "¢£¤¦¥©¨«¹²³¬®±µ¼½¾¿ÇÐØÞßðüĎГДЕЖЗЙΨΩ";
        for c in not_allowed_chars.chars() {
            let s = NormalizedString::new(c);
            assert!(s.is_err(), "Char should not be allowed: '{}'", c);
        }
    }

    /// A string of exactly the maximum length is accepted, one byte more is
    /// rejected specifically as too long.
    #[test]
    fn not_too_long() {
        let acceptable_length_string = NormalizedString::new("16bytelongstring").unwrap();
        let normalized: &str = acceptable_length_string.as_ref();
        assert_eq!(normalized, "16BYTELONGSTRING");

        match NormalizedString::new("17bytelongstringA") {
            Ok(_) => {
                panic!(
                    "Should not allow strings longer than: '{}'",
                    MAXIMUM_STRING_LENGTH_IN_BYTES
                );
            }
            Err(NormalizedStringError::CharacterNotAllowed(_)) => {
                panic!("should fail due to string being too long")
            }
            Err(NormalizedStringError::StringTooLong) => {}
        }
    }
}