punycode 0.4.1

Functions to decode and encode Punycode
Documentation
#![deny(
    missing_copy_implementations,
    missing_debug_implementations,
    missing_docs,
    trivial_casts,
    trivial_numeric_casts,
    unsafe_code,
    unused_import_braces,
    unused_qualifications,
)]

#![cfg_attr(feature = "dev", feature(plugin))]
#![cfg_attr(feature = "dev", plugin(clippy))]
#![cfg_attr(feature = "dev", deny(clippy))]

//! Functions to decode and encode [RFC-3492 Punycode](https://tools.ietf.org/html/rfc3492).

// Bootstring parameters used by the Punycode encoder and decoder below.
// See [RFC-3492, section 4](https://tools.ietf.org/html/rfc3492#section-4).
// Number of distinct digit values (`a`-`z` then `0`-`9`); `decode_digit` also returns it to
// signal an invalid character.
const BASE         : u32 = 36;
// Lower bound of the per-digit threshold computed with `clamped_sub`.
const TMIN         : u32 = 1;
// Upper bound of the per-digit threshold computed with `clamped_sub`.
const TMAX         : u32 = 26;
// Used by `adapt` in the final bias formula.
const SKEW         : u32 = 38;
// Divisor applied by `adapt` to the very first delta.
const DAMP         : u32 = 700;
// Bias in effect before the first delta has been processed.
const INITIAL_BIAS : u32 = 72;
// First code point that is not copied literally (everything below 128 is ASCII).
const INITIAL_N    : u32 = 128;
// Separates the literal ASCII prefix from the encoded deltas.
const DELIMITER    : char = '-';

/// Decode the string as Punycode. The string should not contain the initial `xn--` and must
/// contain only ASCII characters.
///
/// Everything before the last `-` is copied to the output unchanged. The remainder is read as a
/// sequence of variable-length integers; each one selects a code point and the position at
/// which it is inserted into the output.
///
/// # Errors
/// Returns `Err(())` if the input contains a non-ASCII character, if a character after the last
/// delimiter is not a digit (`a`-`z`, `A`-`Z`, `0`-`9`), if the input ends in the middle of an
/// integer, if an intermediate value does not fit in a `u32`, or if a decoded value is not a
/// valid Unicode scalar value.
///
/// # Example
/// ```
/// assert_eq!(
///     punycode::decode("acadmie-franaise-npb1a").unwrap(),
///     "académie-française"
/// );
/// ```
pub fn decode(input: &str) -> Result<String, ()> {
    if !input.is_ascii() {
        return Err(());
    }

    let mut n = INITIAL_N;
    let mut i: u32 = 0;
    let mut bias = INITIAL_BIAS;

    // Split at the LAST delimiter: the literal part may itself contain `-`.
    let (mut output, encoded): (Vec<char>, &str) = match input.rfind(DELIMITER) {
        Some(pos) => (input[..pos].chars().collect(), &input[pos + 1..]),
        None => (Vec::new(), input),
    };

    let mut digits = encoded.chars().peekable();
    while digits.peek().is_some() {
        let oldi = i;
        let mut w: u32 = 1;

        // Read one variable-length integer and add it to `i`.
        for k in 1.. {
            // Running out of input in the middle of an integer is an error.
            let c = digits.next().ok_or(())?;

            let k = k * BASE;

            let digit = decode_digit(c);

            // `decode_digit` returns `BASE` for characters that are not digits.
            if digit == BASE {
                return Err(());
            }

            // i += digit * w, failing on overflow
            let step = digit.checked_mul(w).ok_or(())?;
            i = i.checked_add(step).ok_or(())?;

            let t = clamped_sub(TMIN, k, bias, TMAX);
            if digit < t {
                break;
            }

            // Fail only when `w * (BASE - t)` itself overflows. A more conservative bound
            // rejects valid integers that need a ninth digit.
            w = w.checked_mul(BASE - t).ok_or(())?;
        }

        let len = (output.len() + 1) as u32;
        bias = adapt(i - oldi, len, oldi == 0);

        // `i / len` is how far to advance the code point, `i % len` is where to insert it.
        n = n.checked_add(i / len).ok_or(())?;
        i %= len;

        // Rejects surrogates and values above U+10FFFF.
        let c = std::char::from_u32(n).ok_or(())?;
        output.insert(i as usize, c);

        i += 1;
    }

    Ok(output.into_iter().collect())
}

/// Encode a string as punycode. The result string will contain only ASCII characters. The result
/// string does not start with `xn--`.
///
/// The input is split into its `char`s and handed to `encode_slice`, whose result is returned
/// unchanged.
///
/// # Example
/// ```
/// assert_eq!(
///     punycode::encode("académie-française").unwrap(),
///     "acadmie-franaise-npb1a"
/// );
/// ```
pub fn encode(input: &str) -> Result<String, ()> {
    let code_points: Vec<char> = input.chars().collect();
    encode_slice(&code_points)
}

/// Encode a slice of code points as Punycode (without the `xn--` prefix).
///
/// ASCII characters are copied to the output first, followed by the delimiter if there was at
/// least one. The remaining code points are then processed in increasing order; for each
/// occurrence a delta is written as a variable-length integer using the digits `a`-`z`, `0`-`9`.
///
/// Returns `Err(())` if the input holds more code points than fit in a `u32`, or if the delta
/// counter would overflow a `u32`.
fn encode_slice(input: &[char]) -> Result<String, ()> {
    // All counters are `u32`; refuse inputs whose length would be truncated by the cast below.
    if input.len() > std::u32::MAX as usize {
        return Err(());
    }
    let input_len = input.len() as u32;

    let mut n = INITIAL_N;
    let mut delta: u32 = 0;
    let mut bias = INITIAL_BIAS;

    let mut output : String = input.iter().filter(|&&c| c.is_ascii()).cloned().collect();
    // `output` is pure ASCII here, so its byte length is the number of code points handled.
    let mut h = output.len() as u32;
    let b = h;

    if b > 0 {
        output.push(DELIMITER)
    }

    while h < input_len {
        // Smallest code point not handled yet.
        let m = input.iter()
            .map(|&c| c as u32)
            .filter(|&c| c >= n)
            .min()
            .expect("h < input.len() implies a code point >= n is still unhandled");

        // delta += (m - n) * (h + 1), failing on overflow
        let step = (m - n).checked_mul(h + 1).ok_or(())?;
        delta = delta.checked_add(step).ok_or(())?;

        n = m;

        for &c in input {
            let c = c as u32;
            if c < n {
                // `delta` may already be at `u32::MAX` after the addition above, so this
                // increment has to be checked as well.
                delta = delta.checked_add(1).ok_or(())?;
            }
            else if c == n {
                // Write `delta` as a variable-length integer.
                let mut q = delta;

                for k in 1.. {
                    let k = k * BASE;

                    let t = clamped_sub(TMIN, k, bias, TMAX);

                    if q < t {
                        break;
                    }

                    output.push(encode_digit(t + (q - t) % (BASE - t)));

                    q = (q - t) / (BASE - t);
                }

                // Last digit of the integer: always below the threshold.
                output.push(encode_digit(q));

                bias = adapt(delta, h + 1, h == b);
                delta = 0;
                h += 1;
            }
        }

        delta = delta.checked_add(1).ok_or(())?;
        n += 1;
    }

    Ok(output)
}

/// Compute the bias to use for the next delta.
///
/// `delta` is first divided by `DAMP` (when `firsttime` is set) or by 2, then increased by its
/// own value divided by `numpoint`. It is then repeatedly divided by `BASE - TMIN` until it no
/// longer exceeds `(BASE - TMIN) * TMAX / 2`; each division adds `BASE` to the result.
fn adapt(delta: u32, numpoint: u32, firsttime: bool) -> u32 {
    let divisor = if firsttime { DAMP } else { 2 };
    let scaled = delta / divisor;
    let mut rest = scaled + scaled / numpoint;

    let limit = (BASE - TMIN) * TMAX / 2;
    let mut offset = 0;
    while rest > limit {
        rest /= BASE - TMIN;
        offset += BASE;
    }

    offset + (BASE - TMIN + 1) * rest / (rest + SKEW)
}

/// Compute `lhs - rhs`, clamped to the range `[min, max]`.
///
/// The bounds are tested by comparing `lhs` against `min + rhs` and `max + rhs`, so the
/// subtraction is only evaluated when its result lies strictly between `min` and `max`.
fn clamped_sub<T>(min: T, lhs: T, rhs: T, max: T) -> T
where T : Ord
        + std::ops::Add<Output=T>
        + std::ops::Sub<Output=T>
        + Copy
{
    if lhs <= min + rhs {
        return min;
    }
    if lhs >= max + rhs {
        return max;
    }
    lhs - rhs
}

fn decode_digit(c: char) -> u32 {
    let cp = c as u32;

    match c {
        '0' ... '9' => cp - ('0' as u32) + 26,
        'A' ... 'Z' => cp - ('A' as u32),
        'a' ... 'z' => cp - ('a' as u32),
        _ => BASE,
    }
}

/// Return the lowercase character for digit value `d`: `0..=25` map to `'a'`-`'z'` and
/// `26..=35` map to `'0'`-`'9'`.
///
/// # Panics
/// Panics if `d` is 36 or more. The range is matched on `d` itself rather than on a byte
/// obtained with a truncating cast, so large values cannot alias to a valid character.
fn encode_digit(d: u32) -> char {
    match d {
        0..=25 => (b'a' + d as u8) as char,
        26..=35 => (b'0' + (d - 26) as u8) as char,
        _ => panic!("invalid punycode digit: {}", d),
    }
}

// Shared fixtures for `test_decode` and `test_encode`.
// Each entry is `(decoded Unicode text, Punycode form without the `xn--` prefix)`.
// `test_encode` compares case-insensitively, so the Punycode side may contain uppercase letters.
#[cfg(test)]
static TESTS: &'static [(&'static str, &'static str)] = &[
    // examples taken from [RFC-3492, section 7.1](https://tools.ietf.org/html/rfc3492#section-7.1)
    (&"\u{0644}\u{064A}\u{0647}\u{0645}\u{0627}\u{0628}\u{062A}\u{0643}\u{0644}\
       \u{0645}\u{0648}\u{0634}\u{0639}\u{0631}\u{0628}\u{064A}\u{061F}",
     &"egbpdaj6bu4bxfgehfvwxn"),

    (&"\u{4ED6}\u{4EEC}\u{4E3A}\u{4EC0}\u{4E48}\u{4E0D}\u{8BF4}\u{4E2D}\u{6587}",
     &"ihqwcrb4cv8a8dqg056pqjye"),

    (&"\u{4ED6}\u{5011}\u{7232}\u{4EC0}\u{9EBD}\u{4E0D}\u{8AAA}\u{4E2D}\u{6587}",
     &"ihqwctvzc91f659drss3x8bo0yb"),

    (&"\u{0050}\u{0072}\u{006F}\u{010D}\u{0070}\u{0072}\u{006F}\u{0073}\u{0074}\
       \u{011B}\u{006E}\u{0065}\u{006D}\u{006C}\u{0075}\u{0076}\u{00ED}\u{010D}\
       \u{0065}\u{0073}\u{006B}\u{0079}",
     &"Proprostnemluvesky-uyb24dma41a"),

    (&"\u{05DC}\u{05DE}\u{05D4}\u{05D4}\u{05DD}\u{05E4}\u{05E9}\u{05D5}\u{05D8}\
       \u{05DC}\u{05D0}\u{05DE}\u{05D3}\u{05D1}\u{05E8}\u{05D9}\u{05DD}\u{05E2}\
       \u{05D1}\u{05E8}\u{05D9}\u{05EA}",
     &"4dbcagdahymbxekheh6e0a7fei0b"),

    (&"\u{092F}\u{0939}\u{0932}\u{094B}\u{0917}\u{0939}\u{093F}\u{0928}\u{094D}\
       \u{0926}\u{0940}\u{0915}\u{094D}\u{092F}\u{094B}\u{0902}\u{0928}\u{0939}\
       \u{0940}\u{0902}\u{092C}\u{094B}\u{0932}\u{0938}\u{0915}\u{0924}\u{0947}\
       \u{0939}\u{0948}\u{0902}",
     &"i1baa7eci9glrd9b2ae1bj0hfcgg6iyaf8o0a1dig0cd"),

    (&"\u{306A}\u{305C}\u{307F}\u{3093}\u{306A}\u{65E5}\u{672C}\u{8A9E}\u{3092}\
       \u{8A71}\u{3057}\u{3066}\u{304F}\u{308C}\u{306A}\u{3044}\u{306E}\u{304B}",
     &"n8jok5ay5dzabd5bym9f0cm5685rrjetr6pdxa"),

    (&"\u{C138}\u{ACC4}\u{C758}\u{BAA8}\u{B4E0}\u{C0AC}\u{B78C}\u{B4E4}\u{C774}\
       \u{D55C}\u{AD6D}\u{C5B4}\u{B97C}\u{C774}\u{D574}\u{D55C}\u{B2E4}\u{BA74}\
       \u{C5BC}\u{B9C8}\u{B098}\u{C88B}\u{C744}\u{AE4C}",
     &"989aomsvi5e83db1d2a355cv1e0vak1dwrv93d5xbh15a0dt30a5jpsd879ccm6fea98c"),

    (&"\u{043F}\u{043E}\u{0447}\u{0435}\u{043C}\u{0443}\u{0436}\u{0435}\u{043E}\
       \u{043D}\u{0438}\u{043D}\u{0435}\u{0433}\u{043E}\u{0432}\u{043E}\u{0440}\
       \u{044F}\u{0442}\u{043F}\u{043E}\u{0440}\u{0443}\u{0441}\u{0441}\u{043A}\
       \u{0438}",
     &"b1abfaaepdrnnbgefbaDotcwatmq2g4l"),

    (&"\u{0050}\u{006F}\u{0072}\u{0071}\u{0075}\u{00E9}\u{006E}\u{006F}\u{0070}\
       \u{0075}\u{0065}\u{0064}\u{0065}\u{006E}\u{0073}\u{0069}\u{006D}\u{0070}\
       \u{006C}\u{0065}\u{006D}\u{0065}\u{006E}\u{0074}\u{0065}\u{0068}\u{0061}\
       \u{0062}\u{006C}\u{0061}\u{0072}\u{0065}\u{006E}\u{0045}\u{0073}\u{0070}\
       \u{0061}\u{00F1}\u{006F}\u{006C}",
     &"PorqunopuedensimplementehablarenEspaol-fmd56a"),

    (&"\u{0054}\u{1EA1}\u{0069}\u{0073}\u{0061}\u{006F}\u{0068}\u{1ECD}\u{006B}\
       \u{0068}\u{00F4}\u{006E}\u{0067}\u{0074}\u{0068}\u{1EC3}\u{0063}\u{0068}\
       \u{1EC9}\u{006E}\u{00F3}\u{0069}\u{0074}\u{0069}\u{1EBF}\u{006E}\u{0067}\
       \u{0056}\u{0069}\u{1EC7}\u{0074}",
     &"TisaohkhngthchnitingVit-kjcr8268qyxafd2f1b9g"),

    (&"\u{0033}\u{5E74}\u{0042}\u{7D44}\u{91D1}\u{516B}\u{5148}\u{751F}",
     &"3B-ww4c5e180e575a65lsy2b"),

    (&"\u{5B89}\u{5BA4}\u{5948}\u{7F8E}\u{6075}\u{002D}\u{0077}\u{0069}\u{0074}\
       \u{0068}\u{002D}\u{0053}\u{0055}\u{0050}\u{0045}\u{0052}\u{002D}\u{004D}\
       \u{004F}\u{004E}\u{004B}\u{0045}\u{0059}\u{0053}",
     &"-with-SUPER-MONKEYS-pc58ag80a8qai00g7n9n"),

    (&"\u{0048}\u{0065}\u{006C}\u{006C}\u{006F}\u{002D}\u{0041}\u{006E}\u{006F}\
       \u{0074}\u{0068}\u{0065}\u{0072}\u{002D}\u{0057}\u{0061}\u{0079}\u{002D}\
       \u{305D}\u{308C}\u{305E}\u{308C}\u{306E}\u{5834}\u{6240}",
     &"Hello-Another-Way--fc4qua05auwb3674vfr0b"),

    (&"\u{3072}\u{3068}\u{3064}\u{5C4B}\u{6839}\u{306E}\u{4E0B}\u{0032}",
     &"2-u9tlzr9756bt3uc0v"),

    (&"\u{004D}\u{0061}\u{006A}\u{0069}\u{3067}\u{004B}\u{006F}\u{0069}\u{3059}\
       \u{308B}\u{0035}\u{79D2}\u{524D}",
     &"MajiKoi5-783gue6qz075azm5e"),

    (&"\u{30D1}\u{30D5}\u{30A3}\u{30FC}\u{0064}\u{0065}\u{30EB}\u{30F3}\u{30D0}",
     &"de-jg4avhby1noc0d"),

    (&"\u{305D}\u{306E}\u{30B9}\u{30D4}\u{30FC}\u{30C9}\u{3067}",
     &"d9juau41awczczp"),

    (&"\u{002D}\u{003E}\u{0020}\u{0024}\u{0031}\u{002E}\u{0030}\u{0030}\u{0020}\
       \u{003C}\u{002D}",
     &"-> $1.00 <--"),

     // some real-life examples
     (&"académie-française", &"acadmie-franaise-npb1a"),
     (&"bücher", &"bcher-kva"),
     (&"république-numérique", &"rpublique-numrique-bwbm"),

     // some real-life TLD
     (&"бг",       &"90ae"),
     (&"рф",       &"p1ai"),
     (&"укр",      &"j1amh"),
     (&"السعودية", &"mgberp4a5d4ar"),
     (&"امارات",   &"mgbaam7a8h"),
     (&"مصر",      &"wgbh1c"),
     (&"中国",     &"fiqs8s"),
     (&"中國",     &"fiqz9s"),
     (&"台湾",     &"kprw13d"),
     (&"台灣",     &"kpry57d"),
     (&"香港",     &"j6w193g"),

     // other
     (&"", &""),
     (&"a", &"a-"),
     (&"0", &"0-"),
     (&"A", &"A-"),
     (&"é", &"9ca"),
     (&"\n", &"\n-"),
];

// Every fixture's Punycode form must decode to exactly its Unicode form.
#[test]
fn test_decode() {
    for &(decoded, encoded) in TESTS {
        let expected: String = decoded.into();
        assert_eq!(decode(encoded), Ok(expected));
    }
}

// Every fixture's Unicode form must encode to its Punycode form, ignoring letter case.
#[test]
fn test_encode() {
    for &(decoded, encoded) in TESTS {
        let actual = encode(decoded).unwrap().to_lowercase();
        assert_eq!(actual, encoded.to_lowercase());
    }
}

// Inputs that `decode` must reject.
#[test]
fn test_fail_decode() {
    let rejected = ["bcher-kva.ch", "+", "\\", "é", "99999999"];

    for input in rejected.iter() {
        assert_eq!(decode(input), Err(()));
    }
}