// Copyright 2013-2014 The rust-url developers.
//
// Licensed under the Apache License, Version 2.0 <LICENSE-APACHE or
// http://www.apache.org/licenses/LICENSE-2.0> or the MIT license
// <LICENSE-MIT or http://opensource.org/licenses/MIT>, at your
// option. This file may not be copied, modified, or distributed
// except according to those terms.

//! [*Unicode IDNA Compatibility Processing*
//! (Unicode Technical Standard #46)](http://www.unicode.org/reports/tr46/)

use self::Mapping::*;
use punycode;
#[allow(unused_imports, deprecated)]
use std::ascii::AsciiExt;
use std::cmp::Ordering::{Equal, Less, Greater};
use unicode_bidi::{BidiClass, bidi_class};
use unicode_normalization::UnicodeNormalization;
use unicode_normalization::char::is_combining_mark;

include!("uts46_mapping_table.rs");


/// Prefix that marks a Punycode-encoded label.
///
/// `processing` Punycode-decodes every label that starts with this prefix,
/// and `to_ascii` prepends it to every label it Punycode-encodes.
pub static PUNYCODE_PREFIX: &'static str = "xn--";


/// Location of a replacement string inside `STRING_TABLE`.
///
/// `decode_slice` turns one of these into the `&'static str` it designates.
#[derive(Debug)]
struct StringTableSlice {
    // Store these as separate fields so the structure will have an
    // alignment of 1 and thus pack better into the Mapping enum, below.
    /// Low 8 bits of the starting byte offset into `STRING_TABLE`.
    byte_start_lo: u8,
    /// High 8 bits of the starting byte offset into `STRING_TABLE`.
    byte_start_hi: u8,
    /// Length of the string, in bytes.
    byte_len: u8,
}

/// Resolves a `StringTableSlice` to the text it designates in `STRING_TABLE`.
///
/// The starting byte offset is stored as two separate bytes and is put back
/// together here; the returned slice spans `byte_len` bytes from that offset.
fn decode_slice(slice: &StringTableSlice) -> &'static str {
    let begin = (usize::from(slice.byte_start_hi) << 8) | usize::from(slice.byte_start_lo);
    let end = begin + usize::from(slice.byte_len);
    &STRING_TABLE[begin..end]
}

/// Status of a code point in the mapping table, as returned by `find_char`.
///
/// The per-variant notes describe how `map_char` treats each status.
#[repr(u8)]
#[derive(Debug)]
enum Mapping {
    /// The code point is kept as is.
    Valid,
    /// The code point is dropped from the output.
    Ignored,
    /// The code point is replaced by the referenced string.
    Mapped(StringTableSlice),
    /// Replaced by the referenced string under transitional processing,
    /// kept as is otherwise.
    Deviation(StringTableSlice),
    /// The code point is kept, and an error is recorded.
    Disallowed,
    /// The code point is kept; an error is recorded only when
    /// `use_std3_ascii_rules` is set.
    DisallowedStd3Valid,
    /// The code point is replaced by the referenced string; an error is
    /// recorded only when `use_std3_ascii_rules` is set.
    DisallowedStd3Mapped(StringTableSlice),
}

/// An inclusive range of code points.
///
/// `find_char` binary-searches `TABLE` by comparing a code point against
/// these two bounds.
struct Range {
    /// First code point of the range (inclusive).
    from: char,
    /// Last code point of the range (inclusive).
    to: char,
}

/// Looks up the `Mapping` that applies to `codepoint`.
///
/// `TABLE` is binary-searched for the range containing the code point; the
/// entry of `INDEX_TABLE` at the same position then says where to look in
/// `MAPPING_TABLE`:
///
/// * if its top bit (`SINGLE_MARKER`) is set, the remaining 15 bits are the
///   index of one mapping shared by the whole range;
/// * otherwise they are the index of the mapping for the first code point of
///   the range, and each following code point uses the next entry.
///
/// # Panics
///
/// Panics if no range in `TABLE` contains `codepoint`, or if the computed
/// index is out of bounds for `MAPPING_TABLE`.
fn find_char(codepoint: char) -> &'static Mapping {
    const SINGLE_MARKER: u16 = 1 << 15;

    let i = TABLE
        .binary_search_by(|range| {
            if codepoint > range.to {
                Less
            } else if codepoint < range.from {
                Greater
            } else {
                Equal
            }
        })
        .expect("no range in TABLE contains this code point");

    let x = INDEX_TABLE[i];
    let offset = (x & !SINGLE_MARKER) as usize;

    if x & SINGLE_MARKER != 0 {
        &MAPPING_TABLE[offset]
    } else {
        // Compute the distance on full-width scalar values: truncating both
        // code points to 16 bits first would underflow for a range that
        // crosses a 0x10000 boundary. The search above guarantees
        // `codepoint >= TABLE[i].from`, so this subtraction cannot underflow.
        let delta = (codepoint as u32 - TABLE[i].from as u32) as usize;
        &MAPPING_TABLE[offset + delta]
    }
}

fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
    match *find_char(codepoint) {
        Mapping::Valid => output.push(codepoint),
        Mapping::Ignored => {},
        Mapping::Mapped(ref slice) => output.push_str(decode_slice(slice)),
        Mapping::Deviation(ref slice) => {
            if flags.transitional_processing {
                output.push_str(decode_slice(slice))
            } else {
                output.push(codepoint)
            }
        }
        Mapping::Disallowed => {
            errors.push(Error::DissallowedCharacter);
            output.push(codepoint);
        }
        Mapping::DisallowedStd3Valid => {
            if flags.use_std3_ascii_rules {
                errors.push(Error::DissallowedByStd3AsciiRules);
            }
            output.push(codepoint)
        }
        Mapping::DisallowedStd3Mapped(ref slice) => {
            if flags.use_std3_ascii_rules {
                errors.push(Error::DissallowedMappedInStd3);
            }
            output.push_str(decode_slice(slice))
        }
    }
}

// http://tools.ietf.org/html/rfc5893#section-2
fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
    // Rule 0: Bidi Rules apply to Bidi Domain Names: a name with at least one RTL label.  A label
    // is RTL if it contains at least one character of bidi class R, AL or AN.
    if !is_bidi_domain {
        return true;
    }

    let mut chars = label.chars();
    let first_char_class = match chars.next() {
        Some(c) => bidi_class(c),
        None => return true, // empty string
    };

    match first_char_class {
        // LTR label
        BidiClass::L => {
            // Rule 5
            loop {
                match chars.next() {
                    Some(c) => {
                        if !matches!(bidi_class(c),
                                     BidiClass::L | BidiClass::EN |
                                     BidiClass::ES | BidiClass::CS |
                                     BidiClass::ET | BidiClass::ON |
                                     BidiClass::BN | BidiClass::NSM
                                    ) {
                            return false;
                        }
                    },
                    None => { break; },
                }
            }

            // Rule 6
            // must end in L or EN followed by 0 or more NSM
            let mut rev_chars = label.chars().rev();
            let mut last_non_nsm = rev_chars.next();
            loop {
                match last_non_nsm {
                    Some(c) if bidi_class(c) == BidiClass::NSM => {
                        last_non_nsm = rev_chars.next();
                        continue;
                    }
                    _ => { break; },
                }
            }
            match last_non_nsm {
                Some(c) if bidi_class(c) == BidiClass::L
                    || bidi_class(c) == BidiClass::EN => {},
                Some(_) => { return false; },
                _ => {}
            }

        }

        // RTL label
        BidiClass::R | BidiClass::AL => {
            let mut found_en = false;
            let mut found_an = false;

            // Rule 2
            loop {
                match chars.next() {
                    Some(c) => {
                        let char_class = bidi_class(c);

                        if char_class == BidiClass::EN {
                            found_en = true;
                        }
                        if char_class == BidiClass::AN {
                            found_an = true;
                        }

                        if !matches!(char_class, BidiClass::R | BidiClass::AL |
                                     BidiClass::AN | BidiClass::EN |
                                     BidiClass::ES | BidiClass::CS |
                                     BidiClass::ET | BidiClass::ON |
                                     BidiClass::BN | BidiClass::NSM) {
                            return false;
                        }
                    },
                    None => { break; },
                }
            }
            // Rule 3
            let mut rev_chars = label.chars().rev();
            let mut last = rev_chars.next();
            loop { // must end in L or EN followed by 0 or more NSM
                match last {
                    Some(c) if bidi_class(c) == BidiClass::NSM => {
                        last = rev_chars.next();
                        continue;
                    }
                    _ => { break; },
                }
            }
            match last {
                Some(c) if matches!(bidi_class(c), BidiClass::R | BidiClass::AL |
                                    BidiClass::EN | BidiClass::AN) => {},
                _ => { return false; }
            }

            // Rule 4
            if found_an && found_en {
                return false;
            }
        }

        // Rule 1: Should start with L or R/AL
        _ => {
            return false;
        }
    }

    return true;
}

/// http://www.unicode.org/reports/tr46/#Validity_Criteria
///
/// Runs every validity check on `label`, including the NFC check (V1) that
/// `validate` skips. A label that is not in NFC gets a single
/// `Error::ValidityCriteria` and the remaining criteria are not evaluated.
fn validate_full(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
    // V1: Must be in NFC form, i.e. normalizing must leave the label unchanged.
    let already_nfc = label.chars().eq(label.nfc());
    if already_nfc {
        validate(label, is_bidi_domain, flags, errors);
    } else {
        errors.push(Error::ValidityCriteria);
    }
}

/// Checks `label` against validity criteria V3, V5, V6 and V8 of UTS #46.
///
/// The NFC check (V1) is not performed here; see `validate_full`. At most one
/// `Error::ValidityCriteria` is pushed onto `errors`, no matter how many
/// criteria fail. An empty label always passes.
fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
    let first = match label.chars().next() {
        Some(c) => c,
        None => return, // Empty string, pass
    };

    // V6: Check against Mapping Table
    let fails_mapping_table = |c: char| match *find_char(c) {
        Mapping::Valid => false,
        Mapping::Deviation(_) => flags.transitional_processing,
        Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
        Mapping::Ignored |
        Mapping::Mapped(_) |
        Mapping::Disallowed |
        Mapping::DisallowedStd3Mapped(_) => true,
    };

    // V2: No U+002D HYPHEN-MINUS in both third and fourth positions.
    //
    // NOTE: Spec says that the label must not contain a HYPHEN-MINUS character in both the
    // third and fourth positions. But nobody follows this criteria. See the spec issue below:
    // https://github.com/whatwg/url/issues/53
    //
    // TODO: Add *CheckHyphens* flag.
    //
    // V4: not contain a U+002E FULL STOP
    //
    // Here, label can't contain '.' since the input is from .split('.')
    //
    // V7: ContextJ rules
    //
    // TODO: Implement rules and add *CheckJoiners* flag.
    //
    // V8: Bidi rules
    //
    // TODO: Add *CheckBidi* flag
    let rejected =
        // V3: neither begin nor end with a U+002D HYPHEN-MINUS
        label.starts_with('-') || label.ends_with('-')
        // V5: not begin with a GC=Mark
        || is_combining_mark(first)
        // V6
        || label.chars().any(fails_mapping_table)
        // V8
        || !passes_bidi(label, is_bidi_domain);

    if rejected {
        errors.push(Error::ValidityCriteria);
    }
}

/// http://www.unicode.org/reports/tr46/#Processing
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
    let mut mapped = String::with_capacity(domain.len());
    for c in domain.chars() {
        map_char(c, flags, &mut mapped, errors)
    }
    let mut normalized = String::with_capacity(mapped.len());
    normalized.extend(mapped.nfc());

    // Find out if it's a Bidi Domain Name
    //
    // First, check for literal bidi chars
    let mut is_bidi_domain = domain.chars().any(|c|
        matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN)
    );
    if !is_bidi_domain {
        // Then check for punycode-encoded bidi chars
        for label in normalized.split('.') {
            if label.starts_with(PUNYCODE_PREFIX) {
                match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
                    Some(decoded_label) => {
                        if decoded_label.chars().any(|c|
                            matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN)
                        ) {
                            is_bidi_domain = true;
                        }
                    }
                    None => {
                        is_bidi_domain = true;
                    }
                }
            }
        }
    }

    let mut validated = String::new();
    let mut first = true;
    for label in normalized.split('.') {
        if !first {
            validated.push('.');
        }
        first = false;
        if label.starts_with(PUNYCODE_PREFIX) {
            match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
                Some(decoded_label) => {
                    let flags = Flags { transitional_processing: false, ..flags };
                    validate_full(&decoded_label, is_bidi_domain, flags, errors);
                    validated.push_str(&decoded_label)
                }
                None => errors.push(Error::PunycodeError)
            }
        } else {
            // `normalized` is already `NFC` so we can skip that check
            validate(label, is_bidi_domain, flags, errors);
            validated.push_str(label)
        }
    }
    validated
}

/// Options controlling UTS #46 processing in `to_ascii` and `to_unicode`.
#[derive(Copy, Clone)]
pub struct Flags {
   /// When set, code points whose mapping is `DisallowedStd3Valid` or
   /// `DisallowedStd3Mapped` are reported as errors.
   pub use_std3_ascii_rules: bool,
   /// When set, deviation code points are replaced by their mapping instead
   /// of being kept. `to_unicode` always clears this flag.
   pub transitional_processing: bool,
   /// When set, `to_ascii` also rejects empty names and labels, names longer
   /// than 253 bytes and labels longer than 63 bytes.
   pub verify_dns_length: bool,
}

/// Individual problems recorded while processing a domain name.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Error {
    /// Punycode decoding of a prefixed label, or Punycode encoding of a
    /// non-ASCII label, failed.
    PunycodeError,
    /// A label failed one of the checks in `validate` / `validate_full`.
    ValidityCriteria,
    /// A `DisallowedStd3Valid` code point was seen while
    /// `use_std3_ascii_rules` was set.
    DissallowedByStd3AsciiRules,
    /// A `DisallowedStd3Mapped` code point was seen while
    /// `use_std3_ascii_rules` was set.
    DissallowedMappedInStd3,
    /// A `Disallowed` code point was seen.
    DissallowedCharacter,
    /// With `verify_dns_length`: the name is longer than 253 bytes or a label
    /// is longer than 63 bytes.
    TooLongForDns,
    /// With `verify_dns_length`: the name or one of its labels is empty.
    TooShortForDns,
}

/// Errors recorded during UTS #46 processing.
///
/// This is opaque for now, only indicating the presence of at least one error.
/// More details may be exposed in the future.
///
/// Returned by `to_ascii` and `to_unicode`; wraps the list of internal
/// `Error` values collected during the call.
#[derive(Debug)]
pub struct Errors(Vec<Error>);

/// http://www.unicode.org/reports/tr46/#ToASCII
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
    let mut errors = Vec::new();
    let mut result = String::new();
    let mut first = true;
    for label in processing(domain, flags, &mut errors).split('.') {
        if !first {
            result.push('.');
        }
        first = false;
        if label.is_ascii() {
            result.push_str(label);
        } else {
            match punycode::encode_str(label) {
                Some(x) => {
                    result.push_str(PUNYCODE_PREFIX);
                    result.push_str(&x);
                },
                None => errors.push(Error::PunycodeError)
            }
        }
    }

    if flags.verify_dns_length {
        let domain = if result.ends_with(".") { &result[..result.len()-1]  } else { &*result };
        if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
            errors.push(Error::TooShortForDns)
        }
        if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
            errors.push(Error::TooLongForDns)
        }
    }
    if errors.is_empty() {
        Ok(result)
    } else {
        Err(Errors(errors))
    }
}

/// http://www.unicode.org/reports/tr46/#ToUnicode
///
/// Only `use_std3_ascii_rules` is used in `flags`.
///
/// Returns the processed domain together with `Ok(())` if no error was
/// recorded, or `Err` otherwise. The string is returned in both cases.
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
    // Transitional processing is always switched off here, whatever the caller passed.
    flags.transitional_processing = false;

    let mut problems = Vec::new();
    let unicode = processing(domain, flags, &mut problems);
    let status = match problems.is_empty() {
        true => Ok(()),
        false => Err(Errors(problems)),
    };
    (unicode, status)
}