use std::ascii::AsciiExt;
use unic_normal::StrNormalForm;
use unic_ucd_bidi::{bidi_class, BidiClass};
use unic_ucd_normal::is_combining_mark;
use mapping::Mapping;
use punycode;
/// Prefix identifying a Punycode-encoded label. Labels starting with it are
/// decoded during processing, and `to_ascii` prepends it to every label it
/// had to encode.
pub static PUNYCODE_PREFIX: &'static str = "xn--";
/// Applies the mapping-table entry for `codepoint`, appending the resulting
/// text (if any) to `output` and recording any violation in `errors`.
///
/// * Valid characters are copied through; ignored characters produce nothing.
/// * Mapped characters are replaced by their mapping.
/// * Deviation characters are replaced only when
///   `flags.transitional_processing` is set, otherwise copied through.
/// * Disallowed characters are copied through and always reported.
/// * The two STD3-disallowed kinds are reported only when
///   `flags.use_std3_ascii_rules` is set.
fn map_char(codepoint: char, flags: Flags, output: &mut String, errors: &mut Vec<Error>) {
    let strict_ascii = flags.use_std3_ascii_rules;

    match Mapping::of(codepoint) {
        Mapping::Ignored => {}
        Mapping::Valid => output.push(codepoint),
        Mapping::Mapped(replacement) => output.push_str(replacement),
        Mapping::Deviation(replacement) => {
            if !flags.transitional_processing {
                output.push(codepoint);
            } else {
                output.push_str(replacement);
            }
        }
        Mapping::Disallowed => {
            // The offending character is still emitted so later stages see it.
            output.push(codepoint);
            errors.push(Error::DissallowedCharacter);
        }
        Mapping::DisallowedStd3Valid => {
            output.push(codepoint);
            if strict_ascii {
                errors.push(Error::DissallowedByStd3AsciiRules);
            }
        }
        Mapping::DisallowedStd3Mapped(replacement) => {
            output.push_str(replacement);
            if strict_ascii {
                errors.push(Error::DissallowedMappedInStd3);
            }
        }
    }
}
/// Checks `label` against the bidirectional-text constraints implemented
/// below. Returns `true` when the label is acceptable.
///
/// Labels are only checked when `is_bidi_domain` is `true`; otherwise, and for
/// an empty label, the function returns `true` immediately.
///
/// * A label whose first character has class `L` may contain only
///   `L, EN, ES, CS, ET, ON, BN, NSM` afterwards, and its last non-`NSM`
///   character must be `L` or `EN`.
/// * A label whose first character has class `R` or `AL` may contain only
///   `R, AL, AN, EN, ES, CS, ET, ON, BN, NSM` afterwards, its last non-`NSM`
///   character must be `R, AL, EN` or `AN`, and it must not contain both `EN`
///   and `AN` characters.
/// * Any other first character fails the check.
fn passes_bidi(label: &str, is_bidi_domain: bool) -> bool {
    use self::bidi_class::abbr_names::*;

    if !is_bidi_domain {
        return true;
    }

    let mut rest = label.chars();
    let leading = match rest.next() {
        Some(c) => BidiClass::of(c),
        None => return true,
    };

    // Class of the last character once trailing NSM characters are skipped.
    let trailing = || {
        label
            .chars()
            .rev()
            .map(|c| BidiClass::of(c))
            .find(|class| *class != NSM)
    };

    match leading {
        L => {
            let body_ok =
                rest.all(|c| matches!(BidiClass::of(c), L | EN | ES | CS | ET | ON | BN | NSM));
            body_ok && match trailing() {
                Some(class) => class == L || class == EN,
                None => true,
            }
        }
        R | AL => {
            let mut saw_en = false;
            let mut saw_an = false;
            for c in rest {
                let class = BidiClass::of(c);
                match class {
                    EN => saw_en = true,
                    AN => saw_an = true,
                    _ => {}
                }
                if !matches!(class, R | AL | AN | EN | ES | CS | ET | ON | BN | NSM) {
                    return false;
                }
            }
            let ending_ok = match trailing() {
                Some(class) => matches!(class, R | AL | EN | AN),
                None => false,
            };
            // European and Arabic digits must not be mixed in one label.
            ending_ok && !(saw_an && saw_en)
        }
        _ => false,
    }
}
/// Validates a single label and pushes at most one
/// `Error::ValidityCriteria` onto `errors` when it fails.
///
/// An empty label is accepted without further checks. Otherwise the label
/// fails if any of the following holds (checked in this order, stopping at the
/// first failure):
///
/// 1. it is not already in NFC form;
/// 2. it starts or ends with `'-'`;
/// 3. its first character is a combining mark;
/// 4. it contains a character whose mapping status is not acceptable under
///    `flags`;
/// 5. it fails `passes_bidi` for the given `is_bidi_domain`.
fn validate(label: &str, is_bidi_domain: bool, flags: Flags, errors: &mut Vec<Error>) {
    let leading = match label.chars().next() {
        Some(c) => c,
        None => return,
    };

    // Whether a character's mapping status makes the label invalid.
    let is_rejected = |c: char| match Mapping::of(c) {
        Mapping::Valid => false,
        Mapping::Deviation(_) => flags.transitional_processing,
        Mapping::DisallowedStd3Valid => flags.use_std3_ascii_rules,
        _ => true,
    };

    let violates = label.nfc().ne(label.chars())
        || label.starts_with('-')
        || label.ends_with('-')
        || is_combining_mark(leading)
        || label.chars().any(is_rejected)
        || !passes_bidi(label, is_bidi_domain);

    if violates {
        errors.push(Error::ValidityCriteria);
    }
}
fn processing(domain: &str, flags: Flags, errors: &mut Vec<Error>) -> String {
use self::bidi_class::abbr_names::*;
let mut mapped = String::new();
for c in domain.chars() {
map_char(c, flags, &mut mapped, errors)
}
let normalized: String = mapped.nfc().collect();
let mut is_bidi_domain = domain
.chars()
.any(|c| matches!(BidiClass::of(c), R | AL | AN));
if !is_bidi_domain {
for label in normalized.split('.') {
if label.starts_with(PUNYCODE_PREFIX) {
match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
Some(decoded_label) => if decoded_label
.chars()
.any(|c| matches!(BidiClass::of(c), R | AL | AN))
{
is_bidi_domain = true;
},
None => {
is_bidi_domain = true;
}
}
}
}
}
let mut validated = String::new();
let mut first = true;
for label in normalized.split('.') {
if !first {
validated.push('.');
}
first = false;
if label.starts_with(PUNYCODE_PREFIX) {
match punycode::decode_to_string(&label[PUNYCODE_PREFIX.len()..]) {
Some(decoded_label) => {
let flags = Flags {
transitional_processing: false,
..flags
};
validate(&decoded_label, is_bidi_domain, flags, errors);
validated.push_str(&decoded_label)
}
None => errors.push(Error::PunycodeError),
}
} else {
validate(label, is_bidi_domain, flags, errors);
validated.push_str(label)
}
}
validated
}
/// Options controlling how a domain is processed by `to_ascii` and
/// `to_unicode`.
#[derive(Copy, Clone, Debug)]
pub struct Flags {
    /// When set, characters that are only disallowed under the STD3 ASCII
    /// rules are reported as errors and make a label invalid.
    pub use_std3_ascii_rules: bool,
    /// When set, deviation characters are replaced by their mapping instead of
    /// being kept. `to_unicode` always clears this flag, and decoded Punycode
    /// labels are always validated with it cleared.
    pub transitional_processing: bool,
    /// When set, `to_ascii` additionally reports empty or over-long labels and
    /// domains.
    pub verify_dns_length: bool,
}
/// Individual problems that can be recorded while processing a domain.
#[derive(PartialEq, Eq, Clone, Copy, Debug)]
enum Error {
/// Punycode decoding or encoding of a label failed.
PunycodeError,
/// A label failed one of the checks performed by `validate`.
ValidityCriteria,
/// A character that is valid except under the STD3 ASCII rules was seen
/// while `use_std3_ascii_rules` was enabled.
DissallowedByStd3AsciiRules,
/// A character that is mapped except under the STD3 ASCII rules was seen
/// while `use_std3_ascii_rules` was enabled.
DissallowedMappedInStd3,
/// A character whose mapping status is "disallowed" was seen.
DissallowedCharacter,
/// With `verify_dns_length`: the domain exceeds 253 bytes or a label
/// exceeds 63 bytes.
TooLongForDns,
/// With `verify_dns_length`: the domain or one of its labels is empty.
TooShortForDns,
}
/// Opaque collection of every error recorded while processing a domain.
/// Used as the `Err` payload of `to_ascii` and `to_unicode`.
#[derive(Debug, Eq, PartialEq)]
pub struct Errors(Vec<Error>);
pub fn to_ascii(domain: &str, flags: Flags) -> Result<String, Errors> {
let mut errors = Vec::new();
let mut result = String::new();
let mut first = true;
for label in processing(domain, flags, &mut errors).split('.') {
if !first {
result.push('.');
}
first = false;
if label.is_ascii() {
result.push_str(label);
} else {
match punycode::encode_str(label) {
Some(x) => {
result.push_str(PUNYCODE_PREFIX);
result.push_str(&x);
}
None => errors.push(Error::PunycodeError),
}
}
}
if flags.verify_dns_length {
let domain = if result.ends_with('.') {
&result[..result.len() - 1]
} else {
&*result
};
if domain.len() < 1 || domain.split('.').any(|label| label.len() < 1) {
errors.push(Error::TooShortForDns)
}
if domain.len() > 253 || domain.split('.').any(|label| label.len() > 63) {
errors.push(Error::TooLongForDns)
}
}
if errors.is_empty() {
Ok(result)
} else {
Err(Errors(errors))
}
}
/// Converts `domain` to its Unicode form.
///
/// Transitional processing is always disabled, regardless of the value passed
/// in `flags`. The processed domain is returned even when problems were found;
/// the second tuple element is `Err(Errors)` in that case and `Ok(())`
/// otherwise.
pub fn to_unicode(domain: &str, mut flags: Flags) -> (String, Result<(), Errors>) {
    flags.transitional_processing = false;

    let mut collected = Vec::new();
    let unicode = processing(domain, flags, &mut collected);

    if collected.is_empty() {
        return (unicode, Ok(()));
    }
    (unicode, Err(Errors(collected)))
}
// Unit tests for `to_ascii`: bare Punycode prefixes, labels starting with a
// combining mark, and the bidi checks.
#[cfg(test)]
mod tests {
use super::*;
// With DNS length verification enabled, every input containing a bare
// `xn--` label (or an undecodable one) must be rejected.
#[test]
fn test_punycode_prefix_with_length_check() {
fn _to_ascii(domain: &str) -> Result<String, Errors> {
to_ascii(
domain,
Flags {
transitional_processing: false,
use_std3_ascii_rules: true,
verify_dns_length: true,
},
)
}
assert!(_to_ascii("xn--").is_err());
assert!(_to_ascii("xn---").is_err());
assert!(_to_ascii("xn-----").is_err());
assert!(_to_ascii("xn--.").is_err());
assert!(_to_ascii("xn--...").is_err());
assert!(_to_ascii(".xn--").is_err());
assert!(_to_ascii("...xn--").is_err());
assert!(_to_ascii("xn--.xn--").is_err());
assert!(_to_ascii("xn--.example.org").is_err());
}
// With DNS length verification disabled, a bare `xn--` label decodes to an
// empty label and is accepted; `xn---` and `xn-----` are still errors.
#[test]
fn test_punycode_prefix_without_length_check() {
fn _to_ascii(domain: &str) -> Result<String, Errors> {
to_ascii(
domain,
Flags {
transitional_processing: false,
use_std3_ascii_rules: true,
verify_dns_length: false,
},
)
}
assert_eq!(_to_ascii("xn--"), Ok("".to_owned()));
assert!(_to_ascii("xn---").is_err());
assert!(_to_ascii("xn-----").is_err());
assert_eq!(_to_ascii("xn--."), Ok(".".to_owned()));
assert_eq!(_to_ascii("xn--..."), Ok("...".to_owned()));
assert_eq!(_to_ascii(".xn--"), Ok(".".to_owned()));
assert_eq!(_to_ascii("...xn--"), Ok("...".to_owned()));
assert_eq!(_to_ascii("xn--.xn--"), Ok(".".to_owned()));
assert_eq!(_to_ascii("xn--.example.org"), Ok(".example.org".to_owned()));
}
// A label must not begin with a combining mark (here U+11C3A).
#[test]
fn test_v5() {
fn _to_ascii(domain: &str) -> Result<String, Errors> {
to_ascii(
domain,
Flags {
transitional_processing: false,
use_std3_ascii_rules: true,
verify_dns_length: true,
},
)
}
assert!(is_combining_mark('\u{11C3A}'));
assert!(_to_ascii("\u{11C3A}").is_err());
assert!(_to_ascii("\u{850f}.\u{11C3A}").is_err());
// NOTE(review): presumably U+FF61 is mapped to '.', making U+11C3A the
// first character of a second label — confirm against the mapping table.
assert!(_to_ascii("\u{850f}\u{ff61}\u{11C3A}").is_err());
}
// Bidi checks: pure left-to-right and pure right-to-left labels are
// accepted; the last three inputs are expected to be rejected.
#[test]
fn test_v8_bidi_rules() {
fn _to_ascii(domain: &str) -> Result<String, Errors> {
to_ascii(
domain,
Flags {
transitional_processing: false,
use_std3_ascii_rules: true,
verify_dns_length: true,
},
)
}
assert_eq!(_to_ascii("abc"), Ok("abc".to_owned()));
assert_eq!(_to_ascii("123"), Ok("123".to_owned()));
assert_eq!(_to_ascii("אבּג"), Ok("xn--kdb3bdf".to_owned()));
assert_eq!(_to_ascii("ابج"), Ok("xn--mgbcm".to_owned()));
assert_eq!(_to_ascii("abc.ابج"), Ok("abc.xn--mgbcm".to_owned()));
assert_eq!(
_to_ascii("אבּג.ابج"),
Ok("xn--kdb3bdf.xn--mgbcm".to_owned())
);
// Labels starting with a digit next to a Hebrew label.
assert!(_to_ascii("0a.\u{05D0}").is_err());
assert!(_to_ascii("0à.\u{05D0}").is_err());
// NOTE(review): presumably the Punycode form of a label like the ones
// above — confirm by decoding.
assert!(_to_ascii("xn--0ca24w").is_err());
}
}