#![deny(unsafe_code)]
#![warn(
clippy::filetype_is_file,
clippy::integer_division,
clippy::needless_borrow,
clippy::nursery,
clippy::pedantic,
clippy::perf,
clippy::suboptimal_flops,
clippy::unneeded_field_pattern,
macro_use_extern_crate,
missing_copy_implementations,
missing_debug_implementations,
missing_docs,
non_ascii_idents,
trivial_casts,
trivial_numeric_casts,
unreachable_pub,
unused_crate_dependencies,
unused_extern_crates,
unused_import_braces,
)]
#![allow(
clippy::module_name_repetitions,
clippy::redundant_pub_crate,
)]
#![cfg_attr(feature = "docsrs", feature(doc_cfg))]
mod idna;
mod psl;
mod puny;
use idna::{
CharKind,
IdnaChars,
};
use psl::SuffixKind;
use std::{
cmp::Ordering,
fmt,
hash::{
Hash,
Hasher,
},
io::{
Error,
ErrorKind,
},
ops::{
Deref,
Range,
},
};
use unicode_bidi::{
bidi_class,
BidiClass,
};
use unicode_normalization::{
IsNormalized,
UnicodeNormalization,
};
/// The ASCII prefix written in front of every Punycode-encoded label.
const PREFIX: &str = "xn--";
/// A parsed host name.
///
/// The normalized host is stored once; the root label and the suffix are kept
/// as byte ranges into that string rather than as separate allocations.
#[derive(Debug, Default, Clone)]
pub struct Domain {
/// The full host, as produced by `idna_to_ascii`.
host: String,
/// Byte range of the root label (the label directly before the suffix).
root: Range<usize>,
/// Byte range of the suffix, running to the end of `host`.
suffix: Range<usize>,
}
impl AsRef<str> for Domain {
    /// Borrow the full host as a string slice.
    #[inline]
    fn as_ref(&self) -> &str {
        self.host.as_str()
    }
}
impl AsRef<[u8]> for Domain {
    /// Borrow the full host as raw bytes.
    #[inline]
    fn as_ref(&self) -> &[u8] {
        self.host.as_bytes()
    }
}
impl Deref for Domain {
    type Target = str;

    /// Dereference to the full host string.
    #[inline]
    fn deref(&self) -> &Self::Target {
        self.host.as_str()
    }
}
// Equality is total: the `PartialEq` implementation compares the `host` strings.
impl Eq for Domain {}
impl fmt::Display for Domain {
    /// Write the full host verbatim; formatter width and padding flags are
    /// not applied.
    #[inline]
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(&self.host)
    }
}
impl Hash for Domain {
    /// Hash only the host string, matching what equality compares.
    #[inline]
    fn hash<H: Hasher>(&self, state: &mut H) {
        Hash::hash(&self.host, state);
    }
}
impl Ord for Domain {
    /// Order domains by their host strings.
    #[inline]
    fn cmp(&self, other: &Self) -> Ordering {
        Ord::cmp(&self.host, &other.host)
    }
}
impl PartialEq for Domain {
    /// Two domains are equal when their host strings are equal.
    #[inline]
    fn eq(&self, other: &Self) -> bool {
        PartialEq::eq(&self.host, &other.host)
    }
}
// Generate symmetric `PartialEq` implementations between `Domain` and
// string-like types.
//
// Each entry is written `<method> <type>`, where `<method>` is the `Domain`
// method whose result is compared against the other value. Both directions
// (`Domain == T` and `T == Domain`) are emitted for every entry.
//
// The `deref:` arm must stay first so that invocations starting with that
// keyword are matched by it rather than by the general arm.
macro_rules! partial_eq {
// Reference types: the other value is dereferenced once before comparing.
(deref: $($cast:ident $ty:ty),+ $(,)?) => ($(
impl PartialEq<$ty> for Domain {
#[inline]
fn eq(&self, other: &$ty) -> bool { self.$cast() == *other }
}
impl PartialEq<Domain> for $ty {
#[inline]
fn eq(&self, other: &Domain) -> bool { other.$cast() == *self }
}
)+);
// Non-reference types: the other value is compared as received.
($($cast:ident $ty:ty),+ $(,)?) => ($(
impl PartialEq<$ty> for Domain {
#[inline]
fn eq(&self, other: &$ty) -> bool { self.$cast() == other }
}
impl PartialEq<Domain> for $ty {
#[inline]
fn eq(&self, other: &Domain) -> bool { other.$cast() == self }
}
)+);
}
// Comparisons against `str` and `String`.
partial_eq!(
as_str str,
as_str String,
);
// Comparisons against `&str` and `&String`.
partial_eq!(
deref:
as_str &str,
as_str &String,
);
impl PartialOrd for Domain {
    /// Defer to the total ordering supplied by `Ord`.
    #[inline]
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(Ord::cmp(self, other))
    }
}
// Generate `TryFrom<T> for Domain` for each listed source type.
//
// Every implementation forwards to `Domain::new` and reports a failed parse
// as an `std::io::Error` of kind `InvalidData`.
macro_rules! impl_try {
    ($($src:ty),+) => ($(
        impl TryFrom<$src> for Domain {
            type Error = Error;

            fn try_from(src: $src) -> Result<Self, Self::Error> {
                match Self::new(src) {
                    Some(domain) => Ok(domain),
                    None => Err(Error::from(ErrorKind::InvalidData)),
                }
            }
        }
    )+)
}

// Conversions from borrowed and owned strings.
impl_try!(&str, String, &String);
impl Domain {
    /// Return `true` if the stored host string is empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.host.is_empty()
    }

    /// Length of the stored host string, in bytes.
    #[must_use]
    pub fn len(&self) -> usize {
        self.as_bytes().len()
    }

    /// The full host as a string slice.
    #[must_use]
    pub fn as_str(&self) -> &str {
        self.host.as_str()
    }

    /// The full host as a byte slice.
    #[must_use]
    pub fn as_bytes(&self) -> &[u8] {
        self.host.as_bytes()
    }
}
impl Domain {
    /// Parse a host string into a `Domain`.
    ///
    /// The input is first normalized by `idna_to_ascii`; the result is then
    /// scanned by `find_dots` to locate the root label and the suffix.
    ///
    /// Returns `None` if either step rejects the input.
    pub fn new<S>(src: S) -> Option<Self>
    where
        S: AsRef<str>,
    {
        let host = idna_to_ascii(src.as_ref())?;
        let (dot, suffix_start) = find_dots(host.as_bytes())?;

        // `find_dots` reports the dot *before* the root label, or zero when
        // the root label opens the host; step past a real dot.
        let root_start = if dot == 0 { 0 } else { dot + 1 };
        let host_len = host.len();

        Some(Self {
            host,
            // The root ends just before the dot that precedes the suffix.
            root: root_start..suffix_start - 1,
            suffix: suffix_start..host_len,
        })
    }
}
impl Domain {
    /// Return `true` if the host begins with a `www.` label that sits in
    /// front of the root (i.e. it is part of the subdomain, not the root).
    #[must_use]
    pub fn has_www(&self) -> bool {
        4 <= self.root.start && self.host.starts_with("www.")
    }

    /// Remove a leading `www.` from the host in place.
    ///
    /// With `recurse` set, removal repeats for as long as `has_www` holds;
    /// otherwise at most one prefix is removed.
    ///
    /// Returns `true` if anything was removed.
    pub fn strip_www(&mut self, recurse: bool) -> bool {
        let mut stripped = false;
        while self.has_www() {
            self.host.replace_range(..4, "");

            // Everything moved four bytes to the left.
            self.root = (self.root.start - 4)..(self.root.end - 4);
            self.suffix = (self.suffix.start - 4)..(self.suffix.end - 4);

            stripped = true;
            if !recurse {
                break;
            }
        }
        stripped
    }

    /// Return a copy of this domain with one leading `www.` removed, or
    /// `None` if `has_www` is false.
    #[must_use]
    pub fn without_www(&self) -> Option<Self> {
        if !self.has_www() {
            return None;
        }

        let mut copy = self.clone();
        copy.strip_www(false);
        Some(copy)
    }
}
impl Domain {
    /// Consume the domain and return the owned host string.
    #[allow(clippy::missing_const_for_fn)]
    #[must_use]
    pub fn take(self) -> String {
        let Self { host, .. } = self;
        host
    }
}
impl Domain {
    /// The full host.
    #[must_use]
    pub fn host(&self) -> &str {
        self.host.as_str()
    }

    /// The root label: the part of the host covered by the `root` range.
    #[must_use]
    pub fn root(&self) -> &str {
        &self.host[self.root.clone()]
    }

    /// Everything before the root label, without the separating dot, or
    /// `None` when the root label opens the host.
    #[must_use]
    pub fn subdomain(&self) -> Option<&str> {
        // A non-zero root start is always preceded by a dot; drop that dot.
        self.root
            .start
            .checked_sub(1)
            .map(|end| &self.host[..end])
    }

    /// The suffix: the part of the host covered by the `suffix` range.
    #[must_use]
    pub fn suffix(&self) -> &str {
        &self.host[self.suffix.clone()]
    }

    /// The root label, the dot after it, and the suffix — i.e. the host
    /// with any subdomain removed.
    #[must_use]
    pub fn tld(&self) -> &str {
        let (_, rest) = self.host.split_at(self.root.start);
        rest
    }
}
#[cfg(any(test, feature = "serde"))]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "serde")))]
impl serde::Serialize for Domain {
    /// Serialize the domain as its host string.
    #[inline]
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        serializer.serialize_str(self.host.as_str())
    }
}
#[cfg(any(test, feature = "serde"))]
#[cfg_attr(feature = "docsrs", doc(cfg(feature = "serde")))]
impl<'de> serde::Deserialize<'de> for Domain {
    /// Deserialize a string and parse it with `Domain::new`; a string that
    /// does not parse is reported as a custom deserialization error.
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::de::Deserializer<'de>,
    {
        let raw: std::borrow::Cow<str> = serde::de::Deserialize::deserialize(deserializer)?;
        match Self::new(raw) {
            Some(domain) => Ok(domain),
            None => Err(serde::de::Error::custom("Invalid domain.")),
        }
    }
}
/// Locate the root label and the suffix inside `host`.
///
/// Every `.` is visited from left to right, and the bytes following it are
/// handed to `SuffixKind::from_slice`. The first position for which that
/// returns a value decides the outcome.
///
/// On success the result is `(dot, suffix_start)`:
/// * `dot` is the index of the `.` in front of the root label, or `0` when
///   the root label starts the host;
/// * `suffix_start` is the index of the first byte of the suffix.
///
/// `None` is returned when the host is shorter than three bytes, when the
/// whole host is itself recognized by `SuffixKind::from_slice`, when no
/// suffix is found, or when a wildcard suffix has no label left over to act
/// as the root.
///
/// NOTE(review): `0` doubles as "no dot seen yet", so this assumes `host`
/// never starts with a `.` — confirm against callers.
#[allow(unsafe_code)]
fn find_dots(host: &[u8]) -> Option<(usize, usize)> {
if host.len() < 3 || SuffixKind::from_slice(host).is_some() { return None; }
// `dot` is the most recent dot seen before the current one; `last` is the
// one before that. Both are zero until such a dot exists.
let mut last: usize = 0;
let mut dot: usize = 0;
for (idx, _) in host.iter().enumerate().filter(|(_, &b)| b'.' == b) {
// SAFETY: `idx` is an index produced by enumerating `host`, so
// `idx + 1 <= host.len()` and the open-ended range is in bounds.
if let Some(suffix) = SuffixKind::from_slice(unsafe { host.get_unchecked(idx + 1..) }) {
return match suffix {
// The label between the previous dot and this one is the root.
SuffixKind::Tld => Some((dot, idx + 1)),
// The label before this dot is folded into the suffix, so one more
// label must exist in front of it to serve as the root.
SuffixKind::Wild =>
if dot == 0 { None }
else { Some((last, dot + 1)) },
SuffixKind::WildEx(ex) => {
let after_dot: usize =
if dot == 0 { 0 }
else { dot + 1 };
// SAFETY: `after_dot` is either zero or one past an earlier dot, and
// every earlier dot index is strictly less than `idx`, so
// `after_dot <= idx < host.len()`.
if ex.is_match(unsafe { host.get_unchecked(after_dot..idx) }) {
// The preceding label matched `ex`: handled like `Tld`.
Some((dot, idx + 1))
}
// Otherwise handled like `Wild`.
else if dot == 0 { None }
else { Some((last, after_dot)) }
},
};
}
// Shift the window: the old `dot` becomes `last`, and this dot becomes
// `dot`.
std::mem::swap(&mut dot, &mut last);
dot = idx;
}
None
}
/// Normalize a raw host string to its ASCII form.
///
/// Leading and trailing dots and ASCII whitespace are trimmed first. An
/// empty remainder yields `None`.
///
/// A fast path handles the common case without any Unicode work. It applies
/// when the trimmed input:
/// * is shorter than 254 bytes;
/// * consists only of ASCII letters, digits, `-` and `.`;
/// * contains at least one dot;
/// * has no empty label and no label of 64 bytes or more;
/// * has no label that begins or ends with `-`;
/// * has no label that begins with the Punycode prefix `xn--`, compared
///   case-insensitively.
///
/// In that case the input is returned as-is, lowercased if it contained any
/// upper-case letters. Everything else is passed to `idna_to_ascii_slow`.
fn idna_to_ascii(src: &str) -> Option<String> {
    let src: &str = src.trim_matches(|c: char| c == '.' || c.is_ascii_whitespace());
    if src.is_empty() {
        return None;
    }

    let bytes = src.as_bytes();

    // These are filled in as a side effect of the single pass over the
    // bytes below.
    let mut cap: bool = false;
    let mut dot: bool = false;
    let mut dash: bool = false;

    let fast: bool = bytes.len() < 254
        && bytes.iter().all(|&b| match b {
            b'.' => {
                dot = true;
                true
            },
            b'-' => {
                dash = true;
                true
            },
            b'A'..=b'Z' => {
                cap = true;
                true
            },
            b'a'..=b'z' | b'0'..=b'9' => true,
            _ => false,
        })
        && dot
        && bytes.split(|b| b'.'.eq(b)).all(|chunk| {
            !chunk.is_empty()
                && chunk.len() < 64
                && (
                    // Labels need a closer look only if a dash was seen.
                    !dash
                        || (
                            // The prefix is matched case-insensitively: the
                            // result is lowercased below, so `XN--` would
                            // otherwise become `xn--` without ever having
                            // been checked by the slow path.
                            !chunk
                                .get(..4)
                                .map_or(false, |p| p.eq_ignore_ascii_case(b"xn--"))
                                && chunk[0] != b'-'
                                && chunk[chunk.len() - 1] != b'-'
                        )
                )
        });

    if fast {
        if cap { Some(src.to_ascii_lowercase()) }
        else { Some(src.to_owned()) }
    }
    else { idna_to_ascii_slow(src) }
}
/// Normalize a host string that did not qualify for the fast path.
///
/// The input is run through `IdnaChars` and NFC normalization, and each
/// resulting character is collected while `IdnaPrefix` tracks whether any
/// label begins with `xn--`.
///
/// * If no such label exists, the characters go straight to
///   `idna_normalize_c`.
/// * Otherwise `idna_normalize_b` decodes and validates the Punycode labels
///   first, and the result is re-encoded label by label here.
///
/// Returns `None` if `IdnaChars` flagged an error, the result is empty or
/// starts/ends with a dot, validation fails, there are fewer than two
/// labels, or the ASCII output would be 254 bytes or longer.
fn idna_to_ascii_slow(src: &str) -> Option<String> {
    // NOTE(review): `IdnaChars` presumably sets `error` when it meets a
    // disallowed character — confirm in the `idna` module.
    let mut error: bool = false;
    let iter = IdnaChars::new(src, &mut error).nfc();

    let mut prefix: IdnaPrefix = IdnaPrefix::Dot;
    let mut scratch: Vec<char> = Vec::with_capacity(253);
    for c in iter {
        scratch.push(c);
        prefix = prefix.advance(c);
    }

    if error
        || scratch.is_empty()
        || scratch[0] == '.'
        || scratch[scratch.len() - 1] == '.'
    {
        return None;
    }

    // No Punycode labels: nothing needs decoding.
    if !matches!(prefix, IdnaPrefix::Dash2) {
        return idna_normalize_c(&scratch);
    }

    // Decode and validate the Punycode labels.
    let mut normalized: Vec<char> = Vec::with_capacity(scratch.len());
    if !idna_normalize_b(&scratch, &mut normalized) {
        return None;
    }

    // Re-encode, one label at a time.
    let mut out = String::with_capacity(normalized.len());
    // A `usize` counter cannot overflow here; a `u8` could on hosts with
    // more than 255 labels.
    let mut parts: usize = 0;
    for part in normalized.split(|c| '.'.eq(c)) {
        if parts != 0 {
            out.push('.');
        }

        if part.iter().all(char::is_ascii) {
            out.extend(part);
        }
        else {
            out.push_str(PREFIX);
            if !puny::encode_into(part, &mut out) {
                return None;
            }
        }

        // The output only grows, so an over-long host can be rejected now
        // instead of after processing every remaining label.
        if out.len() > 253 {
            return None;
        }

        parts += 1;
    }

    if 1 < parts { Some(out) }
    else { None }
}
/// Check a single, non-empty label against the bidirectional-text rules.
///
/// The first character decides the label's direction: class `L` makes it
/// left-to-right, `R` or `AL` right-to-left, and anything else fails. The
/// remaining characters are then walked from the end of the label backwards:
///
/// * `NSM` is always skipped.
/// * The first other character met (i.e. the label's effective ending) must
///   be `L`/`EN` for left-to-right labels, or `R`/`AL`/`EN`/`AN` for
///   right-to-left ones.
/// * Before that point, `BN`, `CS`, `ES`, `ET` and `ON` are additionally
///   allowed.
/// * A right-to-left label may not contain both `EN` and `AN`.
/// * Any other class fails.
///
/// # Panics
///
/// Indexes `part[0]`, so `part` must not be empty.
#[allow(clippy::similar_names)]
fn idna_check_bidi(part: &[char]) -> bool {
    let rtl: bool = match bidi_class(part[0]) {
        BidiClass::L => false,
        BidiClass::R | BidiClass::AL => true,
        _ => return false,
    };

    // Classes of everything after the first character, last one first.
    let tail = part[1..].iter().rev().map(|&c| bidi_class(c));

    // Set once a valid ending character has been seen.
    let mut anchored: bool = false;

    if rtl {
        let mut seen_an: bool = false;
        let mut seen_en: bool = false;
        for class in tail {
            match class {
                BidiClass::AN => {
                    if seen_en {
                        return false;
                    }
                    seen_an = true;
                    anchored = true;
                },
                BidiClass::EN => {
                    if seen_an {
                        return false;
                    }
                    seen_en = true;
                    anchored = true;
                },
                BidiClass::NSM => {},
                BidiClass::R | BidiClass::AL => { anchored = true; },
                BidiClass::BN
                | BidiClass::CS
                | BidiClass::ES
                | BidiClass::ET
                | BidiClass::ON => {
                    if !anchored {
                        return false;
                    }
                },
                _ => return false,
            }
        }
    }
    else {
        for class in tail {
            match class {
                BidiClass::NSM => {},
                BidiClass::L | BidiClass::EN => { anchored = true; },
                BidiClass::BN
                | BidiClass::CS
                | BidiClass::ES
                | BidiClass::ET
                | BidiClass::ON => {
                    if !anchored {
                        return false;
                    }
                },
                _ => return false,
            }
        }
    }

    true
}
/// Check the basic shape of a single label.
///
/// A label passes when it is between 1 and 63 characters long, neither
/// starts nor ends with `-`, and does not start with a combining mark. With
/// `deep` set, every character must additionally satisfy
/// `CharKind::is_valid`.
fn idna_check_validity(part: &[char], deep: bool) -> bool {
    let (first, last) = match (part.first(), part.last()) {
        (Some(&first), Some(&last)) => (first, last),
        // Empty label.
        _ => return false,
    };

    if 64 <= part.len() || first == '-' || last == '-' {
        return false;
    }

    if unicode_normalization::char::is_combining_mark(first) {
        return false;
    }

    !deep || part.iter().copied().all(CharKind::is_valid)
}
/// Return `true` if any character has bidi class `R`, `AL` or `AN`.
///
/// ASCII graphic characters are skipped without a class lookup.
fn idna_has_bidi(part: &[char]) -> bool {
    for &c in part {
        if c.is_ascii_graphic() {
            continue;
        }
        if matches!(bidi_class(c), BidiClass::R | BidiClass::AL | BidiClass::AN) {
            return true;
        }
    }
    false
}
/// Decode and validate a host that contains Punycode labels.
///
/// `src` is split on dots. Each label starting with `xn--` is decoded with
/// `puny::decode`, deep-validated, and required to already be in NFC; every
/// other label gets the shallow validity check only. The (decoded) labels
/// are written to `out`, separated by dots.
///
/// If any label contains right-to-left characters, every label of the final
/// output must also pass `idna_check_bidi`.
///
/// Returns `false` as soon as any check fails; `out` may then hold partial
/// data.
fn idna_normalize_b(src: &[char], out: &mut Vec<char>) -> bool {
    let mut is_bidi: bool = false;

    for (idx, part) in src.split(|c| '.'.eq(c)).enumerate() {
        if idx != 0 {
            out.push('.');
        }

        match part.strip_prefix(&['x', 'n', '-', '-']) {
            // A Punycode label.
            Some(chunk) => {
                let mut decoded = match puny::decode(chunk) {
                    Some(decoded) => decoded,
                    None => return false,
                };

                if !idna_check_validity(&decoded, true) {
                    return false;
                }

                // Only run the full comparison when the quick check is
                // inconclusive.
                let is_nfc: bool = match unicode_normalization::is_nfc_quick(decoded.iter().copied()) {
                    IsNormalized::Yes => true,
                    IsNormalized::No => false,
                    IsNormalized::Maybe =>
                        decoded.iter().copied().eq(decoded.iter().copied().nfc()),
                };
                if !is_nfc {
                    return false;
                }

                is_bidi = is_bidi || idna_has_bidi(&decoded);
                out.append(&mut decoded);
            },
            // A regular label.
            None => {
                if !idna_check_validity(part, false) {
                    return false;
                }

                is_bidi = is_bidi || idna_has_bidi(part);
                out.extend_from_slice(part);
            },
        }
    }

    !is_bidi || out.split(|c| '.'.eq(c)).all(idna_check_bidi)
}
/// Validate and ASCII-encode a host that contains no Punycode labels.
///
/// `src` is split on dots. Each label must pass the shallow
/// `idna_check_validity` check and, if `src` contains any right-to-left
/// characters, `idna_check_bidi` as well. All-ASCII labels are copied
/// through; the rest are written as `xn--` followed by their Punycode
/// encoding.
///
/// Returns `None` if a label fails validation or encoding, if there are
/// fewer than two labels, or if the output would be 254 bytes or longer.
fn idna_normalize_c(src: &[char]) -> Option<String> {
    let mut out = String::with_capacity(253);
    // A `usize` counter cannot overflow here; a `u8` could on hosts with
    // more than 255 labels (e.g. "a.a.a.…").
    let mut parts: usize = 0;
    let is_bidi: bool = idna_has_bidi(src);

    for part in src.split(|c| '.'.eq(c)) {
        if parts != 0 {
            out.push('.');
        }

        if !idna_check_validity(part, false) || (is_bidi && !idna_check_bidi(part)) {
            return None;
        }

        if part.iter().all(char::is_ascii) {
            out.extend(part);
        }
        else {
            out.push_str(PREFIX);
            if !puny::encode_into(part, &mut out) {
                return None;
            }
        }

        // The output only grows, so an over-long host can be rejected now
        // instead of after processing every remaining label.
        if out.len() > 253 {
            return None;
        }

        parts += 1;
    }

    if 1 < parts { Some(out) }
    else { None }
}
/// State machine that detects whether any label begins with `xn--`.
///
/// Feed every character of a host to `advance`, starting from `Dot`. The
/// state ends up as `Dash2` if, and only if, some label started with the
/// prefix.
#[repr(u8)]
#[derive(Clone, Copy)]
enum IdnaPrefix {
    /// No partial match in progress.
    Na,
    /// At the start of a label.
    Dot,
    /// Matched `x` at the start of a label.
    Ex,
    /// Matched `xn`.
    En,
    /// Matched `xn-`.
    Dash1,
    /// Matched `xn--`; this state is never left.
    Dash2,
}

impl IdnaPrefix {
    /// Return the state that follows `self` after reading `ch`.
    const fn advance(self, ch: char) -> Self {
        // Once the full prefix has been seen, stay there.
        if matches!(self, Self::Dash2) {
            return Self::Dash2;
        }

        match ch {
            // A dot always opens a new label.
            '.' => Self::Dot,
            '-' => match self {
                Self::Dash1 => Self::Dash2,
                Self::En => Self::Dash1,
                _ => Self::Na,
            },
            'x' => if matches!(self, Self::Dot) { Self::Ex } else { Self::Na },
            'n' => if matches!(self, Self::Ex) { Self::En } else { Self::Na },
            _ => Self::Na,
        }
    }
}
#[cfg(test)]
mod tests {
use super::*;
// NOTE(review): presumably imported only to satisfy the
// `unused_crate_dependencies` lint for this dev-dependency — confirm.
use brunch as _;
// Pulls in the build-script-generated file from `OUT_DIR`; the `IDNA_DATA`
// table used by `t_idna` is expected to come from there.
include!(concat!(env!("OUT_DIR"), "/adbyss-idna-tests.rs"));
#[test]
/// Parse a range of hosts and check the resulting `tld()` value, or that
/// parsing fails where `None` is expected.
fn t_tld() {
// Mixed case.
t_tld_assert("COM", None);
t_tld_assert("example.COM", Some("example.com"));
t_tld_assert("WwW.example.COM", Some("example.com"));
// Leading dots.
t_tld_assert(".com", None);
t_tld_assert(".example", None);
t_tld_assert(".example.com", Some("example.com"));
t_tld_assert(".example.example", None);
// A suffix that is expected not to be recognized.
t_tld_assert("example", None);
t_tld_assert("example.example", None);
t_tld_assert("b.example.example", None);
t_tld_assert("a.b.example.example", None);
// Single-label suffixes.
t_tld_assert("biz", None);
t_tld_assert("domain.biz", Some("domain.biz"));
t_tld_assert("b.domain.biz", Some("domain.biz"));
t_tld_assert("a.b.domain.biz", Some("domain.biz"));
t_tld_assert("com", None);
t_tld_assert("example.com", Some("example.com"));
t_tld_assert("b.example.com", Some("example.com"));
t_tld_assert("a.b.example.com", Some("example.com"));
// Two-label suffixes.
t_tld_assert("uk.com", None);
t_tld_assert("example.uk.com", Some("example.uk.com"));
t_tld_assert("b.example.uk.com", Some("example.uk.com"));
t_tld_assert("a.b.example.uk.com", Some("example.uk.com"));
t_tld_assert("test.ac", Some("test.ac"));
// Suffixes where one extra label belongs to the suffix.
t_tld_assert("mm", None);
t_tld_assert("c.mm", None);
t_tld_assert("b.c.mm", Some("b.c.mm"));
t_tld_assert("a.b.c.mm", Some("b.c.mm"));
// A mix of the above, including labels exempt from the extra-label rule.
t_tld_assert("jp", None);
t_tld_assert("test.jp", Some("test.jp"));
t_tld_assert("www.test.jp", Some("test.jp"));
t_tld_assert("ac.jp", None);
t_tld_assert("test.ac.jp", Some("test.ac.jp"));
t_tld_assert("www.test.ac.jp", Some("test.ac.jp"));
t_tld_assert("kyoto.jp", None);
t_tld_assert("test.kyoto.jp", Some("test.kyoto.jp"));
t_tld_assert("ide.kyoto.jp", None);
t_tld_assert("b.ide.kyoto.jp", Some("b.ide.kyoto.jp"));
t_tld_assert("a.b.ide.kyoto.jp", Some("b.ide.kyoto.jp"));
t_tld_assert("c.kobe.jp", None);
t_tld_assert("b.c.kobe.jp", Some("b.c.kobe.jp"));
t_tld_assert("a.b.c.kobe.jp", Some("b.c.kobe.jp"));
t_tld_assert("city.kobe.jp", Some("city.kobe.jp"));
t_tld_assert("www.city.kobe.jp", Some("city.kobe.jp"));
t_tld_assert("ck", None);
t_tld_assert("test.ck", None);
t_tld_assert("b.test.ck", Some("b.test.ck"));
t_tld_assert("a.b.test.ck", Some("b.test.ck"));
t_tld_assert("www.ck", Some("www.ck"));
t_tld_assert("www.www.ck", Some("www.ck"));
// Suffixes of one, two and three labels under the same top level.
t_tld_assert("us", None);
t_tld_assert("test.us", Some("test.us"));
t_tld_assert("www.test.us", Some("test.us"));
t_tld_assert("ak.us", None);
t_tld_assert("test.ak.us", Some("test.ak.us"));
t_tld_assert("www.test.ak.us", Some("test.ak.us"));
t_tld_assert("k12.ak.us", None);
t_tld_assert("test.k12.ak.us", Some("test.k12.ak.us"));
t_tld_assert("www.test.k12.ak.us", Some("test.k12.ak.us"));
// Non-ASCII labels; the expected values are the Punycode forms.
t_tld_assert("食狮.com.cn", Some("xn--85x722f.com.cn"));
t_tld_assert("食狮.公司.cn", Some("xn--85x722f.xn--55qx5d.cn"));
t_tld_assert("www.食狮.公司.cn", Some("xn--85x722f.xn--55qx5d.cn"));
t_tld_assert("shishi.公司.cn", Some("shishi.xn--55qx5d.cn"));
t_tld_assert("公司.cn", None);
t_tld_assert("食狮.中国", Some("xn--85x722f.xn--fiqs8s"));
t_tld_assert("www.食狮.中国", Some("xn--85x722f.xn--fiqs8s"));
t_tld_assert("shishi.中国", Some("shishi.xn--fiqs8s"));
t_tld_assert("中国", None);
}
/// Helper for `t_tld`: with `b == None`, assert that `a` fails to parse;
/// otherwise assert that it parses and that `tld()` equals `b`.
fn t_tld_assert(a: &str, b: Option<&str>) {
if b.is_none() {
let res = Domain::new(a);
assert!(
res.is_none(),
"Unexpectedly parsed: {:?}\n{:?}\n", a, res
);
}
else {
if let Some(dom) = Domain::new(a) {
assert_eq!(
dom.tld(),
b.unwrap(),
"Failed parsing: {:?}", dom
);
}
else {
panic!("Failed parsing: {:?}", a);
}
}
}
#[test]
/// Check every getter (`subdomain`, `root`, `suffix`, `tld`, `host`) on a
/// handful of parsed hosts.
fn t_chunks() {
// Non-ASCII host with a two-label subdomain.
let mut dom = Domain::new("abc.www.食狮.中国").unwrap();
assert_eq!(dom.subdomain(), Some("abc.www"));
assert_eq!(dom.root(), "xn--85x722f");
assert_eq!(dom.suffix(), "xn--fiqs8s");
assert_eq!(dom.tld(), "xn--85x722f.xn--fiqs8s");
assert_eq!(dom.host(), "abc.www.xn--85x722f.xn--fiqs8s");
// `Deref` yields the same string as `host()`.
assert_eq!(dom.host(), dom.deref());
// No subdomain.
dom = Domain::new("blobfolio.com").unwrap();
assert_eq!(dom.subdomain(), None);
assert_eq!(dom.root(), "blobfolio");
assert_eq!(dom.suffix(), "com");
assert_eq!(dom.tld(), "blobfolio.com");
assert_eq!(dom.host(), "blobfolio.com");
// Single-label subdomain.
dom = Domain::new("www.blobfolio.com").unwrap();
assert_eq!(dom.subdomain(), Some("www"));
assert_eq!(dom.root(), "blobfolio");
assert_eq!(dom.suffix(), "com");
assert_eq!(dom.tld(), "blobfolio.com");
assert_eq!(dom.host(), "www.blobfolio.com");
// Multi-label subdomain.
dom = Domain::new("another.damn.sub.domain.blobfolio.com").unwrap();
assert_eq!(dom.subdomain(), Some("another.damn.sub.domain"));
assert_eq!(dom.root(), "blobfolio");
assert_eq!(dom.suffix(), "com");
assert_eq!(dom.tld(), "blobfolio.com");
assert_eq!(dom.host(), "another.damn.sub.domain.blobfolio.com");
// Surrounding whitespace and dots are trimmed.
dom = Domain::new(" ....blobfolio.com.... ").unwrap();
assert_eq!(dom.subdomain(), None);
assert_eq!(dom.root(), "blobfolio");
assert_eq!(dom.suffix(), "com");
assert_eq!(dom.tld(), "blobfolio.com");
assert_eq!(dom.host(), "blobfolio.com");
}
#[test]
/// `without_www` returns a copy with the leading `www.` removed and all
/// getters adjusted accordingly.
fn t_without_www() {
let dom1 = Domain::new("www.blobfolio.com").unwrap();
assert!(dom1.has_www());
let dom2 = dom1.without_www().unwrap();
assert_eq!(dom2.subdomain(), None);
assert_eq!(dom2.root(), "blobfolio");
assert_eq!(dom2.suffix(), "com");
assert_eq!(dom2.tld(), "blobfolio.com");
assert_eq!(dom2.host(), "blobfolio.com");
assert!(! dom2.has_www());
}
#[test]
/// Round-trip a domain through JSON: it serializes as a plain string and
/// deserializes back to an equal value.
fn t_serde() {
let dom1: Domain = Domain::new("serialize.domain.com")
.expect("Domain failed.");
let serial: String = serde_json::to_string(&dom1)
.expect("Serialize failed.");
assert_eq!(serial, "\"serialize.domain.com\"");
let dom2: Domain = serde_json::from_str(&serial).expect("Deserialize failed.");
assert_eq!(dom1, dom2);
}
#[test]
/// `-`, `.`, ASCII digits and lower-case ASCII letters must all be
/// classified as `CharKind::Valid`.
fn t_idna_valid() {
assert!(matches!(CharKind::from_char('-'), Some(CharKind::Valid)));
assert!(matches!(CharKind::from_char('.'), Some(CharKind::Valid)));
for c in '0'..='9' {
assert!(matches!(CharKind::from_char(c), Some(CharKind::Valid)));
}
for c in 'a'..='z' {
assert!(matches!(CharKind::from_char(c), Some(CharKind::Valid)));
}
}
#[test]
/// Run `idna_to_ascii` over every `(input, expected)` pair in `IDNA_DATA`.
fn t_idna() {
// Guard against an empty table, which would make the loop vacuous.
assert_eq!(IDNA_DATA.is_empty(), false, "Missing IDNA/Unicode test data.");
for (i, o) in IDNA_DATA {
assert_eq!(
idna_to_ascii(i),
o.map(String::from),
"IDNA handling failed: {:?}", i
);
}
}
}