#![no_std]
#![forbid(unsafe_code)]
#![warn(missing_docs)]
#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "alloc")]
use alloc::string::String;
use core::cmp::Ordering;
pub mod compat;
// Serialized suffix trie, embedded at compile time.
// `NODES` holds 8-byte node records (decoded by `node_rec`), `EDGES` holds
// 9-byte edge records (decoded by `edge_rec`), and `LABELS` is the text that
// edge records slice their labels out of.
static NODES: &[u8] = include_bytes!("trie_nodes.bin");
static EDGES: &[u8] = include_bytes!("trie_edges.bin");
static LABELS: &str = include_str!("trie_labels.txt");
// Bits of the per-node flag byte returned by `node_rec`.
// Each `*_PRIV` bit is passed to `section`: when set, the corresponding match
// is reported as `Type::Private`, otherwise as `Type::Icann`.
// The labels consumed to reach this node form a rule.
const F_RULE: u8 = 1;
const F_RULE_PRIV: u8 = 2;
// A wildcard below this node matches one further label.
const F_WILD: u8 = 4;
const F_WILD_PRIV: u8 = 8;
// The labels consumed to reach this node form an exception rule.
const F_EXC: u8 = 16;
const F_EXC_PRIV: u8 = 32;
/// Reads the little-endian `u32` stored in `b[o..o + 4]` and widens it to `usize`.
///
/// Panics when those four bytes are not all inside `b`.
#[inline]
fn le_u32(b: &[u8], o: usize) -> usize {
    let mut word = [0u8; 4];
    word.copy_from_slice(&b[o..o + 4]);
    u32::from_le_bytes(word) as usize
}
/// Decodes node record `i` from `NODES`.
///
/// Each record is 8 bytes: a little-endian `u32` index of the node's first
/// edge, a little-endian `u16` edge count, and one flag byte (the `F_*` bits).
/// Returns `(edge_start, edge_count, flags)`.
#[inline]
fn node_rec(i: usize) -> (usize, usize, u8) {
    let base = i * 8;
    let first_edge = le_u32(NODES, base);
    let edge_total = u16::from_le_bytes([NODES[base + 4], NODES[base + 5]]);
    let flags = NODES[base + 6];
    (first_edge, usize::from(edge_total), flags)
}
/// Decodes edge record `j` from `EDGES`.
///
/// Each record is 9 bytes: a little-endian `u32` byte offset into `LABELS`,
/// a one-byte label length, and a little-endian `u32` child node index.
/// Returns the edge's label text and the child node index.
#[inline]
fn edge_rec(j: usize) -> (&'static str, usize) {
    let base = j * 9;
    let text_start = le_u32(EDGES, base);
    let text_end = text_start + usize::from(EDGES[base + 4]);
    let target = le_u32(EDGES, base + 5);
    (&LABELS[text_start..text_end], target)
}
/// Binary-searches the edges `start..start + count` for one labelled `label`.
///
/// Labels are compared bytewise, so the search relies on the edges in that
/// range being stored in ascending byte order. Returns the child node index of
/// the matching edge, or `None` when no edge carries that label.
#[inline]
fn find_child(start: usize, count: usize, label: &str) -> Option<usize> {
    let needle = label.as_bytes();
    let mut low = start;
    let mut high = start + count;
    loop {
        if low >= high {
            return None;
        }
        let probe = low + (high - low) / 2;
        let (candidate, target) = edge_rec(probe);
        match candidate.as_bytes().cmp(needle) {
            Ordering::Equal => return Some(target),
            // The candidate sorts after the needle: keep searching to the left.
            Ordering::Greater => high = probe,
            // The candidate sorts before the needle: keep searching to the right.
            Ordering::Less => low = probe + 1,
        }
    }
}
/// Maps a node's flag byte to a list section: `Type::Private` when `priv_bit`
/// is set in `flags`, `Type::Icann` otherwise.
#[inline]
fn section(flags: u8, priv_bit: u8) -> Type {
    match flags & priv_bit {
        0 => Type::Icann,
        _ => Type::Private,
    }
}
// Capacity of the label-offset table built by `compute`; a host with more
// labels than this makes `compute` return `None`.
const MAX_LABELS: usize = 128;
/// Section of the suffix list that a matched rule belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Type {
/// The matched rule is not marked as private in the embedded data.
Icann,
/// The matched rule is marked as private in the embedded data.
Private,
}
// Split points of a host name, stored as byte offsets so the same value can
// be paired with either a borrowed or an owned copy of the host text.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Parts {
// Byte offset at which the public suffix starts.
suffix_off: usize,
// Byte offset at which the registrable domain (suffix plus one label) starts;
// `None` when there is no label to the left of the suffix.
domain_off: Option<usize>,
// Section of the rule that decided the suffix; `None` when no rule matched
// and the rightmost label alone was taken as the suffix.
typ: Option<Type>,
}
/// Splits `ascii` into labels and walks the suffix trie from the rightmost
/// label leftwards to find where the public suffix begins.
///
/// Returns `None` only when the host has more than `MAX_LABELS` labels.
///
/// Rule selection: if an exception rule was reached during the walk it wins,
/// and the suffix is one label shorter than that rule; otherwise the longest
/// plain or wildcard match is used; otherwise the rightmost label alone is the
/// suffix and `typ` is `None`.
///
/// NOTE(review): a single-label exception rule would yield a zero-label
/// suffix here; this presumably never occurs in the embedded data — confirm
/// against the trie generator.
fn compute(ascii: &str) -> Option<Parts> {
    // starts[k] is the byte offset at which label k begins (label 0 is leftmost).
    let mut starts = [0usize; MAX_LABELS];
    let mut count = 1usize;
    for (pos, byte) in ascii.bytes().enumerate() {
        if byte != b'.' {
            continue;
        }
        if count >= MAX_LABELS {
            return None;
        }
        starts[count] = pos + 1;
        count += 1;
    }

    // Text of label `k`, excluding the dots around it.
    let label_at = |k: usize| -> &str {
        let end = if k + 1 < count {
            starts[k + 1] - 1
        } else {
            ascii.len()
        };
        &ascii[starts[k]..end]
    };

    // True when a match of `len` labels is longer than the one held in `current`.
    let longer =
        |current: Option<(usize, Type)>, len: usize| current.is_none_or(|(held, _)| len > held);

    let mut best: Option<(usize, Type)> = None;
    let mut exception: Option<(usize, Type)> = None;
    let mut node = 0usize;
    // Number of labels, counted from the right, consumed to reach `node`.
    let mut depth = 0usize;
    loop {
        let (first_edge, edge_total, flags) = node_rec(node);
        let has = |bit: u8| flags & bit != 0;

        // The root (depth 0) stands for "no labels yet", so it cannot itself
        // be a rule or an exception.
        if depth >= 1 {
            if has(F_RULE) && longer(best, depth) {
                best = Some((depth, section(flags, F_RULE_PRIV)));
            }
            if has(F_EXC) && longer(exception, depth) {
                exception = Some((depth, section(flags, F_EXC_PRIV)));
            }
        }
        // A wildcard covers one more label, so it needs a label left to cover.
        if has(F_WILD) && depth < count && longer(best, depth + 1) {
            best = Some((depth + 1, section(flags, F_WILD_PRIV)));
        }

        if depth == count {
            break;
        }
        let Some(next) = find_child(first_edge, edge_total, label_at(count - 1 - depth)) else {
            break;
        };
        node = next;
        depth += 1;
    }

    let (suffix_labels, typ) = match (exception, best) {
        // An exception rule names a registrable domain, so its suffix is the
        // rule minus its leftmost label.
        (Some((len, ty)), _) => (len - 1, Some(ty)),
        (None, Some((len, ty))) => (len, Some(ty)),
        // No rule matched: fall back to the rightmost label alone.
        (None, None) => (1, None),
    };

    let suffix_idx = count - suffix_labels;
    let suffix_off = starts[suffix_idx];
    let domain_off = suffix_idx.checked_sub(1).map(|k| starts[k]);
    Some(Parts {
        suffix_off,
        domain_off,
        typ,
    })
}
/// Reports whether `host` is already in the form `lookup` accepts: non-empty,
/// ASCII only, no uppercase ASCII letters, and no empty labels (no leading,
/// trailing, or doubled dot).
fn is_normalized_ascii(host: &str) -> bool {
    let bytes = host.as_bytes();
    // An empty host, or a dot at either end, means an empty label.
    match (bytes.first(), bytes.last()) {
        (None, _) | (Some(&b'.'), _) | (_, Some(&b'.')) => return false,
        _ => {}
    }
    // The first byte is known not to be a dot, so any non-dot seed works.
    let mut prev = 0u8;
    bytes.iter().all(|&cur| {
        let doubled_dot = cur == b'.' && prev == b'.';
        prev = cur;
        cur.is_ascii() && !cur.is_ascii_uppercase() && !doubled_dot
    })
}
/// A host name split into public suffix, registrable domain and subdomain.
///
/// Borrows the host text and stores only offsets into it, so it is `Copy`
/// and every accessor returns a slice of the original string.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Domain<'a> {
// Host text that the offsets in `parts` index into.
input: &'a str,
// Split points computed for `input`.
parts: Parts,
}
impl<'a> Domain<'a> {
    /// Returns the whole host name this value describes.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        self.input
    }

    /// Returns the public suffix: the tail of the host from the suffix
    /// boundary to the end.
    #[inline]
    pub fn suffix(&self) -> &'a str {
        let start = self.parts.suffix_off;
        &self.input[start..]
    }

    /// Returns the registrable domain (the suffix plus the one label to its
    /// left), or `None` when the host has no label left of the suffix.
    #[inline]
    pub fn registrable_domain(&self) -> Option<&'a str> {
        let start = self.parts.domain_off?;
        Some(&self.input[start..])
    }

    /// Returns the labels to the left of the registrable domain, without the
    /// separating dot, or `None` when there are none.
    #[inline]
    pub fn subdomain(&self) -> Option<&'a str> {
        self.parts
            .domain_off
            .filter(|&start| start > 0)
            .map(|start| &self.input[..start - 1])
    }

    /// Returns `true` when the host has no registrable domain, i.e. the whole
    /// host is a public suffix.
    #[inline]
    pub fn is_public_suffix(&self) -> bool {
        matches!(self.parts.domain_off, None)
    }

    /// Returns the section of the rule that decided the suffix, or `None`
    /// when no rule matched and the rightmost label was used as the suffix.
    #[inline]
    pub fn typ(&self) -> Option<Type> {
        self.parts.typ
    }

    /// Returns `true` when the suffix was decided by a [`Type::Icann`] rule.
    #[inline]
    pub fn is_icann(&self) -> bool {
        matches!(self.parts.typ, Some(Type::Icann))
    }

    /// Returns `true` when the suffix was decided by a [`Type::Private`] rule.
    #[inline]
    pub fn is_private(&self) -> bool {
        matches!(self.parts.typ, Some(Type::Private))
    }

    /// Returns `true` when some rule matched the host.
    #[inline]
    pub fn is_known(&self) -> bool {
        matches!(self.parts.typ, Some(_))
    }
}
/// Splits an already-normalized host name without allocating.
///
/// One trailing dot is removed first. The remainder must be non-empty ASCII
/// with no uppercase letters and no empty labels; otherwise `None` is
/// returned. `None` is also returned when the host has too many labels to
/// process.
#[inline]
pub fn lookup(host: &str) -> Option<Domain<'_>> {
    let trimmed = match host.strip_suffix('.') {
        Some(rest) => rest,
        None => host,
    };
    if !is_normalized_ascii(trimmed) {
        return None;
    }
    compute(trimmed).map(|parts| Domain {
        input: trimmed,
        parts,
    })
}
/// Returns the contents of `psl_version.txt`, embedded at compile time.
///
/// NOTE(review): presumably identifies the suffix-list snapshot the embedded
/// trie was generated from — confirm against the data generator.
#[inline]
pub fn psl_version() -> &'static str {
include_str!("psl_version.txt")
}
/// Owned counterpart of [`Domain`]: stores the normalized ASCII host together
/// with its split points. Returned by [`analyze`].
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub struct Info {
// Normalized ASCII host that the offsets in `parts` index into.
ascii: String,
// Split points computed for `ascii`.
parts: Parts,
}
#[cfg(feature = "alloc")]
impl Info {
    /// Builds a borrowed [`Domain`] over the owned host so the accessors
    /// below can share its slicing logic.
    #[inline]
    fn borrow(&self) -> Domain<'_> {
        let input = self.ascii.as_str();
        Domain {
            input,
            parts: self.parts,
        }
    }

    /// Returns the normalized ASCII host name.
    #[inline]
    pub fn as_ascii(&self) -> &str {
        self.ascii.as_str()
    }

    /// Returns the public suffix of the host.
    #[inline]
    pub fn suffix(&self) -> &str {
        let view = self.borrow();
        view.suffix()
    }

    /// Returns the registrable domain (the suffix plus the one label to its
    /// left), or `None` when the host has no label left of the suffix.
    #[inline]
    pub fn registrable_domain(&self) -> Option<&str> {
        let view = self.borrow();
        view.registrable_domain()
    }

    /// Returns the labels to the left of the registrable domain, or `None`
    /// when there are none.
    #[inline]
    pub fn subdomain(&self) -> Option<&str> {
        let view = self.borrow();
        view.subdomain()
    }

    /// Returns `true` when the host has no registrable domain, i.e. the whole
    /// host is a public suffix.
    #[inline]
    pub fn is_public_suffix(&self) -> bool {
        self.borrow().is_public_suffix()
    }

    /// Returns the section of the rule that decided the suffix, or `None`
    /// when no rule matched.
    #[inline]
    pub fn typ(&self) -> Option<Type> {
        self.borrow().typ()
    }

    /// Returns `true` when the suffix was decided by a [`Type::Icann`] rule.
    #[inline]
    pub fn is_icann(&self) -> bool {
        self.borrow().is_icann()
    }

    /// Returns `true` when the suffix was decided by a [`Type::Private`] rule.
    #[inline]
    pub fn is_private(&self) -> bool {
        self.borrow().is_private()
    }

    /// Returns `true` when some rule matched the host.
    #[inline]
    pub fn is_known(&self) -> bool {
        self.borrow().is_known()
    }
}
/// Turns user input into the ASCII form `compute` expects.
///
/// Surrounding whitespace and one trailing dot are removed, then the rest is
/// converted with `to_ascii`. Returns `None` when nothing is left, when the
/// conversion fails, or when the converted host contains an empty label.
#[cfg(feature = "alloc")]
fn normalize(domain: &str) -> Option<String> {
    let trimmed = domain.trim();
    let trimmed = trimmed.strip_suffix('.').unwrap_or(trimmed);
    if trimmed.is_empty() {
        return None;
    }
    to_ascii(trimmed).filter(|ascii| {
        let has_empty_label = ascii.is_empty()
            || ascii.starts_with('.')
            || ascii.ends_with('.')
            || ascii.contains("..");
        !has_empty_label
    })
}
/// Converts `domain` to ASCII with `idna::domain_to_ascii`, returning `None`
/// when that conversion reports an error.
///
/// Compiled only when both the `alloc` and `idna` features are enabled.
#[cfg(all(feature = "alloc", feature = "idna"))]
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
idna::domain_to_ascii(domain).ok()
}
#[cfg(all(feature = "alloc", not(feature = "idna")))]
/// Fallback used when the `idna` feature is off: lowercases a host that is
/// already pure ASCII and returns `None` for anything else.
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
    domain.is_ascii().then(|| domain.to_ascii_lowercase())
}
/// Normalizes `domain` and splits it into suffix, registrable domain and
/// subdomain.
///
/// Returns `None` when the input cannot be normalized (empty, not convertible
/// to ASCII, or containing an empty label) or has too many labels to process.
#[cfg(feature = "alloc")]
pub fn analyze(domain: &str) -> Option<Info> {
    normalize(domain).and_then(|ascii| {
        let parts = compute(&ascii)?;
        Some(Info { ascii, parts })
    })
}
/// Returns the public suffix of `domain` as an owned string, or `None` when
/// [`analyze`] rejects the input.
#[cfg(feature = "alloc")]
#[inline]
pub fn suffix(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    Some(String::from(info.suffix()))
}
/// Returns the registrable domain of `domain` as an owned string.
///
/// `None` means either that [`analyze`] rejected the input or that the host
/// has no label to the left of its public suffix.
#[cfg(feature = "alloc")]
#[inline]
pub fn registrable_domain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    info.registrable_domain().map(String::from)
}
/// Returns the part of `domain` to the left of its registrable domain as an
/// owned string.
///
/// `None` means either that [`analyze`] rejected the input or that the host
/// has no such labels.
#[cfg(feature = "alloc")]
#[inline]
pub fn subdomain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    info.subdomain().map(String::from)
}
/// Returns `true` when `domain` normalizes to a host that is itself a public
/// suffix; returns `false` when it is not, or when [`analyze`] rejects the
/// input.
#[cfg(feature = "alloc")]
#[inline]
pub fn is_public_suffix(domain: &str) -> bool {
    match analyze(domain) {
        Some(info) => info.is_public_suffix(),
        None => false,
    }
}
// Unit tests. These run against the embedded trie data, so the expected
// values depend on the suffix-list snapshot compiled into the crate.
#[cfg(test)]
mod tests {
extern crate std;
use super::*;
#[cfg(feature = "alloc")]
use std::string::String;
// A multi-label suffix splits into suffix, registrable domain and subdomain.
#[test]
fn core_lookup_basic() {
let d = lookup("www.example.co.uk").unwrap();
assert_eq!(d.suffix(), "co.uk");
assert_eq!(d.registrable_domain(), Some("example.co.uk"));
assert_eq!(d.subdomain(), Some("www"));
assert!(d.is_icann());
}
// `lookup` rejects uppercase, non-ASCII, empty input and empty labels, but
// tolerates a single trailing dot.
#[test]
fn core_lookup_requires_normalized_input() {
assert!(lookup("WWW.EXAMPLE.COM").is_none());
assert!(lookup("食狮.com.cn").is_none());
assert!(lookup("").is_none());
assert!(lookup(".com").is_none());
assert!(lookup("a..b").is_none());
assert_eq!(lookup("example.com.").unwrap().suffix(), "com");
}
// Under `ck`, any single label is a public suffix via the wildcard, except
// `www.ck`, which an exception rule makes registrable.
#[test]
fn core_wildcard_and_exception() {
assert!(lookup("foo.ck").unwrap().is_public_suffix());
assert_eq!(
lookup("a.b.test.ck").unwrap().registrable_domain(),
Some("b.test.ck")
);
assert_eq!(
lookup("www.ck").unwrap().registrable_domain(),
Some("www.ck")
);
assert_eq!(lookup("www.ck").unwrap().suffix(), "ck");
}
// With no matching rule, the rightmost label is the suffix and the result
// is reported as not known.
#[test]
fn core_unknown_tld_default_rule() {
let d = lookup("foo.nonexistenttld").unwrap();
assert_eq!(d.suffix(), "nonexistenttld");
assert_eq!(d.registrable_domain(), Some("foo.nonexistenttld"));
assert!(!d.is_known());
}
// For every line of `rules.txt`, the text before the first tab is used as a
// suffix, and `label.<rule>` must be its registrable domain.
#[cfg(feature = "alloc")]
#[test]
fn every_rule_resolves() {
let rules = include_str!("rules.txt");
for line in rules.lines() {
let rule = &line[..line.find('\t').unwrap()];
let host = alloc::format!("label.{rule}");
let d = lookup(&host).expect("valid host");
assert_eq!(d.suffix(), rule, "rule {rule:?}");
assert_eq!(d.registrable_domain(), Some(host.as_str()), "rule {rule:?}");
}
}
// The embedded version string is non-empty and contains no newline.
#[test]
fn version_present() {
assert!(!psl_version().is_empty());
assert!(!psl_version().contains('\n'));
}
// The owned convenience functions agree with the borrowed API and reject
// inputs that normalize to an empty host or contain an empty label.
#[cfg(feature = "alloc")]
#[test]
fn alloc_api() {
assert_eq!(suffix("example.com").as_deref(), Some("com"));
assert_eq!(
registrable_domain("www.example.co.uk").as_deref(),
Some("example.co.uk")
);
assert_eq!(subdomain("a.b.example.co.uk").as_deref(), Some("a.b"));
assert_eq!(registrable_domain("co.uk"), None);
assert!(is_public_suffix("co.uk"));
assert_ne!(suffix("."), Some(String::new()));
assert!(analyze("com..").is_none());
}
// The owned API lowercases input and drops a single trailing dot.
#[cfg(feature = "alloc")]
#[test]
fn alloc_normalizes_case_and_trailing_dot() {
assert_eq!(
registrable_domain("WwW.Example.COM.").as_deref(),
Some("example.com")
);
}
// A rule marked private is reported as such, and the private suffix itself
// has no registrable domain.
#[cfg(feature = "alloc")]
#[test]
fn alloc_private_suffix() {
let info = analyze("foo.blogspot.com").unwrap();
assert_eq!(info.suffix(), "blogspot.com");
assert!(info.is_private());
assert_eq!(registrable_domain("blogspot.com"), None);
}
}