#![no_std]
#![forbid(unsafe_code)]
#![warn(missing_docs)]
#[cfg(feature = "alloc")]
extern crate alloc;
#[cfg(feature = "alloc")]
use alloc::string::String;
use core::cmp::Ordering;
pub mod compat;
/// Serialized trie node records, three bytes each: a little-endian `u16` edge
/// count followed by one flags byte (the layout `decode_nodes` reads).
const NODES_RAW: &[u8] = include_bytes!("trie_nodes.bin");
/// Serialized trie edges: per edge a little-endian `u16` label offset followed
/// by a varint-encoded child delta (the layout `decode_edges` reads).
const EDGES_RAW: &[u8] = include_bytes!("trie_edges.bin");
/// Label pool: each label is one length byte followed by that many label bytes
/// (the layout `edge_label` reads).
const LABELS: &[u8] = include_bytes!("trie_labels.bin");
/// Number of node records in `NODES_RAW` (three bytes per record).
const N_NODES: usize = NODES_RAW.len() / 3;
/// Total number of edges, i.e. the sum of every node's edge count.
const N_EDGES: usize = count_edges();
// Compile-time guards: node indices, edge indices and label offsets are all
// stored in `u16` fields, so each table may hold at most 65 536 entries.
const _: () = assert!(
N_NODES <= u16::MAX as usize + 1,
"node count exceeds u16; widen Edge::child"
);
const _: () = assert!(
N_EDGES <= u16::MAX as usize + 1,
"edge count exceeds u16; widen Node::edge_start"
);
const _: () = assert!(
LABELS.len() <= u16::MAX as usize + 1,
"label pool exceeds u16 offsets; widen Edge::label_off"
);
// Bit flags stored in `Node::flags`. Each `*_PRIV` bit qualifies the bit just
// before it: when set, `section` reports `Type::Private` instead of `Type::Icann`.
/// A rule ends exactly at this node.
const F_RULE: u8 = 1;
/// Section bit for the rule marked by `F_RULE`.
const F_RULE_PRIV: u8 = 2;
/// A wildcard rule hangs off this node: it matches one further label.
const F_WILD: u8 = 4;
/// Section bit for the wildcard marked by `F_WILD`.
const F_WILD_PRIV: u8 = 8;
/// An exception rule ends exactly at this node.
const F_EXC: u8 = 16;
/// Section bit for the exception marked by `F_EXC`.
const F_EXC_PRIV: u8 = 32;
/// One decoded trie node.
#[derive(Clone, Copy)]
struct Node {
/// Index in `EDGES` of this node's first outgoing edge.
edge_start: u16,
/// Number of consecutive outgoing edges starting at `edge_start`.
edge_count: u16,
/// Bitwise OR of the `F_*` flag constants.
flags: u8,
}
/// One decoded trie edge, leading from a node to one of its children.
#[derive(Clone, Copy)]
struct Edge {
/// Up to the first four label bytes packed most-significant-first, used as
/// a cheap first comparison by the `fast-lookup` variant of `find_child`.
#[cfg(feature = "fast-lookup")]
prefix: u32,
/// Offset in `LABELS` of the label's length byte.
label_off: u16,
/// Index in `NODES` of the node this edge leads to.
child: u16,
}
/// Reads the little-endian `u16` stored at `b[o]` and `b[o + 1]`.
///
/// Panics (or fails const evaluation) if either index is out of bounds.
const fn rd_u16(b: &[u8], o: usize) -> u16 {
    let low = b[o];
    let high = b[o + 1];
    u16::from_le_bytes([low, high])
}
/// Sums the edge-count field of every node record in `NODES_RAW`.
///
/// Written as a `while` loop over byte offsets because this runs during
/// const evaluation.
const fn count_edges() -> usize {
    let end = N_NODES * 3;
    let mut sum = 0usize;
    let mut offset = 0;
    while offset < end {
        sum += rd_u16(NODES_RAW, offset) as usize;
        offset += 3;
    }
    sum
}
/// Decodes `NODES_RAW` into the node table during const evaluation.
///
/// Records store only an edge count; each node's `edge_start` is the running
/// total of the counts of all nodes before it.
const fn decode_nodes() -> [Node; N_NODES] {
    const EMPTY: Node = Node {
        edge_start: 0,
        edge_count: 0,
        flags: 0,
    };
    let mut table = [EMPTY; N_NODES];
    let mut next_edge: u32 = 0;
    let mut idx = 0;
    while idx < N_NODES {
        let base = idx * 3;
        let count = rd_u16(NODES_RAW, base);
        let flags = NODES_RAW[base + 2];
        table[idx] = Node {
            edge_start: next_edge as u16,
            edge_count: count,
            flags,
        };
        next_edge += count as u32;
        idx += 1;
    }
    table
}
const fn decode_edges() -> [Edge; N_EDGES] {
let mut out = [Edge {
#[cfg(feature = "fast-lookup")]
prefix: 0,
label_off: 0,
child: 0,
}; N_EDGES];
let mut j = 0;
let mut cursor = 0usize;
let mut child: i64 = 0;
while j < N_EDGES {
let label_off = rd_u16(EDGES_RAW, cursor);
cursor += 2;
let mut val: u64 = 0;
let mut shift = 0u32;
loop {
let b = EDGES_RAW[cursor];
cursor += 1;
val |= ((b & 0x7f) as u64) << shift;
if b & 0x80 == 0 {
break;
}
shift += 7;
}
let delta = (val >> 1) as i64 ^ -((val & 1) as i64);
child += delta;
out[j] = Edge {
#[cfg(feature = "fast-lookup")]
prefix: prefix_key_at(label_off as usize),
label_off,
child: child as u16,
};
j += 1;
}
out
}
/// Builds the 4-byte comparison key for the label stored at `LABELS[off]`.
///
/// The key is the first (up to) four label bytes, most significant first,
/// zero-padded on the right. It is the const-evaluation counterpart of
/// `prefix_key` and must produce the same value for the same label bytes.
///
/// A zero-length label yields `0`; the previous shift-based form shifted a
/// `u32` by 32 bits in that case, which is an overflow.
#[cfg(feature = "fast-lookup")]
const fn prefix_key_at(off: usize) -> u32 {
    let len = LABELS[off] as usize;
    let n = if len > 4 { 4 } else { len };
    let mut bytes = [0u8; 4];
    let mut k = 0;
    while k < n {
        bytes[k] = LABELS[off + 1 + k];
        k += 1;
    }
    u32::from_be_bytes(bytes)
}
/// Decoded node table; the initializer `decode_nodes()` is evaluated at compile time.
static NODES: [Node; N_NODES] = decode_nodes();
/// Decoded edge table; the initializer `decode_edges()` is evaluated at compile time.
static EDGES: [Edge; N_EDGES] = decode_edges();
#[inline]
fn node_rec(i: usize) -> (usize, usize, u8) {
let n = &NODES[i];
(n.edge_start as usize, n.edge_count as usize, n.flags)
}
/// Returns the label bytes of edge `e` from the label pool.
///
/// `e.label_off` points at a length byte; the label is the run of that many
/// bytes immediately after it.
#[inline]
fn edge_label(e: &Edge) -> &'static [u8] {
    let len_at = usize::from(e.label_off);
    let first = len_at + 1;
    let last = first + usize::from(LABELS[len_at]);
    &LABELS[first..last]
}
#[cfg(feature = "fast-lookup")]
/// Builds the 4-byte comparison key for `label`.
///
/// The key is the first (up to) four bytes of `label`, most significant
/// first, zero-padded on the right. It must match what `prefix_key_at`
/// computes for the stored labels.
///
/// An empty `label` yields `0`. The previous shift-based form shifted a `u32`
/// by 32 bits for an empty label, which panics when overflow checks are on.
#[inline]
fn prefix_key(label: &[u8]) -> u32 {
    let n = label.len().min(4);
    let mut bytes = [0u8; 4];
    bytes[..n].copy_from_slice(&label[..n]);
    u32::from_be_bytes(bytes)
}
/// Binary-searches the edges `EDGES[start..start + count]` for `label`.
///
/// The packed prefix key is compared first; the full label bytes are compared
/// only when the keys are equal. Returns the child node index of the matching
/// edge, or `None` when no edge carries `label`.
#[cfg(feature = "fast-lookup")]
#[inline]
fn find_child(start: usize, count: usize, label: &[u8]) -> Option<usize> {
    let wanted = prefix_key(label);
    let mut low = start;
    let mut high = start + count;
    while low < high {
        let probe = low + (high - low) / 2;
        let edge = &EDGES[probe];
        let order = wanted
            .cmp(&edge.prefix)
            .then_with(|| label.cmp(edge_label(edge)));
        if order == Ordering::Equal {
            return Some(usize::from(edge.child));
        }
        if order == Ordering::Less {
            high = probe;
        } else {
            low = probe + 1;
        }
    }
    None
}
/// Binary-searches the edges `EDGES[start..start + count]` for `label` by
/// comparing full label bytes.
///
/// Returns the child node index of the matching edge, or `None` when no edge
/// carries `label`.
#[cfg(not(feature = "fast-lookup"))]
#[inline]
fn find_child(start: usize, count: usize, label: &[u8]) -> Option<usize> {
    let mut low = start;
    let mut high = start + count;
    while low < high {
        let probe = low + (high - low) / 2;
        let edge = &EDGES[probe];
        let order = label.cmp(edge_label(edge));
        if order == Ordering::Equal {
            return Some(usize::from(edge.child));
        }
        if order == Ordering::Less {
            high = probe;
        } else {
            low = probe + 1;
        }
    }
    None
}
/// Maps a node's flags to a list section: `Type::Private` when any bit of
/// `priv_bit` is set in `flags`, `Type::Icann` otherwise.
#[inline]
fn section(flags: u8, priv_bit: u8) -> Type {
    match flags & priv_bit {
        0 => Type::Icann,
        _ => Type::Private,
    }
}
/// Maximum number of dot-separated labels `compute` accepts; a host with more
/// labels makes it return `None`.
const MAX_LABELS: usize = 128;
/// Section of the suffix list that a matched rule belongs to.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum Type {
/// The matched rule does not carry the private-section bit.
Icann,
/// The matched rule carries the private-section bit.
Private,
}
/// Result of splitting a host, expressed as byte offsets into that host.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct Parts {
/// Byte offset at which the public suffix starts.
suffix_off: usize,
/// Byte offset at which the registrable domain (suffix plus one label)
/// starts; `None` when the host has no label in front of the suffix.
domain_off: Option<usize>,
/// Section of the rule that decided the split; `None` when no rule matched
/// and the last label alone was taken as the suffix.
typ: Option<Type>,
}
/// Splits `ascii` into public suffix and registrable domain by walking the
/// trie from the right-most label leftwards.
///
/// Callers are expected to pass a host without empty labels. Returns `None`
/// when the host has more than `MAX_LABELS` labels.
///
/// Selection rules, as implemented below:
/// * an exception match wins and yields a suffix one label shorter than the
///   exception rule itself;
/// * otherwise the longest plain or wildcard match wins;
/// * otherwise the last label alone is the suffix and `typ` is `None`.
fn compute(ascii: &str) -> Option<Parts> {
    // starts[k] is the byte offset at which label k begins; label 0 starts at 0.
    let mut starts = [0usize; MAX_LABELS];
    let mut count = 1usize;
    for (pos, byte) in ascii.bytes().enumerate() {
        if byte != b'.' {
            continue;
        }
        if count >= MAX_LABELS {
            return None;
        }
        starts[count] = pos + 1;
        count += 1;
    }

    // Text of label `idx`, excluding the dot that follows it.
    let label_at = |idx: usize| -> &str {
        let end = if idx + 1 < count {
            starts[idx + 1] - 1
        } else {
            ascii.len()
        };
        &ascii[starts[idx]..end]
    };

    // Each candidate is (number of labels covered by the rule, section).
    let mut best: Option<(usize, Type)> = None;
    let mut exception: Option<(usize, Type)> = None;
    let mut node = 0usize;
    let mut depth = 0usize;
    loop {
        let (first_edge, edge_total, flags) = node_rec(node);
        // The root (depth 0) stands for the empty suffix and carries no rule.
        if depth >= 1 {
            if flags & F_RULE != 0 && best.is_none_or(|(len, _)| depth > len) {
                best = Some((depth, section(flags, F_RULE_PRIV)));
            }
            if flags & F_EXC != 0 && exception.is_none_or(|(len, _)| depth > len) {
                exception = Some((depth, section(flags, F_EXC_PRIV)));
            }
        }
        // A wildcard covers one more label, so it needs a label left to consume.
        if flags & F_WILD != 0 && depth < count && best.is_none_or(|(len, _)| depth + 1 > len) {
            best = Some((depth + 1, section(flags, F_WILD_PRIV)));
        }
        if depth == count {
            break;
        }
        let next = label_at(count - 1 - depth).as_bytes();
        let Some(child) = find_child(first_edge, edge_total, next) else {
            break;
        };
        node = child;
        depth += 1;
    }

    let (suffix_labels, typ) = match (exception, best) {
        (Some((len, ty)), _) => (len - 1, Some(ty)),
        (None, Some((len, ty))) => (len, Some(ty)),
        (None, None) => (1, None),
    };
    let suffix_idx = count - suffix_labels;
    let suffix_off = starts[suffix_idx];
    let domain_off = if suffix_idx >= 1 {
        Some(starts[suffix_idx - 1])
    } else {
        None
    };
    Some(Parts {
        suffix_off,
        domain_off,
        typ,
    })
}
/// Reports whether `host` is already in the form `compute` expects:
/// non-empty, ASCII only, no uppercase letters, and no empty label (no
/// leading dot, trailing dot, or two dots in a row).
fn is_normalized_ascii(host: &str) -> bool {
    let bytes = host.as_bytes();
    let (Some(&first), Some(&last)) = (bytes.first(), bytes.last()) else {
        return false;
    };
    if first == b'.' || last == b'.' {
        return false;
    }
    let only_lower_ascii = bytes
        .iter()
        .all(|c| c.is_ascii() && !c.is_ascii_uppercase());
    let has_double_dot = bytes.windows(2).any(|pair| pair == b"..");
    only_lower_ascii && !has_double_dot
}
/// Borrowed view of a host together with its computed suffix split.
///
/// All string accessors return sub-slices of the borrowed input.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub struct Domain<'a> {
/// The host the offsets in `parts` refer to.
input: &'a str,
/// Byte offsets of the suffix and registrable domain within `input`.
parts: Parts,
}
impl<'a> Domain<'a> {
    /// Returns the whole host this view was built from.
    #[inline]
    pub fn as_str(&self) -> &'a str {
        self.input
    }

    /// Returns the public suffix part of the host.
    #[inline]
    pub fn suffix(&self) -> &'a str {
        let start = self.parts.suffix_off;
        &self.input[start..]
    }

    /// Returns the registrable domain (the suffix plus the label before it),
    /// or `None` when the host has no label in front of its suffix.
    #[inline]
    pub fn registrable_domain(&self) -> Option<&'a str> {
        let start = self.parts.domain_off?;
        Some(&self.input[start..])
    }

    /// Returns everything before the registrable domain, without the
    /// separating dot, or `None` when there is nothing before it.
    #[inline]
    pub fn subdomain(&self) -> Option<&'a str> {
        self.parts
            .domain_off
            .filter(|&start| start > 0)
            .map(|start| &self.input[..start - 1])
    }

    /// Returns `true` when the host has no registrable domain, i.e. the host
    /// is itself a public suffix.
    #[inline]
    pub fn is_public_suffix(&self) -> bool {
        matches!(self.parts.domain_off, None)
    }

    /// Returns the section of the rule that matched, or `None` when no rule
    /// matched.
    #[inline]
    pub fn typ(&self) -> Option<Type> {
        self.parts.typ
    }

    /// Returns `true` when the matched rule is in the [`Type::Icann`] section.
    #[inline]
    pub fn is_icann(&self) -> bool {
        matches!(self.parts.typ, Some(Type::Icann))
    }

    /// Returns `true` when the matched rule is in the [`Type::Private`] section.
    #[inline]
    pub fn is_private(&self) -> bool {
        matches!(self.parts.typ, Some(Type::Private))
    }

    /// Returns `true` when any rule matched the host.
    #[inline]
    pub fn is_known(&self) -> bool {
        self.typ().is_some()
    }
}
/// Splits an already-normalized host without allocating.
///
/// One trailing dot is ignored. Returns `None` when the remaining host is
/// empty, contains non-ASCII bytes or uppercase letters, contains an empty
/// label, or has more labels than `compute` accepts.
#[inline]
pub fn lookup(host: &str) -> Option<Domain<'_>> {
    let input = match host.strip_suffix('.') {
        Some(stripped) => stripped,
        None => host,
    };
    if is_normalized_ascii(input) {
        compute(input).map(|parts| Domain { input, parts })
    } else {
        None
    }
}
/// Returns the contents of `psl_version.txt`, embedded at compile time.
#[inline]
pub fn psl_version() -> &'static str {
include_str!("psl_version.txt")
}
/// Owned counterpart of [`Domain`]: a normalized ASCII host plus its computed
/// suffix split.
#[cfg(feature = "alloc")]
#[derive(Clone, Debug)]
pub struct Info {
/// The normalized host the offsets in `parts` refer to.
ascii: String,
/// Byte offsets of the suffix and registrable domain within `ascii`.
parts: Parts,
}
#[cfg(feature = "alloc")]
impl Info {
    /// Builds a borrowed [`Domain`] view over the owned host.
    #[inline]
    fn borrow(&self) -> Domain<'_> {
        let input = self.ascii.as_str();
        Domain {
            input,
            parts: self.parts,
        }
    }

    /// Returns the normalized ASCII host.
    #[inline]
    pub fn as_ascii(&self) -> &str {
        self.ascii.as_str()
    }

    /// Returns the public suffix part of the host.
    #[inline]
    pub fn suffix(&self) -> &str {
        let view = self.borrow();
        view.suffix()
    }

    /// Returns the registrable domain, or `None` when the host has no label
    /// in front of its suffix.
    #[inline]
    pub fn registrable_domain(&self) -> Option<&str> {
        let view = self.borrow();
        view.registrable_domain()
    }

    /// Returns everything before the registrable domain, or `None` when
    /// there is nothing before it.
    #[inline]
    pub fn subdomain(&self) -> Option<&str> {
        let view = self.borrow();
        view.subdomain()
    }

    /// Returns `true` when the host is itself a public suffix.
    #[inline]
    pub fn is_public_suffix(&self) -> bool {
        self.borrow().is_public_suffix()
    }

    /// Returns the section of the rule that matched, or `None` when no rule
    /// matched.
    #[inline]
    pub fn typ(&self) -> Option<Type> {
        self.parts.typ
    }

    /// Returns `true` when the matched rule is in the [`Type::Icann`] section.
    #[inline]
    pub fn is_icann(&self) -> bool {
        self.typ() == Some(Type::Icann)
    }

    /// Returns `true` when the matched rule is in the [`Type::Private`] section.
    #[inline]
    pub fn is_private(&self) -> bool {
        self.typ() == Some(Type::Private)
    }

    /// Returns `true` when any rule matched the host.
    #[inline]
    pub fn is_known(&self) -> bool {
        self.typ().is_some()
    }
}
/// Normalizes user input into the ASCII host form `compute` expects.
///
/// Surrounding whitespace and one trailing dot are removed, then the host is
/// passed through `to_ascii`. Returns `None` when the host is empty, when
/// `to_ascii` rejects it, or when the result contains an empty label.
#[cfg(feature = "alloc")]
fn normalize(domain: &str) -> Option<String> {
    let trimmed = domain.trim();
    let host = trimmed.strip_suffix('.').unwrap_or(trimmed);
    if host.is_empty() {
        return None;
    }
    to_ascii(host).filter(|ascii| {
        let malformed = ascii.is_empty()
            || ascii.starts_with('.')
            || ascii.ends_with('.')
            || ascii.contains("..");
        !malformed
    })
}
/// Converts `domain` to ASCII with `idna::domain_to_ascii`, returning `None`
/// when that conversion reports an error.
#[cfg(all(feature = "alloc", feature = "idna"))]
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
idna::domain_to_ascii(domain).ok()
}
#[cfg(all(feature = "alloc", not(feature = "idna")))]
/// ASCII-only fallback used when the `idna` feature is off: lowercases an
/// all-ASCII `domain` and rejects anything containing non-ASCII characters.
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
    domain.is_ascii().then(|| domain.to_ascii_lowercase())
}
/// Normalizes `domain` and splits it into suffix and registrable domain.
///
/// Returns `None` when normalization fails or the host has too many labels.
#[cfg(feature = "alloc")]
pub fn analyze(domain: &str) -> Option<Info> {
    normalize(domain).and_then(|ascii| {
        let parts = compute(&ascii)?;
        Some(Info { ascii, parts })
    })
}
/// Returns the public suffix of `domain` as an owned string, or `None` when
/// `domain` cannot be analyzed.
#[cfg(feature = "alloc")]
#[inline]
pub fn suffix(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    Some(info.suffix().into())
}
/// Returns the registrable domain of `domain` as an owned string, or `None`
/// when `domain` cannot be analyzed or is itself a public suffix.
#[cfg(feature = "alloc")]
#[inline]
pub fn registrable_domain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    info.registrable_domain().map(String::from)
}
/// Returns the part of `domain` in front of its registrable domain as an
/// owned string, or `None` when there is no such part or `domain` cannot be
/// analyzed.
#[cfg(feature = "alloc")]
#[inline]
pub fn subdomain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    info.subdomain().map(String::from)
}
/// Returns `true` when `domain` can be analyzed and is itself a public
/// suffix; returns `false` otherwise, including for input that cannot be
/// analyzed.
#[cfg(feature = "alloc")]
#[inline]
pub fn is_public_suffix(domain: &str) -> bool {
    match analyze(domain) {
        Some(info) => info.is_public_suffix(),
        None => false,
    }
}
// Unit tests. Expected values depend on the list snapshot embedded in the
// `trie_*.bin` files.
#[cfg(test)]
mod tests {
// The crate is `no_std`; the test module links `std` explicitly.
extern crate std;
use super::*;
#[cfg(feature = "alloc")]
use std::string::String;
// Multi-label suffix: suffix, registrable domain, subdomain and section.
#[test]
fn core_lookup_basic() {
let d = lookup("www.example.co.uk").unwrap();
assert_eq!(d.suffix(), "co.uk");
assert_eq!(d.registrable_domain(), Some("example.co.uk"));
assert_eq!(d.subdomain(), Some("www"));
assert!(d.is_icann());
}
// `lookup` performs no normalization: uppercase, non-ASCII and empty labels
// are rejected, while a single trailing dot is tolerated.
#[test]
fn core_lookup_requires_normalized_input() {
assert!(lookup("WWW.EXAMPLE.COM").is_none());
assert!(lookup("食狮.com.cn").is_none());
assert!(lookup("").is_none());
assert!(lookup(".com").is_none());
assert!(lookup("a..b").is_none());
assert_eq!(lookup("example.com.").unwrap().suffix(), "com");
}
// Wildcard handling under `ck`, plus `www.ck`, which is expected to be
// registrable with suffix `ck` (exception handling).
#[test]
fn core_wildcard_and_exception() {
assert!(lookup("foo.ck").unwrap().is_public_suffix());
assert_eq!(
lookup("a.b.test.ck").unwrap().registrable_domain(),
Some("b.test.ck")
);
assert_eq!(
lookup("www.ck").unwrap().registrable_domain(),
Some("www.ck")
);
assert_eq!(lookup("www.ck").unwrap().suffix(), "ck");
}
// With no matching rule the last label alone is the suffix and `typ` is `None`.
#[test]
fn core_unknown_tld_default_rule() {
let d = lookup("foo.nonexistenttld").unwrap();
assert_eq!(d.suffix(), "nonexistenttld");
assert_eq!(d.registrable_domain(), Some("foo.nonexistenttld"));
assert!(!d.is_known());
}
// Round-trips every entry of `rules.txt`: the text before the first tab on
// each line is taken as the rule, and `label.<rule>` must resolve to it.
#[cfg(feature = "alloc")]
#[test]
fn every_rule_resolves() {
let rules = include_str!("rules.txt");
for line in rules.lines() {
let rule = &line[..line.find('\t').unwrap()];
let host = alloc::format!("label.{rule}");
let d = lookup(&host).expect("valid host");
assert_eq!(d.suffix(), rule, "rule {rule:?}");
assert_eq!(d.registrable_domain(), Some(host.as_str()), "rule {rule:?}");
}
}
// The embedded version string is non-empty and contains no newline.
#[test]
fn version_present() {
assert!(!psl_version().is_empty());
assert!(!psl_version().contains('\n'));
}
// Owned convenience API, including degenerate inputs ("." and "com..").
#[cfg(feature = "alloc")]
#[test]
fn alloc_api() {
assert_eq!(suffix("example.com").as_deref(), Some("com"));
assert_eq!(
registrable_domain("www.example.co.uk").as_deref(),
Some("example.co.uk")
);
assert_eq!(subdomain("a.b.example.co.uk").as_deref(), Some("a.b"));
assert_eq!(registrable_domain("co.uk"), None);
assert!(is_public_suffix("co.uk"));
assert_ne!(suffix("."), Some(String::new()));
assert!(analyze("com..").is_none());
}
// The owned API lowercases its input and strips one trailing dot.
#[cfg(feature = "alloc")]
#[test]
fn alloc_normalizes_case_and_trailing_dot() {
assert_eq!(
registrable_domain("WwW.Example.COM.").as_deref(),
Some("example.com")
);
}
// A suffix expected to be reported as `Type::Private`.
#[cfg(feature = "alloc")]
#[test]
fn alloc_private_suffix() {
let info = analyze("foo.blogspot.com").unwrap();
assert_eq!(info.suffix(), "blogspot.com");
assert!(info.is_private());
assert_eq!(registrable_domain("blogspot.com"), None);
}
}