//! Public-suffix lookups over a rule list that is embedded at compile time.
//!
//! [`analyze`] splits a host name into its suffix, registrable domain and
//! subdomain. [`suffix`], [`registrable_domain`], [`subdomain`] and
//! [`is_public_suffix`] are one-shot wrappers around it, and
//! [`psl_version`] reports the version string recorded in the embedded list.
#![forbid(unsafe_code)]
#![warn(missing_docs)]
use std::collections::HashMap;
use std::sync::OnceLock;
/// Rule list compiled into the binary from `list.txt`.
///
/// Layout as consumed by `db()`: lines that start with `#` or are shorter
/// than four bytes are skipped. For every other line, byte 0 is `P` for a
/// private rule (any other byte is treated as ICANN), byte 1 is the rule
/// kind (`N`, `W` or `E`; other kinds are ignored), and the rule text starts
/// at byte 3. `psl_version()` additionally looks for a `# PSL VERSION:` line.
static DATA: &str = include_str!("list.txt");
/// Classification attached to every rule loaded from the embedded list.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Type {
    /// The rule is marked as an ICANN rule (any record whose first byte is
    /// not `P`).
    Icann,
    /// The rule is marked as a private rule (a record whose first byte is
    /// `P`).
    Private,
}
/// Lookup tables built once from `DATA` by `db()`, one map per rule kind.
struct Db {
/// Kind `N` records: rule text mapped to its classification. `analyze`
/// matches these against a whole candidate name.
rules: HashMap<&'static str, Type>,
/// Kind `W` records. `analyze` looks these up with the candidate minus its
/// leftmost label, so a key matches any name with one extra label in front.
wildcards: HashMap<&'static str, Type>,
/// Kind `E` records. On a match `analyze` reports a suffix that is one
/// label shorter than the key.
exceptions: HashMap<&'static str, Type>,
}
/// Returns the process-wide rule tables, parsing `DATA` on first use.
///
/// Each usable record has the shape `<class><kind><sep><rule>`:
/// `class` is `P` for private rules (anything else is ICANN), `kind` selects
/// the table (`N` → rules, `W` → wildcards, `E` → exceptions), and the rule
/// text begins at byte 3. Lines starting with `#`, lines shorter than four
/// bytes and records with an unrecognised kind are ignored.
fn db() -> &'static Db {
    static DB: OnceLock<Db> = OnceLock::new();
    DB.get_or_init(|| {
        let mut tables = Db {
            rules: HashMap::new(),
            wildcards: HashMap::new(),
            exceptions: HashMap::new(),
        };
        for record in DATA.lines() {
            // Too short to hold both flags, the separator and a rule, or a comment.
            if record.len() < 4 || record.starts_with('#') {
                continue;
            }
            let flags = record.as_bytes();
            let class = match flags[0] {
                b'P' => Type::Private,
                _ => Type::Icann,
            };
            let rule = &record[3..];
            let table = match flags[1] {
                b'N' => &mut tables.rules,
                b'W' => &mut tables.wildcards,
                b'E' => &mut tables.exceptions,
                _ => continue,
            };
            table.insert(rule, class);
        }
        tables
    })
}
/// Result of [`analyze`]: the normalized name plus the offsets that split it
/// into suffix, registrable domain and subdomain.
#[derive(Debug, Clone)]
pub struct Info {
    /// Normalized form of the analyzed name.
    ascii: String,
    /// Byte offset in `ascii` at which the suffix begins.
    suffix_off: usize,
    /// Byte offset in `ascii` at which the registrable domain begins, or
    /// `None` when the suffix covers the entire name.
    domain_off: Option<usize>,
    /// Classification of the rule that decided the suffix, or `None` when no
    /// rule matched and the last label was used as the suffix.
    typ: Option<Type>,
}
impl Info {
    /// The normalized name that was analyzed.
    #[inline]
    pub fn as_ascii(&self) -> &str {
        self.ascii.as_str()
    }

    /// The suffix portion of the name.
    #[inline]
    pub fn suffix(&self) -> &str {
        let (_, tail) = self.ascii.split_at(self.suffix_off);
        tail
    }

    /// The suffix plus the one label to its left, or `None` when the name
    /// consists of the suffix alone.
    #[inline]
    pub fn registrable_domain(&self) -> Option<&str> {
        let start = self.domain_off?;
        Some(&self.ascii[start..])
    }

    /// Everything to the left of the registrable domain, without the
    /// separating dot.
    ///
    /// Returns `None` when there is no registrable domain or when the
    /// registrable domain already starts at the beginning of the name.
    #[inline]
    pub fn subdomain(&self) -> Option<&str> {
        self.domain_off
            .filter(|&start| start > 0)
            // `start - 1` is the dot that separates the two parts.
            .map(|start| &self.ascii[..start - 1])
    }

    /// `true` when the whole name is a suffix, i.e. it has no registrable
    /// domain.
    #[inline]
    pub fn is_public_suffix(&self) -> bool {
        self.domain_off.is_none()
    }

    /// Classification of the matched rule, or `None` when no rule matched.
    #[inline]
    pub fn typ(&self) -> Option<Type> {
        self.typ
    }

    /// `true` when the matched rule is classified as [`Type::Icann`].
    #[inline]
    pub fn is_icann(&self) -> bool {
        matches!(self.typ, Some(Type::Icann))
    }

    /// `true` when the matched rule is classified as [`Type::Private`].
    #[inline]
    pub fn is_private(&self) -> bool {
        matches!(self.typ, Some(Type::Private))
    }

    /// `true` when some rule from the list matched the name.
    #[inline]
    pub fn is_known(&self) -> bool {
        self.typ.is_some()
    }
}
/// Canonicalizes `domain` before lookup.
///
/// Surrounding whitespace and a single trailing dot are removed, then the
/// remainder is passed through `to_ascii`. Returns `None` when nothing is
/// left after trimming, when `to_ascii` rejects the input, or when the
/// converted name is empty, starts or ends with a dot, or contains an empty
/// label (`..`).
fn normalize(domain: &str) -> Option<String> {
    let trimmed = domain.trim();
    let host = match trimmed.strip_suffix('.') {
        Some(without_root) => without_root,
        None => trimmed,
    };
    if host.is_empty() {
        return None;
    }
    to_ascii(host).filter(|name| {
        let malformed = name.is_empty()
            || name.starts_with('.')
            || name.ends_with('.')
            || name.contains("..");
        !malformed
    })
}
/// Converts `domain` to its ASCII form with `idna::domain_to_ascii`
/// (compiled only when the `idna` feature is enabled).
///
/// Any error reported by that call is discarded and turned into `None`.
#[cfg(feature = "idna")]
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
idna::domain_to_ascii(domain).ok()
}
/// Fallback used when the `idna` feature is disabled.
///
/// ASCII-only input is returned with its letters lowercased; input
/// containing any non-ASCII character yields `None`.
#[cfg(not(feature = "idna"))]
#[inline]
fn to_ascii(domain: &str) -> Option<String> {
    domain.is_ascii().then(|| domain.to_ascii_lowercase())
}
/// Splits `domain` into suffix, registrable domain and subdomain.
///
/// The name is normalized first; `None` is returned only when normalization
/// rejects it. Every trailing run of labels is then checked against the rule
/// tables:
///
/// * an exception match wins outright and yields a suffix one label shorter
///   than the exception itself;
/// * otherwise the longest plain or wildcard match is the suffix;
/// * with no match at all, the last label is the suffix and
///   [`Info::typ`] is `None`.
pub fn analyze(domain: &str) -> Option<Info> {
    let ascii = normalize(domain)?;
    let tables = db();

    // Byte offset at which each label begins: the first label starts at 0 and
    // every later one starts right after a dot.
    let starts: Vec<usize> = std::iter::once(0)
        .chain(ascii.match_indices('.').map(|(dot, _)| dot + 1))
        .collect();
    let total = starts.len();

    // Candidates run from the full name down to the last label, so the label
    // count only shrinks and the first hit of each kind is the longest one.
    let mut rule_hit: Option<(usize, Type)> = None;
    let mut exception_hit: Option<(usize, Type)> = None;
    for (idx, &start) in starts.iter().enumerate() {
        let candidate = &ascii[start..];
        let labels = total - idx;

        if exception_hit.is_none() {
            exception_hit = tables.exceptions.get(candidate).map(|&ty| (labels, ty));
        }
        if rule_hit.is_none() {
            rule_hit = tables
                .rules
                .get(candidate)
                .or_else(|| {
                    // Wildcards are looked up by the candidate's parent, so
                    // they need at least one more label to the right.
                    let parent = &ascii[*starts.get(idx + 1)?..];
                    tables.wildcards.get(parent)
                })
                .map(|&ty| (labels, ty));
        }
    }

    let (suffix_labels, typ) = match (exception_hit, rule_hit) {
        // The exception's leftmost label belongs to the registrable domain.
        (Some((labels, ty)), _) => (labels - 1, Some(ty)),
        (None, Some((labels, ty))) => (labels, Some(ty)),
        // Nothing matched: treat the last label as the suffix.
        (None, None) => (1, None),
    };

    let suffix_idx = total - suffix_labels;
    let suffix_off = starts[suffix_idx];
    // The registrable domain adds the label just left of the suffix, if any.
    let domain_off = suffix_idx.checked_sub(1).map(|prev| starts[prev]);

    Some(Info {
        ascii,
        suffix_off,
        domain_off,
        typ,
    })
}
/// Returns the suffix of `domain` as an owned string.
///
/// `None` means [`analyze`] rejected the input.
#[inline]
pub fn suffix(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    Some(String::from(info.suffix()))
}
/// Returns the registrable domain of `domain` as an owned string.
///
/// `None` means either that [`analyze`] rejected the input or that the name
/// is itself a suffix and therefore has no registrable domain.
#[inline]
pub fn registrable_domain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    let registrable = info.registrable_domain()?;
    Some(registrable.to_owned())
}
/// Reports whether `domain` as a whole is a suffix.
///
/// Input rejected by [`analyze`] yields `false`.
#[inline]
pub fn is_public_suffix(domain: &str) -> bool {
    matches!(analyze(domain), Some(info) if info.is_public_suffix())
}
/// Returns the part of `domain` to the left of its registrable domain as an
/// owned string.
///
/// `None` means [`analyze`] rejected the input or the name has no labels
/// beyond its registrable domain.
#[inline]
pub fn subdomain(domain: &str) -> Option<String> {
    let info = analyze(domain)?;
    info.subdomain().map(String::from)
}
/// Returns the version recorded in the embedded list.
///
/// The value is the trimmed text following the first `# PSL VERSION:` line
/// prefix; `"unknown"` is returned when no such line exists.
pub fn psl_version() -> &'static str {
    for line in DATA.lines() {
        if let Some(version) = line.strip_prefix("# PSL VERSION:") {
            return version.trim();
        }
    }
    "unknown"
}
// Unit tests for the public lookup API. Every expectation that names a
// concrete suffix depends on the contents of the embedded `list.txt`.
#[cfg(test)]
mod tests {
use super::*;
// Single-label and two-label suffixes are split off correctly.
// NOTE(review): assumes `list.txt` contains `com` and `co.uk` rules.
#[test]
fn basic_suffixes() {
assert_eq!(suffix("example.com").as_deref(), Some("com"));
assert_eq!(suffix("www.example.co.uk").as_deref(), Some("co.uk"));
assert_eq!(
registrable_domain("www.example.co.uk").as_deref(),
Some("example.co.uk")
);
}
// A name that is exactly a suffix has no registrable domain.
#[test]
fn bare_suffix_has_no_registrable_domain() {
assert_eq!(registrable_domain("co.uk"), None);
assert!(is_public_suffix("co.uk"));
assert!(is_public_suffix("com"));
assert!(!is_public_suffix("example.com"));
}
// Wildcard rules extend the suffix by one label; exception rules shrink it
// by one label relative to the exception.
// NOTE(review): assumes `list.txt` has a wildcard entry for `ck` and an
// exception entry for `www.ck`.
#[test]
fn wildcard_and_exception() {
assert!(is_public_suffix("foo.ck"));
assert_eq!(
registrable_domain("a.b.test.ck").as_deref(),
Some("b.test.ck")
);
assert_eq!(registrable_domain("www.ck").as_deref(), Some("www.ck"));
assert_eq!(suffix("www.ck").as_deref(), Some("ck"));
}
// With no matching rule the last label becomes the suffix and no
// classification is reported.
#[test]
fn unknown_tld_uses_default_rule() {
let info = analyze("foo.nonexistenttld").unwrap();
assert_eq!(info.suffix(), "nonexistenttld");
assert_eq!(info.registrable_domain(), Some("foo.nonexistenttld"));
assert!(!info.is_known());
assert_eq!(info.typ(), None);
}
// NOTE(review): assumes the `com` rule is classified as ICANN in `list.txt`.
#[test]
fn classification() {
assert!(analyze("example.com").unwrap().is_icann());
}
// The subdomain is whatever sits left of the registrable domain, without
// the joining dot; it is absent when nothing sits there.
#[test]
fn subdomains() {
assert_eq!(subdomain("www.example.co.uk").as_deref(), Some("www"));
assert_eq!(subdomain("a.b.example.co.uk").as_deref(), Some("a.b"));
assert_eq!(subdomain("example.co.uk"), None);
assert_eq!(subdomain("co.uk"), None);
}
// NOTE(review): assumes `list.txt` lists `blogspot.com` as a private rule.
#[test]
fn private_suffixes() {
let info = analyze("foo.blogspot.com").unwrap();
assert_eq!(info.suffix(), "blogspot.com");
assert!(info.is_private());
assert!(!info.is_icann());
assert_eq!(registrable_domain("blogspot.com"), None);
}
// Empty names, leading dots and empty labels are rejected by normalization.
#[test]
fn malformed_inputs() {
assert!(analyze("").is_none());
assert!(analyze(".").is_none());
assert!(analyze("..").is_none());
assert!(analyze(".com").is_none());
assert!(analyze("a..b").is_none());
assert!(analyze("com..").is_none());
// A lone dot must never be reported as having an empty suffix.
assert_ne!(suffix("."), Some(String::new()));
}
// One trailing dot is dropped and the result is lowercased.
#[test]
fn trailing_dot_and_case() {
assert_eq!(
registrable_domain("WwW.Example.COM.").as_deref(),
Some("example.com")
);
}
// `psl_version` falls back to "unknown", so it is never empty.
#[test]
fn version_is_present() {
assert!(!psl_version().is_empty());
}
}