use std::path::{Path, PathBuf};
use super::PublicSuffixList;
/// Byte prefix that every accepted file header must start with (checked by `parse_header`).
const MAGIC: &[u8] = b".DAFSA@PSL_";
/// Number of leading file bytes that `parse_header` treats as the header; the rest is the graph.
const HEADER_LEN: usize = 16;
/// Bit of a `lookup` result that `is_public_suffix` reads as "this entry is an exception",
/// i.e. an exact match carrying it is reported as *not* a public suffix.
const FLAG_EXCEPTION: u8 = 1 << 0;
/// Bit of a `lookup` result that `is_public_suffix` reads as "names one label below this
/// entry are public suffixes".
const FLAG_WILDCARD: u8 = 1 << 1;
/// Errors produced while reading and validating a DAFSA public-suffix file.
#[derive(thiserror::Error, Debug)]
pub enum DafsaFileLoadError {
    /// The underlying I/O operation failed; converted automatically from `std::io::Error`
    /// (this is what `?` on the file read in `from_path` produces).
    #[error("io error: {0}")]
    Io(#[from] std::io::Error),
    /// The input is shorter than the fixed header length.
    #[error("file too small to contain a valid DAFSA header")]
    Truncated,
    /// The header does not start with the expected magic bytes.
    #[error("not a libpsl DAFSA file (missing or malformed magic)")]
    BadMagic,
    /// The magic matched, but the header does not end in a newline or its version field
    /// does not begin with at least one ASCII digit.
    #[error("malformed DAFSA header (magic present but header is otherwise invalid)")]
    BadHeader,
    /// The header parsed cleanly but announces a version other than `0`; the payload is the
    /// version number that was read.
    #[error("unsupported DAFSA version: {0}")]
    UnsupportedVersion(u32),
}
/// Filesystem path that `DafsaFilePublicSuffixList::from_system_file` loads.
// NOTE(review): presumably the location where distribution packages install the list;
// confirm for the platforms this crate targets.
pub const SYSTEM_PSL_DAFSA_PATH: &str = "/usr/share/publicsuffix/public_suffix_list.dafsa";
/// A public suffix list answered from the graph bytes of a DAFSA file.
pub struct DafsaFilePublicSuffixList {
    /// Raw graph bytes: the file contents that follow the fixed-size header.
    graph: Vec<u8>,
    /// Path recorded when the list was constructed; reported in the `Debug` output.
    source: PathBuf,
}
/// Compact `Debug` output: prints where the list came from and how many graph bytes it holds,
/// rather than dumping the graph itself.
impl std::fmt::Debug for DafsaFilePublicSuffixList {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let graph_bytes = self.graph.len();
        let mut out = f.debug_struct("DafsaFilePublicSuffixList");
        out.field("source", &self.source);
        out.field("graph_bytes", &graph_bytes);
        out.finish()
    }
}
impl DafsaFilePublicSuffixList {
    /// Reads the file at `path`, validates its header and keeps the graph bytes that follow.
    ///
    /// # Errors
    ///
    /// Returns [`DafsaFileLoadError::Io`] when the file cannot be read, and whatever
    /// `parse_header` reports when the header is not acceptable.
    pub fn from_path(path: impl AsRef<Path>) -> Result<Self, DafsaFileLoadError> {
        let source = path.as_ref().to_path_buf();
        let contents = std::fs::read(&source)?;
        parse_header(&contents).map(|graph| Self { graph, source })
    }

    /// Loads the list from [`SYSTEM_PSL_DAFSA_PATH`].
    ///
    /// # Errors
    ///
    /// Same as [`Self::from_path`].
    pub fn from_system_file() -> Result<Self, DafsaFileLoadError> {
        Self::from_path(SYSTEM_PSL_DAFSA_PATH)
    }

    /// Decides whether `domain`, taken as a whole, is a public suffix.
    ///
    /// An exact entry wins: it makes `domain` a suffix unless it carries `FLAG_EXCEPTION`.
    /// Without an exact entry, `domain` is a suffix only when the name after its first `.`
    /// has an entry carrying `FLAG_WILDCARD`.
    fn is_public_suffix(&self, domain: &str) -> bool {
        match lookup(&self.graph, domain.as_bytes()) {
            Some(flags) => (flags & FLAG_EXCEPTION) == 0,
            None => domain
                .split_once('.')
                .and_then(|(_, parent)| lookup(&self.graph, parent.as_bytes()))
                .is_some_and(|flags| (flags & FLAG_WILDCARD) != 0),
        }
    }
}
impl PublicSuffixList for DafsaFilePublicSuffixList {
    /// Returns the longest trailing part of `host` that is a public suffix.
    ///
    /// Candidates are tried from longest to shortest: `host` itself, then `host` with its
    /// leftmost label removed, and so on down to the last label. The first candidate that
    /// `is_public_suffix` accepts is returned; `None` means no candidate was accepted.
    fn public_suffix(&self, host: &str) -> Option<String> {
        std::iter::successors(Some(host), |name| {
            name.split_once('.').map(|(_, rest)| rest)
        })
        .find(|name| self.is_public_suffix(name))
        .map(str::to_owned)
    }
}
/// Validates the fixed-size file header and returns a copy of the graph bytes after it.
///
/// The header is the first `HEADER_LEN` bytes of `bytes`: the `MAGIC` prefix, a version
/// field whose leading ASCII digits form the version number, and a final `\n`. Bytes of the
/// version field that follow the digit run are not inspected.
///
/// # Errors
///
/// * [`DafsaFileLoadError::Truncated`] - fewer than `HEADER_LEN` bytes were supplied.
/// * [`DafsaFileLoadError::BadMagic`] - the header does not start with `MAGIC`.
/// * [`DafsaFileLoadError::BadHeader`] - the header does not end in `\n`, or the version
///   field does not start with a digit.
/// * [`DafsaFileLoadError::UnsupportedVersion`] - the version is not `0`.
fn parse_header(bytes: &[u8]) -> Result<Vec<u8>, DafsaFileLoadError> {
    if bytes.len() < HEADER_LEN {
        return Err(DafsaFileLoadError::Truncated);
    }
    let (header, graph) = bytes.split_at(HEADER_LEN);

    // The magic is checked before anything else, so a foreign file is always `BadMagic`.
    let Some(after_magic) = header.strip_prefix(MAGIC) else {
        return Err(DafsaFileLoadError::BadMagic);
    };
    // What remains is the version field followed by the terminating newline.
    let Some((&b'\n', version_field)) = after_magic.split_last() else {
        return Err(DafsaFileLoadError::BadHeader);
    };

    let digit_run = version_field
        .iter()
        .position(|byte| !byte.is_ascii_digit())
        .map_or(version_field, |end| &version_field[..end]);
    if digit_run.is_empty() {
        return Err(DafsaFileLoadError::BadHeader);
    }
    // Decimal accumulation; an overflow is reported the same way as any malformed header.
    let version = digit_run
        .iter()
        .try_fold(0u32, |acc, digit| {
            acc.checked_mul(10)?.checked_add(u32::from(digit - b'0'))
        })
        .ok_or(DafsaFileLoadError::BadHeader)?;

    match version {
        0 => Ok(graph.to_vec()),
        other => Err(DafsaFileLoadError::UnsupportedVersion(other)),
    }
}
/// Looks `key` up in `graph` and returns the value stored for it, if any.
///
/// The traversal keeps three indices: `cursor` walks the current list of child offsets,
/// `node` is the graph index of the child being examined (each decoded offset is added to
/// it), and `matched` counts the key bytes consumed so far. A child is a run of plain bytes
/// ended either by a byte with the high bit set that encodes the last character of a label
/// run, or by a return-value byte.
///
/// Returns `Some(value)` (the low four bits of the return-value byte) when the whole key is
/// consumed exactly at a return value, and `None` otherwise, including for any out-of-range
/// index in a malformed graph.
fn lookup(graph: &[u8], key: &[u8]) -> Option<u8> {
    let key_len = key.len();
    let mut cursor = 0usize;
    let mut node = 0usize;
    let mut matched = 0usize;

    while let Some(()) = get_next_offset(graph, &mut cursor, &mut node) {
        // Once a child has consumed at least one key byte, a later mismatch is final:
        // no sibling is tried after that.
        let mut committed = false;

        if matched < key_len && !is_eol(graph, node) {
            if !is_match(graph, node, key, matched) {
                // First byte differs: skip this child and try the next sibling offset.
                continue;
            }
            committed = true;
            node += 1;
            matched += 1;

            // Consume the remaining plain bytes of this child.
            loop {
                if is_eol(graph, node) || matched >= key_len {
                    break;
                }
                if !is_match(graph, node, key, matched) {
                    return None;
                }
                node += 1;
                matched += 1;
            }
        }

        // Either the key is used up (a return value must sit here) or the next key byte
        // has to match the high-bit-tagged end character of this child.
        let stepped = if matched == key_len {
            match get_return_value(graph, node) {
                Some(value) => return Some(value),
                None => false,
            }
        } else {
            is_end_char_match(graph, node, key, matched)
        };

        if !stepped {
            if committed {
                return None;
            }
            continue;
        }

        // Step over the end character; the child-offset list of this node follows it.
        node += 1;
        matched += 1;
        cursor = node;
    }
    None
}
/// Decodes the child-offset entry at `*pos`, adds it to `*offset`, and advances `*pos`.
///
/// Bits `0x60` of the first byte select the entry width:
/// * `0x60` - three bytes: the low five bits of the first byte and the next two bytes,
///   most significant first;
/// * `0x40` - two bytes: the low five bits of the first byte and the next byte;
/// * anything else - one byte: its low six bits.
///
/// When the first byte has bit `0x80` set the entry is the last of its list, and `*pos` is
/// moved to `graph.len()` so the following call returns `None`.
///
/// Returns `None` when `*pos` is out of range, when a multi-byte entry runs past the end of
/// `graph`, or when the addition overflows; `*pos` and `*offset` are left untouched then.
fn get_next_offset(graph: &[u8], pos: &mut usize, offset: &mut usize) -> Option<()> {
    let start = *pos;
    let head = *graph.get(start)?;
    let width_bits = head & 0x60;

    let (delta, width) = if width_bits == 0x60 {
        let mid = usize::from(*graph.get(start + 1)?);
        let low = usize::from(*graph.get(start + 2)?);
        ((usize::from(head & 0x1F) << 16) | (mid << 8) | low, 3)
    } else if width_bits == 0x40 {
        let low = usize::from(*graph.get(start + 1)?);
        ((usize::from(head & 0x1F) << 8) | low, 2)
    } else {
        (usize::from(head & 0x3F), 1)
    };

    *offset = offset.checked_add(delta)?;
    *pos = if head & 0x80 != 0 {
        graph.len()
    } else {
        start + width
    };
    Some(())
}
/// Reports whether the graph byte at `offset` has its high bit (`0x80`) set.
///
/// An `offset` outside `graph` yields `false`.
fn is_eol(graph: &[u8], offset: usize) -> bool {
    matches!(graph.get(offset), Some(byte) if byte & 0x80 != 0)
}
/// Reports whether the graph byte at `offset` equals the key byte at `key_pos`.
///
/// Yields `false` when either index is out of range.
fn is_match(graph: &[u8], offset: usize, key: &[u8], key_pos: usize) -> bool {
    graph
        .get(offset)
        .is_some_and(|graph_byte| key.get(key_pos) == Some(graph_byte))
}
/// Reports whether the graph byte at `offset` is the key byte at `key_pos` with bit `0x80`
/// flipped, which is how the graph tags the final character of a run.
///
/// Yields `false` when either index is out of range.
fn is_end_char_match(graph: &[u8], offset: usize, key: &[u8], key_pos: usize) -> bool {
    graph
        .get(offset)
        .zip(key.get(key_pos))
        .map_or(false, |(graph_byte, key_byte)| *graph_byte == (*key_byte ^ 0x80))
}
/// Reads the graph byte at `offset` as a return value.
///
/// A byte is a return value when its top three bits are `100` (`byte & 0xE0 == 0x80`); the
/// result is then its low four bits. Any other byte, or an out-of-range `offset`, gives
/// `None`.
fn get_return_value(graph: &[u8], offset: usize) -> Option<u8> {
    graph
        .get(offset)
        .filter(|byte| **byte & 0xE0 == 0x80)
        .map(|byte| byte & 0x0F)
}
// Unit tests driven by a small hand-encoded DAFSA file embedded below.
#[cfg(test)]
mod tests {
    use super::*;
    // A complete in-memory file: the first 16 bytes are the header `.DAFSA@PSL_0   \n`,
    // the remaining bytes are the graph. The tests below look up the entries `com`, `uk`,
    // `co.uk`, `kw`, `foo.kw` and `github.io` in it.
    const FIXTURE: &[u8] = &[
        0x2e, 0x44, 0x41, 0x46, 0x53, 0x41, 0x40, 0x50, 0x53, 0x4c, 0x5f, 0x30, 0x20, 0x20, 0x20,
        0x0a, 0x05, 0x03, 0x0a, 0x07, 0x87, 0x6b, 0x77, 0x86, 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x69, 0x6f,
        0x88, 0x66, 0x6f, 0x6f, 0x2e, 0x6b, 0x77, 0x85, 0x63, 0xef, 0x02, 0x82, 0xed, 0x84, 0x2e, 0x75, 0x6b, 0x84, ];
    // Builds a list straight from the fixture, bypassing the filesystem.
    fn loaded() -> DafsaFilePublicSuffixList {
        let graph = parse_header(FIXTURE).expect("fixture parses");
        DafsaFilePublicSuffixList {
            graph,
            source: PathBuf::from("<fixture>"),
        }
    }
    // Plain entries return their stored value unchanged.
    // NOTE(review): value 4 (bit 2) presumably marks an ICANN-section rule; this file never
    // interprets that bit, so confirm against the file-format documentation.
    #[test]
    fn lookup_simple_icann_rule() {
        let psl = loaded();
        assert_eq!(lookup(&psl.graph, b"com"), Some(4));
        assert_eq!(lookup(&psl.graph, b"uk"), Some(4));
        assert_eq!(lookup(&psl.graph, b"co.uk"), Some(4));
    }
    // `kw` carries FLAG_WILDCARD (bit 1); `foo.kw` carries FLAG_EXCEPTION (bit 0).
    #[test]
    fn lookup_wildcard_and_exception() {
        let psl = loaded();
        assert_eq!(lookup(&psl.graph, b"kw"), Some(0b0110));
        assert_eq!(lookup(&psl.graph, b"foo.kw"), Some(0b0101));
    }
    // NOTE(review): bit 3 presumably marks a private-section rule; it is not interpreted here.
    #[test]
    fn lookup_private_section() {
        let psl = loaded();
        assert_eq!(lookup(&psl.graph, b"github.io"), Some(0b1000));
    }
    // Absent names, strict prefixes, over-long names and the empty key all miss.
    #[test]
    fn lookup_unknown_returns_none() {
        let psl = loaded();
        assert_eq!(lookup(&psl.graph, b"example"), None);
        assert_eq!(lookup(&psl.graph, b"example.com"), None);
        assert_eq!(lookup(&psl.graph, b"c"), None);
        assert_eq!(lookup(&psl.graph, b"comm"), None);
        assert_eq!(lookup(&psl.graph, b""), None);
    }
    // The longest matching suffix wins (`co.uk` rather than `uk`).
    #[test]
    fn public_suffix_finds_longest_match() {
        let psl = loaded();
        assert_eq!(psl.public_suffix("example.com").as_deref(), Some("com"));
        assert_eq!(psl.public_suffix("example.co.uk").as_deref(), Some("co.uk"));
        assert_eq!(psl.public_suffix("co.uk").as_deref(), Some("co.uk"));
        assert_eq!(psl.public_suffix("uk").as_deref(), Some("uk"));
    }
    // A wildcard entry makes every name exactly one label below it a public suffix.
    #[test]
    fn public_suffix_wildcard_synthesis() {
        let psl = loaded();
        assert_eq!(
            psl.public_suffix("anything.kw").as_deref(),
            Some("anything.kw")
        );
        assert_eq!(psl.public_suffix("a.b.kw").as_deref(), Some("b.kw"));
    }
    // An exception entry is not itself a suffix, so the search falls through to its parent.
    #[test]
    fn public_suffix_exception_overrides_wildcard() {
        let psl = loaded();
        assert_eq!(psl.public_suffix("foo.kw").as_deref(), Some("kw"));
        assert_eq!(psl.public_suffix("sub.foo.kw").as_deref(), Some("kw"));
    }
    // Entries are honoured regardless of which value bits beyond the two flags are set.
    #[test]
    fn public_suffix_private_section_included() {
        let psl = loaded();
        assert_eq!(
            psl.public_suffix("repo.github.io").as_deref(),
            Some("github.io"),
        );
        assert_eq!(psl.public_suffix("github.io").as_deref(), Some("github.io"));
    }
    // Hosts with no entry in the graph have no public suffix at all.
    #[test]
    fn public_suffix_none_for_non_psl_host() {
        let psl = loaded();
        assert_eq!(psl.public_suffix("localhost"), None);
        assert_eq!(psl.public_suffix("invalid"), None);
    }
    // NOTE(review): `registrable_domain` is not defined in this file; presumably it is a
    // provided method of the `PublicSuffixList` trait built on `public_suffix`.
    #[test]
    fn registrable_domain_computed_from_suffix() {
        let psl = loaded();
        assert_eq!(
            psl.registrable_domain("login.example.com").as_deref(),
            Some("example.com"),
        );
        assert_eq!(
            psl.registrable_domain("example.com").as_deref(),
            Some("example.com"),
        );
        assert_eq!(psl.registrable_domain("com"), None);
        assert_eq!(
            psl.registrable_domain("a.b.example.co.uk").as_deref(),
            Some("example.co.uk"),
        );
    }
    // Fewer bytes than the header length.
    #[test]
    fn parse_header_rejects_truncated() {
        let too_short = &FIXTURE[..10];
        assert!(matches!(
            parse_header(too_short),
            Err(DafsaFileLoadError::Truncated)
        ));
    }
    // Corrupting the first magic byte.
    #[test]
    fn parse_header_rejects_bad_magic() {
        let mut bad = FIXTURE.to_vec();
        bad[0] = b'X';
        assert!(matches!(
            parse_header(&bad),
            Err(DafsaFileLoadError::BadMagic)
        ));
    }
    // Byte 11 is the first byte after the magic, i.e. the version digit.
    #[test]
    fn parse_header_rejects_unsupported_version() {
        let mut v1 = FIXTURE.to_vec();
        v1[11] = b'1';
        assert!(matches!(
            parse_header(&v1),
            Err(DafsaFileLoadError::UnsupportedVersion(1))
        ));
    }
    // The last header byte must be a newline.
    #[test]
    fn parse_header_rejects_missing_newline() {
        let mut bad = FIXTURE.to_vec();
        bad[HEADER_LEN - 1] = b' ';
        assert!(matches!(
            parse_header(&bad),
            Err(DafsaFileLoadError::BadHeader)
        ));
    }
}