use crate::data_encoding;
use aho_corasick::AhoCorasick;
use std::sync::atomic::{AtomicU64, Ordering};
/// Default value for `FilterLimits::max_v0_iterations`.
pub const MAX_V0_ITERATIONS: u32 = 1000;
/// Default value for `FilterLimits::max_v1_iterations`.
pub const MAX_V1_ITERATIONS: u32 = 100;
/// Default value for `FilterLimits::max_encryption_iterations`.
pub const MAX_ENCRYPTION_ITERATIONS: u32 = 10000;
/// Iteration caps carried by a `Blocklist`.
///
/// The `Default` implementation fills each field from the matching
/// `MAX_*_ITERATIONS` constant.
///
/// NOTE(review): the code that consumes these caps is not visible in this
/// file; presumably each one bounds the retries of a different ID-generation
/// path before `FilterError::MaxIterationsExceeded` is returned — confirm.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct FilterLimits {
/// Iteration cap for the v0 path (default: `MAX_V0_ITERATIONS`).
pub max_v0_iterations: u32,
/// Iteration cap for the v1 path (default: `MAX_V1_ITERATIONS`).
pub max_v1_iterations: u32,
/// Iteration cap for the encryption path (default: `MAX_ENCRYPTION_ITERATIONS`).
pub max_encryption_iterations: u32,
}
impl Default for FilterLimits {
fn default() -> Self {
Self {
max_v0_iterations: MAX_V0_ITERATIONS,
max_v1_iterations: MAX_V1_ITERATIONS,
max_encryption_iterations: MAX_ENCRYPTION_ITERATIONS,
}
}
}
/// Errors reported by the blocklist filter.
///
/// The enum is `#[non_exhaustive]`, so code outside this crate must include a
/// wildcard arm when matching on it.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub enum FilterError {
/// No clean ID could be generated; displayed as "failed to generate clean
/// ID after N iterations; blocklist may be too restrictive".
MaxIterationsExceeded {
/// Iteration count reported in the message.
iterations: u32,
},
/// A blocklist pattern contains a character outside the allowed data
/// alphabet (`-0-9A-Za-z_`, per the display message).
InvalidPattern {
/// Position of the offending pattern in the slice passed to the constructor.
index: usize,
},
/// A blocklist pattern is the empty string.
EmptyPattern {
/// Position of the offending pattern in the slice passed to the constructor.
index: usize,
},
/// Wraps an error from the crate's `encryption` module; only present when
/// the `encryption` feature is enabled.
#[cfg(feature = "encryption")]
EncryptionError(crate::encryption::EncryptionError),
}
impl std::fmt::Display for FilterError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
Self::MaxIterationsExceeded { iterations } => {
write!(
f,
"failed to generate clean ID after {iterations} iterations; \
blocklist may be too restrictive"
)
}
Self::InvalidPattern { index } => {
write!(
f,
"invalid blocklist pattern at index {index}: \
only TNID data characters are allowed (-0-9A-Za-z_)"
)
}
Self::EmptyPattern { index } => {
write!(f, "empty blocklist pattern at index {index}")
}
#[cfg(feature = "encryption")]
Self::EncryptionError(e) => write!(f, "encryption error: {e}"),
}
}
}
impl std::error::Error for FilterError {
fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
match self {
#[cfg(feature = "encryption")]
Self::EncryptionError(e) => Some(e),
_ => None,
}
}
}
/// Lets `?` lift an encryption failure into [`FilterError::EncryptionError`].
#[cfg(feature = "encryption")]
impl From<crate::encryption::EncryptionError> for FilterError {
    fn from(source: crate::encryption::EncryptionError) -> Self {
        FilterError::EncryptionError(source)
    }
}
/// A set of forbidden substrings plus the state used while filtering IDs.
pub struct Blocklist {
/// Aho-Corasick automaton over the patterns, built ASCII-case-insensitive.
automaton: AhoCorasick,
/// Highest timestamp passed to `record_safe_timestamp` so far; starts at 0
/// and is only ever raised (`fetch_max`).
last_safe_timestamp: AtomicU64,
/// Iteration caps returned by `limits()`.
limits: FilterLimits,
}
impl Blocklist {
    /// Builds a blocklist from `patterns` using [`FilterLimits::default`].
    ///
    /// # Errors
    ///
    /// Same as [`Blocklist::with_limits`].
    ///
    /// # Panics
    ///
    /// Same as [`Blocklist::with_limits`].
    pub fn new(patterns: &[&str]) -> Result<Self, FilterError> {
        Self::with_limits(patterns, FilterLimits::default())
    }

    /// Builds a blocklist from `patterns` with explicit iteration `limits`.
    ///
    /// Matching is ASCII-case-insensitive. An empty `patterns` slice is
    /// accepted and yields a blocklist that never matches.
    ///
    /// # Errors
    ///
    /// * [`FilterError::EmptyPattern`] if a pattern is the empty string.
    /// * [`FilterError::InvalidPattern`] if a pattern is not ASCII or contains
    ///   a byte rejected by `data_encoding::is_valid_data_char`.
    ///
    /// In both cases `index` is the position of the first offending pattern.
    ///
    /// # Panics
    ///
    /// Panics if the Aho-Corasick automaton cannot be built from the
    /// (already validated) patterns.
    pub fn with_limits(patterns: &[&str], limits: FilterLimits) -> Result<Self, FilterError> {
        for (index, pattern) in patterns.iter().enumerate() {
            if pattern.is_empty() {
                return Err(FilterError::EmptyPattern { index });
            }
            if !pattern.is_ascii() || !pattern.bytes().all(data_encoding::is_valid_data_char) {
                return Err(FilterError::InvalidPattern { index });
            }
        }
        let automaton = AhoCorasick::builder()
            .ascii_case_insensitive(true)
            .build(patterns)
            .expect("failed to build Aho-Corasick automaton");
        Ok(Self {
            automaton,
            last_safe_timestamp: AtomicU64::new(0),
            limits,
        })
    }

    /// Returns the iteration caps this blocklist was built with.
    pub fn limits(&self) -> &FilterLimits {
        &self.limits
    }

    /// Returns `true` if any pattern occurs anywhere in `text`
    /// (ASCII-case-insensitively).
    pub fn contains_match(&self, text: &str) -> bool {
        self.automaton.is_match(text)
    }

    /// Returns the later of "now" (UTC, in milliseconds since the Unix epoch)
    /// and the highest timestamp recorded via `record_safe_timestamp`.
    pub(crate) fn get_starting_timestamp(&self) -> u64 {
        let now_ms = time::OffsetDateTime::now_utc().unix_timestamp_nanos() / 1_000_000;
        // The millisecond count is a signed 128-bit value. A clock set before
        // the Unix epoch makes it negative, and a bare `as u64` would wrap
        // that into an enormous timestamp. Clamp into the u64 range first so
        // the cast below is lossless.
        let current = now_ms.clamp(0, i128::from(u64::MAX)) as u64;
        let last_safe = self.last_safe_timestamp.load(Ordering::Relaxed);
        current.max(last_safe)
    }

    /// Raises the stored safe timestamp to `timestamp` if it is larger than
    /// the current value; the stored value is never lowered.
    pub(crate) fn record_safe_timestamp(&self, timestamp: u64) {
        self.last_safe_timestamp
            .fetch_max(timestamp, Ordering::Relaxed);
    }
}
/// Debug output shows the number of patterns and the limits, not the automaton.
impl std::fmt::Debug for Blocklist {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let pattern_count = self.automaton.patterns_len();

        let mut out = f.debug_struct("Blocklist");
        out.field("pattern_count", &pattern_count);
        out.field("limits", &self.limits);
        out.finish()
    }
}
/// Threshold used by [`match_touches_random_portion`]: a match whose
/// exclusive end stays at or below this value lies entirely in characters
/// `0..=6`.
pub const FIRST_CHAR_WITH_RANDOM: usize = 7;

/// Returns `true` when the match `[match_start, match_start + match_len)`
/// extends past character index 6, i.e. its exclusive end is greater than
/// [`FIRST_CHAR_WITH_RANDOM`].
///
/// The end position is computed with saturating arithmetic, so extreme
/// offsets report `true` instead of overflowing.
pub fn match_touches_random_portion(match_start: usize, match_len: usize) -> bool {
    match_start.saturating_add(match_len) > FIRST_CHAR_WITH_RANDOM
}
/// Returns `1 << (42 - 6 * char_pos)`: a single set bit that moves down six
/// bit positions for each step of `char_pos` (position 0 → bit 42,
/// position 6 → bit 6).
///
/// `char_pos` is expected to be in `0..=6`; this is checked only in builds
/// with debug assertions enabled.
pub fn timestamp_bump_for_char(char_pos: usize) -> u64 {
    debug_assert!(char_pos <= 6, "char_pos must be in timestamp portion (0-6)");
    let shift = 42 - 6 * char_pos;
    1u64 << shift
}
/// Returns `(start, len)` of the match the blocklist's automaton reports for
/// `text` via `AhoCorasick::find`, or `None` when no pattern occurs.
pub fn find_first_match(blocklist: &Blocklist, text: &str) -> Option<(usize, usize)> {
    let hit = blocklist.automaton.find(text)?;
    Some((hit.start(), hit.len()))
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Patterns match regardless of ASCII case and anywhere inside the text.
    #[test]
    fn blocklist_matches_case_insensitive() {
        let blocklist = Blocklist::new(&["TACO", "FOO"]).unwrap();

        for hit in ["TACO", "taco", "Taco", "xyzTACOxyz", "xyztacoxyz", "FOO", "foo"] {
            assert!(blocklist.contains_match(hit));
        }
        for miss in ["hello", ""] {
            assert!(!blocklist.contains_match(miss));
        }
    }

    /// A blocklist with no patterns never matches.
    #[test]
    fn blocklist_empty() {
        let blocklist = Blocklist::new(&[]).unwrap();
        assert!(!blocklist.contains_match("anything"));
    }

    /// Empty patterns are rejected and the error carries their index.
    #[test]
    fn blocklist_rejects_empty_patterns() {
        let only_empty = Blocklist::new(&[""]).unwrap_err();
        assert!(matches!(only_empty, FilterError::EmptyPattern { index: 0 }));

        let second_empty = Blocklist::new(&["TACO", ""]).unwrap_err();
        assert!(matches!(second_empty, FilterError::EmptyPattern { index: 1 }));
    }

    /// `find_first_match` reports `(start, len)` or `None`.
    #[test]
    fn find_first_match_returns_position() {
        let blocklist = Blocklist::new(&["TACO"]).unwrap();

        assert_eq!(find_first_match(&blocklist, "xyzTACOxyz"), Some((3, 4)));
        assert_eq!(find_first_match(&blocklist, "hello"), None);
    }

    /// Matches ending at or before index 7 (exclusive end) do not touch the
    /// random portion; anything reaching index 7 or beyond does.
    #[test]
    fn match_position_classification() {
        let inside_prefix = [(0, 3), (4, 3), (0, 7)];
        for (start, len) in inside_prefix {
            assert!(!match_touches_random_portion(start, len));
        }

        let reaches_random = [(5, 3), (7, 3), (8, 3), (14, 3)];
        for (start, len) in reaches_random {
            assert!(match_touches_random_portion(start, len));
        }
    }

    /// Each character position maps to a single bit, six bits apart.
    #[test]
    fn timestamp_bump_values() {
        let expected: [(usize, u64); 5] = [
            (6, 64),
            (5, 4096),
            (4, 262144),
            (3, 1 << 24),
            (0, 1 << 42),
        ];
        for (char_pos, bump) in expected {
            assert_eq!(timestamp_bump_for_char(char_pos), bump);
        }
    }

    /// Only characters from the data alphabet are accepted in patterns.
    #[test]
    fn blocklist_rejects_invalid_pattern_characters() {
        // Accepted: letters, digits, '-' and '_'.
        assert!(Blocklist::new(&["TACO"]).is_ok());
        assert!(Blocklist::new(&["hello-world_123"]).is_ok());

        // Rejected, with the index of the offending pattern.
        let with_space = Blocklist::new(&["hello world"]).unwrap_err();
        assert!(matches!(with_space, FilterError::InvalidPattern { index: 0 }));

        let with_at_sign = Blocklist::new(&["TACO", "foo@bar"]).unwrap_err();
        assert!(matches!(with_at_sign, FilterError::InvalidPattern { index: 1 }));

        // Rejected: punctuation and non-ASCII input.
        assert!(Blocklist::new(&["foo!bar"]).is_err());
        assert!(Blocklist::new(&["café"]).is_err());
    }
}