use std::io::BufRead;
use super::{BlocklistParser, ParseError};
/// Parser for plain-text domain lists with one entry per line.
///
/// The `BlocklistParser` implementation in this file skips blank lines and
/// lines whose first non-whitespace character is `#`; every other line is
/// returned with its surrounding whitespace trimmed.
#[derive(Debug, Clone, Copy, Default)]
pub struct DomainListParser;
impl BlocklistParser for DomainListParser {
    /// Reads `reader` line by line and collects every domain entry.
    ///
    /// Each line is trimmed of surrounding whitespace (which also removes the
    /// `\r` of Windows line endings). Lines that are empty after trimming, or
    /// that start with `#` after trimming, are skipped. All other lines are
    /// returned verbatim, in input order: wildcard entries such as
    /// `*.example.com` are preserved, and a `#` that is not the first
    /// character of the trimmed line is kept as part of the entry.
    ///
    /// A UTF-8 byte-order mark at the very start of the input is discarded so
    /// that it neither hides a leading comment nor ends up inside the first
    /// domain.
    ///
    /// # Errors
    ///
    /// Returns the error produced by [`BufRead::read_line`] (I/O failure or
    /// input that is not valid UTF-8), converted into `ParseError` via `?`.
    fn parse(&self, reader: &mut dyn BufRead) -> Result<Vec<String>, ParseError> {
        // U+FEFF is not Unicode whitespace, so `str::trim` leaves it in place.
        const BYTE_ORDER_MARK: char = '\u{feff}';

        let mut domains = Vec::new();
        // One buffer reused for every line to avoid a per-line allocation.
        let mut line = String::new();
        let mut first_line = true;

        // `read_line` reports 0 bytes only at end of input.
        while reader.read_line(&mut line)? != 0 {
            // `mem::take` yields `true` exactly once, for the first line read.
            let raw = if std::mem::take(&mut first_line) {
                line.strip_prefix(BYTE_ORDER_MARK).unwrap_or(line.as_str())
            } else {
                line.as_str()
            };

            let entry = raw.trim();
            if !entry.is_empty() && !entry.starts_with('#') {
                domains.push(entry.to_string());
            }

            // `read_line` appends, so reset the buffer before the next read.
            line.clear();
        }

        Ok(domains)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::BufReader;

    /// Runs `DomainListParser` over an in-memory string.
    fn parse(content: &str) -> Result<Vec<String>, ParseError> {
        DomainListParser.parse(&mut BufReader::new(content.as_bytes()))
    }

    #[test]
    fn should_parse_simple_domains() {
        let content = "example.com\ntest.org\nanother.net";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com", "test.org", "another.net"]);
    }

    #[test]
    fn should_skip_comments_when_line_starts_with_hash() {
        let content = "# Comment line\nexample.com\n# Another comment\ntest.org";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com", "test.org"]);
    }

    #[test]
    fn should_skip_empty_lines() {
        let content = "example.com\n\n\ntest.org\n\n";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com", "test.org"]);
    }

    #[test]
    fn should_trim_whitespace_from_domains() {
        let content = " example.com \n\ttest.org\t\n another.net";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com", "test.org", "another.net"]);
    }

    #[test]
    fn should_preserve_wildcard_domains() {
        let content = "*.example.com\n*.ads.test.org";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["*.example.com", "*.ads.test.org"]);
    }

    #[test]
    fn should_handle_mixed_content_with_comments_and_domains() {
        let content = r"
# Blocklist for ads
example.com
*.ads.example.com
# Trackers
tracker.example.org
";
        let domains = parse(content).unwrap();
        assert_eq!(
            domains,
            vec!["example.com", "*.ads.example.com", "tracker.example.org"]
        );
    }

    #[test]
    fn should_return_empty_vec_when_file_is_empty() {
        let content = "";
        let domains = parse(content).unwrap();
        assert!(domains.is_empty());
    }

    #[test]
    fn should_return_empty_vec_when_file_contains_only_comments() {
        let content = "# Comment 1\n# Comment 2\n# Comment 3";
        let domains = parse(content).unwrap();
        assert!(domains.is_empty());
    }

    #[test]
    fn should_return_empty_vec_when_file_contains_only_empty_lines() {
        let content = "\n\n\n\n";
        let domains = parse(content).unwrap();
        assert!(domains.is_empty());
    }

    #[test]
    fn should_skip_indented_comments() {
        let content = " # Indented comment\nexample.com";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com"]);
    }

    #[test]
    fn should_preserve_inline_hash_when_not_at_start() {
        let content = "example.com#not-a-comment";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com#not-a-comment"]);
    }

    #[test]
    fn should_parse_subdomain_patterns() {
        let content = "ads.facebook.com\ntrack.google.com\n*.doubleclick.net";
        let domains = parse(content).unwrap();
        assert_eq!(
            domains,
            vec!["ads.facebook.com", "track.google.com", "*.doubleclick.net"]
        );
    }

    #[test]
    fn should_parse_real_world_blocklist_sample() {
        let content = r"
# Title: Personal blocklist
# Last updated: 2024-01-01
# Ads
ads.example.com
*.advertising.com
# Trackers
tracker1.example.org
tracker2.example.org
# Social media trackers
*.facebook.net
pixel.facebook.com
";
        let domains = parse(content).unwrap();
        // Pin the complete output and its order, not just the count and a
        // sample of entries, so a dropped or reordered domain is caught.
        assert_eq!(
            domains,
            vec![
                "ads.example.com",
                "*.advertising.com",
                "tracker1.example.org",
                "tracker2.example.org",
                "*.facebook.net",
                "pixel.facebook.com",
            ]
        );
    }

    #[test]
    fn should_handle_windows_line_endings() {
        let content = "example.com\r\ntest.org\r\n";
        let domains = parse(content).unwrap();
        assert_eq!(domains, vec!["example.com", "test.org"]);
    }

    #[test]
    fn should_handle_mixed_line_endings() {
        let content = "example.com\ntest.org\r\nanother.net\r";
        let domains = parse(content).unwrap();
        // The final line has no `\n`, and its trailing `\r` is whitespace that
        // trimming removes, so exactly three clean entries are expected.
        assert_eq!(domains, vec!["example.com", "test.org", "another.net"]);
    }
}