use std::io::BufReader;
use std::path::{Path, PathBuf};
use tokio::fs::File;
use tokio::io::AsyncReadExt;
use super::{ParseError, parser_for_format};
use crate::config::BlocklistFormat;
/// Errors that can occur while loading and parsing a blocklist file.
#[derive(Debug, thiserror::Error)]
pub enum LoadError {
/// Opening the file failed with `std::io::ErrorKind::NotFound`; carries the requested path.
#[error("file not found: {0:?}")]
NotFound(PathBuf),
/// Opening the file failed with `std::io::ErrorKind::PermissionDenied`; carries the requested path.
#[error("permission denied: {0:?}")]
PermissionDenied(PathBuf),
/// Any other I/O failure while opening or reading the file.
#[error("I/O error reading {path:?}")]
Io {
/// Path of the file that was being opened or read.
path: PathBuf,
/// Underlying I/O error, exposed as the error source.
#[source]
source: std::io::Error,
},
/// The parser rejected the file contents (converted automatically from `ParseError`).
#[error("parse error")]
Parse(#[from] ParseError),
/// The spawned parsing task could not be joined (converted automatically from
/// `tokio::task::JoinError`).
#[error("task join error")]
Join(#[from] tokio::task::JoinError),
}
/// Field-less type that groups the blocklist file loading routine; see [`FileLoader::load`].
pub struct FileLoader;
impl FileLoader {
    /// Reads the blocklist file at `path` and parses it into a list of entries.
    ///
    /// The file is opened and read fully into a `String` using Tokio's async file
    /// I/O. Parsing then runs on Tokio's blocking thread pool, using the parser
    /// that `parser_for_format` returns for `format`.
    ///
    /// # Errors
    ///
    /// - [`LoadError::NotFound`] or [`LoadError::PermissionDenied`] when opening the
    ///   file fails with the matching [`std::io::ErrorKind`].
    /// - [`LoadError::Io`] for any other failure while opening the file, or for any
    ///   failure while reading it into a string.
    /// - [`LoadError::Parse`] when the parser returns an error.
    /// - [`LoadError::Join`] when the blocking parse task cannot be joined.
    pub async fn load(path: &Path, format: BlocklistFormat) -> Result<Vec<String>, LoadError> {
        // The owned path is only needed for error reporting, so it is built lazily
        // inside the error closures instead of being allocated on every call.
        let mut file = File::open(path).await.map_err(|e| match e.kind() {
            std::io::ErrorKind::NotFound => LoadError::NotFound(path.to_path_buf()),
            std::io::ErrorKind::PermissionDenied => {
                LoadError::PermissionDenied(path.to_path_buf())
            }
            _ => LoadError::Io {
                path: path.to_path_buf(),
                source: e,
            },
        })?;

        let mut content = String::new();
        file.read_to_string(&mut content)
            .await
            .map_err(|e| LoadError::Io {
                path: path.to_path_buf(),
                source: e,
            })?;

        // Parsing is synchronous work, so it is moved off the async executor.
        // `content` is moved into the closure; the first `?` surfaces a join
        // failure, the second the parser's own error.
        let domains = tokio::task::spawn_blocking(move || {
            let parser = parser_for_format(format);
            let mut reader = BufReader::new(content.as_bytes());
            parser.parse(&mut reader)
        })
        .await??;

        Ok(domains)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Creates a temporary file containing exactly `contents` and flushes it.
    fn fixture(contents: &str) -> NamedTempFile {
        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(contents.as_bytes()).unwrap();
        tmp.flush().unwrap();
        tmp
    }

    /// Loads `tmp` using `format`, panicking if the load fails.
    async fn load_ok(tmp: &NamedTempFile, format: BlocklistFormat) -> Vec<String> {
        FileLoader::load(tmp.path(), format).await.unwrap()
    }

    #[tokio::test]
    async fn should_load_domains_format_file() {
        let tmp = fixture("# Comment\nexample.com\n*.ads.com\n");

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert_eq!(loaded, vec!["example.com", "*.ads.com"]);
    }

    #[tokio::test]
    async fn should_load_hosts_format_file() {
        let tmp = fixture(
            "# Hosts file\n0.0.0.0 ads.example.com\n127.0.0.1 tracking.example.com\n",
        );

        let loaded = load_ok(&tmp, BlocklistFormat::Hosts).await;

        assert_eq!(loaded, vec!["ads.example.com", "tracking.example.com"]);
    }

    #[tokio::test]
    async fn should_load_adblock_format_file() {
        let tmp = fixture(
            "! AdBlock comment\n||ads.example.com^\n||tracking.example.com^$third-party\n",
        );

        let loaded = load_ok(&tmp, BlocklistFormat::Adblock).await;

        assert_eq!(loaded, vec!["ads.example.com", "tracking.example.com"]);
    }

    #[tokio::test]
    async fn should_return_empty_vec_when_file_is_empty() {
        // Nothing is written: the freshly created temp file is already empty.
        let tmp = NamedTempFile::new().unwrap();

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert!(loaded.is_empty());
    }

    #[tokio::test]
    async fn should_return_empty_vec_when_file_has_only_comments() {
        let tmp = fixture("# This is a comment\n# Another comment\n");

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert!(loaded.is_empty());
    }

    #[tokio::test]
    async fn should_return_not_found_error_when_file_does_not_exist() {
        let missing = Path::new("/nonexistent/path/to/blocklist.txt");

        let outcome = FileLoader::load(missing, BlocklistFormat::Domains).await;

        assert!(matches!(outcome, Err(LoadError::NotFound(_))));
    }

    #[tokio::test]
    async fn should_handle_file_with_mixed_line_endings() {
        let tmp = fixture("example.com\r\ntest.com\n*.ads.com\r\n");

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert_eq!(loaded, vec!["example.com", "test.com", "*.ads.com"]);
    }

    #[tokio::test]
    async fn should_handle_large_file() {
        let contents: String = (0..10_000)
            .map(|i| format!("domain{i}.example.com\n"))
            .collect();
        let tmp = fixture(&contents);

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert_eq!(loaded.len(), 10_000);
        assert_eq!(loaded[0], "domain0.example.com");
        assert_eq!(loaded[9999], "domain9999.example.com");
    }

    #[tokio::test]
    async fn should_trim_whitespace_from_domains() {
        let tmp = fixture(" example.com \n\ttest.com\t\n");

        let loaded = load_ok(&tmp, BlocklistFormat::Domains).await;

        assert_eq!(loaded, vec!["example.com", "test.com"]);
    }
}