use std::fs;
use std::path::{Path, PathBuf};
/// Minimum size for `is_high_entropy` to consider a string at all. It is applied twice:
/// first to the byte length of the string, then to the number of ASCII letters and digits
/// it contains.
const MIN_ENTROPY_STRING_LENGTH: usize = 24;
/// Smallest share of the counted ASCII letters/digits that digits may make up for
/// `is_high_entropy` to accept a string.
const ENTROPY_RATIO_LOWER_BOUND: f32 = 0.28;
/// Largest share of the counted ASCII letters/digits that any single class may make up in
/// `is_high_entropy`. Despite the name it is applied to the digit share as well as to the
/// uppercase and lowercase shares.
const ENTROPY_RATIO_UPPER_BOUND_LETTERS: f32 = 0.38;
/// Smallest share of the counted ASCII letters/digits that uppercase letters, and
/// separately lowercase letters, may make up in `is_high_entropy`.
const ENTROPY_RATIO_LOWER_BOUND_LETTERS: f32 = 0.31;
/// Reports whether `filename` looks machine-generated rather than human-chosen.
///
/// Only the file stem (the name with its final extension removed) is inspected; when no
/// stem can be extracted the whole input is used instead. The stem is flagged as soon as
/// one of these heuristics matches:
///
/// - `is_high_entropy` accepts it,
/// - `looks_like_uuid` accepts it,
/// - it is longer than 16 bytes and `is_hex_string` accepts it,
/// - it is longer than 8 bytes and `has_no_vowels` accepts it.
#[must_use]
pub fn is_obfuscated(filename: &str) -> bool {
    let stem = match Path::new(filename).file_stem().and_then(|os| os.to_str()) {
        Some(stem) => stem,
        None => filename,
    };
    let byte_len = stem.len();

    is_high_entropy(stem)
        || looks_like_uuid(stem)
        || (byte_len > 16 && is_hex_string(stem))
        || (byte_len > 8 && has_no_vowels(stem))
}
/// Heuristic for "long, evenly mixed case-and-digit" strings.
///
/// Returns `false` when `s` is shorter than `MIN_ENTROPY_STRING_LENGTH` bytes or contains
/// fewer than that many ASCII letters and digits. Otherwise every class (uppercase,
/// lowercase, digit) must occur at least once and its share of the counted characters must
/// lie in its band: letters between `ENTROPY_RATIO_LOWER_BOUND_LETTERS` and
/// `ENTROPY_RATIO_UPPER_BOUND_LETTERS`, digits between `ENTROPY_RATIO_LOWER_BOUND` and
/// `ENTROPY_RATIO_UPPER_BOUND_LETTERS` (both ends inclusive).
fn is_high_entropy(s: &str) -> bool {
    if s.len() < MIN_ENTROPY_STRING_LENGTH {
        return false;
    }

    // Characters outside the three ASCII classes are ignored entirely.
    let uppercase = s.chars().filter(char::is_ascii_uppercase).count();
    let lowercase = s.chars().filter(char::is_ascii_lowercase).count();
    let digits = s.chars().filter(char::is_ascii_digit).count();

    let alnum = uppercase + lowercase + digits;
    if alnum < MIN_ENTROPY_STRING_LENGTH {
        return false;
    }
    let alnum_total = alnum as f32;

    // A class passes when it is present and its share sits inside `[floor, upper bound]`.
    let in_band = |count: usize, floor: f32| {
        let share = count as f32 / alnum_total;
        count > 0 && floor <= share && share <= ENTROPY_RATIO_UPPER_BOUND_LETTERS
    };

    in_band(uppercase, ENTROPY_RATIO_LOWER_BOUND_LETTERS)
        && in_band(lowercase, ENTROPY_RATIO_LOWER_BOUND_LETTERS)
        && in_band(digits, ENTROPY_RATIO_LOWER_BOUND)
}
/// Returns `true` when `s` has the shape of a UUID.
///
/// Two layouts are accepted, with hex digits in either case:
///
/// - the dashed form: 36 bytes split by `-` into groups of 8, 4, 4, 4 and 12 hex digits;
/// - the compact form: exactly 32 hex digits and nothing else.
fn looks_like_uuid(s: &str) -> bool {
    const GROUP_LENGTHS: [usize; 5] = [8, 4, 4, 4, 12];

    let all_hex = |part: &str| part.bytes().all(|b| b.is_ascii_hexdigit());

    match s.len() {
        32 => all_hex(s),
        36 => {
            let mut groups = s.split('-');
            // Consume one group per expected length; a missing, mis-sized or non-hex
            // group stops the walk early.
            let shape_ok = GROUP_LENGTHS
                .iter()
                .all(|&len| matches!(groups.next(), Some(g) if g.len() == len && all_hex(g)));
            shape_ok && groups.next().is_none()
        }
        _ => false,
    }
}
/// Returns `true` when `s` is non-empty and consists solely of ASCII hex digits
/// (`0-9`, `a-f`, `A-F`).
fn is_hex_string(s: &str) -> bool {
    // Bytes of multi-byte characters are never ASCII hex digits, so scanning bytes gives
    // the same verdict as scanning characters.
    !s.is_empty() && s.bytes().all(|b| b.is_ascii_hexdigit())
}
/// Returns `true` when `s` contains none of the ASCII vowels `a`, `e`, `i`, `o`, `u` in
/// either case. Digits, punctuation and non-ASCII characters never count as vowels, so an
/// empty or digits-only string also yields `true`.
fn has_no_vowels(s: &str) -> bool {
    s.chars()
        .all(|ch| !matches!(ch.to_ascii_lowercase(), 'a' | 'e' | 'i' | 'o' | 'u'))
}
pub fn determine_final_name(
job_name: &str,
nzb_meta_name: Option<&str>,
extracted_files: &[PathBuf],
) -> String {
if !is_obfuscated(job_name) {
return job_name.to_string();
}
if let Some(meta_name) = nzb_meta_name
&& !is_obfuscated(meta_name)
{
return meta_name.to_string();
}
if let Some(largest) = find_largest_file(extracted_files)
&& let Some(name) = largest.file_stem().and_then(|s| s.to_str())
&& !is_obfuscated(name)
{
return name.to_string();
}
job_name.to_string()
}
/// Returns a clone of the path in `files` whose target has the greatest size in bytes.
///
/// Entries whose metadata cannot be read (for example, paths that do not exist) and
/// entries that are directories are skipped. When several entries share the greatest
/// size, the one that appears first in `files` wins. Returns `None` when no entry
/// qualifies, including when `files` is empty.
pub fn find_largest_file(files: &[PathBuf]) -> Option<PathBuf> {
    let mut largest: Option<(&PathBuf, u64)> = None;

    for file in files {
        // A single metadata lookup answers both "is this a directory?" and "how big is
        // it?", so each path is queried once and both answers come from the same snapshot.
        let metadata = match fs::metadata(file) {
            Ok(metadata) if !metadata.is_dir() => metadata,
            _ => continue,
        };

        let size = metadata.len();
        // Strictly greater: an equally sized later entry never displaces an earlier one.
        let is_larger = match largest {
            Some((_, best)) => size > best,
            None => true,
        };
        if is_larger {
            largest = Some((file, size));
        }
    }

    largest.map(|(path, _)| path.clone())
}
// Fixture setup uses `unwrap` on purpose: a broken scratch directory should fail the test
// immediately instead of being handled.
#[allow(clippy::unwrap_used, clippy::expect_used)]
#[cfg(test)]
mod tests {
    use super::*;

    /// A 32-character hex job name that `is_obfuscated` flags.
    const HEX_JOB_NAME: &str = "a3f8b2c9d1e5f7a4b6c8d0e2f4a6b8c0";
    /// A dashed UUID that `is_obfuscated` flags.
    const UUID_META_NAME: &str = "550e8400-e29b-41d4-a716-446655440000";

    /// Asserts that `check` returns `expected` for every entry of `samples`.
    fn assert_all(check: fn(&str) -> bool, expected: bool, samples: &[&str]) {
        for &sample in samples {
            assert_eq!(
                check(sample),
                expected,
                "unexpected verdict for {:?}",
                sample
            );
        }
    }

    /// Recreates an empty scratch directory called `label` under the system temp dir.
    fn fresh_dir(label: &str) -> PathBuf {
        let dir = std::env::temp_dir().join(label);
        let _ = fs::remove_dir_all(&dir);
        fs::create_dir_all(&dir).unwrap();
        dir
    }

    #[test]
    fn test_is_high_entropy() {
        assert_all(
            is_high_entropy,
            true,
            &[
                "aB3cD5eF7gH9iJ1kL2mN4oP6qR8sT0uV2",
                "Xk4mP9wRt2Yz8QvN3Lb6Hj5Mk7Np1",
                "aB3cD5eF7gH9iJ1kL2mN4oP6",
            ],
        );
        assert_all(
            is_high_entropy,
            false,
            &[
                "MovieName2024",
                "episode01",
                "short",
                "EpisodeS01E01720pWEBDL",
                "aB3cD5eF7gH9iJ1kL2mN4o",
            ],
        );
    }

    #[test]
    fn test_looks_like_uuid() {
        assert_all(
            looks_like_uuid,
            true,
            &[
                "550e8400-e29b-41d4-a716-446655440000",
                "550e8400e29b41d4a716446655440000",
                "A1B2C3D4-E5F6-7890-ABCD-EF1234567890",
            ],
        );
        assert_all(
            looks_like_uuid,
            false,
            &[
                "not-a-uuid-at-all",
                "550e8400-e29b-41d4-a716",
                "550e8400-e29b-41d4-a716-446655440000-extra",
            ],
        );
    }

    #[test]
    fn test_is_hex_string() {
        assert_all(
            is_hex_string,
            true,
            &["0123456789abcdef", "ABCDEF123456", "deadbeef"],
        );
        assert_all(is_hex_string, false, &["not hex", "g123456", ""]);
    }

    #[test]
    fn test_has_no_vowels() {
        assert_all(
            has_no_vowels,
            true,
            &["xkcdmnbvcxz", "1234567890", "bcdfghjklmnpqrstvwxyz"],
        );
        assert_all(has_no_vowels, false, &["hello", "movie", "A"]);
    }

    #[test]
    fn test_is_obfuscated_uuid_patterns() {
        assert_all(
            is_obfuscated,
            true,
            &[
                "550e8400-e29b-41d4-a716-446655440000.mkv",
                "550e8400e29b41d4a716446655440000.avi",
            ],
        );
    }

    #[test]
    fn test_is_obfuscated_hex_strings() {
        assert_all(
            is_obfuscated,
            true,
            &[
                "a3f8b2c9d1e5f7a4b6c8d0e2f4a6b8c0.mp4",
                "deadbeef1234567890abcdef.mkv",
            ],
        );
        assert_all(is_obfuscated, false, &["Movie[1a2b3c4d].mkv"]);
    }

    #[test]
    fn test_is_obfuscated_no_vowels() {
        assert_all(
            is_obfuscated,
            true,
            &["xkcd1234mnbvcxz.avi", "bcdfghjklmnp.mp4"],
        );
        assert_all(is_obfuscated, false, &["cd1.mkv"]);
    }

    #[test]
    fn test_is_obfuscated_high_entropy() {
        assert_all(
            is_obfuscated,
            true,
            &["aB3cD5eF7gH9iJ1kL2mN4oP6.mkv", "Xk4mP9wRt2Yz8QvN3Lb6.avi"],
        );
    }

    #[test]
    fn test_is_obfuscated_normal_filenames() {
        assert_all(
            is_obfuscated,
            false,
            &[
                "Movie.Name.2024.1080p.BluRay.x264.mkv",
                "Episode.S01E01.720p.WEB-DL.mkv",
                "Documentary.Title.2024.mp4",
                "album_track01.mp3",
                "my-vacation-video.avi",
            ],
        );
    }

    #[test]
    fn test_is_obfuscated_edge_cases() {
        assert_all(is_obfuscated, false, &["", "a.mkv", "ab.mp4"]);
        assert_all(is_obfuscated, true, &[HEX_JOB_NAME]);
    }

    #[test]
    fn test_is_obfuscated_mixed_cases() {
        assert_all(is_obfuscated, true, &["98234ksdfj2398sdkjf.avi"]);
        assert_all(
            is_obfuscated,
            false,
            &[
                "7a4b9c2d.mkv",
                "[1a2b3c4d].mkv",
                "x264.mkv",
                "h264.mp4",
                "BD1080p.mkv",
            ],
        );
    }

    #[test]
    fn test_determine_final_name_from_job_name() {
        let picked =
            determine_final_name("Movie.Name.2024.1080p", None, &[PathBuf::from("movie.mkv")]);
        assert_eq!(picked, "Movie.Name.2024.1080p");
    }

    #[test]
    fn test_determine_final_name_from_nzb_meta() {
        let picked = determine_final_name(
            HEX_JOB_NAME,
            Some("Movie.Name.2024.1080p"),
            &[PathBuf::from("random.mkv")],
        );
        assert_eq!(picked, "Movie.Name.2024.1080p");
    }

    #[test]
    fn test_determine_final_name_from_largest_file() {
        let dir = fresh_dir("usenet_dl_test_determine_name");
        let sample = dir.join("Movie.Name.2024.sample.mkv");
        let feature = dir.join("Movie.Name.2024.1080p.mkv");
        fs::write(&sample, b"small").unwrap();
        fs::write(&feature, b"large content here").unwrap();

        // Job name and meta name are both obfuscated, so the larger file's stem is used.
        let picked = determine_final_name(HEX_JOB_NAME, Some(UUID_META_NAME), &[sample, feature]);
        assert_eq!(picked, "Movie.Name.2024.1080p");

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_determine_final_name_fallback_to_obfuscated_job_name() {
        let picked = determine_final_name(
            HEX_JOB_NAME,
            Some(UUID_META_NAME),
            &[PathBuf::from("xkcd1234mnbvcxz.mkv")],
        );
        assert_eq!(picked, HEX_JOB_NAME);
    }

    #[test]
    fn test_determine_final_name_empty_extracted_files() {
        let picked = determine_final_name("Movie.Name.2024", None, &[]);
        assert_eq!(picked, "Movie.Name.2024");
    }

    #[test]
    fn test_find_largest_file_basic() {
        let dir = fresh_dir("usenet_dl_test_largest_basic");
        let small = dir.join("small.txt");
        let large = dir.join("large.mkv");
        let medium = dir.join("medium.avi");
        fs::write(&small, b"small").unwrap();
        fs::write(&large, b"large content here with more bytes").unwrap();
        fs::write(&medium, b"medium size").unwrap();

        let found = find_largest_file(&[small, large.clone(), medium]);
        assert_eq!(found, Some(large));

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_find_largest_file_empty_list() {
        assert_eq!(find_largest_file(&[]), None);
    }

    #[test]
    fn test_find_largest_file_ignores_directories() {
        let dir = fresh_dir("usenet_dl_test_largest_dirs");
        let file = dir.join("file.mkv");
        let subdir = dir.join("subdir");
        fs::write(&file, b"content").unwrap();
        fs::create_dir(&subdir).unwrap();

        let found = find_largest_file(&[subdir, file.clone()]);
        assert_eq!(found, Some(file));

        let _ = fs::remove_dir_all(&dir);
    }

    #[test]
    fn test_find_largest_file_nonexistent_files() {
        let missing = [
            PathBuf::from("/nonexistent/file1.mkv"),
            PathBuf::from("/nonexistent/file2.avi"),
        ];
        assert_eq!(find_largest_file(&missing), None);
    }

    #[test]
    fn test_determine_final_name_with_extensions() {
        let picked = determine_final_name("Movie.Name.2024", None, &[PathBuf::from("video.mkv")]);
        assert_eq!(picked, "Movie.Name.2024");
    }
}