use std::collections::BTreeMap;
use std::path::{Path, PathBuf};
use clap::Parser;
use hunch::{Confidence, Pipeline};
/// File extensions (lowercase, without the leading dot) that the directory
/// scanners in this file accept as media. `is_media_extension` compares a
/// path's extension against this list ASCII-case-insensitively.
const MEDIA_EXTENSIONS: &[&str] = &[
// Video containers and streams.
"mkv", "mp4", "avi", "wmv", "flv", "ts", "m4v", "webm", "ogv", "mov", "mpg", "mpeg", "m2ts",
// Disc images and RealMedia files.
"iso", "img", "rmvb", "rm",
// Subtitle files.
"srt", "ass", "ssa", "sub", "idx", "vtt", "sup", "smi",
];
// Command-line arguments, parsed with clap's derive API.
//
// NOTE: the comments in this block are deliberately plain `//` comments.
// clap's derive macro turns `///` doc comments into user-visible help text,
// so switching these to `///` would change the program's `--help` output.
//
// Two mutually exclusive modes are declared here:
//   * single-file mode: one or more positional filenames, optionally with
//     `--context DIR`;
//   * batch mode: `--batch DIR`, optionally with `-r`.
#[derive(Parser)]
#[command(
name = "hunch",
about = "Fast, offline media filename parser — extract title, year, codec, and 40+ properties",
after_help = "EXAMPLES:
Parse a single file:
hunch 'Show.S01E03.720p.BluRay.x264-GROUP.mkv'
Parse with sibling context (improves title detection):
hunch 'S01E03.mkv' --context /path/to/show/
Batch-parse a single directory:
hunch --batch /path/to/show/ -j
Batch-parse an entire media library (RECOMMENDED):
hunch --batch /path/to/tv/ -r -j
The -r flag recurses into subdirectories and preserves the full
relative path (e.g. tv/Anime/Show/Extra/file.mkv). This gives
the parser critical context from directory names like 'tv/',
'Anime/', 'Season 1/' for accurate type detection.
Without -r, files in deep subdirectories lose their path context
and bonus content may be misclassified as movies."
)]
#[command(version)]
struct Cli {
// Positional filenames to parse; rejected by clap when `--batch` is given.
#[arg(conflicts_with = "batch_dir")]
filename: Vec<String>,
// `--context DIR`: `main` lists the media files in this directory and passes
// their names to the parser as siblings of each positional filename.
#[arg(long = "context", value_name = "DIR", conflicts_with = "batch_dir")]
context_dir: Option<PathBuf>,
// `--batch DIR`: parse every media file found in the directory instead of
// positional filenames.
#[arg(long = "batch", value_name = "DIR", conflicts_with_all = ["context_dir", "filename"])]
batch_dir: Option<PathBuf>,
// `-r` / `--recursive`: descend into subdirectories; only valid with `--batch`.
#[arg(short = 'r', long = "recursive", requires = "batch_dir")]
recursive: bool,
// `-j` / `--json`: print each result as a single-line JSON object.
#[arg(short = 'j', long = "json")]
json: bool,
// `-v` / `--verbose`: enable debug-level logging for the `hunch` module.
#[arg(short = 'v', long = "verbose")]
verbose: bool,
}
/// Program entry point.
///
/// Parses the command line, configures logging, then either hands off to
/// [`run_batch`] (when `--batch` was given) or parses each positional
/// filename, using the media files of `--context DIR` as siblings when
/// that option is present. Exits with status 1 when neither a batch
/// directory nor a filename was supplied.
fn main() {
    let cli = Cli::parse();

    // `-v` forces debug output for the `hunch` module; otherwise logging is
    // taken from the environment and defaults to "off".
    if cli.verbose {
        env_logger::Builder::new()
            .filter_module("hunch", log::LevelFilter::Debug)
            .init();
    } else {
        env_logger::Builder::from_env(env_logger::Env::default().default_filter_or("off")).init();
    }

    let pipeline = Pipeline::new();

    // Batch mode takes over completely and never falls through.
    if let Some(batch_dir) = cli.batch_dir.as_deref() {
        run_batch(&pipeline, batch_dir, cli.recursive, cli.json);
        return;
    }

    if cli.filename.is_empty() {
        eprintln!("Usage: hunch <filename>");
        eprintln!(" hunch --batch <dir>");
        std::process::exit(1);
    }

    // File names (not full paths) of the media files in the context directory.
    let siblings: Vec<String> = match cli.context_dir.as_deref() {
        Some(ctx_dir) => list_media_files(ctx_dir)
            .iter()
            .filter_map(|path| path.file_name().and_then(|name| name.to_str()))
            .map(str::to_owned)
            .collect(),
        None => Vec::new(),
    };

    // The "try --context" hint only makes sense for plain-text output when
    // no context directory was supplied.
    let hint_allowed = !cli.json && cli.context_dir.is_none() && cli.batch_dir.is_none();

    for filename in &cli.filename {
        let result = if siblings.is_empty() {
            pipeline.run(filename)
        } else {
            // A file is never its own sibling.
            let others: Vec<&str> = siblings
                .iter()
                .map(String::as_str)
                .filter(|name| *name != filename.as_str())
                .collect();
            pipeline.run_with_context(filename, &others)
        };

        print_result(filename, &result, cli.json);

        if hint_allowed && result.confidence() == Confidence::Low {
            eprintln!("\u{26a0} Low confidence result. Try: hunch --context . \"{filename}\"");
            eprintln!(" (sibling files can improve title detection)");
        }
    }
}
/// Parses every media file found in `batch_dir` and prints one result per file.
///
/// * `pipeline`  – parser used for every file.
/// * `batch_dir` – directory to scan.
/// * `recursive` – when `true`, subdirectories are scanned too and results are
///   labelled with their path relative to the batch directory; when `false`,
///   only the directory itself is scanned, results are labelled with the bare
///   file name, and a hint is printed if subdirectories contain media.
/// * `json`      – forwarded to [`print_result`].
///
/// Files are grouped by parent directory and each file is parsed with the
/// other files of its group as siblings. In recursive mode the most common
/// title of each directory is remembered and offered as a fallback title to
/// directories below it, except for directories whose name is rejected by
/// [`is_inheritance_blocking_dir`].
///
/// Terminates the process with status 1 when no media files are found.
fn run_batch(pipeline: &Pipeline, batch_dir: &Path, recursive: bool, json: bool) {
    let files = if recursive {
        list_media_files_recursive(batch_dir)
    } else {
        list_media_files(batch_dir)
    };
    if !recursive {
        warn_if_subdirs_have_media(batch_dir);
    }
    if files.is_empty() {
        eprintln!("No media files found in {}", batch_dir.display());
        std::process::exit(1);
    }

    // Paths handed to the parser are relative to the batch directory and
    // prefixed with the batch directory's own name (when it has one), so the
    // parser sees directory names as context.
    let batch_name = batch_dir.file_name().and_then(|n| n.to_str()).unwrap_or("");
    let rel_paths: Vec<String> = files
        .iter()
        .filter_map(|p| {
            let rel = p.strip_prefix(batch_dir).ok()?.to_str()?;
            if batch_name.is_empty() {
                Some(rel.to_string())
            } else {
                Some(format!("{batch_name}/{rel}"))
            }
        })
        .collect();

    // The `filter_map` above drops files whose relative path cannot be
    // represented as UTF-8, so `rel_paths` may be shorter than `files` and
    // the two must never be indexed with the same index. Tell the user
    // instead of skipping silently.
    let skipped = files.len() - rel_paths.len();
    if skipped > 0 {
        eprintln!(
            "warning: skipped {skipped} file(s) in {} whose paths are not valid UTF-8",
            batch_dir.display()
        );
    }

    let groups = group_by_parent(&rel_paths);

    // Most common title per directory key, filled in as groups are processed.
    // `groups` is a `BTreeMap`, so a directory is always visited before any
    // directory nested inside it (its key is a prefix of theirs).
    let mut dir_titles: BTreeMap<String, String> = BTreeMap::new();

    for (parent_key, indices) in &groups {
        let group_paths: Vec<&str> = indices.iter().map(|&i| rel_paths[i].as_str()).collect();

        let fallback_title: Option<&str> = if recursive {
            let dir_name = Path::new(parent_key)
                .file_name()
                .and_then(|n| n.to_str())
                .unwrap_or("");
            if is_inheritance_blocking_dir(dir_name) {
                None
            } else {
                find_ancestor_title(parent_key, &dir_titles)
            }
        } else {
            None
        };

        let mut group_titles: Vec<String> = Vec::new();
        for (pos, &idx) in indices.iter().enumerate() {
            let input = rel_paths[idx].as_str();

            // Every other file of the same directory acts as a sibling.
            let siblings: Vec<&str> = group_paths
                .iter()
                .enumerate()
                .filter(|(j, _)| *j != pos)
                .map(|(_, s)| *s)
                .collect();

            let result = pipeline.run_with_context_and_fallback(input, &siblings, fallback_title);
            if let Some(title) = result.title() {
                group_titles.push(title.to_string());
            }

            // Derive the label from the relative path itself rather than from
            // `files[idx]`: `idx` indexes `rel_paths`, which is not aligned
            // with `files` once any entry has been skipped.
            let display_name = if recursive {
                input
            } else {
                Path::new(input)
                    .file_name()
                    .and_then(|n| n.to_str())
                    .unwrap_or(input)
            };
            print_result(display_name, &result, json);
        }

        if recursive {
            if let Some(title) = most_common_title(&group_titles) {
                dir_titles.insert(parent_key.clone(), title);
            }
        }
    }
}
/// Buckets path indices by the parent directory of each path.
///
/// The returned map is keyed by the parent directory as a string (the empty
/// string for paths without a directory component); each value lists, in
/// ascending order, the indices into `rel_paths` of the paths in that
/// directory.
fn group_by_parent(rel_paths: &[String]) -> BTreeMap<String, Vec<usize>> {
    rel_paths.iter().enumerate().fold(
        BTreeMap::<String, Vec<usize>>::new(),
        |mut buckets, (index, rel)| {
            let parent_dir = match Path::new(rel).parent().and_then(Path::to_str) {
                Some(dir) => dir.to_owned(),
                None => String::new(),
            };
            buckets.entry(parent_dir).or_default().push(index);
            buckets
        },
    )
}
/// Looks up the title recorded for the nearest ancestor directory of `child_key`.
///
/// Starting from the parent of `child_key` and moving upwards one directory
/// at a time (ending with the empty key), returns the first title found in
/// `dir_titles`. `child_key` itself is never looked up. Returns `None` when
/// no ancestor has a recorded title.
fn find_ancestor_title<'a>(
    child_key: &str,
    dir_titles: &'a BTreeMap<String, String>,
) -> Option<&'a str> {
    // Yields `child_key`, its parent, its grandparent, ... and stops once
    // stepping up no longer changes the key.
    std::iter::successors(Some(child_key), |here| {
        let here: &str = *here;
        let above = Path::new(here)
            .parent()
            .and_then(Path::to_str)
            .unwrap_or("");
        (above != here).then_some(above)
    })
    .skip(1) // the child itself is not an ancestor
    .find_map(|dir| dir_titles.get(dir).map(String::as_str))
}
/// Returns the title that occurs most often in `titles`, or `None` when
/// `titles` is empty.
///
/// When several titles share the highest count, the lexicographically
/// greatest of them is returned.
fn most_common_title(titles: &[String]) -> Option<String> {
    let mut tally: BTreeMap<&str, usize> = BTreeMap::new();
    for entry in titles {
        *tally.entry(entry.as_str()).or_insert(0) += 1;
    }

    // Walk the tally in key order; a later key replaces the current leader
    // whenever its count is at least as high, so ties go to the last key.
    let mut leader: Option<(&str, usize)> = None;
    for (name, seen) in tally {
        match leader {
            Some((_, best)) if seen < best => {}
            _ => leader = Some((name, seen)),
        }
    }
    leader.map(|(name, _)| name.to_owned())
}
/// Writes one parse result to standard output.
///
/// With `json` set, the result's flat map is extended with a `_filename`
/// entry holding `filename` and printed as a single JSON line; if
/// serialization fails, an error is printed to standard error and the
/// process exits with status 1. Without `json`, the result is printed
/// through its `Display` implementation and `filename` is not used.
fn print_result(filename: &str, result: &hunch::HunchResult, json: bool) {
    if !json {
        println!("{result}");
        return;
    }

    let mut fields = result.to_flat_map();
    fields.insert(
        String::from("_filename"),
        serde_json::Value::String(filename.to_owned()),
    );

    let line = match serde_json::to_string(&fields) {
        Ok(line) => line,
        Err(e) => {
            eprintln!("Error: failed to serialize result: {e}");
            std::process::exit(1);
        }
    };
    println!("{line}");
}
/// Lists the media files located directly in `dir`, sorted by path.
///
/// Only regular files whose extension passes [`is_media_extension`] are
/// returned; symlinks, directories and entries that cannot be inspected are
/// skipped. If `dir` itself cannot be read, an error is printed to standard
/// error and the process exits with status 1.
fn list_media_files(dir: &Path) -> Vec<PathBuf> {
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => {
            eprintln!("Error: cannot read directory {}", dir.display());
            std::process::exit(1);
        }
    };

    let mut found: Vec<PathBuf> = Vec::new();
    for item in listing.filter_map(Result::ok) {
        let Ok(kind) = item.file_type() else {
            continue;
        };
        if kind.is_symlink() || !kind.is_file() {
            continue;
        }
        let candidate = item.path();
        if is_media_extension(&candidate) {
            found.push(candidate);
        }
    }
    found.sort();
    found
}
/// Depth limit for the recursive directory walkers in this file:
/// `walk_dir_inner` and `dir_contains_media_inner` return without reading a
/// directory once their `depth` argument reaches this value.
const MAX_WALK_DEPTH: usize = 32;
/// Lists the media files in `dir` and all of its subdirectories, sorted by path.
///
/// The traversal is delegated to [`walk_dir`]; this function only sorts what
/// the walk collected.
fn list_media_files_recursive(dir: &Path) -> Vec<PathBuf> {
    let mut collected: Vec<PathBuf> = Vec::new();
    walk_dir(dir, &mut collected);
    collected.sort();
    collected
}
/// Appends the media files found under `dir` to `out` by starting
/// `walk_dir_inner` with a depth counter of zero.
fn walk_dir(dir: &Path, out: &mut Vec<PathBuf>) {
walk_dir_inner(dir, out, 0);
}
/// Recursive worker behind [`walk_dir`].
///
/// Appends every regular file in `dir` that passes [`is_media_extension`] to
/// `out`, then descends into the subdirectories of `dir` in sorted order with
/// `depth + 1`. Symlinks are never followed. Directories that cannot be read,
/// entries that cannot be inspected, and anything at `depth >=
/// MAX_WALK_DEPTH` are skipped without reporting an error.
fn walk_dir_inner(dir: &Path, out: &mut Vec<PathBuf>, depth: usize) {
    if depth >= MAX_WALK_DEPTH {
        return;
    }
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    // Subdirectories are visited only after all files of this level.
    let mut pending = Vec::new();
    for item in listing.filter_map(Result::ok) {
        let kind = match item.file_type() {
            Ok(kind) if !kind.is_symlink() => kind,
            _ => continue,
        };
        let child = item.path();
        if kind.is_file() {
            if is_media_extension(&child) {
                out.push(child);
            }
        } else if kind.is_dir() {
            pending.push(child);
        }
    }

    pending.sort();
    for sub in &pending {
        walk_dir_inner(sub, out, depth + 1);
    }
}
/// Reports whether `path` has an extension listed in `MEDIA_EXTENSIONS`.
///
/// The comparison ignores ASCII case. Paths without an extension, or whose
/// extension is not valid UTF-8, yield `false`.
fn is_media_extension(path: &Path) -> bool {
    let Some(ext) = path.extension().and_then(|raw| raw.to_str()) else {
        return false;
    };
    MEDIA_EXTENSIONS
        .iter()
        .any(|known| known.eq_ignore_ascii_case(ext))
}
/// Prints a hint to standard error when immediate subdirectories of
/// `batch_dir` contain media files that a non-recursive batch run skips.
///
/// Only real directories (not symlinks) whose names are valid UTF-8 are
/// counted. Nothing is printed when `batch_dir` cannot be read or no such
/// subdirectory contains media.
fn warn_if_subdirs_have_media(batch_dir: &Path) {
    let listing = match std::fs::read_dir(batch_dir) {
        Ok(listing) => listing,
        Err(_) => return,
    };

    let mut n = 0usize;
    for item in listing.filter_map(Result::ok) {
        let is_real_dir = matches!(item.file_type(), Ok(ft) if ft.is_dir() && !ft.is_symlink());
        if !is_real_dir {
            continue;
        }
        if !dir_contains_media(&item.path()) {
            continue;
        }
        if item.file_name().to_str().is_some() {
            n += 1;
        }
    }
    if n == 0 {
        return;
    }

    let plural_suffix = if n == 1 { "y" } else { "ies" };
    let dir_display = batch_dir.display();
    eprintln!(
        "hint: found media files in {n} subdirector{} being skipped. \
        Use -r to include them\n \
        with full path context (improves type detection and title extraction).\n \
        Example: hunch --batch {dir_display} -r -j",
        plural_suffix,
    );
}
/// Reports whether `dir` or any directory below it contains a media file, by
/// starting `dir_contains_media_inner` with a depth counter of zero.
fn dir_contains_media(dir: &Path) -> bool {
dir_contains_media_inner(dir, 0)
}
/// Recursive worker behind [`dir_contains_media`].
///
/// Returns `true` as soon as a regular file passing [`is_media_extension`]
/// is found in `dir`; otherwise searches the subdirectories of `dir` with
/// `depth + 1`. Symlinks are ignored. Returns `false` when `dir` cannot be
/// read or when `depth >= MAX_WALK_DEPTH`.
fn dir_contains_media_inner(dir: &Path, depth: usize) -> bool {
    if depth >= MAX_WALK_DEPTH {
        return false;
    }
    let listing = match std::fs::read_dir(dir) {
        Ok(listing) => listing,
        Err(_) => return false,
    };

    // Files of this level are checked first; subdirectories are only
    // searched if none of them matched.
    let mut nested = Vec::new();
    for item in listing.filter_map(Result::ok) {
        let kind = match item.file_type() {
            Ok(kind) if !kind.is_symlink() => kind,
            _ => continue,
        };
        let child = item.path();
        if kind.is_file() {
            if is_media_extension(&child) {
                return true;
            }
        } else if kind.is_dir() {
            nested.push(child);
        }
    }

    for sub in &nested {
        if dir_contains_media_inner(sub, depth + 1) {
            return true;
        }
    }
    false
}
/// Reports whether `name` is one of the directory names for which
/// `run_batch` does not look up a fallback title from ancestor directories.
///
/// The match is exact apart from ASCII case.
fn is_inheritance_blocking_dir(name: &str) -> bool {
    const BLOCKING_NAMES: [&str; 9] = [
        "sample",
        "samples",
        "subs",
        "subtitles",
        "featurettes",
        "extras",
        "extra",
        "specials",
        "bonus",
    ];
    BLOCKING_NAMES
        .iter()
        .any(|blocked| name.eq_ignore_ascii_case(blocked))
}