use anyhow::{Context, Result};
use blazehash::ads::enumerate_ads;
use blazehash::algorithm::Algorithm;
use blazehash::format::{write_csv, write_dfxml, write_json, write_jsonl, write_sumfile};
use blazehash::hash::{hash_file, FileHashResult};
use blazehash::manifest::{write_header, write_record};
use blazehash::output::make_writer;
use blazehash::resume::ResumeState;
use blazehash::walk::walk_and_hash;
use blazehash::walk_filter::WalkFilter;
use std::fs;
use std::io::Write;
use std::path::PathBuf;
use super::report_walk_errors;
/// Borrowed bundle of options for the `hash` subcommand (see [`run`]).
pub struct HashOptions<'a> {
    /// Files and/or directories to hash.
    pub paths: &'a [PathBuf],
    /// Digest algorithms to compute for each file.
    pub algorithms: &'a [Algorithm],
    /// Recurse into subdirectories when a path is a directory.
    pub recursive: bool,
    /// Output format selector: "csv", "dfxml", "json", "jsonl",
    /// "sha256sum"/"md5sum"; anything else falls back to the native manifest.
    pub format: &'a str,
    /// Suppress the manifest header line (manifest format only).
    pub bare: bool,
    /// Skip files already recorded in an existing output manifest.
    pub resume: bool,
    /// Destination file; `None` presumably writes to stdout via
    /// `make_writer` — TODO confirm.
    pub output: Option<&'a PathBuf>,
    /// Forwarded to `hash_file`; disables its cache layer (semantics live in
    /// `blazehash::hash`).
    pub no_cache: bool,
    /// Forwarded to `hash_file`; disables GPU acceleration.
    pub no_gpu: bool,
    /// Include/exclude rules applied during directory walks.
    pub filter: &'a WalkFilter,
    /// Path to an NSRL lookup database (only used with the "nsrl" feature).
    pub nsrl: Option<&'a PathBuf>,
    /// Drop NSRL known-good matches from the output instead of just flagging.
    pub nsrl_exclude: bool,
    /// Sign the output manifest after writing (requires `output`).
    pub sign: bool,
    /// Also hash alternate data streams enumerated by `enumerate_ads`
    /// (NTFS ADS, presumably — confirm against `blazehash::ads`).
    pub ads: bool,
}
/// Entry point for the `hash` subcommand: hashes every requested path,
/// optionally filters results against an NSRL set, and writes them in the
/// selected output format.
///
/// # Errors
/// Propagates I/O, hashing, walk, NSRL-open, and signing failures.
pub fn run(opts: HashOptions<'_>) -> Result<()> {
    // Destructure once so the rest of the function uses plain locals.
    let HashOptions {
        paths,
        algorithms,
        recursive,
        format,
        bare,
        resume,
        output,
        no_cache,
        no_gpu,
        filter,
        nsrl,
        nsrl_exclude,
        sign,
        ads,
    } = opts;
    // Resume support: reload the prior manifest (if any) so files already
    // hashed by an earlier, interrupted run are skipped.
    let mut resume_state = load_resume_state(resume, output)?;
    // When resuming into an existing manifest, append rather than truncate.
    let append = resume && output.is_some_and(|p| p.exists());
    let mut writer = make_writer(output.map(|p| p.as_path()), append)?;
    // `mut` is only exercised when the "nsrl" feature is on (the filter below
    // reassigns `all_results`) — hence the allow.
    #[allow(unused_mut)]
    let mut all_results = collect_results(
        paths,
        algorithms,
        recursive,
        &mut resume_state,
        no_cache,
        no_gpu,
        filter,
        ads,
    )?;
    // NSRL known-good screening (feature-gated): flag every match on stderr;
    // additionally remove matches from the results when --nsrl-exclude is set.
    #[cfg(feature = "nsrl")]
    if let Some(nsrl_path) = nsrl {
        let lookup = blazehash::nsrl::NsrlLookup::open(nsrl_path)?;
        let mut known_count = 0usize;
        all_results = all_results
            .into_iter()
            .filter(|r| {
                // Lookup key preference: SHA-256 first, then MD5; an empty
                // string (neither hash present) will simply not match.
                let hash_val = r
                    .hashes
                    .get(&Algorithm::Sha256)
                    .or_else(|| r.hashes.get(&Algorithm::Md5))
                    .map(|s| s.as_str())
                    .unwrap_or("");
                if lookup.lookup(hash_val) == blazehash::nsrl::NsrlResult::KnownGood {
                    eprintln!("[K] {} (NSRL known-good)", r.path.display());
                    known_count += 1;
                    // Keep the record unless exclusion was requested.
                    !nsrl_exclude
                } else {
                    true
                }
            })
            .collect();
        if known_count > 0 {
            eprintln!("[K] {known_count} file(s) matched NSRL");
        }
    }
    // Without the feature, consume the NSRL options to avoid unused warnings.
    #[cfg(not(feature = "nsrl"))]
    let _ = (nsrl, nsrl_exclude);
    // Header is suppressed for --bare output and when appending to an
    // existing manifest (which already has one).
    let needs_header = !(bare || append);
    write_output(&mut writer, &all_results, algorithms, format, needs_header)?;
    writer.flush()?;
    if sign {
        // Signing operates on the finished file, so --output is mandatory.
        let output_path = output.ok_or_else(|| anyhow::anyhow!("--sign requires --output"))?;
        blazehash::signing::sign(output_path)?;
    }
    Ok(())
}
/// Build the resume bookkeeping for this invocation.
///
/// Yields an empty state unless resuming was requested *and* a prior
/// manifest exists at the output path, in which case that manifest is parsed
/// so previously hashed files can be skipped.
///
/// # Errors
/// Fails if the existing manifest cannot be read or parsed.
fn load_resume_state(resume: bool, output: Option<&PathBuf>) -> Result<ResumeState> {
    // Only a pre-existing manifest under --resume triggers a reload.
    let Some(manifest) = output.filter(|p| resume && p.exists()) else {
        return Ok(ResumeState::new());
    };
    let content = fs::read_to_string(manifest)
        .with_context(|| format!("failed to read manifest for resume: {}", manifest.display()))?;
    ResumeState::from_manifest(&content)
}
/// Hash every requested path, honoring resume state and (optionally)
/// alternate data streams.
///
/// Single files are hashed directly; directories go through `walk_and_hash`
/// (with walk errors reported, not fatal). Files already present in
/// `resume_state` are skipped. For each hashed file, any ADS results are
/// pushed *before* the file's own result.
///
/// # Errors
/// Fails on the first single-file hash error or directory-walk error.
#[allow(clippy::too_many_arguments)]
fn collect_results(
    paths: &[PathBuf],
    algorithms: &[Algorithm],
    recursive: bool,
    resume_state: &mut ResumeState,
    no_cache: bool,
    no_gpu: bool,
    filter: &WalkFilter,
    ads: bool,
) -> Result<Vec<FileHashResult>> {
    let mut all_results = Vec::new();
    for path in paths {
        if path.is_file() {
            if resume_state.is_done(path) {
                continue;
            }
            let result = hash_file(path, algorithms, no_cache, no_gpu)
                .with_context(|| format!("failed to hash {}", path.display()))?;
            resume_state.mark_done(path.clone());
            if ads {
                hash_ads_streams(path, algorithms, no_cache, no_gpu, &mut all_results);
            }
            all_results.push(result);
        } else if path.is_dir() {
            let output = walk_and_hash(path, algorithms, recursive, filter)?;
            report_walk_errors(&output.errors);
            for r in output.results {
                if resume_state.is_done(&r.path) {
                    continue;
                }
                // Mark done before ADS hashing, mirroring the single-file
                // branch above (previously the two branches disagreed).
                resume_state.mark_done(r.path.clone());
                if ads {
                    hash_ads_streams(&r.path, algorithms, no_cache, no_gpu, &mut all_results);
                }
                all_results.push(r);
            }
        } else {
            // Fix: a path that is neither a regular file nor a directory
            // (typically a typo'd/nonexistent path, or an unsupported file
            // type) was previously dropped with no diagnostic at all.
            eprintln!(
                "[!] Skipping {}: not a file or directory (missing or unsupported type)",
                path.display()
            );
        }
    }
    Ok(all_results)
}
/// Hash every alternate data stream attached to `path`, appending successful
/// results to `results`.
///
/// Per-stream hash failures are reported on stderr and otherwise ignored so
/// one bad stream does not abort the run.
fn hash_ads_streams(
    path: &std::path::Path,
    algorithms: &[Algorithm],
    no_cache: bool,
    no_gpu: bool,
    results: &mut Vec<FileHashResult>,
) {
    for ads_path in enumerate_ads(path) {
        let outcome = hash_file(&ads_path, algorithms, no_cache, no_gpu);
        match outcome {
            Ok(hashed) => results.push(hashed),
            Err(e) => eprintln!("[!] Failed to hash ADS {}: {e}", ads_path.display()),
        }
    }
}
/// Serialize `results` to `writer` in the requested `format`.
///
/// Any unrecognized format string falls back to the native manifest layout,
/// with the header emitted only when `needs_header` is set (i.e. not in
/// --bare mode and not when appending to an existing manifest).
///
/// # Errors
/// Propagates any serialization or write failure.
fn write_output<W: Write>(
    writer: &mut W,
    results: &[FileHashResult],
    algorithms: &[Algorithm],
    format: &str,
    needs_header: bool,
) -> Result<()> {
    if format == "csv" {
        write_csv(writer, results, algorithms)?;
    } else if format == "dfxml" {
        write_dfxml(writer, results, algorithms)?;
    } else if format == "json" {
        write_json(writer, results, algorithms)?;
    } else if format == "jsonl" {
        write_jsonl(writer, results, algorithms)?;
    } else if format == "sha256sum" || format == "md5sum" {
        write_sumfile(writer, results, algorithms)?;
    } else {
        // Native manifest: optional header followed by one record per file.
        if needs_header {
            write_header(writer, algorithms)?;
        }
        for result in results {
            write_record(writer, result, algorithms)?;
        }
    }
    Ok(())
}