use anyhow::{bail, ensure};
use clap::Parser;
use std::cmp;
use std::fs::File;
use std::io;
use std::io::prelude::*;
use std::path::{Path, PathBuf};
// Out-of-line test module; the `cfg(test)` gate means it is only compiled for
// test builds.
#[cfg(test)]
mod unit_tests;
// Program name, used as the prefix of diagnostics written to stderr.
const NAME: &str = "b3sum";
// Argument identifiers referenced from the `conflicts_with(...)` /
// `requires(...)` attributes on `Inner`. Each string is spelled the same as
// the corresponding `Inner` field name, so the two must be kept in sync.
const DERIVE_KEY_ARG: &str = "derive_key";
const KEYED_ARG: &str = "keyed";
const LENGTH_ARG: &str = "length";
const NO_NAMES_ARG: &str = "no_names";
const RAW_ARG: &str = "raw";
const TAG_ARG: &str = "tag";
const CHECK_ARG: &str = "check";
// Raw command-line options as parsed by clap.
//
// NOTE: plain `//` comments are used here on purpose. With clap's derive API,
// `///` doc comments on the struct or its fields are turned into help text,
// so adding them would change the program's `--help` output.
#[derive(Parser)]
#[command(version, max_term_width(100))]
struct Inner {
// Input paths. When empty, `Args::parse` substitutes a single "-" entry,
// which the rest of the program treats as standard input.
file: Vec<PathBuf>,
// Keyed mode: the key is read from stdin, which is why at least one `file`
// argument is required.
#[arg(long, requires("file"))]
keyed: bool,
// Key-derivation mode with the given context string; mutually exclusive
// with `--keyed`.
#[arg(long, value_name("CONTEXT"), conflicts_with(KEYED_ARG))]
derive_key: Option<String>,
// Number of output bytes to produce; defaults to `blake3::OUT_LEN`.
#[arg(
short,
long,
default_value_t = blake3::OUT_LEN as u64,
value_name("LEN")
)]
length: u64,
// Starting position in the output stream (passed to
// `OutputReader::set_position`).
#[arg(long, default_value_t = 0, value_name("SEEK"))]
seek: u64,
// Thread count for the rayon pool built in `main`; `None` leaves the
// builder's default in place.
#[arg(long, value_name("NUM"))]
num_threads: Option<usize>,
// Read files through `File` + `update_reader` instead of
// `update_mmap_rayon`.
#[arg(long)]
no_mmap: bool,
// Print only the hash, without the file name.
#[arg(long)]
no_names: bool,
// Write the output bytes unencoded to stdout instead of hex.
#[arg(long)]
raw: bool,
// Print lines in the `BLAKE3 (<file>) = <hash>` form.
#[arg(long)]
tag: bool,
// Treat the file arguments as checksum files and verify the hashes they
// list. Conflicts with every option that changes how a hash is produced or
// printed.
#[arg(
short,
long,
conflicts_with(DERIVE_KEY_ARG),
conflicts_with(KEYED_ARG),
conflicts_with(LENGTH_ARG),
conflicts_with(RAW_ARG),
conflicts_with(TAG_ARG),
conflicts_with(NO_NAMES_ARG)
)]
check: bool,
// Only meaningful with `--check`: suppress the per-file "OK" lines.
#[arg(long, requires(CHECK_ARG))]
quiet: bool,
}
/// Parsed command-line state plus values derived from it once at startup.
struct Args {
/// The options exactly as clap parsed them.
inner: Inner,
/// The input paths to process; a single `"-"` entry when none were given.
file_args: Vec<PathBuf>,
/// Hasher configured for the selected mode (plain, keyed, or derive-key).
/// It is cloned for every input rather than rebuilt.
base_hasher: blake3::Hasher,
}
impl Args {
fn parse() -> anyhow::Result<Self> {
let inner = Inner::parse_from(wild::args_os());
let file_args = if !inner.file.is_empty() {
inner.file.clone()
} else {
vec!["-".into()]
};
if inner.raw && file_args.len() > 1 {
bail!("Only one filename can be provided when using --raw");
}
let base_hasher = if inner.keyed {
blake3::Hasher::new_keyed(&read_key_from_stdin()?)
} else if let Some(ref context) = inner.derive_key {
blake3::Hasher::new_derive_key(context)
} else {
blake3::Hasher::new()
};
Ok(Self {
inner,
file_args,
base_hasher,
})
}
fn num_threads(&self) -> Option<usize> {
self.inner.num_threads
}
fn check(&self) -> bool {
self.inner.check
}
fn raw(&self) -> bool {
self.inner.raw
}
fn tag(&self) -> bool {
self.inner.tag
}
fn no_mmap(&self) -> bool {
self.inner.no_mmap
}
fn no_names(&self) -> bool {
self.inner.no_names
}
fn len(&self) -> u64 {
self.inner.length
}
fn seek(&self) -> u64 {
self.inner.seek
}
fn keyed(&self) -> bool {
self.inner.keyed
}
fn quiet(&self) -> bool {
self.inner.quiet
}
}
/// Hashes the input named by `path` and returns an output reader positioned
/// at the `--seek` offset.
///
/// The path `-` means standard input. Regular files are hashed through
/// `update_mmap_rayon` unless `--no-mmap` was given, in which case they are
/// streamed through `update_reader`.
///
/// # Errors
///
/// Fails when `-` is requested in keyed mode (stdin was already consumed for
/// the key) or when opening/reading the input fails.
fn hash_path(args: &Args, path: &Path) -> anyhow::Result<blake3::OutputReader> {
    let from_stdin = path == Path::new("-");
    if from_stdin && args.keyed() {
        bail!("Cannot open `-` in keyed mode");
    }

    // Start from a copy of the pre-configured hasher so the mode (plain,
    // keyed, derive-key) carries over to every input.
    let mut hasher = args.base_hasher.clone();
    if from_stdin {
        hasher.update_reader(io::stdin().lock())?;
    } else if args.no_mmap() {
        let file = File::open(path)?;
        hasher.update_reader(file)?;
    } else {
        hasher.update_mmap_rayon(path)?;
    }

    let mut reader = hasher.finalize_xof();
    reader.set_position(args.seek());
    Ok(reader)
}
/// Prints `args.len()` bytes of `output` to stdout as lowercase hex, with no
/// trailing newline.
///
/// The output is pulled one `blake3::BLOCK_LEN`-sized block at a time; the
/// final block is truncated to the number of bytes still owed.
fn write_hex_output(mut output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> {
    let mut block = [0u8; blake3::BLOCK_LEN];
    let mut remaining = args.len();
    while remaining > 0 {
        output.fill(&mut block);
        // Never larger than `block.len()`, so the narrowing cast is lossless.
        let wanted = cmp::min(remaining, block.len() as u64) as usize;
        print!("{}", hex::encode(&block[..wanted]));
        remaining -= wanted as u64;
    }
    Ok(())
}
fn write_raw_output(output: blake3::OutputReader, args: &Args) -> anyhow::Result<()> {
let mut output = output.take(args.len());
let stdout = std::io::stdout();
let mut handler = stdout.lock();
std::io::copy(&mut output, &mut handler)?;
Ok(())
}
fn read_key_from_stdin() -> anyhow::Result<[u8; blake3::KEY_LEN]> {
let mut bytes = Vec::with_capacity(blake3::KEY_LEN + 1);
let n = std::io::stdin()
.lock()
.take(blake3::KEY_LEN as u64 + 1)
.read_to_end(&mut bytes)?;
if n < blake3::KEY_LEN {
bail!(
"expected {} key bytes from stdin, found {}",
blake3::KEY_LEN,
n,
)
} else if n > blake3::KEY_LEN {
bail!("read more than {} key bytes from stdin", blake3::KEY_LEN)
} else {
Ok(bytes[..blake3::KEY_LEN].try_into().unwrap())
}
}
/// A file path rendered for display in an output line.
struct FilepathString {
    /// The printable path, with backslash escapes applied where needed.
    filepath_string: String,
    /// True when `filepath_string` contains backslash escapes.
    is_escaped: bool,
}

/// Converts `filepath` into the text printed next to its hash.
///
/// The path is converted lossily to UTF-8. On Windows, backslashes are first
/// replaced with forward slashes. If the result still contains a backslash,
/// newline, or carriage return, those characters are written as `\\`, `\n`
/// and `\r` and `is_escaped` is set.
fn filepath_to_string(filepath: &Path) -> FilepathString {
    let lossy = filepath.to_string_lossy();
    let normalized: String = if cfg!(windows) {
        lossy.replace('\\', "/")
    } else {
        lossy.into_owned()
    };

    let is_escaped = normalized.contains(['\\', '\n', '\r']);
    let filepath_string = if is_escaped {
        // Single pass over the characters; every special character expands
        // to a two-character escape.
        let mut escaped = String::with_capacity(normalized.len() + 2);
        for c in normalized.chars() {
            match c {
                '\\' => escaped.push_str("\\\\"),
                '\n' => escaped.push_str("\\n"),
                '\r' => escaped.push_str("\\r"),
                other => escaped.push(other),
            }
        }
        escaped
    } else {
        normalized
    };

    FilepathString {
        filepath_string,
        is_escaped,
    }
}
/// Decodes a single lowercase hexadecimal digit into its value (0..=15).
///
/// # Errors
///
/// Returns "Invalid hex" for any other character, including uppercase
/// `A`-`F`.
fn hex_half_byte(c: char) -> anyhow::Result<u8> {
    match c {
        '0'..='9' => Ok(c as u8 - b'0'),
        'a'..='f' => Ok(c as u8 - b'a' + 10),
        _ => bail!("Invalid hex"),
    }
}
/// Rejects paths read from a checksum file that contain characters this tool
/// refuses to handle.
///
/// # Errors
///
/// Fails when the path contains a NUL character, the Unicode replacement
/// character (U+FFFD), or — on Windows only — a backslash.
fn check_for_invalid_characters(utf8_path: &str) -> anyhow::Result<()> {
    // The checks run in this order so the reported reason stays stable when
    // several apply.
    if utf8_path.contains('\0') {
        bail!("Null character in path");
    }
    // Written as an escape so the source does not depend on file encoding.
    if utf8_path.contains('\u{FFFD}') {
        bail!("Unicode replacement character in path");
    }
    let has_backslash = utf8_path.contains('\\');
    if cfg!(windows) && has_backslash {
        bail!("Backslash in path");
    }
    Ok(())
}
/// Reverses the backslash escaping applied to file names in output lines.
///
/// Recognized sequences are `\n` (newline), `\r` (carriage return) and `\\`
/// (a literal backslash).
///
/// # Errors
///
/// Returns "Invalid backslash escape" when a backslash is the last character
/// or is followed by anything other than `n`, `r` or `\`.
fn unescape(path: &str) -> anyhow::Result<String> {
    let mut unescaped = String::with_capacity(2 * path.len());
    let mut chars = path.chars();
    while let Some(c) = chars.next() {
        if c != '\\' {
            unescaped.push(c);
            continue;
        }
        // A backslash must be followed by one of the known escape letters.
        let replacement = match chars.next() {
            Some('n') => '\n',
            Some('r') => '\r',
            Some('\\') => '\\',
            _ => bail!("Invalid backslash escape"),
        };
        unescaped.push(replacement);
    }
    Ok(unescaped)
}
/// One successfully parsed line of a checksum file.
#[derive(Debug)]
struct ParsedCheckLine {
/// The file name exactly as it appeared in the line (escapes not expanded).
file_string: String,
/// True when the line started with a backslash, i.e. `file_string` uses
/// backslash escapes.
is_escaped: bool,
/// The path to open: `file_string` with escapes expanded when `is_escaped`.
file_path: PathBuf,
/// The hash the file is expected to have.
expected_hash: blake3::Hash,
}
/// Splits an untagged check line of the form `<hash> <file>` at the first
/// separator, returning `(hash, file)`.
///
/// The split is taken from the left because the file name may itself contain
/// the separator. Returns `None` when the separator is absent.
///
/// NOTE(review): md5sum-style checksum files conventionally separate the two
/// fields with two spaces. This splits on a single space, which matches what
/// `hash_one_input` prints — confirm that is intended, and change both
/// together if not.
fn split_untagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> {
    let separator = " ";
    let at = line_after_slash.find(separator)?;
    let hash = &line_after_slash[..at];
    let file = &line_after_slash[at + separator.len()..];
    Some((hash, file))
}
/// Splits a tagged check line of the form `BLAKE3 (<file>) = <hash>`,
/// returning `(file, hash)`.
///
/// The split is taken from the right because the file name may itself
/// contain `") = "`. Returns `None` when the `BLAKE3 (` prefix or the
/// `") = "` separator is missing.
fn split_tagged_check_line(line_after_slash: &str) -> Option<(&str, &str)> {
    line_after_slash
        .strip_prefix("BLAKE3 (")?
        .rsplit_once(") = ")
}
fn parse_check_line(mut line: &str) -> anyhow::Result<ParsedCheckLine> {
line = line.trim_end_matches(['\r', '\n']);
let Some(first) = line.chars().next() else {
bail!("Empty line");
};
let line_after_slash;
let is_escaped;
if first == '\\' {
is_escaped = true;
line_after_slash = &line[1..];
} else {
is_escaped = false;
line_after_slash = line;
}
let hash_hex;
let file_str;
if let Some((left, right)) = split_untagged_check_line(line_after_slash) {
hash_hex = left;
file_str = right;
} else if let Some((left, right)) = split_tagged_check_line(line_after_slash) {
file_str = left;
hash_hex = right;
} else {
bail!("Invalid check line format");
}
ensure!(hash_hex.len() == 2 * blake3::OUT_LEN, "Invalid hash length");
let mut hex_chars = hash_hex.chars();
let mut hash_bytes = [0; blake3::OUT_LEN];
for byte in &mut hash_bytes {
let high_char = hex_chars.next().unwrap();
let low_char = hex_chars.next().unwrap();
*byte = 16 * hex_half_byte(high_char)? + hex_half_byte(low_char)?;
}
let expected_hash: blake3::Hash = hash_bytes.into();
let file_path_string = if is_escaped {
unescape(file_str)?
} else {
file_str.to_string()
};
ensure!(!file_path_string.is_empty(), "empty file path");
check_for_invalid_characters(&file_path_string)?;
Ok(ParsedCheckLine {
file_string: file_str.to_string(),
is_escaped,
file_path: file_path_string.into(),
expected_hash,
})
}
/// Hashes one input and prints the result in the format selected by the
/// command-line flags.
///
/// * `--raw`: the unencoded output bytes, nothing else.
/// * `--no-names`: the hex hash followed by a newline.
/// * `--tag`: `BLAKE3 (<file>) = <hash>`.
/// * otherwise: `<hash> <file>`.
///
/// In the two forms that include the file name, a leading backslash is
/// printed when the name had to be escaped.
///
/// NOTE(review): the default form separates hash and name with a single
/// space, where md5sum-style output conventionally uses two — confirm this
/// is intended; `split_untagged_check_line` must agree with whatever is
/// printed here.
///
/// # Errors
///
/// Propagates failures from hashing the input or writing the output.
fn hash_one_input(path: &Path, args: &Args) -> anyhow::Result<()> {
    let output = hash_path(args, path)?;

    if args.raw() {
        return write_raw_output(output, args);
    }
    if args.no_names() {
        write_hex_output(output, args)?;
        println!();
        return Ok(());
    }

    let display = filepath_to_string(path);
    if display.is_escaped {
        print!("\\");
    }
    if args.tag() {
        print!("BLAKE3 ({}) = ", display.filepath_string);
        write_hex_output(output, args)?;
        println!();
    } else {
        write_hex_output(output, args)?;
        println!(" {}", display.filepath_string);
    }
    Ok(())
}
/// Verifies a single line of a checksum file and reports the outcome.
///
/// Returns `true` only when the line parses, the named file can be hashed,
/// and its hash equals the expected one. Parse errors go to stderr; per-file
/// results (`OK`, `FAILED`, `FAILED (<reason>)`) go to stdout, with `OK`
/// lines suppressed by `--quiet`.
fn check_one_line(line: &str, args: &Args) -> bool {
    let parsed = match parse_check_line(line) {
        Ok(parsed) => parsed,
        Err(e) => {
            eprintln!("{}: {}", NAME, e);
            return false;
        }
    };

    // Echo the name the way it was written, including the escape marker.
    let shown_name = if parsed.is_escaped {
        format!("\\{}", parsed.file_string)
    } else {
        parsed.file_string
    };

    let mut output = match hash_path(args, &parsed.file_path) {
        Ok(output) => output,
        Err(e) => {
            println!("{}: FAILED ({})", shown_name, e);
            return false;
        }
    };
    let mut found_bytes = [0u8; blake3::OUT_LEN];
    output.fill(&mut found_bytes);
    let found_hash = blake3::Hash::from(found_bytes);

    let matched = parsed.expected_hash == found_hash;
    if !matched {
        println!("{}: FAILED", shown_name);
    } else if !args.quiet() {
        println!("{}: OK", shown_name);
    }
    matched
}
/// Verifies every line of the checksum file at `path` (`-` means stdin).
///
/// `files_failed` is incremented (saturating) once for each line that does
/// not verify.
///
/// # Errors
///
/// Fails when the checksum file cannot be opened or read. Per-line failures
/// are counted, not returned.
fn check_one_checkfile(path: &Path, args: &Args, files_failed: &mut u64) -> anyhow::Result<()> {
    // Pick the byte source up front so the reading loop is source-agnostic.
    let source: Box<dyn Read> = if path == Path::new("-") {
        Box::new(io::stdin().lock())
    } else {
        Box::new(File::open(path)?)
    };
    let mut reader = io::BufReader::new(source);

    // One buffer reused for every line; `read_line` returns 0 at end of input.
    let mut line = String::new();
    while reader.read_line(&mut line)? != 0 {
        if !check_one_line(&line, args) {
            *files_failed = files_failed.saturating_add(1);
        }
        line.clear();
    }
    Ok(())
}
/// Entry point: parses arguments, builds the thread pool, then hashes or
/// checks every input inside that pool.
///
/// The process exits with status 1 if any input failed and 0 otherwise. In
/// hashing mode a failing input is reported on stderr and processing
/// continues; in check mode an unreadable checksum file aborts with an error.
fn main() -> anyhow::Result<()> {
    let args = Args::parse()?;

    let builder = rayon_core::ThreadPoolBuilder::new();
    let builder = match args.num_threads() {
        Some(count) => builder.num_threads(count),
        None => builder,
    };
    let thread_pool = builder.build()?;

    thread_pool.install(|| {
        let checking = args.check();
        let mut files_failed = 0u64;

        for path in &args.file_args {
            if checking {
                check_one_checkfile(path, &args, &mut files_failed)?;
            } else if let Err(e) = hash_one_input(path, &args) {
                files_failed = files_failed.saturating_add(1);
                eprintln!("{}: {}: {}", NAME, path.to_string_lossy(), e);
            }
        }

        if checking && files_failed > 0 {
            let plural = if files_failed == 1 { "" } else { "s" };
            eprintln!(
                "{}: WARNING: {} computed checksum{} did NOT match",
                NAME, files_failed, plural,
            );
        }

        // Exit explicitly so the status reflects the failure count.
        let status = if files_failed > 0 { 1 } else { 0 };
        std::process::exit(status);
    })
}
#[cfg(test)]
mod test {
    use super::Inner;
    use clap::CommandFactory;

    /// Runs clap's built-in consistency checks on the `Inner` command
    /// definition.
    #[test]
    fn test_args() {
        let command = Inner::command();
        command.debug_assert();
    }
}