use anyhow::{bail, Context, Result};
use openssl::sha;
use regex::Regex;
use std::fs::OpenOptions;
use std::io::{self, stdin, stdout, BufRead, BufReader, Read, Write};
use crate::cmdline::*;
/// Largest chunk size, in bytes, that a hash file header may declare (64 MiB).
///
/// A buffer of the declared chunk size is allocated before any input is read,
/// so this also bounds that allocation.
const MAX_CHUNK_SIZE: usize = 64 * 1024 * 1024;
/// Verifies standard input against the hash file named by `config.hash_file`,
/// passing the verified data through to standard output.
///
/// # Errors
///
/// Returns an error if the hash file cannot be opened, or if the verification
/// itself fails.
pub fn stream_hash(config: StreamHashConfig) -> Result<()> {
    let path = &config.hash_file;
    let mut hashes = OpenOptions::new()
        .read(true)
        .open(path)
        .with_context(|| format!("opening {}", path))?;

    // Keep the stream handles in locals; each one is locked for the duration
    // of the verification call below.
    let source = stdin();
    let sink = stdout();
    do_stream_hash(&mut hashes, &mut source.lock(), &mut sink.lock())
}
/// Copies `input` to `output` one chunk at a time, checking every chunk
/// against the digests listed in `hash_file`.
///
/// The hash file starts with a header line of the form
/// `stream-hash <algorithm> <chunk-size>`, followed by one hex-encoded digest
/// per line. Only `sha256` is accepted as the algorithm, and the chunk size
/// must be between 1 and [`MAX_CHUNK_SIZE`] bytes. Each digest line consumes up
/// to `<chunk-size>` bytes of input; a chunk is shorter only when the input
/// ends. A chunk is written to `output` only after its digest has matched.
///
/// # Errors
///
/// Returns an error if:
/// - the hash file is empty, unreadable, or has a malformed header;
/// - the digest algorithm is unknown or the chunk size is out of range;
/// - a digest line is not valid hex;
/// - the input ends before all digests are consumed, or continues after the
///   last one;
/// - a chunk's digest does not match;
/// - reading the input, or writing or flushing the output, fails.
fn do_stream_hash(
    hash_file: &mut impl Read,
    input: &mut impl Read,
    output: &mut impl Write,
) -> Result<()> {
    let mut hash_file = BufReader::new(hash_file);

    // Parse the header line.
    let mut line = String::new();
    if hash_file
        .read_line(&mut line)
        .context("reading hash file")?
        == 0
    {
        bail!("hash file is empty");
    }
    let captures = Regex::new(r"^stream-hash ([a-z0-9]+) ([0-9]+)\n$")
        .expect("compiling RE")
        .captures(&line)
        .context("couldn't parse hash file header")?;
    let hash_func = match captures
        .get(1)
        .expect("digest algorithm not found")
        .as_str()
    {
        "sha256" => sha::sha256,
        d => bail!("unknown digest algorithm {}", d),
    };
    let chunk_size = captures
        .get(2)
        .expect("chunk size not found")
        .as_str()
        .parse::<usize>()
        .context("couldn't parse chunk size")?;
    if chunk_size == 0 {
        bail!("chunk size cannot be zero");
    } else if chunk_size > MAX_CHUNK_SIZE {
        bail!(
            "chunk size {} is greater than maximum {}",
            chunk_size,
            MAX_CHUNK_SIZE
        );
    }

    // One reusable buffer holds each chunk while it is being verified.
    let mut buf = vec![0u8; chunk_size];
    let mut offset: u64 = 0;
    for line in hash_file.lines() {
        let line = line.context("couldn't read hash from hash file")?;
        let expected_hash =
            hex::decode(&line).with_context(|| format!("couldn't decode hash: {line:?}"))?;

        // Read the next chunk; it is short only if the input ended.
        let count = read_until_full(input, &mut buf).context("reading input")?;
        if count == 0 {
            bail!("premature end of input data at offset {}", offset);
        }
        let data = &buf[..count];

        // Verify before writing so unverified data never reaches the output.
        let found_hash = hash_func(data);
        if expected_hash != found_hash {
            bail!(
                "hash mismatch at offset {}; expected {}, found {}",
                offset,
                hex::encode(expected_hash),
                hex::encode(found_hash)
            );
        }
        output.write_all(data).context("writing output")?;
        offset += data.len() as u64;
    }

    // All digests are consumed, so the input must be exhausted too. Going
    // through the helper means an interrupted read is retried here as well,
    // instead of being reported as a failure.
    if read_until_full(input, &mut buf[..1]).context("draining input")? != 0 {
        bail!("found extra input data at offset {}", offset);
    }

    // Flush explicitly so that a failure to deliver buffered data is reported
    // rather than silently dropped when the writer goes away.
    output.flush().context("flushing output")?;
    Ok(())
}

/// Reads from `input` until `buf` is full or the input ends, retrying reads
/// that fail with `ErrorKind::Interrupted`; returns the number of bytes read.
fn read_until_full(input: &mut impl Read, buf: &mut [u8]) -> io::Result<usize> {
    let mut count = 0;
    // Stop as soon as the buffer is full, so no zero-length read is issued.
    while count < buf.len() {
        match input.read(&mut buf[count..]) {
            Ok(0) => break,
            Ok(n) => count += n,
            Err(ref e) if e.kind() == io::ErrorKind::Interrupted => continue,
            Err(e) => return Err(e),
        }
    }
    Ok(count)
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    /// Table-driven check of `do_stream_hash`: every case supplies the hash
    /// file contents, the input stream, and the expected error message
    /// (`None` when the call must succeed and echo the input unchanged).
    #[test]
    fn test_stream_hash() {
        struct Case {
            hashes: &'static str,
            data: &'static str,
            want: Option<&'static str>,
        }

        let cases = [
            // Header problems.
            Case {
                hashes: "",
                data: "",
                want: Some("hash file is empty"),
            },
            Case {
                hashes: "aardvark\n",
                data: "",
                want: Some("couldn't parse hash file header"),
            },
            Case {
                hashes: " stream-hash sha256 1234 \n",
                data: "",
                want: Some("couldn't parse hash file header"),
            },
            Case {
                hashes: "stream-hash sha255 1234\n",
                data: "",
                want: Some("unknown digest algorithm sha255"),
            },
            Case {
                hashes: "stream-hash sha256 0\n",
                data: "",
                want: Some("chunk size cannot be zero"),
            },
            Case {
                hashes: "stream-hash sha256 134217728\n",
                data: "",
                want: Some("chunk size 134217728 is greater than maximum 67108864"),
            },
            // Digest line that is not valid hex.
            Case {
                hashes: "stream-hash sha256 8\nasdf\n",
                data: "",
                want: Some("couldn't decode hash: \"asdf\""),
            },
            // No digests and no input.
            Case {
                hashes: "stream-hash sha256 8\n",
                data: "",
                want: None,
            },
            // More digests than input chunks.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                ),
                data: "asdfasd\n",
                want: Some("premature end of input data at offset 8"),
            },
            // Second chunk does not match its digest.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                    "e1bc8d3ba4afc7e109612cb73acbdddac052c93025aa1f82942edabb7deb82a1\n",
                ),
                data: "asdfasd\nasdf\n",
                want: Some(concat!(
                    "hash mismatch at offset 8; expected ",
                    "e1bc8d3ba4afc7e109612cb73acbdddac052c93025aa1f82942edabb7deb82a1",
                    ", found ",
                    "d1bc8d3ba4afc7e109612cb73acbdddac052c93025aa1f82942edabb7deb82a1",
                )),
            },
            // Input continues past the last digest.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                ),
                data: "asdfasd\nqqq",
                want: Some("found extra input data at offset 8"),
            },
            // Full chunk followed by a short final chunk.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                    "5f70ae29b3019ec851ef6b664b59d3fd88dda0de5eb58212ddbd97c65c3f8198\n",
                ),
                data: "asdfasd\nqwer\n",
                want: None,
            },
            // Two full chunks.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "3af36011654a7bc5159ecf41c610f1f7dbd9deb0d5638f8626db66f7b6467106\n",
                    "ef2323b075d71f44c62f62d37b29a5fc4f10c03579a3f6e5b00c2d9666a75e65\n",
                ),
                data: "asdfasd\nqwertyu\n",
                want: None,
            },
            // Single short chunk; the last digest line has no trailing newline.
            Case {
                hashes: concat!(
                    "stream-hash sha256 8\n",
                    "688787d8ff144c502c7f5cffaafe2cc588d86079f9de88304c26b0cb99ce91c6",
                ),
                data: "asd",
                want: None,
            },
        ];

        for (i, case) in cases.iter().enumerate() {
            let mut sink: Vec<u8> = Vec::new();
            let outcome = do_stream_hash(
                &mut Cursor::new(case.hashes),
                &mut Cursor::new(case.data),
                &mut sink,
            );
            match (outcome, case.want) {
                // Success was expected: the output must echo the input.
                (Ok(()), None) => assert_eq!(case.data.as_bytes(), sink.as_slice(), "{i}"),
                (Ok(()), Some(want)) => panic!("{}: expected error: {}", i, want),
                (Err(e), None) => panic!("{i}: found error: {e}"),
                (Err(e), Some(want)) => assert_eq!(&e.to_string(), want, "{i}"),
            }
        }
    }
}