use std::{
fs::File,
io::{Cursor, Read, Seek, SeekFrom},
ops::RangeInclusive,
path::Path,
};
use range_set::RangeSet;
use serde::{Deserialize, Serialize};
use serde_json::Value;
use sha2::{Digest, Sha256, Sha384, Sha512};
use crate::{crypto::base64::encode, utils::io_utils::stream_len, Error, Result};
// Largest chunk (256 MiB) read into memory at once while hashing, capping
// peak memory use when hashing very large assets.
const MAX_HASH_BUF: usize = 256 * 1024 * 1024;
/// A contiguous byte range within an asset, used to describe the spans that
/// are included in or excluded from a hash computation.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct HashRange {
    // Offset of the first byte of the range.
    start: u64,
    // Number of bytes covered by the range.
    length: u64,
    // Optional BMFF v2 marker offset; in-memory only, never serialized.
    #[serde(skip)]
    bmff_offset: Option<u64>,
}
impl HashRange {
pub fn new(start: u64, length: u64) -> Self {
HashRange {
start,
length,
bmff_offset: None,
}
}
#[allow(dead_code)]
pub fn set_start(&mut self, start: u64) {
self.start = start;
}
pub fn start(&self) -> u64 {
self.start
}
pub fn length(&self) -> u64 {
self.length
}
pub fn set_length(&mut self, length: u64) {
self.length = length;
}
pub fn set_bmff_offset(&mut self, offset: u64) {
self.bmff_offset = Some(offset);
}
pub fn bmff_offset(&self) -> Option<u64> {
self.bmff_offset
}
}
/// Returns `true` when the two byte slices have the same length and identical
/// contents.
///
/// Slice `PartialEq` performs exactly this comparison (length check plus
/// element-wise equality), so the hand-rolled `zip`/`all` loop is unnecessary.
/// Note: like the original, this is NOT a constant-time comparison.
pub fn vec_compare(va: &[u8], vb: &[u8]) -> bool {
    va == vb
}
/// Wrapper over the supported SHA-2 digest states, allowing the hash
/// algorithm to be selected at runtime by name.
#[derive(Clone, Debug)]
pub enum Hasher {
    SHA256(Sha256),
    SHA384(Sha384),
    SHA512(Sha512),
}
impl Default for Hasher {
    /// Defaults to a fresh SHA-256 digest.
    fn default() -> Self {
        Self::SHA256(Sha256::new())
    }
}
impl Hasher {
pub fn update(&mut self, data: &[u8]) {
use Hasher::*;
match self {
SHA256(ref mut d) => d.update(data),
SHA384(ref mut d) => d.update(data),
SHA512(ref mut d) => d.update(data),
}
}
pub fn finalize(hasher_enum: Hasher) -> Vec<u8> {
use Hasher::*;
match hasher_enum {
SHA256(d) => d.finalize().to_vec(),
SHA384(d) => d.finalize().to_vec(),
SHA512(d) => d.finalize().to_vec(),
}
}
pub fn finalize_reset(&mut self) -> Vec<u8> {
use Hasher::*;
match self {
SHA256(ref mut d) => d.finalize_reset().to_vec(),
SHA384(ref mut d) => d.finalize_reset().to_vec(),
SHA512(ref mut d) => d.finalize_reset().to_vec(),
}
}
pub fn new(alg: &str) -> Result<Hasher> {
match alg {
"sha256" => Ok(Hasher::SHA256(Sha256::new())),
"sha384" => Ok(Hasher::SHA384(Sha384::new())),
"sha512" => Ok(Hasher::SHA512(Sha512::new())),
_ => Err(Error::UnsupportedType),
}
}
}
/// Hash an in-memory buffer with `alg`, skipping any `exclusions`.
/// Returns an empty vector if hashing fails.
pub fn hash_by_alg(alg: &str, data: &[u8], exclusions: Option<Vec<HashRange>>) -> Vec<u8> {
    hash_stream_by_alg(alg, &mut Cursor::new(data), exclusions, true).unwrap_or_default()
}
/// Hash only the `inclusions` ranges of an in-memory buffer with `alg`.
/// Returns an empty vector if hashing fails.
pub fn hash_by_alg_with_inclusions(alg: &str, data: &[u8], inclusions: Vec<HashRange>) -> Vec<u8> {
    hash_stream_by_alg(alg, &mut Cursor::new(data), Some(inclusions), false).unwrap_or_default()
}
/// Hash the file at `asset_path` with `alg`, skipping any `exclusions`.
///
/// # Errors
/// Propagates I/O errors from opening or reading the file, and any
/// hashing error from `hash_stream_by_alg`.
pub fn hash_asset_by_alg(
    alg: &str,
    asset_path: &Path,
    exclusions: Option<Vec<HashRange>>,
) -> Result<Vec<u8>> {
    hash_stream_by_alg(alg, &mut File::open(asset_path)?, exclusions, true)
}
/// Hash only the `inclusions` ranges of the file at `asset_path` with `alg`.
///
/// # Errors
/// Propagates I/O errors from opening or reading the file, and any
/// hashing error from `hash_stream_by_alg`.
pub fn hash_asset_by_alg_with_inclusions(
    alg: &str,
    asset_path: &Path,
    inclusions: Vec<HashRange>,
) -> Result<Vec<u8>> {
    hash_stream_by_alg(alg, &mut File::open(asset_path)?, Some(inclusions), false)
}
pub(crate) fn hash_stream_by_alg_with_progress<R, F>(
alg: &str,
data: &mut R,
hash_range: Option<Vec<HashRange>>,
is_exclusion: bool,
progress: &mut F,
) -> Result<Vec<u8>>
where
R: Read + Seek + ?Sized,
F: FnMut(u32, u32) -> Result<()>,
{
let mut bmff_v2_starts: Vec<u64> = Vec::new();
use Hasher::*;
let mut hasher_enum = match alg {
"sha256" => SHA256(Sha256::new()),
"sha384" => SHA384(Sha384::new()),
"sha512" => SHA512(Sha512::new()),
_ => {
return Err(Error::UnsupportedType);
}
};
let data_len = stream_len(data)?;
data.rewind()?;
if data_len < 1 {
return Err(Error::OtherError("no data to hash".into()));
}
let ranges = match hash_range {
Some(mut hr) if !hr.is_empty() => {
hr.sort_by_key(|a| a.start());
let num_blocks = hr.len();
let range_end = hr[num_blocks - 1].start() + hr[num_blocks - 1].length();
let data_end = data_len - 1;
if data_len < range_end {
return Err(Error::BadParam(
"The exclusion range exceed the data length".to_string(),
));
}
if is_exclusion {
let mut ranges_vec: Vec<RangeInclusive<u64>> = Vec::new();
let mut ranges = RangeSet::<[RangeInclusive<u64>; 1]>::from(0..=data_end);
for exclusion in hr {
if let Some(offset) = exclusion.bmff_offset() {
bmff_v2_starts.push(offset);
continue;
}
if exclusion.length() == 0 {
continue;
}
let end = exclusion
.start()
.checked_add(exclusion.length())
.ok_or(Error::BadParam("No exclusion range".to_string()))?
.checked_sub(1)
.ok_or(Error::BadParam("No exclusion range".to_string()))?;
let exclusion_start = exclusion.start();
ranges.remove_range(exclusion_start..=end);
}
if !bmff_v2_starts.is_empty() {
bmff_v2_starts.sort();
for r in ranges.into_smallvec() {
let mut current_range = r;
for os in &bmff_v2_starts {
if current_range.contains(os) {
if *current_range.start() == *os {
ranges_vec.push(RangeInclusive::new(*os, *os));
} else {
ranges_vec
.push(RangeInclusive::new(*current_range.start(), *os - 1)); ranges_vec.push(RangeInclusive::new(*os, *os)); current_range = RangeInclusive::new(*os, *current_range.end());
}
}
}
ranges_vec.push(current_range);
}
let range_start = RangeInclusive::new(0, 0);
let range_end = RangeInclusive::new(data_end, data_end);
let before_any_range = *ranges_vec.first().unwrap_or(&range_start).start();
let after_any_range = *ranges_vec.last().unwrap_or(&range_end).end();
for os in &bmff_v2_starts {
if !ranges_vec.iter().any(|r| r.contains(os))
&& *os > before_any_range
&& *os < after_any_range
{
ranges_vec.push(RangeInclusive::new(*os, *os));
}
}
ranges_vec.sort_by(|a, b| {
let a_start = a.start();
let b_start = b.start();
a_start.cmp(b_start)
});
ranges_vec
} else {
for r in ranges.into_smallvec() {
ranges_vec.push(r);
}
ranges_vec
}
} else {
let mut ranges_vec: Vec<RangeInclusive<u64>> = Vec::new();
for inclusion in hr {
if inclusion.length() == 0 {
continue;
}
let end = inclusion.start() + inclusion.length() - 1;
let inclusion_start = inclusion.start();
if let Some(offset) = inclusion.bmff_offset() {
ranges_vec.push(RangeInclusive::new(offset, offset));
bmff_v2_starts.push(offset);
}
ranges_vec.push(RangeInclusive::new(inclusion_start, end));
}
ranges_vec
}
}
_ => {
let mut ranges_vec: Vec<RangeInclusive<u64>> = Vec::new();
let data_end = data_len - 1;
ranges_vec.push(RangeInclusive::new(0_u64, data_end));
ranges_vec
}
};
let total: u32 = ranges
.iter()
.map(|r| {
let len = r.end() - r.start() + 1;
(len as usize).div_ceil(MAX_HASH_BUF) as u32
})
.sum();
let mut step: u32 = 0;
if cfg!(target_arch = "wasm32") {
for r in ranges {
step += 1;
progress(step, total)?;
let start = r.start();
let end = r.end();
let mut chunk_left = end - start + 1;
if bmff_v2_starts.contains(start) && end == start {
hasher_enum.update(&start.to_be_bytes());
continue;
}
data.seek(SeekFrom::Start(*start))?;
loop {
let mut chunk = vec![0u8; std::cmp::min(chunk_left as usize, MAX_HASH_BUF)];
data.read_exact(&mut chunk)?;
hasher_enum.update(&chunk);
chunk_left -= chunk.len() as u64;
if chunk_left == 0 {
break;
}
step += 1;
progress(step, total)?;
}
}
} else {
for r in ranges {
step += 1;
progress(step, total)?;
let start = r.start();
let end = r.end();
let mut chunk_left = end - start + 1;
if bmff_v2_starts.contains(start) && end == start {
hasher_enum.update(&start.to_be_bytes());
continue;
}
data.seek(SeekFrom::Start(*start))?;
let mut chunk = vec![0u8; std::cmp::min(chunk_left as usize, MAX_HASH_BUF)];
data.read_exact(&mut chunk)?;
loop {
let (tx, rx) = std::sync::mpsc::channel();
chunk_left -= chunk.len() as u64;
std::thread::spawn(move || {
hasher_enum.update(&chunk);
tx.send(hasher_enum).unwrap_or_default();
});
if chunk_left == 0 {
hasher_enum = match rx.recv() {
Ok(hasher) => hasher,
Err(_) => return Err(Error::ThreadReceiveError),
};
break;
}
let mut next_chunk = vec![0u8; std::cmp::min(chunk_left as usize, MAX_HASH_BUF)];
data.read_exact(&mut next_chunk)?;
hasher_enum = match rx.recv() {
Ok(hasher) => hasher,
Err(_) => return Err(Error::ThreadReceiveError),
};
step += 1;
progress(step, total)?;
chunk = next_chunk;
}
}
}
Ok(Hasher::finalize(hasher_enum))
}
/// Hash a seekable stream with `alg` over the given ranges, without progress
/// reporting.
///
/// # Errors
/// Propagates any error from `hash_stream_by_alg_with_progress`.
pub fn hash_stream_by_alg<R>(
    alg: &str,
    data: &mut R,
    hash_range: Option<Vec<HashRange>>,
    is_exclusion: bool,
) -> Result<Vec<u8>>
where
    R: Read + Seek + ?Sized,
{
    // Delegate with a progress callback that never cancels.
    let mut no_progress = |_step: u32, _total: u32| Ok(());
    hash_stream_by_alg_with_progress(alg, data, hash_range, is_exclusion, &mut no_progress)
}
/// Verify that `data` (minus any `exclusions`) hashes to `hash` under `alg`.
pub fn verify_by_alg(
    alg: &str,
    hash: &[u8],
    data: &[u8],
    exclusions: Option<Vec<HashRange>>,
) -> bool {
    vec_compare(hash, &hash_by_alg(alg, data, exclusions))
}
/// Verify that the file at `asset_path` (minus any `exclusions`) hashes to
/// `hash` under `alg`. Any I/O or hashing failure yields `false`.
pub fn verify_asset_by_alg(
    alg: &str,
    hash: &[u8],
    asset_path: &Path,
    exclusions: Option<Vec<HashRange>>,
) -> bool {
    hash_asset_by_alg(alg, asset_path, exclusions)
        .map(|computed| vec_compare(hash, &computed))
        .unwrap_or(false)
}
/// Verify that `reader`, hashed over the given ranges with `alg`, matches
/// `hash`. Any hashing failure yields `false`.
pub fn verify_stream_by_alg<R>(
    alg: &str,
    hash: &[u8],
    reader: &mut R,
    hash_range: Option<Vec<HashRange>>,
    is_exclusion: bool,
) -> bool
where
    R: Read + Seek + ?Sized,
{
    match hash_stream_by_alg(alg, reader, hash_range, is_exclusion) {
        Ok(computed) => vec_compare(hash, &computed),
        Err(_) => false,
    }
}
/// Hash the concatenation of `left` and (optionally) `right` with `alg`.
/// Returns an empty vector if hashing fails (see `hash_by_alg`).
pub fn concat_and_hash(alg: &str, left: &[u8], right: Option<&[u8]>) -> Vec<u8> {
    // Preallocate and copy directly; the original built a throwaway Vec from
    // `right` only to drain it via `append`.
    let mut buf = Vec::with_capacity(left.len() + right.map_or(0, <[u8]>::len));
    buf.extend_from_slice(left);
    if let Some(r) = right {
        buf.extend_from_slice(r);
    }
    hash_by_alg(alg, &buf, None)
}
/// Walk a JSON `Value` and replace any object-valued array consisting solely
/// of numbers (assumed to be raw hash bytes) with its base64-encoded string.
///
/// Only arrays that appear as object member values are converted; arrays
/// reached directly (e.g. nested inside other arrays) are traversed but left
/// as-is.
pub fn hash_to_b64(mut value: Value) -> Value {
    use std::collections::VecDeque;
    // Iterative breadth-first traversal avoids recursion depth limits on
    // deeply nested JSON.
    let mut queue = VecDeque::new();
    queue.push_back(&mut value);
    while let Some(current) = queue.pop_front() {
        match current {
            Value::Object(obj) => {
                for (_, v) in obj.iter_mut() {
                    if let Value::Array(hash_arr) = v {
                        // Heuristic: a non-empty, all-numeric array is treated
                        // as hash bytes.
                        if !hash_arr.is_empty() && hash_arr.iter().all(|x| x.is_number()) {
                            let mut hash_bytes = Vec::with_capacity(hash_arr.len());
                            for n in hash_arr.iter() {
                                // NOTE(review): values above 255 are truncated
                                // by `as u8`, and non-u64 numbers (negative or
                                // float) are silently skipped — assumes inputs
                                // are genuine byte arrays; TODO confirm.
                                if let Some(num) = n.as_u64() {
                                    hash_bytes.push(num as u8);
                                }
                            }
                            *v = Value::String(encode(&hash_bytes));
                        }
                    }
                    // Queue the (possibly replaced) value; a replaced String
                    // is a no-op on the next pass.
                    queue.push_back(v);
                }
            }
            Value::Array(arr) => {
                for v in arr.iter_mut() {
                    queue.push_back(v);
                }
            }
            _ => {}
        }
    }
    value
}
#[cfg(test)]
mod tests {
    #![allow(clippy::unwrap_used)]
    use std::io::Cursor;
    use super::*;

    /// The progress callback must fire at least once when hashing a
    /// non-empty stream.
    #[test]
    fn progress_callback_is_called() {
        let data = vec![0u8; 64];
        let mut called = false;
        let mut reader = Cursor::new(&data);
        let mut cb = |_step, _total| {
            called = true;
            Ok(())
        };
        hash_stream_by_alg_with_progress("sha256", &mut reader, None, true, &mut cb).unwrap();
        assert!(called, "progress callback should have been invoked");
    }

    /// Returning an error from the progress callback must cancel hashing
    /// and propagate that error to the caller.
    #[test]
    fn progress_callback_can_cancel() {
        let data = vec![0u8; 64];
        let mut reader = Cursor::new(&data);
        let mut cb = |_step, _total| Err(Error::OperationCancelled);
        let result = hash_stream_by_alg_with_progress("sha256", &mut reader, None, true, &mut cb);
        assert!(
            matches!(result, Err(Error::OperationCancelled)),
            "expected OperationCancelled, got {result:?}"
        );
    }
}