use std::{
fs::File,
io::{Cursor, Read, Seek, SeekFrom},
ops::RangeInclusive,
path::Path,
};
use range_set::RangeSet;
use serde::{Deserialize, Serialize};
use sha2::{Digest, Sha256, Sha384, Sha512};
use crate::{utils::io_utils::stream_len, Error, Result};
// Upper bound, in bytes (256 MiB), on the size of a single read buffer allocated
// while hashing a stream; larger ranges are consumed in several chunks of at most this size.
const MAX_HASH_BUF: usize = 256 * 1024 * 1024;
/// A contiguous byte range (`start`, `length`) of an asset, used as either an
/// exclusion or an inclusion when hashing.
#[derive(Clone, Serialize, Deserialize, Debug, PartialEq, Eq)]
pub struct HashRange {
/// Offset of the first byte of the range.
start: u64,
/// Number of bytes covered by the range.
length: u64,
/// Optional offset value that `hash_stream_by_alg` feeds into the hash as
/// big-endian bytes instead of stream data. Skipped by serde, so it is never
/// serialized and is absent after deserialization.
#[serde(skip)]
bmff_offset: Option<u64>,
}
impl HashRange {
pub fn new(start: u64, length: u64) -> Self {
HashRange {
start,
length,
bmff_offset: None,
}
}
#[allow(dead_code)]
pub fn set_start(&mut self, start: u64) {
self.start = start;
}
pub fn start(&self) -> u64 {
self.start
}
pub fn length(&self) -> u64 {
self.length
}
pub fn set_length(&mut self, length: u64) {
self.length = length;
}
pub fn set_bmff_offset(&mut self, offset: u64) {
self.bmff_offset = Some(offset);
}
pub fn bmff_offset(&self) -> Option<u64> {
self.bmff_offset
}
}
/// Returns `true` when both byte slices have the same length and identical contents.
pub fn vec_compare(va: &[u8], vb: &[u8]) -> bool {
    va == vb
}
/// Digest state for one of the supported SHA-2 algorithms, so callers can
/// update and finalize a hash without knowing which algorithm was selected.
#[derive(Clone)]
pub enum Hasher {
/// In-progress SHA-256 digest.
SHA256(Sha256),
/// In-progress SHA-384 digest.
SHA384(Sha384),
/// In-progress SHA-512 digest.
SHA512(Sha512),
}
impl Hasher {
pub fn update(&mut self, data: &[u8]) {
use Hasher::*;
match self {
SHA256(ref mut d) => d.update(data),
SHA384(ref mut d) => d.update(data),
SHA512(ref mut d) => d.update(data),
}
}
pub fn finalize(hasher_enum: Hasher) -> Vec<u8> {
use Hasher::*;
match hasher_enum {
SHA256(d) => d.finalize().to_vec(),
SHA384(d) => d.finalize().to_vec(),
SHA512(d) => d.finalize().to_vec(),
}
}
}
/// Hashes an in-memory buffer with `alg`, leaving out the optional `exclusions`.
///
/// Any failure reported by `hash_stream_by_alg` is swallowed and an empty
/// vector is returned instead.
pub fn hash_by_alg(alg: &str, data: &[u8], exclusions: Option<Vec<HashRange>>) -> Vec<u8> {
    let mut cursor = Cursor::new(data);
    let outcome = hash_stream_by_alg(alg, &mut cursor, exclusions, true);
    outcome.unwrap_or_default()
}
/// Hashes only the `inclusions` ranges of an in-memory buffer with `alg`.
///
/// Any failure reported by `hash_stream_by_alg` is swallowed and an empty
/// vector is returned instead.
pub fn hash_by_alg_with_inclusions(alg: &str, data: &[u8], inclusions: Vec<HashRange>) -> Vec<u8> {
    let mut cursor = Cursor::new(data);
    let outcome = hash_stream_by_alg(alg, &mut cursor, Some(inclusions), false);
    outcome.unwrap_or_default()
}
/// Opens the file at `asset_path` and hashes it with `alg`, leaving out the
/// optional `exclusions`.
///
/// # Errors
///
/// Fails when the file cannot be opened or when `hash_stream_by_alg` fails.
pub fn hash_asset_by_alg(
    alg: &str,
    asset_path: &Path,
    exclusions: Option<Vec<HashRange>>,
) -> Result<Vec<u8>> {
    let mut asset = File::open(asset_path)?;
    let is_exclusion = true;
    hash_stream_by_alg(alg, &mut asset, exclusions, is_exclusion)
}
/// Opens the file at `asset_path` and hashes only its `inclusions` ranges with `alg`.
///
/// # Errors
///
/// Fails when the file cannot be opened or when `hash_stream_by_alg` fails.
pub fn hash_asset_by_alg_with_inclusions(
    alg: &str,
    asset_path: &Path,
    inclusions: Vec<HashRange>,
) -> Result<Vec<u8>> {
    let mut asset = File::open(asset_path)?;
    let is_exclusion = false;
    hash_stream_by_alg(alg, &mut asset, Some(inclusions), is_exclusion)
}
/// Computes the digest of a seekable stream with `alg`, optionally restricted by byte ranges.
///
/// * `alg` - `"sha256"`, `"sha384"` or `"sha512"`.
/// * `data` - stream to hash. Its length is obtained through `stream_len`, it is rewound,
///   and each range to hash is then reached with an absolute seek.
/// * `hash_range` - when `None` or empty the whole stream is hashed. Otherwise the ranges
///   are sorted by start and interpreted according to `is_exclusion`.
/// * `is_exclusion` - `true`: every byte outside the ranges is hashed;
///   `false`: only the bytes inside the ranges are hashed.
///
/// A range that carries a BMFF offset is treated specially: the offset value itself is fed
/// to the hasher as 8 big-endian bytes. For exclusions such a range removes nothing from the
/// stream; for inclusions the offset is hashed immediately before the range's bytes.
/// Zero-length ranges cover no bytes and are ignored.
///
/// Unless the `no_interleaved_io` feature is enabled or the target is `wasm32`, each chunk
/// is hashed on a spawned thread while the next chunk is being read.
///
/// # Errors
///
/// * `Error::UnsupportedType` - `alg` is not one of the supported names.
/// * `Error::OtherError` - the stream is empty.
/// * `Error::BadParam` - the last range (by start) extends past the end of the stream, or a
///   range's `start + length` does not fit in a `u64`.
/// * `Error::ThreadReceiveError` - the hashing thread ended without returning the hasher.
/// * Errors from `stream_len`, seeking or reading are propagated.
pub fn hash_stream_by_alg<R>(
    alg: &str,
    data: &mut R,
    hash_range: Option<Vec<HashRange>>,
    is_exclusion: bool,
) -> Result<Vec<u8>>
where
    R: Read + Seek + ?Sized,
{
    use Hasher::*;

    // Offsets that are hashed as their big-endian value instead of as stream bytes.
    let mut bmff_v2_starts: Vec<u64> = Vec::new();

    let mut hasher_enum = match alg {
        "sha256" => SHA256(Sha256::new()),
        "sha384" => SHA384(Sha384::new()),
        "sha512" => SHA512(Sha512::new()),
        _ => {
            return Err(Error::UnsupportedType);
        }
    };

    let data_len = stream_len(data)?;
    data.rewind()?;

    if data_len < 1 {
        return Err(Error::OtherError("no data to hash".into()));
    }

    let ranges = match hash_range {
        Some(mut hr) if !hr.is_empty() => {
            hr.sort_by_key(|a| a.start());

            // Shared error for ranges that run past the stream or overflow u64 arithmetic.
            let out_of_bounds =
                || Error::BadParam("The exclusion range exceed the data length".to_string());

            // Only the range with the greatest start is bounds-checked here.
            let last = &hr[hr.len() - 1];
            let range_end = last
                .start()
                .checked_add(last.length())
                .ok_or_else(out_of_bounds)?;
            let data_end = data_len - 1;

            if data_len < range_end {
                return Err(out_of_bounds());
            }

            if is_exclusion {
                let mut ranges_vec: Vec<RangeInclusive<u64>> = Vec::new();
                // Start from the whole stream and carve the exclusions out of it.
                let mut ranges = RangeSet::<[RangeInclusive<u64>; 1]>::from(0..=data_end);

                for exclusion in hr {
                    // Offset-carrying entries exclude nothing; remember the offset instead.
                    if let Some(offset) = exclusion.bmff_offset() {
                        bmff_v2_starts.push(offset);
                        continue;
                    }

                    // A zero-length range covers no bytes; `start + length - 1` would
                    // underflow for it, so skip it.
                    if exclusion.length() == 0 {
                        continue;
                    }

                    let exclusion_start = exclusion.start();
                    let end = exclusion_start
                        .checked_add(exclusion.length() - 1)
                        .ok_or_else(out_of_bounds)?;
                    ranges.remove_range(exclusion_start..=end);
                }

                if !bmff_v2_starts.is_empty() {
                    bmff_v2_starts.sort();

                    // Split every remaining range at each offset it contains, inserting a
                    // one-element marker range for the offset itself.
                    for r in ranges.into_smallvec() {
                        let mut current_range = r;
                        for os in &bmff_v2_starts {
                            if current_range.contains(os) {
                                if *current_range.start() == *os {
                                    // Offset sits at the start of the range: marker only.
                                    ranges_vec.push(RangeInclusive::new(*os, *os));
                                } else {
                                    // Bytes before the offset.
                                    ranges_vec
                                        .push(RangeInclusive::new(*current_range.start(), *os - 1));
                                    // Marker for the offset.
                                    ranges_vec.push(RangeInclusive::new(*os, *os));
                                    // Remainder, beginning at the offset.
                                    current_range = RangeInclusive::new(*os, *current_range.end());
                                }
                            }
                        }
                        ranges_vec.push(current_range);
                    }

                    // Offsets that landed in an excluded gap still get a marker, as long as
                    // they lie strictly between the first and last hashed positions.
                    let range_start = RangeInclusive::new(0, 0);
                    let range_end = RangeInclusive::new(data_end, data_end);
                    let before_any_range = *ranges_vec.first().unwrap_or(&range_start).start();
                    let after_any_range = *ranges_vec.last().unwrap_or(&range_end).end();

                    for os in &bmff_v2_starts {
                        if !ranges_vec.iter().any(|r| r.contains(os))
                            && *os > before_any_range
                            && *os < after_any_range
                        {
                            ranges_vec.push(RangeInclusive::new(*os, *os));
                        }
                    }

                    // Restore stream order after appending the extra markers.
                    ranges_vec.sort_by(|a, b| a.start().cmp(b.start()));

                    ranges_vec
                } else {
                    ranges_vec.extend(ranges.into_smallvec());
                    ranges_vec
                }
            } else {
                let mut ranges_vec: Vec<RangeInclusive<u64>> = Vec::new();
                for inclusion in hr {
                    // The offset marker is hashed just before the range it belongs to.
                    if let Some(offset) = inclusion.bmff_offset() {
                        ranges_vec.push(RangeInclusive::new(offset, offset));
                        bmff_v2_starts.push(offset);
                    }

                    // A zero-length range contributes no bytes; `start + length - 1` would
                    // underflow for it, so skip it.
                    if inclusion.length() == 0 {
                        continue;
                    }

                    let inclusion_start = inclusion.start();
                    let end = inclusion_start
                        .checked_add(inclusion.length() - 1)
                        .ok_or_else(out_of_bounds)?;
                    ranges_vec.push(RangeInclusive::new(inclusion_start, end));
                }
                ranges_vec
            }
        }
        _ => {
            // No ranges supplied: hash the entire stream.
            let data_end = data_len - 1;
            vec![RangeInclusive::new(0_u64, data_end)]
        }
    };

    // Size of the next read buffer. The minimum is taken in u64 so that a remaining count
    // larger than `usize::MAX` (32-bit targets) cannot be truncated before the comparison.
    let next_chunk_len = |remaining: u64| remaining.min(MAX_HASH_BUF as u64) as usize;

    if cfg!(feature = "no_interleaved_io") || cfg!(target_arch = "wasm32") {
        // Sequential path: read a chunk, hash it, repeat.
        for r in ranges {
            let start = r.start();
            let end = r.end();
            let mut chunk_left = end - start + 1;

            // One-element range at a recorded offset: hash the offset value itself.
            if bmff_v2_starts.contains(start) && end == start {
                hasher_enum.update(&start.to_be_bytes());
                continue;
            }

            data.seek(SeekFrom::Start(*start))?;

            loop {
                let mut chunk = vec![0u8; next_chunk_len(chunk_left)];
                data.read_exact(&mut chunk)?;
                hasher_enum.update(&chunk);

                chunk_left -= chunk.len() as u64;
                if chunk_left == 0 {
                    break;
                }
            }
        }
    } else {
        // Interleaved path: hash the current chunk on a worker thread while the next chunk
        // is read on this thread. The hasher travels to the worker and back via a channel.
        for r in ranges {
            let start = r.start();
            let end = r.end();
            let mut chunk_left = end - start + 1;

            // One-element range at a recorded offset: hash the offset value itself.
            if bmff_v2_starts.contains(start) && end == start {
                hasher_enum.update(&start.to_be_bytes());
                continue;
            }

            data.seek(SeekFrom::Start(*start))?;

            let mut chunk = vec![0u8; next_chunk_len(chunk_left)];
            data.read_exact(&mut chunk)?;

            loop {
                let (tx, rx) = std::sync::mpsc::channel();

                chunk_left -= chunk.len() as u64;

                std::thread::spawn(move || {
                    hasher_enum.update(&chunk);
                    tx.send(hasher_enum).unwrap_or_default();
                });

                // Last chunk of this range: just wait for the hasher to come back.
                if chunk_left == 0 {
                    hasher_enum = match rx.recv() {
                        Ok(hasher) => hasher,
                        Err(_) => return Err(Error::ThreadReceiveError),
                    };
                    break;
                }

                // Read the next chunk while the worker is hashing the current one.
                let mut next_chunk = vec![0u8; next_chunk_len(chunk_left)];
                data.read_exact(&mut next_chunk)?;

                hasher_enum = match rx.recv() {
                    Ok(hasher) => hasher,
                    Err(_) => return Err(Error::ThreadReceiveError),
                };

                chunk = next_chunk;
            }
        }
    }

    Ok(Hasher::finalize(hasher_enum))
}
pub fn verify_by_alg(
alg: &str,
hash: &[u8],
data: &[u8],
exclusions: Option<Vec<HashRange>>,
) -> bool {
let data_hash = hash_by_alg(alg, data, exclusions);
vec_compare(hash, &data_hash)
}
/// Checks that `hash` equals the `alg` digest of the file at `asset_path`, computed with
/// the optional `exclusions` left out. Returns `false` if the file cannot be hashed.
pub fn verify_asset_by_alg(
    alg: &str,
    hash: &[u8],
    asset_path: &Path,
    exclusions: Option<Vec<HashRange>>,
) -> bool {
    hash_asset_by_alg(alg, asset_path, exclusions)
        .map(|computed| vec_compare(hash, &computed))
        .unwrap_or(false)
}
/// Checks that `hash` equals the `alg` digest of `reader`, with `hash_range` treated as
/// exclusions or inclusions according to `is_exclusion`. Returns `false` if hashing fails.
pub fn verify_stream_by_alg<R>(
    alg: &str,
    hash: &[u8],
    reader: &mut R,
    hash_range: Option<Vec<HashRange>>,
    is_exclusion: bool,
) -> bool
where
    R: Read + Seek + ?Sized,
{
    match hash_stream_by_alg(alg, reader, hash_range, is_exclusion) {
        Ok(computed) => vec_compare(hash, &computed),
        Err(_) => false,
    }
}
/// Hashes `left` followed by `right` (when present) with `alg`.
///
/// The result comes from `hash_by_alg`, so it is an empty vector when hashing fails.
pub fn concat_and_hash(alg: &str, left: &[u8], right: Option<&[u8]>) -> Vec<u8> {
    let right = right.unwrap_or(&[]);

    // Build the joined buffer in one allocation, copying each side exactly once.
    let mut joined = Vec::with_capacity(left.len() + right.len());
    joined.extend_from_slice(left);
    joined.extend_from_slice(right);

    hash_by_alg(alg, &joined, None)
}