use crate::app::report::SessionScanTarget;
use crate::app::session_files::SessionFileFormat;
use eyre::{Result, WrapErr, eyre};
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};
use std::path::Path;
/// Multiplier applied after every xor step in both hash lanes (the 64-bit FNV prime).
const FNV_PRIME: u64 = 0x100_0000_01b3;
/// Seed of the left hash lane (the 64-bit FNV offset basis).
const FNV_OFFSET_LEFT: u64 = 0xcbf2_9ce4_8422_2325;
/// Seed of the right hash lane: the left seed with its two 32-bit halves swapped.
const FNV_OFFSET_RIGHT: u64 = FNV_OFFSET_LEFT.rotate_left(32);
/// Number of bytes sampled from the head and from the tail of hashed content.
const HASH_SAMPLE_BYTES: usize = 4096;
/// `HASH_SAMPLE_BYTES` as a `u64`, for comparisons against byte offsets.
const HASH_SAMPLE_BYTES_U64: u64 = 4096;
/// 128-bit content fingerprint held as two 64-bit lanes.
///
/// Both lanes are fed the same bytes by `update` but start from different
/// seeds (see `new`). `encode` / `decode` convert to and from a 32-character
/// hex string.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub(super) struct ContentHash {
/// Lane seeded with `FNV_OFFSET_LEFT`; rendered first by `encode`.
left: u64,
/// Lane seeded with `FNV_OFFSET_RIGHT`; rendered second by `encode`.
right: u64,
}
impl ContentHash {
    /// Returns the initial hash state, with each lane set to its own offset constant.
    pub(super) const fn new() -> Self {
        Self {
            left: FNV_OFFSET_LEFT,
            right: FNV_OFFSET_RIGHT,
        }
    }

    /// Folds `bytes` into both lanes, one byte at a time.
    ///
    /// Each lane performs an xor followed by a wrapping multiply with
    /// `FNV_PRIME`. The left lane xors in the byte value itself; the right lane
    /// xors in the byte value rotated left by one bit.
    fn update(&mut self, bytes: &[u8]) {
        for &byte in bytes {
            let value = u64::from(byte);
            self.left = (self.left ^ value).wrapping_mul(FNV_PRIME);
            self.right = (self.right ^ value.rotate_left(1)).wrapping_mul(FNV_PRIME);
        }
    }

    /// Renders the hash as 32 lowercase hex digits: `left` followed by `right`,
    /// each zero-padded to 16 digits.
    pub(super) fn encode(self) -> String {
        format!("{:016x}{:016x}", self.left, self.right)
    }

    /// Parses a string in the format produced by [`ContentHash::encode`].
    ///
    /// Returns `None` unless `value` consists of exactly 32 ASCII hex digits
    /// (upper or lower case). The digits are validated up front because
    /// `u64::from_str_radix` on its own also accepts a leading `+`, which would
    /// let strings that `encode` can never produce decode successfully.
    pub(super) fn decode(value: &str) -> Option<Self> {
        if value.len() != 32 || !value.bytes().all(|byte| byte.is_ascii_hexdigit()) {
            return None;
        }
        // Every byte is ASCII, so byte offset 16 is guaranteed to be a char boundary.
        let (left, right) = value.split_at(16);
        Some(Self {
            left: u64::from_str_radix(left, 16).ok()?,
            right: u64::from_str_radix(right, 16).ok()?,
        })
    }
}
/// Incremental state for hashing content that is fed in chunks through `observe`.
///
/// Only the running byte count plus a bounded sample from the start and from
/// the end of the stream are retained; `finish` turns them into a `ContentHash`.
#[derive(Default)]
pub(super) struct ParsedContentHash {
/// Total number of bytes observed so far (saturates at `u64::MAX`).
offset: u64,
/// The first bytes observed, capped at `HASH_SAMPLE_BYTES`.
head: Vec<u8>,
/// The most recent bytes observed, capped at `HASH_SAMPLE_BYTES`.
tail: Vec<u8>,
}
impl ParsedContentHash {
    /// Records the next chunk of the stream.
    ///
    /// Tops up the head sample until it holds `HASH_SAMPLE_BYTES` bytes, keeps
    /// the tail sample equal to the last `HASH_SAMPLE_BYTES` bytes seen so far,
    /// and advances the byte counter by `bytes.len()` (saturating).
    pub(super) fn observe(&mut self, bytes: &[u8]) {
        let chunk_len = bytes.len();

        // Head: copy only as much as is still missing from the first window.
        if let Some(room) = HASH_SAMPLE_BYTES.checked_sub(self.head.len()) {
            let wanted = room.min(chunk_len);
            self.head.extend_from_slice(&bytes[..wanted]);
        }

        // Tail: sliding window over the most recent bytes.
        match chunk_len.checked_sub(HASH_SAMPLE_BYTES) {
            // The chunk alone fills the window, so older bytes are irrelevant.
            Some(start) => {
                self.tail.clear();
                self.tail.extend_from_slice(&bytes[start..]);
            }
            // Short chunk: append, then trim from the front back down to the cap.
            None => {
                self.tail.extend_from_slice(bytes);
                if let Some(excess) = self.tail.len().checked_sub(HASH_SAMPLE_BYTES) {
                    self.tail.drain(..excess);
                }
            }
        }

        let advanced = u64::try_from(chunk_len).unwrap_or(u64::MAX);
        self.offset = self.offset.saturating_add(advanced);
    }

    /// Returns the total number of bytes observed so far.
    pub(super) const fn offset(&self) -> u64 {
        self.offset
    }

    /// Consumes the accumulator and produces the final hash.
    ///
    /// The hash covers the little-endian byte count, then the head sample, and
    /// (only when more than `HASH_SAMPLE_BYTES` bytes were observed) a `0xff`
    /// separator byte followed by the tail sample.
    pub(super) fn finish(self) -> ContentHash {
        let Self { offset, head, tail } = self;

        let mut digest = ContentHash::new();
        digest.update(&offset.to_le_bytes());
        digest.update(&head);
        if offset > HASH_SAMPLE_BYTES_U64 {
            digest.update(&[0xff]);
            digest.update(&tail);
        }
        digest
    }
}
/// Hashes the first `offset` bytes of the file at `path` by sampling.
///
/// The hash is fed the same sequence that `ParsedContentHash::finish` uses:
/// `offset` as little-endian bytes, the first `min(offset, HASH_SAMPLE_BYTES)`
/// bytes of the file, and (only when `offset` exceeds the sample size) a `0xff`
/// separator followed by the `HASH_SAMPLE_BYTES` bytes that end at `offset`.
///
/// # Errors
///
/// Fails if the file cannot be opened, seeked or read, or if it ends before
/// the requested prefix has been read.
pub(super) fn content_hash_prefix(path: &Path, offset: u64) -> Result<ContentHash> {
    let mut source =
        File::open(path).wrap_err_with(|| format!("failed to open {}", path.display()))?;

    let mut digest = ContentHash::new();
    digest.update(&offset.to_le_bytes());

    // Head sample: the whole prefix when it is shorter than one sample window.
    let head_len = usize::try_from(offset.min(HASH_SAMPLE_BYTES_U64))
        .wrap_err("hash sample length does not fit usize")?;
    read_hash_sample(&mut source, head_len, &mut digest)?;

    if offset <= HASH_SAMPLE_BYTES_U64 {
        return Ok(digest);
    }

    // Tail sample: the last full window that ends exactly at `offset`.
    digest.update(&[0xff]);
    let tail_start = offset.saturating_sub(HASH_SAMPLE_BYTES_U64);
    source.seek(SeekFrom::Start(tail_start))?;
    read_hash_sample(&mut source, HASH_SAMPLE_BYTES, &mut digest)?;
    Ok(digest)
}
/// Reads exactly `len` bytes from the current position of `file` and folds
/// them into `hash`.
///
/// Data is pulled through a stack buffer of `HASH_SAMPLE_BYTES` bytes, so `len`
/// may exceed the buffer size.
///
/// # Errors
///
/// Fails with "file ended before indexed prefix" if the file ends before `len`
/// bytes were read, or with the underlying I/O error if a read fails.
fn read_hash_sample(file: &mut File, len: usize, hash: &mut ContentHash) -> Result<()> {
    let mut buffer = [0_u8; HASH_SAMPLE_BYTES];
    let mut remaining = len;
    while remaining > 0 {
        let read_len = remaining.min(buffer.len());
        let bytes_read = match file.read(&mut buffer[..read_len]) {
            Ok(0) => return Err(eyre!("file ended before indexed prefix")),
            Ok(count) => count,
            // `Read::read` documents `Interrupted` as non-fatal; nothing was
            // consumed, so simply issue the same read again.
            Err(error) if error.kind() == std::io::ErrorKind::Interrupted => continue,
            Err(error) => return Err(error.into()),
        };
        hash.update(&buffer[..bytes_read]);
        remaining -= bytes_read;
    }
    Ok(())
}
/// What was observed about one scan target: its path key, a metadata
/// snapshot, and its session file format. Built by `ObservedFile::from_target`.
pub(super) struct ObservedFile {
/// Clone of the target's `path_key`.
pub(super) path_key: String,
/// Metadata snapshot taken via `FileMetadata::from_target`.
pub(super) metadata: FileMetadata,
/// Copy of the target's `file_format` (also stored inside `metadata`).
pub(super) file_format: SessionFileFormat,
}
impl ObservedFile {
    /// Builds an observation from a scan target: clones its path key, snapshots
    /// its metadata, and copies its file format.
    pub(super) fn from_target(target: &SessionScanTarget) -> Self {
        let path_key = target.path_key.clone();
        let metadata = FileMetadata::from_target(target);
        Self {
            path_key,
            metadata,
            file_format: target.file_format,
        }
    }
}
/// Snapshot of a scan target's file attributes, compared by
/// `same_contents_as` and `same_identity_as`.
#[derive(Clone)]
pub(super) struct FileMetadata {
/// Session file format of the target.
pub(super) file_format: SessionFileFormat,
/// Taken from the target's `bytes` field.
// NOTE(review): presumably the file size in bytes — confirm against SessionScanTarget.
pub(super) size: u64,
/// Taken from the target's `metadata.mtime_ns`; `None` when the target has none.
// NOTE(review): presumably modification time in nanoseconds — confirm at the producer.
pub(super) mtime_ns: Option<i64>,
/// Taken from the target's `metadata.dev`; `None` when the target has none.
// NOTE(review): presumably the device id of the file — confirm at the producer.
pub(super) dev: Option<i64>,
/// Taken from the target's `metadata.ino`; `None` when the target has none.
// NOTE(review): presumably the inode number of the file — confirm at the producer.
pub(super) ino: Option<i64>,
/// Taken from the target's `metadata.ctime_ns`; `None` when the target has none.
// NOTE(review): presumably status-change time in nanoseconds — confirm at the producer.
pub(super) ctime_ns: Option<i64>,
}
impl FileMetadata {
    /// Captures the file format, byte count and metadata fields of `target`.
    fn from_target(target: &SessionScanTarget) -> Self {
        let source = &target.metadata;
        Self {
            file_format: target.file_format,
            size: target.bytes,
            mtime_ns: source.mtime_ns,
            dev: source.dev,
            ino: source.ino,
            ctime_ns: source.ctime_ns,
        }
    }

    /// Returns `true` only when every recorded field (format, size, mtime,
    /// dev, ino and ctime) is equal between the two snapshots.
    pub(super) fn same_contents_as(&self, other: &Self) -> bool {
        let mine = (
            self.file_format,
            self.size,
            self.mtime_ns,
            self.dev,
            self.ino,
            self.ctime_ns,
        );
        let theirs = (
            other.file_format,
            other.size,
            other.mtime_ns,
            other.dev,
            other.ino,
            other.ctime_ns,
        );
        mine == theirs
    }

    /// Compares the `(dev, ino)` pairs of the two snapshots.
    ///
    /// A mismatch is reported only when both pairs are fully known; if either
    /// side is missing `dev` or `ino`, the result is `true`.
    pub(super) fn same_identity_as(&self, other: &Self) -> bool {
        match (self.dev.zip(self.ino), other.dev.zip(other.ino)) {
            (Some(mine), Some(theirs)) => mine == theirs,
            _ => true,
        }
    }
}