use std::fs::File;
use std::path::PathBuf;
use anyhow::{anyhow, Context, Result};
use crate::candidate_selection::collision_tracker::CollisionType::{Occupied, Terminal};
use crate::candidate_selection::collision_tracker::FollowUpWork::{Nothing, One, Two};
use crate::candidate_selection::messages::{FileWithDescriptor, HashComputedResult};
use crate::file_descriptor::FileDescriptor;
use rustc_hash::{FxHashMap, FxHashSet};
/// Identifies one hashed block of a file: which bytes were read and what they hashed to.
///
/// Two files that produce equal keys agree on their total size and on the hash of the
/// block at `offset` with length `block_size`.
#[derive(Debug, PartialEq, Eq, Hash)]
struct PartialCollisionKey {
    /// Hash value reported for the block.
    hash: u64,
    /// Byte offset at which the block starts.
    offset: u64,
    /// Number of bytes the read instruction asked for.
    block_size: u64,
    /// Total size of the file the block belongs to.
    file_size: u64,
}
/// Key under which groups of duplicate files are collected.
///
/// Built from the file size and the hash reported for the block that reached the end of
/// the file.
#[derive(Debug, PartialEq, Eq, Hash)]
pub struct FullCollisionKey {
    /// Total size shared by every file in the group.
    pub file_size: u64,
    /// Hash value reported for the last block of the files in the group.
    hash: u64,
}
/// State stored for a `PartialCollisionKey` once at least one file has produced it.
#[derive(Debug, PartialEq, Eq, Hash)]
enum CollisionType {
    /// Two or more files have produced the key; the first occupant has already been
    /// handed on (reopened or recorded as a duplicate).
    Terminal,
    /// Exactly one file has produced the key so far; its descriptor is remembered so it
    /// can be picked up again if a second file collides with it.
    Occupied(FileDescriptor),
}
/// Tracks per-block hash collisions between files in order to find duplicates.
///
/// Files are identified by `FileDescriptor`. Paths are registered with `register_path`,
/// block hashes are fed in with `update_hash`, and confirmed duplicates accumulate in
/// `duplicates`.
///
/// `Default` yields the same empty tracker as `CollisionTracker::new`.
#[derive(Debug, Default)]
pub struct CollisionTracker {
    /// Every path registered for each file descriptor.
    pub handles: FxHashMap<FileDescriptor, FxHashSet<PathBuf>>,
    /// Groups of descriptors whose final block collided, keyed by size and hash.
    pub duplicates: FxHashMap<FullCollisionKey, FxHashSet<FileDescriptor>>,
    /// Collision state for every block key seen so far.
    hits: FxHashMap<PartialCollisionKey, CollisionType>,
    /// Descriptors for which at least one block hash has been submitted.
    has_hash: FxHashSet<FileDescriptor>,
}
/// What the caller of `CollisionTracker::update_hash` is handed back after a block hash
/// has been recorded.
#[derive(Debug)]
pub enum FollowUpWork {
    /// No file is handed back.
    Nothing,
    /// The submitted file is handed back because its block collided with a key that
    /// other files had already collided on and the file has bytes left beyond the block.
    /// Presumably the caller hashes its next block; confirm against the caller.
    One(FileWithDescriptor),
    /// The submitted file together with the freshly reopened earlier occupant of the
    /// same key; both have bytes left beyond the colliding block.
    Two(FileWithDescriptor, FileWithDescriptor),
}
impl CollisionTracker {
    /// Creates a tracker with no registered paths, no recorded hashes and no duplicates.
    pub fn new() -> Self {
        CollisionTracker {
            handles: FxHashMap::default(),
            duplicates: FxHashMap::default(),
            hits: FxHashMap::default(),
            has_hash: FxHashSet::default(),
        }
    }

    /// Forgets the paths registered for a file, unless a hash was already submitted for it.
    ///
    /// When `update_hash` has been called for the descriptor, the paths are kept and a
    /// warning is logged instead (presumably because the descriptor may still occupy a
    /// key and need reopening later). `file_with_descriptor` is consumed either way.
    pub fn remove_file(&mut self, file_with_descriptor: FileWithDescriptor) {
        if self
            .has_hash
            .contains(&file_with_descriptor.file_descriptor)
        {
            warn!("Removing partially hashed file {:?}", file_with_descriptor);
            return;
        }
        self.handles.remove(&file_with_descriptor.file_descriptor);
    }

    /// Records the hash of one block of a file and reports what to do next.
    ///
    /// The block is identified by its hash, offset, read size and the file size taken
    /// from `hash_result`. The outcome depends on what was previously stored for that key:
    ///
    /// * key never seen: the file becomes the key's occupant and `Nothing` is returned;
    /// * key held by a single earlier file and bytes remain after the block: the earlier
    ///   file is reopened from one of its registered paths and `Two` is returned;
    /// * key held by a single earlier file and the block reaches the end of the file:
    ///   both descriptors are recorded in `duplicates` and `Nothing` is returned;
    /// * key already collided on and bytes remain: `One` hands the file back;
    /// * key already collided on and the block reaches the end of the file: the
    ///   descriptor joins the existing duplicate group and `Nothing` is returned.
    ///
    /// # Errors
    ///
    /// Fails when an earlier occupant has to be reopened and its descriptor has no entry
    /// in `handles`, the entry holds no path, or opening the path fails. The key has
    /// already been marked as collided by the time such an error is returned.
    pub fn update_hash(
        &mut self,
        file_with_descriptor: FileWithDescriptor,
        hash_result: &HashComputedResult,
    ) -> Result<FollowUpWork> {
        let file_descriptor = file_with_descriptor.file_descriptor;
        self.has_hash.insert(file_descriptor);

        let instructions = &hash_result.instructions;
        let block_size = instructions.read_size;
        let file_size = instructions.file_size;
        let offset = instructions.offset;
        let hash = hash_result.hash;
        // Saturating: a sum that would exceed u64::MAX is certainly not below
        // `file_size`, and must neither panic (debug) nor wrap around (release).
        let has_more_blocks = offset.saturating_add(block_size) < file_size;
        let key = PartialCollisionKey {
            hash,
            offset,
            block_size,
            file_size,
        };

        // A key that has been seen before is always left in the `Terminal` state; take
        // the previous state out in the same step so it can be matched by value.
        let previous = match self.hits.get_mut(&key) {
            Some(state) => std::mem::replace(state, Terminal),
            None => {
                self.hits.insert(key, Occupied(file_descriptor));
                return Ok(Nothing);
            }
        };

        match previous {
            Occupied(occupying_handle) if has_more_blocks => {
                trace!("Reopening known file");
                let reopened_handle = self.reopen_occupant(occupying_handle)?;
                trace!("Returning opened file");
                Ok(Two(file_with_descriptor, reopened_handle))
            }
            Occupied(occupying_handle) => {
                let group = self
                    .duplicates
                    .entry(FullCollisionKey { file_size, hash })
                    .or_default();
                group.insert(file_descriptor);
                group.insert(occupying_handle);
                Ok(Nothing)
            }
            Terminal if has_more_blocks => Ok(One(file_with_descriptor)),
            Terminal => {
                // Only an existing group is extended here; no new group is created.
                let dup_key = FullCollisionKey { file_size, hash };
                if let Some(group) = self.duplicates.get_mut(&dup_key) {
                    group.insert(file_descriptor);
                }
                Ok(Nothing)
            }
        }
    }

    /// Associates `path` with `file_descriptor`.
    ///
    /// Returns `true` when the descriptor had no registered path before this call and
    /// `false` when the path was added to an existing entry.
    pub fn register_path(&mut self, file_descriptor: FileDescriptor, path: PathBuf) -> bool {
        let known_before = self.handles.len();
        self.handles
            .entry(file_descriptor)
            .or_default()
            .insert(path);
        // The map only grows when the entry had to be created.
        self.handles.len() > known_before
    }

    /// Opens one of the paths registered for `file_descriptor` and pairs the new file
    /// handle with that descriptor. Which path is used when several are registered is
    /// unspecified (first in set iteration order).
    fn reopen_occupant(&self, file_descriptor: FileDescriptor) -> Result<FileWithDescriptor> {
        let path = self
            .handles
            .get(&file_descriptor)
            .context("Expected FileDescriptor was not present")?
            .iter()
            .next()
            .ok_or_else(|| anyhow!("Could not reopen file"))?;
        let file = File::open(path)
            .with_context(|| format!("Could not reopen {}", path.display()))?;
        Ok(FileWithDescriptor {
            file,
            file_descriptor,
        })
    }
}