dsc 0.1.3

dsc is a CLI tool for finding and removing duplicate files on one or more file systems, while respecting your gitignore rules.
use std::fs::File;
use std::io::{Read, Seek, SeekFrom};

use anyhow::Result;

use crate::candidate_selection::hashing::HashInstructions;
use xxhash_rust::xxh3;

/// Hashes regions of files using a single, reusable read buffer.
///
/// The buffer is allocated once in [`FileHasher::new`] and reused by every
/// call to [`FileHasher::hash`].
pub struct FileHasher {
    /// Scratch space that file data is read into before being hashed; its
    /// length is the size of each chunk fed to the hash function.
    buffer: Vec<u8>,
}

impl FileHasher {
    /// Creates a hasher whose reusable read buffer is `buffer_size` bytes long.
    ///
    /// The buffer is zero-initialised once here and reused by every call to
    /// [`FileHasher::hash`].
    pub fn new(buffer_size: usize) -> Self {
        FileHasher {
            buffer: vec![0u8; buffer_size],
        }
    }

    /// Hashes a region of `file` and returns the resulting 64-bit value.
    ///
    /// The file is positioned at `instructions.offset`, then read in chunks of
    /// the internal buffer's length until at least `instructions.work_size()`
    /// bytes have been consumed or end-of-file is reached. Each chunk is
    /// hashed with XXH3, seeded with the hash of the previous chunk; the first
    /// chunk is seeded with `instructions.seed`.
    ///
    /// Every chunk is filled completely (or up to end-of-file) before it is
    /// hashed, so the result depends only on the file contents and the buffer
    /// length, never on how the operating system happens to split the reads.
    ///
    /// Note that the final chunk is hashed in full even when it extends past
    /// `work_size()`. If the buffer is empty or no data can be read, the seed
    /// is returned unchanged.
    ///
    /// # Errors
    ///
    /// Returns an error if seeking or reading fails. Reads that fail with
    /// [`std::io::ErrorKind::Interrupted`] are retried rather than reported.
    pub fn hash(&mut self, file: &mut File, instructions: &HashInstructions) -> Result<u64> {
        let mut seed = instructions.seed;

        // We could use the block size - seek for the remaining size
        // but this may cause us to block when reading from a pipe
        let mut remaining = instructions.work_size();

        trace!("Start seek");
        file.seek(SeekFrom::Start(instructions.offset))?;

        while remaining > 0 {
            trace!("Start file.read");
            let filled = Self::fill_buffer(file, &mut self.buffer)?;
            trace!("Finished file.read");

            // Nothing left to read (end-of-file, or a zero-length buffer).
            if filled == 0 {
                break;
            }

            // Chain the chunks by feeding the previous hash in as the seed.
            seed = xxh3::xxh3_64_with_seed(&self.buffer[..filled], seed);

            // A chunk may overshoot the requested amount; clamp at zero.
            remaining = remaining.saturating_sub(filled as u64);
        }

        Ok(seed)
    }

    /// Reads from `file` until `buffer` is full or end-of-file is reached,
    /// returning the number of bytes placed at the start of `buffer`.
    ///
    /// A single `read` call may return fewer bytes than requested; looping
    /// here keeps the chunk boundaries seen by the hash function stable.
    fn fill_buffer(file: &mut File, buffer: &mut [u8]) -> std::io::Result<usize> {
        let mut filled = 0;

        while filled < buffer.len() {
            match file.read(&mut buffer[filled..]) {
                // End-of-file: return what we have so far.
                Ok(0) => break,
                Ok(count) => filled += count,
                // A signal interrupted the call before any data arrived; retry.
                Err(err) if err.kind() == std::io::ErrorKind::Interrupted => {}
                Err(err) => return Err(err),
            }
        }

        Ok(filled)
    }
}