gix_hash/
io.rs

1use crate::hasher;
2
/// The error type for I/O operations that compute hashes.
///
/// Both variants carry `#[from]`, so the wrapped error types convert into this type
/// automatically when propagated with `?`.
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// An I/O error, displayed transparently as the wrapped [`std::io::Error`].
    ///
    /// In this file it is produced when opening or reading the input fails, and also when
    /// hashing is interrupted (as an I/O error carrying the message "Interrupted").
    #[error(transparent)]
    Io(#[from] std::io::Error),
    /// The hasher reported a failure; the wrapped `hasher::Error` is the source.
    // NOTE(review): presumably raised by `Hasher::try_finalize()` — confirm against the `hasher` module.
    #[error("Failed to hash data")]
    Hasher(#[from] hasher::Error),
}
12
13pub(super) mod _impl {
14    use crate::{hasher, io::Error, Hasher};
15
16    /// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
17    /// while initializing and calling `progress`.
18    ///
19    /// `num_bytes_from_start` is useful to avoid reading trailing hashes, which are never part of the hash itself,
20    /// denoting the amount of bytes to hash starting from the beginning of the file.
21    ///
22    /// # Note
23    ///
24    /// * [Interrupts][gix_features::interrupt] are supported.
25    pub fn bytes_of_file(
26        path: &std::path::Path,
27        num_bytes_from_start: u64,
28        kind: crate::Kind,
29        progress: &mut dyn gix_features::progress::Progress,
30        should_interrupt: &std::sync::atomic::AtomicBool,
31    ) -> Result<crate::ObjectId, Error> {
32        bytes(
33            &mut std::fs::File::open(path)?,
34            num_bytes_from_start,
35            kind,
36            progress,
37            should_interrupt,
38        )
39    }
40
41    /// Similar to [`bytes_of_file`], but operates on a stream of bytes.
42    pub fn bytes(
43        read: &mut dyn std::io::Read,
44        num_bytes_from_start: u64,
45        kind: crate::Kind,
46        progress: &mut dyn gix_features::progress::Progress,
47        should_interrupt: &std::sync::atomic::AtomicBool,
48    ) -> Result<crate::ObjectId, Error> {
49        bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
50    }
51
52    /// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
53    pub fn bytes_with_hasher(
54        read: &mut dyn std::io::Read,
55        num_bytes_from_start: u64,
56        mut hasher: Hasher,
57        progress: &mut dyn gix_features::progress::Progress,
58        should_interrupt: &std::sync::atomic::AtomicBool,
59    ) -> Result<crate::ObjectId, Error> {
60        let start = std::time::Instant::now();
61        // init progress before the possibility for failure, as convenience in case people want to recover
62        progress.init(
63            Some(num_bytes_from_start as gix_features::progress::prodash::progress::Step),
64            gix_features::progress::bytes(),
65        );
66
67        const BUF_SIZE: usize = u16::MAX as usize;
68        let mut buf = [0u8; BUF_SIZE];
69        let mut bytes_left = num_bytes_from_start;
70
71        while bytes_left > 0 {
72            let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
73            read.read_exact(out)?;
74            bytes_left -= out.len() as u64;
75            progress.inc_by(out.len());
76            hasher.update(out);
77            if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
78                return Err(std::io::Error::other("Interrupted").into());
79            }
80        }
81
82        let id = hasher.try_finalize()?;
83        progress.show_throughput(start);
84        Ok(id)
85    }
86
    /// A utility to automatically generate a hash while writing into an inner writer.
    ///
    /// Its `std::io::Write` implementation forwards every write to `inner` and feeds the bytes
    /// that `inner` reported as written into `hash`.
    /// Both fields are public, so callers can access the hasher and the writer directly.
    pub struct Write<T> {
        /// The hash implementation, updated with the bytes accepted by `inner`.
        pub hash: Hasher,
        /// The inner writer, which receives all writes and flushes.
        pub inner: T,
    }
94
95    impl<T> std::io::Write for Write<T>
96    where
97        T: std::io::Write,
98    {
99        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
100            let written = self.inner.write(buf)?;
101            self.hash.update(&buf[..written]);
102            Ok(written)
103        }
104
105        fn flush(&mut self) -> std::io::Result<()> {
106            self.inner.flush()
107        }
108    }
109
110    impl<T> Write<T>
111    where
112        T: std::io::Write,
113    {
114        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of `kind`.
115        pub fn new(inner: T, object_hash: crate::Kind) -> Self {
116            match object_hash {
117                #[cfg(feature = "sha1")]
118                crate::Kind::Sha1 => Write {
119                    inner,
120                    hash: crate::hasher(object_hash),
121                },
122                #[cfg(feature = "sha256")]
123                crate::Kind::Sha256 => Write {
124                    inner,
125                    hash: crate::hasher(object_hash),
126                },
127            }
128        }
129    }
130}
131pub use _impl::Write;