gix_features/
hash.rs

//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type uses a more elaborate implementation that takes advantage of
//! hardware support where available. Otherwise, the `rustsha1` feature should be set, which provides a minimal yet
//! performant implementation for a decent trade-off between compile times and run-time performance.
//! If both features are enabled, `fast-sha1` takes precedence.
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    use super::Digest;

    /// An implementation of the Sha1 hash, which can be used once.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Digest the given `bytes`.
        pub fn update(&mut self, bytes: &[u8]) {
            self.0.update(bytes);
        }
        /// Finalize the hash and produce a digest.
        pub fn digest(self) -> Digest {
            self.0.digest().bytes()
        }
    }
}

/// A hash-digest produced by a [`Hasher`] hash implementation.
#[cfg(any(feature = "fast-sha1", feature = "rustsha1"))]
pub type Digest = [u8; 20];

#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    /// An implementation of the Sha1 hash, which can be used once.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Digest the given `bytes`.
        pub fn update(&mut self, bytes: &[u8]) {
            self.0.update(bytes);
        }
        /// Finalize the hash and produce a digest.
        pub fn digest(self) -> super::Digest {
            self.0.finalize().into()
        }
    }
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use _impl::Sha1 as Hasher;

/// Compute a CRC32 hash from the given `bytes`, returning the updated CRC32 value.
///
/// When calling this function for the first time, `previous_value` should be `0`. Otherwise it
/// should be the previous return value of this function, allowing a single hash to be computed
/// from multiple sequential chunks of `bytes`.
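///
/// # Examples
///
/// A sketch of hashing two chunks so that the result matches a one-shot [`crc32()`] of the whole
/// input; it assumes the `crc32` feature is enabled.
///
/// ```ignore
/// let whole = gix_features::hash::crc32(b"hello world");
/// let mut value = gix_features::hash::crc32_update(0, b"hello ");
/// value = gix_features::hash::crc32_update(value, b"world");
/// assert_eq!(value, whole);
/// ```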
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut h = crc32fast::Hasher::new_with_initial(previous_value);
    h.update(bytes);
    h.finalize()
}

/// Compute a CRC32 value of the given input `bytes`.
///
/// If the input consists of multiple sequential chunks of `bytes`, use [`crc32_update()`] instead.
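///
/// # Examples
///
/// A sketch of a one-shot checksum, assuming the `crc32` feature is enabled.
///
/// ```ignore
/// let checksum = gix_features::hash::crc32(b"some bytes");
/// ```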
68#[cfg(feature = "crc32")]
69pub fn crc32(bytes: &[u8]) -> u32 {
70    let mut h = crc32fast::Hasher::new();
71    h.update(bytes);
72    h.finalize()
73}
74
75/// Produce a hasher suitable for the given kind of hash.
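///
/// # Examples
///
/// A sketch of feeding two chunks into a SHA-1 hasher; it assumes one of the `rustsha1` or
/// `fast-sha1` features is enabled.
///
/// ```ignore
/// let mut hasher = gix_features::hash::hasher(gix_hash::Kind::Sha1);
/// hasher.update(b"hello ");
/// hasher.update(b"world");
/// let digest: gix_features::hash::Digest = hasher.digest();
/// ```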
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: gix_hash::Kind) -> Hasher {
    match kind {
        gix_hash::Kind::Sha1 => Hasher::default(),
    }
}

/// Compute the hash of `kind` for the bytes in the file at `path`, hashing only the first `num_bytes_from_start`
/// bytes, while initializing and calling `progress`.
///
/// `num_bytes_from_start` denotes the number of bytes to hash, counted from the beginning of the file.
/// It is useful to avoid reading trailing hashes, which are never part of the hash itself.
///
/// # Note
///
/// * Only available with the `gix-object` feature enabled due to usage of the [`gix_hash::Kind`] enum and the
///   [`gix_hash::ObjectId`] return value.
/// * [Interrupts][crate::interrupt] are supported.
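///
/// # Examples
///
/// A sketch of hashing a file while skipping its trailing SHA-1 checksum; the file name and the use of
/// [`progress::Discard`][crate::progress::Discard] are illustrative assumptions.
///
/// ```ignore
/// let path = std::path::Path::new("some.pack"); // hypothetical file
/// let num_bytes = std::fs::metadata(path).expect("file exists").len() - 20; // skip the trailing SHA-1
/// let should_interrupt = std::sync::atomic::AtomicBool::new(false);
/// let id = gix_features::hash::bytes_of_file(
///     path,
///     num_bytes,
///     gix_hash::Kind::Sha1,
///     &mut gix_features::progress::Discard,
///     &should_interrupt,
/// ).expect("hashing succeeds");
/// ```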
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: &std::path::Path,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    bytes(
        &mut std::fs::File::open(path)?,
        num_bytes_from_start,
        kind,
        progress,
        should_interrupt,
    )
}

/// Similar to [`bytes_of_file`], but operates on a stream of bytes.
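///
/// # Examples
///
/// A sketch of hashing an in-memory buffer through the streaming interface; the use of
/// [`progress::Discard`][crate::progress::Discard] is an illustrative assumption.
///
/// ```ignore
/// let data = b"some bytes to hash";
/// let mut reader = data.as_slice();
/// let should_interrupt = std::sync::atomic::AtomicBool::new(false);
/// let id = gix_features::hash::bytes(
///     &mut reader,
///     data.len() as u64,
///     gix_hash::Kind::Sha1,
///     &mut gix_features::progress::Discard,
///     &should_interrupt,
/// ).expect("in-memory reads do not fail");
/// ```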
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    kind: gix_hash::Kind,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    bytes_with_hasher(read, num_bytes_from_start, hasher(kind), progress, should_interrupt)
}

/// Similar to [`bytes()`], but takes a `hasher` instead of a hash kind.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_with_hasher(
    read: &mut dyn std::io::Read,
    num_bytes_from_start: u64,
    mut hasher: Hasher,
    progress: &mut dyn crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<gix_hash::ObjectId> {
    let start = std::time::Instant::now();
    // Initialize progress before the first possibility of failure, as a convenience in case callers want to recover.
    progress.init(
        Some(num_bytes_from_start as prodash::progress::Step),
        crate::progress::bytes(),
    );

    const BUF_SIZE: usize = u16::MAX as usize;
    let mut buf = [0u8; BUF_SIZE];
    let mut bytes_left = num_bytes_from_start;

    while bytes_left > 0 {
        let out = &mut buf[..BUF_SIZE.min(bytes_left as usize)];
        read.read_exact(out)?;
        bytes_left -= out.len() as u64;
        progress.inc_by(out.len());
        hasher.update(out);
        if should_interrupt.load(std::sync::atomic::Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = gix_hash::ObjectId::from(hasher.digest());
    progress.show_throughput(start);
    Ok(id)
}

#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use crate::hash::Hasher;

    /// A utility to automatically generate a hash while writing into an inner writer.
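    ///
    /// # Examples
    ///
    /// A sketch of hashing bytes as they are written to an in-memory buffer; it assumes one of the
    /// `rustsha1` or `fast-sha1` features is enabled.
    ///
    /// ```ignore
    /// use std::io::Write as _;
    ///
    /// let mut write = gix_features::hash::Write::new(Vec::<u8>::new(), gix_hash::Kind::Sha1);
    /// write.write_all(b"content to hash").expect("writes to a Vec do not fail");
    /// let digest = write.hash.digest(); // the SHA-1 over everything written so far
    /// ```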
    pub struct Write<T> {
        /// The hash implementation.
        pub hash: Hasher,
        /// The inner writer.
        pub inner: T,
    }

    impl<T> std::io::Write for Write<T>
    where
        T: std::io::Write,
    {
        fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
            let written = self.inner.write(buf)?;
            self.hash.update(&buf[..written]);
            Ok(written)
        }

        fn flush(&mut self) -> std::io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T> Write<T>
    where
        T: std::io::Write,
    {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of the given `object_hash` kind.
        pub fn new(inner: T, object_hash: gix_hash::Kind) -> Self {
            match object_hash {
                gix_hash::Kind::Sha1 => Write {
                    inner,
                    hash: Hasher::default(),
                },
            }
        }
    }
}
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub use write::Write;