git_features/
hash.rs

//! Hash functions and hash utilities
//!
//! With the `fast-sha1` feature, the `Sha1` hash type will use a more elaborate implementation utilizing hardware support
//! in case it is available. Otherwise the `rustsha1` feature should be set, which selects a minimal yet performant
//! implementation for a decent trade-off between compile times and run-time performance.
//! If both features are enabled, `fast-sha1` takes precedence.
#[cfg(all(feature = "rustsha1", not(feature = "fast-sha1")))]
mod _impl {
    /// A single-use SHA-1 hasher backed by `sha1_smol`.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1_smol::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes);
        }

        /// Consume the hasher and return the resulting digest.
        pub fn digest(self) -> super::Sha1Digest {
            let Self(state) = self;
            state.digest().bytes()
        }
    }
}
25
/// The raw 20-byte output of a [`Sha1`] hasher.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub type Sha1Digest = [u8; 20];
29
#[cfg(feature = "fast-sha1")]
mod _impl {
    use sha1::Digest;

    /// A single-use SHA-1 hasher backed by the `sha1` crate.
    #[derive(Default, Clone)]
    pub struct Sha1(sha1::Sha1);

    impl Sha1 {
        /// Feed `bytes` into the running hash state.
        pub fn update(&mut self, bytes: &[u8]) {
            let Self(state) = self;
            state.update(bytes);
        }

        /// Consume the hasher and return the resulting digest.
        pub fn digest(self) -> super::Sha1Digest {
            let Self(state) = self;
            state.finalize().into()
        }
    }
}
51
52#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
53pub use _impl::Sha1;
54
/// Continue a CRC32 computation with `bytes` and return the updated checksum.
///
/// Pass `0` as `previous_value` for the first chunk. For every following chunk, pass the value
/// returned by the preceding call so that a sequence of chunks yields one combined checksum.
#[cfg(feature = "crc32")]
pub fn crc32_update(previous_value: u32, bytes: &[u8]) -> u32 {
    let mut state = crc32fast::Hasher::new_with_initial(previous_value);
    state.update(bytes);
    state.finalize()
}
66
/// Return the CRC32 checksum of `bytes` in one shot.
///
/// For input that arrives in several chunks, use [`crc32_update()`] instead.
#[cfg(feature = "crc32")]
pub fn crc32(bytes: &[u8]) -> u32 {
    crc32fast::hash(bytes)
}
76
/// Create a fresh hasher for the hash `kind`.
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
pub fn hasher(kind: git_hash::Kind) -> Sha1 {
    use git_hash::Kind;

    // No wildcard arm: adding a `Kind` variant stops this from compiling until it is handled.
    match kind {
        Kind::Sha1 => Default::default(),
    }
}
84
/// Open the file at `path` and hash its first `num_bytes_from_start` bytes with a hasher of `kind`,
/// initializing and advancing `progress` along the way.
///
/// `num_bytes_from_start` is the amount of bytes to hash, counted from the beginning of the file.
/// It allows leaving out a trailing hash stored at the end of the file, which is never part of the
/// hashed content itself.
///
/// # Note
///
/// * Available when the `progress` feature and one of the `rustsha1` or `fast-sha1` features are enabled.
/// * [Interrupts][crate::interrupt] are supported through `should_interrupt`.
///
/// # Errors
///
/// Fails if the file cannot be opened, or with any error produced by [`bytes()`].
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes_of_file(
    path: impl AsRef<std::path::Path>,
    num_bytes_from_start: usize,
    kind: git_hash::Kind,
    progress: &mut impl crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<git_hash::ObjectId> {
    let file = std::fs::File::open(path)?;
    bytes(file, num_bytes_from_start, kind, progress, should_interrupt)
}
112
/// Like [`bytes_of_file`], but hashes the first `num_bytes_from_start` bytes of an already opened reader `read`.
///
/// The input is consumed in chunks of at most `u16::MAX` bytes. After each chunk was hashed,
/// `progress` is advanced by the chunk length and `should_interrupt` is checked.
///
/// # Errors
///
/// * Any error of `read_exact` is passed on, which includes `read` ending before
///   `num_bytes_from_start` bytes were delivered.
/// * An error of kind `Other` with the message `Interrupted` is returned once `should_interrupt` is
///   found to be set after a chunk was hashed.
#[cfg(all(feature = "progress", any(feature = "rustsha1", feature = "fast-sha1")))]
pub fn bytes(
    mut read: impl std::io::Read,
    num_bytes_from_start: usize,
    kind: git_hash::Kind,
    progress: &mut impl crate::progress::Progress,
    should_interrupt: &std::sync::atomic::AtomicBool,
) -> std::io::Result<git_hash::ObjectId> {
    use std::sync::atomic::Ordering;

    const CHUNK_CAPACITY: usize = u16::MAX as usize;

    let mut state = hasher(kind);
    let started_at = std::time::Instant::now();
    // Progress is set up before the first fallible operation, so callers that recover from an
    // error still find it initialized.
    progress.init(Some(num_bytes_from_start), crate::progress::bytes());

    let mut scratch = [0u8; CHUNK_CAPACITY];
    let mut remaining = num_bytes_from_start;

    while remaining != 0 {
        let chunk_len = remaining.min(CHUNK_CAPACITY);
        let chunk = &mut scratch[..chunk_len];
        read.read_exact(chunk)?;
        remaining -= chunk_len;
        progress.inc_by(chunk_len);
        state.update(chunk);
        // The flag is inspected only after the chunk was fully processed.
        if should_interrupt.load(Ordering::SeqCst) {
            return Err(std::io::Error::new(std::io::ErrorKind::Other, "Interrupted"));
        }
    }

    let id = git_hash::ObjectId::from(state.digest());
    progress.show_throughput(started_at);
    Ok(id)
}
146
#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
mod write {
    use std::io;

    use super::Sha1;

    /// A writer adapter that hashes every byte it successfully passes on to its inner writer.
    pub struct Write<T> {
        /// The hash implementation.
        pub hash: Sha1,
        /// The inner writer.
        pub inner: T,
    }

    impl<T: io::Write> io::Write for Write<T> {
        fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
            // Hash only the prefix the inner writer reports as written, which keeps the hash in
            // step with the actual output on partial writes.
            let accepted = self.inner.write(buf)?;
            self.hash.update(&buf[..accepted]);
            Ok(accepted)
        }

        fn flush(&mut self) -> io::Result<()> {
            self.inner.flush()
        }
    }

    impl<T: io::Write> Write<T> {
        /// Create a new hash writer which hashes all bytes written to `inner` with a hash of kind `object_hash`.
        pub fn new(inner: T, object_hash: git_hash::Kind) -> Self {
            let hash = match object_hash {
                git_hash::Kind::Sha1 => Sha1::default(),
            };
            Self { hash, inner }
        }
    }
}
189#[cfg(any(feature = "rustsha1", feature = "fast-sha1"))]
190pub use write::Write;