gix_hash/
oid.rs

1use std::hash;
2
3use crate::{Kind, ObjectId, SIZE_OF_SHA1_DIGEST};
4
/// A borrowed, unsized view onto the raw bytes of a hash that identifies an object.
///
/// # Future Proofing
///
/// Should several hashes of identical length ever need to be supported, the slice length
/// alone can no longer tell them apart. The plan for that case is to encode the extra
/// information in the high bits of the length stored in the `bytes` fat pointer
/// (which points to the data and carries its length in bytes). Before the bytes are accessed
/// or handed out, an adjusted slice would be constructed, while the high bits help resolving
/// the hash [`kind()`][oid::kind()].
/// Plenty of bits should be available for such 'conflict resolution', as most hashes aren't
/// longer than 64 bytes.
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
#[allow(non_camel_case_types)]
#[cfg_attr(feature = "serde", derive(serde::Serialize))]
pub struct oid {
    /// The raw digest; its length is what identifies the hash kind.
    bytes: [u8],
}
24
25// False positive:
26// Using an automatic implementation of `Hash` for `oid` would lead to
27// it attempting to hash the length of the slice first. On 32 bit systems
28// this can lead to issues with the custom `gix_hashtable` `Hasher` implementation,
29// and it currently ends up being discarded there anyway.
30#[allow(clippy::derived_hash_with_manual_eq)]
31impl hash::Hash for oid {
32    fn hash<H: hash::Hasher>(&self, state: &mut H) {
33        state.write(self.as_bytes());
34    }
35}
36
37/// A utility able to format itself with the given number of characters in hex.
38#[derive(PartialEq, Eq, Hash, Ord, PartialOrd)]
39pub struct HexDisplay<'a> {
40    inner: &'a oid,
41    hex_len: usize,
42}
43
44impl std::fmt::Display for HexDisplay<'_> {
45    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
46        let mut hex = Kind::hex_buf();
47        let hex = self.inner.hex_to_buf(hex.as_mut());
48        let max_len = hex.len();
49        f.write_str(&hex[..self.hex_len.min(max_len)])
50    }
51}
52
53impl std::fmt::Debug for oid {
54    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
55        write!(
56            f,
57            "{}({})",
58            match self.kind() {
59                Kind::Sha1 => "Sha1",
60            },
61            self.to_hex(),
62        )
63    }
64}
65
66/// The error returned when trying to convert a byte slice to an [`oid`] or [`ObjectId`]
67#[allow(missing_docs)]
68#[derive(Debug, thiserror::Error)]
69pub enum Error {
70    #[error("Cannot instantiate git hash from a digest of length {0}")]
71    InvalidByteSliceLength(usize),
72}
73
74/// Conversion
75impl oid {
76    /// Try to create a shared object id from a slice of bytes representing a hash `digest`
77    #[inline]
78    pub fn try_from_bytes(digest: &[u8]) -> Result<&Self, Error> {
79        match digest.len() {
80            20 => Ok(
81                #[allow(unsafe_code)]
82                unsafe {
83                    &*(std::ptr::from_ref::<[u8]>(digest) as *const oid)
84                },
85            ),
86            len => Err(Error::InvalidByteSliceLength(len)),
87        }
88    }
89
90    /// Create an OID from the input `value` slice without performing any safety check.
91    /// Use only once sure that `value` is a hash of valid length.
92    pub fn from_bytes_unchecked(value: &[u8]) -> &Self {
93        Self::from_bytes(value)
94    }
95
96    /// Only from code that statically assures correct sizes using array conversions.
97    pub(crate) fn from_bytes(value: &[u8]) -> &Self {
98        #[allow(unsafe_code)]
99        unsafe {
100            &*(std::ptr::from_ref::<[u8]>(value) as *const oid)
101        }
102    }
103}
104
105/// Access
106impl oid {
107    /// The kind of hash used for this instance.
108    #[inline]
109    pub fn kind(&self) -> Kind {
110        Kind::from_len_in_bytes(self.bytes.len())
111    }
112
113    /// The first byte of the hash, commonly used to partition a set of object ids.
114    #[inline]
115    pub fn first_byte(&self) -> u8 {
116        self.bytes[0]
117    }
118
119    /// Interpret this object id as raw byte slice.
120    #[inline]
121    pub fn as_bytes(&self) -> &[u8] {
122        &self.bytes
123    }
124
125    /// Return a type which can display itself in hexadecimal form with the `len` amount of characters.
126    #[inline]
127    pub fn to_hex_with_len(&self, len: usize) -> HexDisplay<'_> {
128        HexDisplay {
129            inner: self,
130            hex_len: len,
131        }
132    }
133
134    /// Return a type which displays this oid as hex in full.
135    #[inline]
136    pub fn to_hex(&self) -> HexDisplay<'_> {
137        HexDisplay {
138            inner: self,
139            hex_len: self.bytes.len() * 2,
140        }
141    }
142
143    /// Returns `true` if this hash consists of all null bytes.
144    #[inline]
145    #[doc(alias = "is_zero", alias = "git2")]
146    pub fn is_null(&self) -> bool {
147        match self.kind() {
148            Kind::Sha1 => &self.bytes == oid::null_sha1().as_bytes(),
149        }
150    }
151
152    /// Returns `true` if this hash is equal to an empty blob.
153    #[inline]
154    pub fn is_empty_blob(&self) -> bool {
155        match self.kind() {
156            Kind::Sha1 => &self.bytes == oid::empty_blob_sha1().as_bytes(),
157        }
158    }
159
160    /// Returns `true` if this hash is equal to an empty tree.
161    #[inline]
162    pub fn is_empty_tree(&self) -> bool {
163        match self.kind() {
164            Kind::Sha1 => &self.bytes == oid::empty_tree_sha1().as_bytes(),
165        }
166    }
167}
168
169/// Sha1 specific methods
170impl oid {
171    /// Write ourselves to the `out` in hexadecimal notation, returning the hex-string ready for display.
172    ///
173    /// **Panics** if the buffer isn't big enough to hold twice as many bytes as the current binary size.
174    #[inline]
175    #[must_use]
176    pub fn hex_to_buf<'a>(&self, buf: &'a mut [u8]) -> &'a mut str {
177        let num_hex_bytes = self.bytes.len() * 2;
178        faster_hex::hex_encode(&self.bytes, &mut buf[..num_hex_bytes]).expect("to count correctly")
179    }
180
181    /// Write ourselves to `out` in hexadecimal notation.
182    #[inline]
183    pub fn write_hex_to(&self, out: &mut dyn std::io::Write) -> std::io::Result<()> {
184        let mut hex = Kind::hex_buf();
185        let hex_len = self.hex_to_buf(&mut hex).len();
186        out.write_all(&hex[..hex_len])
187    }
188
189    /// Returns a Sha1 digest with all bytes being initialized to zero.
190    #[inline]
191    pub(crate) fn null_sha1() -> &'static Self {
192        oid::from_bytes([0u8; SIZE_OF_SHA1_DIGEST].as_ref())
193    }
194
195    /// Returns an oid representing the hash of an empty blob.
196    #[inline]
197    pub(crate) fn empty_blob_sha1() -> &'static Self {
198        oid::from_bytes(b"\xe6\x9d\xe2\x9b\xb2\xd1\xd6\x43\x4b\x8b\x29\xae\x77\x5a\xd8\xc2\xe4\x8c\x53\x91")
199    }
200
201    /// Returns an oid representing the hash of an empty tree.
202    #[inline]
203    pub(crate) fn empty_tree_sha1() -> &'static Self {
204        oid::from_bytes(b"\x4b\x82\x5d\xc6\x42\xcb\x6e\xb9\xa0\x60\xe5\x4b\xf8\xd6\x92\x88\xfb\xee\x49\x04")
205    }
206}
207
208impl AsRef<oid> for &oid {
209    fn as_ref(&self) -> &oid {
210        self
211    }
212}
213
214impl<'a> TryFrom<&'a [u8]> for &'a oid {
215    type Error = Error;
216
217    fn try_from(value: &'a [u8]) -> Result<Self, Self::Error> {
218        oid::try_from_bytes(value)
219    }
220}
221
222impl ToOwned for oid {
223    type Owned = ObjectId;
224
225    fn to_owned(&self) -> Self::Owned {
226        match self.kind() {
227            Kind::Sha1 => ObjectId::Sha1(self.bytes.try_into().expect("no bug in hash detection")),
228        }
229    }
230}
231
232impl<'a> From<&'a [u8; SIZE_OF_SHA1_DIGEST]> for &'a oid {
233    fn from(v: &'a [u8; SIZE_OF_SHA1_DIGEST]) -> Self {
234        oid::from_bytes(v.as_ref())
235    }
236}
237
238impl std::fmt::Display for &oid {
239    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
240        let mut buf = Kind::hex_buf();
241        f.write_str(self.hex_to_buf(&mut buf))
242    }
243}
244
245impl PartialEq<ObjectId> for &oid {
246    fn eq(&self, other: &ObjectId) -> bool {
247        *self == other.as_ref()
248    }
249}
250
/// Manually created from a version that uses a slice, and we forcefully try to convert it into a borrowed array of the desired size.
/// Could be improved by fitting this into serde.
/// Unfortunately the `serde::Deserialize` derive wouldn't work for borrowed arrays.
///
/// The borrowed bytes are expected inside a newtype struct named `Digest`. Bytes whose length
/// is rejected by [`oid::try_from_bytes()`] are reported as a deserialization error
/// instead of causing a panic.
#[cfg(feature = "serde")]
impl<'de: 'a, 'a> serde::Deserialize<'de> for &'a oid {
    fn deserialize<D>(deserializer: D) -> Result<Self, <D as serde::Deserializer<'de>>::Error>
    where
        D: serde::Deserializer<'de>,
    {
        struct __Visitor<'de: 'a, 'a> {
            marker: std::marker::PhantomData<&'a oid>,
            lifetime: std::marker::PhantomData<&'de ()>,
        }
        impl<'de: 'a, 'a> serde::de::Visitor<'de> for __Visitor<'de, 'a> {
            type Value = &'a oid;
            fn expecting(&self, __formatter: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                std::fmt::Formatter::write_str(__formatter, "tuple struct Digest")
            }
            #[inline]
            fn visit_newtype_struct<__E>(self, __e: __E) -> std::result::Result<Self::Value, __E::Error>
            where
                __E: serde::Deserializer<'de>,
            {
                let __field0 = <&'a [u8] as serde::Deserialize>::deserialize(__e)?;
                // Input is untrusted: surface a wrong length as an error, never as a panic.
                oid::try_from_bytes(__field0).map_err(serde::de::Error::custom)
            }
            #[inline]
            fn visit_seq<__A>(self, mut __seq: __A) -> std::result::Result<Self::Value, __A::Error>
            where
                __A: serde::de::SeqAccess<'de>,
            {
                let Some(__field0) = serde::de::SeqAccess::next_element::<&'a [u8]>(&mut __seq)? else {
                    return Err(serde::de::Error::invalid_length(
                        0usize,
                        &"tuple struct Digest with 1 element",
                    ));
                };
                // Input is untrusted: surface a wrong length as an error, never as a panic.
                oid::try_from_bytes(__field0).map_err(serde::de::Error::custom)
            }
        }
        serde::Deserializer::deserialize_newtype_struct(
            deserializer,
            "Digest",
            __Visitor {
                marker: std::marker::PhantomData::<&'a oid>,
                lifetime: std::marker::PhantomData,
            },
        )
    }
}
313}