gix_status/index_as_worktree/
traits.rs

1use std::{io::Read, sync::atomic::AtomicBool};
2
3use bstr::BStr;
4use gix_hash::ObjectId;
5use gix_index as index;
6use index::Entry;
7
8use crate::index_as_worktree::Error;
9
10/// Compares the content of two blobs in some way.
11pub trait CompareBlobs {
12    /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()].
13    type Output;
14
15    /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size`
16    /// and allow streaming its bytes using `data`.
17    /// If this function returns `None` the `entry` and the worktree blob are assumed to be identical.
18    /// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself.
19    /// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer.
20    fn compare_blobs<'a, 'b>(
21        &mut self,
22        entry: &gix_index::Entry,
23        worktree_blob_size: u64,
24        data: impl ReadData<'a>,
25        buf: &mut Vec<u8>,
26    ) -> Result<Option<Self::Output>, Error>;
27}
28
29/// Determine the status of a submodule, which always indicates that it changed if present.
30pub trait SubmoduleStatus {
31    /// The status result, describing in which way the submodule changed.
32    type Output;
33    /// A custom error that may occur while computing the submodule status.
34    type Error: std::error::Error + Send + Sync + 'static;
35
36    /// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected.
37    fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result<Option<Self::Output>, Self::Error>;
38}
39
40/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files.
41pub trait ReadData<'a> {
42    /// Returns the contents of this blob.
43    ///
44    /// This potentially performs IO and other expensive operations
45    /// and should only be called when necessary.
46    fn read_blob(self) -> Result<&'a [u8], Error>;
47
48    /// Stream a worktree file in such a manner that its content matches what would be put into git.
49    fn stream_worktree_file(self) -> Result<read_data::Stream<'a>, Error>;
50}
51
52///
53pub mod read_data {
54    use std::sync::atomic::Ordering;
55
56    use gix_filter::pipeline::convert::ToGitOutcome;
57
58    use crate::AtomicU64;
59
60    /// A stream with worktree file data.
61    pub struct Stream<'a> {
62        pub(crate) inner: ToGitOutcome<'a, std::fs::File>,
63        pub(crate) bytes: Option<&'a AtomicU64>,
64        pub(crate) len: Option<u64>,
65    }
66
67    impl<'a> Stream<'a> {
68        /// Return the underlying byte-buffer if there is one.
69        ///
70        /// If `None`, read from this instance like a stream.
71        /// Note that this method should only be called once to assure proper accounting of the amount of bytes read.
72        pub fn as_bytes(&self) -> Option<&'a [u8]> {
73            self.inner.as_bytes().map(|v| {
74                if let Some(bytes) = self.bytes {
75                    bytes.fetch_add(v.len() as u64, Ordering::Relaxed);
76                }
77                v
78            })
79        }
80
81        /// Return the size of the stream in bytes if it is known in advance.
82        pub fn size(&self) -> Option<u64> {
83            self.len
84        }
85    }
86
87    impl std::io::Read for Stream<'_> {
88        fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
89            let n = self.inner.read(buf)?;
90            if let Some(bytes) = self.bytes {
91                bytes.fetch_add(n as u64, Ordering::Relaxed);
92            }
93            Ok(n)
94        }
95    }
96}
97
98/// Compares to blobs by comparing their size and oid, and only looks at the file if
99/// the size matches, therefore it's very fast.
100#[derive(Clone)]
101pub struct FastEq;
102
103impl CompareBlobs for FastEq {
104    type Output = ();
105
106    // TODO: make all streaming IOPs interruptible.
107    fn compare_blobs<'a, 'b>(
108        &mut self,
109        entry: &Entry,
110        worktree_file_size: u64,
111        data: impl ReadData<'a>,
112        buf: &mut Vec<u8>,
113    ) -> Result<Option<Self::Output>, Error> {
114        // make sure to account for racily smudged entries here so that they don't always keep
115        // showing up as modified even after their contents have changed again, to a potentially
116        // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs.
117        if u64::from(entry.stat.size) != worktree_file_size && (entry.id.is_empty_blob() || entry.stat.size != 0) {
118            return Ok(Some(()));
119        }
120        HashEq
121            .compare_blobs(entry, worktree_file_size, data, buf)
122            .map(|opt| opt.map(|_| ()))
123    }
124}
125
126/// Compares files to blobs by *always* comparing their hashes.
127///
128/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and
129/// therefore always returns an OID that can be reused later.
130#[derive(Clone)]
131pub struct HashEq;
132
133impl CompareBlobs for HashEq {
134    type Output = ObjectId;
135
136    fn compare_blobs<'a, 'b>(
137        &mut self,
138        entry: &Entry,
139        _worktree_blob_size: u64,
140        data: impl ReadData<'a>,
141        buf: &mut Vec<u8>,
142    ) -> Result<Option<Self::Output>, Error> {
143        let mut stream = data.stream_worktree_file()?;
144        match stream.as_bytes() {
145            Some(buffer) => {
146                let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer);
147                Ok((entry.id != file_hash).then_some(file_hash))
148            }
149            None => {
150                let file_hash = match stream.size() {
151                    None => {
152                        stream.read_to_end(buf)?;
153                        gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf)
154                    }
155                    Some(len) => gix_object::compute_stream_hash(
156                        entry.id.kind(),
157                        gix_object::Kind::Blob,
158                        &mut stream,
159                        len,
160                        &mut gix_features::progress::Discard,
161                        &AtomicBool::default(),
162                    )?,
163                };
164                Ok((entry.id != file_hash).then_some(file_hash))
165            }
166        }
167    }
168}