gix_status/index_as_worktree/traits.rs
1use std::{io::Read, sync::atomic::AtomicBool};
2
3use bstr::BStr;
4use gix_hash::ObjectId;
5use gix_index as index;
6use index::Entry;
7
8use crate::index_as_worktree::Error;
9
10/// Compares the content of two blobs in some way.
11pub trait CompareBlobs {
12 /// Output data produced by [`compare_blobs()`][CompareBlobs::compare_blobs()].
13 type Output;
14
15 /// Providing the underlying index `entry`, allow comparing a file in the worktree of size `worktree_blob_size`
16 /// and allow streaming its bytes using `data`.
17 /// If this function returns `None` the `entry` and the worktree blob are assumed to be identical.
18 /// Use `data` to obtain the data for the blob referred to by `entry`, allowing comparisons of the data itself.
19 /// `buf` can be used to store additional data, and it can be assumed to be a cleared buffer.
20 fn compare_blobs<'a, 'b>(
21 &mut self,
22 entry: &gix_index::Entry,
23 worktree_blob_size: u64,
24 data: impl ReadData<'a>,
25 buf: &mut Vec<u8>,
26 ) -> Result<Option<Self::Output>, Error>;
27}
28
29/// Determine the status of a submodule, which always indicates that it changed if present.
30pub trait SubmoduleStatus {
31 /// The status result, describing in which way the submodule changed.
32 type Output;
33 /// A custom error that may occur while computing the submodule status.
34 type Error: std::error::Error + Send + Sync + 'static;
35
36 /// Compute the status of the submodule at `entry` and `rela_path`, or return `None` if no change was detected.
37 fn status(&mut self, entry: &gix_index::Entry, rela_path: &BStr) -> Result<Option<Self::Output>, Self::Error>;
38}
39
40/// Lazy borrowed access to worktree or blob data, with streaming support for worktree files.
41pub trait ReadData<'a> {
42 /// Returns the contents of this blob.
43 ///
44 /// This potentially performs IO and other expensive operations
45 /// and should only be called when necessary.
46 fn read_blob(self) -> Result<&'a [u8], Error>;
47
48 /// Stream a worktree file in such a manner that its content matches what would be put into git.
49 fn stream_worktree_file(self) -> Result<read_data::Stream<'a>, Error>;
50}
51
52///
53pub mod read_data {
54 use std::sync::atomic::Ordering;
55
56 use gix_filter::pipeline::convert::ToGitOutcome;
57
58 use crate::AtomicU64;
59
60 /// A stream with worktree file data.
61 pub struct Stream<'a> {
62 pub(crate) inner: ToGitOutcome<'a, std::fs::File>,
63 pub(crate) bytes: Option<&'a AtomicU64>,
64 pub(crate) len: Option<u64>,
65 }
66
67 impl<'a> Stream<'a> {
68 /// Return the underlying byte-buffer if there is one.
69 ///
70 /// If `None`, read from this instance like a stream.
71 /// Note that this method should only be called once to assure proper accounting of the amount of bytes read.
72 pub fn as_bytes(&self) -> Option<&'a [u8]> {
73 self.inner.as_bytes().map(|v| {
74 if let Some(bytes) = self.bytes {
75 bytes.fetch_add(v.len() as u64, Ordering::Relaxed);
76 }
77 v
78 })
79 }
80
81 /// Return the size of the stream in bytes if it is known in advance.
82 pub fn size(&self) -> Option<u64> {
83 self.len
84 }
85 }
86
87 impl std::io::Read for Stream<'_> {
88 fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
89 let n = self.inner.read(buf)?;
90 if let Some(bytes) = self.bytes {
91 bytes.fetch_add(n as u64, Ordering::Relaxed);
92 }
93 Ok(n)
94 }
95 }
96}
97
98/// Compares to blobs by comparing their size and oid, and only looks at the file if
99/// the size matches, therefore it's very fast.
100#[derive(Clone)]
101pub struct FastEq;
102
103impl CompareBlobs for FastEq {
104 type Output = ();
105
106 // TODO: make all streaming IOPs interruptible.
107 fn compare_blobs<'a, 'b>(
108 &mut self,
109 entry: &Entry,
110 worktree_file_size: u64,
111 data: impl ReadData<'a>,
112 buf: &mut Vec<u8>,
113 ) -> Result<Option<Self::Output>, Error> {
114 // make sure to account for racily smudged entries here so that they don't always keep
115 // showing up as modified even after their contents have changed again, to a potentially
116 // unmodified state. That means that we want to ignore stat.size == 0 for non_empty_blobs.
117 if u64::from(entry.stat.size) != worktree_file_size && (entry.id.is_empty_blob() || entry.stat.size != 0) {
118 return Ok(Some(()));
119 }
120 HashEq
121 .compare_blobs(entry, worktree_file_size, data, buf)
122 .map(|opt| opt.map(|_| ()))
123 }
124}
125
126/// Compares files to blobs by *always* comparing their hashes.
127///
128/// Same as [`FastEq`] but does not contain a fast path for files with mismatched files and
129/// therefore always returns an OID that can be reused later.
130#[derive(Clone)]
131pub struct HashEq;
132
133impl CompareBlobs for HashEq {
134 type Output = ObjectId;
135
136 fn compare_blobs<'a, 'b>(
137 &mut self,
138 entry: &Entry,
139 _worktree_blob_size: u64,
140 data: impl ReadData<'a>,
141 buf: &mut Vec<u8>,
142 ) -> Result<Option<Self::Output>, Error> {
143 let mut stream = data.stream_worktree_file()?;
144 match stream.as_bytes() {
145 Some(buffer) => {
146 let file_hash = gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buffer);
147 Ok((entry.id != file_hash).then_some(file_hash))
148 }
149 None => {
150 let file_hash = match stream.size() {
151 None => {
152 stream.read_to_end(buf)?;
153 gix_object::compute_hash(entry.id.kind(), gix_object::Kind::Blob, buf)
154 }
155 Some(len) => gix_object::compute_stream_hash(
156 entry.id.kind(),
157 gix_object::Kind::Blob,
158 &mut stream,
159 len,
160 &mut gix_features::progress::Discard,
161 &AtomicBool::default(),
162 )?,
163 };
164 Ok((entry.id != file_hash).then_some(file_hash))
165 }
166 }
167 }
168}