gix_merge/blob/pipeline.rs
1use std::{
2 io::Read,
3 path::{Path, PathBuf},
4};
5
6use bstr::BStr;
7use gix_filter::{
8 driver::apply::{Delay, MaybeDelayed},
9 pipeline::convert::{ToGitOutcome, ToWorktreeOutcome},
10};
11use gix_object::tree::EntryKind;
12
13use super::{Pipeline, ResourceKind};
14
/// Options for use in a [`Pipeline`].
#[derive(Default, Clone, PartialEq, Eq, Debug, Hash, Ord, PartialOrd)]
pub struct Options {
    /// The amount of bytes that an object has to reach before being treated as binary.
    /// These objects will not be queried, nor will their data be processed in any way.
    /// If `0`, no file is ever considered binary due to its size.
    ///
    /// Note that for files stored in `git`, what counts is their stored, decompressed size,
    /// thus `git-lfs` files would typically not be considered binary unless one explicitly sets
    /// them.
    /// However, if they are to be retrieved from the worktree, the worktree size is what matters,
    /// even though that also might be a `git-lfs` file which is small in Git.
    pub large_file_threshold_bytes: u64,
}
29
/// The specific way to convert a resource.
#[derive(Default, Debug, Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Mode {
    /// Prepare resources as they are stored in `git`.
    ///
    /// This is naturally the case when object-ids are used, but a conversion is needed
    /// when data is read from a worktree.
    #[default]
    ToGit,
    /// For sources that are object-ids, convert them to what *would* be stored in the worktree,
    /// and back to what *would* be stored in Git.
    ///
    /// Sources that are located in a worktree are merely converted to what *would* be stored in Git.
    ///
    /// This is useful to prevent merge conflicts due to inconsistent whitespace.
    Renormalize,
}
47
/// A way to access roots for different kinds of resources that are possibly located and accessible in a worktree.
#[derive(Clone, Debug, Default)]
pub struct WorktreeRoots {
    /// The worktree root where the current (or our) version of the resource is present.
    pub current_root: Option<PathBuf>,
    /// The worktree root where the other (or their) version of the resource is present.
    pub other_root: Option<PathBuf>,
    /// The worktree root containing the resource of the common ancestor of our and their version.
    pub common_ancestor_root: Option<PathBuf>,
}
58
59impl WorktreeRoots {
60 /// Return the root path for the given `kind`
61 pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
62 match kind {
63 ResourceKind::CurrentOrOurs => self.current_root.as_deref(),
64 ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(),
65 ResourceKind::OtherOrTheirs => self.other_root.as_deref(),
66 }
67 }
68
69 /// Return `true` if all worktree roots are unset.
70 pub fn is_unset(&self) -> bool {
71 self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none()
72 }
73}
74
75/// Lifecycle
76impl Pipeline {
77 /// Create a new instance of a pipeline which produces blobs suitable for merging.
78 ///
79 /// `roots` allow to read worktree files directly, and `worktree_filter` is used
80 /// to transform object database data directly.
81 /// `options` are used to further configure the way we act.
82 pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self {
83 Pipeline {
84 roots,
85 filter: worktree_filter,
86 options,
87 path: Default::default(),
88 }
89 }
90}
91
/// Access
impl Pipeline {} // NOTE(review): intentionally empty - a placeholder section with no accessors defined yet.
94
/// Data as returned by [`Pipeline::convert_to_mergeable()`].
#[derive(Copy, Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub enum Data {
    /// The data to use for merging was written into the buffer that was passed during the call to [`Pipeline::convert_to_mergeable()`].
    Buffer,
    /// The file or blob is above the big-file threshold and cannot be processed.
    ///
    /// In this state, the file cannot be merged.
    TooLarge {
        /// The size of the object prior to performing any filtering or as it was found on disk.
        ///
        /// Note that technically, the size isn't always representative of the same 'state' of the
        /// content: sometimes it is the size of the blob as stored in Git, and sometimes the size
        /// of the file in the worktree - the two can differ a lot depending on filters.
        size: u64,
    },
}
112
/// Types related to the conversion of resources into a mergeable form.
pub mod convert_to_mergeable {
    use std::collections::TryReserveError;

    use bstr::BString;
    use gix_object::tree::EntryKind;

    /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()).
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        // Only blobs (possibly executable) are mergeable.
        #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")]
        InvalidEntryKind { rela_path: BString, actual: EntryKind },
        #[error("Entry at '{rela_path}' could not be read as symbolic link")]
        ReadLink { rela_path: BString, source: std::io::Error },
        #[error("Entry at '{rela_path}' could not be opened for reading or read from")]
        OpenOrRead { rela_path: BString, source: std::io::Error },
        #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")]
        StreamCopy { rela_path: BString, source: std::io::Error },
        // The following `transparent` variants forward errors of the underlying subsystems verbatim.
        #[error(transparent)]
        FindObject(#[from] gix_object::find::existing_object::Error),
        #[error(transparent)]
        ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error),
        #[error(transparent)]
        ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
        // Raised by fallible `try_reserve` calls when growing the output buffer.
        #[error("Memory allocation failed")]
        OutOfMemory(#[from] TryReserveError),
    }
}
142
/// Conversion
impl Pipeline {
    /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`.
    /// The resulting merge-able data is written into `out`, if it's not too large.
    /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating
    /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist
    /// in the object database.
    ///
    /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings,
    /// and `objects` must be usable if `kind` is a resource in the object database,
    /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`,
    /// then a `rela_path` is used to access it on disk.
    ///
    /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case
    /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`.
    /// This is useful to simplify the calling code as empty buffers signal that nothing is there.
    ///
    /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode.
    /// Only blobs are allowed.
    ///
    /// Use `convert` to control what kind of the resource will be produced.
    #[allow(clippy::too_many_arguments)]
    pub fn convert_to_mergeable(
        &mut self,
        id: &gix_hash::oid,
        mode: EntryKind,
        rela_path: &BStr,
        kind: ResourceKind,
        attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome),
        objects: &dyn gix_object::FindObjectOrHeader,
        convert: Mode,
        out: &mut Vec<u8>,
    ) -> Result<Option<Data>, convert_to_mergeable::Error> {
        // Only blobs (optionally executable) can be merged - trees, commits and links cannot.
        if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) {
            return Err(convert_to_mergeable::Error::InvalidEntryKind {
                rela_path: rela_path.to_owned(),
                actual: mode,
            });
        }

        // Callers rely on `out` being empty whenever `None` (or an error) is returned.
        out.clear();
        match self.roots.by_kind(kind) {
            // A worktree root exists for this resource kind: read the file from disk
            // and convert it to its in-Git form via the worktree filter pipeline.
            Some(root) => {
                // Build `<root>/<rela_path>` in the reusable scratch path buffer.
                self.path.clear();
                self.path.push(root);
                self.path.push(gix_path::from_bstr(rela_path));
                // If a big-file threshold is configured, check the on-disk size first so that
                // huge files are never read into memory. The nested `Option` encodes:
                // outer `None` = no threshold configured, inner `None` = file is missing on disk.
                let size_in_bytes = (self.options.large_file_threshold_bytes > 0)
                    .then(|| {
                        none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| {
                            convert_to_mergeable::Error::OpenOrRead {
                                rela_path: rela_path.to_owned(),
                                source: err,
                            }
                        })
                    })
                    .transpose()?;
                let data = match size_in_bytes {
                    Some(None) => None, // missing as identified by the size check
                    Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }),
                    _ => {
                        // Either no threshold is set or the file is small enough - open it now.
                        // The file may have vanished since the size check, hence the second missing-check.
                        let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
                            convert_to_mergeable::Error::OpenOrRead {
                                rela_path: rela_path.to_owned(),
                                source: err,
                            }
                        })?;

                        if let Some(file) = file {
                            match convert {
                                Mode::ToGit | Mode::Renormalize => {
                                    let res = self.filter.convert_to_git(
                                        file,
                                        gix_path::from_bstr(rela_path).as_ref(),
                                        attributes,
                                        // Lookup of the current object state in Git, which lets the filter
                                        // detect content that would round-trip unchanged. For `Renormalize`
                                        // we answer `None` so the full conversion is always performed.
                                        // NOTE(review): the exact callback contract is defined by `gix-filter` - confirm there.
                                        &mut |buf| {
                                            if convert == Mode::Renormalize {
                                                Ok(None)
                                            } else {
                                                objects.try_find(id, buf).map(|obj| obj.map(|_| ()))
                                            }
                                        },
                                    )?;

                                    match res {
                                        // No filter applied: read the file contents verbatim into `out`.
                                        ToGitOutcome::Unchanged(mut file) => {
                                            file.read_to_end(out).map_err(|err| {
                                                convert_to_mergeable::Error::OpenOrRead {
                                                    rela_path: rela_path.to_owned(),
                                                    source: err,
                                                }
                                            })?;
                                        }
                                        // A long-running filter process produced the result as a stream.
                                        // `out` is still empty here, so appending is equivalent to replacing.
                                        ToGitOutcome::Process(mut stream) => {
                                            stream.read_to_end(out).map_err(|err| {
                                                convert_to_mergeable::Error::OpenOrRead {
                                                    rela_path: rela_path.to_owned(),
                                                    source: err,
                                                }
                                            })?;
                                        }
                                        // The filter produced the result in an internal buffer - copy it over,
                                        // reserving fallibly so huge results surface as `OutOfMemory`.
                                        ToGitOutcome::Buffer(buf) => {
                                            out.clear();
                                            out.try_reserve(buf.len())?;
                                            out.extend_from_slice(buf);
                                        }
                                    }
                                }
                            }

                            Some(Data::Buffer)
                        } else {
                            // The file disappeared (or never existed) - signal "nothing there".
                            None
                        }
                    }
                };
                Ok(data)
            }
            // No worktree root: the resource must come from the object database.
            None => {
                let data = if id.is_null() {
                    // A null id explicitly means "no resource" - leave `out` empty.
                    None
                } else {
                    // Query the header first so the size can be checked without
                    // decompressing the whole object.
                    let header = objects
                        .try_header(id)
                        .map_err(gix_object::find::existing_object::Error::Find)?
                        .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
                    let is_binary = self.options.large_file_threshold_bytes > 0
                        && header.size > self.options.large_file_threshold_bytes;
                    let data = if is_binary {
                        Data::TooLarge { size: header.size }
                    } else {
                        objects
                            .try_find(id, out)
                            .map_err(gix_object::find::existing_object::Error::Find)?
                            .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;

                        // Renormalization round-trips the stored blob: first to its would-be
                        // worktree form, then back to its would-be Git form.
                        if convert == Mode::Renormalize {
                            {
                                // Delayed results are forbidden here so the outcome is available immediately.
                                let res = self
                                    .filter
                                    .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;

                                match res {
                                    // No filter applied - `out` already holds the worktree form.
                                    ToWorktreeOutcome::Unchanged(_) => {}
                                    ToWorktreeOutcome::Buffer(src) => {
                                        out.clear();
                                        out.try_reserve(src.len())?;
                                        out.extend_from_slice(src);
                                    }
                                    ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
                                        // NOTE(review): unlike the `Buffer` arm, `out` is not cleared before this
                                        // copy appends the stream output - confirm the converted bytes are meant
                                        // to follow (rather than replace) the original blob data in `out`.
                                        std::io::copy(&mut stream, out).map_err(|err| {
                                            convert_to_mergeable::Error::StreamCopy {
                                                rela_path: rela_path.to_owned(),
                                                source: err,
                                            }
                                        })?;
                                    }
                                    ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
                                        // `Delay::Forbid` was passed above, so a delayed result cannot occur.
                                        unreachable!("we prohibit this")
                                    }
                                }
                            }

                            // Convert the worktree form back to the Git form; the round-trip check
                            // is disabled (`Ok(None)`) to force the conversion to actually run.
                            let res = self.filter.convert_to_git(
                                &**out,
                                &gix_path::from_bstr(rela_path),
                                attributes,
                                &mut |_buf| Ok(None),
                            )?;

                            match res {
                                // Nothing to do - `out` already is the Git form.
                                ToGitOutcome::Unchanged(_) => {}
                                ToGitOutcome::Process(mut stream) => {
                                    // NOTE(review): `out` served as the filter input and is not cleared before
                                    // `read_to_end` appends the filtered output - confirm this is intentional.
                                    stream
                                        .read_to_end(out)
                                        .map_err(|err| convert_to_mergeable::Error::OpenOrRead {
                                            rela_path: rela_path.to_owned(),
                                            source: err,
                                        })?;
                                }
                                ToGitOutcome::Buffer(buf) => {
                                    out.clear();
                                    out.try_reserve(buf.len())?;
                                    out.extend_from_slice(buf);
                                }
                            }
                        }

                        Data::Buffer
                    };
                    Some(data)
                };
                Ok(data)
            }
        }
    }
}
339
/// Map a `NotFound` I/O error to `Ok(None)` while passing every other outcome through.
///
/// This lets callers treat a missing file as an ordinary, non-error state.
fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> {
    res.map(Some).or_else(|err| {
        if err.kind() == std::io::ErrorKind::NotFound {
            Ok(None)
        } else {
            Err(err)
        }
    })
}