gix_merge/blob/
pipeline.rs

1use std::{
2    io::Read,
3    path::{Path, PathBuf},
4};
5
6use bstr::BStr;
7use gix_filter::{
8    driver::apply::{Delay, MaybeDelayed},
9    pipeline::convert::{ToGitOutcome, ToWorktreeOutcome},
10};
11use gix_object::tree::EntryKind;
12
13use super::{Pipeline, ResourceKind};
14
/// Configuration for a [`Pipeline`].
#[derive(Debug, Default, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Options {
    /// Objects larger than this many bytes are treated as binary: they are neither read
    /// nor processed in any way.
    /// A value of `0` turns the size check off, so nothing is considered binary because of its size.
    ///
    /// For objects stored in `git`, the size that counts is the stored, decompressed one, which is
    /// why `git-lfs` files typically aren't considered binary unless the threshold is chosen
    /// explicitly with them in mind.
    /// For files read from a worktree, the size on disk is what matters, even if that file
    /// is a `git-lfs` file which would be small in Git.
    pub large_file_threshold_bytes: u64,
}
29
/// Selects how a resource is converted before it is merged.
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Mode {
    /// Produce resources in the form in which they are stored in `git`.
    ///
    /// Resources addressed by object-id already have that form, whereas data read
    /// from a worktree has to be converted first.
    #[default]
    ToGit,
    /// Resources addressed by object-id are converted to what *would* be placed in the worktree,
    /// and then back to what *would* be stored in Git.
    ///
    /// Resources read from a worktree are only converted to what *would* be stored in Git.
    ///
    /// This helps to avoid merge conflicts that are caused by inconsistent whitespace.
    Renormalize,
}
47
/// The worktree directories, one per kind of resource, from which resources can be read
/// if they are present and accessible on disk.
#[derive(Debug, Default, Clone)]
pub struct WorktreeRoots {
    /// The root of the worktree that holds the current (or our) version of the resource.
    pub current_root: Option<PathBuf>,
    /// The root of the worktree that holds the other (or their) version of the resource.
    pub other_root: Option<PathBuf>,
    /// The root of the worktree that holds the common ancestor of our and their version of the resource.
    pub common_ancestor_root: Option<PathBuf>,
}
58
59impl WorktreeRoots {
60    /// Return the root path for the given `kind`
61    pub fn by_kind(&self, kind: ResourceKind) -> Option<&Path> {
62        match kind {
63            ResourceKind::CurrentOrOurs => self.current_root.as_deref(),
64            ResourceKind::CommonAncestorOrBase => self.common_ancestor_root.as_deref(),
65            ResourceKind::OtherOrTheirs => self.other_root.as_deref(),
66        }
67    }
68
69    /// Return `true` if all worktree roots are unset.
70    pub fn is_unset(&self) -> bool {
71        self.current_root.is_none() && self.other_root.is_none() && self.common_ancestor_root.is_none()
72    }
73}
74
75/// Lifecycle
76impl Pipeline {
77    /// Create a new instance of a pipeline which produces blobs suitable for merging.
78    ///
79    /// `roots` allow to read worktree files directly, and `worktree_filter` is used
80    /// to transform object database data directly.
81    /// `options` are used to further configure the way we act.
82    pub fn new(roots: WorktreeRoots, worktree_filter: gix_filter::Pipeline, options: Options) -> Self {
83        Pipeline {
84            roots,
85            filter: worktree_filter,
86            options,
87            path: Default::default(),
88        }
89    }
90}
91
92/// Access
93impl Pipeline {}
94
/// The outcome of a successful call to [`Pipeline::convert_to_mergeable()`].
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub enum Data {
    /// The merge-able data was placed into the buffer that was handed to [`Pipeline::convert_to_mergeable()`].
    Buffer,
    /// The file or blob exceeds the large-file threshold, which is why it wasn't processed.
    ///
    /// A resource in this state cannot be merged.
    TooLarge {
        /// The size of the object before any filter was applied, or the size of the file as found on disk.
        ///
        /// Note that this size doesn't always describe the same 'state' of the content: sometimes it is
        /// the size of the blob in git, and sometimes the size of the file in the worktree. Depending on
        /// the filters involved, these can be very different.
        size: u64,
    },
}
112
113///
114pub mod convert_to_mergeable {
115    use std::collections::TryReserveError;
116
117    use bstr::BString;
118    use gix_object::tree::EntryKind;
119
120    /// The error returned by [Pipeline::convert_to_mergeable()](super::Pipeline::convert_to_mergeable()).
121    #[derive(Debug, thiserror::Error)]
122    #[allow(missing_docs)]
123    pub enum Error {
124        #[error("Entry at '{rela_path}' must be regular file or symlink, but was {actual:?}")]
125        InvalidEntryKind { rela_path: BString, actual: EntryKind },
126        #[error("Entry at '{rela_path}' could not be read as symbolic link")]
127        ReadLink { rela_path: BString, source: std::io::Error },
128        #[error("Entry at '{rela_path}' could not be opened for reading or read from")]
129        OpenOrRead { rela_path: BString, source: std::io::Error },
130        #[error("Entry at '{rela_path}' could not be copied from a filter process to a memory buffer")]
131        StreamCopy { rela_path: BString, source: std::io::Error },
132        #[error(transparent)]
133        FindObject(#[from] gix_object::find::existing_object::Error),
134        #[error(transparent)]
135        ConvertToWorktree(#[from] gix_filter::pipeline::convert::to_worktree::Error),
136        #[error(transparent)]
137        ConvertToGit(#[from] gix_filter::pipeline::convert::to_git::Error),
138        #[error("Memory allocation failed")]
139        OutOfMemory(#[from] TryReserveError),
140    }
141}
142
143/// Conversion
144impl Pipeline {
145    /// Convert the object at `id`, `mode`, `rela_path` and `kind`, providing access to `attributes` and `objects`.
146    /// The resulting merge-able data is written into `out`, if it's not too large.
147    /// The returned [`Data`] contains information on how to use `out`, which will be cleared if it is `None`, indicating
148    /// that no object was found at the location *on disk* - it's always an error to provide an object ID that doesn't exist
149    /// in the object database.
150    ///
151    /// `attributes` must be returning the attributes at `rela_path` and is used for obtaining worktree filter settings,
152    /// and `objects` must be usable if `kind` is a resource in the object database,
153    /// i.e. if no worktree root is available. It's notable that if a worktree root is present for `kind`,
154    /// then a `rela_path` is used to access it on disk.
155    ///
156    /// If `id` [is null](gix_hash::ObjectId::is_null()) or the file in question doesn't exist in the worktree in case
157    /// [a root](WorktreeRoots) is present, then `out` will be left cleared and the output data will be `None`.
158    /// This is useful to simplify the calling code as empty buffers signal that nothing is there.
159    ///
160    /// Note that `mode` is trusted, and we will not re-validate that the entry in the worktree actually is of that mode.
161    /// Only blobs are allowed.
162    ///
163    /// Use `convert` to control what kind of the resource will be produced.
164    #[allow(clippy::too_many_arguments)]
165    pub fn convert_to_mergeable(
166        &mut self,
167        id: &gix_hash::oid,
168        mode: EntryKind,
169        rela_path: &BStr,
170        kind: ResourceKind,
171        attributes: &mut dyn FnMut(&BStr, &mut gix_filter::attributes::search::Outcome),
172        objects: &dyn gix_object::FindObjectOrHeader,
173        convert: Mode,
174        out: &mut Vec<u8>,
175    ) -> Result<Option<Data>, convert_to_mergeable::Error> {
176        if !matches!(mode, EntryKind::Blob | EntryKind::BlobExecutable) {
177            return Err(convert_to_mergeable::Error::InvalidEntryKind {
178                rela_path: rela_path.to_owned(),
179                actual: mode,
180            });
181        }
182
183        out.clear();
184        match self.roots.by_kind(kind) {
185            Some(root) => {
186                self.path.clear();
187                self.path.push(root);
188                self.path.push(gix_path::from_bstr(rela_path));
189                let size_in_bytes = (self.options.large_file_threshold_bytes > 0)
190                    .then(|| {
191                        none_if_missing(self.path.metadata().map(|md| md.len())).map_err(|err| {
192                            convert_to_mergeable::Error::OpenOrRead {
193                                rela_path: rela_path.to_owned(),
194                                source: err,
195                            }
196                        })
197                    })
198                    .transpose()?;
199                let data = match size_in_bytes {
200                    Some(None) => None, // missing as identified by the size check
201                    Some(Some(size)) if size > self.options.large_file_threshold_bytes => Some(Data::TooLarge { size }),
202                    _ => {
203                        let file = none_if_missing(std::fs::File::open(&self.path)).map_err(|err| {
204                            convert_to_mergeable::Error::OpenOrRead {
205                                rela_path: rela_path.to_owned(),
206                                source: err,
207                            }
208                        })?;
209
210                        if let Some(file) = file {
211                            match convert {
212                                Mode::ToGit | Mode::Renormalize => {
213                                    let res = self.filter.convert_to_git(
214                                        file,
215                                        gix_path::from_bstr(rela_path).as_ref(),
216                                        attributes,
217                                        &mut |buf| {
218                                            if convert == Mode::Renormalize {
219                                                Ok(None)
220                                            } else {
221                                                objects.try_find(id, buf).map(|obj| obj.map(|_| ()))
222                                            }
223                                        },
224                                    )?;
225
226                                    match res {
227                                        ToGitOutcome::Unchanged(mut file) => {
228                                            file.read_to_end(out).map_err(|err| {
229                                                convert_to_mergeable::Error::OpenOrRead {
230                                                    rela_path: rela_path.to_owned(),
231                                                    source: err,
232                                                }
233                                            })?;
234                                        }
235                                        ToGitOutcome::Process(mut stream) => {
236                                            stream.read_to_end(out).map_err(|err| {
237                                                convert_to_mergeable::Error::OpenOrRead {
238                                                    rela_path: rela_path.to_owned(),
239                                                    source: err,
240                                                }
241                                            })?;
242                                        }
243                                        ToGitOutcome::Buffer(buf) => {
244                                            out.clear();
245                                            out.try_reserve(buf.len())?;
246                                            out.extend_from_slice(buf);
247                                        }
248                                    }
249                                }
250                            }
251
252                            Some(Data::Buffer)
253                        } else {
254                            None
255                        }
256                    }
257                };
258                Ok(data)
259            }
260            None => {
261                let data = if id.is_null() {
262                    None
263                } else {
264                    let header = objects
265                        .try_header(id)
266                        .map_err(gix_object::find::existing_object::Error::Find)?
267                        .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
268                    let is_binary = self.options.large_file_threshold_bytes > 0
269                        && header.size > self.options.large_file_threshold_bytes;
270                    let data = if is_binary {
271                        Data::TooLarge { size: header.size }
272                    } else {
273                        objects
274                            .try_find(id, out)
275                            .map_err(gix_object::find::existing_object::Error::Find)?
276                            .ok_or_else(|| gix_object::find::existing_object::Error::NotFound { oid: id.to_owned() })?;
277
278                        if convert == Mode::Renormalize {
279                            {
280                                let res = self
281                                    .filter
282                                    .convert_to_worktree(out, rela_path, attributes, Delay::Forbid)?;
283
284                                match res {
285                                    ToWorktreeOutcome::Unchanged(_) => {}
286                                    ToWorktreeOutcome::Buffer(src) => {
287                                        out.clear();
288                                        out.try_reserve(src.len())?;
289                                        out.extend_from_slice(src);
290                                    }
291                                    ToWorktreeOutcome::Process(MaybeDelayed::Immediate(mut stream)) => {
292                                        std::io::copy(&mut stream, out).map_err(|err| {
293                                            convert_to_mergeable::Error::StreamCopy {
294                                                rela_path: rela_path.to_owned(),
295                                                source: err,
296                                            }
297                                        })?;
298                                    }
299                                    ToWorktreeOutcome::Process(MaybeDelayed::Delayed(_)) => {
300                                        unreachable!("we prohibit this")
301                                    }
302                                }
303                            }
304
305                            let res = self.filter.convert_to_git(
306                                &**out,
307                                &gix_path::from_bstr(rela_path),
308                                attributes,
309                                &mut |_buf| Ok(None),
310                            )?;
311
312                            match res {
313                                ToGitOutcome::Unchanged(_) => {}
314                                ToGitOutcome::Process(mut stream) => {
315                                    stream
316                                        .read_to_end(out)
317                                        .map_err(|err| convert_to_mergeable::Error::OpenOrRead {
318                                            rela_path: rela_path.to_owned(),
319                                            source: err,
320                                        })?;
321                                }
322                                ToGitOutcome::Buffer(buf) => {
323                                    out.clear();
324                                    out.try_reserve(buf.len())?;
325                                    out.extend_from_slice(buf);
326                                }
327                            }
328                        }
329
330                        Data::Buffer
331                    };
332                    Some(data)
333                };
334                Ok(data)
335            }
336        }
337    }
338}
339
/// Map a 'not found' I/O error to `Ok(None)`, wrap a success in `Some`,
/// and pass every other error on unchanged.
fn none_if_missing<T>(res: std::io::Result<T>) -> std::io::Result<Option<T>> {
    use std::io::ErrorKind;

    res.map(Some).or_else(|err| match err.kind() {
        ErrorKind::NotFound => Ok(None),
        _ => Err(err),
    })
}