gix_merge/blob/platform/
merge.rs

1use std::{io::Read, path::PathBuf};
2
3use crate::blob::{builtin_driver, PlatformRef, Resolution};
4
/// Options for use in the [`PlatformRef::merge()`] call.
#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
pub struct Options {
    /// If `true`, the resources being merged are contained in a virtual ancestor,
    /// which is the case when merge bases are merged into one.
    /// This flag affects the choice of merge drivers.
    pub is_virtual_ancestor: bool,
    /// Determine how to resolve conflicts in binary merges.
    /// If `None`, no conflict resolution is possible, and it picks a side.
    pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
    /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
    ///
    /// The builtin union driver starts out with these options as well and only overrides the conflict style.
    pub text: builtin_driver::text::Options,
}
17
/// The error returned by [`PlatformRef::merge()`].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// Setting up the invocation of the external merge driver (including its tempfiles) failed.
    #[error(transparent)]
    PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
    /// The external merge driver could not be run to completion to obtain its exit status.
    /// `cmd` is the debug representation of the command that was attempted.
    #[error("Failed to launch external merge driver: {cmd}")]
    SpawnExternalDriver { cmd: String, source: std::io::Error },
    /// The external merge driver ran, but its exit `status` didn't indicate success.
    /// `cmd` is the debug representation of the command that was run.
    #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
    ExternalDriverFailure {
        status: std::process::ExitStatus,
        cmd: String,
    },
    /// Opening or reading the result file left behind by the external merge driver failed.
    #[error("IO failed when dealing with merge-driver output")]
    ExternalDriverIO(#[from] std::io::Error),
}
34
/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
///
/// This type allows the creation of a [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*,
/// and `stdin` set to *null*.
/// The command is expected to leave its result in the file substituted at `current`, which is then supposed
/// to be read back from there.
// TODO: remove dead-code annotation
#[allow(dead_code)]
pub struct Command {
    /// The pre-configured command
    cmd: std::process::Command,
    /// A tempfile holding the *current* (ours) state of the resource.
    // NOTE(review): the handles are never read; presumably they are only held so the tempfiles
    // stay on disk for as long as this instance lives - confirm against `gix_tempfile`.
    current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// The path at which `current` is located, for reading the result back from later.
    current_path: PathBuf,
    /// A tempfile holding the *ancestor* (base) state of the resource.
    ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// A tempfile holding the *other* (their) state of the resource.
    other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57    ///
58    pub mod prepare_external_driver {
59        use std::{
60            io::Write,
61            ops::{Deref, DerefMut},
62            path::{Path, PathBuf},
63            process::Stdio,
64        };
65
66        use bstr::{BString, ByteVec};
67        use gix_tempfile::{AutoRemove, ContainingDirectory};
68
69        use crate::blob::{
70            builtin_driver,
71            builtin_driver::text::Conflict,
72            platform::{merge, DriverChoice},
73            BuiltinDriver, Driver, PlatformRef, ResourceKind,
74        };
75
76        /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
77        #[derive(Debug, thiserror::Error)]
78        #[allow(missing_docs)]
79        pub enum Error {
80            #[error("The resource of kind {kind:?} was too large to be processed")]
81            ResourceTooLarge { kind: ResourceKind },
82            #[error(
83                "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
84            )]
85            CreateTempfile {
86                rela_path: BString,
87                kind: ResourceKind,
88                source: std::io::Error,
89            },
90            #[error(
91                "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
92            )]
93            WriteTempfile {
94                rela_path: BString,
95                kind: ResourceKind,
96                source: std::io::Error,
97            },
98        }
99
100        /// Plumbing
101        impl<'parent> PlatformRef<'parent> {
102            /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
103            /// prepare the invocation and temporary files needed to launch it according to protocol.
104            /// See the documentation of [`Driver::command`] for possible substitutions.
105            ///
106            /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
107            ///
108            /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
109            /// to read back the result into a suitable buffer.
110            ///
111            /// ### Deviation
112            ///
113            /// * We allow passing more context than Git would by taking a whole `context`,
114            ///   it's up to the caller to decide how much is filled.
115            /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
116            pub fn prepare_external_driver(
117                &self,
118                merge_command: BString,
119                builtin_driver::text::Labels {
120                    ancestor,
121                    current,
122                    other,
123                }: builtin_driver::text::Labels<'_>,
124                context: gix_command::Context,
125            ) -> Result<merge::Command, Error> {
126                fn write_data(
127                    data: &[u8],
128                    directory: &Path,
129                ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
130                    let mut file = gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?;
131                    file.write_all(data)?;
132                    let mut path = Default::default();
133                    file.with_mut(|f| {
134                        f.path().clone_into(&mut path);
135                    })?;
136                    let file = file.close()?;
137                    Ok((file, path))
138                }
139
140                let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
141                    kind: ResourceKind::CommonAncestorOrBase,
142                })?;
143                let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
144                    kind: ResourceKind::CurrentOrOurs,
145                })?;
146                let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
147                    kind: ResourceKind::OtherOrTheirs,
148                })?;
149
150                let tmp_dir = context
151                    .worktree_dir
152                    .as_deref()
153                    .or(context.git_dir.as_deref())
154                    .unwrap_or(Path::new(""));
155                let (base_tmp, base_path) = write_data(base, tmp_dir).map_err(|err| Error::CreateTempfile {
156                    rela_path: self.ancestor.rela_path.into(),
157                    kind: ResourceKind::CommonAncestorOrBase,
158                    source: err,
159                })?;
160                let (ours_tmp, ours_path) = write_data(ours, tmp_dir).map_err(|err| Error::CreateTempfile {
161                    rela_path: self.current.rela_path.into(),
162                    kind: ResourceKind::CurrentOrOurs,
163                    source: err,
164                })?;
165                let (theirs_tmp, theirs_path) = write_data(theirs, tmp_dir).map_err(|err| Error::CreateTempfile {
166                    rela_path: self.other.rela_path.into(),
167                    kind: ResourceKind::OtherOrTheirs,
168                    source: err,
169                })?;
170
171                let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
172                let mut count = 0;
173                for token in merge_command.split(|b| *b == b'%') {
174                    count += 1;
175                    let token = if count > 1 {
176                        match token.first() {
177                            Some(&b'O') => {
178                                cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
179                                &token[1..]
180                            }
181                            Some(&b'A') => {
182                                cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
183                                &token[1..]
184                            }
185                            Some(&b'B') => {
186                                cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
187                                &token[1..]
188                            }
189                            Some(&b'L') => {
190                                let marker_size = self
191                                    .options
192                                    .text
193                                    .conflict
194                                    .marker_size()
195                                    .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
196                                cmd.push_str(format!("{marker_size}"));
197                                &token[1..]
198                            }
199                            Some(&b'P') => {
200                                cmd.push_str(gix_quote::single(self.current.rela_path));
201                                &token[1..]
202                            }
203                            Some(&b'S') => {
204                                cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
205                                &token[1..]
206                            }
207                            Some(&b'X') => {
208                                cmd.push_str(gix_quote::single(current.unwrap_or_default()));
209                                &token[1..]
210                            }
211                            Some(&b'Y') => {
212                                cmd.push_str(gix_quote::single(other.unwrap_or_default()));
213                                &token[1..]
214                            }
215                            Some(_other) => {
216                                cmd.push(b'%');
217                                token
218                            }
219                            None => b"%",
220                        }
221                    } else {
222                        token
223                    };
224                    cmd.extend_from_slice(token);
225                }
226
227                Ok(merge::Command {
228                    cmd: gix_command::prepare(gix_path::from_bstring(cmd))
229                        .with_context(context)
230                        .command_may_be_shell_script()
231                        .stdin(Stdio::null())
232                        .stdout(Stdio::inherit())
233                        .stderr(Stdio::inherit())
234                        .into(),
235                    current: ours_tmp,
236                    current_path: ours_path,
237                    ancestor: base_tmp,
238                    other: theirs_tmp,
239                })
240            }
241
242            /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
243            /// with the built-in driver to use instead.
244            pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
245                match self.driver {
246                    DriverChoice::BuiltIn(builtin) => Err(builtin),
247                    DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
248                }
249            }
250        }
251
252        impl std::fmt::Debug for merge::Command {
253            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254                self.cmd.fmt(f)
255            }
256        }
257
258        impl Deref for merge::Command {
259            type Target = std::process::Command;
260
261            fn deref(&self) -> &Self::Target {
262                &self.cmd
263            }
264        }
265
266        impl DerefMut for merge::Command {
267            fn deref_mut(&mut self) -> &mut Self::Target {
268                &mut self.cmd
269            }
270        }
271
272        impl merge::Command {
273            /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
274            /// Calling this makes sense only after the merge command has finished successfully.
275            pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
276                std::fs::File::open(&self.current_path)
277            }
278        }
279    }
280
281    ///
282    pub mod builtin_merge {
283        use crate::blob::{
284            builtin_driver,
285            platform::{resource, resource::Data},
286            BuiltinDriver, PlatformRef, Resolution,
287        };
288
289        /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
290        #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
291        pub enum Pick {
292            /// In a binary merge, chose the ancestor.
293            ///
294            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
295            Ancestor,
296            /// In a binary merge, chose our side.
297            ///
298            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
299            Ours,
300            /// In a binary merge, chose their side.
301            ///
302            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
303            Theirs,
304            /// New data was produced with the result of the merge, to be found in the buffer that was passed to
305            /// [builtin_merge()](PlatformRef::builtin_merge).
306            /// This happens for any merge that isn't a binary merge.
307            Buffer,
308        }
309
310        /// Plumbing
311        impl<'parent> PlatformRef<'parent> {
312            /// Perform the merge using the given `driver`, possibly placing the output in `out`.
313            /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
314            /// Use `labels` to annotate conflict sections in case of a text-merge.
315            /// Returns `None` if one of the buffers is too large, making a merge impossible.
316            /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
317            /// and one has to take the data from the respective resource.
318            ///
319            /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
320            /// which effectively chooses our side by default.
321            pub fn builtin_merge(
322                &self,
323                driver: BuiltinDriver,
324                out: &mut Vec<u8>,
325                input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
326                labels: builtin_driver::text::Labels<'_>,
327            ) -> (Pick, Resolution) {
328                let base = self.ancestor.data.as_slice().unwrap_or_default();
329                let ours = self.current.data.as_slice().unwrap_or_default();
330                let theirs = self.other.data.as_slice().unwrap_or_default();
331                let driver = if driver != BuiltinDriver::Binary
332                    && (is_binary_buf(self.ancestor.data)
333                        || is_binary_buf(self.other.data)
334                        || is_binary_buf(self.current.data))
335                {
336                    BuiltinDriver::Binary
337                } else {
338                    driver
339                };
340                match driver {
341                    BuiltinDriver::Text => {
342                        let resolution =
343                            builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
344                        (Pick::Buffer, resolution)
345                    }
346                    BuiltinDriver::Binary => {
347                        // easier to reason about the 'split' compared to merging both conditions
348                        #[allow(clippy::if_same_then_else)]
349                        if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
350                            (Pick::Ours, Resolution::Complete)
351                        } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
352                            (Pick::Ours, Resolution::Complete)
353                        } else {
354                            let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
355                            let pick = match pick {
356                                builtin_driver::binary::Pick::Ours => Pick::Ours,
357                                builtin_driver::binary::Pick::Theirs => Pick::Theirs,
358                                builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
359                            };
360                            (pick, resolution)
361                        }
362                    }
363                    BuiltinDriver::Union => {
364                        let resolution = builtin_driver::text(
365                            out,
366                            input,
367                            labels,
368                            ours,
369                            base,
370                            theirs,
371                            builtin_driver::text::Options {
372                                conflict: builtin_driver::text::Conflict::ResolveWithUnion,
373                                ..self.options.text
374                            },
375                        );
376                        (Pick::Buffer, resolution)
377                    }
378                }
379            }
380        }
381
382        fn is_binary_buf(data: resource::Data<'_>) -> bool {
383            match data {
384                Data::Missing => false,
385                Data::Buffer(buf) => {
386                    let buf = &buf[..buf.len().min(8000)];
387                    buf.contains(&0)
388                }
389                Data::TooLarge { .. } => true,
390            }
391        }
392    }
393}
394
395/// Convenience
396impl<'parent> PlatformRef<'parent> {
397    /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
398    /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
399    /// to indicate it's `out`.
400    /// Use `labels` to annotate conflict sections in case of a text-merge.
401    /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
402    ///
403    /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
404    /// Too-large resources will result in an error.
405    ///
406    /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
407    /// in terms of buffer handling to make it more useful in the face of missing local files.
408    pub fn merge(
409        &self,
410        out: &mut Vec<u8>,
411        labels: builtin_driver::text::Labels<'_>,
412        context: &gix_command::Context,
413    ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
414        match self.configured_driver() {
415            Ok(driver) => {
416                let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
417                let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
418                    cmd: format!("{:?}", cmd.cmd),
419                    source: err,
420                })?;
421                if !status.success() {
422                    return Err(Error::ExternalDriverFailure {
423                        cmd: format!("{:?}", cmd.cmd),
424                        status,
425                    });
426                }
427                out.clear();
428                cmd.open_result_file()?.read_to_end(out)?;
429                Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
430            }
431            Err(builtin) => {
432                let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
433                out.clear();
434                let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
435                Ok((pick, resolution))
436            }
437        }
438    }
439
440    /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
441    /// Return `Ok(None)`  if the `pick` corresponds to a buffer (that was written separately).
442    /// Return `Err(())` if the buffer is *too large*, so it was never read.
443    #[allow(clippy::result_unit_err)]
444    pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
445        match pick {
446            inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
447            inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
448            inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
449            inner::builtin_merge::Pick::Buffer => Ok(None),
450        }
451    }
452
453    /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
454    /// In case of binary or large files, this will simply be the existing ID of the resource.
455    /// In case of resources available in the object DB for binary merges, the object ID will be returned.
456    /// If new content was produced due to a content merge, `buf` will be written out
457    /// to the object database using `write_blob`.
458    /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
459    /// from the worktree *and* was too large so it was never loaded from disk.
460    /// `Ok(None)` will also be returned if one of the resources was missing.
461    /// `write_blob()` is used to turn buffers.
462    pub fn id_by_pick<E>(
463        &self,
464        pick: inner::builtin_merge::Pick,
465        buf: &[u8],
466        mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
467    ) -> Result<Option<gix_hash::ObjectId>, E> {
468        let field = match pick {
469            inner::builtin_merge::Pick::Ancestor => &self.ancestor,
470            inner::builtin_merge::Pick::Ours => &self.current,
471            inner::builtin_merge::Pick::Theirs => &self.other,
472            inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
473        };
474        use crate::blob::platform::resource::Data;
475        match field.data {
476            Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
477            Data::TooLarge { .. } | Data::Missing => Ok(None),
478            Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
479            Data::Buffer(_) => Ok(Some(field.id.to_owned())),
480        }
481    }
482}