gix_merge/blob/platform/
merge.rs

1use crate::blob::{builtin_driver, PlatformRef, Resolution};
2use std::io::Read;
3use std::path::PathBuf;
4
5/// Options for the use in the [`PlatformRef::merge()`] call.
6#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
7pub struct Options {
8    /// If `true`, the resources being merged are contained in a virtual ancestor,
9    /// which is the case when merge bases are merged into one.
10    /// This flag affects the choice of merge drivers.
11    pub is_virtual_ancestor: bool,
12    /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
13    pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
14    /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
15    pub text: builtin_driver::text::Options,
16}
17
18/// The error returned by [`PlatformRef::merge()`].
19#[derive(Debug, thiserror::Error)]
20#[allow(missing_docs)]
21pub enum Error {
22    #[error(transparent)]
23    PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
24    #[error("Failed to launch external merge driver: {cmd}")]
25    SpawnExternalDriver { cmd: String, source: std::io::Error },
26    #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
27    ExternalDriverFailure {
28        status: std::process::ExitStatus,
29        cmd: String,
30    },
31    #[error("IO failed when dealing with merge-driver output")]
32    ExternalDriverIO(#[from] std::io::Error),
33}
34
35/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
36///
37/// This type allows to creation of [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*,
38/// but `stdin` closed.
39/// It's expected to leave its result in the file substituted at `current` which is then supposed to be read back from there.
40// TODO: remove dead-code annotation
41#[allow(dead_code)]
42pub struct Command {
43    /// The pre-configured command
44    cmd: std::process::Command,
45    /// A tempfile holding the *current* (ours) state of the resource.
46    current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
47    /// The path at which `current` is located, for reading the result back from later.
48    current_path: PathBuf,
49    /// A tempfile holding the *ancestor* (base) state of the resource.
50    ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
51    /// A tempfile holding the *other* (their) state of the resource.
52    other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
53}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57    ///
58    pub mod prepare_external_driver {
59        use crate::blob::builtin_driver::text::Conflict;
60        use crate::blob::platform::{merge, DriverChoice};
61        use crate::blob::{builtin_driver, BuiltinDriver, Driver, PlatformRef, ResourceKind};
62        use bstr::{BString, ByteVec};
63        use gix_tempfile::{AutoRemove, ContainingDirectory};
64        use std::io::Write;
65        use std::ops::{Deref, DerefMut};
66        use std::path::{Path, PathBuf};
67        use std::process::Stdio;
68
69        /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
70        #[derive(Debug, thiserror::Error)]
71        #[allow(missing_docs)]
72        pub enum Error {
73            #[error("The resource of kind {kind:?} was too large to be processed")]
74            ResourceTooLarge { kind: ResourceKind },
75            #[error(
76                "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
77            )]
78            CreateTempfile {
79                rela_path: BString,
80                kind: ResourceKind,
81                source: std::io::Error,
82            },
83            #[error(
84                "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
85            )]
86            WriteTempfile {
87                rela_path: BString,
88                kind: ResourceKind,
89                source: std::io::Error,
90            },
91        }
92
93        /// Plumbing
94        impl<'parent> PlatformRef<'parent> {
95            /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
96            /// prepare the invocation and temporary files needed to launch it according to protocol.
97            /// See the documentation of [`Driver::command`] for possible substitutions.
98            ///
99            /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
100            ///
101            /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
102            /// to read back the result into a suitable buffer.
103            ///
104            /// ### Deviation
105            ///
106            /// * We allow passing more context than Git would by taking a whole `context`,
107            ///   it's up to the caller to decide how much is filled.
108            /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
109            pub fn prepare_external_driver(
110                &self,
111                merge_command: BString,
112                builtin_driver::text::Labels {
113                    ancestor,
114                    current,
115                    other,
116                }: builtin_driver::text::Labels<'_>,
117                context: gix_command::Context,
118            ) -> Result<merge::Command, Error> {
119                fn write_data(
120                    data: &[u8],
121                ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
122                    let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?;
123                    file.write_all(data)?;
124                    let mut path = Default::default();
125                    file.with_mut(|f| {
126                        f.path().clone_into(&mut path);
127                    })?;
128                    let file = file.close()?;
129                    Ok((file, path))
130                }
131
132                let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
133                    kind: ResourceKind::CommonAncestorOrBase,
134                })?;
135                let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
136                    kind: ResourceKind::CurrentOrOurs,
137                })?;
138                let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
139                    kind: ResourceKind::OtherOrTheirs,
140                })?;
141
142                let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile {
143                    rela_path: self.ancestor.rela_path.into(),
144                    kind: ResourceKind::CommonAncestorOrBase,
145                    source: err,
146                })?;
147                let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile {
148                    rela_path: self.current.rela_path.into(),
149                    kind: ResourceKind::CurrentOrOurs,
150                    source: err,
151                })?;
152                let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile {
153                    rela_path: self.other.rela_path.into(),
154                    kind: ResourceKind::OtherOrTheirs,
155                    source: err,
156                })?;
157
158                let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
159                let mut count = 0;
160                for token in merge_command.split(|b| *b == b'%') {
161                    count += 1;
162                    let token = if count > 1 {
163                        match token.first() {
164                            Some(&b'O') => {
165                                cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
166                                &token[1..]
167                            }
168                            Some(&b'A') => {
169                                cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
170                                &token[1..]
171                            }
172                            Some(&b'B') => {
173                                cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
174                                &token[1..]
175                            }
176                            Some(&b'L') => {
177                                let marker_size = self
178                                    .options
179                                    .text
180                                    .conflict
181                                    .marker_size()
182                                    .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
183                                cmd.push_str(format!("{marker_size}"));
184                                &token[1..]
185                            }
186                            Some(&b'P') => {
187                                cmd.push_str(gix_quote::single(self.current.rela_path));
188                                &token[1..]
189                            }
190                            Some(&b'S') => {
191                                cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
192                                &token[1..]
193                            }
194                            Some(&b'X') => {
195                                cmd.push_str(gix_quote::single(current.unwrap_or_default()));
196                                &token[1..]
197                            }
198                            Some(&b'Y') => {
199                                cmd.push_str(gix_quote::single(other.unwrap_or_default()));
200                                &token[1..]
201                            }
202                            Some(_other) => {
203                                cmd.push(b'%');
204                                token
205                            }
206                            None => b"%",
207                        }
208                    } else {
209                        token
210                    };
211                    cmd.extend_from_slice(token);
212                }
213
214                Ok(merge::Command {
215                    cmd: gix_command::prepare(gix_path::from_bstring(cmd))
216                        .with_context(context)
217                        .with_shell()
218                        .stdin(Stdio::null())
219                        .stdout(Stdio::inherit())
220                        .stderr(Stdio::inherit())
221                        .into(),
222                    current: ours_tmp,
223                    current_path: ours_path,
224                    ancestor: base_tmp,
225                    other: theirs_tmp,
226                })
227            }
228
229            /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
230            /// with the built-in driver to use instead.
231            pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
232                match self.driver {
233                    DriverChoice::BuiltIn(builtin) => Err(builtin),
234                    DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
235                }
236            }
237        }
238
239        impl std::fmt::Debug for merge::Command {
240            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241                self.cmd.fmt(f)
242            }
243        }
244
245        impl Deref for merge::Command {
246            type Target = std::process::Command;
247
248            fn deref(&self) -> &Self::Target {
249                &self.cmd
250            }
251        }
252
253        impl DerefMut for merge::Command {
254            fn deref_mut(&mut self) -> &mut Self::Target {
255                &mut self.cmd
256            }
257        }
258
259        impl merge::Command {
260            /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
261            /// Calling this makes sense only after the merge command has finished successfully.
262            pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
263                std::fs::File::open(&self.current_path)
264            }
265        }
266    }
267
268    ///
269    pub mod builtin_merge {
270        use crate::blob::platform::resource;
271        use crate::blob::platform::resource::Data;
272        use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution};
273
274        /// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
275        #[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
276        pub enum Pick {
277            /// In a binary merge, chose the ancestor.
278            ///
279            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
280            Ancestor,
281            /// In a binary merge, chose our side.
282            ///
283            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
284            Ours,
285            /// In a binary merge, chose their side.
286            ///
287            /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
288            Theirs,
289            /// New data was produced with the result of the merge, to be found in the buffer that was passed to
290            /// [builtin_merge()](PlatformRef::builtin_merge).
291            /// This happens for any merge that isn't a binary merge.
292            Buffer,
293        }
294
295        /// Plumbing
296        impl<'parent> PlatformRef<'parent> {
297            /// Perform the merge using the given `driver`, possibly placing the output in `out`.
298            /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
299            /// Use `labels` to annotate conflict sections in case of a text-merge.
300            /// Returns `None` if one of the buffers is too large, making a merge impossible.
301            /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
302            /// and one has to take the data from the respective resource.
303            ///
304            /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
305            /// which effectively chooses our side by default.
306            pub fn builtin_merge(
307                &self,
308                driver: BuiltinDriver,
309                out: &mut Vec<u8>,
310                input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
311                labels: builtin_driver::text::Labels<'_>,
312            ) -> (Pick, Resolution) {
313                let base = self.ancestor.data.as_slice().unwrap_or_default();
314                let ours = self.current.data.as_slice().unwrap_or_default();
315                let theirs = self.other.data.as_slice().unwrap_or_default();
316                let driver = if driver != BuiltinDriver::Binary
317                    && (is_binary_buf(self.ancestor.data)
318                        || is_binary_buf(self.other.data)
319                        || is_binary_buf(self.current.data))
320                {
321                    BuiltinDriver::Binary
322                } else {
323                    driver
324                };
325                match driver {
326                    BuiltinDriver::Text => {
327                        let resolution =
328                            builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
329                        (Pick::Buffer, resolution)
330                    }
331                    BuiltinDriver::Binary => {
332                        // easier to reason about the 'split' compared to merging both conditions
333                        #[allow(clippy::if_same_then_else)]
334                        if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
335                            (Pick::Ours, Resolution::Complete)
336                        } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
337                            (Pick::Ours, Resolution::Complete)
338                        } else {
339                            let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
340                            let pick = match pick {
341                                builtin_driver::binary::Pick::Ours => Pick::Ours,
342                                builtin_driver::binary::Pick::Theirs => Pick::Theirs,
343                                builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
344                            };
345                            (pick, resolution)
346                        }
347                    }
348                    BuiltinDriver::Union => {
349                        let resolution = builtin_driver::text(
350                            out,
351                            input,
352                            labels,
353                            ours,
354                            base,
355                            theirs,
356                            builtin_driver::text::Options {
357                                conflict: builtin_driver::text::Conflict::ResolveWithUnion,
358                                ..self.options.text
359                            },
360                        );
361                        (Pick::Buffer, resolution)
362                    }
363                }
364            }
365        }
366
367        fn is_binary_buf(data: resource::Data<'_>) -> bool {
368            match data {
369                Data::Missing => false,
370                Data::Buffer(buf) => {
371                    let buf = &buf[..buf.len().min(8000)];
372                    buf.contains(&0)
373                }
374                Data::TooLarge { .. } => true,
375            }
376        }
377    }
378}
379
380/// Convenience
381impl<'parent> PlatformRef<'parent> {
382    /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
383    /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
384    /// to indicate it's `out`.
385    /// Use `labels` to annotate conflict sections in case of a text-merge.
386    /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
387    ///
388    /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
389    /// Too-large resources will result in an error.
390    ///
391    /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
392    /// in terms of buffer handling to make it more useful in the face of missing local files.
393    pub fn merge(
394        &self,
395        out: &mut Vec<u8>,
396        labels: builtin_driver::text::Labels<'_>,
397        context: &gix_command::Context,
398    ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
399        match self.configured_driver() {
400            Ok(driver) => {
401                let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
402                let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
403                    cmd: format!("{:?}", cmd.cmd),
404                    source: err,
405                })?;
406                if !status.success() {
407                    return Err(Error::ExternalDriverFailure {
408                        cmd: format!("{:?}", cmd.cmd),
409                        status,
410                    });
411                }
412                out.clear();
413                cmd.open_result_file()?.read_to_end(out)?;
414                Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
415            }
416            Err(builtin) => {
417                let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
418                out.clear();
419                let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
420                Ok((pick, resolution))
421            }
422        }
423    }
424
425    /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
426    /// Return `Ok(None)`  if the `pick` corresponds to a buffer (that was written separately).
427    /// Return `Err(())` if the buffer is *too large*, so it was never read.
428    #[allow(clippy::result_unit_err)]
429    pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
430        match pick {
431            inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
432            inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
433            inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
434            inner::builtin_merge::Pick::Buffer => Ok(None),
435        }
436    }
437
438    /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
439    /// In case of binary or large files, this will simply be the existing ID of the resource.
440    /// In case of resources available in the object DB for binary merges, the object ID will be returned.
441    /// If new content was produced due to a content merge, `buf` will be written out
442    /// to the object database using `write_blob`.
443    /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
444    /// from the worktree *and* was too large so it was never loaded from disk.
445    /// `Ok(None)` will also be returned if one of the resources was missing.
446    /// `write_blob()` is used to turn buffers.
447    pub fn id_by_pick<E>(
448        &self,
449        pick: inner::builtin_merge::Pick,
450        buf: &[u8],
451        mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
452    ) -> Result<Option<gix_hash::ObjectId>, E> {
453        let field = match pick {
454            inner::builtin_merge::Pick::Ancestor => &self.ancestor,
455            inner::builtin_merge::Pick::Ours => &self.current,
456            inner::builtin_merge::Pick::Theirs => &self.other,
457            inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
458        };
459        use crate::blob::platform::resource::Data;
460        match field.data {
461            Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
462            Data::TooLarge { .. } | Data::Missing => Ok(None),
463            Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
464            Data::Buffer(_) => Ok(Some(field.id.to_owned())),
465        }
466    }
467}