gix_merge/blob/platform/merge.rs
1use std::{io::Read, path::PathBuf};
2
3use crate::blob::{builtin_driver, PlatformRef, Resolution};
4
/// Options for use in the [`PlatformRef::merge()`] call.
#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
pub struct Options {
    /// If `true`, the resources being merged are contained in a virtual ancestor,
    /// which is the case when merge bases are merged into one.
    /// This flag affects the choice of merge drivers.
    pub is_virtual_ancestor: bool,
    /// Determine how to resolve conflicts of binary merges, as this value is handed to the builtin binary driver.
    /// If `None`, no conflict resolution is possible, and it picks a side.
    pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
    /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
    ///
    /// They are also used for the builtin union driver, which overrides only the conflict style.
    pub text: builtin_driver::text::Options,
}
17
/// The error returned by [`PlatformRef::merge()`].
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// Preparing the invocation of the external merge driver failed, see the wrapped error for details.
    #[error(transparent)]
    PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
    /// The external merge driver `cmd` could not be executed at all, with `source` being the underlying cause.
    #[error("Failed to launch external merge driver: {cmd}")]
    SpawnExternalDriver { cmd: String, source: std::io::Error },
    /// The external merge driver `cmd` ran, but exited with the unsuccessful `status`.
    #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
    ExternalDriverFailure {
        status: std::process::ExitStatus,
        cmd: String,
    },
    /// Opening or reading the file that the external merge driver left its result in failed.
    #[error("IO failed when dealing with merge-driver output")]
    ExternalDriverIO(#[from] std::io::Error),
}
34
/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
///
/// This type dereferences to a pre-configured [`std::process::Command`], ready to run, with `stderr` and `stdout`
/// set to *inherit*, and `stdin` set to *null*.
/// The command is expected to leave its result in the file substituted at `current`, which is then supposed
/// to be read back from there.
// TODO: remove dead-code annotation
// NOTE(review): `current`, `ancestor` and `other` are only stored and never read in this file, which is why
// the annotation is needed. Presumably they exist to keep the tempfiles from being removed while the command
// is in use - confirm before removing the annotation.
#[allow(dead_code)]
pub struct Command {
    /// The pre-configured command
    cmd: std::process::Command,
    /// A tempfile holding the *current* (ours) state of the resource.
    current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// The path at which `current` is located, for reading the result back from later.
    current_path: PathBuf,
    /// A tempfile holding the *ancestor* (base) state of the resource.
    ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
    /// A tempfile holding the *other* (their) state of the resource.
    other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57 ///
58 pub mod prepare_external_driver {
59 use std::{
60 io::Write,
61 ops::{Deref, DerefMut},
62 path::{Path, PathBuf},
63 process::Stdio,
64 };
65
66 use bstr::{BString, ByteVec};
67 use gix_tempfile::{AutoRemove, ContainingDirectory};
68
69 use crate::blob::{
70 builtin_driver,
71 builtin_driver::text::Conflict,
72 platform::{merge, DriverChoice},
73 BuiltinDriver, Driver, PlatformRef, ResourceKind,
74 };
75
/// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
#[derive(Debug, thiserror::Error)]
#[allow(missing_docs)]
pub enum Error {
    /// The data of the resource of the given `kind` isn't available as a slice, so it can't be written to a tempfile.
    #[error("The resource of kind {kind:?} was too large to be processed")]
    ResourceTooLarge { kind: ResourceKind },
    /// The tempfile for the resource at `rela_path`, representing the given `kind`, could not be created.
    #[error(
        "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
    )]
    CreateTempfile {
        rela_path: BString,
        kind: ResourceKind,
        source: std::io::Error,
    },
    /// The content of the resource at `rela_path`, representing the given `kind`, could not be written to its tempfile.
    #[error(
        "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
    )]
    WriteTempfile {
        rela_path: BString,
        kind: ResourceKind,
        source: std::io::Error,
    },
}
99
100 /// Plumbing
101 impl<'parent> PlatformRef<'parent> {
102 /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
103 /// prepare the invocation and temporary files needed to launch it according to protocol.
104 /// See the documentation of [`Driver::command`] for possible substitutions.
105 ///
106 /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
107 ///
108 /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
109 /// to read back the result into a suitable buffer.
110 ///
111 /// ### Deviation
112 ///
113 /// * We allow passing more context than Git would by taking a whole `context`,
114 /// it's up to the caller to decide how much is filled.
115 /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
116 pub fn prepare_external_driver(
117 &self,
118 merge_command: BString,
119 builtin_driver::text::Labels {
120 ancestor,
121 current,
122 other,
123 }: builtin_driver::text::Labels<'_>,
124 context: gix_command::Context,
125 ) -> Result<merge::Command, Error> {
126 fn write_data(
127 data: &[u8],
128 directory: &Path,
129 ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
130 let mut file = gix_tempfile::new(directory, ContainingDirectory::Exists, AutoRemove::Tempfile)?;
131 file.write_all(data)?;
132 let mut path = Default::default();
133 file.with_mut(|f| {
134 f.path().clone_into(&mut path);
135 })?;
136 let file = file.close()?;
137 Ok((file, path))
138 }
139
140 let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
141 kind: ResourceKind::CommonAncestorOrBase,
142 })?;
143 let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
144 kind: ResourceKind::CurrentOrOurs,
145 })?;
146 let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
147 kind: ResourceKind::OtherOrTheirs,
148 })?;
149
150 let tmp_dir = context
151 .worktree_dir
152 .as_deref()
153 .or(context.git_dir.as_deref())
154 .unwrap_or(Path::new(""));
155 let (base_tmp, base_path) = write_data(base, tmp_dir).map_err(|err| Error::CreateTempfile {
156 rela_path: self.ancestor.rela_path.into(),
157 kind: ResourceKind::CommonAncestorOrBase,
158 source: err,
159 })?;
160 let (ours_tmp, ours_path) = write_data(ours, tmp_dir).map_err(|err| Error::CreateTempfile {
161 rela_path: self.current.rela_path.into(),
162 kind: ResourceKind::CurrentOrOurs,
163 source: err,
164 })?;
165 let (theirs_tmp, theirs_path) = write_data(theirs, tmp_dir).map_err(|err| Error::CreateTempfile {
166 rela_path: self.other.rela_path.into(),
167 kind: ResourceKind::OtherOrTheirs,
168 source: err,
169 })?;
170
171 let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
172 let mut count = 0;
173 for token in merge_command.split(|b| *b == b'%') {
174 count += 1;
175 let token = if count > 1 {
176 match token.first() {
177 Some(&b'O') => {
178 cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
179 &token[1..]
180 }
181 Some(&b'A') => {
182 cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
183 &token[1..]
184 }
185 Some(&b'B') => {
186 cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
187 &token[1..]
188 }
189 Some(&b'L') => {
190 let marker_size = self
191 .options
192 .text
193 .conflict
194 .marker_size()
195 .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
196 cmd.push_str(format!("{marker_size}"));
197 &token[1..]
198 }
199 Some(&b'P') => {
200 cmd.push_str(gix_quote::single(self.current.rela_path));
201 &token[1..]
202 }
203 Some(&b'S') => {
204 cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
205 &token[1..]
206 }
207 Some(&b'X') => {
208 cmd.push_str(gix_quote::single(current.unwrap_or_default()));
209 &token[1..]
210 }
211 Some(&b'Y') => {
212 cmd.push_str(gix_quote::single(other.unwrap_or_default()));
213 &token[1..]
214 }
215 Some(_other) => {
216 cmd.push(b'%');
217 token
218 }
219 None => b"%",
220 }
221 } else {
222 token
223 };
224 cmd.extend_from_slice(token);
225 }
226
227 Ok(merge::Command {
228 cmd: gix_command::prepare(gix_path::from_bstring(cmd))
229 .with_context(context)
230 .command_may_be_shell_script()
231 .stdin(Stdio::null())
232 .stdout(Stdio::inherit())
233 .stderr(Stdio::inherit())
234 .into(),
235 current: ours_tmp,
236 current_path: ours_path,
237 ancestor: base_tmp,
238 other: theirs_tmp,
239 })
240 }
241
242 /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
243 /// with the built-in driver to use instead.
244 pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
245 match self.driver {
246 DriverChoice::BuiltIn(builtin) => Err(builtin),
247 DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
248 }
249 }
250 }
251
252 impl std::fmt::Debug for merge::Command {
253 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
254 self.cmd.fmt(f)
255 }
256 }
257
/// Provide shared access to the pre-configured [`std::process::Command`].
impl Deref for merge::Command {
    type Target = std::process::Command;

    fn deref(&self) -> &Self::Target {
        &self.cmd
    }
}
265
/// Provide mutable access to the pre-configured [`std::process::Command`], which allows to adjust and run it.
impl DerefMut for merge::Command {
    fn deref_mut(&mut self) -> &mut Self::Target {
        &mut self.cmd
    }
}
271
impl merge::Command {
    /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
    /// Calling this makes sense only after the merge command has finished successfully.
    ///
    /// The file is opened for reading at the path of the tempfile that held the *current* side.
    pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
        std::fs::File::open(&self.current_path)
    }
}
279 }
280
281 ///
282 pub mod builtin_merge {
283 use crate::blob::{
284 builtin_driver,
285 platform::{resource, resource::Data},
286 BuiltinDriver, PlatformRef, Resolution,
287 };
288
/// An identifier to tell us how a merge conflict was resolved by [builtin_merge](PlatformRef::builtin_merge).
///
/// It tells the caller where the merged content is to be found.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
pub enum Pick {
    /// In a binary merge, chose the ancestor.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Ancestor,
    /// In a binary merge, chose our side.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Ours,
    /// In a binary merge, chose their side.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Theirs,
    /// New data was produced with the result of the merge, to be found in the buffer that was passed to
    /// [builtin_merge()](PlatformRef::builtin_merge).
    /// This happens for any merge that isn't a binary merge.
    Buffer,
}
309
310 /// Plumbing
311 impl<'parent> PlatformRef<'parent> {
312 /// Perform the merge using the given `driver`, possibly placing the output in `out`.
313 /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
314 /// Use `labels` to annotate conflict sections in case of a text-merge.
315 /// Returns `None` if one of the buffers is too large, making a merge impossible.
316 /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
317 /// and one has to take the data from the respective resource.
318 ///
319 /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
320 /// which effectively chooses our side by default.
321 pub fn builtin_merge(
322 &self,
323 driver: BuiltinDriver,
324 out: &mut Vec<u8>,
325 input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
326 labels: builtin_driver::text::Labels<'_>,
327 ) -> (Pick, Resolution) {
328 let base = self.ancestor.data.as_slice().unwrap_or_default();
329 let ours = self.current.data.as_slice().unwrap_or_default();
330 let theirs = self.other.data.as_slice().unwrap_or_default();
331 let driver = if driver != BuiltinDriver::Binary
332 && (is_binary_buf(self.ancestor.data)
333 || is_binary_buf(self.other.data)
334 || is_binary_buf(self.current.data))
335 {
336 BuiltinDriver::Binary
337 } else {
338 driver
339 };
340 match driver {
341 BuiltinDriver::Text => {
342 let resolution =
343 builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
344 (Pick::Buffer, resolution)
345 }
346 BuiltinDriver::Binary => {
347 // easier to reason about the 'split' compared to merging both conditions
348 #[allow(clippy::if_same_then_else)]
349 if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
350 (Pick::Ours, Resolution::Complete)
351 } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
352 (Pick::Ours, Resolution::Complete)
353 } else {
354 let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
355 let pick = match pick {
356 builtin_driver::binary::Pick::Ours => Pick::Ours,
357 builtin_driver::binary::Pick::Theirs => Pick::Theirs,
358 builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
359 };
360 (pick, resolution)
361 }
362 }
363 BuiltinDriver::Union => {
364 let resolution = builtin_driver::text(
365 out,
366 input,
367 labels,
368 ours,
369 base,
370 theirs,
371 builtin_driver::text::Options {
372 conflict: builtin_driver::text::Conflict::ResolveWithUnion,
373 ..self.options.text
374 },
375 );
376 (Pick::Buffer, resolution)
377 }
378 }
379 }
380 }
381
382 fn is_binary_buf(data: resource::Data<'_>) -> bool {
383 match data {
384 Data::Missing => false,
385 Data::Buffer(buf) => {
386 let buf = &buf[..buf.len().min(8000)];
387 buf.contains(&0)
388 }
389 Data::TooLarge { .. } => true,
390 }
391 }
392 }
393}
394
395/// Convenience
396impl<'parent> PlatformRef<'parent> {
397 /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
398 /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
399 /// to indicate it's `out`.
400 /// Use `labels` to annotate conflict sections in case of a text-merge.
401 /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
402 ///
403 /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
404 /// Too-large resources will result in an error.
405 ///
406 /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
407 /// in terms of buffer handling to make it more useful in the face of missing local files.
408 pub fn merge(
409 &self,
410 out: &mut Vec<u8>,
411 labels: builtin_driver::text::Labels<'_>,
412 context: &gix_command::Context,
413 ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
414 match self.configured_driver() {
415 Ok(driver) => {
416 let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
417 let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
418 cmd: format!("{:?}", cmd.cmd),
419 source: err,
420 })?;
421 if !status.success() {
422 return Err(Error::ExternalDriverFailure {
423 cmd: format!("{:?}", cmd.cmd),
424 status,
425 });
426 }
427 out.clear();
428 cmd.open_result_file()?.read_to_end(out)?;
429 Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
430 }
431 Err(builtin) => {
432 let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
433 out.clear();
434 let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
435 Ok((pick, resolution))
436 }
437 }
438 }
439
440 /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
441 /// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately).
442 /// Return `Err(())` if the buffer is *too large*, so it was never read.
443 #[allow(clippy::result_unit_err)]
444 pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
445 match pick {
446 inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
447 inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
448 inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
449 inner::builtin_merge::Pick::Buffer => Ok(None),
450 }
451 }
452
453 /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
454 /// In case of binary or large files, this will simply be the existing ID of the resource.
455 /// In case of resources available in the object DB for binary merges, the object ID will be returned.
456 /// If new content was produced due to a content merge, `buf` will be written out
457 /// to the object database using `write_blob`.
458 /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
459 /// from the worktree *and* was too large so it was never loaded from disk.
460 /// `Ok(None)` will also be returned if one of the resources was missing.
461 /// `write_blob()` is used to turn buffers.
462 pub fn id_by_pick<E>(
463 &self,
464 pick: inner::builtin_merge::Pick,
465 buf: &[u8],
466 mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
467 ) -> Result<Option<gix_hash::ObjectId>, E> {
468 let field = match pick {
469 inner::builtin_merge::Pick::Ancestor => &self.ancestor,
470 inner::builtin_merge::Pick::Ours => &self.current,
471 inner::builtin_merge::Pick::Theirs => &self.other,
472 inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
473 };
474 use crate::blob::platform::resource::Data;
475 match field.data {
476 Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
477 Data::TooLarge { .. } | Data::Missing => Ok(None),
478 Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
479 Data::Buffer(_) => Ok(Some(field.id.to_owned())),
480 }
481 }
482}