gix_merge/blob/platform/merge.rs
1use crate::blob::{builtin_driver, PlatformRef, Resolution};
2use std::io::Read;
3use std::path::PathBuf;
4
5/// Options for the use in the [`PlatformRef::merge()`] call.
6#[derive(Default, Copy, Clone, Debug, Eq, PartialEq)]
7pub struct Options {
8 /// If `true`, the resources being merged are contained in a virtual ancestor,
9 /// which is the case when merge bases are merged into one.
10 /// This flag affects the choice of merge drivers.
11 pub is_virtual_ancestor: bool,
12 /// Determine how to resolve conflicts. If `None`, no conflict resolution is possible, and it picks a side.
13 pub resolve_binary_with: Option<builtin_driver::binary::ResolveWith>,
14 /// Options for the builtin [text driver](crate::blob::BuiltinDriver::Text).
15 pub text: builtin_driver::text::Options,
16}
17
18/// The error returned by [`PlatformRef::merge()`].
19#[derive(Debug, thiserror::Error)]
20#[allow(missing_docs)]
21pub enum Error {
22 #[error(transparent)]
23 PrepareExternalDriver(#[from] inner::prepare_external_driver::Error),
24 #[error("Failed to launch external merge driver: {cmd}")]
25 SpawnExternalDriver { cmd: String, source: std::io::Error },
26 #[error("External merge driver failed with non-zero exit status {status:?}: {cmd}")]
27 ExternalDriverFailure {
28 status: std::process::ExitStatus,
29 cmd: String,
30 },
31 #[error("IO failed when dealing with merge-driver output")]
32 ExternalDriverIO(#[from] std::io::Error),
33}
34
35/// The product of a [`PlatformRef::prepare_external_driver()`] operation.
36///
37/// This type allows to creation of [`std::process::Command`], ready to run, with `stderr` and `stdout` set to *inherit*,
38/// but `stdin` closed.
39/// It's expected to leave its result in the file substituted at `current` which is then supposed to be read back from there.
40// TODO: remove dead-code annotation
41#[allow(dead_code)]
42pub struct Command {
43 /// The pre-configured command
44 cmd: std::process::Command,
45 /// A tempfile holding the *current* (ours) state of the resource.
46 current: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
47 /// The path at which `current` is located, for reading the result back from later.
48 current_path: PathBuf,
49 /// A tempfile holding the *ancestor* (base) state of the resource.
50 ancestor: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
51 /// A tempfile holding the *other* (their) state of the resource.
52 other: gix_tempfile::Handle<gix_tempfile::handle::Closed>,
53}
54
55// Just to keep things here but move them a level up later.
56pub(super) mod inner {
57 ///
58 pub mod prepare_external_driver {
59 use crate::blob::builtin_driver::text::Conflict;
60 use crate::blob::platform::{merge, DriverChoice};
61 use crate::blob::{builtin_driver, BuiltinDriver, Driver, PlatformRef, ResourceKind};
62 use bstr::{BString, ByteVec};
63 use gix_tempfile::{AutoRemove, ContainingDirectory};
64 use std::io::Write;
65 use std::ops::{Deref, DerefMut};
66 use std::path::{Path, PathBuf};
67 use std::process::Stdio;
68
69 /// The error returned by [PlatformRef::prepare_external_driver()](PlatformRef::prepare_external_driver()).
70 #[derive(Debug, thiserror::Error)]
71 #[allow(missing_docs)]
72 pub enum Error {
73 #[error("The resource of kind {kind:?} was too large to be processed")]
74 ResourceTooLarge { kind: ResourceKind },
75 #[error(
76 "Tempfile to store content of '{rela_path}' ({kind:?}) for passing to external merge command could not be created"
77 )]
78 CreateTempfile {
79 rela_path: BString,
80 kind: ResourceKind,
81 source: std::io::Error,
82 },
83 #[error(
84 "Could not write content of '{rela_path}' ({kind:?}) to tempfile for passing to external merge command"
85 )]
86 WriteTempfile {
87 rela_path: BString,
88 kind: ResourceKind,
89 source: std::io::Error,
90 },
91 }
92
93 /// Plumbing
94 impl<'parent> PlatformRef<'parent> {
95 /// Given `merge_command` and `context`, typically obtained from git-configuration, and the currently set merge-resources,
96 /// prepare the invocation and temporary files needed to launch it according to protocol.
97 /// See the documentation of [`Driver::command`] for possible substitutions.
98 ///
99 /// Please note that this is an expensive operation this will always create three temporary files to hold all sides of the merge.
100 ///
101 /// The resulting command should be spawned, and when successful, [the result file can be opened](merge::Command::open_result_file)
102 /// to read back the result into a suitable buffer.
103 ///
104 /// ### Deviation
105 ///
106 /// * We allow passing more context than Git would by taking a whole `context`,
107 /// it's up to the caller to decide how much is filled.
108 /// * Our tempfiles aren't suffixed `.merge_file_XXXXXX` with `X` replaced with characters for uniqueness.
109 pub fn prepare_external_driver(
110 &self,
111 merge_command: BString,
112 builtin_driver::text::Labels {
113 ancestor,
114 current,
115 other,
116 }: builtin_driver::text::Labels<'_>,
117 context: gix_command::Context,
118 ) -> Result<merge::Command, Error> {
119 fn write_data(
120 data: &[u8],
121 ) -> std::io::Result<(gix_tempfile::Handle<gix_tempfile::handle::Closed>, PathBuf)> {
122 let mut file = gix_tempfile::new(Path::new(""), ContainingDirectory::Exists, AutoRemove::Tempfile)?;
123 file.write_all(data)?;
124 let mut path = Default::default();
125 file.with_mut(|f| {
126 f.path().clone_into(&mut path);
127 })?;
128 let file = file.close()?;
129 Ok((file, path))
130 }
131
132 let base = self.ancestor.data.as_slice().ok_or(Error::ResourceTooLarge {
133 kind: ResourceKind::CommonAncestorOrBase,
134 })?;
135 let ours = self.current.data.as_slice().ok_or(Error::ResourceTooLarge {
136 kind: ResourceKind::CurrentOrOurs,
137 })?;
138 let theirs = self.other.data.as_slice().ok_or(Error::ResourceTooLarge {
139 kind: ResourceKind::OtherOrTheirs,
140 })?;
141
142 let (base_tmp, base_path) = write_data(base).map_err(|err| Error::CreateTempfile {
143 rela_path: self.ancestor.rela_path.into(),
144 kind: ResourceKind::CommonAncestorOrBase,
145 source: err,
146 })?;
147 let (ours_tmp, ours_path) = write_data(ours).map_err(|err| Error::CreateTempfile {
148 rela_path: self.current.rela_path.into(),
149 kind: ResourceKind::CurrentOrOurs,
150 source: err,
151 })?;
152 let (theirs_tmp, theirs_path) = write_data(theirs).map_err(|err| Error::CreateTempfile {
153 rela_path: self.other.rela_path.into(),
154 kind: ResourceKind::OtherOrTheirs,
155 source: err,
156 })?;
157
158 let mut cmd = BString::from(Vec::with_capacity(merge_command.len()));
159 let mut count = 0;
160 for token in merge_command.split(|b| *b == b'%') {
161 count += 1;
162 let token = if count > 1 {
163 match token.first() {
164 Some(&b'O') => {
165 cmd.push_str(gix_path::into_bstr(&base_path).as_ref());
166 &token[1..]
167 }
168 Some(&b'A') => {
169 cmd.push_str(gix_path::into_bstr(&ours_path).as_ref());
170 &token[1..]
171 }
172 Some(&b'B') => {
173 cmd.push_str(gix_path::into_bstr(&theirs_path).as_ref());
174 &token[1..]
175 }
176 Some(&b'L') => {
177 let marker_size = self
178 .options
179 .text
180 .conflict
181 .marker_size()
182 .unwrap_or(Conflict::DEFAULT_MARKER_SIZE);
183 cmd.push_str(format!("{marker_size}"));
184 &token[1..]
185 }
186 Some(&b'P') => {
187 cmd.push_str(gix_quote::single(self.current.rela_path));
188 &token[1..]
189 }
190 Some(&b'S') => {
191 cmd.push_str(gix_quote::single(ancestor.unwrap_or_default()));
192 &token[1..]
193 }
194 Some(&b'X') => {
195 cmd.push_str(gix_quote::single(current.unwrap_or_default()));
196 &token[1..]
197 }
198 Some(&b'Y') => {
199 cmd.push_str(gix_quote::single(other.unwrap_or_default()));
200 &token[1..]
201 }
202 Some(_other) => {
203 cmd.push(b'%');
204 token
205 }
206 None => b"%",
207 }
208 } else {
209 token
210 };
211 cmd.extend_from_slice(token);
212 }
213
214 Ok(merge::Command {
215 cmd: gix_command::prepare(gix_path::from_bstring(cmd))
216 .with_context(context)
217 .with_shell()
218 .stdin(Stdio::null())
219 .stdout(Stdio::inherit())
220 .stderr(Stdio::inherit())
221 .into(),
222 current: ours_tmp,
223 current_path: ours_path,
224 ancestor: base_tmp,
225 other: theirs_tmp,
226 })
227 }
228
229 /// Return the configured driver program for use with [`Self::prepare_external_driver()`], or `Err`
230 /// with the built-in driver to use instead.
231 pub fn configured_driver(&self) -> Result<&'parent Driver, BuiltinDriver> {
232 match self.driver {
233 DriverChoice::BuiltIn(builtin) => Err(builtin),
234 DriverChoice::Index(idx) => self.parent.drivers.get(idx).ok_or(BuiltinDriver::default()),
235 }
236 }
237 }
238
239 impl std::fmt::Debug for merge::Command {
240 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
241 self.cmd.fmt(f)
242 }
243 }
244
245 impl Deref for merge::Command {
246 type Target = std::process::Command;
247
248 fn deref(&self) -> &Self::Target {
249 &self.cmd
250 }
251 }
252
253 impl DerefMut for merge::Command {
254 fn deref_mut(&mut self) -> &mut Self::Target {
255 &mut self.cmd
256 }
257 }
258
259 impl merge::Command {
260 /// Open the file which should have been written to the location of `ours`, to yield the result of the merge operation.
261 /// Calling this makes sense only after the merge command has finished successfully.
262 pub fn open_result_file(&self) -> std::io::Result<std::fs::File> {
263 std::fs::File::open(&self.current_path)
264 }
265 }
266 }
267
268 ///
269 pub mod builtin_merge {
270 use crate::blob::platform::resource;
271 use crate::blob::platform::resource::Data;
272 use crate::blob::{builtin_driver, BuiltinDriver, PlatformRef, Resolution};
273
/// Identifies where to find the result of a merge performed by [builtin_merge](PlatformRef::builtin_merge).
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum Pick {
    /// The ancestor was chosen in a binary merge.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Ancestor,
    /// Our side was chosen in a binary merge.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Ours,
    /// Their side was chosen in a binary merge.
    ///
    /// Use [`PlatformRef::buffer_by_pick()`] to retrieve it.
    Theirs,
    /// The merge produced new data, which is found in the buffer that was passed to
    /// [builtin_merge()](PlatformRef::builtin_merge).
    /// This happens for any merge that isn't a binary merge.
    Buffer,
}
294
295 /// Plumbing
296 impl<'parent> PlatformRef<'parent> {
297 /// Perform the merge using the given `driver`, possibly placing the output in `out`.
298 /// `input` can be used to keep tokens between runs, but note it will only grow in size unless cleared manually.
299 /// Use `labels` to annotate conflict sections in case of a text-merge.
300 /// Returns `None` if one of the buffers is too large, making a merge impossible.
301 /// Note that if the *pick* wasn't [`Pick::Buffer`], then `out` will not have been cleared,
302 /// and one has to take the data from the respective resource.
303 ///
304 /// If there is no buffer loaded as the resource is too big, we will automatically perform a binary merge
305 /// which effectively chooses our side by default.
306 pub fn builtin_merge(
307 &self,
308 driver: BuiltinDriver,
309 out: &mut Vec<u8>,
310 input: &mut imara_diff::intern::InternedInput<&'parent [u8]>,
311 labels: builtin_driver::text::Labels<'_>,
312 ) -> (Pick, Resolution) {
313 let base = self.ancestor.data.as_slice().unwrap_or_default();
314 let ours = self.current.data.as_slice().unwrap_or_default();
315 let theirs = self.other.data.as_slice().unwrap_or_default();
316 let driver = if driver != BuiltinDriver::Binary
317 && (is_binary_buf(self.ancestor.data)
318 || is_binary_buf(self.other.data)
319 || is_binary_buf(self.current.data))
320 {
321 BuiltinDriver::Binary
322 } else {
323 driver
324 };
325 match driver {
326 BuiltinDriver::Text => {
327 let resolution =
328 builtin_driver::text(out, input, labels, ours, base, theirs, self.options.text);
329 (Pick::Buffer, resolution)
330 }
331 BuiltinDriver::Binary => {
332 // easier to reason about the 'split' compared to merging both conditions
333 #[allow(clippy::if_same_then_else)]
334 if !(self.current.id.is_null() || self.other.id.is_null()) && self.current.id == self.other.id {
335 (Pick::Ours, Resolution::Complete)
336 } else if (self.current.id.is_null() || self.other.id.is_null()) && ours == theirs {
337 (Pick::Ours, Resolution::Complete)
338 } else {
339 let (pick, resolution) = builtin_driver::binary(self.options.resolve_binary_with);
340 let pick = match pick {
341 builtin_driver::binary::Pick::Ours => Pick::Ours,
342 builtin_driver::binary::Pick::Theirs => Pick::Theirs,
343 builtin_driver::binary::Pick::Ancestor => Pick::Ancestor,
344 };
345 (pick, resolution)
346 }
347 }
348 BuiltinDriver::Union => {
349 let resolution = builtin_driver::text(
350 out,
351 input,
352 labels,
353 ours,
354 base,
355 theirs,
356 builtin_driver::text::Options {
357 conflict: builtin_driver::text::Conflict::ResolveWithUnion,
358 ..self.options.text
359 },
360 );
361 (Pick::Buffer, resolution)
362 }
363 }
364 }
365 }
366
367 fn is_binary_buf(data: resource::Data<'_>) -> bool {
368 match data {
369 Data::Missing => false,
370 Data::Buffer(buf) => {
371 let buf = &buf[..buf.len().min(8000)];
372 buf.contains(&0)
373 }
374 Data::TooLarge { .. } => true,
375 }
376 }
377 }
378}
379
380/// Convenience
381impl<'parent> PlatformRef<'parent> {
382 /// Perform the merge, possibly invoking an external merge command, and store the result in `out`, returning `(pick, resolution)`.
383 /// Note that `pick` indicates which resource the buffer should be taken from, unless it's [`Pick::Buffer`](inner::builtin_merge::Pick::Buffer)
384 /// to indicate it's `out`.
385 /// Use `labels` to annotate conflict sections in case of a text-merge.
386 /// The merge is configured by `opts` and possible merge driver command executions are affected by `context`.
387 ///
388 /// Note that at this stage, none-existing input data will simply default to an empty buffer when running the actual merge algorithm.
389 /// Too-large resources will result in an error.
390 ///
391 /// Generally, it is assumed that standard logic, like deletions of files, is handled before any of this is called, so we are lenient
392 /// in terms of buffer handling to make it more useful in the face of missing local files.
393 pub fn merge(
394 &self,
395 out: &mut Vec<u8>,
396 labels: builtin_driver::text::Labels<'_>,
397 context: &gix_command::Context,
398 ) -> Result<(inner::builtin_merge::Pick, Resolution), Error> {
399 match self.configured_driver() {
400 Ok(driver) => {
401 let mut cmd = self.prepare_external_driver(driver.command.clone(), labels, context.clone())?;
402 let status = cmd.status().map_err(|err| Error::SpawnExternalDriver {
403 cmd: format!("{:?}", cmd.cmd),
404 source: err,
405 })?;
406 if !status.success() {
407 return Err(Error::ExternalDriverFailure {
408 cmd: format!("{:?}", cmd.cmd),
409 status,
410 });
411 }
412 out.clear();
413 cmd.open_result_file()?.read_to_end(out)?;
414 Ok((inner::builtin_merge::Pick::Buffer, Resolution::Complete))
415 }
416 Err(builtin) => {
417 let mut input = imara_diff::intern::InternedInput::new(&[][..], &[]);
418 out.clear();
419 let (pick, resolution) = self.builtin_merge(builtin, out, &mut input, labels);
420 Ok((pick, resolution))
421 }
422 }
423 }
424
425 /// Using a `pick` obtained from [`merge()`](Self::merge), obtain the respective buffer suitable for reading or copying.
426 /// Return `Ok(None)` if the `pick` corresponds to a buffer (that was written separately).
427 /// Return `Err(())` if the buffer is *too large*, so it was never read.
428 #[allow(clippy::result_unit_err)]
429 pub fn buffer_by_pick(&self, pick: inner::builtin_merge::Pick) -> Result<Option<&'parent [u8]>, ()> {
430 match pick {
431 inner::builtin_merge::Pick::Ancestor => self.ancestor.data.as_slice().map(Some).ok_or(()),
432 inner::builtin_merge::Pick::Ours => self.current.data.as_slice().map(Some).ok_or(()),
433 inner::builtin_merge::Pick::Theirs => self.other.data.as_slice().map(Some).ok_or(()),
434 inner::builtin_merge::Pick::Buffer => Ok(None),
435 }
436 }
437
438 /// Use `pick` to return the object id of the merged result, assuming that `buf` was passed as `out` to [merge()](Self::merge).
439 /// In case of binary or large files, this will simply be the existing ID of the resource.
440 /// In case of resources available in the object DB for binary merges, the object ID will be returned.
441 /// If new content was produced due to a content merge, `buf` will be written out
442 /// to the object database using `write_blob`.
443 /// Beware that the returned ID could be `Ok(None)` if the underlying resource was loaded
444 /// from the worktree *and* was too large so it was never loaded from disk.
445 /// `Ok(None)` will also be returned if one of the resources was missing.
446 /// `write_blob()` is used to turn buffers.
447 pub fn id_by_pick<E>(
448 &self,
449 pick: inner::builtin_merge::Pick,
450 buf: &[u8],
451 mut write_blob: impl FnMut(&[u8]) -> Result<gix_hash::ObjectId, E>,
452 ) -> Result<Option<gix_hash::ObjectId>, E> {
453 let field = match pick {
454 inner::builtin_merge::Pick::Ancestor => &self.ancestor,
455 inner::builtin_merge::Pick::Ours => &self.current,
456 inner::builtin_merge::Pick::Theirs => &self.other,
457 inner::builtin_merge::Pick::Buffer => return write_blob(buf).map(Some),
458 };
459 use crate::blob::platform::resource::Data;
460 match field.data {
461 Data::TooLarge { .. } | Data::Missing if !field.id.is_null() => Ok(Some(field.id.to_owned())),
462 Data::TooLarge { .. } | Data::Missing => Ok(None),
463 Data::Buffer(buf) if field.id.is_null() => write_blob(buf).map(Some),
464 Data::Buffer(_) => Ok(Some(field.id.to_owned())),
465 }
466 }
467}