// gix_diff/blob/platform.rs

1use std::{cmp::Ordering, io::Write, process::Stdio};
2
3use bstr::{BStr, BString, ByteSlice};
4
5use super::Algorithm;
6use crate::blob::{pipeline, Pipeline, Platform, ResourceKind};
7
/// A key to uniquely identify either a location in the worktree, or in the object database.
///
/// Hashing and equality are implemented by hand: depending on `use_id`, only `id` and `is_link`,
/// or only `location`, take part in them.
#[derive(Clone)]
pub(crate) struct CacheKey {
    /// The object id of the resource. It is part of the identity only if `use_id` is `true`.
    id: gix_hash::ObjectId,
    /// The path of the resource. It is part of the identity only if `use_id` is `false`.
    location: BString,
    /// If `true`, this is an `id` based key, otherwise it's location based.
    use_id: bool,
    /// Only relevant when `id` is not null, to further differentiate content and allow us to
    /// keep track of both links and blobs with the same content (rare, but possible).
    is_link: bool,
}
19
/// A stored value representing a diffable resource.
#[derive(Clone, Eq, PartialEq, Ord, PartialOrd, Debug)]
pub(crate) struct CacheValue {
    /// The outcome of converting a resource into a diffable format using [Pipeline::convert_to_diffable()].
    conversion: pipeline::Outcome,
    /// The kind of the resource we are looking at. Only possible values are `Blob`, `BlobExecutable` and `Link`.
    mode: gix_object::tree::EntryKind,
    /// A possibly empty buffer, depending on `conversion.data` which may indicate the data is considered binary.
    ///
    /// It is only handed out through [`Resource`] if `conversion.data` denotes a buffer, and it is the
    /// allocation that [`Platform::clear_resource_cache_keep_allocation()`] may move to the free-list.
    buffer: Vec<u8>,
}
30
31impl std::hash::Hash for CacheKey {
32    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
33        if self.use_id {
34            self.id.hash(state);
35            self.is_link.hash(state);
36        } else {
37            self.location.hash(state);
38        }
39    }
40}
41
42impl PartialEq for CacheKey {
43    fn eq(&self, other: &Self) -> bool {
44        match (self.use_id, other.use_id) {
45            (false, false) => self.location.eq(&other.location),
46            (true, true) => self.id.eq(&other.id) && self.is_link.eq(&other.is_link),
47            _ => false,
48        }
49    }
50}
51
52impl Eq for CacheKey {}
53
54impl Default for CacheKey {
55    fn default() -> Self {
56        CacheKey {
57            id: gix_hash::Kind::Sha1.null(),
58            use_id: false,
59            is_link: false,
60            location: BString::default(),
61        }
62    }
63}
64
65impl CacheKey {
66    fn set_location(&mut self, rela_path: &BStr) {
67        self.location.clear();
68        self.location.extend_from_slice(rela_path);
69    }
70}
71
/// A resource ready to be diffed in one way or another.
///
/// It is a cheap, copyable view that borrows from the key and value stored in the [`Platform`]'s cache.
#[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
pub struct Resource<'a> {
    /// If available, an index into the `drivers` field to access more diff-related information of the driver for items
    /// at the given path, as previously determined by git-attributes.
    ///
    /// Note that drivers are queried even if there is no object available.
    pub driver_index: Option<usize>,
    /// The data itself, suitable for diffing, and if the object or worktree item is present at all.
    pub data: resource::Data<'a>,
    /// The kind of the resource we are looking at. Only possible values are `Blob`, `BlobExecutable` and `Link`.
    pub mode: gix_object::tree::EntryKind,
    /// The location of the resource, relative to the working tree.
    pub rela_path: &'a BStr,
    /// The id of the content as it would be stored in `git`, or `null` if the content doesn't exist anymore at
    /// `rela_path` or if it was never computed. This can happen with content read from the worktree, which has to
    /// go through a filter to be converted back to what `git` would store.
    pub id: &'a gix_hash::oid,
}
91
92///
93pub mod resource {
94    use bstr::ByteSlice;
95
96    use crate::blob::{
97        pipeline,
98        platform::{CacheKey, CacheValue, Resource},
99    };
100
101    /// A token source that splits bytes into lines while removing trailing newline separators.
102    // TODO: use `bstr::Lines` here, but it's not `Copy`
103    #[derive(Clone, Copy)]
104    pub struct ByteLinesWithoutTerminator<'a>(&'a [u8]);
105
106    impl<'a> ByteLinesWithoutTerminator<'a> {
107        /// Create a new instance over `data`.
108        pub fn new(data: &'a [u8]) -> Self {
109            Self(data)
110        }
111    }
112
113    impl<'a> Iterator for ByteLinesWithoutTerminator<'a> {
114        type Item = &'a [u8];
115
116        fn next(&mut self) -> Option<Self::Item> {
117            let mut l = match self.0.find_byte(b'\n') {
118                None if self.0.is_empty() => None,
119                None => {
120                    let line = self.0;
121                    self.0 = b"";
122                    Some(line)
123                }
124                Some(end) => {
125                    let line = &self.0[..=end];
126                    self.0 = &self.0[end + 1..];
127                    Some(line)
128                }
129            }?;
130
131            if l.last_byte() == Some(b'\n') {
132                l = &l[..l.len() - 1];
133                if l.last_byte() == Some(b'\r') {
134                    l = &l[..l.len() - 1];
135                }
136            }
137            Some(l)
138        }
139    }
140
141    impl<'a> imara_diff::TokenSource for ByteLinesWithoutTerminator<'a> {
142        type Token = &'a [u8];
143        type Tokenizer = Self;
144
145        fn tokenize(&self) -> Self::Tokenizer {
146            *self
147        }
148
149        fn estimate_tokens(&self) -> u32 {
150            let len: usize = self.take(20).map(<[u8]>::len).sum();
151            (self.0.len() * 20).checked_div(len).unwrap_or(100) as u32
152        }
153    }
154
155    impl<'a> Resource<'a> {
156        pub(crate) fn new(key: &'a CacheKey, value: &'a CacheValue) -> Self {
157            Resource {
158                driver_index: value.conversion.driver_index,
159                data: value.conversion.data.map_or(Data::Missing, |data| match data {
160                    pipeline::Data::Buffer { is_derived } => Data::Buffer {
161                        buf: &value.buffer,
162                        is_derived,
163                    },
164                    pipeline::Data::Binary { size } => Data::Binary { size },
165                }),
166                mode: value.mode,
167                rela_path: key.location.as_ref(),
168                id: &key.id,
169            }
170        }
171
172        /// Produce an iterator over lines, separated by LF or CRLF and thus keeping newlines.
173        ///
174        /// Note that this will cause unusual diffs if a file didn't end in newline but lines were added
175        /// on the other side.
176        ///
177        /// Suitable to create tokens using [`crate::blob::InternedInput`].
178        pub fn intern_source(&self) -> imara_diff::sources::ByteLines<'a> {
179            crate::blob::sources::byte_lines(self.data.as_slice().unwrap_or_default())
180        }
181
182        /// Produce an iterator over lines, but remove LF or CRLF.
183        ///
184        /// This produces the expected diffs when lines were added at the end of a file that didn't end
185        /// with a newline before the change.
186        ///
187        /// Suitable to create tokens using [`crate::blob::InternedInput`].
188        pub fn intern_source_strip_newline_separators(&self) -> ByteLinesWithoutTerminator<'a> {
189            ByteLinesWithoutTerminator::new(self.data.as_slice().unwrap_or_default())
190        }
191    }
192
193    /// The data of a diffable resource, as it could be determined and computed previously.
194    #[derive(Debug, Copy, Clone, Ord, PartialOrd, Eq, PartialEq, Hash)]
195    pub enum Data<'a> {
196        /// The object is missing, either because it didn't exist in the working tree or because its `id` was null.
197        Missing,
198        /// The textual data as processed to be in a diffable state.
199        Buffer {
200            /// The buffer bytes.
201            buf: &'a [u8],
202            /// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer,
203            /// making it a derived value.
204            ///
205            /// Applications should check for this to avoid treating the buffer content as (original) resource content.
206            is_derived: bool,
207        },
208        /// The size that the binary blob had at the given revision, without having applied filters, as it's either
209        /// considered binary or above the big-file threshold.
210        ///
211        /// In this state, the binary file cannot be diffed.
212        Binary {
213            /// The size of the object prior to performing any filtering or as it was found on disk.
214            ///
215            /// Note that technically, the size isn't always representative of the same 'state' of the
216            /// content, as once it can be the size of the blob in git, and once it's the size of file
217            /// in the worktree.
218            size: u64,
219        },
220    }
221
222    impl<'a> Data<'a> {
223        /// Return ourselves as slice of bytes if this instance stores data.
224        pub fn as_slice(&self) -> Option<&'a [u8]> {
225            match self {
226                Data::Buffer { buf, .. } => Some(buf),
227                Data::Binary { .. } | Data::Missing => None,
228            }
229        }
230
231        /// Returns `true` if the data in this instance is derived.
232        pub fn is_derived(&self) -> bool {
233            match self {
234                Data::Missing | Data::Binary { .. } => false,
235                Data::Buffer { is_derived, .. } => *is_derived,
236            }
237        }
238    }
239}
240
241///
242pub mod set_resource {
243    use bstr::BString;
244
245    use crate::blob::{pipeline, ResourceKind};
246
    /// The error returned by [Platform::set_resource](super::Platform::set_resource).
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        // The resource had a mode that cannot be diffed, like a commit or a tree.
        #[error("Can only diff blobs and links, not {mode:?}")]
        InvalidMode { mode: gix_object::tree::EntryKind },
        // Reading the worktree data of the resource at `rela_path` failed.
        #[error("Failed to read {kind} worktree data from '{rela_path}'")]
        Io {
            rela_path: BString,
            kind: ResourceKind,
            source: std::io::Error,
        },
        // The attributes for the resource at `rela_path` could not be looked up.
        #[error("Failed to obtain attributes for {kind} resource at '{rela_path}'")]
        Attributes {
            rela_path: BString,
            kind: ResourceKind,
            source: std::io::Error,
        },
        // The conversion pipeline failed, its error is passed through as is.
        #[error(transparent)]
        ConvertToDiffable(#[from] pipeline::convert_to_diffable::Error),
    }
268}
269
270///
271pub mod prepare_diff {
272    use bstr::BStr;
273
274    use crate::blob::platform::Resource;
275
    /// The kind of operation that should be performed based on the configuration of the resources involved in the diff.
    #[derive(Debug, Copy, Clone, Eq, PartialEq)]
    pub enum Operation<'a> {
        /// The internal diff algorithm should be computed with [`crate::blob::Diff::compute()`].
        /// This only happens if none of the resources are binary, and if there is no external diff program configured via git-attributes
        /// *or* [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured)
        /// is `false`.
        ///
        /// Use [`Outcome::interned_input()`] to easily obtain an interner for use with [`crate::blob::Diff::compute()`], or maintain one yourself
        /// for greater reuse.
        InternalDiff {
            /// The algorithm we determined should be used, which is one of (in order, first set one wins):
            ///
            /// * the driver's override
            /// * the platform's own configuration (typically from git-config)
            /// * the default algorithm
            algorithm: imara_diff::Algorithm,
        },
        /// Run the external diff program as configured in the driver of the `source` resource.
        /// This only happens if [Options::skip_internal_diff_if_external_is_configured](super::Options::skip_internal_diff_if_external_is_configured)
        /// was `true`, preventing the usage of the internal diff implementation.
        ExternalCommand {
            /// The command as extracted from [Driver::command](super::super::Driver::command).
            /// Use it in [`Platform::prepare_diff_command`](super::Platform::prepare_diff_command()) to easily prepare a compatible invocation.
            command: &'a BStr,
        },
        /// One of the involved resources, [`old`](Outcome::old) or [`new`](Outcome::new), was binary and thus no diff
        /// can be performed.
        SourceOrDestinationIsBinary,
    }
306
    /// The outcome of a [`prepare_diff`](super::Platform::prepare_diff()) operation.
    #[derive(Debug, Copy, Clone, Eq, PartialEq)]
    pub struct Outcome<'a> {
        /// The kind of diff that should be performed. This may include skipping the internal diff as well.
        pub operation: Operation<'a>,
        /// If `true`, a [binary to text filter](super::super::Driver::binary_to_text_command) was used to obtain the buffer
        /// of `old` or `new`, making it a derived value.
        ///
        /// Applications should check for this to avoid treating the buffer content as (original) resource content.
        pub old_or_new_is_derived: bool,
        /// The old or source of the diff operation.
        pub old: Resource<'a>,
        /// The new or destination of the diff operation.
        pub new: Resource<'a>,
    }
322
323    impl<'a> Outcome<'a> {
324        /// Produce an instance of an interner which `git` would use to perform diffs.
325        ///
326        /// Note that newline separators will be removed to improve diff quality
327        /// at the end of files that didn't have a newline, but had lines added
328        /// past the end.
329        pub fn interned_input(&self) -> crate::blob::InternedInput<&'a [u8]> {
330            crate::blob::InternedInput::new(
331                self.old.intern_source_strip_newline_separators(),
332                self.new.intern_source_strip_newline_separators(),
333            )
334        }
335    }
336
    /// The error returned by [Platform::prepare_diff()](super::Platform::prepare_diff()).
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        // At least one side of the diff has no resource set, or its cached value isn't available.
        #[error("Either the source or the destination of the diff operation were not set")]
        SourceOrDestinationUnset,
        // Both sides are set, but neither of them has any data.
        #[error("Tried to diff resources that are both considered removed")]
        SourceAndDestinationRemoved,
    }
346}
347
348///
349pub mod prepare_diff_command {
350    use std::ops::{Deref, DerefMut};
351
352    use bstr::BString;
353
    /// The error returned by [Platform::prepare_diff_command()](super::Platform::prepare_diff_command()).
    #[derive(Debug, thiserror::Error)]
    #[allow(missing_docs)]
    pub enum Error {
        // At least one side of the diff has no resource set.
        #[error("Either the source or the destination of the diff operation were not set")]
        SourceOrDestinationUnset,
        // One of the resources is binary, and only its size is known.
        #[error("Binary resources can't be diffed with an external command (as we don't have the data anymore)")]
        SourceOrDestinationBinary,
        // The tempfile that should receive the content of the resource at `rela_path` could not be created.
        #[error(
            "Tempfile to store content of '{rela_path}' for passing to external diff command could not be created"
        )]
        CreateTempfile { rela_path: BString, source: std::io::Error },
        // Writing to, accessing or closing the tempfile for the resource at `rela_path` failed.
        #[error("Could not write content of '{rela_path}' to tempfile for passing to external diff command")]
        WriteTempfile { rela_path: BString, source: std::io::Error },
    }
369
    /// The outcome of a [`prepare_diff_command`](super::Platform::prepare_diff_command()) operation.
    ///
    /// This type acts like [`std::process::Command`], ready to run, with `stdin`, `stdout` and `stderr` set to *inherit*
    /// all handles as this is expected to be for visual inspection.
    ///
    /// It dereferences to the underlying command and keeps the tempfile handles that back its
    /// arguments alongside it.
    pub struct Command {
        /// The fully prepared command whose arguments refer to the tempfiles below.
        pub(crate) cmd: std::process::Command,
        /// Possibly a tempfile to be removed after the run, or `None` if there is no old version.
        pub(crate) old: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>,
        /// Possibly a tempfile to be removed after the run, or `None` if there is no new version.
        pub(crate) new: Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>,
    }
381
382    impl Deref for Command {
383        type Target = std::process::Command;
384
385        fn deref(&self) -> &Self::Target {
386            &self.cmd
387        }
388    }
389
390    impl DerefMut for Command {
391        fn deref_mut(&mut self) -> &mut Self::Target {
392            &mut self.cmd
393        }
394    }
395}
396
/// Options for use in [Platform::new()].
///
/// The default leaves the algorithm unset and doesn't skip the internal diff.
#[derive(Default, Copy, Clone)]
pub struct Options {
    /// The algorithm to use when diffing.
    /// If unset, it uses the [default algorithm](Algorithm::default()).
    ///
    /// Note that an algorithm configured in the driver of the old resource takes precedence over this value.
    pub algorithm: Option<Algorithm>,
    /// If `true`, default `false`, then an external `diff` configured using gitattributes and drivers,
    /// will cause the built-in diff [to be skipped](prepare_diff::Operation::ExternalCommand).
    /// Otherwise, the internal diff is called despite the configured external diff, which is
    /// typically what callers expect by default.
    pub skip_internal_diff_if_external_is_configured: bool,
}
409
410/// Lifecycle
411impl Platform {
412    /// Create a new instance with `options`, and a way to `filter` data from the object database to data that is diff-able.
413    /// `filter_mode` decides how to do that specifically.
414    /// Use `attr_stack` to access attributes pertaining worktree filters and diff settings.
415    pub fn new(
416        options: Options,
417        filter: Pipeline,
418        filter_mode: pipeline::Mode,
419        attr_stack: gix_worktree::Stack,
420    ) -> Self {
421        Platform {
422            old: None,
423            new: None,
424            diff_cache: Default::default(),
425            free_list: Vec::with_capacity(2),
426            options,
427            filter,
428            filter_mode,
429            attr_stack,
430        }
431    }
432}
433
434/// Conversions
435impl Platform {
436    /// Store enough information about a resource to eventually diff it, where…
437    ///
438    /// * `id` is the hash of the resource. If it [is null](gix_hash::ObjectId::is_null()), it should either
439    ///   be a resource in the worktree, or it's considered a non-existing, deleted object.
440    ///   If an `id` is known, as the hash of the object as (would) be stored in `git`, then it should be provided
441    ///   for completeness.
442    /// * `mode` is the kind of object (only blobs and links are allowed)
443    /// * `rela_path` is the relative path as seen from the (work)tree root.
444    /// * `kind` identifies the side of the diff this resource will be used for.
445    ///   A diff needs both `OldOrSource` *and* `NewOrDestination`.
446    /// * `objects` provides access to the object database in case the resource can't be read from a worktree.
447    ///
448    /// Note that it's assumed that either `id + mode (` or `rela_path` can serve as unique identifier for the resource,
449    /// depending on whether or not a [worktree root](pipeline::WorktreeRoots) is set for the resource of `kind`,
450    /// with resources with worktree roots using the `rela_path` as unique identifier.
451    ///
452    /// ### Important
453    ///
454    /// If an error occurs, the previous resource of `kind` will be cleared, preventing further diffs
455    /// unless another attempt succeeds.
456    pub fn set_resource(
457        &mut self,
458        id: gix_hash::ObjectId,
459        mode: gix_object::tree::EntryKind,
460        rela_path: &BStr,
461        kind: ResourceKind,
462        objects: &impl gix_object::FindObjectOrHeader, // TODO: make this `dyn` once https://github.com/rust-lang/rust/issues/65991 is stable, then also make tracker.rs `objects` dyn
463    ) -> Result<(), set_resource::Error> {
464        let res = self.set_resource_inner(id, mode, rela_path, kind, objects);
465        if res.is_err() {
466            *match kind {
467                ResourceKind::OldOrSource => &mut self.old,
468                ResourceKind::NewOrDestination => &mut self.new,
469            } = None;
470        }
471        res
472    }
473
474    /// Given `diff_command` and `context`, typically obtained from git-configuration, and the currently set diff-resources,
475    /// prepare the invocation and temporary files needed to launch it according to protocol.
476    /// `count` / `total` are used for progress indication passed as environment variables `GIT_DIFF_PATH_(COUNTER|TOTAL)`
477    /// respectively (0-based), so the first path has `count=0` and `total=1` (assuming there is only one path).
478    /// Returns `None` if at least one resource is unset, see [`set_resource()`](Self::set_resource()).
479    ///
480    /// Please note that this is an expensive operation this will always create up to two temporary files to hold the data
481    /// for the old and new resources.
482    ///
483    /// ### Deviation
484    ///
485    /// If one of the resources is binary, the operation reports an error as such resources don't make their data available
486    /// which is required for the external diff to run.
487    // TODO: fix this - the diff shouldn't fail if binary (or large) files are used, just copy them into tempfiles.
488    pub fn prepare_diff_command(
489        &self,
490        diff_command: BString,
491        context: gix_command::Context,
492        count: usize,
493        total: usize,
494    ) -> Result<prepare_diff_command::Command, prepare_diff_command::Error> {
495        fn add_resource(
496            cmd: &mut std::process::Command,
497            res: Resource<'_>,
498        ) -> Result<Option<gix_tempfile::Handle<gix_tempfile::handle::Closed>>, prepare_diff_command::Error> {
499            let tmpfile = match res.data {
500                resource::Data::Missing => {
501                    cmd.args(["/dev/null", ".", "."]);
502                    None
503                }
504                resource::Data::Buffer { buf, is_derived: _ } => {
505                    let mut tmp = gix_tempfile::new(
506                        std::env::temp_dir(),
507                        gix_tempfile::ContainingDirectory::Exists,
508                        gix_tempfile::AutoRemove::Tempfile,
509                    )
510                    .map_err(|err| prepare_diff_command::Error::CreateTempfile {
511                        rela_path: res.rela_path.to_owned(),
512                        source: err,
513                    })?;
514                    tmp.write_all(buf)
515                        .map_err(|err| prepare_diff_command::Error::WriteTempfile {
516                            rela_path: res.rela_path.to_owned(),
517                            source: err,
518                        })?;
519                    tmp.with_mut(|f| {
520                        cmd.arg(f.path());
521                    })
522                    .map_err(|err| prepare_diff_command::Error::WriteTempfile {
523                        rela_path: res.rela_path.to_owned(),
524                        source: err,
525                    })?;
526                    cmd.arg(res.id.to_string()).arg(res.mode.as_octal_str().to_string());
527                    let tmp = tmp.close().map_err(|err| prepare_diff_command::Error::WriteTempfile {
528                        rela_path: res.rela_path.to_owned(),
529                        source: err,
530                    })?;
531                    Some(tmp)
532                }
533                resource::Data::Binary { .. } => return Err(prepare_diff_command::Error::SourceOrDestinationBinary),
534            };
535            Ok(tmpfile)
536        }
537
538        let (old, new) = self
539            .resources()
540            .ok_or(prepare_diff_command::Error::SourceOrDestinationUnset)?;
541        let mut cmd: std::process::Command = gix_command::prepare(gix_path::from_bstring(diff_command))
542            .with_context(context)
543            .env("GIT_DIFF_PATH_COUNTER", (count + 1).to_string())
544            .env("GIT_DIFF_PATH_TOTAL", total.to_string())
545            .stdin(Stdio::inherit())
546            .stdout(Stdio::inherit())
547            .stderr(Stdio::inherit())
548            .into();
549
550        cmd.arg(gix_path::from_bstr(old.rela_path).into_owned());
551        let mut out = prepare_diff_command::Command {
552            cmd,
553            old: None,
554            new: None,
555        };
556
557        out.old = add_resource(&mut out.cmd, old)?;
558        out.new = add_resource(&mut out.cmd, new)?;
559
560        if old.rela_path != new.rela_path {
561            out.cmd.arg(gix_path::from_bstr(new.rela_path).into_owned());
562        }
563
564        Ok(out)
565    }
566
567    /// Returns the resource of the given kind if it was set.
568    pub fn resource(&self, kind: ResourceKind) -> Option<Resource<'_>> {
569        let key = match kind {
570            ResourceKind::OldOrSource => self.old.as_ref(),
571            ResourceKind::NewOrDestination => self.new.as_ref(),
572        }?;
573        Resource::new(key, self.diff_cache.get(key)?).into()
574    }
575
576    /// Obtain the two resources that were previously set as `(OldOrSource, NewOrDestination)`, if both are set and available.
577    ///
578    /// This is useful if one wishes to manually prepare the diff, maybe for invoking external programs, instead of relying on
579    /// [`Self::prepare_diff()`].
580    pub fn resources(&self) -> Option<(Resource<'_>, Resource<'_>)> {
581        let key = &self.old.as_ref()?;
582        let value = self.diff_cache.get(key)?;
583        let old = Resource::new(key, value);
584
585        let key = &self.new.as_ref()?;
586        let value = self.diff_cache.get(key)?;
587        let new = Resource::new(key, value);
588        Some((old, new))
589    }
590
591    /// Prepare a diff operation on the [previously set](Self::set_resource()) [old](ResourceKind::OldOrSource) and
592    /// [new](ResourceKind::NewOrDestination) resources.
593    ///
594    /// The returned outcome allows to easily perform diff operations, based on the [`prepare_diff::Outcome::operation`] field,
595    /// which hints at what should be done.
596    pub fn prepare_diff(&mut self) -> Result<prepare_diff::Outcome<'_>, prepare_diff::Error> {
597        let old_key = &self.old.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
598        let old = self
599            .diff_cache
600            .get(old_key)
601            .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
602        let new_key = &self.new.as_ref().ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
603        let new = self
604            .diff_cache
605            .get(new_key)
606            .ok_or(prepare_diff::Error::SourceOrDestinationUnset)?;
607        let mut out = {
608            let old = Resource::new(old_key, old);
609            let new = Resource::new(new_key, new);
610            prepare_diff::Outcome {
611                operation: prepare_diff::Operation::SourceOrDestinationIsBinary,
612                old_or_new_is_derived: old.data.is_derived() || new.data.is_derived(),
613                old,
614                new,
615            }
616        };
617
618        match (old.conversion.data, new.conversion.data) {
619            (None, None) => return Err(prepare_diff::Error::SourceAndDestinationRemoved),
620            (Some(pipeline::Data::Binary { .. }), _) | (_, Some(pipeline::Data::Binary { .. })) => return Ok(out),
621            _either_missing_or_non_binary => {
622                if let Some(command) = old
623                    .conversion
624                    .driver_index
625                    .and_then(|idx| self.filter.drivers[idx].command.as_deref())
626                    .filter(|_| self.options.skip_internal_diff_if_external_is_configured)
627                {
628                    out.operation = prepare_diff::Operation::ExternalCommand {
629                        command: command.as_bstr(),
630                    };
631                    return Ok(out);
632                }
633            }
634        }
635
636        out.operation = prepare_diff::Operation::InternalDiff {
637            algorithm: old
638                .conversion
639                .driver_index
640                .and_then(|idx| self.filter.drivers[idx].algorithm)
641                .or(self.options.algorithm)
642                .unwrap_or_default(),
643        };
644        Ok(out)
645    }
646
647    /// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared.
648    ///
649    /// Use this method to clear the cache, releasing memory. Note that this will also lose all information about resources
650    /// which means diffs would fail unless the resources are set again.
651    ///
652    /// Note that this also has to be called if the same resource is going to be diffed in different states, i.e. using different
653    /// `id`s, but the same `rela_path`.
654    pub fn clear_resource_cache(&mut self) {
655        self.old = None;
656        self.new = None;
657        self.diff_cache.clear();
658        self.free_list.clear();
659    }
660
661    /// Every call to [set_resource()](Self::set_resource()) will keep the diffable data in memory, and that will never be cleared.
662    ///
663    /// Use this method to clear the cache, but keep the previously used buffers around for later re-use.
664    ///
665    /// If there are more buffers on the free-list than there are stored sources, we half that amount each time this method is called,
666    /// or keep as many resources as were previously stored, or 2 buffers, whatever is larger.
667    /// If there are fewer buffers in the free-list than are in the resource cache, we will keep as many as needed to match the
668    /// number of previously stored resources.
669    ///
670    /// Returns the number of available buffers.
671    pub fn clear_resource_cache_keep_allocation(&mut self) -> usize {
672        self.old = None;
673        self.new = None;
674
675        let diff_cache = std::mem::take(&mut self.diff_cache);
676        match self.free_list.len().cmp(&diff_cache.len()) {
677            Ordering::Less => {
678                let to_take = diff_cache.len() - self.free_list.len();
679                self.free_list
680                    .extend(diff_cache.into_values().map(|v| v.buffer).take(to_take));
681            }
682            Ordering::Equal => {}
683            Ordering::Greater => {
684                let new_len = (self.free_list.len() / 2).max(diff_cache.len()).max(2);
685                self.free_list.truncate(new_len);
686            }
687        }
688        self.free_list.len()
689    }
690}
691
692impl Platform {
693    fn set_resource_inner(
694        &mut self,
695        id: gix_hash::ObjectId,
696        mode: gix_object::tree::EntryKind,
697        rela_path: &BStr,
698        kind: ResourceKind,
699        objects: &impl gix_object::FindObjectOrHeader,
700    ) -> Result<(), set_resource::Error> {
701        if matches!(
702            mode,
703            gix_object::tree::EntryKind::Commit | gix_object::tree::EntryKind::Tree
704        ) {
705            return Err(set_resource::Error::InvalidMode { mode });
706        }
707        let storage = match kind {
708            ResourceKind::OldOrSource => &mut self.old,
709            ResourceKind::NewOrDestination => &mut self.new,
710        }
711        .get_or_insert_with(Default::default);
712
713        storage.id = id;
714        storage.set_location(rela_path);
715        storage.is_link = matches!(mode, gix_object::tree::EntryKind::Link);
716        storage.use_id = self.filter.roots.by_kind(kind).is_none();
717
718        if self.diff_cache.contains_key(storage) {
719            return Ok(());
720        }
721        let entry =
722            self.attr_stack
723                .at_entry(rela_path, None, objects)
724                .map_err(|err| set_resource::Error::Attributes {
725                    source: err,
726                    kind,
727                    rela_path: rela_path.to_owned(),
728                })?;
729        let mut buf = self.free_list.pop().unwrap_or_default();
730        let out = self.filter.convert_to_diffable(
731            &id,
732            mode,
733            rela_path,
734            kind,
735            &mut |_, out| {
736                let _ = entry.matching_attributes(out);
737            },
738            objects,
739            self.filter_mode,
740            &mut buf,
741        )?;
742        let key = storage.clone();
743        assert!(
744            self.diff_cache
745                .insert(
746                    key,
747                    CacheValue {
748                        conversion: out,
749                        mode,
750                        buffer: buf,
751                    },
752                )
753                .is_none(),
754            "The key impl makes clashes impossible with our usage"
755        );
756        Ok(())
757    }
758}