gix_filter/pipeline/
convert.rs

1use std::{io::Read, path::Path};
2
3use bstr::BStr;
4
5use crate::{driver, eol, ident, pipeline::util::Configuration, worktree, Pipeline};
6
7///
8pub mod configuration {
9    use bstr::BString;
10
11    /// Errors related to the configuration of filter attributes.
12    #[derive(Debug, thiserror::Error)]
13    #[allow(missing_docs)]
14    pub enum Error {
15        #[error("The encoding named '{name}' isn't available")]
16        UnknownEncoding { name: BString },
17        #[error("Encodings must be names, like UTF-16, and cannot be booleans.")]
18        InvalidEncoding,
19    }
20}
21
22///
23pub mod to_git {
24    /// A function that fills `buf` `fn(&mut buf)` with the data stored in the index of the file that should be converted.
25    pub type IndexObjectFn<'a> = dyn FnMut(&mut Vec<u8>) -> Result<Option<()>, gix_object::find::Error> + 'a;
26
27    /// The error returned by [Pipeline::convert_to_git()][super::Pipeline::convert_to_git()].
28    #[derive(Debug, thiserror::Error)]
29    #[allow(missing_docs)]
30    pub enum Error {
31        #[error(transparent)]
32        Eol(#[from] crate::eol::convert_to_git::Error),
33        #[error(transparent)]
34        Worktree(#[from] crate::worktree::encode_to_git::Error),
35        #[error(transparent)]
36        Driver(#[from] crate::driver::apply::Error),
37        #[error(transparent)]
38        Configuration(#[from] super::configuration::Error),
39        #[error("Copy of driver process output to memory failed")]
40        ReadProcessOutputToBuffer(#[from] std::io::Error),
41        #[error("Could not allocate buffer")]
42        OutOfMemory(#[from] std::collections::TryReserveError),
43    }
44}
45
46///
47pub mod to_worktree {
48    /// The error returned by [Pipeline::convert_to_worktree()][super::Pipeline::convert_to_worktree()].
49    #[derive(Debug, thiserror::Error)]
50    #[allow(missing_docs)]
51    pub enum Error {
52        #[error(transparent)]
53        Ident(#[from] crate::ident::apply::Error),
54        #[error(transparent)]
55        Eol(#[from] crate::eol::convert_to_worktree::Error),
56        #[error(transparent)]
57        Worktree(#[from] crate::worktree::encode_to_worktree::Error),
58        #[error(transparent)]
59        Driver(#[from] crate::driver::apply::Error),
60        #[error(transparent)]
61        Configuration(#[from] super::configuration::Error),
62    }
63}
64
/// Access
impl Pipeline {
    /// Convert a `src` stream (to be found at `rela_path`) to a representation suitable for storage in `git`
    /// based on the `attributes` at `rela_path` which is passed as first argument.
    /// When converting to `crlf`, and depending on the configuration, `index_object` might be called to obtain the index
    /// version of `src` if available. It can return `Ok(None)` if this information isn't available.
    ///
    /// Filters are applied in this order: the external driver (`clean`), `working-tree-encoding`, `eol`,
    /// and finally the undoing of `ident`.
    ///
    /// The outcome is
    ///
    /// * [`ToGitOutcome::Process`] if the driver produced output and no built-in filter needs to run,
    /// * [`ToGitOutcome::Buffer`] if the input (or the driver's output) was read into memory for the built-in filters,
    /// * [`ToGitOutcome::Unchanged`] otherwise, handing `src` back to the caller.
    ///
    /// ### Errors
    ///
    /// Fails if the configuration can't be obtained, if the driver can't be applied, if reading `src` or the
    /// driver output into memory fails, or if any of the built-in filters fail.
    pub fn convert_to_git<R>(
        &mut self,
        mut src: R,
        rela_path: &Path,
        attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome),
        index_object: &mut to_git::IndexObjectFn<'_>,
    ) -> Result<ToGitOutcome<'_, R>, to_git::Error>
    where
        R: std::io::Read,
    {
        // The configuration lookup and the driver context both work on this byte-string version of `rela_path`.
        let bstr_rela_path = gix_path::to_unix_separators_on_windows(gix_path::into_bstr(rela_path));
        let Configuration {
            driver,
            digest,
            _attr_digest: _,
            encoding,
            apply_ident_filter,
        } = Configuration::at_path(
            bstr_rela_path.as_ref(),
            &self.options.drivers,
            &mut self.attrs,
            attributes,
            self.options.eol_config,
        )?;

        // Set once `self.bufs.src` holds the data to convert, i.e. once a stream was consumed.
        let mut in_src_buffer = false;
        // this is just an approximation, but it's as good as it gets without reading the actual input.
        // The probe runs on a lone CRLF, without round-trip check and without index object.
        let would_convert_eol = eol::convert_to_git(
            b"\r\n",
            digest,
            &mut self.bufs.dest,
            &mut |_| Ok(None),
            eol::convert_to_git::Options {
                round_trip_check: None,
                config: self.options.eol_config,
            },
        )?;

        if let Some(driver) = driver {
            // `apply()` may yield `None`, in which case `src` is handled as if there was no driver.
            if let Some(mut read) = self.processes.apply(
                driver,
                &mut src,
                driver::Operation::Clean,
                self.context.with_path(bstr_rela_path.as_ref()),
            )? {
                if !apply_ident_filter && encoding.is_none() && !would_convert_eol {
                    // Note that this is not typically a benefit in terms of saving memory as most filters
                    // aren't expected to make the output file larger. It's more about who is waiting for the filter's
                    // output to arrive, which won't be us now. For `git-lfs` it definitely won't matter though.
                    return Ok(ToGitOutcome::Process(read));
                }
                // Built-in filters follow and they work on buffers, so capture the driver output in memory.
                self.bufs.clear();
                read.read_to_end(&mut self.bufs.src)?;
                in_src_buffer = true;
            }
        }
        // Without driver output, read the input itself, but only if a built-in filter might have to run.
        if !in_src_buffer && (apply_ident_filter || encoding.is_some() || would_convert_eol) {
            self.bufs.clear();
            src.read_to_end(&mut self.bufs.src)?;
            in_src_buffer = true;
        }

        // Each stage below reads from `bufs.src` and writes to `bufs.dest`, swapping the buffers afterwards
        // so the next stage reads what the previous one wrote.
        if let Some(encoding) = encoding {
            worktree::encode_to_git(
                &self.bufs.src,
                encoding,
                &mut self.bufs.dest,
                // Only encodings that are explicitly listed in the options are checked for round-tripping.
                if self.options.encodings_with_roundtrip_check.contains(&encoding) {
                    worktree::encode_to_git::RoundTripCheck::Fail
                } else {
                    worktree::encode_to_git::RoundTripCheck::Skip
                },
            )?;
            self.bufs.swap();
        }

        // The real eol conversion, this time with access to the index object and the configured round-trip check.
        // Buffers are swapped only if it reports that it performed a conversion.
        if eol::convert_to_git(
            &self.bufs.src,
            digest,
            &mut self.bufs.dest,
            &mut |buf| index_object(buf),
            eol::convert_to_git::Options {
                round_trip_check: self.options.crlf_roundtrip_check.to_eol_roundtrip_check(rela_path),
                config: self.options.eol_config,
            },
        )? {
            self.bufs.swap();
        }

        // Likewise, swap only if undoing `ident` reports that it did something.
        if apply_ident_filter && ident::undo(&self.bufs.src, &mut self.bufs.dest)? {
            self.bufs.swap();
        }
        // If nothing was read into memory, `src` wasn't consumed and can be handed back as is.
        Ok(if in_src_buffer {
            ToGitOutcome::Buffer(&self.bufs.src)
        } else {
            ToGitOutcome::Unchanged(src)
        })
    }

    /// Convert a `src` buffer located at `rela_path` (in the index) from what's in `git` to the worktree representation,
    /// asking for `attributes` with `rela_path` as first argument to configure the operation automatically.
    /// `can_delay` defines if long-running processes can delay their response, and if they *choose* to the caller has to
    /// specifically deal with it by interacting with the [`driver_state`][Pipeline::driver_state_mut()] directly.
    ///
    /// The reason `src` is a buffer is to indicate that `git` generally doesn't do well streaming data, so it should be small enough
    /// to be performant while being held in memory. This is typically the case, especially if `git-lfs` is used as intended.
    ///
    /// Filters are applied in this order: `ident`, `eol`, `working-tree-encoding`, and finally the external
    /// driver (`smudge`). If the driver produces a result, [`ToWorktreeOutcome::Process`] is returned
    /// even if built-in filters ran before it.
    ///
    /// ### Errors
    ///
    /// Fails if the configuration can't be obtained, or if any of the filters or the driver fail.
    pub fn convert_to_worktree<'input>(
        &mut self,
        src: &'input [u8],
        rela_path: &BStr,
        attributes: &mut dyn FnMut(&BStr, &mut gix_attributes::search::Outcome),
        can_delay: driver::apply::Delay,
    ) -> Result<ToWorktreeOutcome<'input, '_>, to_worktree::Error> {
        let Configuration {
            driver,
            digest,
            _attr_digest: _,
            encoding,
            apply_ident_filter,
        } = Configuration::at_path(
            rela_path,
            &self.options.drivers,
            &mut self.attrs,
            attributes,
            self.options.eol_config,
        )?;

        // Use the caller's `src` as the initial source buffer.
        // NOTE(review): presumably this avoids copying `src` - confirm in `use_foreign_src()`.
        let mut bufs = self.bufs.use_foreign_src(src);
        // Each stage reads the current source and writes to `dest`; buffers are swapped if a stage produced output.
        let (src, dest) = bufs.src_and_dest();
        if apply_ident_filter && ident::apply(src, self.options.object_hash, dest)? {
            bufs.swap();
        }

        let (src, dest) = bufs.src_and_dest();
        if eol::convert_to_worktree(src, digest, dest, self.options.eol_config)? {
            bufs.swap();
        }

        if let Some(encoding) = encoding {
            let (src, dest) = bufs.src_and_dest();
            // Unlike the stages above, a configured encoding always results in a swap.
            worktree::encode_to_worktree(src, encoding, dest)?;
            bufs.swap();
        }

        if let Some(driver) = driver {
            // The driver runs last and reads the result of all previous stages.
            let (mut src, _dest) = bufs.src_and_dest();
            if let Some(maybe_delayed) = self.processes.apply_delayed(
                driver,
                &mut src,
                driver::Operation::Smudge,
                can_delay,
                self.context.with_path(rela_path),
            )? {
                return Ok(ToWorktreeOutcome::Process(maybe_delayed));
            }
        }

        // NOTE(review): `ro_src` presumably remains set only while no swap replaced the caller's buffer - confirm.
        Ok(match bufs.ro_src {
            Some(src) => ToWorktreeOutcome::Unchanged(src),
            None => ToWorktreeOutcome::Buffer(bufs.src),
        })
    }
}
234
/// The result of a conversion with zero or more filters to be stored in git.
///
/// It can be consumed via [`std::io::Read`] no matter which variant it is.
pub enum ToGitOutcome<'pipeline, R> {
    /// The original input wasn't changed and the reader is still available for consumption.
    Unchanged(R),
    /// An external filter (and only that) was applied and its results *have to be consumed*.
    Process(Box<dyn std::io::Read + 'pipeline>),
    /// A reference to the result of one or more filters of which one didn't support streaming.
    ///
    /// This can happen if an `eol`, `working-tree-encoding` or `ident` filter is applied, possibly on top of an external filter.
    /// The data lives in a buffer owned by the pipeline.
    Buffer(&'pipeline [u8]),
}
246
/// The result of a conversion with zero or more filters.
///
/// ### Panics
///
/// If `std::io::Read` is used on it and the output is delayed, a panic will occur. The caller is responsible for either disallowing delayed
/// results or if allowed, handle them. Use [`is_delayed()`][Self::is_delayed()].
pub enum ToWorktreeOutcome<'input, 'pipeline> {
    /// The original input wasn't changed and the original buffer is present
    Unchanged(&'input [u8]),
    /// A reference to the result of one or more filters of which one didn't support streaming.
    ///
    /// This can happen if an `eol`, `working-tree-encoding` or `ident` filter is applied and no external filter
    /// produced a result afterwards.
    Buffer(&'pipeline [u8]),
    /// An external filter was applied as the last step, possibly after built-in filters, and its results
    /// *have to be consumed*. Note that the output might be delayed,
    /// which requires special handling to eventually receive it.
    Process(driver::apply::MaybeDelayed<'pipeline>),
}
264
265impl ToWorktreeOutcome<'_, '_> {
266    /// Return true if this outcome is delayed. In that case, one isn't allowed to use [`Read`] or cause a panic.
267    pub fn is_delayed(&self) -> bool {
268        matches!(
269            self,
270            ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_))
271        )
272    }
273
274    /// Returns `true` if the input buffer was actually changed, or `false` if it is returned directly.
275    pub fn is_changed(&self) -> bool {
276        !matches!(self, ToWorktreeOutcome::Unchanged(_))
277    }
278
279    /// Return a buffer if we contain one, or `None` otherwise.
280    ///
281    /// This method is useful only if it's clear that no driver is available, which may cause a stream to be returned and not a buffer.
282    pub fn as_bytes(&self) -> Option<&[u8]> {
283        match self {
284            ToWorktreeOutcome::Unchanged(b) | ToWorktreeOutcome::Buffer(b) => Some(b),
285            ToWorktreeOutcome::Process(_) => None,
286        }
287    }
288
289    /// Return a stream to read the drivers output from, if possible.
290    ///
291    /// Note that this is only the case if the driver process was applied last *and* didn't delay its output.
292    pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> {
293        match self {
294            ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_))
295            | ToWorktreeOutcome::Unchanged(_)
296            | ToWorktreeOutcome::Buffer(_) => None,
297            ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Immediate(read)) => Some(read),
298        }
299    }
300}
301
302impl std::io::Read for ToWorktreeOutcome<'_, '_> {
303    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
304        match self {
305            ToWorktreeOutcome::Unchanged(b) => b.read(buf),
306            ToWorktreeOutcome::Buffer(b) => b.read(buf),
307            ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Delayed(_)) => {
308                panic!("BUG: must not try to read delayed output")
309            }
310            ToWorktreeOutcome::Process(driver::apply::MaybeDelayed::Immediate(r)) => r.read(buf),
311        }
312    }
313}
314
315impl<R> std::io::Read for ToGitOutcome<'_, R>
316where
317    R: std::io::Read,
318{
319    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
320        match self {
321            ToGitOutcome::Unchanged(r) => r.read(buf),
322            ToGitOutcome::Process(r) => r.read(buf),
323            ToGitOutcome::Buffer(r) => r.read(buf),
324        }
325    }
326}
327
328impl<'a, R> ToGitOutcome<'a, R>
329where
330    R: std::io::Read,
331{
332    /// If we contain a buffer, and not a stream, return it.
333    pub fn as_bytes(&self) -> Option<&'a [u8]> {
334        match self {
335            ToGitOutcome::Unchanged(_) | ToGitOutcome::Process(_) => None,
336            ToGitOutcome::Buffer(b) => Some(b),
337        }
338    }
339
340    /// Return a stream to read the drivers output from. This is only possible if there is only a driver, and no other filter.
341    pub fn as_read(&mut self) -> Option<&mut (dyn std::io::Read + '_)> {
342        match self {
343            ToGitOutcome::Process(read) => Some(read),
344            ToGitOutcome::Unchanged(read) => Some(read),
345            ToGitOutcome::Buffer(_) => None,
346        }
347    }
348
349    /// Returns `true` if the input buffer was actually changed, or `false` if it is returned directly.
350    pub fn is_changed(&self) -> bool {
351        !matches!(self, ToGitOutcome::Unchanged(_))
352    }
353}