gix_archive/write.rs

use gix_worktree_stream::{Entry, Stream};

use crate::{Error, Format, Options};

#[cfg(feature = "zip")]
use std::io::Write;
/// Write all stream entries in `stream`, as provided by `next_entry(stream)`, to `out`, configured according to `opts`,
/// which also includes the streaming format.
///
/// ### Performance
///
/// * The caller should be sure `out` is fast enough. If in doubt, wrap it in a [`std::io::BufWriter`].
/// * Further, big files aren't suitable for archival into `tar` archives, as the size of each entry's stream must be
///   known prior to writing its header.
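///
/// ### Example
///
/// A minimal sketch (not taken from this crate's tests) of an illustrative helper that writes a `tar` archive;
/// obtaining the `Stream` itself, e.g. via `gix_worktree_stream::from_tree()`, is assumed to happen elsewhere.
///
/// ```ignore
/// use gix_archive::{write_stream, Format, Options};
/// use gix_worktree_stream::Stream;
///
/// fn archive_to_tar(stream: &mut Stream, out: impl std::io::Write) -> Result<(), gix_archive::Error> {
///     let opts = Options {
///         format: Format::Tar,
///         tree_prefix: None,
///         modification_time: 0,
///     };
///     // Forward entries unchanged; a caller could also filter or post-process them here.
///     write_stream(stream, |s| s.next_entry(), out, opts)
/// }
/// ```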
#[cfg_attr(not(feature = "tar"), allow(unused_mut, unused_variables))]
pub fn write_stream<NextFn>(
    stream: &mut Stream,
    mut next_entry: NextFn,
    out: impl std::io::Write,
    opts: Options,
) -> Result<(), Error>
where
    NextFn: FnMut(&mut Stream) -> Result<Option<Entry<'_>>, gix_worktree_stream::entry::Error>,
{
    if opts.format == Format::InternalTransientNonPersistable {
        return Err(Error::InternalFormatMustNotPersist);
    }
    #[cfg(any(feature = "tar", feature = "tar_gz"))]
    {
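        // One writer per enabled archive format; formats not compiled in have no variant here.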
        enum State<W: std::io::Write> {
            #[cfg(feature = "tar")]
            Tar((tar::Builder<W>, Vec<u8>)),
            #[cfg(feature = "tar_gz")]
            TarGz((tar::Builder<flate2::write::GzEncoder<W>>, Vec<u8>)),
        }

        impl<W: std::io::Write> State<W> {
            pub fn new(format: Format, mtime: gix_date::SecondsSinceUnixEpoch, out: W) -> Result<Self, Error> {
                match format {
                    Format::InternalTransientNonPersistable => unreachable!("handled earlier"),
                    Format::Zip { .. } => Err(Error::ZipWithoutSeek),
                    Format::Tar => {
                        #[cfg(feature = "tar")]
                        {
                            Ok(State::Tar((
                                {
                                    let mut ar = tar::Builder::new(out);
                                    ar.mode(tar::HeaderMode::Deterministic);
                                    ar
                                },
                                Vec::with_capacity(64 * 1024),
                            )))
                        }
                        #[cfg(not(feature = "tar"))]
                        {
                            Err(Error::SupportNotCompiledIn { wanted: Format::Tar })
                        }
                    }
                    Format::TarGz { compression_level } => {
                        #[cfg(feature = "tar_gz")]
                        {
                            Ok(State::TarGz((
                                {
                                    let gz = flate2::GzBuilder::new().mtime(mtime as u32).write(
                                        out,
                                        match compression_level {
                                            None => flate2::Compression::default(),
                                            Some(level) => flate2::Compression::new(u32::from(level)),
                                        },
                                    );
                                    let mut ar = tar::Builder::new(gz);
                                    ar.mode(tar::HeaderMode::Deterministic);
                                    ar
                                },
                                Vec::with_capacity(64 * 1024),
                            )))
                        }
                        #[cfg(not(feature = "tar_gz"))]
                        {
                            Err(Error::SupportNotCompiledIn {
                                wanted: Format::TarGz {
                                    compression_level: None,
                                },
                            })
                        }
                    }
                }
            }
        }

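        // Create the format-specific writer once, then drain the stream entry by entry.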
        let mut state = State::new(opts.format, opts.modification_time, out)?;
        while let Some(entry) = next_entry(stream)? {
            match &mut state {
                #[cfg(feature = "tar")]
                State::Tar((ar, buf)) => {
                    append_tar_entry(ar, buf, entry, opts.modification_time, &opts)?;
                }
                #[cfg(feature = "tar_gz")]
                State::TarGz((ar, buf)) => {
                    append_tar_entry(ar, buf, entry, opts.modification_time, &opts)?;
                }
            }
        }

        match state {
            #[cfg(feature = "tar")]
            State::Tar((mut ar, _)) => {
                ar.finish()?;
            }
            #[cfg(feature = "tar_gz")]
            State::TarGz((ar, _)) => {
                ar.into_inner()?.finish()?;
            }
        }
    }
    Ok(())
}

/// Like [`write_stream()`], but requires [`std::io::Seek`] for `out`.
///
/// Note that `zip` is able to stream big files, which our `tar` implementation is not able to do; this makes it the
/// only suitable container for huge files from `git-lfs` without consuming excessive amounts of memory.
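///
/// ### Example
///
/// A minimal sketch (not taken from this crate's tests) of producing a `zip` archive in memory, using
/// [`std::io::Cursor`] to provide the required [`std::io::Seek`] implementation; obtaining the `Stream`
/// itself is assumed to happen elsewhere.
///
/// ```ignore
/// use gix_archive::{write_stream_seek, Format, Options};
/// use gix_worktree_stream::Stream;
///
/// fn archive_to_zip(stream: &mut Stream) -> Result<Vec<u8>, gix_archive::Error> {
///     let mut out = std::io::Cursor::new(Vec::new());
///     let opts = Options {
///         format: Format::Zip { compression_level: Some(6) },
///         tree_prefix: None,
///         modification_time: 0,
///     };
///     write_stream_seek(stream, |s| s.next_entry(), &mut out, opts)?;
///     Ok(out.into_inner())
/// }
/// ```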
#[cfg_attr(not(feature = "zip"), allow(unused_mut, unused_variables))]
pub fn write_stream_seek<NextFn>(
    stream: &mut Stream,
    mut next_entry: NextFn,
    out: impl std::io::Write + std::io::Seek,
    opts: Options,
) -> Result<(), Error>
where
    NextFn: FnMut(&mut Stream) -> Result<Option<Entry<'_>>, gix_worktree_stream::entry::Error>,
{
    let compression_level = match opts.format {
        Format::Zip { compression_level } => compression_level.map(i64::from),
        _other => return write_stream(stream, next_entry, out, opts),
    };

    #[cfg(feature = "zip")]
    {
        let mut ar = rawzip::ZipArchiveWriter::new(out);
        let mut buf = Vec::new();
        let mtime = rawzip::time::UtcDateTime::from_unix(opts.modification_time);
        while let Some(entry) = next_entry(stream)? {
            append_zip_entry(
                &mut ar,
                entry,
                &mut buf,
                mtime,
                compression_level,
                opts.tree_prefix.as_ref(),
            )?;
        }
        ar.finish().map_err(std::io::Error::other)?;
    }

    Ok(())
}

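/// Append a single `entry` from the worktree stream to the zip archive `ar`, using `buf` as scratch space.
///
/// Blobs are deflate-compressed via `flate2`, trees and submodule commits become directory entries, and symlinks
/// are stored uncompressed with their target as the entry's content.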
#[cfg(feature = "zip")]
fn append_zip_entry<W: std::io::Write + std::io::Seek>(
    ar: &mut rawzip::ZipArchiveWriter<W>,
    mut entry: gix_worktree_stream::Entry<'_>,
    buf: &mut Vec<u8>,
    mtime: rawzip::time::UtcDateTime,
    compression_level: Option<i64>,
    tree_prefix: Option<&bstr::BString>,
) -> Result<(), Error> {
    use bstr::ByteSlice;
    let path = add_prefix(entry.relative_path(), tree_prefix).into_owned();
    let unix_permissions = if entry.mode.is_executable() { 0o755 } else { 0o644 };
    let path = path.to_str().map_err(|_| {
        Error::Io(std::io::Error::new(
            std::io::ErrorKind::InvalidData,
            format!("Invalid UTF-8 in entry path: {path:?}"),
        ))
    })?;

    match entry.mode.kind() {
        gix_object::tree::EntryKind::Blob | gix_object::tree::EntryKind::BlobExecutable => {
            let file_builder = ar
                .new_file(path)
                .compression_method(rawzip::CompressionMethod::Deflate)
                .last_modified(mtime)
                .unix_permissions(unix_permissions);

            let (mut zip_entry, config) = file_builder.start().map_err(std::io::Error::other)?;

            // Use flate2 for compression. Level 9 is the maximum compression level for deflate.
            let encoder = flate2::write::DeflateEncoder::new(
                &mut zip_entry,
                match compression_level {
                    None => flate2::Compression::default(),
                    Some(level) => flate2::Compression::new(level.clamp(0, 9) as u32),
                },
            );
            let mut writer = config.wrap(encoder);
            std::io::copy(&mut entry, &mut writer)?;
            let (encoder, descriptor) = writer.finish().map_err(std::io::Error::other)?;
            encoder.finish()?;
            zip_entry.finish(descriptor).map_err(std::io::Error::other)?;
        }
        gix_object::tree::EntryKind::Tree | gix_object::tree::EntryKind::Commit => {
            // rawzip requires directory paths to end with '/'
            let mut dir_path = path.to_owned();
            if !dir_path.ends_with('/') {
                dir_path.push('/');
            }
            ar.new_dir(&dir_path)
                .last_modified(mtime)
                .unix_permissions(unix_permissions)
                .create()
                .map_err(std::io::Error::other)?;
        }
        gix_object::tree::EntryKind::Link => {
            buf.clear();
            std::io::copy(&mut entry, buf)?;

            // For symlinks, we need to create a file with symlink permissions
            let symlink_path = path;
            let target = buf.as_bstr().to_str().map_err(|_| {
                Error::Io(std::io::Error::new(
                    std::io::ErrorKind::InvalidData,
                    format!(
                        "Invalid UTF-8 in symlink target for entry '{symlink_path}': {:?}",
                        buf.as_bstr()
                    ),
                ))
            })?;

            let (mut zip_entry, config) = ar
                .new_file(symlink_path)
                .compression_method(rawzip::CompressionMethod::Store)
                .last_modified(mtime)
                .unix_permissions(0o120644) // Symlink mode
                .start()
                .map_err(std::io::Error::other)?;

            let mut writer = config.wrap(&mut zip_entry);
            writer.write_all(target.as_bytes())?;
            let (_, descriptor) = writer.finish().map_err(std::io::Error::other)?;
            zip_entry.finish(descriptor).map_err(std::io::Error::other)?;
        }
    }
    Ok(())
}

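/// Append a single `entry` from the worktree stream to the tar archive `ar`.
///
/// The entry's content is buffered in `buf` first, as the tar header needs to know the entry's size up front.
/// Symlinks are written as symlink entries whose target is read from the stream.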
#[cfg(any(feature = "tar", feature = "tar_gz"))]
fn append_tar_entry<W: std::io::Write>(
    ar: &mut tar::Builder<W>,
    buf: &mut Vec<u8>,
    mut entry: gix_worktree_stream::Entry<'_>,
    mtime_seconds_since_epoch: i64,
    opts: &Options,
) -> Result<(), Error> {
    let mut header = tar::Header::new_gnu();
    header.set_mtime(mtime_seconds_since_epoch as u64);
    header.set_entry_type(tar_entry_type(entry.mode));
    header.set_mode(if entry.mode.is_executable() { 0o755 } else { 0o644 });
    buf.clear();
    std::io::copy(&mut entry, buf)?;

    let path = gix_path::from_bstr(add_prefix(entry.relative_path(), opts.tree_prefix.as_ref()));
    header.set_size(buf.len() as u64);

    if entry.mode.is_link() {
        use bstr::ByteSlice;
        let target = gix_path::from_bstr(buf.as_bstr());
        header.set_entry_type(tar::EntryType::Symlink);
        header.set_size(0);
        ar.append_link(&mut header, path, target)?;
    } else {
        ar.append_data(&mut header, path, buf.as_slice())?;
    }
    Ok(())
}

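/// Map the git tree entry `mode` to the corresponding tar entry type.
///
/// Note that `EntryKind::Link` maps to `EntryType::Link` here; `append_tar_entry()` overrides it with
/// `EntryType::Symlink` once the link target is known.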
#[cfg(any(feature = "tar", feature = "tar_gz"))]
fn tar_entry_type(mode: gix_object::tree::EntryMode) -> tar::EntryType {
    use gix_object::tree::EntryKind;
    use tar::EntryType;
    match mode.kind() {
        EntryKind::Tree | EntryKind::Commit => EntryType::Directory,
        EntryKind::Blob => EntryType::Regular,
        EntryKind::BlobExecutable => EntryType::Regular,
        EntryKind::Link => EntryType::Link,
    }
}

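/// Return `relative_path`, with `prefix` prepended verbatim if one is set.
///
/// For instance, a prefix of `my-project-1.0/` turns the entry path `src/lib.rs` into `my-project-1.0/src/lib.rs`;
/// as no separator is inserted, a directory-style prefix should end with a slash.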
#[cfg(any(feature = "tar", feature = "tar_gz", feature = "zip"))]
fn add_prefix<'a>(relative_path: &'a bstr::BStr, prefix: Option<&bstr::BString>) -> std::borrow::Cow<'a, bstr::BStr> {
    use std::borrow::Cow;
    match prefix {
        None => Cow::Borrowed(relative_path),
        Some(prefix) => {
            use bstr::ByteVec;
            let mut buf = prefix.clone();
            buf.push_str(relative_path);
            Cow::Owned(buf)
        }
    }
}