async_tar/
builder.rs

1use std::borrow::Cow;
2
3#[cfg(feature = "runtime-async-std")]
4use async_std::fs::Metadata;
5#[cfg(feature = "runtime-async-std")]
6use async_std::{
7    fs,
8    io::{self, Read, Write},
9    path::Path,
10    prelude::*,
11};
12#[cfg(feature = "runtime-tokio")]
13use std::fs::Metadata;
14#[cfg(feature = "runtime-tokio")]
15use std::path::Path;
16#[cfg(feature = "runtime-tokio")]
17use tokio::{
18    fs,
19    io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt},
20};
21#[cfg(feature = "runtime-tokio")]
22use tokio_stream::StreamExt;
23
24use crate::{
25    EntryType, Header,
26    header::{HeaderMode, bytes2path, path2bytes},
27    metadata, other, symlink_metadata,
28};
29
30/// A structure for building archives
31///
32/// This structure has methods for building up an archive from scratch into any
33/// arbitrary writer.
34///
35/// You **must** call [`finish`] or [`into_inner`] to finalize the archive.
36/// The `runtime-tokio` feature will panic on drop if not finalized.
37///
38/// [`into_inner`]: Builder::into_inner
39/// [`finish`]: Builder::finish
40pub struct Builder<W: Write + Unpin + Send + Sync> {
41    mode: HeaderMode,
42    follow: bool,
43    finished: bool,
44    obj: Option<W>,
45}
46
47impl<W: Write + Unpin + Send + Sync> Builder<W> {
48    /// Create a new archive builder with the underlying object as the
49    /// destination of all data written. The builder will use
50    /// `HeaderMode::Complete` by default.
51    pub fn new(obj: W) -> Builder<W> {
52        Builder {
53            mode: HeaderMode::Complete,
54            follow: true,
55            finished: false,
56            obj: Some(obj),
57        }
58    }
59
60    /// Changes the HeaderMode that will be used when reading fs Metadata for
61    /// methods that implicitly read metadata for an input Path. Notably, this
62    /// does _not_ apply to `append(Header)`.
63    pub fn mode(&mut self, mode: HeaderMode) {
64        self.mode = mode;
65    }
66
67    /// Follow symlinks, archiving the contents of the file they point to rather
68    /// than adding a symlink to the archive. Defaults to true.
69    pub fn follow_symlinks(&mut self, follow: bool) {
70        self.follow = follow;
71    }
72
73    /// Gets shared reference to the underlying object.
74    pub fn get_ref(&self) -> &W {
75        self.obj.as_ref().unwrap()
76    }
77
78    /// Gets mutable reference to the underlying object.
79    ///
80    /// Note that care must be taken while writing to the underlying
81    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
82    /// useful in the situations when one needs to be ensured that
83    /// tar entry was flushed to the disk.
84    pub fn get_mut(&mut self) -> &mut W {
85        self.obj.as_mut().unwrap()
86    }
87
88    /// Unwrap this archive, returning the underlying object.
89    ///
90    /// This function will finish writing the archive if the `finish` function
91    /// hasn't yet been called, returning any I/O error which happens during
92    /// that operation.
93    pub async fn into_inner(mut self) -> io::Result<W> {
94        if !self.finished {
95            self.finish().await?;
96        }
97        Ok(self.obj.take().unwrap())
98    }
99
100    /// Adds a new entry to this archive.
101    ///
102    /// This function will append the header specified, followed by contents of
103    /// the stream specified by `data`. To produce a valid archive the `size`
104    /// field of `header` must be the same as the length of the stream that's
105    /// being written. Additionally the checksum for the header should have been
106    /// set via the `set_cksum` method.
107    ///
108    /// Note that this will not attempt to seek the archive to a valid position,
109    /// so if the archive is in the middle of a read or some other similar
110    /// operation then this may corrupt the archive.
111    ///
112    /// Also note that after all entries have been written to an archive the
113    /// `finish` function needs to be called to finish writing the archive.
114    ///
115    /// # Errors
116    ///
117    /// This function will return an error for any intermittent I/O error which
118    /// occurs when either reading or writing.
119    ///
120    /// # Examples
121    ///
122    #[cfg_attr(feature = "runtime-async-std", doc = "```")]
123    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
124    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
125    /// #
126    /// use async_tar::{Builder, Header};
127    ///
128    /// let mut header = Header::new_gnu();
129    /// header.set_path("foo")?;
130    /// header.set_size(4);
131    /// header.set_cksum();
132    ///
133    /// let mut data: &[u8] = &[1, 2, 3, 4];
134    ///
135    /// let mut ar = Builder::new(Vec::new());
136    /// ar.append(&header, data).await?;
137    /// let data = ar.into_inner().await?;
138    /// #
139    /// # Ok(()) }) }
140    /// ```
141    pub async fn append<R: Read + Unpin + Send>(
142        &mut self,
143        header: &Header,
144        mut data: R,
145    ) -> io::Result<()> {
146        append(self.get_mut(), header, &mut data).await?;
147
148        Ok(())
149    }
150
151    /// Adds a new entry to this archive with the specified path.
152    ///
153    /// This function will set the specified path in the given header, which may
154    /// require appending a GNU long-name extension entry to the archive first.
155    /// The checksum for the header will be automatically updated via the
156    /// `set_cksum` method after setting the path. No other metadata in the
157    /// header will be modified.
158    ///
159    /// Then it will append the header, followed by contents of the stream
160    /// specified by `data`. To produce a valid archive the `size` field of
161    /// `header` must be the same as the length of the stream that's being
162    /// written.
163    ///
164    /// Note that this will not attempt to seek the archive to a valid position,
165    /// so if the archive is in the middle of a read or some other similar
166    /// operation then this may corrupt the archive.
167    ///
168    /// Also note that after all entries have been written to an archive the
169    /// `finish` function needs to be called to finish writing the archive.
170    ///
171    /// # Errors
172    ///
173    /// This function will return an error for any intermittent I/O error which
174    /// occurs when either reading or writing.
175    ///
176    /// # Examples
177    ///
178    /// ```
179    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
180    /// #
181    /// use async_tar::{Builder, Header};
182    ///
183    /// let mut header = Header::new_gnu();
184    /// header.set_size(4);
185    /// header.set_cksum();
186    ///
187    /// let mut data: &[u8] = &[1, 2, 3, 4];
188    ///
189    /// let mut ar = Builder::new(Vec::new());
190    /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
191    /// let data = ar.into_inner().await?;
192    /// #
193    /// # Ok(()) }) }
194    /// ```
195    pub async fn append_data<P: AsRef<Path>, R: Read + Unpin + Send>(
196        &mut self,
197        header: &mut Header,
198        path: P,
199        data: R,
200    ) -> io::Result<()> {
201        prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
202        header.set_cksum();
203        self.append(header, data).await?;
204
205        Ok(())
206    }
207
208    /// Adds a file on the local filesystem to this archive.
209    ///
210    /// This function will open the file specified by `path` and insert the file
211    /// into the archive with the appropriate metadata set, returning any I/O
212    /// error which occurs while writing. The path name for the file inside of
213    /// this archive will be the same as `path`, and it is required that the
214    /// path is a relative path.
215    ///
216    /// Note that this will not attempt to seek the archive to a valid position,
217    /// so if the archive is in the middle of a read or some other similar
218    /// operation then this may corrupt the archive.
219    ///
220    /// Also note that after all files have been written to an archive the
221    /// `finish` function needs to be called to finish writing the archive.
222    ///
223    /// # Examples
224    ///
225    /// ```no_run
226    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
227    /// #
228    /// use async_tar::Builder;
229    ///
230    /// let mut ar = Builder::new(Vec::new());
231    ///
232    /// ar.append_path("foo/bar.txt").await?;
233    /// #
234    /// # Ok(()) }) }
235    /// ```
236    pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
237        let mode = self.mode;
238        let follow = self.follow;
239        append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
240        Ok(())
241    }
242
243    /// Adds a file on the local filesystem to this archive under another name.
244    ///
245    /// This function will open the file specified by `path` and insert the file
246    /// into the archive as `name` with appropriate metadata set, returning any
247    /// I/O error which occurs while writing. The path name for the file inside
248    /// of this archive will be `name` is required to be a relative path.
249    ///
250    /// Note that this will not attempt to seek the archive to a valid position,
251    /// so if the archive is in the middle of a read or some other similar
252    /// operation then this may corrupt the archive.
253    ///
254    /// Also note that after all files have been written to an archive the
255    /// `finish` function needs to be called to finish writing the archive.
256    ///
257    /// # Examples
258    ///
259    /// ```no_run
260    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
261    /// #
262    /// use async_tar::Builder;
263    ///
264    /// let mut ar = Builder::new(Vec::new());
265    ///
266    /// // Insert the local file "foo/bar.txt" in the archive but with the name
267    /// // "bar/foo.txt".
268    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
269    /// #
270    /// # Ok(()) }) }
271    /// ```
272    pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
273        &mut self,
274        path: P,
275        name: N,
276    ) -> io::Result<()> {
277        let mode = self.mode;
278        let follow = self.follow;
279        append_path_with_name(
280            self.get_mut(),
281            path.as_ref(),
282            Some(name.as_ref()),
283            mode,
284            follow,
285        )
286        .await?;
287        Ok(())
288    }
289
290    /// Adds a file to this archive with the given path as the name of the file
291    /// in the archive.
292    ///
293    /// This will use the metadata of `file` to populate a `Header`, and it will
294    /// then append the file to the archive with the name `path`.
295    ///
296    /// Note that this will not attempt to seek the archive to a valid position,
297    /// so if the archive is in the middle of a read or some other similar
298    /// operation then this may corrupt the archive.
299    ///
300    /// Also note that after all files have been written to an archive the
301    /// `finish` function needs to be called to finish writing the archive.
302    ///
303    /// # Examples
304    ///
305    #[cfg_attr(feature = "runtime-async-std", doc = "```no_run")]
306    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
307    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
308    /// #
309    /// use async_std::fs::File;
310    /// use async_tar::Builder;
311    ///
312    /// let mut ar = Builder::new(Vec::new());
313    ///
314    /// // Open the file at one location, but insert it into the archive with a
315    /// // different name.
316    /// let mut f = File::open("foo/bar/baz.txt").await?;
317    /// ar.append_file("bar/baz.txt", &mut f).await?;
318    /// #
319    /// # Ok(()) }) }
320    /// ```
321    pub async fn append_file<P: AsRef<Path>>(
322        &mut self,
323        path: P,
324        file: &mut fs::File,
325    ) -> io::Result<()> {
326        let mode = self.mode;
327        append_file(self.get_mut(), path.as_ref(), file, mode).await?;
328        Ok(())
329    }
330
331    /// Adds a directory to this archive with the given path as the name of the
332    /// directory in the archive.
333    ///
334    /// This will use `stat` to populate a `Header`, and it will then append the
335    /// directory to the archive with the name `path`.
336    ///
337    /// Note that this will not attempt to seek the archive to a valid position,
338    /// so if the archive is in the middle of a read or some other similar
339    /// operation then this may corrupt the archive.
340    ///
341    /// Also note that after all files have been written to an archive the
342    /// `finish` function needs to be called to finish writing the archive.
343    ///
344    /// # Examples
345    ///
346    #[cfg_attr(feature = "runtime-async-std", doc = "```")]
347    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
348    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
349    /// #
350    /// use async_std::fs;
351    /// use async_tar::Builder;
352    ///
353    /// let mut ar = Builder::new(Vec::new());
354    ///
355    /// // Use the directory at one location, but insert it into the archive
356    /// // with a different name.
357    /// ar.append_dir("bardir", ".").await?;
358    /// #
359    /// # Ok(()) }) }
360    /// ```
361    pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
362    where
363        P: AsRef<Path>,
364        Q: AsRef<Path>,
365    {
366        let mode = self.mode;
367        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
368        Ok(())
369    }
370
371    /// Adds a directory and all of its contents (recursively) to this archive
372    /// with the given path as the name of the directory in the archive.
373    ///
374    /// Note that this will not attempt to seek the archive to a valid position,
375    /// so if the archive is in the middle of a read or some other similar
376    /// operation then this may corrupt the archive.
377    ///
378    /// Also note that after all files have been written to an archive the
379    /// `finish` function needs to be called to finish writing the archive.
380    ///
381    /// # Examples
382    ///
383    #[cfg_attr(feature = "runtime-async-std", doc = "```")]
384    #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
385    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
386    /// #
387    /// use async_std::fs;
388    /// use async_tar::Builder;
389    ///
390    /// let mut ar = Builder::new(Vec::new());
391    ///
392    /// // Use the directory at one location, but insert it into the archive
393    /// // with a different name.
394    /// ar.append_dir_all("bardir", ".").await?;
395    /// #
396    /// # Ok(()) })}
397    /// ```
398    pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
399    where
400        P: AsRef<Path>,
401        Q: AsRef<Path>,
402    {
403        let mode = self.mode;
404        let follow = self.follow;
405        append_dir_all(
406            self.get_mut(),
407            path.as_ref(),
408            src_path.as_ref(),
409            mode,
410            follow,
411        )
412        .await?;
413        Ok(())
414    }
415
416    /// Finish writing this archive, emitting the termination sections.
417    ///
418    /// This function should only be called when the archive has been written
419    /// entirely and if an I/O error happens the underlying object still needs
420    /// to be acquired.
421    ///
422    /// In most situations the `into_inner` method should be preferred.
423    pub async fn finish(&mut self) -> io::Result<()> {
424        if self.finished {
425            return Ok(());
426        }
427        self.finished = true;
428        self.get_mut().write_all(&[0; 1024]).await?;
429        Ok(())
430    }
431}
432
433async fn append(
434    mut dst: &mut (dyn Write + Unpin + Send),
435    header: &Header,
436    mut data: &mut (dyn Read + Unpin + Send),
437) -> io::Result<()> {
438    dst.write_all(header.as_bytes()).await?;
439    let len = io::copy(&mut data, &mut dst).await?;
440
441    // Pad with zeros if necessary.
442    let buf = [0; 512];
443    let remaining = 512 - (len % 512);
444    if remaining < 512 {
445        dst.write_all(&buf[..remaining as usize]).await?;
446    }
447
448    Ok(())
449}
450
451async fn append_path_with_name(
452    dst: &mut (dyn Write + Unpin + Sync + Send),
453    path: &Path,
454    name: Option<&Path>,
455    mode: HeaderMode,
456    follow: bool,
457) -> io::Result<()> {
458    let stat = if follow {
459        metadata(path).await.map_err(|err| {
460            io::Error::new(
461                err.kind(),
462                format!("{} when getting metadata for {}", err, path.display()),
463            )
464        })?
465    } else {
466        symlink_metadata(path).await.map_err(|err| {
467            io::Error::new(
468                err.kind(),
469                format!("{} when getting metadata for {}", err, path.display()),
470            )
471        })?
472    };
473    let ar_name = name.unwrap_or(path);
474    if stat.is_file() {
475        append_fs(
476            dst,
477            ar_name,
478            &stat,
479            &mut fs::File::open(path).await?,
480            mode,
481            None,
482        )
483        .await?;
484        Ok(())
485    } else if stat.is_dir() {
486        append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
487        Ok(())
488    } else if stat.file_type().is_symlink() {
489        let link_name = fs::read_link(path).await?;
490        append_fs(
491            dst,
492            ar_name,
493            &stat,
494            &mut io::empty(),
495            mode,
496            Some(&link_name),
497        )
498        .await?;
499        Ok(())
500    } else {
501        Err(other(&format!("{} has unknown file type", path.display())))
502    }
503}
504
505async fn append_file(
506    dst: &mut (dyn Write + Unpin + Send + Sync),
507    path: &Path,
508    file: &mut fs::File,
509    mode: HeaderMode,
510) -> io::Result<()> {
511    let stat = file.metadata().await?;
512    append_fs(dst, path, &stat, file, mode, None).await?;
513    Ok(())
514}
515
516async fn append_dir(
517    dst: &mut (dyn Write + Unpin + Send + Sync),
518    path: &Path,
519    src_path: &Path,
520    mode: HeaderMode,
521) -> io::Result<()> {
522    let stat = fs::metadata(src_path).await?;
523    append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
524    Ok(())
525}
526
527fn prepare_header(size: u64, entry_type: EntryType) -> Header {
528    let mut header = Header::new_gnu();
529    let name = b"././@LongLink";
530    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
531    header.set_mode(0o644);
532    header.set_uid(0);
533    header.set_gid(0);
534    header.set_mtime(0);
535    // + 1 to be compliant with GNU tar
536    header.set_size(size + 1);
537    header.set_entry_type(entry_type);
538    header.set_cksum();
539    header
540}
541
542async fn prepare_header_path(
543    dst: &mut (dyn Write + Unpin + Send + Sync),
544    header: &mut Header,
545    path: &Path,
546) -> io::Result<()> {
547    // Try to encode the path directly in the header, but if it ends up not
548    // working (probably because it's too long) then try to use the GNU-specific
549    // long name extension by emitting an entry which indicates that it's the
550    // filename.
551    if let Err(e) = header.set_path(path) {
552        let data = path2bytes(path)?;
553        let max = header.as_old().name.len();
554        //  Since e isn't specific enough to let us know the path is indeed too
555        //  long, verify it first before using the extension.
556        if data.len() < max {
557            return Err(e);
558        }
559        let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
560        // null-terminated string
561        let mut data2 = data.chain(io::repeat(0).take(1));
562        append(dst, &header2, &mut data2).await?;
563        // Truncate the path to store in the header we're about to emit to
564        // ensure we've got something at least mentioned.
565        let path = bytes2path(Cow::Borrowed(&data[..max]))?;
566        header.set_truncated_path_for_gnu_header(&path)?;
567    }
568    Ok(())
569}
570
571async fn prepare_header_link(
572    dst: &mut (dyn Write + Unpin + Send + Sync),
573    header: &mut Header,
574    link_name: &Path,
575) -> io::Result<()> {
576    // Same as previous function but for linkname
577    if let Err(e) = header.set_link_name(link_name) {
578        let data = path2bytes(link_name)?;
579        if data.len() < header.as_old().linkname.len() {
580            return Err(e);
581        }
582        let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
583        let mut data2 = data.chain(io::repeat(0).take(1));
584        append(dst, &header2, &mut data2).await?;
585    }
586    Ok(())
587}
588
589async fn append_fs(
590    dst: &mut (dyn Write + Unpin + Send + Sync),
591    path: &Path,
592    meta: &Metadata,
593    read: &mut (dyn Read + Unpin + Sync + Send),
594    mode: HeaderMode,
595    link_name: Option<&Path>,
596) -> io::Result<()> {
597    let mut header = Header::new_gnu();
598
599    prepare_header_path(dst, &mut header, path).await?;
600    header.set_metadata_in_mode(meta, mode);
601    if let Some(link_name) = link_name {
602        prepare_header_link(dst, &mut header, link_name).await?;
603    }
604    header.set_cksum();
605    append(dst, &header, read).await?;
606
607    Ok(())
608}
609
610async fn append_dir_all(
611    dst: &mut (dyn Write + Unpin + Send + Sync),
612    path: &Path,
613    src_path: &Path,
614    mode: HeaderMode,
615    follow: bool,
616) -> io::Result<()> {
617    let mut stack = vec![(src_path.to_path_buf(), true, false)];
618    while let Some((src, is_dir, is_symlink)) = stack.pop() {
619        let dest = path.join(src.strip_prefix(src_path).unwrap());
620
621        #[cfg(feature = "runtime-async-std")]
622        async fn check_is_dir(path: &Path) -> bool {
623            path.is_dir().await
624        }
625        #[cfg(feature = "runtime-tokio")]
626        async fn check_is_dir(path: &Path) -> bool {
627            fs::metadata(path)
628                .await
629                .map(|m| m.is_dir())
630                .unwrap_or(false)
631        }
632
633        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
634        if is_dir || (is_symlink && follow && check_is_dir(&src).await) {
635            #[cfg(feature = "runtime-async-std")]
636            let mut entries = fs::read_dir(&src).await?;
637            #[cfg(feature = "runtime-tokio")]
638            let mut entries = tokio_stream::wrappers::ReadDirStream::new(fs::read_dir(&src).await?);
639            while let Some(entry) = entries.next().await {
640                let entry = entry?;
641                let file_type = entry.file_type().await?;
642                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
643            }
644            if dest != Path::new("") {
645                append_dir(dst, &dest, &src, mode).await?;
646            }
647        } else if !follow && is_symlink {
648            let stat = fs::symlink_metadata(&src).await?;
649            let link_name = fs::read_link(&src).await?;
650            append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
651        } else {
652            append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
653        }
654    }
655    Ok(())
656}
657
/// With the async-std runtime the archive is finished automatically on drop.
#[cfg(feature = "runtime-async-std")]
impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
    fn drop(&mut self) {
        // Best effort: blocks until `finish` completes; an error cannot be
        // reported from `drop` and is discarded.
        let _ = async_std::task::block_on(self.finish());
    }
}
666
/// With the tokio runtime a builder that was never finalized panics on drop.
#[cfg(feature = "runtime-tokio")]
impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
    fn drop(&mut self) {
        // Nothing to complain about once the archive was finished or the
        // writer was already handed out.
        let finalized = self.finished || self.obj.is_none();
        // Stay quiet while the thread is already unwinding from a panic.
        if finalized || std::thread::panicking() {
            return;
        }
        panic!("Builder dropped without finalizing; call finish() or into_inner()");
    }
}
675
#[cfg(test)]
mod tests {
    use super::*;

    // Trait-bound assertions rather than runtime tests: the runtime's file
    // type, and a builder writing into such a file, must both be `Send` and
    // `Sync`.
    assert_impl_all!(fs::File: Send, Sync);
    assert_impl_all!(Builder<fs::File>: Send, Sync);
}