tokio_tar/
builder.rs

1use crate::{
2    header::{bytes2path, path2bytes, HeaderMode},
3    other, EntryType, Header,
4};
5use std::{borrow::Cow, fs::Metadata, path::Path};
6use tokio::{
7    fs,
8    io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt},
9};
10
11/// A structure for building archives
12///
13/// This structure has methods for building up an archive from scratch into any
14/// arbitrary writer.
15pub struct Builder<W: Write + Unpin + Send> {
16    mode: HeaderMode,
17    follow: bool,
18    finished: bool,
19    obj: Option<W>,
20    cancellation: Option<tokio::sync::oneshot::Sender<W>>,
21}
22
/// End-of-archive marker: 1024 zero bytes.
const TERMINATION: &[u8; 1024] = &[0u8; 1024];
24
25impl<W: Write + Unpin + Send + 'static> Builder<W> {
26    /// Create a new archive builder with the underlying object as the
27    /// destination of all data written. The builder will use
28    /// `HeaderMode::Complete` by default.
29    ///
30    /// On drop, would write [`TERMINATION`] into the end of the archive,
31    /// use `skip_termination` method to disable this.
32    pub fn new(obj: W) -> Builder<W> {
33        let (tx, rx) = tokio::sync::oneshot::channel::<W>();
34        tokio::spawn(async move {
35            if let Ok(mut w) = rx.await {
36                let _ = w.write_all(TERMINATION).await;
37            }
38        });
39        Builder {
40            mode: HeaderMode::Complete,
41            follow: true,
42            finished: false,
43            obj: Some(obj),
44            cancellation: Some(tx),
45        }
46    }
47}
48
49impl<W: Write + Unpin + Send> Builder<W> {
50    /// Create a new archive builder with the underlying object as the
51    /// destination of all data written. The builder will use
52    /// `HeaderMode::Complete` by default.
53    ///
54    /// The [`TERMINATION`] symbol would not be written to the archive in the end.
55    pub fn new_non_terminated(obj: W) -> Builder<W> {
56        Builder {
57            mode: HeaderMode::Complete,
58            follow: true,
59            finished: false,
60            obj: Some(obj),
61            cancellation: None,
62        }
63    }
64
65    /// Changes the HeaderMode that will be used when reading fs Metadata for
66    /// methods that implicitly read metadata for an input Path. Notably, this
67    /// does _not_ apply to `append(Header)`.
68    pub fn mode(&mut self, mode: HeaderMode) {
69        self.mode = mode;
70    }
71
72    /// Follow symlinks, archiving the contents of the file they point to rather
73    /// than adding a symlink to the archive. Defaults to true.
74    pub fn follow_symlinks(&mut self, follow: bool) {
75        self.follow = follow;
76    }
77
78    /// Skip writing final termination bytes into the archive.
79    pub fn skip_termination(&mut self) {
80        drop(self.cancellation.take());
81    }
82
83    /// Gets shared reference to the underlying object.
84    pub fn get_ref(&self) -> &W {
85        self.obj.as_ref().unwrap()
86    }
87
88    /// Gets mutable reference to the underlying object.
89    ///
90    /// Note that care must be taken while writing to the underlying
91    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
92    /// useful in the situations when one needs to be ensured that
93    /// tar entry was flushed to the disk.
94    pub fn get_mut(&mut self) -> &mut W {
95        self.obj.as_mut().unwrap()
96    }
97
98    /// Unwrap this archive, returning the underlying object.
99    ///
100    /// This function will finish writing the archive if the `finish` function
101    /// hasn't yet been called, returning any I/O error which happens during
102    /// that operation.
103    pub async fn into_inner(mut self) -> io::Result<W> {
104        if !self.finished {
105            self.finish().await?;
106        }
107        Ok(self.obj.take().unwrap())
108    }
109
110    /// Adds a new entry to this archive.
111    ///
112    /// This function will append the header specified, followed by contents of
113    /// the stream specified by `data`. To produce a valid archive the `size`
114    /// field of `header` must be the same as the length of the stream that's
115    /// being written. Additionally the checksum for the header should have been
116    /// set via the `set_cksum` method.
117    ///
118    /// Note that this will not attempt to seek the archive to a valid position,
119    /// so if the archive is in the middle of a read or some other similar
120    /// operation then this may corrupt the archive.
121    ///
122    /// Also note that after all entries have been written to an archive the
123    /// `finish` function needs to be called to finish writing the archive.
124    ///
125    /// # Errors
126    ///
127    /// This function will return an error for any intermittent I/O error which
128    /// occurs when either reading or writing.
129    ///
130    /// # Examples
131    ///
132    /// ```
133    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
134    /// #
135    /// use tokio_tar::{Builder, Header};
136    ///
137    /// let mut header = Header::new_gnu();
138    /// header.set_path("foo")?;
139    /// header.set_size(4);
140    /// header.set_cksum();
141    ///
142    /// let mut data: &[u8] = &[1, 2, 3, 4];
143    ///
144    /// let mut ar = Builder::new(Vec::new());
145    /// ar.append(&header, data).await?;
146    /// let data = ar.into_inner().await?;
147    /// #
148    /// # Ok(()) }) }
149    /// ```
150    pub async fn append<R: Read + Unpin>(
151        &mut self,
152        header: &Header,
153        mut data: R,
154    ) -> io::Result<()> {
155        append(self.get_mut(), header, &mut data).await?;
156
157        Ok(())
158    }
159
160    /// Adds a new entry to this archive with the specified path.
161    ///
162    /// This function will set the specified path in the given header, which may
163    /// require appending a GNU long-name extension entry to the archive first.
164    /// The checksum for the header will be automatically updated via the
165    /// `set_cksum` method after setting the path. No other metadata in the
166    /// header will be modified.
167    ///
168    /// Then it will append the header, followed by contents of the stream
169    /// specified by `data`. To produce a valid archive the `size` field of
170    /// `header` must be the same as the length of the stream that's being
171    /// written.
172    ///
173    /// Note that this will not attempt to seek the archive to a valid position,
174    /// so if the archive is in the middle of a read or some other similar
175    /// operation then this may corrupt the archive.
176    ///
177    /// Also note that after all entries have been written to an archive the
178    /// `finish` function needs to be called to finish writing the archive.
179    ///
180    /// # Errors
181    ///
182    /// This function will return an error for any intermittent I/O error which
183    /// occurs when either reading or writing.
184    ///
185    /// # Examples
186    ///
187    /// ```
188    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
189    /// #
190    /// use tokio_tar::{Builder, Header};
191    ///
192    /// let mut header = Header::new_gnu();
193    /// header.set_size(4);
194    /// header.set_cksum();
195    ///
196    /// let mut data: &[u8] = &[1, 2, 3, 4];
197    ///
198    /// let mut ar = Builder::new(Vec::new());
199    /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
200    /// let data = ar.into_inner().await?;
201    /// #
202    /// # Ok(()) }) }
203    /// ```
204    pub async fn append_data<P: AsRef<Path>, R: Read + Unpin>(
205        &mut self,
206        header: &mut Header,
207        path: P,
208        data: R,
209    ) -> io::Result<()> {
210        prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
211        header.set_cksum();
212        self.append(header, data).await?;
213
214        Ok(())
215    }
216
217    /// Adds a file on the local filesystem to this archive.
218    ///
219    /// This function will open the file specified by `path` and insert the file
220    /// into the archive with the appropriate metadata set, returning any I/O
221    /// error which occurs while writing. The path name for the file inside of
222    /// this archive will be the same as `path`, and it is required that the
223    /// path is a relative path.
224    ///
225    /// Note that this will not attempt to seek the archive to a valid position,
226    /// so if the archive is in the middle of a read or some other similar
227    /// operation then this may corrupt the archive.
228    ///
229    /// Also note that after all files have been written to an archive the
230    /// `finish` function needs to be called to finish writing the archive.
231    ///
232    /// # Examples
233    ///
234    /// ```no_run
235    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
236    /// #
237    /// use tokio_tar::Builder;
238    ///
239    /// let mut ar = Builder::new(Vec::new());
240    ///
241    /// ar.append_path("foo/bar.txt").await?;
242    /// #
243    /// # Ok(()) }) }
244    /// ```
245    pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
246        let mode = self.mode;
247        let follow = self.follow;
248        append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
249        Ok(())
250    }
251
252    /// Adds a file on the local filesystem to this archive under another name.
253    ///
254    /// This function will open the file specified by `path` and insert the file
255    /// into the archive as `name` with appropriate metadata set, returning any
256    /// I/O error which occurs while writing. The path name for the file inside
257    /// of this archive will be `name` is required to be a relative path.
258    ///
259    /// Note that this will not attempt to seek the archive to a valid position,
260    /// so if the archive is in the middle of a read or some other similar
261    /// operation then this may corrupt the archive.
262    ///
263    /// Also note that after all files have been written to an archive the
264    /// `finish` function needs to be called to finish writing the archive.
265    ///
266    /// # Examples
267    ///
268    /// ```no_run
269    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
270    /// #
271    /// use tokio_tar::Builder;
272    ///
273    /// let mut ar = Builder::new(Vec::new());
274    ///
275    /// // Insert the local file "foo/bar.txt" in the archive but with the name
276    /// // "bar/foo.txt".
277    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
278    /// #
279    /// # Ok(()) }) }
280    /// ```
281    pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
282        &mut self,
283        path: P,
284        name: N,
285    ) -> io::Result<()> {
286        let mode = self.mode;
287        let follow = self.follow;
288        append_path_with_name(
289            self.get_mut(),
290            path.as_ref(),
291            Some(name.as_ref()),
292            mode,
293            follow,
294        )
295        .await?;
296        Ok(())
297    }
298
299    /// Adds a file to this archive with the given path as the name of the file
300    /// in the archive.
301    ///
302    /// This will use the metadata of `file` to populate a `Header`, and it will
303    /// then append the file to the archive with the name `path`.
304    ///
305    /// Note that this will not attempt to seek the archive to a valid position,
306    /// so if the archive is in the middle of a read or some other similar
307    /// operation then this may corrupt the archive.
308    ///
309    /// Also note that after all files have been written to an archive the
310    /// `finish` function needs to be called to finish writing the archive.
311    ///
312    /// # Examples
313    ///
314    /// ```no_run
315    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
316    /// #
317    /// use tokio::fs::File;
318    /// use tokio_tar::Builder;
319    ///
320    /// let mut ar = Builder::new(Vec::new());
321    ///
322    /// // Open the file at one location, but insert it into the archive with a
323    /// // different name.
324    /// let mut f = File::open("foo/bar/baz.txt").await?;
325    /// ar.append_file("bar/baz.txt", &mut f).await?;
326    /// #
327    /// # Ok(()) }) }
328    /// ```
329    pub async fn append_file<P: AsRef<Path>>(
330        &mut self,
331        path: P,
332        file: &mut fs::File,
333    ) -> io::Result<()> {
334        let mode = self.mode;
335        append_file(self.get_mut(), path.as_ref(), file, mode).await?;
336        Ok(())
337    }
338
339    /// Adds a directory to this archive with the given path as the name of the
340    /// directory in the archive.
341    ///
342    /// This will use `stat` to populate a `Header`, and it will then append the
343    /// directory to the archive with the name `path`.
344    ///
345    /// Note that this will not attempt to seek the archive to a valid position,
346    /// so if the archive is in the middle of a read or some other similar
347    /// operation then this may corrupt the archive.
348    ///
349    /// Also note that after all files have been written to an archive the
350    /// `finish` function needs to be called to finish writing the archive.
351    ///
352    /// # Examples
353    ///
354    /// ```
355    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
356    /// #
357    /// use tokio::fs;
358    /// use tokio_tar::Builder;
359    ///
360    /// let mut ar = Builder::new(Vec::new());
361    ///
362    /// // Use the directory at one location, but insert it into the archive
363    /// // with a different name.
364    /// ar.append_dir("bardir", ".").await?;
365    /// #
366    /// # Ok(()) }) }
367    /// ```
368    pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
369    where
370        P: AsRef<Path>,
371        Q: AsRef<Path>,
372    {
373        let mode = self.mode;
374        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
375        Ok(())
376    }
377
378    /// Adds a directory and all of its contents (recursively) to this archive
379    /// with the given path as the name of the directory in the archive.
380    ///
381    /// Note that this will not attempt to seek the archive to a valid position,
382    /// so if the archive is in the middle of a read or some other similar
383    /// operation then this may corrupt the archive.
384    ///
385    /// Also note that after all files have been written to an archive the
386    /// `finish` function needs to be called to finish writing the archive.
387    ///
388    /// # Examples
389    ///
390    /// ```
391    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
392    /// #
393    /// use tokio::fs;
394    /// use tokio_tar::Builder;
395    ///
396    /// let mut ar = Builder::new(Vec::new());
397    ///
398    /// // Use the directory at one location, but insert it into the archive
399    /// // with a different name.
400    /// ar.append_dir_all("bardir", ".").await?;
401    /// #
402    /// # Ok(()) }) }
403    /// ```
404    pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
405    where
406        P: AsRef<Path>,
407        Q: AsRef<Path>,
408    {
409        let mode = self.mode;
410        let follow = self.follow;
411        append_dir_all(
412            self.get_mut(),
413            path.as_ref(),
414            src_path.as_ref(),
415            mode,
416            follow,
417        )
418        .await?;
419        Ok(())
420    }
421
422    /// Finish writing this archive, emitting the termination sections.
423    ///
424    /// This function should only be called when the archive has been written
425    /// entirely and if an I/O error happens the underlying object still needs
426    /// to be acquired.
427    ///
428    /// In most situations the `into_inner` method should be preferred.
429    pub async fn finish(&mut self) -> io::Result<()> {
430        if self.finished {
431            return Ok(());
432        }
433        self.finished = true;
434        self.get_mut().write_all(&[0; 1024]).await?;
435        Ok(())
436    }
437}
438
439async fn append<Dst: Write + Unpin + ?Sized, Data: Read + Unpin + ?Sized>(
440    mut dst: &mut Dst,
441    header: &Header,
442    mut data: &mut Data,
443) -> io::Result<()> {
444    dst.write_all(header.as_bytes()).await?;
445    let len = io::copy(&mut data, &mut dst).await?;
446
447    // Pad with zeros if necessary.
448    let buf = [0; 512];
449    let remaining = 512 - (len % 512);
450    if remaining < 512 {
451        dst.write_all(&buf[..remaining as usize]).await?;
452    }
453
454    Ok(())
455}
456
457async fn append_path_with_name<Dst: Write + Unpin + ?Sized>(
458    dst: &mut Dst,
459    path: &Path,
460    name: Option<&Path>,
461    mode: HeaderMode,
462    follow: bool,
463) -> io::Result<()> {
464    let stat = if follow {
465        fs::metadata(path).await.map_err(|err| {
466            io::Error::new(
467                err.kind(),
468                format!("{} when getting metadata for {}", err, path.display()),
469            )
470        })?
471    } else {
472        fs::symlink_metadata(path).await.map_err(|err| {
473            io::Error::new(
474                err.kind(),
475                format!("{} when getting metadata for {}", err, path.display()),
476            )
477        })?
478    };
479    let ar_name = name.unwrap_or(path);
480    if stat.is_file() {
481        append_fs(
482            dst,
483            ar_name,
484            &stat,
485            &mut fs::File::open(path).await?,
486            mode,
487            None,
488        )
489        .await?;
490        Ok(())
491    } else if stat.is_dir() {
492        append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
493        Ok(())
494    } else if stat.file_type().is_symlink() {
495        let link_name = fs::read_link(path).await?;
496        append_fs(
497            dst,
498            ar_name,
499            &stat,
500            &mut io::empty(),
501            mode,
502            Some(&link_name),
503        )
504        .await?;
505        Ok(())
506    } else {
507        Err(other(&format!("{} has unknown file type", path.display())))
508    }
509}
510
511async fn append_file<Dst: Write + Unpin + ?Sized>(
512    dst: &mut Dst,
513    path: &Path,
514    file: &mut fs::File,
515    mode: HeaderMode,
516) -> io::Result<()> {
517    let stat = file.metadata().await?;
518    append_fs(dst, path, &stat, file, mode, None).await?;
519    Ok(())
520}
521
522async fn append_dir<Dst: Write + Unpin + ?Sized>(
523    dst: &mut Dst,
524    path: &Path,
525    src_path: &Path,
526    mode: HeaderMode,
527) -> io::Result<()> {
528    let stat = fs::metadata(src_path).await?;
529    append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
530    Ok(())
531}
532
533fn prepare_header(size: u64, entry_type: EntryType) -> Header {
534    let mut header = Header::new_gnu();
535    let name = b"././@LongLink";
536    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
537    header.set_mode(0o644);
538    header.set_uid(0);
539    header.set_gid(0);
540    header.set_mtime(0);
541    // + 1 to be compliant with GNU tar
542    header.set_size(size + 1);
543    header.set_entry_type(entry_type);
544    header.set_cksum();
545    header
546}
547
548async fn prepare_header_path<Dst: Write + Unpin + ?Sized>(
549    dst: &mut Dst,
550    header: &mut Header,
551    path: &Path,
552) -> io::Result<()> {
553    // Try to encode the path directly in the header, but if it ends up not
554    // working (probably because it's too long) then try to use the GNU-specific
555    // long name extension by emitting an entry which indicates that it's the
556    // filename.
557    if let Err(e) = header.set_path(path) {
558        let data = path2bytes(path)?;
559        let max = header.as_old().name.len();
560        //  Since e isn't specific enough to let us know the path is indeed too
561        //  long, verify it first before using the extension.
562        if data.len() < max {
563            return Err(e);
564        }
565        let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
566        // null-terminated string
567        let mut data2 = data.chain(io::repeat(0).take(1));
568        append(dst, &header2, &mut data2).await?;
569        // Truncate the path to store in the header we're about to emit to
570        // ensure we've got something at least mentioned.
571        let path = bytes2path(Cow::Borrowed(&data[..max]))?;
572        header.set_path(&path)?;
573    }
574    Ok(())
575}
576
577async fn prepare_header_link<Dst: Write + Unpin + ?Sized>(
578    dst: &mut Dst,
579    header: &mut Header,
580    link_name: &Path,
581) -> io::Result<()> {
582    // Same as previous function but for linkname
583    if let Err(e) = header.set_link_name(link_name) {
584        let data = path2bytes(link_name)?;
585        if data.len() < header.as_old().linkname.len() {
586            return Err(e);
587        }
588        let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
589        let mut data2 = data.chain(io::repeat(0).take(1));
590        append(dst, &header2, &mut data2).await?;
591    }
592    Ok(())
593}
594
595async fn append_fs<Dst: Write + Unpin + ?Sized, R: Read + Unpin + ?Sized>(
596    dst: &mut Dst,
597    path: &Path,
598    meta: &Metadata,
599    read: &mut R,
600    mode: HeaderMode,
601    link_name: Option<&Path>,
602) -> io::Result<()> {
603    let mut header = Header::new_gnu();
604
605    prepare_header_path(dst, &mut header, path).await?;
606    header.set_metadata_in_mode(meta, mode);
607    if let Some(link_name) = link_name {
608        prepare_header_link(dst, &mut header, link_name).await?;
609    }
610    header.set_cksum();
611    append(dst, &header, read).await?;
612
613    Ok(())
614}
615
616async fn append_dir_all<Dst: Write + Unpin + ?Sized>(
617    dst: &mut Dst,
618    path: &Path,
619    src_path: &Path,
620    mode: HeaderMode,
621    follow: bool,
622) -> io::Result<()> {
623    let mut stack = vec![(src_path.to_path_buf(), true, false)];
624    while let Some((src, is_dir, is_symlink)) = stack.pop() {
625        let dest = path.join(src.strip_prefix(src_path).unwrap());
626
627        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
628        if is_dir || (is_symlink && follow && src.is_dir()) {
629            let mut entries = fs::read_dir(&src).await?;
630            while let Some(entry) = entries.next_entry().await.transpose() {
631                let entry = entry?;
632                let file_type = entry.file_type().await?;
633                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
634            }
635            if dest != Path::new("") {
636                append_dir(dst, &dest, &src, mode).await?;
637            }
638        } else if !follow && is_symlink {
639            let stat = fs::symlink_metadata(&src).await?;
640            let link_name = fs::read_link(&src).await?;
641            append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
642        } else {
643            append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
644        }
645    }
646    Ok(())
647}
648
649impl<W: Write + Unpin + Send> Drop for Builder<W> {
650    fn drop(&mut self) {
651        // TODO: proper async cancellation
652        if !self.finished {
653            if let Some(cancellation) = self.cancellation.take() {
654                cancellation.send(self.obj.take().unwrap()).ok();
655            }
656        }
657    }
658}