async_tar/
builder.rs

1use std::borrow::Cow;
2
3use async_std::{
4    fs,
5    io::{self, Read, Write},
6    path::Path,
7    prelude::*,
8};
9
10use crate::{
11    header::{bytes2path, path2bytes, HeaderMode},
12    other, EntryType, Header,
13};
14
15/// A structure for building archives
16///
17/// This structure has methods for building up an archive from scratch into any
18/// arbitrary writer.
19pub struct Builder<W: Write + Unpin + Send + Sync> {
20    mode: HeaderMode,
21    follow: bool,
22    finished: bool,
23    obj: Option<W>,
24}
25
26impl<W: Write + Unpin + Send + Sync> Builder<W> {
27    /// Create a new archive builder with the underlying object as the
28    /// destination of all data written. The builder will use
29    /// `HeaderMode::Complete` by default.
30    pub fn new(obj: W) -> Builder<W> {
31        Builder {
32            mode: HeaderMode::Complete,
33            follow: true,
34            finished: false,
35            obj: Some(obj),
36        }
37    }
38
39    /// Changes the HeaderMode that will be used when reading fs Metadata for
40    /// methods that implicitly read metadata for an input Path. Notably, this
41    /// does _not_ apply to `append(Header)`.
42    pub fn mode(&mut self, mode: HeaderMode) {
43        self.mode = mode;
44    }
45
46    /// Follow symlinks, archiving the contents of the file they point to rather
47    /// than adding a symlink to the archive. Defaults to true.
48    pub fn follow_symlinks(&mut self, follow: bool) {
49        self.follow = follow;
50    }
51
52    /// Gets shared reference to the underlying object.
53    pub fn get_ref(&self) -> &W {
54        self.obj.as_ref().unwrap()
55    }
56
57    /// Gets mutable reference to the underlying object.
58    ///
59    /// Note that care must be taken while writing to the underlying
60    /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
61    /// useful in the situations when one needs to be ensured that
62    /// tar entry was flushed to the disk.
63    pub fn get_mut(&mut self) -> &mut W {
64        self.obj.as_mut().unwrap()
65    }
66
67    /// Unwrap this archive, returning the underlying object.
68    ///
69    /// This function will finish writing the archive if the `finish` function
70    /// hasn't yet been called, returning any I/O error which happens during
71    /// that operation.
72    pub async fn into_inner(mut self) -> io::Result<W> {
73        if !self.finished {
74            self.finish().await?;
75        }
76        Ok(self.obj.take().unwrap())
77    }
78
79    /// Adds a new entry to this archive.
80    ///
81    /// This function will append the header specified, followed by contents of
82    /// the stream specified by `data`. To produce a valid archive the `size`
83    /// field of `header` must be the same as the length of the stream that's
84    /// being written. Additionally the checksum for the header should have been
85    /// set via the `set_cksum` method.
86    ///
87    /// Note that this will not attempt to seek the archive to a valid position,
88    /// so if the archive is in the middle of a read or some other similar
89    /// operation then this may corrupt the archive.
90    ///
91    /// Also note that after all entries have been written to an archive the
92    /// `finish` function needs to be called to finish writing the archive.
93    ///
94    /// # Errors
95    ///
96    /// This function will return an error for any intermittent I/O error which
97    /// occurs when either reading or writing.
98    ///
99    /// # Examples
100    ///
101    /// ```
102    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
103    /// #
104    /// use async_tar::{Builder, Header};
105    ///
106    /// let mut header = Header::new_gnu();
107    /// header.set_path("foo")?;
108    /// header.set_size(4);
109    /// header.set_cksum();
110    ///
111    /// let mut data: &[u8] = &[1, 2, 3, 4];
112    ///
113    /// let mut ar = Builder::new(Vec::new());
114    /// ar.append(&header, data).await?;
115    /// let data = ar.into_inner().await?;
116    /// #
117    /// # Ok(()) }) }
118    /// ```
119    pub async fn append<R: Read + Unpin + Send>(
120        &mut self,
121        header: &Header,
122        mut data: R,
123    ) -> io::Result<()> {
124        append(self.get_mut(), header, &mut data).await?;
125
126        Ok(())
127    }
128
129    /// Adds a new entry to this archive with the specified path.
130    ///
131    /// This function will set the specified path in the given header, which may
132    /// require appending a GNU long-name extension entry to the archive first.
133    /// The checksum for the header will be automatically updated via the
134    /// `set_cksum` method after setting the path. No other metadata in the
135    /// header will be modified.
136    ///
137    /// Then it will append the header, followed by contents of the stream
138    /// specified by `data`. To produce a valid archive the `size` field of
139    /// `header` must be the same as the length of the stream that's being
140    /// written.
141    ///
142    /// Note that this will not attempt to seek the archive to a valid position,
143    /// so if the archive is in the middle of a read or some other similar
144    /// operation then this may corrupt the archive.
145    ///
146    /// Also note that after all entries have been written to an archive the
147    /// `finish` function needs to be called to finish writing the archive.
148    ///
149    /// # Errors
150    ///
151    /// This function will return an error for any intermittent I/O error which
152    /// occurs when either reading or writing.
153    ///
154    /// # Examples
155    ///
156    /// ```
157    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
158    /// #
159    /// use async_tar::{Builder, Header};
160    ///
161    /// let mut header = Header::new_gnu();
162    /// header.set_size(4);
163    /// header.set_cksum();
164    ///
165    /// let mut data: &[u8] = &[1, 2, 3, 4];
166    ///
167    /// let mut ar = Builder::new(Vec::new());
168    /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
169    /// let data = ar.into_inner().await?;
170    /// #
171    /// # Ok(()) }) }
172    /// ```
173    pub async fn append_data<P: AsRef<Path>, R: Read + Unpin + Send>(
174        &mut self,
175        header: &mut Header,
176        path: P,
177        data: R,
178    ) -> io::Result<()> {
179        prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
180        header.set_cksum();
181        self.append(header, data).await?;
182
183        Ok(())
184    }
185
186    /// Adds a file on the local filesystem to this archive.
187    ///
188    /// This function will open the file specified by `path` and insert the file
189    /// into the archive with the appropriate metadata set, returning any I/O
190    /// error which occurs while writing. The path name for the file inside of
191    /// this archive will be the same as `path`, and it is required that the
192    /// path is a relative path.
193    ///
194    /// Note that this will not attempt to seek the archive to a valid position,
195    /// so if the archive is in the middle of a read or some other similar
196    /// operation then this may corrupt the archive.
197    ///
198    /// Also note that after all files have been written to an archive the
199    /// `finish` function needs to be called to finish writing the archive.
200    ///
201    /// # Examples
202    ///
203    /// ```no_run
204    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
205    /// #
206    /// use async_tar::Builder;
207    ///
208    /// let mut ar = Builder::new(Vec::new());
209    ///
210    /// ar.append_path("foo/bar.txt").await?;
211    /// #
212    /// # Ok(()) }) }
213    /// ```
214    pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
215        let mode = self.mode;
216        let follow = self.follow;
217        append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
218        Ok(())
219    }
220
221    /// Adds a file on the local filesystem to this archive under another name.
222    ///
223    /// This function will open the file specified by `path` and insert the file
224    /// into the archive as `name` with appropriate metadata set, returning any
225    /// I/O error which occurs while writing. The path name for the file inside
226    /// of this archive will be `name` is required to be a relative path.
227    ///
228    /// Note that this will not attempt to seek the archive to a valid position,
229    /// so if the archive is in the middle of a read or some other similar
230    /// operation then this may corrupt the archive.
231    ///
232    /// Also note that after all files have been written to an archive the
233    /// `finish` function needs to be called to finish writing the archive.
234    ///
235    /// # Examples
236    ///
237    /// ```no_run
238    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
239    /// #
240    /// use async_tar::Builder;
241    ///
242    /// let mut ar = Builder::new(Vec::new());
243    ///
244    /// // Insert the local file "foo/bar.txt" in the archive but with the name
245    /// // "bar/foo.txt".
246    /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
247    /// #
248    /// # Ok(()) }) }
249    /// ```
250    pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
251        &mut self,
252        path: P,
253        name: N,
254    ) -> io::Result<()> {
255        let mode = self.mode;
256        let follow = self.follow;
257        append_path_with_name(
258            self.get_mut(),
259            path.as_ref(),
260            Some(name.as_ref()),
261            mode,
262            follow,
263        )
264        .await?;
265        Ok(())
266    }
267
268    /// Adds a file to this archive with the given path as the name of the file
269    /// in the archive.
270    ///
271    /// This will use the metadata of `file` to populate a `Header`, and it will
272    /// then append the file to the archive with the name `path`.
273    ///
274    /// Note that this will not attempt to seek the archive to a valid position,
275    /// so if the archive is in the middle of a read or some other similar
276    /// operation then this may corrupt the archive.
277    ///
278    /// Also note that after all files have been written to an archive the
279    /// `finish` function needs to be called to finish writing the archive.
280    ///
281    /// # Examples
282    ///
283    /// ```no_run
284    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
285    /// #
286    /// use async_std::fs::File;
287    /// use async_tar::Builder;
288    ///
289    /// let mut ar = Builder::new(Vec::new());
290    ///
291    /// // Open the file at one location, but insert it into the archive with a
292    /// // different name.
293    /// let mut f = File::open("foo/bar/baz.txt").await?;
294    /// ar.append_file("bar/baz.txt", &mut f).await?;
295    /// #
296    /// # Ok(()) }) }
297    /// ```
298    pub async fn append_file<P: AsRef<Path>>(
299        &mut self,
300        path: P,
301        file: &mut fs::File,
302    ) -> io::Result<()> {
303        let mode = self.mode;
304        append_file(self.get_mut(), path.as_ref(), file, mode).await?;
305        Ok(())
306    }
307
308    /// Adds a directory to this archive with the given path as the name of the
309    /// directory in the archive.
310    ///
311    /// This will use `stat` to populate a `Header`, and it will then append the
312    /// directory to the archive with the name `path`.
313    ///
314    /// Note that this will not attempt to seek the archive to a valid position,
315    /// so if the archive is in the middle of a read or some other similar
316    /// operation then this may corrupt the archive.
317    ///
318    /// Also note that after all files have been written to an archive the
319    /// `finish` function needs to be called to finish writing the archive.
320    ///
321    /// # Examples
322    ///
323    /// ```
324    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
325    /// #
326    /// use async_std::fs;
327    /// use async_tar::Builder;
328    ///
329    /// let mut ar = Builder::new(Vec::new());
330    ///
331    /// // Use the directory at one location, but insert it into the archive
332    /// // with a different name.
333    /// ar.append_dir("bardir", ".").await?;
334    /// #
335    /// # Ok(()) }) }
336    /// ```
337    pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
338    where
339        P: AsRef<Path>,
340        Q: AsRef<Path>,
341    {
342        let mode = self.mode;
343        append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
344        Ok(())
345    }
346
347    /// Adds a directory and all of its contents (recursively) to this archive
348    /// with the given path as the name of the directory in the archive.
349    ///
350    /// Note that this will not attempt to seek the archive to a valid position,
351    /// so if the archive is in the middle of a read or some other similar
352    /// operation then this may corrupt the archive.
353    ///
354    /// Also note that after all files have been written to an archive the
355    /// `finish` function needs to be called to finish writing the archive.
356    ///
357    /// # Examples
358    ///
359    /// ```
360    /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
361    /// #
362    /// use async_std::fs;
363    /// use async_tar::Builder;
364    ///
365    /// let mut ar = Builder::new(Vec::new());
366    ///
367    /// // Use the directory at one location, but insert it into the archive
368    /// // with a different name.
369    /// ar.append_dir_all("bardir", ".").await?;
370    /// #
371    /// # Ok(()) }) }
372    /// ```
373    pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
374    where
375        P: AsRef<Path>,
376        Q: AsRef<Path>,
377    {
378        let mode = self.mode;
379        let follow = self.follow;
380        append_dir_all(
381            self.get_mut(),
382            path.as_ref(),
383            src_path.as_ref(),
384            mode,
385            follow,
386        )
387        .await?;
388        Ok(())
389    }
390
391    /// Finish writing this archive, emitting the termination sections.
392    ///
393    /// This function should only be called when the archive has been written
394    /// entirely and if an I/O error happens the underlying object still needs
395    /// to be acquired.
396    ///
397    /// In most situations the `into_inner` method should be preferred.
398    pub async fn finish(&mut self) -> io::Result<()> {
399        if self.finished {
400            return Ok(());
401        }
402        self.finished = true;
403        self.get_mut().write_all(&[0; 1024]).await?;
404        Ok(())
405    }
406}
407
408async fn append(
409    mut dst: &mut (dyn Write + Unpin + Send),
410    header: &Header,
411    mut data: &mut (dyn Read + Unpin + Send),
412) -> io::Result<()> {
413    dst.write_all(header.as_bytes()).await?;
414    let len = io::copy(&mut data, &mut dst).await?;
415
416    // Pad with zeros if necessary.
417    let buf = [0; 512];
418    let remaining = 512 - (len % 512);
419    if remaining < 512 {
420        dst.write_all(&buf[..remaining as usize]).await?;
421    }
422
423    Ok(())
424}
425
426async fn append_path_with_name(
427    dst: &mut (dyn Write + Unpin + Sync + Send),
428    path: &Path,
429    name: Option<&Path>,
430    mode: HeaderMode,
431    follow: bool,
432) -> io::Result<()> {
433    let stat = if follow {
434        fs::metadata(path).await.map_err(|err| {
435            io::Error::new(
436                err.kind(),
437                format!("{} when getting metadata for {}", err, path.display()),
438            )
439        })?
440    } else {
441        fs::symlink_metadata(path).await.map_err(|err| {
442            io::Error::new(
443                err.kind(),
444                format!("{} when getting metadata for {}", err, path.display()),
445            )
446        })?
447    };
448    let ar_name = name.unwrap_or(path);
449    if stat.is_file() {
450        append_fs(
451            dst,
452            ar_name,
453            &stat,
454            &mut fs::File::open(path).await?,
455            mode,
456            None,
457        )
458        .await?;
459        Ok(())
460    } else if stat.is_dir() {
461        append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
462        Ok(())
463    } else if stat.file_type().is_symlink() {
464        let link_name = fs::read_link(path).await?;
465        append_fs(
466            dst,
467            ar_name,
468            &stat,
469            &mut io::empty(),
470            mode,
471            Some(&link_name),
472        )
473        .await?;
474        Ok(())
475    } else {
476        Err(other(&format!("{} has unknown file type", path.display())))
477    }
478}
479
480async fn append_file(
481    dst: &mut (dyn Write + Unpin + Send + Sync),
482    path: &Path,
483    file: &mut fs::File,
484    mode: HeaderMode,
485) -> io::Result<()> {
486    let stat = file.metadata().await?;
487    append_fs(dst, path, &stat, file, mode, None).await?;
488    Ok(())
489}
490
491async fn append_dir(
492    dst: &mut (dyn Write + Unpin + Send + Sync),
493    path: &Path,
494    src_path: &Path,
495    mode: HeaderMode,
496) -> io::Result<()> {
497    let stat = fs::metadata(src_path).await?;
498    append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
499    Ok(())
500}
501
502fn prepare_header(size: u64, entry_type: EntryType) -> Header {
503    let mut header = Header::new_gnu();
504    let name = b"././@LongLink";
505    header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
506    header.set_mode(0o644);
507    header.set_uid(0);
508    header.set_gid(0);
509    header.set_mtime(0);
510    // + 1 to be compliant with GNU tar
511    header.set_size(size + 1);
512    header.set_entry_type(entry_type);
513    header.set_cksum();
514    header
515}
516
517async fn prepare_header_path(
518    dst: &mut (dyn Write + Unpin + Send + Sync),
519    header: &mut Header,
520    path: &Path,
521) -> io::Result<()> {
522    // Try to encode the path directly in the header, but if it ends up not
523    // working (probably because it's too long) then try to use the GNU-specific
524    // long name extension by emitting an entry which indicates that it's the
525    // filename.
526    if let Err(e) = header.set_path(path) {
527        let data = path2bytes(path)?;
528        let max = header.as_old().name.len();
529        //  Since e isn't specific enough to let us know the path is indeed too
530        //  long, verify it first before using the extension.
531        if data.len() < max {
532            return Err(e);
533        }
534        let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
535        // null-terminated string
536        let mut data2 = data.chain(io::repeat(0).take(1));
537        append(dst, &header2, &mut data2).await?;
538        // Truncate the path to store in the header we're about to emit to
539        // ensure we've got something at least mentioned.
540        let path = bytes2path(Cow::Borrowed(&data[..max]))?;
541        header.set_path(&path)?;
542    }
543    Ok(())
544}
545
546async fn prepare_header_link(
547    dst: &mut (dyn Write + Unpin + Send + Sync),
548    header: &mut Header,
549    link_name: &Path,
550) -> io::Result<()> {
551    // Same as previous function but for linkname
552    if let Err(e) = header.set_link_name(link_name) {
553        let data = path2bytes(link_name)?;
554        if data.len() < header.as_old().linkname.len() {
555            return Err(e);
556        }
557        let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
558        let mut data2 = data.chain(io::repeat(0).take(1));
559        append(dst, &header2, &mut data2).await?;
560    }
561    Ok(())
562}
563
564async fn append_fs(
565    dst: &mut (dyn Write + Unpin + Send + Sync),
566    path: &Path,
567    meta: &fs::Metadata,
568    read: &mut (dyn Read + Unpin + Sync + Send),
569    mode: HeaderMode,
570    link_name: Option<&Path>,
571) -> io::Result<()> {
572    let mut header = Header::new_gnu();
573
574    prepare_header_path(dst, &mut header, path).await?;
575    header.set_metadata_in_mode(meta, mode);
576    if let Some(link_name) = link_name {
577        prepare_header_link(dst, &mut header, link_name).await?;
578    }
579    header.set_cksum();
580    append(dst, &header, read).await?;
581
582    Ok(())
583}
584
585async fn append_dir_all(
586    dst: &mut (dyn Write + Unpin + Send + Sync),
587    path: &Path,
588    src_path: &Path,
589    mode: HeaderMode,
590    follow: bool,
591) -> io::Result<()> {
592    let mut stack = vec![(src_path.to_path_buf(), true, false)];
593    while let Some((src, is_dir, is_symlink)) = stack.pop() {
594        let dest = path.join(src.strip_prefix(src_path).unwrap());
595
596        // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
597        if is_dir || (is_symlink && follow && src.is_dir().await) {
598            let mut entries = fs::read_dir(&src).await?;
599            while let Some(entry) = entries.next().await {
600                let entry = entry?;
601                let file_type = entry.file_type().await?;
602                stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
603            }
604            if dest != Path::new("") {
605                append_dir(dst, &dest, &src, mode).await?;
606            }
607        } else if !follow && is_symlink {
608            let stat = fs::symlink_metadata(&src).await?;
609            let link_name = fs::read_link(&src).await?;
610            append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
611        } else {
612            append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
613        }
614    }
615    Ok(())
616}
617
618impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
619    fn drop(&mut self) {
620        async_std::task::block_on(async move {
621            let _ = self.finish().await;
622        });
623    }
624}
625
#[cfg(test)]
mod tests {
    use super::*;

    // Compile-time checks: the file type used as a writer is `Send + Sync`...
    assert_impl_all!(async_std::fs::File: Send, Sync);
    // ...and so is a builder wrapping it.
    assert_impl_all!(Builder<async_std::fs::File>: Send, Sync);
}