async_tar/builder.rs
1use std::borrow::Cow;
2
3#[cfg(feature = "runtime-async-std")]
4use async_std::fs::Metadata;
5#[cfg(feature = "runtime-async-std")]
6use async_std::{
7 fs,
8 io::{self, Read, Write},
9 path::Path,
10 prelude::*,
11};
12#[cfg(feature = "runtime-tokio")]
13use std::fs::Metadata;
14#[cfg(feature = "runtime-tokio")]
15use std::path::Path;
16#[cfg(feature = "runtime-tokio")]
17use tokio::{
18 fs,
19 io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt},
20};
21#[cfg(feature = "runtime-tokio")]
22use tokio_stream::StreamExt;
23
24use crate::{
25 EntryType, Header,
26 header::{HeaderMode, bytes2path, path2bytes},
27 metadata, other, symlink_metadata,
28};
29
30/// A structure for building archives
31///
32/// This structure has methods for building up an archive from scratch into any
33/// arbitrary writer.
34///
35/// You **must** call [`finish`] or [`into_inner`] to finalize the archive.
36/// The `runtime-tokio` feature will panic on drop if not finalized.
37///
38/// [`into_inner`]: Builder::into_inner
39/// [`finish`]: Builder::finish
40pub struct Builder<W: Write + Unpin + Send + Sync> {
41 mode: HeaderMode,
42 follow: bool,
43 finished: bool,
44 obj: Option<W>,
45}
46
47impl<W: Write + Unpin + Send + Sync> Builder<W> {
48 /// Create a new archive builder with the underlying object as the
49 /// destination of all data written. The builder will use
50 /// `HeaderMode::Complete` by default.
51 pub fn new(obj: W) -> Builder<W> {
52 Builder {
53 mode: HeaderMode::Complete,
54 follow: true,
55 finished: false,
56 obj: Some(obj),
57 }
58 }
59
60 /// Changes the HeaderMode that will be used when reading fs Metadata for
61 /// methods that implicitly read metadata for an input Path. Notably, this
62 /// does _not_ apply to `append(Header)`.
63 pub fn mode(&mut self, mode: HeaderMode) {
64 self.mode = mode;
65 }
66
67 /// Follow symlinks, archiving the contents of the file they point to rather
68 /// than adding a symlink to the archive. Defaults to true.
69 pub fn follow_symlinks(&mut self, follow: bool) {
70 self.follow = follow;
71 }
72
73 /// Gets shared reference to the underlying object.
74 pub fn get_ref(&self) -> &W {
75 self.obj.as_ref().unwrap()
76 }
77
78 /// Gets mutable reference to the underlying object.
79 ///
80 /// Note that care must be taken while writing to the underlying
81 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
82 /// useful in the situations when one needs to be ensured that
83 /// tar entry was flushed to the disk.
84 pub fn get_mut(&mut self) -> &mut W {
85 self.obj.as_mut().unwrap()
86 }
87
88 /// Unwrap this archive, returning the underlying object.
89 ///
90 /// This function will finish writing the archive if the `finish` function
91 /// hasn't yet been called, returning any I/O error which happens during
92 /// that operation.
93 pub async fn into_inner(mut self) -> io::Result<W> {
94 if !self.finished {
95 self.finish().await?;
96 }
97 Ok(self.obj.take().unwrap())
98 }
99
100 /// Adds a new entry to this archive.
101 ///
102 /// This function will append the header specified, followed by contents of
103 /// the stream specified by `data`. To produce a valid archive the `size`
104 /// field of `header` must be the same as the length of the stream that's
105 /// being written. Additionally the checksum for the header should have been
106 /// set via the `set_cksum` method.
107 ///
108 /// Note that this will not attempt to seek the archive to a valid position,
109 /// so if the archive is in the middle of a read or some other similar
110 /// operation then this may corrupt the archive.
111 ///
112 /// Also note that after all entries have been written to an archive the
113 /// `finish` function needs to be called to finish writing the archive.
114 ///
115 /// # Errors
116 ///
117 /// This function will return an error for any intermittent I/O error which
118 /// occurs when either reading or writing.
119 ///
120 /// # Examples
121 ///
122 #[cfg_attr(feature = "runtime-async-std", doc = "```")]
123 #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
124 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
125 /// #
126 /// use async_tar::{Builder, Header};
127 ///
128 /// let mut header = Header::new_gnu();
129 /// header.set_path("foo")?;
130 /// header.set_size(4);
131 /// header.set_cksum();
132 ///
133 /// let mut data: &[u8] = &[1, 2, 3, 4];
134 ///
135 /// let mut ar = Builder::new(Vec::new());
136 /// ar.append(&header, data).await?;
137 /// let data = ar.into_inner().await?;
138 /// #
139 /// # Ok(()) }) }
140 /// ```
141 pub async fn append<R: Read + Unpin + Send>(
142 &mut self,
143 header: &Header,
144 mut data: R,
145 ) -> io::Result<()> {
146 append(self.get_mut(), header, &mut data).await?;
147
148 Ok(())
149 }
150
151 /// Adds a new entry to this archive with the specified path.
152 ///
153 /// This function will set the specified path in the given header, which may
154 /// require appending a GNU long-name extension entry to the archive first.
155 /// The checksum for the header will be automatically updated via the
156 /// `set_cksum` method after setting the path. No other metadata in the
157 /// header will be modified.
158 ///
159 /// Then it will append the header, followed by contents of the stream
160 /// specified by `data`. To produce a valid archive the `size` field of
161 /// `header` must be the same as the length of the stream that's being
162 /// written.
163 ///
164 /// Note that this will not attempt to seek the archive to a valid position,
165 /// so if the archive is in the middle of a read or some other similar
166 /// operation then this may corrupt the archive.
167 ///
168 /// Also note that after all entries have been written to an archive the
169 /// `finish` function needs to be called to finish writing the archive.
170 ///
171 /// # Errors
172 ///
173 /// This function will return an error for any intermittent I/O error which
174 /// occurs when either reading or writing.
175 ///
176 /// # Examples
177 ///
178 /// ```
179 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
180 /// #
181 /// use async_tar::{Builder, Header};
182 ///
183 /// let mut header = Header::new_gnu();
184 /// header.set_size(4);
185 /// header.set_cksum();
186 ///
187 /// let mut data: &[u8] = &[1, 2, 3, 4];
188 ///
189 /// let mut ar = Builder::new(Vec::new());
190 /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
191 /// let data = ar.into_inner().await?;
192 /// #
193 /// # Ok(()) }) }
194 /// ```
195 pub async fn append_data<P: AsRef<Path>, R: Read + Unpin + Send>(
196 &mut self,
197 header: &mut Header,
198 path: P,
199 data: R,
200 ) -> io::Result<()> {
201 prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
202 header.set_cksum();
203 self.append(header, data).await?;
204
205 Ok(())
206 }
207
208 /// Adds a file on the local filesystem to this archive.
209 ///
210 /// This function will open the file specified by `path` and insert the file
211 /// into the archive with the appropriate metadata set, returning any I/O
212 /// error which occurs while writing. The path name for the file inside of
213 /// this archive will be the same as `path`, and it is required that the
214 /// path is a relative path.
215 ///
216 /// Note that this will not attempt to seek the archive to a valid position,
217 /// so if the archive is in the middle of a read or some other similar
218 /// operation then this may corrupt the archive.
219 ///
220 /// Also note that after all files have been written to an archive the
221 /// `finish` function needs to be called to finish writing the archive.
222 ///
223 /// # Examples
224 ///
225 /// ```no_run
226 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
227 /// #
228 /// use async_tar::Builder;
229 ///
230 /// let mut ar = Builder::new(Vec::new());
231 ///
232 /// ar.append_path("foo/bar.txt").await?;
233 /// #
234 /// # Ok(()) }) }
235 /// ```
236 pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
237 let mode = self.mode;
238 let follow = self.follow;
239 append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
240 Ok(())
241 }
242
243 /// Adds a file on the local filesystem to this archive under another name.
244 ///
245 /// This function will open the file specified by `path` and insert the file
246 /// into the archive as `name` with appropriate metadata set, returning any
247 /// I/O error which occurs while writing. The path name for the file inside
/// of this archive will be `name`, which is required to be a relative path.
249 ///
250 /// Note that this will not attempt to seek the archive to a valid position,
251 /// so if the archive is in the middle of a read or some other similar
252 /// operation then this may corrupt the archive.
253 ///
254 /// Also note that after all files have been written to an archive the
255 /// `finish` function needs to be called to finish writing the archive.
256 ///
257 /// # Examples
258 ///
259 /// ```no_run
260 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
261 /// #
262 /// use async_tar::Builder;
263 ///
264 /// let mut ar = Builder::new(Vec::new());
265 ///
266 /// // Insert the local file "foo/bar.txt" in the archive but with the name
267 /// // "bar/foo.txt".
268 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
269 /// #
270 /// # Ok(()) }) }
271 /// ```
272 pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
273 &mut self,
274 path: P,
275 name: N,
276 ) -> io::Result<()> {
277 let mode = self.mode;
278 let follow = self.follow;
279 append_path_with_name(
280 self.get_mut(),
281 path.as_ref(),
282 Some(name.as_ref()),
283 mode,
284 follow,
285 )
286 .await?;
287 Ok(())
288 }
289
290 /// Adds a file to this archive with the given path as the name of the file
291 /// in the archive.
292 ///
293 /// This will use the metadata of `file` to populate a `Header`, and it will
294 /// then append the file to the archive with the name `path`.
295 ///
296 /// Note that this will not attempt to seek the archive to a valid position,
297 /// so if the archive is in the middle of a read or some other similar
298 /// operation then this may corrupt the archive.
299 ///
300 /// Also note that after all files have been written to an archive the
301 /// `finish` function needs to be called to finish writing the archive.
302 ///
303 /// # Examples
304 ///
305 #[cfg_attr(feature = "runtime-async-std", doc = "```no_run")]
306 #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
307 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
308 /// #
309 /// use async_std::fs::File;
310 /// use async_tar::Builder;
311 ///
312 /// let mut ar = Builder::new(Vec::new());
313 ///
314 /// // Open the file at one location, but insert it into the archive with a
315 /// // different name.
316 /// let mut f = File::open("foo/bar/baz.txt").await?;
317 /// ar.append_file("bar/baz.txt", &mut f).await?;
318 /// #
319 /// # Ok(()) }) }
320 /// ```
321 pub async fn append_file<P: AsRef<Path>>(
322 &mut self,
323 path: P,
324 file: &mut fs::File,
325 ) -> io::Result<()> {
326 let mode = self.mode;
327 append_file(self.get_mut(), path.as_ref(), file, mode).await?;
328 Ok(())
329 }
330
331 /// Adds a directory to this archive with the given path as the name of the
332 /// directory in the archive.
333 ///
334 /// This will use `stat` to populate a `Header`, and it will then append the
335 /// directory to the archive with the name `path`.
336 ///
337 /// Note that this will not attempt to seek the archive to a valid position,
338 /// so if the archive is in the middle of a read or some other similar
339 /// operation then this may corrupt the archive.
340 ///
341 /// Also note that after all files have been written to an archive the
342 /// `finish` function needs to be called to finish writing the archive.
343 ///
344 /// # Examples
345 ///
346 #[cfg_attr(feature = "runtime-async-std", doc = "```")]
347 #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
348 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
349 /// #
350 /// use async_std::fs;
351 /// use async_tar::Builder;
352 ///
353 /// let mut ar = Builder::new(Vec::new());
354 ///
355 /// // Use the directory at one location, but insert it into the archive
356 /// // with a different name.
357 /// ar.append_dir("bardir", ".").await?;
358 /// #
359 /// # Ok(()) }) }
360 /// ```
361 pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
362 where
363 P: AsRef<Path>,
364 Q: AsRef<Path>,
365 {
366 let mode = self.mode;
367 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
368 Ok(())
369 }
370
371 /// Adds a directory and all of its contents (recursively) to this archive
372 /// with the given path as the name of the directory in the archive.
373 ///
374 /// Note that this will not attempt to seek the archive to a valid position,
375 /// so if the archive is in the middle of a read or some other similar
376 /// operation then this may corrupt the archive.
377 ///
378 /// Also note that after all files have been written to an archive the
379 /// `finish` function needs to be called to finish writing the archive.
380 ///
381 /// # Examples
382 ///
383 #[cfg_attr(feature = "runtime-async-std", doc = "```")]
384 #[cfg_attr(feature = "runtime-tokio", doc = "```ignore")]
385 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { async_std::task::block_on(async {
386 /// #
387 /// use async_std::fs;
388 /// use async_tar::Builder;
389 ///
390 /// let mut ar = Builder::new(Vec::new());
391 ///
392 /// // Use the directory at one location, but insert it into the archive
393 /// // with a different name.
394 /// ar.append_dir_all("bardir", ".").await?;
395 /// #
396 /// # Ok(()) })}
397 /// ```
398 pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
399 where
400 P: AsRef<Path>,
401 Q: AsRef<Path>,
402 {
403 let mode = self.mode;
404 let follow = self.follow;
405 append_dir_all(
406 self.get_mut(),
407 path.as_ref(),
408 src_path.as_ref(),
409 mode,
410 follow,
411 )
412 .await?;
413 Ok(())
414 }
415
416 /// Finish writing this archive, emitting the termination sections.
417 ///
418 /// This function should only be called when the archive has been written
419 /// entirely and if an I/O error happens the underlying object still needs
420 /// to be acquired.
421 ///
422 /// In most situations the `into_inner` method should be preferred.
423 pub async fn finish(&mut self) -> io::Result<()> {
424 if self.finished {
425 return Ok(());
426 }
427 self.finished = true;
428 self.get_mut().write_all(&[0; 1024]).await?;
429 Ok(())
430 }
431}
432
433async fn append(
434 mut dst: &mut (dyn Write + Unpin + Send),
435 header: &Header,
436 mut data: &mut (dyn Read + Unpin + Send),
437) -> io::Result<()> {
438 dst.write_all(header.as_bytes()).await?;
439 let len = io::copy(&mut data, &mut dst).await?;
440
441 // Pad with zeros if necessary.
442 let buf = [0; 512];
443 let remaining = 512 - (len % 512);
444 if remaining < 512 {
445 dst.write_all(&buf[..remaining as usize]).await?;
446 }
447
448 Ok(())
449}
450
451async fn append_path_with_name(
452 dst: &mut (dyn Write + Unpin + Sync + Send),
453 path: &Path,
454 name: Option<&Path>,
455 mode: HeaderMode,
456 follow: bool,
457) -> io::Result<()> {
458 let stat = if follow {
459 metadata(path).await.map_err(|err| {
460 io::Error::new(
461 err.kind(),
462 format!("{} when getting metadata for {}", err, path.display()),
463 )
464 })?
465 } else {
466 symlink_metadata(path).await.map_err(|err| {
467 io::Error::new(
468 err.kind(),
469 format!("{} when getting metadata for {}", err, path.display()),
470 )
471 })?
472 };
473 let ar_name = name.unwrap_or(path);
474 if stat.is_file() {
475 append_fs(
476 dst,
477 ar_name,
478 &stat,
479 &mut fs::File::open(path).await?,
480 mode,
481 None,
482 )
483 .await?;
484 Ok(())
485 } else if stat.is_dir() {
486 append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
487 Ok(())
488 } else if stat.file_type().is_symlink() {
489 let link_name = fs::read_link(path).await?;
490 append_fs(
491 dst,
492 ar_name,
493 &stat,
494 &mut io::empty(),
495 mode,
496 Some(&link_name),
497 )
498 .await?;
499 Ok(())
500 } else {
501 Err(other(&format!("{} has unknown file type", path.display())))
502 }
503}
504
505async fn append_file(
506 dst: &mut (dyn Write + Unpin + Send + Sync),
507 path: &Path,
508 file: &mut fs::File,
509 mode: HeaderMode,
510) -> io::Result<()> {
511 let stat = file.metadata().await?;
512 append_fs(dst, path, &stat, file, mode, None).await?;
513 Ok(())
514}
515
516async fn append_dir(
517 dst: &mut (dyn Write + Unpin + Send + Sync),
518 path: &Path,
519 src_path: &Path,
520 mode: HeaderMode,
521) -> io::Result<()> {
522 let stat = fs::metadata(src_path).await?;
523 append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
524 Ok(())
525}
526
527fn prepare_header(size: u64, entry_type: EntryType) -> Header {
528 let mut header = Header::new_gnu();
529 let name = b"././@LongLink";
530 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
531 header.set_mode(0o644);
532 header.set_uid(0);
533 header.set_gid(0);
534 header.set_mtime(0);
535 // + 1 to be compliant with GNU tar
536 header.set_size(size + 1);
537 header.set_entry_type(entry_type);
538 header.set_cksum();
539 header
540}
541
542async fn prepare_header_path(
543 dst: &mut (dyn Write + Unpin + Send + Sync),
544 header: &mut Header,
545 path: &Path,
546) -> io::Result<()> {
547 // Try to encode the path directly in the header, but if it ends up not
548 // working (probably because it's too long) then try to use the GNU-specific
549 // long name extension by emitting an entry which indicates that it's the
550 // filename.
551 if let Err(e) = header.set_path(path) {
552 let data = path2bytes(path)?;
553 let max = header.as_old().name.len();
554 // Since e isn't specific enough to let us know the path is indeed too
555 // long, verify it first before using the extension.
556 if data.len() < max {
557 return Err(e);
558 }
559 let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
560 // null-terminated string
561 let mut data2 = data.chain(io::repeat(0).take(1));
562 append(dst, &header2, &mut data2).await?;
563 // Truncate the path to store in the header we're about to emit to
564 // ensure we've got something at least mentioned.
565 let path = bytes2path(Cow::Borrowed(&data[..max]))?;
566 header.set_truncated_path_for_gnu_header(&path)?;
567 }
568 Ok(())
569}
570
571async fn prepare_header_link(
572 dst: &mut (dyn Write + Unpin + Send + Sync),
573 header: &mut Header,
574 link_name: &Path,
575) -> io::Result<()> {
576 // Same as previous function but for linkname
577 if let Err(e) = header.set_link_name(link_name) {
578 let data = path2bytes(link_name)?;
579 if data.len() < header.as_old().linkname.len() {
580 return Err(e);
581 }
582 let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
583 let mut data2 = data.chain(io::repeat(0).take(1));
584 append(dst, &header2, &mut data2).await?;
585 }
586 Ok(())
587}
588
589async fn append_fs(
590 dst: &mut (dyn Write + Unpin + Send + Sync),
591 path: &Path,
592 meta: &Metadata,
593 read: &mut (dyn Read + Unpin + Sync + Send),
594 mode: HeaderMode,
595 link_name: Option<&Path>,
596) -> io::Result<()> {
597 let mut header = Header::new_gnu();
598
599 prepare_header_path(dst, &mut header, path).await?;
600 header.set_metadata_in_mode(meta, mode);
601 if let Some(link_name) = link_name {
602 prepare_header_link(dst, &mut header, link_name).await?;
603 }
604 header.set_cksum();
605 append(dst, &header, read).await?;
606
607 Ok(())
608}
609
610async fn append_dir_all(
611 dst: &mut (dyn Write + Unpin + Send + Sync),
612 path: &Path,
613 src_path: &Path,
614 mode: HeaderMode,
615 follow: bool,
616) -> io::Result<()> {
617 let mut stack = vec![(src_path.to_path_buf(), true, false)];
618 while let Some((src, is_dir, is_symlink)) = stack.pop() {
619 let dest = path.join(src.strip_prefix(src_path).unwrap());
620
621 #[cfg(feature = "runtime-async-std")]
622 async fn check_is_dir(path: &Path) -> bool {
623 path.is_dir().await
624 }
625 #[cfg(feature = "runtime-tokio")]
626 async fn check_is_dir(path: &Path) -> bool {
627 fs::metadata(path)
628 .await
629 .map(|m| m.is_dir())
630 .unwrap_or(false)
631 }
632
633 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
634 if is_dir || (is_symlink && follow && check_is_dir(&src).await) {
635 #[cfg(feature = "runtime-async-std")]
636 let mut entries = fs::read_dir(&src).await?;
637 #[cfg(feature = "runtime-tokio")]
638 let mut entries = tokio_stream::wrappers::ReadDirStream::new(fs::read_dir(&src).await?);
639 while let Some(entry) = entries.next().await {
640 let entry = entry?;
641 let file_type = entry.file_type().await?;
642 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
643 }
644 if dest != Path::new("") {
645 append_dir(dst, &dest, &src, mode).await?;
646 }
647 } else if !follow && is_symlink {
648 let stat = fs::symlink_metadata(&src).await?;
649 let link_name = fs::read_link(&src).await?;
650 append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
651 } else {
652 append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
653 }
654 }
655 Ok(())
656}
657
/// Best-effort finalization for the `runtime-async-std` feature: a builder
/// that was never finished has its termination sections written on drop.
#[cfg(feature = "runtime-async-std")]
impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
    fn drop(&mut self) {
        // Nothing to do once the archive is finished (the normal state after
        // `finish` / `into_inner`) or when the writer is gone. Returning
        // early avoids blocking the thread for a no-op and keeps `drop` from
        // ever reaching the `unwrap` in `get_mut` with an empty `obj`.
        if self.finished || self.obj.is_none() {
            return;
        }
        async_std::task::block_on(async move {
            // `drop` has no way to report an I/O error, so it is discarded.
            let _ = self.finish().await;
        });
    }
}
666
// With the `runtime-tokio` feature an unfinished builder is not finalized on
// drop; dropping one is reported with a panic instead.
#[cfg(feature = "runtime-tokio")]
impl<W: Write + Unpin + Send + Sync> Drop for Builder<W> {
    fn drop(&mut self) {
        // Stay silent when the archive was finished, when the thread is
        // already panicking, or when the writer has been taken.
        if self.finished || std::thread::panicking() || self.obj.is_none() {
            return;
        }
        panic!("Builder dropped without finalizing; call finish() or into_inner()");
    }
}
675
#[cfg(test)]
mod tests {
    use super::*;

    // Compile-time checks only: a builder wrapping a file must be `Send` and
    // `Sync`, and so must the file type itself.
    assert_impl_all!(Builder<fs::File>: Send, Sync);
    assert_impl_all!(fs::File: Send, Sync);
}