tokio_tar/builder.rs
1use crate::{
2 header::{bytes2path, path2bytes, HeaderMode},
3 other, EntryType, Header,
4};
5use std::{borrow::Cow, fs::Metadata, path::Path};
6use tokio::{
7 fs,
8 io::{self, AsyncRead as Read, AsyncReadExt, AsyncWrite as Write, AsyncWriteExt},
9};
10
11/// A structure for building archives
12///
13/// This structure has methods for building up an archive from scratch into any
14/// arbitrary writer.
15pub struct Builder<W: Write + Unpin + Send> {
16 mode: HeaderMode,
17 follow: bool,
18 finished: bool,
19 obj: Option<W>,
20 cancellation: Option<tokio::sync::oneshot::Sender<W>>,
21}
22
/// The 1024 zero bytes that mark the end of an archive.
const TERMINATION: &[u8; 1024] = &[0u8; 1024];
24
25impl<W: Write + Unpin + Send + 'static> Builder<W> {
26 /// Create a new archive builder with the underlying object as the
27 /// destination of all data written. The builder will use
28 /// `HeaderMode::Complete` by default.
29 ///
30 /// On drop, would write [`TERMINATION`] into the end of the archive,
31 /// use `skip_termination` method to disable this.
32 pub fn new(obj: W) -> Builder<W> {
33 let (tx, rx) = tokio::sync::oneshot::channel::<W>();
34 tokio::spawn(async move {
35 if let Ok(mut w) = rx.await {
36 let _ = w.write_all(TERMINATION).await;
37 }
38 });
39 Builder {
40 mode: HeaderMode::Complete,
41 follow: true,
42 finished: false,
43 obj: Some(obj),
44 cancellation: Some(tx),
45 }
46 }
47}
48
49impl<W: Write + Unpin + Send> Builder<W> {
50 /// Create a new archive builder with the underlying object as the
51 /// destination of all data written. The builder will use
52 /// `HeaderMode::Complete` by default.
53 ///
54 /// The [`TERMINATION`] symbol would not be written to the archive in the end.
55 pub fn new_non_terminated(obj: W) -> Builder<W> {
56 Builder {
57 mode: HeaderMode::Complete,
58 follow: true,
59 finished: false,
60 obj: Some(obj),
61 cancellation: None,
62 }
63 }
64
65 /// Changes the HeaderMode that will be used when reading fs Metadata for
66 /// methods that implicitly read metadata for an input Path. Notably, this
67 /// does _not_ apply to `append(Header)`.
68 pub fn mode(&mut self, mode: HeaderMode) {
69 self.mode = mode;
70 }
71
72 /// Follow symlinks, archiving the contents of the file they point to rather
73 /// than adding a symlink to the archive. Defaults to true.
74 pub fn follow_symlinks(&mut self, follow: bool) {
75 self.follow = follow;
76 }
77
78 /// Skip writing final termination bytes into the archive.
79 pub fn skip_termination(&mut self) {
80 drop(self.cancellation.take());
81 }
82
83 /// Gets shared reference to the underlying object.
84 pub fn get_ref(&self) -> &W {
85 self.obj.as_ref().unwrap()
86 }
87
88 /// Gets mutable reference to the underlying object.
89 ///
90 /// Note that care must be taken while writing to the underlying
91 /// object. But, e.g. `get_mut().flush()` is claimed to be safe and
92 /// useful in the situations when one needs to be ensured that
93 /// tar entry was flushed to the disk.
94 pub fn get_mut(&mut self) -> &mut W {
95 self.obj.as_mut().unwrap()
96 }
97
98 /// Unwrap this archive, returning the underlying object.
99 ///
100 /// This function will finish writing the archive if the `finish` function
101 /// hasn't yet been called, returning any I/O error which happens during
102 /// that operation.
103 pub async fn into_inner(mut self) -> io::Result<W> {
104 if !self.finished {
105 self.finish().await?;
106 }
107 Ok(self.obj.take().unwrap())
108 }
109
110 /// Adds a new entry to this archive.
111 ///
112 /// This function will append the header specified, followed by contents of
113 /// the stream specified by `data`. To produce a valid archive the `size`
114 /// field of `header` must be the same as the length of the stream that's
115 /// being written. Additionally the checksum for the header should have been
116 /// set via the `set_cksum` method.
117 ///
118 /// Note that this will not attempt to seek the archive to a valid position,
119 /// so if the archive is in the middle of a read or some other similar
120 /// operation then this may corrupt the archive.
121 ///
122 /// Also note that after all entries have been written to an archive the
123 /// `finish` function needs to be called to finish writing the archive.
124 ///
125 /// # Errors
126 ///
127 /// This function will return an error for any intermittent I/O error which
128 /// occurs when either reading or writing.
129 ///
130 /// # Examples
131 ///
132 /// ```
133 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
134 /// #
135 /// use tokio_tar::{Builder, Header};
136 ///
137 /// let mut header = Header::new_gnu();
138 /// header.set_path("foo")?;
139 /// header.set_size(4);
140 /// header.set_cksum();
141 ///
142 /// let mut data: &[u8] = &[1, 2, 3, 4];
143 ///
144 /// let mut ar = Builder::new(Vec::new());
145 /// ar.append(&header, data).await?;
146 /// let data = ar.into_inner().await?;
147 /// #
148 /// # Ok(()) }) }
149 /// ```
150 pub async fn append<R: Read + Unpin>(
151 &mut self,
152 header: &Header,
153 mut data: R,
154 ) -> io::Result<()> {
155 append(self.get_mut(), header, &mut data).await?;
156
157 Ok(())
158 }
159
160 /// Adds a new entry to this archive with the specified path.
161 ///
162 /// This function will set the specified path in the given header, which may
163 /// require appending a GNU long-name extension entry to the archive first.
164 /// The checksum for the header will be automatically updated via the
165 /// `set_cksum` method after setting the path. No other metadata in the
166 /// header will be modified.
167 ///
168 /// Then it will append the header, followed by contents of the stream
169 /// specified by `data`. To produce a valid archive the `size` field of
170 /// `header` must be the same as the length of the stream that's being
171 /// written.
172 ///
173 /// Note that this will not attempt to seek the archive to a valid position,
174 /// so if the archive is in the middle of a read or some other similar
175 /// operation then this may corrupt the archive.
176 ///
177 /// Also note that after all entries have been written to an archive the
178 /// `finish` function needs to be called to finish writing the archive.
179 ///
180 /// # Errors
181 ///
182 /// This function will return an error for any intermittent I/O error which
183 /// occurs when either reading or writing.
184 ///
185 /// # Examples
186 ///
187 /// ```
188 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
189 /// #
190 /// use tokio_tar::{Builder, Header};
191 ///
192 /// let mut header = Header::new_gnu();
193 /// header.set_size(4);
194 /// header.set_cksum();
195 ///
196 /// let mut data: &[u8] = &[1, 2, 3, 4];
197 ///
198 /// let mut ar = Builder::new(Vec::new());
199 /// ar.append_data(&mut header, "really/long/path/to/foo", data).await?;
200 /// let data = ar.into_inner().await?;
201 /// #
202 /// # Ok(()) }) }
203 /// ```
204 pub async fn append_data<P: AsRef<Path>, R: Read + Unpin>(
205 &mut self,
206 header: &mut Header,
207 path: P,
208 data: R,
209 ) -> io::Result<()> {
210 prepare_header_path(self.get_mut(), header, path.as_ref()).await?;
211 header.set_cksum();
212 self.append(header, data).await?;
213
214 Ok(())
215 }
216
217 /// Adds a file on the local filesystem to this archive.
218 ///
219 /// This function will open the file specified by `path` and insert the file
220 /// into the archive with the appropriate metadata set, returning any I/O
221 /// error which occurs while writing. The path name for the file inside of
222 /// this archive will be the same as `path`, and it is required that the
223 /// path is a relative path.
224 ///
225 /// Note that this will not attempt to seek the archive to a valid position,
226 /// so if the archive is in the middle of a read or some other similar
227 /// operation then this may corrupt the archive.
228 ///
229 /// Also note that after all files have been written to an archive the
230 /// `finish` function needs to be called to finish writing the archive.
231 ///
232 /// # Examples
233 ///
234 /// ```no_run
235 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
236 /// #
237 /// use tokio_tar::Builder;
238 ///
239 /// let mut ar = Builder::new(Vec::new());
240 ///
241 /// ar.append_path("foo/bar.txt").await?;
242 /// #
243 /// # Ok(()) }) }
244 /// ```
245 pub async fn append_path<P: AsRef<Path>>(&mut self, path: P) -> io::Result<()> {
246 let mode = self.mode;
247 let follow = self.follow;
248 append_path_with_name(self.get_mut(), path.as_ref(), None, mode, follow).await?;
249 Ok(())
250 }
251
252 /// Adds a file on the local filesystem to this archive under another name.
253 ///
254 /// This function will open the file specified by `path` and insert the file
255 /// into the archive as `name` with appropriate metadata set, returning any
256 /// I/O error which occurs while writing. The path name for the file inside
257 /// of this archive will be `name` is required to be a relative path.
258 ///
259 /// Note that this will not attempt to seek the archive to a valid position,
260 /// so if the archive is in the middle of a read or some other similar
261 /// operation then this may corrupt the archive.
262 ///
263 /// Also note that after all files have been written to an archive the
264 /// `finish` function needs to be called to finish writing the archive.
265 ///
266 /// # Examples
267 ///
268 /// ```no_run
269 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
270 /// #
271 /// use tokio_tar::Builder;
272 ///
273 /// let mut ar = Builder::new(Vec::new());
274 ///
275 /// // Insert the local file "foo/bar.txt" in the archive but with the name
276 /// // "bar/foo.txt".
277 /// ar.append_path_with_name("foo/bar.txt", "bar/foo.txt").await?;
278 /// #
279 /// # Ok(()) }) }
280 /// ```
281 pub async fn append_path_with_name<P: AsRef<Path>, N: AsRef<Path>>(
282 &mut self,
283 path: P,
284 name: N,
285 ) -> io::Result<()> {
286 let mode = self.mode;
287 let follow = self.follow;
288 append_path_with_name(
289 self.get_mut(),
290 path.as_ref(),
291 Some(name.as_ref()),
292 mode,
293 follow,
294 )
295 .await?;
296 Ok(())
297 }
298
299 /// Adds a file to this archive with the given path as the name of the file
300 /// in the archive.
301 ///
302 /// This will use the metadata of `file` to populate a `Header`, and it will
303 /// then append the file to the archive with the name `path`.
304 ///
305 /// Note that this will not attempt to seek the archive to a valid position,
306 /// so if the archive is in the middle of a read or some other similar
307 /// operation then this may corrupt the archive.
308 ///
309 /// Also note that after all files have been written to an archive the
310 /// `finish` function needs to be called to finish writing the archive.
311 ///
312 /// # Examples
313 ///
314 /// ```no_run
315 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
316 /// #
317 /// use tokio::fs::File;
318 /// use tokio_tar::Builder;
319 ///
320 /// let mut ar = Builder::new(Vec::new());
321 ///
322 /// // Open the file at one location, but insert it into the archive with a
323 /// // different name.
324 /// let mut f = File::open("foo/bar/baz.txt").await?;
325 /// ar.append_file("bar/baz.txt", &mut f).await?;
326 /// #
327 /// # Ok(()) }) }
328 /// ```
329 pub async fn append_file<P: AsRef<Path>>(
330 &mut self,
331 path: P,
332 file: &mut fs::File,
333 ) -> io::Result<()> {
334 let mode = self.mode;
335 append_file(self.get_mut(), path.as_ref(), file, mode).await?;
336 Ok(())
337 }
338
339 /// Adds a directory to this archive with the given path as the name of the
340 /// directory in the archive.
341 ///
342 /// This will use `stat` to populate a `Header`, and it will then append the
343 /// directory to the archive with the name `path`.
344 ///
345 /// Note that this will not attempt to seek the archive to a valid position,
346 /// so if the archive is in the middle of a read or some other similar
347 /// operation then this may corrupt the archive.
348 ///
349 /// Also note that after all files have been written to an archive the
350 /// `finish` function needs to be called to finish writing the archive.
351 ///
352 /// # Examples
353 ///
354 /// ```
355 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
356 /// #
357 /// use tokio::fs;
358 /// use tokio_tar::Builder;
359 ///
360 /// let mut ar = Builder::new(Vec::new());
361 ///
362 /// // Use the directory at one location, but insert it into the archive
363 /// // with a different name.
364 /// ar.append_dir("bardir", ".").await?;
365 /// #
366 /// # Ok(()) }) }
367 /// ```
368 pub async fn append_dir<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
369 where
370 P: AsRef<Path>,
371 Q: AsRef<Path>,
372 {
373 let mode = self.mode;
374 append_dir(self.get_mut(), path.as_ref(), src_path.as_ref(), mode).await?;
375 Ok(())
376 }
377
378 /// Adds a directory and all of its contents (recursively) to this archive
379 /// with the given path as the name of the directory in the archive.
380 ///
381 /// Note that this will not attempt to seek the archive to a valid position,
382 /// so if the archive is in the middle of a read or some other similar
383 /// operation then this may corrupt the archive.
384 ///
385 /// Also note that after all files have been written to an archive the
386 /// `finish` function needs to be called to finish writing the archive.
387 ///
388 /// # Examples
389 ///
390 /// ```
391 /// # fn main() -> Result<(), Box<dyn std::error::Error + Send + Sync + 'static>> { tokio::runtime::Runtime::new().unwrap().block_on(async {
392 /// #
393 /// use tokio::fs;
394 /// use tokio_tar::Builder;
395 ///
396 /// let mut ar = Builder::new(Vec::new());
397 ///
398 /// // Use the directory at one location, but insert it into the archive
399 /// // with a different name.
400 /// ar.append_dir_all("bardir", ".").await?;
401 /// #
402 /// # Ok(()) }) }
403 /// ```
404 pub async fn append_dir_all<P, Q>(&mut self, path: P, src_path: Q) -> io::Result<()>
405 where
406 P: AsRef<Path>,
407 Q: AsRef<Path>,
408 {
409 let mode = self.mode;
410 let follow = self.follow;
411 append_dir_all(
412 self.get_mut(),
413 path.as_ref(),
414 src_path.as_ref(),
415 mode,
416 follow,
417 )
418 .await?;
419 Ok(())
420 }
421
422 /// Finish writing this archive, emitting the termination sections.
423 ///
424 /// This function should only be called when the archive has been written
425 /// entirely and if an I/O error happens the underlying object still needs
426 /// to be acquired.
427 ///
428 /// In most situations the `into_inner` method should be preferred.
429 pub async fn finish(&mut self) -> io::Result<()> {
430 if self.finished {
431 return Ok(());
432 }
433 self.finished = true;
434 self.get_mut().write_all(&[0; 1024]).await?;
435 Ok(())
436 }
437}
438
439async fn append<Dst: Write + Unpin + ?Sized, Data: Read + Unpin + ?Sized>(
440 mut dst: &mut Dst,
441 header: &Header,
442 mut data: &mut Data,
443) -> io::Result<()> {
444 dst.write_all(header.as_bytes()).await?;
445 let len = io::copy(&mut data, &mut dst).await?;
446
447 // Pad with zeros if necessary.
448 let buf = [0; 512];
449 let remaining = 512 - (len % 512);
450 if remaining < 512 {
451 dst.write_all(&buf[..remaining as usize]).await?;
452 }
453
454 Ok(())
455}
456
457async fn append_path_with_name<Dst: Write + Unpin + ?Sized>(
458 dst: &mut Dst,
459 path: &Path,
460 name: Option<&Path>,
461 mode: HeaderMode,
462 follow: bool,
463) -> io::Result<()> {
464 let stat = if follow {
465 fs::metadata(path).await.map_err(|err| {
466 io::Error::new(
467 err.kind(),
468 format!("{} when getting metadata for {}", err, path.display()),
469 )
470 })?
471 } else {
472 fs::symlink_metadata(path).await.map_err(|err| {
473 io::Error::new(
474 err.kind(),
475 format!("{} when getting metadata for {}", err, path.display()),
476 )
477 })?
478 };
479 let ar_name = name.unwrap_or(path);
480 if stat.is_file() {
481 append_fs(
482 dst,
483 ar_name,
484 &stat,
485 &mut fs::File::open(path).await?,
486 mode,
487 None,
488 )
489 .await?;
490 Ok(())
491 } else if stat.is_dir() {
492 append_fs(dst, ar_name, &stat, &mut io::empty(), mode, None).await?;
493 Ok(())
494 } else if stat.file_type().is_symlink() {
495 let link_name = fs::read_link(path).await?;
496 append_fs(
497 dst,
498 ar_name,
499 &stat,
500 &mut io::empty(),
501 mode,
502 Some(&link_name),
503 )
504 .await?;
505 Ok(())
506 } else {
507 Err(other(&format!("{} has unknown file type", path.display())))
508 }
509}
510
511async fn append_file<Dst: Write + Unpin + ?Sized>(
512 dst: &mut Dst,
513 path: &Path,
514 file: &mut fs::File,
515 mode: HeaderMode,
516) -> io::Result<()> {
517 let stat = file.metadata().await?;
518 append_fs(dst, path, &stat, file, mode, None).await?;
519 Ok(())
520}
521
522async fn append_dir<Dst: Write + Unpin + ?Sized>(
523 dst: &mut Dst,
524 path: &Path,
525 src_path: &Path,
526 mode: HeaderMode,
527) -> io::Result<()> {
528 let stat = fs::metadata(src_path).await?;
529 append_fs(dst, path, &stat, &mut io::empty(), mode, None).await?;
530 Ok(())
531}
532
533fn prepare_header(size: u64, entry_type: EntryType) -> Header {
534 let mut header = Header::new_gnu();
535 let name = b"././@LongLink";
536 header.as_gnu_mut().unwrap().name[..name.len()].clone_from_slice(&name[..]);
537 header.set_mode(0o644);
538 header.set_uid(0);
539 header.set_gid(0);
540 header.set_mtime(0);
541 // + 1 to be compliant with GNU tar
542 header.set_size(size + 1);
543 header.set_entry_type(entry_type);
544 header.set_cksum();
545 header
546}
547
548async fn prepare_header_path<Dst: Write + Unpin + ?Sized>(
549 dst: &mut Dst,
550 header: &mut Header,
551 path: &Path,
552) -> io::Result<()> {
553 // Try to encode the path directly in the header, but if it ends up not
554 // working (probably because it's too long) then try to use the GNU-specific
555 // long name extension by emitting an entry which indicates that it's the
556 // filename.
557 if let Err(e) = header.set_path(path) {
558 let data = path2bytes(path)?;
559 let max = header.as_old().name.len();
560 // Since e isn't specific enough to let us know the path is indeed too
561 // long, verify it first before using the extension.
562 if data.len() < max {
563 return Err(e);
564 }
565 let header2 = prepare_header(data.len() as u64, EntryType::GNULongName);
566 // null-terminated string
567 let mut data2 = data.chain(io::repeat(0).take(1));
568 append(dst, &header2, &mut data2).await?;
569 // Truncate the path to store in the header we're about to emit to
570 // ensure we've got something at least mentioned.
571 let path = bytes2path(Cow::Borrowed(&data[..max]))?;
572 header.set_path(&path)?;
573 }
574 Ok(())
575}
576
577async fn prepare_header_link<Dst: Write + Unpin + ?Sized>(
578 dst: &mut Dst,
579 header: &mut Header,
580 link_name: &Path,
581) -> io::Result<()> {
582 // Same as previous function but for linkname
583 if let Err(e) = header.set_link_name(link_name) {
584 let data = path2bytes(link_name)?;
585 if data.len() < header.as_old().linkname.len() {
586 return Err(e);
587 }
588 let header2 = prepare_header(data.len() as u64, EntryType::GNULongLink);
589 let mut data2 = data.chain(io::repeat(0).take(1));
590 append(dst, &header2, &mut data2).await?;
591 }
592 Ok(())
593}
594
595async fn append_fs<Dst: Write + Unpin + ?Sized, R: Read + Unpin + ?Sized>(
596 dst: &mut Dst,
597 path: &Path,
598 meta: &Metadata,
599 read: &mut R,
600 mode: HeaderMode,
601 link_name: Option<&Path>,
602) -> io::Result<()> {
603 let mut header = Header::new_gnu();
604
605 prepare_header_path(dst, &mut header, path).await?;
606 header.set_metadata_in_mode(meta, mode);
607 if let Some(link_name) = link_name {
608 prepare_header_link(dst, &mut header, link_name).await?;
609 }
610 header.set_cksum();
611 append(dst, &header, read).await?;
612
613 Ok(())
614}
615
616async fn append_dir_all<Dst: Write + Unpin + ?Sized>(
617 dst: &mut Dst,
618 path: &Path,
619 src_path: &Path,
620 mode: HeaderMode,
621 follow: bool,
622) -> io::Result<()> {
623 let mut stack = vec![(src_path.to_path_buf(), true, false)];
624 while let Some((src, is_dir, is_symlink)) = stack.pop() {
625 let dest = path.join(src.strip_prefix(src_path).unwrap());
626
627 // In case of a symlink pointing to a directory, is_dir is false, but src.is_dir() will return true
628 if is_dir || (is_symlink && follow && src.is_dir()) {
629 let mut entries = fs::read_dir(&src).await?;
630 while let Some(entry) = entries.next_entry().await.transpose() {
631 let entry = entry?;
632 let file_type = entry.file_type().await?;
633 stack.push((entry.path(), file_type.is_dir(), file_type.is_symlink()));
634 }
635 if dest != Path::new("") {
636 append_dir(dst, &dest, &src, mode).await?;
637 }
638 } else if !follow && is_symlink {
639 let stat = fs::symlink_metadata(&src).await?;
640 let link_name = fs::read_link(&src).await?;
641 append_fs(dst, &dest, &stat, &mut io::empty(), mode, Some(&link_name)).await?;
642 } else {
643 append_file(dst, &dest, &mut fs::File::open(src).await?, mode).await?;
644 }
645 }
646 Ok(())
647}
648
649impl<W: Write + Unpin + Send> Drop for Builder<W> {
650 fn drop(&mut self) {
651 // TODO: proper async cancellation
652 if !self.finished {
653 if let Some(cancellation) = self.cancellation.take() {
654 cancellation.send(self.obj.take().unwrap()).ok();
655 }
656 }
657 }
658}