1use std::{
75 future::poll_fn,
76 ops::Range,
77 pin::Pin,
78 sync::Arc,
79 task::{Context, Poll},
80};
81
82use tokio::io::{AsyncRead, ReadBuf};
83
84use crate::{
85 ArchiveFormat, BLOCK_SIZE, Block, DEFAULT_MAX_GLOBAL_PAX_EXTENSIONS_SIZE,
86 DEFAULT_MAX_GNU_EXTENSION_SIZE, DEFAULT_MAX_PAX_EXTENSION_SIZE, FrameError, FrameErrorInner,
87 GnuKind, HdrCharset, PaxError, PaxKind, PaxRecord, PaxState, PaxValue, UstarKind,
88 header::{
89 CHECKSUM_RANGE, GID_RANGE, GNAME_RANGE, GNU_IDENTITY, IDENTITY_RANGE, MODE_RANGE,
90 MTIME_RANGE, NAME_RANGE, PREFIX_RANGE, SIZE_RANGE, TYPEFLAG_OFFSET, UID_RANGE, UNAME_RANGE,
91 USTAR_IDENTITY, checksum, is_all_nul, parse_number, parse_octal,
92 },
93 pax::{GlobalPaxRecords, PaxRecords, SharedPaxRecords},
94};
95
/// A complete physical block paired with the stream position at which it began.
type PositionedBlock = (u64, Block);
97
/// One unit of output produced by the framing parser.
#[derive(Clone, Debug, Eq, PartialEq)]
pub enum Frame {
    /// Header block of a pax extended header (local or global).
    Pax(PaxFrame),
    /// Header block of a GNU long-name / long-link metadata extension.
    Gnu(GnuFrame),
    /// Header block of an ordinary archive member.
    Header(HeaderFrame),
    /// One physical block of payload belonging to an extension or a member.
    Data(DataFrame),
}
110
/// Header block that introduces a pax extended-header payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct PaxFrame {
    /// Stream position of the header block.
    pub position: u64,
    /// The raw header block.
    pub block: Block,
    /// Whether the extension is local or global.
    pub kind: PaxKind,
    /// Payload size taken from the header's size field.
    pub payload_size: u64,
}
123
/// Header block that introduces a GNU metadata extension payload.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct GnuFrame {
    /// Stream position of the header block.
    pub position: u64,
    /// The raw header block.
    pub block: Block,
    /// Which GNU extension this is (long name or long link).
    pub kind: GnuKind,
    /// Payload size taken from the header's size field.
    pub payload_size: u64,
}
136
/// Validated header block of an ordinary archive member.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct HeaderFrame {
    /// Stream position of the header block.
    pub position: u64,
    /// The raw header block.
    pub block: Block,
    /// Archive format the header was parsed as.
    pub format: ArchiveFormat,
    /// Member kind decoded from the type flag.
    pub kind: UstarKind,
    /// Size taken from the header's size field.
    pub declared_size: u64,
    /// Size used to frame the member payload: a pax `size` override when one
    /// applies, otherwise equal to `declared_size`.
    pub effective_size: u64,
    // The numeric fields below are `None` only when the raw field is entirely
    // NUL and the parser was configured to tolerate that.
    pub(crate) mode: Option<u64>,
    pub(crate) uid: Option<u64>,
    pub(crate) gid: Option<u64>,
    pub(crate) mtime: Option<u64>,
}
164
165impl HeaderFrame {
166 fn ustar(
167 position: u64,
168 block: Block,
169 typeflag: u8,
170 declared_size: u64,
171 local_records: Option<&PaxRecords>,
172 global_records: Option<&GlobalPaxRecords>,
173 allow_all_nul_numeric_fields: bool,
174 ) -> Result<Self, FrameError> {
175 let kind = UstarKind::try_from_framed(position, typeflag)?;
176
177 let parse_numeric_field = |field, range: Range<usize>| {
181 Self::parse_numeric_field(
182 position,
183 ArchiveFormat::Pax,
184 field,
185 &block[range],
186 allow_all_nul_numeric_fields,
187 )
188 };
189 let mode = parse_numeric_field("mode", MODE_RANGE)?;
190 let uid = parse_numeric_field("uid", UID_RANGE)?;
191 let gid = parse_numeric_field("gid", GID_RANGE)?;
192 let mtime = parse_numeric_field("mtime", MTIME_RANGE)?;
193
194 let validate_string_field = |field: &'static str, bytes: &[u8]| {
195 if bytes.contains(&0) {
196 Ok(())
197 } else {
198 Err(FrameError::at(
199 position,
200 FrameErrorInner::UnterminatedUstarStringField { field },
201 ))
202 }
203 };
204 validate_string_field("uname", &block[UNAME_RANGE])?;
205 validate_string_field("gname", &block[GNAME_RANGE])?;
206
207 let effective_size = PaxState::effective_size(local_records, global_records).map_or(
213 Ok(declared_size),
214 |size| match size {
215 PaxValue::Value(size) => Ok(*size),
216 PaxValue::Deleted => Err(FrameError::deleted_pax_metadata(position, "size")),
217 },
218 )?;
219 validate_pax_member_size(position, kind, declared_size, effective_size)?;
220
221 Ok(Self {
222 position,
223 block,
224 format: ArchiveFormat::Pax,
225 kind,
226 declared_size,
227 effective_size,
228 mode,
229 uid,
230 gid,
231 mtime,
232 })
233 }
234
235 fn gnu(
236 position: u64,
237 block: Block,
238 typeflag: u8,
239 declared_size: u64,
240 require_link_kind: bool,
241 allow_all_nul_numeric_fields: bool,
242 ) -> Result<Self, FrameError> {
243 let kind = UstarKind::try_from_framed(position, typeflag)?;
244 if require_link_kind && !matches!(kind, UstarKind::HardLink | UstarKind::SymbolicLink) {
245 return Err(FrameError::unexpected_order(
246 position,
247 "hard-link or symbolic-link member after GNU long-link extension",
248 "non-link ordinary member",
249 ));
250 }
251 validate_gnu_member_size(position, kind, declared_size)?;
252 let parse_numeric_field = |field, range: Range<usize>| {
253 Self::parse_numeric_field(
254 position,
255 ArchiveFormat::Gnu,
256 field,
257 &block[range],
258 allow_all_nul_numeric_fields,
259 )
260 };
261 let mode = parse_numeric_field("mode", MODE_RANGE)?;
262 let uid = parse_numeric_field("uid", UID_RANGE)?;
263 let gid = parse_numeric_field("gid", GID_RANGE)?;
264 let mtime = parse_numeric_field("mtime", MTIME_RANGE)?;
265
266 Ok(Self {
267 position,
268 block,
269 format: ArchiveFormat::Gnu,
270 kind,
271 declared_size,
272 effective_size: declared_size,
273 mode,
274 uid,
275 gid,
276 mtime,
277 })
278 }
279
280 fn parse_numeric_field(
281 position: u64,
282 format: ArchiveFormat,
283 field: &'static str,
284 bytes: &[u8],
285 allow_all_nul_numeric_fields: bool,
286 ) -> Result<Option<u64>, FrameError> {
287 if allow_all_nul_numeric_fields && is_all_nul(bytes) {
288 return Ok(None);
289 }
290 parse_number(format, bytes).map(Some).ok_or_else(|| {
291 FrameError::at(
292 position,
293 FrameErrorInner::InvalidNumericField {
294 field,
295 found: bytes.to_vec(),
296 },
297 )
298 })
299 }
300
301 pub(crate) fn copy_header_path_into(&self, path: &mut Vec<u8>) {
302 path.clear();
303 let name = trim_nul(&self.block[NAME_RANGE]);
304 if self.format == ArchiveFormat::Gnu {
305 path.extend_from_slice(name);
306 return;
307 }
308 let prefix = trim_nul(&self.block[PREFIX_RANGE]);
309 if !prefix.is_empty() {
310 path.extend_from_slice(prefix);
311 path.push(b'/');
312 }
313 path.extend_from_slice(name);
314 }
315}
316
/// Identifies what a payload block belongs to.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
pub enum DataOwner {
    /// Payload of a pax extended header of the given kind.
    Pax(PaxKind),
    /// Payload of a GNU metadata extension of the given kind.
    Gnu(GnuKind),
    /// Payload of an ordinary archive member.
    Member,
}
327
/// One physical block of payload data.
#[derive(Clone, Debug, Eq, PartialEq)]
pub struct DataFrame {
    /// Stream position of the block.
    pub position: u64,
    /// The raw block, including any padding after the payload bytes.
    pub block: Block,
    /// Number of leading bytes of `block` that belong to the payload.
    pub len: usize,
    /// What the payload belongs to.
    pub owner: DataOwner,
    // Parsed records, set only on the block that completes a pax
    // extended-header payload.
    completed_pax_records: Option<SharedPaxRecords>,
}
348
349impl DataFrame {
350 pub fn completed_pax_records(&self) -> Option<&[PaxRecord]> {
355 self.completed_pax_records
356 .as_deref()
357 .map(PaxRecords::as_slice)
358 }
359
360 pub(crate) fn into_completed_pax_records(self) -> Option<SharedPaxRecords> {
361 self.completed_pax_records
362 }
363}
364
/// Parser state between physical blocks.
#[derive(Debug)]
pub(super) enum State {
    /// Expecting a header block or the first all-zero end-marker block.
    AwaitingHeader,
    /// Accumulating the payload of a pax extended header.
    ReadingPax {
        /// Local or global extension.
        kind: PaxKind,
        /// Position of the pax header block, used when reporting record errors.
        header_position: u64,
        /// Payload bytes still to be read.
        remaining: u64,
        /// Payload bytes collected so far.
        payload: Vec<u8>,
    },
    /// A local pax payload is complete; the member header it applies to must follow.
    AwaitingUstarHeader { records: SharedPaxRecords },
    /// Skipping through the payload of a GNU metadata extension.
    ReadingGnu {
        /// Which GNU extension is being read.
        kind: GnuKind,
        /// Payload bytes still to be read.
        remaining: u64,
        /// Extensions seen so far for the upcoming member.
        pending: PendingGnu,
    },
    /// A GNU extension payload is complete; another extension or the member header must follow.
    AwaitingGnuMember { pending: PendingGnu },
    /// Reading the payload of an ordinary member.
    ReadingMember { remaining: u64 },
    /// The first all-zero block was seen; the second one must follow.
    AwaitingSecondZero,
    /// Both end-marker blocks were seen.
    Complete,
    /// An error was reported; no further frames are produced.
    Failed,
}
396
/// Tracks which GNU metadata extensions have been seen for the upcoming member.
#[derive(Clone, Copy, Debug, Default)]
pub(super) struct PendingGnu {
    /// A long-name (`L`) extension was seen.
    pub(super) long_name: bool,
    /// A long-link (`K`) extension was seen.
    pub(super) long_link: bool,
}
402
/// Bookkeeping for a multi-block read of member payload.
#[derive(Default)]
struct MemberChunk {
    /// Storage the chunk is read into; swapped with the caller's buffer.
    buffer: Vec<u8>,
    /// Stream position at which the chunk begins.
    start_position: u64,
    /// Number of bytes to read from the stream, a whole number of blocks.
    physical_len: usize,
    /// Number of leading bytes that are member payload rather than padding.
    meaningful_len: usize,
    /// `None` when no chunk is in flight.
    state: Option<MemberChunkState>,
}
412
/// Progress of the chunk currently in flight.
#[derive(Clone, Copy)]
enum MemberChunkState {
    /// Bytes are still being read from the underlying reader.
    Reading {
        /// Member payload bytes outstanding when the chunk was started.
        member_remaining: u64,
        /// Bytes of the chunk read so far.
        filled: usize,
    },
    /// The chunk is fully read and is being handed out.
    Ready {
        /// Bytes of the chunk already handed out block by block.
        delivered: usize,
    },
}
423
/// Incremental tar framing parser over a reader `R`.
pub struct TarStream<R> {
    /// Stream position of the next physical block.
    pub(super) position: u64,
    /// The underlying reader.
    pub(super) inner: R,
    /// Partially filled physical block and the number of bytes in it.
    pub(super) block: Block,
    pub(super) block_len: usize,
    /// Archive format, fixed by the first parsed header.
    pub(super) format: Option<ArchiveFormat>,
    /// Global pax records accumulated so far.
    pub(super) global_pax_records: Option<GlobalPaxRecords>,
    // Limits and running totals applied to extension payload sizes.
    max_pax_extension_size: u64,
    max_global_pax_extensions_size: u64,
    global_pax_extensions_size: u64,
    allow_all_nul_numeric_fields: bool,
    max_gnu_extension_size: u64,
    /// Multi-block member read in flight, if any.
    member_chunk: MemberChunk,
    /// Current parser state.
    pub(super) state: State,
}
443
444impl<R> TarStream<R> {
445 pub fn new(reader: R) -> Self {
447 Self {
448 position: 0,
449 inner: reader,
450 block: [0; BLOCK_SIZE],
451 block_len: 0,
452 format: None,
453 global_pax_records: None,
454 max_pax_extension_size: DEFAULT_MAX_PAX_EXTENSION_SIZE,
455 max_global_pax_extensions_size: DEFAULT_MAX_GLOBAL_PAX_EXTENSIONS_SIZE,
456 global_pax_extensions_size: 0,
457 allow_all_nul_numeric_fields: true,
458 max_gnu_extension_size: DEFAULT_MAX_GNU_EXTENSION_SIZE,
459 member_chunk: MemberChunk::default(),
460 state: State::AwaitingHeader,
461 }
462 }
463
464 pub fn set_max_pax_extension_size(&mut self, max_pax_extension_size: u64) {
472 self.max_pax_extension_size = max_pax_extension_size;
473 }
474
475 pub fn set_max_global_pax_extensions_size(&mut self, max_global_pax_extensions_size: u64) {
484 self.max_global_pax_extensions_size = max_global_pax_extensions_size;
485 }
486
487 pub fn set_allow_all_nul_numeric_fields(&mut self, allow: bool) {
494 self.allow_all_nul_numeric_fields = allow;
495 }
496
497 pub fn set_max_gnu_extension_size(&mut self, max_gnu_extension_size: u64) {
503 self.max_gnu_extension_size = max_gnu_extension_size;
504 }
505
506 pub fn format(&self) -> Option<ArchiveFormat> {
508 self.format
509 }
510}
511
512impl<R: AsyncRead + Unpin> TarStream<R> {
513 pub async fn next_frame(&mut self) -> Result<Option<Frame>, FrameError> {
519 poll_fn(|context| self.poll_next_frame(context)).await
520 }
521
522 pub(crate) async fn read_member_block(&mut self) -> Result<(u64, Block, usize), FrameError> {
526 if self.member_chunk.state.is_some() {
527 self.complete_member_chunk().await?;
528 return self.take_member_block_from_chunk();
529 }
530 let remaining = match &self.state {
531 State::ReadingMember { remaining } => *remaining,
532 _ => {
533 self.state = State::Failed;
534 return Err(FrameError::unexpected_order(
535 self.position,
536 "ordinary member payload",
537 "parser state without member payload",
538 ));
539 }
540 };
541 let (position, block) = match poll_fn(|context| self.poll_read_block(context)).await {
542 Ok(Some(block)) => block,
543 Ok(None) => {
544 let error = self.handle_eof();
545 self.state = State::Failed;
546 return Err(error);
547 }
548 Err(error) => {
549 self.state = State::Failed;
550 return Err(error);
551 }
552 };
553 let meaningful_len = remaining.min(BLOCK_SIZE as u64) as usize;
554 self.state = member_payload_state(remaining - meaningful_len as u64);
555 Ok((position, block, meaningful_len))
556 }
557
558 pub(crate) async fn read_member_chunk(
563 &mut self,
564 buffer: &mut Vec<u8>,
565 target_len: usize,
566 ) -> Result<usize, FrameError> {
567 if self.member_chunk.state.is_none() && self.block_len != 0 {
570 let (_, block, meaningful_len) = self.read_member_block().await?;
571 buffer.clear();
572 buffer.extend_from_slice(&block[..meaningful_len]);
573 return Ok(meaningful_len);
574 }
575 if self.member_chunk.state.is_none() {
576 self.start_member_chunk(buffer, target_len)?;
577 }
578 self.complete_member_chunk().await?;
579 self.take_member_chunk(buffer)
580 }
581
582 fn start_member_chunk(
583 &mut self,
584 buffer: &mut Vec<u8>,
585 target_len: usize,
586 ) -> Result<(), FrameError> {
587 let member_remaining = match &self.state {
588 State::ReadingMember { remaining } => *remaining,
589 _ => {
590 self.state = State::Failed;
591 return Err(FrameError::unexpected_order(
592 self.position,
593 "ordinary member payload",
594 "parser state without member payload",
595 ));
596 }
597 };
598 if self.block_len != 0 {
599 self.state = State::Failed;
600 return Err(FrameError::unexpected_order(
601 self.position,
602 "aligned ordinary member payload",
603 "partially buffered physical block",
604 ));
605 }
606
607 let target_len = u64::try_from(target_len.max(BLOCK_SIZE)).map_err(|_| {
608 FrameError::arithmetic_overflow(self.position, "member payload chunk target length")
609 })?;
610 let physical_len = member_remaining
611 .min(target_len)
612 .div_ceil(BLOCK_SIZE as u64)
613 .checked_mul(BLOCK_SIZE as u64)
614 .ok_or_else(|| {
615 FrameError::arithmetic_overflow(
616 self.position,
617 "member payload chunk physical length",
618 )
619 })?;
620 let meaningful_len = member_remaining.min(physical_len);
621 let physical_len = usize::try_from(physical_len).map_err(|_| {
622 FrameError::arithmetic_overflow(self.position, "member payload chunk physical length")
623 })?;
624 let meaningful_len = usize::try_from(meaningful_len).map_err(|_| {
625 FrameError::arithmetic_overflow(self.position, "member payload chunk meaningful length")
626 })?;
627
628 self.member_chunk.buffer.clear();
631 std::mem::swap(buffer, &mut self.member_chunk.buffer);
632 if self.member_chunk.buffer.len() != physical_len {
633 self.member_chunk.buffer.resize(physical_len, 0);
634 }
635 self.member_chunk.start_position = self.position;
636 self.member_chunk.physical_len = physical_len;
637 self.member_chunk.meaningful_len = meaningful_len;
638 self.member_chunk.state = Some(MemberChunkState::Reading {
639 member_remaining,
640 filled: 0,
641 });
642 Ok(())
643 }
644
    /// Drives the in-flight chunk until all of its physical bytes are read.
    ///
    /// Returns immediately when the chunk is already `Ready`. Progress is
    /// written back to the chunk state after every read. On completion the
    /// stream position and parser state are advanced past the chunk.
    ///
    /// # Errors
    ///
    /// Fails when no chunk is in flight, on an I/O error, on end of input
    /// before the chunk is full, or on arithmetic overflow. Every failure
    /// moves the parser to the failed state; all except the missing-chunk
    /// case also clear the chunk.
    async fn complete_member_chunk(&mut self) -> Result<(), FrameError> {
        loop {
            let (member_remaining, filled) = match self.member_chunk.state {
                Some(MemberChunkState::Reading {
                    member_remaining,
                    filled,
                }) => (member_remaining, filled),
                Some(MemberChunkState::Ready { .. }) => return Ok(()),
                None => {
                    self.state = State::Failed;
                    return Err(FrameError::unexpected_order(
                        self.position,
                        "pending member payload chunk",
                        "parser state without a pending chunk",
                    ));
                }
            };
            let start_position = self.member_chunk.start_position;
            let physical_len = self.member_chunk.physical_len;
            let meaningful_len = self.member_chunk.meaningful_len;
            if filled == physical_len {
                // The whole chunk is buffered: account for it in the stream
                // position and in the remaining member payload.
                self.position =
                    checked_position(start_position, physical_len).inspect_err(|_| {
                        self.state = State::Failed;
                        self.member_chunk.state = None;
                    })?;
                let remaining = member_remaining
                    .checked_sub(meaningful_len as u64)
                    .ok_or_else(|| {
                        self.state = State::Failed;
                        self.member_chunk.state = None;
                        FrameError::arithmetic_overflow(
                            start_position,
                            "remaining member payload length",
                        )
                    })?;
                self.state = member_payload_state(remaining);
                self.member_chunk.state = Some(MemberChunkState::Ready { delivered: 0 });
                return Ok(());
            }

            // Read straight into the unfilled tail of the chunk buffer.
            let read = match poll_fn(|context| {
                let mut read_buffer =
                    ReadBuf::new(&mut self.member_chunk.buffer[filled..physical_len]);
                match Pin::new(&mut self.inner).poll_read(context, &mut read_buffer) {
                    Poll::Pending => Poll::Pending,
                    Poll::Ready(Ok(())) => Poll::Ready(Ok(read_buffer.filled().len())),
                    Poll::Ready(Err(source)) => Poll::Ready(Err(source)),
                }
            })
            .await
            {
                Ok(read) => read,
                Err(source) => {
                    self.state = State::Failed;
                    self.member_chunk.state = None;
                    // The error is reported at the exact byte reached, while the
                    // stream position is rewound to the last complete block.
                    let error_position = checked_position(start_position, filled)?;
                    self.position = checked_position(start_position, filled - filled % BLOCK_SIZE)?;
                    return Err(FrameError::at(
                        error_position,
                        FrameErrorInner::Io { source },
                    ));
                }
            };
            if read == 0 {
                // End of input before the chunk was full.
                self.state = State::Failed;
                self.member_chunk.state = None;
                let partial_len = filled % BLOCK_SIZE;
                let completed_len = filled - partial_len;
                self.position = checked_position(start_position, completed_len)?;
                if partial_len != 0 {
                    // Input ended in the middle of a physical block.
                    return Err(FrameError::at(
                        self.position,
                        FrameErrorInner::IncompleteBlock { read: partial_len },
                    ));
                }
                let completed_len = u64::try_from(completed_len).map_err(|_| {
                    FrameError::arithmetic_overflow(
                        self.position,
                        "completed member payload chunk length",
                    )
                })?;
                // Input ended on a block boundary: report the payload still missing.
                return Err(FrameError::truncated_payload(
                    self.position,
                    DataOwner::Member,
                    member_remaining - member_remaining.min(completed_len),
                ));
            }
            // Persist progress before the next read.
            if let Some(MemberChunkState::Reading { filled, .. }) = &mut self.member_chunk.state {
                *filled += read;
            }
        }
    }
738
739 fn take_member_chunk(&mut self, buffer: &mut Vec<u8>) -> Result<usize, FrameError> {
740 let Some(MemberChunkState::Ready { delivered }) = self.member_chunk.state.take() else {
741 self.state = State::Failed;
742 return Err(FrameError::unexpected_order(
743 self.position,
744 "completed member payload chunk",
745 "incomplete member payload chunk",
746 ));
747 };
748 let meaningful_len = self.member_chunk.meaningful_len;
749 let remaining_len = meaningful_len.checked_sub(delivered).ok_or_else(|| {
750 self.state = State::Failed;
751 FrameError::arithmetic_overflow(self.position, "undelivered member payload length")
752 })?;
753 if delivered != 0 {
754 self.member_chunk
755 .buffer
756 .copy_within(delivered..meaningful_len, 0);
757 }
758 self.member_chunk.buffer.truncate(remaining_len);
759 std::mem::swap(buffer, &mut self.member_chunk.buffer);
760 Ok(remaining_len)
761 }
762
763 fn take_member_block_from_chunk(&mut self) -> Result<(u64, Block, usize), FrameError> {
764 let Some(MemberChunkState::Ready { delivered }) = self.member_chunk.state else {
765 self.state = State::Failed;
766 return Err(FrameError::unexpected_order(
767 self.position,
768 "completed member payload chunk",
769 "incomplete member payload chunk",
770 ));
771 };
772 let start_position = self.member_chunk.start_position;
773 let physical_len = self.member_chunk.physical_len;
774 let total_meaningful_len = self.member_chunk.meaningful_len;
775 let position = checked_position(start_position, delivered).inspect_err(|_| {
776 self.state = State::Failed;
777 self.member_chunk.state = None;
778 })?;
779 let mut block = [0; BLOCK_SIZE];
780 block.copy_from_slice(&self.member_chunk.buffer[delivered..delivered + BLOCK_SIZE]);
781 let meaningful_len = total_meaningful_len
782 .checked_sub(delivered)
783 .ok_or_else(|| {
784 self.state = State::Failed;
785 self.member_chunk.state = None;
786 FrameError::arithmetic_overflow(self.position, "undelivered member payload length")
787 })?
788 .min(BLOCK_SIZE);
789 let delivered = delivered + BLOCK_SIZE;
790 if delivered == physical_len {
791 self.member_chunk.state = None;
792 } else {
793 self.member_chunk.state = Some(MemberChunkState::Ready { delivered });
794 }
795 Ok((position, block, meaningful_len))
796 }
797
798 fn poll_read_block(
799 &mut self,
800 cx: &mut Context<'_>,
801 ) -> Poll<Result<Option<PositionedBlock>, FrameError>> {
802 while self.block_len < BLOCK_SIZE {
803 let mut read_buf = ReadBuf::new(&mut self.block[self.block_len..]);
804 match Pin::new(&mut self.inner).poll_read(cx, &mut read_buf) {
805 Poll::Pending => return Poll::Pending,
806 Poll::Ready(Err(source)) => {
807 return Poll::Ready(Err(FrameError::at(
808 self.position + self.block_len as u64,
809 FrameErrorInner::Io { source },
810 )));
811 }
812 Poll::Ready(Ok(())) => {
813 let read = read_buf.filled().len();
814 if read == 0 {
815 if self.block_len == 0 {
816 return Poll::Ready(Ok(None));
817 }
818 return Poll::Ready(Err(FrameError::at(
819 self.position,
820 FrameErrorInner::IncompleteBlock {
821 read: self.block_len,
822 },
823 )));
824 }
825 self.block_len += read;
826 }
827 }
828 }
829
830 let position = self.position;
831 self.position = self
832 .position
833 .checked_add(BLOCK_SIZE as u64)
834 .ok_or_else(|| FrameError::arithmetic_overflow(position, "stream position"))?;
835 self.block_len = 0;
836 let block = std::mem::replace(&mut self.block, [0; BLOCK_SIZE]);
837 Poll::Ready(Ok(Some((position, block))))
838 }
839
840 fn handle_eof(&mut self) -> FrameError {
841 let inner = match &self.state {
842 State::AwaitingHeader | State::AwaitingSecondZero => FrameErrorInner::MissingEndMarker,
843 State::ReadingPax {
844 kind, remaining, ..
845 } => FrameErrorInner::TruncatedPayload {
846 owner: DataOwner::Pax(*kind),
847 remaining: *remaining,
848 },
849 State::AwaitingUstarHeader { .. } => FrameErrorInner::UnexpectedEof {
850 expected: "ordinary ustar member header after a local pax header",
851 },
852 State::ReadingGnu {
853 kind, remaining, ..
854 } => FrameErrorInner::TruncatedPayload {
855 owner: DataOwner::Gnu(*kind),
856 remaining: *remaining,
857 },
858 State::AwaitingGnuMember { .. } => FrameErrorInner::UnexpectedEof {
859 expected: "ordinary GNU member header after a GNU metadata extension",
860 },
861 State::ReadingMember { remaining } => FrameErrorInner::TruncatedPayload {
862 owner: DataOwner::Member,
863 remaining: *remaining,
864 },
865 State::Complete | State::Failed => FrameErrorInner::UnexpectedEof {
866 expected: "no further input",
867 },
868 };
869 FrameError::at(self.position, inner)
870 }
871
    /// Advances the state machine by one physical block.
    ///
    /// Returns `Ok(Some(frame))` when the block produces a frame and
    /// `Ok(None)` when it is consumed silently (end-marker blocks, or blocks
    /// arriving after completion or failure).
    ///
    /// The state is replaced by `State::Failed` up front, so any early error
    /// return leaves the parser failed; each successful path installs the next
    /// state explicitly.
    fn process_block(&mut self, position: u64, block: Block) -> Result<Option<Frame>, FrameError> {
        let state = std::mem::replace(&mut self.state, State::Failed);
        match state {
            State::AwaitingHeader => {
                if is_zero_block(&block) {
                    // First half of the end-of-archive marker.
                    self.state = State::AwaitingSecondZero;
                    Ok(None)
                } else {
                    self.process_boundary_header(position, block).map(Some)
                }
            }
            State::ReadingPax {
                kind,
                header_position,
                mut remaining,
                mut payload,
            } => {
                // Only the first `len` bytes of the block are payload; the rest is padding.
                let len = remaining.min(BLOCK_SIZE as u64) as usize;
                payload.extend_from_slice(&block[..len]);
                remaining -= len as u64;
                let completed_pax_records = if remaining == 0 {
                    // Payload complete: parse it, using the global `hdrcharset`
                    // when global records exist and UTF-8 otherwise.
                    let records = Arc::new(
                        PaxRecords::parse(
                            &payload,
                            self.global_pax_records
                                .as_ref()
                                .map_or(HdrCharset::Utf8, GlobalPaxRecords::hdrcharset),
                        )
                        .map_err(|source| {
                            FrameError::invalid_pax_record(header_position, source)
                        })?,
                    );
                    match kind {
                        PaxKind::Local => {
                            // Local records apply to the member header that must follow.
                            self.state = State::AwaitingUstarHeader {
                                records: records.clone(),
                            };
                        }
                        PaxKind::Global => {
                            records.apply_global(&mut self.global_pax_records);
                            self.state = State::AwaitingHeader;
                        }
                    }
                    Some(records)
                } else {
                    self.state = State::ReadingPax {
                        kind,
                        header_position,
                        remaining,
                        payload,
                    };
                    None
                };
                Ok(Some(Frame::Data(DataFrame {
                    position,
                    block,
                    len,
                    owner: DataOwner::Pax(kind),
                    completed_pax_records,
                })))
            }
            State::AwaitingUstarHeader { records } => {
                if is_zero_block(&block) {
                    return Err(FrameError::unexpected_order(
                        position,
                        "ordinary ustar member header after a local pax header",
                        "end-of-archive marker",
                    ));
                }
                let parsed = self.parse_format_checked_header(position, &block)?;
                // A local pax header must be followed by the member it describes.
                if matches!(parsed.typeflag, b'x' | b'g') {
                    return Err(FrameError::unexpected_order(
                        position,
                        "ordinary ustar member header after a local pax header",
                        "another pax extended header",
                    ));
                }
                self.process_ustar_header(position, block, parsed, Some(records))
                    .map(Some)
            }
            State::ReadingGnu {
                kind,
                mut remaining,
                pending,
            } => {
                let len = remaining.min(BLOCK_SIZE as u64) as usize;
                remaining -= len as u64;
                if remaining == 0 {
                    self.state = State::AwaitingGnuMember { pending };
                } else {
                    self.state = State::ReadingGnu {
                        kind,
                        remaining,
                        pending,
                    };
                }
                Ok(Some(Frame::Data(DataFrame {
                    position,
                    block,
                    len,
                    owner: DataOwner::Gnu(kind),
                    completed_pax_records: None,
                })))
            }
            State::AwaitingGnuMember { pending } => {
                if is_zero_block(&block) {
                    return Err(FrameError::unexpected_order(
                        position,
                        "ordinary GNU member header after a GNU metadata extension",
                        "end-of-archive marker",
                    ));
                }
                let parsed = self.parse_format_checked_header(position, &block)?;
                self.process_gnu_header(position, block, parsed, pending)
                    .map(Some)
            }
            State::ReadingMember { mut remaining } => {
                let len = remaining.min(BLOCK_SIZE as u64) as usize;
                remaining -= len as u64;
                self.state = member_payload_state(remaining);
                Ok(Some(Frame::Data(DataFrame {
                    position,
                    block,
                    len,
                    owner: DataOwner::Member,
                    completed_pax_records: None,
                })))
            }
            State::AwaitingSecondZero => {
                // The end marker is two consecutive all-zero blocks.
                if !is_zero_block(&block) {
                    return Err(FrameError::at(position, FrameErrorInner::InvalidEndMarker));
                }
                self.state = State::Complete;
                Ok(None)
            }
            State::Complete => {
                // Restore the state that was swapped out above.
                self.state = State::Complete;
                Ok(None)
            }
            // The placeholder installed above already is `Failed`.
            State::Failed => Ok(None),
        }
    }
1014
1015 fn process_boundary_header(
1016 &mut self,
1017 position: u64,
1018 block: Block,
1019 ) -> Result<Frame, FrameError> {
1020 let parsed = self.parse_format_checked_header(position, &block)?;
1021 match parsed.format {
1022 ArchiveFormat::Pax => self.process_pax_boundary_header(position, block, parsed),
1023 ArchiveFormat::Gnu => {
1024 self.process_gnu_header(position, block, parsed, PendingGnu::default())
1025 }
1026 }
1027 }
1028
1029 fn parse_format_checked_header(
1034 &mut self,
1035 position: u64,
1036 block: &Block,
1037 ) -> Result<ParsedHeader, FrameError> {
1038 let parsed = ParsedHeader::try_from_framed(position, block)?;
1039 if let Some(expected) = self.format
1040 && parsed.format != expected
1041 {
1042 return Err(FrameError::at(
1043 position,
1044 FrameErrorInner::FormatMismatch {
1045 expected,
1046 found: parsed.format,
1047 },
1048 ));
1049 }
1050 self.format.get_or_insert(parsed.format);
1051 Ok(parsed)
1052 }
1053
1054 fn process_pax_boundary_header(
1060 &mut self,
1061 position: u64,
1062 block: Block,
1063 parsed: ParsedHeader,
1064 ) -> Result<Frame, FrameError> {
1065 match parsed.typeflag {
1066 b'x' => self.process_pax_header(position, block, parsed.size, PaxKind::Local),
1067 b'g' => self.process_pax_header(position, block, parsed.size, PaxKind::Global),
1068 _ => self.process_ustar_header(position, block, parsed, None),
1069 }
1070 }
1071
1072 fn process_pax_header(
1077 &mut self,
1078 position: u64,
1079 block: Block,
1080 payload_size: u64,
1081 kind: PaxKind,
1082 ) -> Result<Frame, FrameError> {
1083 if payload_size > self.max_pax_extension_size {
1084 return Err(FrameError::at(
1085 position,
1086 FrameErrorInner::ExtensionTooLarge {
1087 format: ArchiveFormat::Pax,
1088 size: payload_size,
1089 limit: self.max_pax_extension_size,
1090 },
1091 ));
1092 }
1093 if kind == PaxKind::Global {
1094 let size = self
1095 .global_pax_extensions_size
1096 .checked_add(payload_size)
1097 .ok_or_else(|| {
1098 FrameError::arithmetic_overflow(position, "global pax extension payload total")
1099 })?;
1100 if size > self.max_global_pax_extensions_size {
1101 return Err(FrameError::at(
1102 position,
1103 FrameErrorInner::GlobalPaxExtensionsTooLarge {
1104 size,
1105 limit: self.max_global_pax_extensions_size,
1106 },
1107 ));
1108 }
1109 self.global_pax_extensions_size = size;
1110 }
1111 if payload_size == 0 {
1112 return Err(FrameError::invalid_pax_record(
1113 position,
1114 PaxError::InvalidRecords {
1115 reason: "extended header payload contains no records",
1116 },
1117 ));
1118 }
1119 self.state = State::ReadingPax {
1120 kind,
1121 header_position: position,
1122 remaining: payload_size,
1123 payload: Vec::new(),
1124 };
1125 Ok(Frame::Pax(PaxFrame {
1126 position,
1127 block,
1128 kind,
1129 payload_size,
1130 }))
1131 }
1132
1133 fn process_ustar_header(
1139 &mut self,
1140 position: u64,
1141 block: Block,
1142 parsed: ParsedHeader,
1143 local_pax_records: Option<SharedPaxRecords>,
1144 ) -> Result<Frame, FrameError> {
1145 let frame = HeaderFrame::ustar(
1146 position,
1147 block,
1148 parsed.typeflag,
1149 parsed.size,
1150 local_pax_records.as_deref(),
1151 self.global_pax_records.as_ref(),
1152 self.allow_all_nul_numeric_fields,
1153 )?;
1154 self.global_pax_extensions_size = 0;
1155 self.state = member_payload_state(frame.effective_size);
1156 Ok(Frame::Header(frame))
1157 }
1158
1159 fn process_gnu_header(
1160 &mut self,
1161 position: u64,
1162 block: Block,
1163 parsed: ParsedHeader,
1164 mut pending: PendingGnu,
1165 ) -> Result<Frame, FrameError> {
1166 let extension = match parsed.typeflag {
1167 b'L' => Some(GnuKind::LongName),
1168 b'K' => Some(GnuKind::LongLink),
1169 _ => None,
1170 };
1171 if let Some(kind) = extension {
1172 let already_seen = match kind {
1173 GnuKind::LongName => &mut pending.long_name,
1174 GnuKind::LongLink => &mut pending.long_link,
1175 };
1176 if *already_seen {
1177 return Err(FrameError::unexpected_order(
1178 position,
1179 "ordinary GNU member header or the other GNU metadata extension",
1180 "duplicate GNU metadata extension",
1181 ));
1182 }
1183 if parsed.size > self.max_gnu_extension_size {
1184 return Err(FrameError::at(
1185 position,
1186 FrameErrorInner::ExtensionTooLarge {
1187 format: ArchiveFormat::Gnu,
1188 size: parsed.size,
1189 limit: self.max_gnu_extension_size,
1190 },
1191 ));
1192 }
1193 *already_seen = true;
1194 self.state = if parsed.size == 0 {
1195 State::AwaitingGnuMember { pending }
1196 } else {
1197 State::ReadingGnu {
1198 kind,
1199 remaining: parsed.size,
1200 pending,
1201 }
1202 };
1203 return Ok(Frame::Gnu(GnuFrame {
1204 position,
1205 block,
1206 kind,
1207 payload_size: parsed.size,
1208 }));
1209 }
1210
1211 let frame = HeaderFrame::gnu(
1212 position,
1213 block,
1214 parsed.typeflag,
1215 parsed.size,
1216 pending.long_link,
1217 self.allow_all_nul_numeric_fields,
1218 )?;
1219 self.state = member_payload_state(frame.effective_size);
1220 Ok(Frame::Header(frame))
1221 }
1222
1223 fn poll_next_frame(
1224 &mut self,
1225 context: &mut Context<'_>,
1226 ) -> Poll<Result<Option<Frame>, FrameError>> {
1227 loop {
1228 if matches!(self.state, State::Complete | State::Failed) {
1229 return Poll::Ready(Ok(None));
1230 }
1231
1232 let (position, block) = match self.poll_read_block(context) {
1233 Poll::Pending => return Poll::Pending,
1234 Poll::Ready(Ok(Some(block))) => block,
1235 Poll::Ready(Ok(None)) => {
1236 let error = self.handle_eof();
1237 self.state = State::Failed;
1238 return Poll::Ready(Err(error));
1239 }
1240 Poll::Ready(Err(error)) => {
1241 self.state = State::Failed;
1242 return Poll::Ready(Err(error));
1243 }
1244 };
1245
1246 match self.process_block(position, block) {
1247 Ok(Some(frame)) => return Poll::Ready(Ok(Some(frame))),
1248 Ok(None) => continue,
1249 Err(error) => {
1250 self.state = State::Failed;
1251 return Poll::Ready(Err(error));
1252 }
1253 }
1254 }
1255 }
1256}
1257
/// The fields of a header block needed to decide how to frame what follows.
struct ParsedHeader {
    /// Format selected by the header's identity field.
    format: ArchiveFormat,
    /// Raw type flag byte.
    typeflag: u8,
    /// Value of the size field.
    size: u64,
}
1263
/// Fallible conversion that is given the stream position of its input so
/// that a failure can be reported as a `FrameError` at that position.
trait TryFromFramed<T>: Sized {
    /// Converts `source`, which was read at `position`.
    fn try_from_framed(position: u64, source: T) -> Result<Self, FrameError>;
}
1269
1270fn is_zero_block(block: &Block) -> bool {
1271 block.iter().all(|byte| *byte == 0)
1272}
1273
/// Returns the part of `bytes` before the first NUL byte, or all of `bytes`
/// when it contains none.
fn trim_nul(bytes: &[u8]) -> &[u8] {
    match bytes.iter().position(|&byte| byte == 0) {
        Some(end) => &bytes[..end],
        None => bytes,
    }
}
1281
1282fn member_payload_state(remaining: u64) -> State {
1283 if remaining == 0 {
1284 State::AwaitingHeader
1285 } else {
1286 State::ReadingMember { remaining }
1287 }
1288}
1289
1290fn checked_position(position: u64, len: usize) -> Result<u64, FrameError> {
1291 let len = u64::try_from(len)
1292 .map_err(|_| FrameError::arithmetic_overflow(position, "stream position"))?;
1293 position
1294 .checked_add(len)
1295 .ok_or_else(|| FrameError::arithmetic_overflow(position, "stream position"))
1296}
1297
1298impl TryFromFramed<&Block> for ParsedHeader {
1299 fn try_from_framed(position: u64, block: &Block) -> Result<Self, FrameError> {
1300 let format = match &block[IDENTITY_RANGE] {
1301 identity if identity == USTAR_IDENTITY => ArchiveFormat::Pax,
1302 identity if identity == GNU_IDENTITY => ArchiveFormat::Gnu,
1303 identity => {
1304 return Err(FrameError::at(
1305 position,
1306 FrameErrorInner::InvalidIdentity {
1307 found: identity.try_into().expect("fixed header range"),
1308 },
1309 ));
1310 }
1311 };
1312
1313 let actual_checksum = checksum(block);
1314 let expected_checksum = parse_octal(&block[CHECKSUM_RANGE]);
1315 if expected_checksum != Some(actual_checksum) {
1316 return Err(FrameError::at(
1317 position,
1318 FrameErrorInner::InvalidChecksum {
1319 expected: expected_checksum,
1320 actual: actual_checksum,
1321 },
1322 ));
1323 }
1324
1325 let size_bytes: [u8; 12] = block[SIZE_RANGE].try_into().expect("fixed header range");
1326 let size = parse_number(format, &size_bytes).ok_or_else(|| {
1327 FrameError::at(position, FrameErrorInner::InvalidSize { found: size_bytes })
1328 })?;
1329
1330 Ok(Self {
1331 format,
1332 typeflag: block[TYPEFLAG_OFFSET],
1333 size,
1334 })
1335 }
1336}
1337
1338impl TryFromFramed<u8> for UstarKind {
1339 fn try_from_framed(position: u64, typeflag: u8) -> Result<Self, FrameError> {
1340 match typeflag {
1341 0 | b'0' => Ok(Self::Regular),
1342 b'1' => Ok(Self::HardLink),
1343 b'2' => Ok(Self::SymbolicLink),
1344 b'3' => Ok(Self::CharacterDevice),
1345 b'4' => Ok(Self::BlockDevice),
1346 b'5' => Ok(Self::Directory),
1347 b'6' => Ok(Self::Fifo),
1348 b'7' => Ok(Self::Contiguous),
1349 _ => Err(FrameError::at(
1350 position,
1351 FrameErrorInner::UnsupportedTypeflag { typeflag },
1352 )),
1353 }
1354 }
1355}
1356
1357fn validate_pax_member_size(
1358 position: u64,
1359 kind: UstarKind,
1360 declared_size: u64,
1361 effective_size: u64,
1362) -> Result<(), FrameError> {
1363 match kind {
1364 UstarKind::Regular | UstarKind::HardLink | UstarKind::Contiguous => Ok(()),
1369 UstarKind::SymbolicLink
1370 | UstarKind::CharacterDevice
1371 | UstarKind::BlockDevice
1372 | UstarKind::Directory
1373 | UstarKind::Fifo => {
1374 validate_payload_free_size(position, kind, declared_size)?;
1381 validate_payload_free_size(position, kind, effective_size)
1382 }
1383 }
1384}
1385
1386fn validate_gnu_member_size(position: u64, kind: UstarKind, size: u64) -> Result<(), FrameError> {
1387 match kind {
1388 UstarKind::Regular | UstarKind::Contiguous => Ok(()),
1389 UstarKind::HardLink
1390 | UstarKind::SymbolicLink
1391 | UstarKind::CharacterDevice
1392 | UstarKind::BlockDevice
1393 | UstarKind::Directory
1394 | UstarKind::Fifo => validate_payload_free_size(position, kind, size),
1395 }
1396}
1397
1398fn validate_payload_free_size(position: u64, kind: UstarKind, size: u64) -> Result<(), FrameError> {
1399 if size == 0 {
1400 Ok(())
1401 } else {
1402 Err(FrameError::at(
1403 position,
1404 FrameErrorInner::InvalidMemberSize { kind, size },
1405 ))
1406 }
1407}
1408
1409#[cfg(test)]
1410mod tests {
1411 use std::{
1412 cell::Cell,
1413 pin::Pin,
1414 rc::Rc,
1415 task::{Context, Poll},
1416 };
1417
1418 use tokio::io::ReadBuf;
1419
1420 use super::*;
1421 use crate::{
1422 ArchiveFormat, FrameError, FrameErrorInner, HdrCharset, PaxString, PaxValue,
1423 header::{DEVMAJOR_RANGE, DEVMINOR_RANGE},
1424 test_support::{
1425 ChunkedReader, append_block, append_gnu, append_pax, append_payload, append_terminator,
1426 collect_frames, gnu_base256_header, gnu_header, header, ready, record, set_checksum,
1427 },
1428 };
1429
1430 fn collect(bytes: Vec<u8>, max_chunk: usize) -> Vec<Result<Frame, FrameError>> {
1431 ready(collect_frames(TarStream::new(ChunkedReader::new(
1432 bytes, max_chunk,
1433 ))))
1434 }
1435
1436 fn collect_with_max_pax_extension_size(
1437 bytes: Vec<u8>,
1438 max_chunk: usize,
1439 max_pax_extension_size: u64,
1440 ) -> Vec<Result<Frame, FrameError>> {
1441 let mut stream = TarStream::new(ChunkedReader::new(bytes, max_chunk));
1442 stream.set_max_pax_extension_size(max_pax_extension_size);
1443 ready(collect_frames(stream))
1444 }
1445
1446 fn header_frame(frames: &[Result<Frame, FrameError>], index: usize) -> &HeaderFrame {
1447 let Ok(Frame::Header(frame)) = &frames[index] else {
1448 panic!("expected header frame");
1449 };
1450 frame
1451 }
1452
1453 fn data_frame(frames: &[Result<Frame, FrameError>], index: usize) -> &DataFrame {
1454 let Ok(Frame::Data(frame)) = &frames[index] else {
1455 panic!("expected data frame");
1456 };
1457 frame
1458 }
1459
1460 fn last_error(frames: &[Result<Frame, FrameError>]) -> &FrameError {
1461 frames
1462 .last()
1463 .expect("stream should emit an item")
1464 .as_ref()
1465 .expect_err("last item should be an error")
1466 }
1467
1468 fn last_error_inner(frames: &[Result<Frame, FrameError>]) -> &FrameErrorInner {
1469 &last_error(frames).inner
1470 }
1471
/// In-memory reader that records how many bytes it has handed out.
struct CountingReader {
    /// Bytes served by `poll_read`.
    bytes: Vec<u8>,
    /// Index in `bytes` of the next byte to serve.
    position: usize,
    /// Running total of bytes served; shared so a test can read it after the reader is moved
    /// into a stream.
    consumed: Rc<Cell<usize>>,
}
1477
1478 impl AsyncRead for CountingReader {
1479 fn poll_read(
1480 mut self: Pin<&mut Self>,
1481 _context: &mut Context<'_>,
1482 buffer: &mut ReadBuf<'_>,
1483 ) -> Poll<std::io::Result<()>> {
1484 let len = buffer
1485 .remaining()
1486 .min(self.bytes.len().saturating_sub(self.position));
1487 let end = self.position + len;
1488 buffer.put_slice(&self.bytes[self.position..end]);
1489 self.position = end;
1490 self.consumed.set(self.consumed.get() + len);
1491 Poll::Ready(Ok(()))
1492 }
1493 }
1494
/// Category of header failure a test case expects, compared against a `FrameErrorInner` by
/// `ExpectedHeaderError::matches`.
#[derive(Clone, Copy)]
enum ExpectedHeaderError {
    /// Expects `FrameErrorInner::InvalidIdentity`, whatever bytes it reports.
    InvalidIdentity,
    /// Expects `FrameErrorInner::InvalidChecksum`, whatever values it reports.
    InvalidChecksum,
    /// Expects `FrameErrorInner::InvalidSize`, whatever bytes it reports.
    InvalidSize,
    /// Expects `FrameErrorInner::InvalidNumericField` naming this field.
    InvalidNumericField(&'static str),
    /// Expects `FrameErrorInner::UnterminatedUstarStringField` naming this field.
    UnterminatedUstarStringField(&'static str),
    /// Expects `FrameErrorInner::UnsupportedTypeflag` carrying this typeflag byte.
    UnsupportedTypeflag(u8),
}
1504
1505 impl ExpectedHeaderError {
1506 fn matches(self, error: &FrameErrorInner) -> bool {
1507 match (self, error) {
1508 (Self::InvalidIdentity, FrameErrorInner::InvalidIdentity { .. })
1509 | (Self::InvalidChecksum, FrameErrorInner::InvalidChecksum { .. })
1510 | (Self::InvalidSize, FrameErrorInner::InvalidSize { .. }) => true,
1511 (
1512 Self::InvalidNumericField(field),
1513 FrameErrorInner::InvalidNumericField { field: found, .. },
1514 )
1515 | (
1516 Self::UnterminatedUstarStringField(field),
1517 FrameErrorInner::UnterminatedUstarStringField { field: found },
1518 ) => field == *found,
1519 (
1520 Self::UnsupportedTypeflag(typeflag),
1521 FrameErrorInner::UnsupportedTypeflag { typeflag: found },
1522 ) => typeflag == *found,
1523 _ => false,
1524 }
1525 }
1526 }
1527
1528 fn checksummed_header(mutate: impl FnOnce(&mut Block)) -> Block {
1529 let mut block = header(b'0', 0);
1530 mutate(&mut block);
1531 set_checksum(&mut block);
1532 block
1533 }
1534
1535 fn invalid_header_cases() -> Vec<(&'static str, Block, ExpectedHeaderError)> {
1536 let mut bad_magic = header(b'0', 0);
1537 bad_magic[IDENTITY_RANGE.start] = b'g';
1538 let mut bad_version = header(b'0', 0);
1539 bad_version[IDENTITY_RANGE.end - 2..IDENTITY_RANGE.end].copy_from_slice(b" ");
1540 let mut bad_checksum = header(b'0', 0);
1541 bad_checksum[0] = b'X';
1542
1543 vec![
1544 ("magic", bad_magic, ExpectedHeaderError::InvalidIdentity),
1545 ("version", bad_version, ExpectedHeaderError::InvalidIdentity),
1546 (
1547 "checksum",
1548 bad_checksum,
1549 ExpectedHeaderError::InvalidChecksum,
1550 ),
1551 (
1552 "octal size",
1553 checksummed_header(|block| {
1554 block[SIZE_RANGE].copy_from_slice(b"00000000008\0");
1555 }),
1556 ExpectedHeaderError::InvalidSize,
1557 ),
1558 (
1559 "base256 size",
1560 checksummed_header(|block| block[SIZE_RANGE.start] = 0x80),
1561 ExpectedHeaderError::InvalidSize,
1562 ),
1563 (
1564 "octal mode",
1565 checksummed_header(|block| {
1566 block[MODE_RANGE].copy_from_slice(b"0000080\0");
1567 }),
1568 ExpectedHeaderError::InvalidNumericField("mode"),
1569 ),
1570 (
1571 "uid",
1572 checksummed_header(|block| {
1573 block[UID_RANGE].copy_from_slice(b"invalid\0");
1574 }),
1575 ExpectedHeaderError::InvalidNumericField("uid"),
1576 ),
1577 (
1578 "gid",
1579 checksummed_header(|block| block[GID_RANGE.start] = b'8'),
1580 ExpectedHeaderError::InvalidNumericField("gid"),
1581 ),
1582 (
1583 "mtime",
1584 checksummed_header(|block| {
1585 block[MTIME_RANGE].copy_from_slice(b"00000000008\0");
1586 }),
1587 ExpectedHeaderError::InvalidNumericField("mtime"),
1588 ),
1589 (
1590 "uname",
1591 checksummed_header(|block| block[UNAME_RANGE].fill(b'u')),
1592 ExpectedHeaderError::UnterminatedUstarStringField("uname"),
1593 ),
1594 (
1595 "gname",
1596 checksummed_header(|block| block[GNAME_RANGE].fill(b'g')),
1597 ExpectedHeaderError::UnterminatedUstarStringField("gname"),
1598 ),
1599 (
1600 "POSIX typeflag",
1601 header(b'X', 0),
1602 ExpectedHeaderError::UnsupportedTypeflag(b'X'),
1603 ),
1604 (
1605 "GNU typeflag",
1606 header(b'L', 0),
1607 ExpectedHeaderError::UnsupportedTypeflag(b'L'),
1608 ),
1609 ]
1610 }
1611
#[test]
fn frames_bare_member_across_fragmented_reads() {
    // One regular member whose 513-byte payload spans a full block plus one byte.
    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', 513));
    append_payload(&mut archive, &[b'a'; BLOCK_SIZE]);
    append_payload(&mut archive, b"b");
    append_terminator(&mut archive);

    // A maximum chunk of 7 keeps reads far smaller than a block.
    let frames = collect(archive, 7);
    assert_eq!(frames.len(), 3);

    let member = header_frame(&frames, 0);
    assert_eq!(member.kind, UstarKind::Regular);
    assert_eq!(member.declared_size, 513);
    assert_eq!(member.effective_size, 513);

    let (full_block, tail) = (data_frame(&frames, 1), data_frame(&frames, 2));
    assert_eq!(full_block.len, BLOCK_SIZE);
    assert_eq!(tail.len, 1);
    assert_eq!(tail.owner, DataOwner::Member);
    assert!(full_block.completed_pax_records().is_none());
    assert!(tail.completed_pax_records().is_none());
}
1634
#[test]
fn frames_multiblock_pax_records_and_applies_size_override() {
    // A comment long enough to push the extension payload past one block, then a size record.
    let mut records = record("comment", &"x".repeat(BLOCK_SIZE));
    records.extend_from_slice(&record("size", "513"));
    assert!(records.len() > BLOCK_SIZE);

    // The member header declares 1 byte, but 513 bytes of payload follow it.
    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &records);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, &[b'a'; BLOCK_SIZE]);
    append_payload(&mut archive, b"b");
    append_terminator(&mut archive);

    let frames = collect(archive, 19);
    assert_eq!(frames.len(), 6);

    match frames[0].as_ref().unwrap() {
        Frame::Pax(pax) => {
            assert_eq!(pax.kind, PaxKind::Local);
            assert_eq!(pax.payload_size, records.len() as u64);
        }
        _ => panic!("expected pax header"),
    }

    // Parsed records are only attached to the block that completes the extension payload.
    let leading = data_frame(&frames, 1);
    assert_eq!(leading.owner, DataOwner::Pax(PaxKind::Local));
    assert!(leading.completed_pax_records().is_none());

    let closing = data_frame(&frames, 2);
    assert_eq!(closing.owner, DataOwner::Pax(PaxKind::Local));
    let last_record = closing
        .completed_pax_records()
        .and_then(|parsed| parsed.last());
    assert_eq!(last_record, Some(&PaxRecord::Size(PaxValue::Value(513))));

    let member = header_frame(&frames, 3);
    assert_eq!(member.declared_size, 1);
    assert_eq!(member.effective_size, 513);
    assert_eq!(data_frame(&frames, 5).len, 1);
}
1672
#[test]
fn rejects_oversized_pax_extensions_before_consuming_payload() {
    let mut records = record("comment", "metadata");
    records.extend_from_slice(&record("mtime", "1"));
    let declared_size = u64::try_from(records.len()).expect("payload size should fit u64");
    // One byte less than the payload needs.
    let too_small = declared_size - 1;

    for (case, typeflag) in [("local", b'x'), ("global", b'g')] {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &records);

        let frames = collect_with_max_pax_extension_size(archive, BLOCK_SIZE, too_small);
        // The error is the only item: no extension frame precedes it.
        assert_eq!(frames.len(), 1, "{case}");
        assert!(matches!(
            last_error(&frames),
            FrameError {
                position: 0,
                inner: FrameErrorInner::ExtensionTooLarge {
                    format: ArchiveFormat::Pax,
                    size,
                    limit,
                },
            } if *size == declared_size && *limit == too_small
        ));
    }

    // Without an explicit limit the default one applies.
    let oversized = DEFAULT_MAX_PAX_EXTENSION_SIZE + 1;
    let frames = collect(header(b'x', oversized).to_vec(), BLOCK_SIZE);
    assert_eq!(frames.len(), 1);
    assert!(matches!(
        last_error(&frames),
        FrameError {
            position: 0,
            inner: FrameErrorInner::ExtensionTooLarge {
                format: ArchiveFormat::Pax,
                size,
                limit: DEFAULT_MAX_PAX_EXTENSION_SIZE,
            },
        } if *size == oversized
    ));
}
1713
#[test]
fn oversized_pax_extension_does_not_read_its_payload_block() {
    // A local PAX header declaring one payload byte, followed by a zeroed payload block.
    let mut archive = header(b'x', 1).to_vec();
    archive.resize(BLOCK_SIZE * 2, 0);

    let consumed = Rc::new(Cell::new(0));
    let mut stream = TarStream::new(CountingReader {
        bytes: archive,
        position: 0,
        consumed: Rc::clone(&consumed),
    });
    stream.set_max_pax_extension_size(0);

    assert!(matches!(
        ready(stream.next_frame()),
        Err(FrameError {
            position: 0,
            inner: FrameErrorInner::ExtensionTooLarge {
                format: ArchiveFormat::Pax,
                size: 1,
                limit: 0,
            },
        })
    ));
    // Only the header block was pulled from the reader.
    assert_eq!(consumed.get(), BLOCK_SIZE);
}
1740
#[test]
fn accepts_pax_extensions_at_the_configured_limit() {
    let mut records = record("comment", "metadata");
    records.extend_from_slice(&record("ACME.attribute", "value"));
    // The limit equals the payload size exactly.
    let limit = u64::try_from(records.len()).expect("payload size should fit u64");

    for (case, typeflag) in [("local", b'x'), ("global", b'g')] {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &records);
        // Only the local extension is followed by a member header here.
        if typeflag == b'x' {
            append_block(&mut archive, &header(b'0', 0));
        }
        append_terminator(&mut archive);

        let frames = collect_with_max_pax_extension_size(archive, 7, limit);
        assert!(frames.iter().all(Result::is_ok), "{case}");
    }
}
1764
#[test]
fn applies_global_pax_records_overrides_and_rejects_size_deletions() {
    let mut initial_global = record("comment", "old");
    initial_global.extend_from_slice(&record("size", "2"));
    let replacement_global = record("comment", "new");
    let mut local = record("comment", "local");
    local.extend_from_slice(&record("size", "3"));
    // Records with an empty value; the assertions below expect them to parse as deletions.
    let mut deletion = record("comment", "");
    deletion.extend_from_slice(&record("size", ""));

    // Layout: two global extensions, a member, a local extension with its member, a global
    // extension carrying the deletions, and finally a directory header.
    let mut bytes = Vec::new();
    append_pax(&mut bytes, b'g', &initial_global);
    append_pax(&mut bytes, b'g', &replacement_global);
    append_block(&mut bytes, &header(b'0', 1));
    append_payload(&mut bytes, b"ab");
    append_pax(&mut bytes, b'x', &local);
    append_block(&mut bytes, &header(b'0', 1));
    append_payload(&mut bytes, b"abc");
    append_pax(&mut bytes, b'g', &deletion);
    append_block(&mut bytes, &header(b'5', 1));
    append_terminator(&mut bytes);

    let frames = collect(bytes, 31);
    // Global extensions surface both as a header frame and as data frames.
    assert!(frames.iter().any(|frame| matches!(
        frame,
        Ok(Frame::Pax(PaxFrame {
            kind: PaxKind::Global,
            ..
        }))
    )));
    assert!(frames.iter().any(|frame| matches!(
        frame,
        Ok(Frame::Data(DataFrame {
            owner: DataOwner::Pax(PaxKind::Global),
            ..
        }))
    )));
    // Collect the parsed records of every completed global extension, in stream order.
    let completed_global_payloads: Vec<&[PaxRecord]> = frames
        .iter()
        .filter_map(|frame| match frame {
            Ok(Frame::Data(frame)) if frame.owner == DataOwner::Pax(PaxKind::Global) => {
                frame.completed_pax_records()
            }
            _ => None,
        })
        .collect();
    assert_eq!(completed_global_payloads.len(), 3);
    assert_eq!(
        completed_global_payloads[2],
        [
            PaxRecord::Comment(PaxValue::Deleted),
            PaxRecord::Size(PaxValue::Deleted),
        ]
    );
    // Only the two regular members produce header frames; the directory header is where the
    // stream fails.
    let headers: Vec<&HeaderFrame> = frames
        .iter()
        .filter_map(|frame| match frame {
            Ok(Frame::Header(header)) => Some(header),
            _ => None,
        })
        .collect();
    assert_eq!(headers.len(), 2);
    // The first member takes its size from the global record, the second from the local one.
    assert_eq!(headers[0].effective_size, 2);
    assert_eq!(headers[1].effective_size, 3);
    assert!(frames.iter().any(|frame| {
        matches!(
            frame,
            Ok(Frame::Data(frame))
                if frame.owner == DataOwner::Pax(PaxKind::Local)
                    && frame.completed_pax_records() == Some(local_records("local", 3).as_slice())
        )
    }));
    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
    ));
}
1842
1843 fn local_records(comment: &str, size: u64) -> Vec<PaxRecord> {
1844 vec![
1845 PaxRecord::Comment(PaxValue::Value(comment.into())),
1846 PaxRecord::Size(PaxValue::Value(size)),
1847 ]
1848 }
1849
#[test]
fn allows_local_size_deletion_when_a_later_record_restores_size() {
    // Within one local extension: an empty size record, then a size record with a value.
    let mut local = record("size", "");
    local.extend_from_slice(&record("size", "2"));

    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &local);
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"ab");
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(header_frame(&frames, 2).effective_size, 2);

    // Both records are reported, in the order they were written.
    let expected = [
        PaxRecord::Size(PaxValue::Deleted),
        PaxRecord::Size(PaxValue::Value(2)),
    ];
    assert_eq!(
        data_frame(&frames, 1).completed_pax_records(),
        Some(expected.as_slice())
    );
}
1874
#[test]
fn pax_records_do_not_make_malformed_ordinary_header_fields_valid() {
    // Each case is (label, extension typeflag, PAX records, malformed member header, expected
    // error). The records name the very field that the member header corrupts.
    let cases = [
        (
            "local uid",
            b'x',
            record("uid", "1"),
            checksummed_header(|block| block[UID_RANGE].fill(b'u')),
            ExpectedHeaderError::InvalidNumericField("uid"),
        ),
        (
            "global gid",
            b'g',
            record("gid", "2"),
            checksummed_header(|block| block[GID_RANGE].fill(b'g')),
            ExpectedHeaderError::InvalidNumericField("gid"),
        ),
        (
            "local mtime",
            b'x',
            record("mtime", "3"),
            checksummed_header(|block| block[MTIME_RANGE].fill(b'm')),
            ExpectedHeaderError::InvalidNumericField("mtime"),
        ),
        (
            "global uname",
            b'g',
            record("uname", "user"),
            checksummed_header(|block| block[UNAME_RANGE].fill(b'u')),
            ExpectedHeaderError::UnterminatedUstarStringField("uname"),
        ),
        (
            "local gname",
            b'x',
            record("gname", "group"),
            checksummed_header(|block| block[GNAME_RANGE].fill(b'g')),
            ExpectedHeaderError::UnterminatedUstarStringField("gname"),
        ),
    ];

    for (case, typeflag, records, malformed, expected) in cases {
        let mut bytes = Vec::new();
        append_pax(&mut bytes, typeflag, &records);
        append_block(&mut bytes, &malformed);
        append_terminator(&mut bytes);

        // The malformed header must still be rejected despite the overriding record.
        let frames = collect(bytes, BLOCK_SIZE);
        assert!(
            expected.matches(last_error_inner(&frames)),
            "{case}: {frames:?}"
        );
    }
}
1928
#[test]
fn accepts_all_nul_unused_device_fields() {
    // Precondition: neither device field of the fixture header parses as octal.
    let member = header(b'0', 0);
    assert_eq!(parse_octal(&member[DEVMAJOR_RANGE]), None);
    assert_eq!(parse_octal(&member[DEVMINOR_RANGE]), None);

    let mut archive = Vec::new();
    append_block(&mut archive, &member);
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert!(frames.iter().all(Result::is_ok));
}
1940
#[test]
fn rejects_local_size_deletion_for_payload_free_members() {
    // A global size of 7, a local empty size record, then a directory header declaring 3.
    let mut archive = Vec::new();
    append_pax(&mut archive, b'g', &record("size", "7"));
    append_pax(&mut archive, b'x', &record("size", ""));
    append_block(&mut archive, &header(b'5', 3));
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
    ));
}
1956
#[test]
fn rejects_deleted_size_when_member_payload_cannot_be_framed() {
    let deletion = record("size", "");

    // The same empty size record is tried as a local and as a global extension.
    for typeflag in [b'x', b'g'] {
        let mut archive = Vec::new();
        append_pax(&mut archive, typeflag, &deletion);
        append_block(&mut archive, &header(b'0', 0));

        let frames = collect(archive, BLOCK_SIZE);
        let rejected = matches!(
            last_error_inner(&frames),
            FrameErrorInner::DeletedPaxMetadata { keyword: "size" }
        );
        assert!(rejected, "{typeflag:?}");
    }
}
1974
#[test]
fn allows_local_size_to_restore_an_active_global_deletion() {
    // The global extension carries an empty size record; the local one supplies a size again.
    let mut archive = Vec::new();
    append_pax(&mut archive, b'g', &record("size", ""));
    append_pax(&mut archive, b'x', &record("size", "2"));
    append_block(&mut archive, &header(b'0', 1));
    append_payload(&mut archive, b"ab");
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(header_frame(&frames, 4).effective_size, 2);

    let global_records = [PaxRecord::Size(PaxValue::Deleted)];
    assert_eq!(
        data_frame(&frames, 1).completed_pax_records(),
        Some(global_records.as_slice())
    );
    let local_records = [PaxRecord::Size(PaxValue::Value(2))];
    assert_eq!(
        data_frame(&frames, 3).completed_pax_records(),
        Some(local_records.as_slice())
    );
}
1998
#[test]
fn frames_pax_hard_link_bodies_from_header_or_size_override() {
    // (label, size in the header, optional PAX size, index of header frame, index of data frame).
    // A PAX extension adds two frames ahead of the member header.
    let cases = [
        ("physical size", 3, None, 0, 1),
        ("pax size", 0, Some("3"), 2, 3),
        ("pax size overrides physical size", 1, Some("3"), 2, 3),
    ];

    for (case, declared_size, override_size, header_index, data_index) in cases {
        let mut archive = Vec::new();
        if let Some(size) = override_size {
            append_pax(&mut archive, b'x', &record("size", size));
        }
        append_block(&mut archive, &header(b'1', declared_size));
        append_payload(&mut archive, b"abc");
        append_terminator(&mut archive);

        let frames = collect(archive, BLOCK_SIZE);
        let link = header_frame(&frames, header_index);
        assert_eq!(link.format, ArchiveFormat::Pax, "{case}");
        assert_eq!(link.kind, UstarKind::HardLink, "{case}");
        assert_eq!(link.declared_size, declared_size, "{case}");
        assert_eq!(link.effective_size, 3, "{case}");

        let body = data_frame(&frames, data_index);
        assert_eq!(body.len, 3, "{case}");
    }
}
2023
#[test]
fn zero_data_block_is_not_a_terminator() {
    // The member's single payload block is entirely zero.
    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', BLOCK_SIZE as u64));
    append_block(&mut archive, &[0; BLOCK_SIZE]);
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    assert_eq!(frames.len(), 2);
    let payload = &frames[1];
    assert!(matches!(payload, Ok(Frame::Data(_))));
}
2035
#[test]
fn zero_filled_block_inside_pax_payload_is_data() {
    // A comment value of three blocks' worth of NUL bytes, so the extension payload contains
    // at least one all-zero block.
    let nul_comment = "\0".repeat(BLOCK_SIZE * 3);
    let records = record("comment", &nul_comment);

    let mut archive = Vec::new();
    append_pax(&mut archive, b'x', &records);
    append_block(&mut archive, &header(b'0', 0));
    append_terminator(&mut archive);

    let frames = collect(archive, BLOCK_SIZE);
    let saw_zero_pax_block = frames.iter().any(|frame| {
        matches!(
            frame,
            Ok(Frame::Data(DataFrame {
                block,
                owner: DataOwner::Pax(PaxKind::Local),
                ..
            })) if is_zero_block(block)
        )
    });
    assert!(saw_zero_pax_block);
}
2054
#[test]
fn frames_gnu_long_metadata_and_base256_payloads() {
    // A long-name extension of 513 bytes, a long-link extension, then the symlink itself.
    let mut archive = Vec::new();
    append_block(&mut archive, &gnu_base256_header(b'L', 513));
    append_payload(&mut archive, &[b'n'; BLOCK_SIZE]);
    append_payload(&mut archive, b"\0");
    append_gnu(&mut archive, b'K', b"link\0");
    append_block(&mut archive, &gnu_header(b'2', 0));
    append_terminator(&mut archive);

    let frames = collect(archive, 13);
    assert_eq!(frames.len(), 6);

    let long_name = frames[0].as_ref().unwrap();
    assert!(matches!(
        long_name,
        Frame::Gnu(GnuFrame {
            kind: GnuKind::LongName,
            payload_size: 513,
            ..
        })
    ));

    // The second long-name data frame holds the single byte past the first block.
    let name_tail = data_frame(&frames, 2);
    assert_eq!(name_tail.owner, DataOwner::Gnu(GnuKind::LongName));
    assert_eq!(name_tail.len, 1);
    assert!(name_tail.completed_pax_records().is_none());

    let long_link = frames[3].as_ref().unwrap();
    assert!(matches!(
        long_link,
        Frame::Gnu(GnuFrame {
            kind: GnuKind::LongLink,
            ..
        })
    ));

    let member = header_frame(&frames, 5);
    assert_eq!(member.kind, UstarKind::SymbolicLink);
}
2089
#[test]
fn rejects_header_format_type_and_field_errors() {
    // Every malformed header is fed to the stream as the only block of the archive.
    for (case, malformed, expected) in invalid_header_cases() {
        let archive = malformed.to_vec();
        let frames = collect(archive, BLOCK_SIZE);
        let reported = last_error_inner(&frames);
        assert!(expected.matches(reported), "{case}: {reported:?}");
    }
}
2098
#[test]
fn rejects_malformed_gnu_numeric_fields() {
    let numeric_fields = [
        ("mode", MODE_RANGE),
        ("uid", UID_RANGE),
        ("gid", GID_RANGE),
        ("mtime", MTIME_RANGE),
    ];

    for (field, range) in numeric_fields {
        // Overwrite the field with non-numeric bytes and refresh the checksum so the field
        // itself is what gets rejected.
        let mut malformed = gnu_header(b'0', 0);
        malformed[range].fill(b'x');
        set_checksum(&mut malformed);

        let frames = collect(malformed.to_vec(), BLOCK_SIZE);
        assert!(matches!(
            last_error_inner(&frames),
            FrameErrorInner::InvalidNumericField { field: found, .. } if *found == field
        ));
    }
}
2117
#[test]
fn rejects_nonzero_physical_sizes_for_payload_free_members() {
    // Each case is a lone header declaring a size of 1 for a kind that must not carry a
    // payload. Hard links appear only with the GNU header.
    for (format, block, kind) in [
        (ArchiveFormat::Pax, header(b'2', 1), UstarKind::SymbolicLink),
        (ArchiveFormat::Gnu, gnu_header(b'1', 1), UstarKind::HardLink),
        (
            ArchiveFormat::Gnu,
            gnu_header(b'2', 1),
            UstarKind::SymbolicLink,
        ),
        (
            ArchiveFormat::Pax,
            header(b'3', 1),
            UstarKind::CharacterDevice,
        ),
        (
            ArchiveFormat::Gnu,
            gnu_header(b'3', 1),
            UstarKind::CharacterDevice,
        ),
        (ArchiveFormat::Pax, header(b'4', 1), UstarKind::BlockDevice),
        (
            ArchiveFormat::Gnu,
            gnu_header(b'4', 1),
            UstarKind::BlockDevice,
        ),
        (ArchiveFormat::Pax, header(b'5', 1), UstarKind::Directory),
        (
            ArchiveFormat::Gnu,
            gnu_header(b'5', 1),
            UstarKind::Directory,
        ),
        (ArchiveFormat::Pax, header(b'6', 1), UstarKind::Fifo),
        (ArchiveFormat::Gnu, gnu_header(b'6', 1), UstarKind::Fifo),
    ] {
        let frames = collect(block.to_vec(), BLOCK_SIZE);
        // `format` is only used to label a failing case.
        assert!(
            matches!(
                last_error_inner(&frames),
                FrameErrorInner::InvalidMemberSize {
                    kind: found,
                    size: 1,
                } if *found == kind
            ),
            "{format:?} {kind:?}"
        );
    }
}
2166
#[test]
fn rejects_nonzero_declared_or_effective_pax_sizes_for_payload_free_members() {
    let payload_free_kinds = [
        (b'2', UstarKind::SymbolicLink),
        (b'3', UstarKind::CharacterDevice),
        (b'4', UstarKind::BlockDevice),
        (b'5', UstarKind::Directory),
        (b'6', UstarKind::Fifo),
    ];
    // Either the PAX size record or the header's own size field is the non-zero one.
    let size_sources = [("effective", 0, "1"), ("declared", 1, "0")];

    for (case, declared_size, override_size) in size_sources {
        for (typeflag, kind) in payload_free_kinds {
            let mut archive = Vec::new();
            append_pax(&mut archive, b'x', &record("size", override_size));
            append_block(&mut archive, &header(typeflag, declared_size));

            let frames = collect(archive, BLOCK_SIZE);
            let rejected = matches!(
                last_error_inner(&frames),
                FrameErrorInner::InvalidMemberSize {
                    kind: found,
                    size: 1,
                } if *found == kind
            );
            assert!(rejected, "{case} {kind:?}");
        }
    }
}
2194
#[test]
fn header_errors_preserve_later_header_positions() {
    // The malformed header is the second block, so its error must sit one block in.
    let second_block = BLOCK_SIZE as u64;

    for (case, malformed, expected) in invalid_header_cases() {
        let mut archive = Vec::new();
        append_block(&mut archive, &header(b'0', 0));
        append_block(&mut archive, &malformed);

        let frames = collect(archive, BLOCK_SIZE);
        let error = last_error(&frames);
        assert_eq!(error.position, second_block, "{case}");
        assert!(expected.matches(&error.inner), "{case}: {error:?}");
    }
}
2209
#[test]
fn rejects_invalid_pax_sequences() {
    // A local extension header declaring an empty payload.
    let empty_extension = collect(header(b'x', 0).to_vec(), BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&empty_extension),
        FrameErrorInner::InvalidPaxRecord {
            source: PaxError::InvalidRecords { .. },
        }
    ));

    let valid = record("path", "name");

    // A complete local extension immediately followed by another local extension header.
    let mut consecutive = Vec::new();
    append_pax(&mut consecutive, b'x', &valid);
    append_block(&mut consecutive, &header(b'x', valid.len() as u64));
    let frames = collect(consecutive, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::UnexpectedOrder { .. }
    ));

    // A complete local extension with nothing after it.
    let mut missing_member = Vec::new();
    append_pax(&mut missing_member, b'x', &valid);
    let frames = collect(missing_member, BLOCK_SIZE);
    assert!(matches!(
        last_error_inner(&frames),
        FrameErrorInner::UnexpectedEof { .. }
    ));
}
2235
#[test]
fn preserves_pax_parse_error_positions_in_stream() {
    // An empty member occupies the first block; the extension with the bad size follows it.
    let mut archive = Vec::new();
    append_block(&mut archive, &header(b'0', 0));
    append_pax(&mut archive, b'x', &record("size", "bad"));

    let frames = collect(archive, BLOCK_SIZE);
    let expected_position = BLOCK_SIZE as u64;
    assert!(matches!(
        frames.last(),
        Some(Err(FrameError {
            position,
            inner: FrameErrorInner::InvalidPaxRecord {
                source: PaxError::InvalidInteger { .. },
            },
        })) if *position == expected_position
    ));
}
2254
#[test]
fn accepts_binary_and_rejects_unknown_pax_charsets() {
    // Part one: a global extension selects the BINARY charset, and a later local extension
    // carries a path without naming a charset itself.
    let mut global = record("hdrcharset", "BINARY");
    global.extend_from_slice(&record("path", "global"));
    let local = record("path", "local");
    let mut bytes = Vec::new();
    append_pax(&mut bytes, b'g', &global);
    append_pax(&mut bytes, b'x', &local);
    append_block(&mut bytes, &header(b'0', 0));
    append_terminator(&mut bytes);
    let frames = collect(bytes, BLOCK_SIZE);
    // Frames 0-1 belong to the global extension, 2-3 to the local one, 4 is the member.
    let member_header = header_frame(&frames, 4);
    assert_eq!(member_header.kind, UstarKind::Regular);
    assert_eq!(
        data_frame(&frames, 1).completed_pax_records(),
        Some(
            [
                PaxRecord::HdrCharset(PaxValue::Value(HdrCharset::Binary)),
                PaxRecord::Path(PaxValue::Value(PaxString::Binary(
                    b"global".to_vec().into(),
                ))),
            ]
            .as_slice()
        )
    );
    // The local path is expected as a binary string too.
    assert_eq!(
        data_frame(&frames, 3).completed_pax_records(),
        Some(
            [PaxRecord::Path(PaxValue::Value(PaxString::Binary(
                b"local".to_vec().into()
            )))]
            .as_slice()
        )
    );

    // Part two: a charset name the parser is expected to reject, echoed back in the error.
    let records = record("hdrcharset", "ISO-IR 8859 1 1998");
    let mut bytes = Vec::new();
    append_pax(&mut bytes, b'x', &records);
    assert!(matches!(
        last_error_inner(&collect(bytes, BLOCK_SIZE)),
        FrameErrorInner::InvalidPaxRecord {
            source: PaxError::UnsupportedCharset { value },
        } if value == "ISO-IR 8859 1 1998"
    ));
}
2300
#[test]
fn rejects_invalid_gnu_sequences_and_sizes() {
    // Three orderings that must be rejected: two long-name headers in a row, a long-link
    // header ahead of a regular file, and a long-name header followed by the terminator.
    let mut duplicate = Vec::new();
    append_block(&mut duplicate, &gnu_header(b'L', 0));
    append_block(&mut duplicate, &gnu_header(b'L', 0));
    let mut long_link_for_regular = Vec::new();
    append_block(&mut long_link_for_regular, &gnu_header(b'K', 0));
    append_block(&mut long_link_for_regular, &gnu_header(b'0', 0));
    let mut dangling = Vec::new();
    append_block(&mut dangling, &gnu_header(b'L', 0));
    append_terminator(&mut dangling);
    for (case, bytes) in [
        ("duplicate", duplicate),
        ("long-link-for-regular", long_link_for_regular),
        ("dangling", dangling),
    ] {
        assert!(
            matches!(
                last_error_inner(&collect(bytes, BLOCK_SIZE)),
                FrameErrorInner::UnexpectedOrder { .. }
            ),
            "{case}"
        );
    }

    // Typeflag `S` in a GNU header is expected to be reported as unsupported.
    assert!(matches!(
        last_error_inner(&collect(gnu_header(b'S', 0).to_vec(), BLOCK_SIZE)),
        FrameErrorInner::UnsupportedTypeflag { typeflag: b'S' }
    ));

    // A size field made entirely of 0xff bytes, with the checksum refreshed so the size is
    // what gets rejected.
    let mut negative_size = gnu_header(b'0', 0);
    negative_size[SIZE_RANGE].fill(0xff);
    set_checksum(&mut negative_size);
    assert!(matches!(
        last_error_inner(&collect(negative_size.to_vec(), BLOCK_SIZE)),
        FrameErrorInner::InvalidSize { .. }
    ));
}
2339
#[test]
fn detects_one_archive_family_and_rejects_mixing() {
    // A header built by `header` followed by one built by `gnu_header`.
    let mut posix_then_gnu = Vec::new();
    append_block(&mut posix_then_gnu, &header(b'0', 0));
    append_block(&mut posix_then_gnu, &gnu_header(b'0', 0));
    assert!(matches!(
        last_error_inner(&collect(posix_then_gnu, BLOCK_SIZE)),
        FrameErrorInner::FormatMismatch {
            expected: ArchiveFormat::Pax,
            found: ArchiveFormat::Gnu,
        }
    ));

    // The first byte is changed without refreshing the checksum: the checksum failure is
    // expected to be reported rather than a format mismatch.
    let mut malformed_gnu = gnu_header(b'0', 0);
    malformed_gnu[0] = b'X';
    let mut posix_then_malformed_gnu = Vec::new();
    append_block(&mut posix_then_malformed_gnu, &header(b'0', 0));
    append_block(&mut posix_then_malformed_gnu, &malformed_gnu);
    assert!(matches!(
        last_error_inner(&collect(posix_then_malformed_gnu, BLOCK_SIZE)),
        FrameErrorInner::InvalidChecksum { .. }
    ));

    // The reverse order reports the mismatch with the roles swapped.
    let mut gnu_then_posix = Vec::new();
    append_block(&mut gnu_then_posix, &gnu_header(b'0', 0));
    append_block(&mut gnu_then_posix, &header(b'0', 0));
    assert!(matches!(
        last_error_inner(&collect(gnu_then_posix, BLOCK_SIZE)),
        FrameErrorInner::FormatMismatch {
            expected: ArchiveFormat::Gnu,
            found: ArchiveFormat::Pax,
        }
    ));

    // The PAX extension typeflags are not accepted inside a GNU header.
    for typeflag in [b'x', b'g'] {
        assert!(
            matches!(
                last_error_inner(&collect(gnu_header(typeflag, 0).to_vec(), BLOCK_SIZE)),
                FrameErrorInner::UnsupportedTypeflag { typeflag: found } if *found == typeflag
            ),
            "{typeflag:?}"
        );
    }

    // An archive holding only the terminator ends cleanly and never settles on a format.
    let mut empty = Vec::new();
    append_terminator(&mut empty);
    let mut stream = TarStream::new(ChunkedReader::new(empty, BLOCK_SIZE));
    assert!(matches!(ready(stream.next_frame()), Ok(None)));
    assert_eq!(stream.format(), None);
}
2391
2392 #[test]
2393 fn rejects_truncation_and_invalid_termination() {
2394 assert!(matches!(
2395 last_error_inner(&collect(vec![0; 3], 1)),
2396 FrameErrorInner::IncompleteBlock { read: 3 }
2397 ));
2398
2399 let mut payload_truncated = Vec::new();
2400 append_block(&mut payload_truncated, &header(b'0', 1));
2401 assert!(matches!(
2402 last_error_inner(&collect(payload_truncated, BLOCK_SIZE)),
2403 FrameErrorInner::TruncatedPayload {
2404 owner: DataOwner::Member,
2405 ..
2406 }
2407 ));
2408
2409 let mut pax_payload_truncated = Vec::new();
2410 append_block(&mut pax_payload_truncated, &header(b'x', 513));
2411 append_payload(&mut pax_payload_truncated, b"11 path=x\n");
2412 assert!(matches!(
2413 last_error_inner(&collect(pax_payload_truncated, BLOCK_SIZE)),
2414 FrameErrorInner::TruncatedPayload {
2415 owner: DataOwner::Pax(PaxKind::Local),
2416 ..
2417 }
2418 ));
2419
2420 let mut missing_second_zero = Vec::new();
2421 append_block(&mut missing_second_zero, &header(b'0', 0));
2422 append_block(&mut missing_second_zero, &[0; BLOCK_SIZE]);
2423 assert!(matches!(
2424 last_error_inner(&collect(missing_second_zero, BLOCK_SIZE)),
2425 FrameErrorInner::MissingEndMarker
2426 ));
2427
2428 let mut bad_second_zero = Vec::new();
2429 append_block(&mut bad_second_zero, &header(b'0', 0));
2430 append_block(&mut bad_second_zero, &[0; BLOCK_SIZE]);
2431 append_block(&mut bad_second_zero, &header(b'0', 0));
2432 assert!(matches!(
2433 last_error_inner(&collect(bad_second_zero, BLOCK_SIZE)),
2434 FrameErrorInner::InvalidEndMarker
2435 ));
2436 }
2437
2438 #[test]
2439 fn stream_is_fused_after_first_error() {
2440 let mut stream = TarStream::new(ChunkedReader::new(header(b'L', 0).to_vec(), BLOCK_SIZE));
2441 assert!(matches!(
2442 ready(stream.next_frame()),
2443 Err(FrameError {
2444 position: 0,
2445 inner: FrameErrorInner::UnsupportedTypeflag { typeflag: b'L' },
2446 })
2447 ));
2448 assert!(matches!(ready(stream.next_frame()), Ok(None)));
2449 }
2450}