1use alloc::borrow::Cow;
49use alloc::borrow::ToOwned;
50use alloc::format;
51use alloc::string::String;
52use alloc::vec::Vec;
53use core::str::Utf8Error;
54
55use thiserror::Error;
56use zerocopy::FromBytes;
57
58use crate::{
59 EntryType, GnuExtSparseHeader, Header, HeaderError, PaxError, PaxExtensions, SparseEntry,
60 HEADER_SIZE, PAX_GID, PAX_GNAME, PAX_GNU_SPARSE_MAJOR, PAX_GNU_SPARSE_MAP,
61 PAX_GNU_SPARSE_MINOR, PAX_GNU_SPARSE_NAME, PAX_GNU_SPARSE_NUMBYTES, PAX_GNU_SPARSE_OFFSET,
62 PAX_GNU_SPARSE_REALSIZE, PAX_GNU_SPARSE_SIZE, PAX_LINKPATH, PAX_MTIME, PAX_PATH,
63 PAX_SCHILY_XATTR, PAX_SIZE, PAX_UID, PAX_UNAME,
64};
65
/// Resource limits enforced by [`Parser`] while it reads an archive.
///
/// All fields are public; start from [`Limits::default`] or
/// [`Limits::permissive`] and override individual fields as needed.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Limits {
    /// Upper bound, in bytes, on extension data.
    ///
    /// The parser compares this against the accumulated declared size of the
    /// extension entries (GNU long name, GNU long link, PAX header) pending
    /// for one entry, and against the declared size of a global PAX header.
    pub max_metadata_size: u32,

    /// Optional upper bound, in bytes, on path and link-target lengths.
    ///
    /// `None` disables the check (see [`Limits::check_path_len`]).
    pub max_path_len: Option<u32>,

    /// Maximum number of extension entries that may be pending in front of a
    /// single actual entry.
    pub max_pending_entries: usize,

    /// Maximum number of entries accepted in a sparse map.
    pub max_sparse_entries: usize,
}
137
138impl Default for Limits {
139 fn default() -> Self {
140 Self {
141 max_metadata_size: 1024 * 1024, max_path_len: None,
143 max_pending_entries: 16,
144 max_sparse_entries: 10_000,
145 }
146 }
147}
148
149impl Limits {
150 #[must_use]
152 pub fn new() -> Self {
153 Self::default()
154 }
155
156 #[must_use]
161 pub fn permissive() -> Self {
162 Self {
163 max_metadata_size: u32::MAX,
164 max_path_len: None,
165 max_pending_entries: usize::MAX,
166 max_sparse_entries: 1_000_000,
167 }
168 }
169
170 pub fn check_path_len(&self, len: usize) -> Result<()> {
175 if let Some(limit) = self.max_path_len {
176 if len > limit as usize {
177 return Err(ParseError::PathTooLong { len, limit });
178 }
179 }
180 Ok(())
181 }
182}
183
/// Errors produced while parsing a tar stream.
#[derive(Debug, Error)]
pub enum ParseError {
    /// An I/O error. Only present when the `std` feature is enabled.
    #[cfg(feature = "std")]
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),

    /// An error reported by the header layer (for example while verifying the
    /// checksum or decoding a numeric field).
    #[error("header error: {0}")]
    Header(#[from] HeaderError),

    /// An error reported while iterating PAX extension records.
    #[error("PAX error: {0}")]
    Pax(#[from] PaxError),

    /// A byte sequence that had to be UTF-8 was not.
    #[error("invalid UTF-8 in PAX key: {0}")]
    InvalidUtf8(#[from] Utf8Error),

    /// A path or link target is longer than [`Limits::max_path_len`].
    #[error("path exceeds limit: {len} bytes > {limit} bytes")]
    PathTooLong {
        /// Actual length in bytes.
        len: usize,
        /// Configured limit in bytes.
        limit: u32,
    },

    /// Extension data is larger than [`Limits::max_metadata_size`].
    #[error("metadata exceeds limit: {size} bytes > {limit} bytes")]
    MetadataTooLarge {
        /// Declared (or accumulated) size in bytes.
        size: u64,
        /// Configured limit in bytes.
        limit: u32,
    },

    /// Two GNU long-name entries were pending for the same entry.
    #[error("duplicate GNU long name entry")]
    DuplicateGnuLongName,

    /// Two GNU long-link entries were pending for the same entry.
    #[error("duplicate GNU long link entry")]
    DuplicateGnuLongLink,

    /// Two PAX extended headers were pending for the same entry.
    #[error("duplicate PAX extended header")]
    DuplicatePaxHeader,

    /// The end-of-archive marker was reached while extension entries were
    /// still pending.
    #[error("metadata entries without a following actual entry")]
    OrphanedMetadata,

    /// More extension entries were pending than
    /// [`Limits::max_pending_entries`] allows.
    #[error("too many pending metadata entries: {count} > {limit}")]
    TooManyPendingEntries {
        /// Number of pending entries seen.
        count: usize,
        /// Configured limit.
        limit: usize,
    },

    /// A sparse map has more entries than [`Limits::max_sparse_entries`]
    /// allows.
    #[error("too many sparse entries: {count} > {limit}")]
    TooManySparseEntries {
        /// Number of sparse entries seen (or declared).
        count: usize,
        /// Configured limit.
        limit: usize,
    },

    /// The entry type is GNU sparse but the header could not be viewed as a
    /// GNU header.
    #[error("sparse entry type but header is not GNU format")]
    SparseNotGnu,

    /// A PAX-encoded sparse map is malformed; the payload describes how.
    #[error("invalid PAX sparse map: {0}")]
    InvalidPaxSparseMap(Cow<'static, str>),

    /// A PAX record with a known key carries a value that could not be
    /// parsed.
    #[error("invalid PAX {key} value: {value:?}")]
    InvalidPaxValue {
        /// The PAX key whose value was rejected.
        key: &'static str,
        /// The offending value, or `<non-UTF-8>` when it was not valid UTF-8.
        value: Cow<'static, str>,
    },

    /// The resolved entry path is empty and empty paths are not allowed.
    #[error("entry has empty path")]
    EmptyPath,

    /// The entry size cannot be rounded up to a whole number of blocks (or
    /// added to the header size) without overflowing.
    #[error("invalid entry size: {0}")]
    InvalidSize(u64),

    /// The input ended unexpectedly.
    #[error("unexpected EOF at position {pos}")]
    UnexpectedEof {
        /// Position at which the end of input was hit.
        pos: u64,
    },
}
295
/// Result type used throughout the parser, with [`ParseError`] as the error.
pub type Result<T> = core::result::Result<T, ParseError>;
298
/// Outcome of one call to [`Parser::parse`].
///
/// Every `consumed` / `min_bytes` value is measured from the start of the
/// slice that was passed to `parse`.
#[derive(Debug)]
// `Entry` and `SparseEntry` embed a `ParsedEntry` by value and are therefore
// larger than the other variants; the lint is silenced instead of boxing.
#[allow(clippy::large_enum_variant)]
pub enum ParseEvent<'a> {
    /// The input is too short to make progress.
    NeedData {
        /// Total input length the parser wants before being called again.
        min_bytes: usize,
    },

    /// A regular (non-sparse) entry was parsed.
    Entry {
        /// Bytes of header data consumed: the entry's own header plus any
        /// extension entries in front of it. The entry's content is not
        /// included.
        consumed: usize,
        /// The parsed entry.
        entry: ParsedEntry<'a>,
    },

    /// A sparse entry was parsed.
    SparseEntry {
        /// Bytes of header data consumed: as for `Entry`, plus any sparse
        /// extension blocks or sparse map prefix.
        consumed: usize,
        /// The parsed entry.
        entry: ParsedEntry<'a>,
        /// The sparse map as a list of (offset, length) regions.
        sparse_map: Vec<SparseEntry>,
        /// The size recorded for the expanded file.
        real_size: u64,
    },

    /// A global PAX extended header was parsed.
    GlobalExtensions {
        /// Bytes consumed: the header block plus its padded content.
        consumed: usize,
        /// The raw, unpadded PAX record data.
        pax_data: &'a [u8],
    },

    /// The end of the archive was reached.
    End {
        /// Bytes consumed by the end marker (and any blocks skipped before
        /// it); `0` when the parser had already finished.
        consumed: usize,
    },
}
380
381impl<'a> ParseEvent<'a> {
382 fn add_consumed(self, n: usize) -> Self {
391 match self {
392 ParseEvent::NeedData { min_bytes } => ParseEvent::NeedData {
393 min_bytes: min_bytes.saturating_add(n),
394 },
395 ParseEvent::Entry { consumed, entry } => ParseEvent::Entry {
396 consumed: consumed.saturating_add(n),
397 entry,
398 },
399 ParseEvent::SparseEntry {
400 consumed,
401 entry,
402 sparse_map,
403 real_size,
404 } => ParseEvent::SparseEntry {
405 consumed: consumed.saturating_add(n),
406 entry,
407 sparse_map,
408 real_size,
409 },
410 ParseEvent::GlobalExtensions { consumed, pax_data } => ParseEvent::GlobalExtensions {
411 consumed: consumed.saturating_add(n),
412 pax_data,
413 },
414 ParseEvent::End { consumed } => ParseEvent::End {
415 consumed: consumed.saturating_add(n),
416 },
417 }
418 }
419}
420
/// A fully resolved archive entry.
///
/// Values start from the entry's header and are then overridden by any GNU
/// long-name / long-link data and PAX records that preceded it.
#[derive(Debug)]
pub struct ParsedEntry<'a> {
    /// The entry's raw header block.
    pub header: &'a Header,

    /// The entry type as reported by the header.
    pub entry_type: EntryType,

    /// The entry path as raw bytes. Borrowed from the input where possible;
    /// owned when it had to be assembled from the header's prefix and name.
    pub path: Cow<'a, [u8]>,

    /// The link target as raw bytes, or `None` when there is none (an empty
    /// target is normalised to `None`).
    pub link_target: Option<Cow<'a, [u8]>>,

    /// File mode from the header.
    pub mode: u32,

    /// Owner user id (header value, or the PAX `uid` record if present).
    pub uid: u64,

    /// Owner group id (header value, or the PAX `gid` record if present).
    pub gid: u64,

    /// Modification time in whole seconds (header value, or the integer part
    /// of the PAX `mtime` record if present).
    pub mtime: u64,

    /// Content size in bytes (header value, or the PAX `size` record if
    /// present).
    pub size: u64,

    /// Owner user name, or `None` when absent or empty.
    pub uname: Option<Cow<'a, [u8]>>,

    /// Owner group name, or `None` when absent or empty.
    pub gname: Option<Cow<'a, [u8]>>,

    /// Device major number from the header, if any.
    pub dev_major: Option<u32>,

    /// Device minor number from the header, if any.
    pub dev_minor: Option<u32>,

    /// Extended attributes as (name, value) pairs, taken from PAX records
    /// whose key starts with the `PAX_SCHILY_XATTR` prefix (the prefix is
    /// stripped from the name).
    #[allow(clippy::type_complexity)]
    pub xattrs: Vec<(Cow<'a, [u8]>, Cow<'a, [u8]>)>,

    /// The raw PAX extended-header data that preceded this entry, if any.
    pub pax: Option<&'a [u8]>,
}
482
483impl<'a> ParsedEntry<'a> {
484 #[must_use]
486 pub fn path_lossy(&self) -> Cow<'_, str> {
487 String::from_utf8_lossy(&self.path)
488 }
489
490 #[must_use]
492 pub fn link_target_lossy(&self) -> Option<Cow<'_, str>> {
493 self.link_target
494 .as_ref()
495 .map(|t| String::from_utf8_lossy(t))
496 }
497
498 #[must_use]
500 pub fn is_file(&self) -> bool {
501 self.entry_type.is_file()
502 }
503
504 #[must_use]
506 pub fn is_dir(&self) -> bool {
507 self.entry_type.is_dir()
508 }
509
510 #[must_use]
512 pub fn is_symlink(&self) -> bool {
513 self.entry_type.is_symlink()
514 }
515
516 #[must_use]
518 pub fn is_hard_link(&self) -> bool {
519 self.entry_type.is_hard_link()
520 }
521
522 #[must_use]
524 pub fn padded_size(&self) -> u64 {
525 self.size.next_multiple_of(HEADER_SIZE as u64)
526 }
527}
528
/// Internal parser state.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum State {
    /// The next bytes of input are expected to start with a header block.
    ReadHeader,
    /// The end-of-archive marker has been seen; parsing is finished.
    Done,
}
537
/// The kinds of extension entry that attach metadata to the entry following
/// them.
#[derive(Debug, Clone, Copy)]
enum ExtensionKind {
    /// GNU long-name entry: its content replaces the following entry's path.
    GnuLongName,
    /// GNU long-link entry: its content replaces the following entry's link
    /// target.
    GnuLongLink,
    /// PAX extended header: its content is a list of PAX records.
    Pax,
}
545
/// Extension data collected so far for the next actual entry.
///
/// The slices borrow from the parser input; the struct is `Copy` and is
/// passed by value through the recursive header parsing.
#[derive(Debug, Default, Clone, Copy)]
struct PendingMetadata<'a> {
    /// Content of a pending GNU long-name entry (trailing NUL removed).
    gnu_long_name: Option<&'a [u8]>,
    /// Content of a pending GNU long-link entry (trailing NUL removed).
    gnu_long_link: Option<&'a [u8]>,
    /// Raw record data of a pending PAX extended header.
    pax_extensions: Option<&'a [u8]>,
    /// Number of extension entries accumulated so far.
    count: usize,
    /// Sum of the declared sizes of the accumulated extension entries.
    metadata_size: u64,
}
561
/// Sparse-file information gathered before an entry is emitted.
struct SparseContext {
    /// The sparse map as a list of (offset, length) regions.
    sparse_map: Vec<SparseEntry>,
    /// The size recorded for the expanded file.
    real_size: u64,
    /// Bytes consumed after the entry's own header block (GNU sparse
    /// extension blocks, or the padded sparse map that prefixes the data);
    /// added to the `consumed` count of the emitted event.
    ext_consumed: usize,
}
571
572impl PendingMetadata<'_> {
573 fn is_empty(&self) -> bool {
574 self.gnu_long_name.is_none()
575 && self.gnu_long_link.is_none()
576 && self.pax_extensions.is_none()
577 }
578}
579
580fn pax_sparse_version(pax: &[u8], ignore_errors: bool) -> Result<Option<(u64, u64)>> {
587 let mut major = None;
588 let mut minor = None;
589 for ext in PaxExtensions::new(pax) {
590 let ext = ext?;
591 let key = match ext.key() {
592 Ok(k) => k,
593 Err(_) if ignore_errors => continue,
594 Err(e) => return Err(ParseError::from(e)),
595 };
596 match key {
597 PAX_GNU_SPARSE_MAJOR => {
598 let s = match ext.value() {
599 Ok(s) => s,
600 Err(_) if ignore_errors => continue,
601 Err(_) => {
602 return Err(ParseError::InvalidPaxValue {
603 key: PAX_GNU_SPARSE_MAJOR,
604 value: Cow::Borrowed("<non-UTF-8>"),
605 })
606 }
607 };
608 match s.parse::<u64>() {
609 Ok(v) => major = Some(v),
610 Err(_) if ignore_errors => {}
611 Err(_) => {
612 return Err(ParseError::InvalidPaxValue {
613 key: PAX_GNU_SPARSE_MAJOR,
614 value: s.to_owned().into(),
615 })
616 }
617 }
618 }
619 PAX_GNU_SPARSE_MINOR => {
620 let s = match ext.value() {
621 Ok(s) => s,
622 Err(_) if ignore_errors => continue,
623 Err(_) => {
624 return Err(ParseError::InvalidPaxValue {
625 key: PAX_GNU_SPARSE_MINOR,
626 value: Cow::Borrowed("<non-UTF-8>"),
627 })
628 }
629 };
630 match s.parse::<u64>() {
631 Ok(v) => minor = Some(v),
632 Err(_) if ignore_errors => {}
633 Err(_) => {
634 return Err(ParseError::InvalidPaxValue {
635 key: PAX_GNU_SPARSE_MINOR,
636 value: s.to_owned().into(),
637 })
638 }
639 }
640 }
641 _ => {}
642 }
643 if major.is_some() && minor.is_some() {
644 break;
645 }
646 }
647 match (major, minor) {
648 (Some(maj), Some(min)) => Ok(Some((maj, min))),
649 _ => Ok(None),
650 }
651}
652
/// Tar header parser that works on caller-supplied byte slices.
///
/// The parser performs no I/O itself: the caller passes a slice to
/// [`Parser::parse`] and reacts to the returned [`ParseEvent`].
#[derive(Debug)]
pub struct Parser {
    /// Resource limits enforced while parsing.
    limits: Limits,
    /// Whether the end of the archive has been reached.
    state: State,
    /// When `false`, an entry whose resolved path is empty is rejected.
    allow_empty_path: bool,
    /// When `true`, each header's checksum is verified.
    verify_checksums: bool,
    /// When `true`, PAX values that are not valid UTF-8 or not valid decimal
    /// numbers are skipped instead of reported.
    ignore_pax_errors: bool,
}
713
714impl Parser {
715 #[must_use]
717 pub fn new(limits: Limits) -> Self {
718 Self {
719 limits,
720 state: State::ReadHeader,
721 allow_empty_path: false,
722 verify_checksums: true,
723 ignore_pax_errors: false,
724 }
725 }
726
727 pub fn set_allow_empty_path(&mut self, allow: bool) {
730 self.allow_empty_path = allow;
731 }
732
733 pub fn set_verify_checksums(&mut self, verify: bool) {
743 self.verify_checksums = verify;
744 }
745
746 pub fn set_ignore_pax_errors(&mut self, ignore: bool) {
755 self.ignore_pax_errors = ignore;
756 }
757
758 #[must_use]
760 pub fn with_defaults() -> Self {
761 Self::new(Limits::default())
762 }
763
764 #[must_use]
766 pub fn limits(&self) -> &Limits {
767 &self.limits
768 }
769
770 #[must_use]
772 pub fn is_done(&self) -> bool {
773 self.state == State::Done
774 }
775
776 pub fn parse<'a>(&mut self, input: &'a [u8]) -> Result<ParseEvent<'a>> {
792 match self.state {
793 State::Done => Ok(ParseEvent::End { consumed: 0 }),
794 State::ReadHeader => self.parse_header(input, PendingMetadata::default()),
795 }
796 }
797
798 fn parse_header<'a>(
800 &mut self,
801 input: &'a [u8],
802 slices: PendingMetadata<'a>,
803 ) -> Result<ParseEvent<'a>> {
804 if input.len() < HEADER_SIZE {
806 return Ok(ParseEvent::NeedData {
807 min_bytes: HEADER_SIZE,
808 });
809 }
810
811 let header_bytes: &[u8; HEADER_SIZE] = input[..HEADER_SIZE]
816 .try_into()
817 .expect("already checked input.len() >= HEADER_SIZE");
818 if header_bytes.iter().all(|&b| b == 0) {
819 if input.len() < 2 * HEADER_SIZE {
822 return Ok(ParseEvent::NeedData {
823 min_bytes: 2 * HEADER_SIZE,
824 });
825 }
826 let second_block = &input[HEADER_SIZE..2 * HEADER_SIZE];
828 if second_block.iter().all(|&b| b == 0) {
829 self.state = State::Done;
830 if !slices.is_empty() {
831 return Err(ParseError::OrphanedMetadata);
832 }
833 return Ok(ParseEvent::End {
834 consumed: 2 * HEADER_SIZE,
835 });
836 }
837 return self
840 .parse_header(&input[HEADER_SIZE..], slices)
841 .map(|e| e.add_consumed(HEADER_SIZE));
842 }
843
844 if slices.count > self.limits.max_pending_entries {
846 return Err(ParseError::TooManyPendingEntries {
847 count: slices.count,
848 limit: self.limits.max_pending_entries,
849 });
850 }
851
852 let header = Header::from_bytes(header_bytes);
854 if self.verify_checksums {
855 header.verify_checksum()?;
856 }
857
858 let entry_type = header.entry_type();
859 let size = header.entry_size()?;
860 let padded_size = size
861 .checked_next_multiple_of(HEADER_SIZE as u64)
862 .ok_or(ParseError::InvalidSize(size))?;
863
864 let is_extension_format = header.is_gnu() || header.is_ustar();
870 match entry_type {
871 EntryType::GnuLongName if is_extension_format => {
872 self.handle_extension(input, size, padded_size, ExtensionKind::GnuLongName, slices)
873 }
874 EntryType::GnuLongLink if is_extension_format => {
875 self.handle_extension(input, size, padded_size, ExtensionKind::GnuLongLink, slices)
876 }
877 EntryType::XHeader if is_extension_format => {
878 self.handle_extension(input, size, padded_size, ExtensionKind::Pax, slices)
879 }
880 EntryType::XGlobalHeader => {
885 if size > self.limits.max_metadata_size as u64 {
887 return Err(ParseError::MetadataTooLarge {
888 size,
889 limit: self.limits.max_metadata_size,
890 });
891 }
892
893 let total_size = (HEADER_SIZE as u64)
894 .checked_add(padded_size)
895 .ok_or(ParseError::InvalidSize(size))?;
896 if (input.len() as u64) < total_size {
897 return Ok(ParseEvent::NeedData {
898 min_bytes: total_size as usize,
899 });
900 }
901
902 let content_start = HEADER_SIZE;
903 let content_end = content_start + size as usize;
904 let pax_data = &input[content_start..content_end];
905
906 Ok(ParseEvent::GlobalExtensions {
907 consumed: total_size as usize,
908 pax_data,
909 })
910 }
911 EntryType::GnuSparse if is_extension_format => {
912 self.handle_gnu_sparse(input, header, size, slices)
913 }
914 _ => {
915 let sparse_version = if let Some(pax) = slices.pax_extensions {
918 pax_sparse_version(pax, self.ignore_pax_errors)?
919 } else {
920 None
921 };
922 if sparse_version == Some((1, 0)) {
923 self.handle_pax_sparse_v1(input, header, size, slices)
924 } else {
925 self.emit_entry(header, size, None, slices)
928 }
929 }
930 }
931 }
932
933 fn handle_extension<'a>(
940 &mut self,
941 input: &'a [u8],
942 size: u64,
943 padded_size: u64,
944 kind: ExtensionKind,
945 slices: PendingMetadata<'a>,
946 ) -> Result<ParseEvent<'a>> {
947 let has_dup = match kind {
949 ExtensionKind::GnuLongName => slices.gnu_long_name.is_some(),
950 ExtensionKind::GnuLongLink => slices.gnu_long_link.is_some(),
951 ExtensionKind::Pax => slices.pax_extensions.is_some(),
952 };
953 if has_dup {
954 return Err(match kind {
955 ExtensionKind::GnuLongName => ParseError::DuplicateGnuLongName,
956 ExtensionKind::GnuLongLink => ParseError::DuplicateGnuLongLink,
957 ExtensionKind::Pax => ParseError::DuplicatePaxHeader,
958 });
959 }
960
961 let new_metadata_size = slices.metadata_size + size;
963 if new_metadata_size > self.limits.max_metadata_size as u64 {
964 return Err(ParseError::MetadataTooLarge {
965 size: new_metadata_size,
966 limit: self.limits.max_metadata_size,
967 });
968 }
969
970 let total_size = (HEADER_SIZE as u64)
971 .checked_add(padded_size)
972 .ok_or(ParseError::InvalidSize(size))?;
973 if (input.len() as u64) < total_size {
974 return Ok(ParseEvent::NeedData {
975 min_bytes: total_size as usize,
976 });
977 }
978
979 let content_start = HEADER_SIZE;
981 let content_end = content_start + size as usize;
982 let mut data: &'a [u8] = &input[content_start..content_end];
983
984 if matches!(
986 kind,
987 ExtensionKind::GnuLongName | ExtensionKind::GnuLongLink
988 ) {
989 if let Some(trimmed) = data.strip_suffix(&[0]) {
990 data = trimmed;
991 }
992 self.limits.check_path_len(data.len())?;
993 }
994
995 let mut new_slices = PendingMetadata {
997 count: slices.count + 1,
998 metadata_size: new_metadata_size,
999 ..slices
1000 };
1001 match kind {
1002 ExtensionKind::GnuLongName => new_slices.gnu_long_name = Some(data),
1003 ExtensionKind::GnuLongLink => new_slices.gnu_long_link = Some(data),
1004 ExtensionKind::Pax => new_slices.pax_extensions = Some(data),
1005 }
1006
1007 self.parse_header(&input[total_size as usize..], new_slices)
1008 .map(|e| e.add_consumed(total_size as usize))
1009 }
1010
1011 fn handle_pax_sparse_v1<'a>(
1026 &mut self,
1027 input: &'a [u8],
1028 header: &'a Header,
1029 size: u64,
1030 slices: PendingMetadata<'a>,
1031 ) -> Result<ParseEvent<'a>> {
1032 let pax = slices
1034 .pax_extensions
1035 .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1036 "missing PAX extensions",
1037 )))?;
1038
1039 let ignore_errors = self.ignore_pax_errors;
1040 let mut real_size = None;
1041 let mut sparse_name = None;
1042 for ext in PaxExtensions::new(pax) {
1043 let ext = ext?;
1044 let key = match ext.key() {
1045 Ok(k) => k,
1046 Err(_) if ignore_errors => continue,
1047 Err(e) => return Err(ParseError::from(e)),
1048 };
1049 match key {
1050 PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
1051 let s = match ext.value() {
1052 Ok(s) => s,
1053 Err(_) if ignore_errors => continue,
1054 Err(_) => {
1055 return Err(ParseError::InvalidPaxValue {
1056 key: PAX_GNU_SPARSE_REALSIZE,
1057 value: Cow::Borrowed("<non-UTF-8>"),
1058 })
1059 }
1060 };
1061 match s.parse::<u64>() {
1062 Ok(v) => real_size = Some(v),
1063 Err(_) if ignore_errors => {}
1064 Err(_) => {
1065 return Err(ParseError::InvalidPaxValue {
1066 key: PAX_GNU_SPARSE_REALSIZE,
1067 value: s.to_owned().into(),
1068 })
1069 }
1070 }
1071 }
1072 PAX_GNU_SPARSE_NAME => {
1073 sparse_name = Some(ext.value_bytes());
1074 }
1075 _ => {}
1076 }
1077 }
1078
1079 let real_size = real_size.ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1080 "missing GNU.sparse.realsize",
1081 )))?;
1082
1083 let data_start = HEADER_SIZE;
1090 let data = &input[data_start..];
1091
1092 let mut pos = 0usize;
1094
1095 let read_line = |data: &[u8], pos: &mut usize| -> Option<Result<u64>> {
1097 let remaining = &data[*pos..];
1098 let nl = remaining.iter().position(|&b| b == b'\n')?;
1099 let line = &remaining[..nl];
1100 *pos += nl + 1;
1101 let s = match core::str::from_utf8(line) {
1102 Ok(s) => s,
1103 Err(_) => {
1104 return Some(Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1105 "non-UTF8 in sparse map",
1106 ))))
1107 }
1108 };
1109 match s.parse::<u64>() {
1110 Ok(v) => Some(Ok(v)),
1111 Err(_) => Some(Err(ParseError::InvalidPaxSparseMap(
1112 format!("invalid decimal: {s:?}").into(),
1113 ))),
1114 }
1115 };
1116
1117 let num_entries = match read_line(data, &mut pos) {
1119 Some(r) => r?,
1120 None => {
1121 return Ok(ParseEvent::NeedData {
1124 min_bytes: data_start + pos + HEADER_SIZE,
1125 });
1126 }
1127 };
1128
1129 if num_entries as usize > self.limits.max_sparse_entries {
1130 return Err(ParseError::TooManySparseEntries {
1131 count: num_entries as usize,
1132 limit: self.limits.max_sparse_entries,
1133 });
1134 }
1135
1136 let mut sparse_map = Vec::with_capacity((num_entries as usize).min(1024));
1139 for _ in 0..num_entries {
1140 let offset = match read_line(data, &mut pos) {
1141 Some(r) => r?,
1142 None => {
1143 return Ok(ParseEvent::NeedData {
1144 min_bytes: data_start + pos + HEADER_SIZE,
1145 });
1146 }
1147 };
1148 let length = match read_line(data, &mut pos) {
1149 Some(r) => r?,
1150 None => {
1151 return Ok(ParseEvent::NeedData {
1152 min_bytes: data_start + pos + HEADER_SIZE,
1153 });
1154 }
1155 };
1156 sparse_map.push(SparseEntry { offset, length });
1157 }
1158
1159 let map_size = pos.next_multiple_of(HEADER_SIZE);
1161
1162 if data.len() < map_size {
1164 return Ok(ParseEvent::NeedData {
1165 min_bytes: data_start + map_size,
1166 });
1167 }
1168
1169 let content_size =
1172 size.checked_sub(map_size as u64)
1173 .ok_or(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1174 "sparse map prefix larger than entry size",
1175 )))?;
1176
1177 let sparse_ctx = SparseContext {
1178 sparse_map,
1179 real_size,
1180 ext_consumed: map_size,
1182 };
1183
1184 let slices = if let Some(name) = sparse_name {
1187 PendingMetadata {
1188 gnu_long_name: Some(name),
1189 ..slices
1190 }
1191 } else {
1192 slices
1193 };
1194
1195 self.emit_entry(header, content_size, Some(sparse_ctx), slices)
1196 }
1197
1198 fn handle_gnu_sparse<'a>(
1205 &mut self,
1206 input: &'a [u8],
1207 header: &'a Header,
1208 size: u64,
1209 slices: PendingMetadata<'a>,
1210 ) -> Result<ParseEvent<'a>> {
1211 let gnu = header.try_as_gnu().ok_or(ParseError::SparseNotGnu)?;
1212 let real_size = gnu.real_size()?;
1213
1214 let mut sparse_map = Vec::new();
1216 for desc in &gnu.sparse {
1217 if desc.is_empty() {
1218 break;
1219 }
1220 let entry = desc.to_sparse_entry()?;
1221 sparse_map.push(entry);
1222 }
1223
1224 let mut ext_consumed = 0usize;
1229 if gnu.is_extended() {
1230 let mut offset = HEADER_SIZE; loop {
1232 if input.len() < offset + HEADER_SIZE {
1233 return Ok(ParseEvent::NeedData {
1234 min_bytes: offset + HEADER_SIZE,
1235 });
1236 }
1237
1238 let ext_bytes: &[u8; HEADER_SIZE] = input[offset..offset + HEADER_SIZE]
1239 .try_into()
1240 .expect("checked length");
1241 let ext = GnuExtSparseHeader::ref_from_bytes(ext_bytes)
1242 .expect("GnuExtSparseHeader is 512 bytes");
1243
1244 for desc in &ext.sparse {
1245 if desc.is_empty() {
1246 break;
1247 }
1248 if sparse_map.len() >= self.limits.max_sparse_entries {
1249 return Err(ParseError::TooManySparseEntries {
1250 count: sparse_map.len() + 1,
1251 limit: self.limits.max_sparse_entries,
1252 });
1253 }
1254 let entry = desc.to_sparse_entry()?;
1255 sparse_map.push(entry);
1256 }
1257
1258 offset += HEADER_SIZE;
1259
1260 if !ext.is_extended() {
1261 break;
1262 }
1263 }
1264 ext_consumed = offset - HEADER_SIZE; }
1266
1267 if sparse_map.len() > self.limits.max_sparse_entries {
1269 return Err(ParseError::TooManySparseEntries {
1270 count: sparse_map.len(),
1271 limit: self.limits.max_sparse_entries,
1272 });
1273 }
1274
1275 let sparse_ctx = SparseContext {
1276 sparse_map,
1277 real_size,
1278 ext_consumed,
1279 };
1280
1281 self.emit_entry(header, size, Some(sparse_ctx), slices)
1282 }
1283
1284 fn emit_entry<'a>(
1285 &mut self,
1286 header: &'a Header,
1287 size: u64,
1288 sparse: Option<SparseContext>,
1289 slices: PendingMetadata<'a>,
1290 ) -> Result<ParseEvent<'a>> {
1291 let mut path: Cow<'a, [u8]> = Cow::Borrowed(header.path_bytes());
1293 let mut link_target: Option<Cow<'a, [u8]>> = None;
1294 let mut uid = header.uid()?;
1295 let mut gid = header.gid()?;
1296 let mut mtime = header.mtime()?;
1297 let mut entry_size = size;
1298 let mut xattrs = Vec::new();
1299 let mut uname: Option<Cow<'a, [u8]>> = header
1300 .username()
1301 .filter(|b| !b.is_empty())
1302 .map(Cow::Borrowed);
1303 let mut gname: Option<Cow<'a, [u8]>> = header
1304 .groupname()
1305 .filter(|b| !b.is_empty())
1306 .map(Cow::Borrowed);
1307
1308 if let Some(prefix) = header.prefix() {
1310 if !prefix.is_empty() {
1311 let mut full_path = prefix.to_vec();
1312 full_path.push(b'/');
1313 full_path.extend_from_slice(header.path_bytes());
1314 path = Cow::Owned(full_path);
1315 }
1316 }
1317
1318 if let Some(long_name) = slices.gnu_long_name {
1320 path = Cow::Borrowed(long_name);
1321 }
1322
1323 if let Some(long_link) = slices.gnu_long_link {
1325 link_target = Some(Cow::Borrowed(long_link));
1326 } else {
1327 let header_link = header.link_name_bytes();
1328 if !header_link.is_empty() {
1329 link_target = Some(Cow::Borrowed(header_link));
1330 }
1331 }
1332
1333 let raw_pax = slices.pax_extensions;
1335
1336 let mut pax_sparse_map: Option<Vec<SparseEntry>> = None;
1339 let mut pax_sparse_real_size: Option<u64> = None;
1340 let mut pax_sparse_name: Option<&'a [u8]> = None;
1341 let mut pax_sparse_pending_offset: Option<u64> = None;
1343
1344 if let Some(pax) = raw_pax {
1345 let ignore_errors = self.ignore_pax_errors;
1346 let extensions = PaxExtensions::new(pax);
1347
1348 let parse_pax_u64 =
1351 |ext: &crate::PaxExtension<'_>, key: &'static str| -> Result<Option<u64>> {
1352 let s = match ext.value() {
1353 Ok(s) => s,
1354 Err(_) if ignore_errors => return Ok(None),
1355 Err(_) => {
1356 return Err(ParseError::InvalidPaxValue {
1357 key,
1358 value: Cow::Borrowed("<non-UTF-8>"),
1359 })
1360 }
1361 };
1362 match s.parse::<u64>() {
1363 Ok(v) => Ok(Some(v)),
1364 Err(_) if ignore_errors => Ok(None),
1365 Err(_) => Err(ParseError::InvalidPaxValue {
1366 key,
1367 value: s.to_owned().into(),
1368 }),
1369 }
1370 };
1371
1372 for ext in extensions {
1373 let ext = ext?;
1374 let key = ext.key().map_err(ParseError::from)?;
1375 let value = ext.value_bytes();
1376
1377 match key {
1378 PAX_PATH => {
1379 self.limits.check_path_len(value.len())?;
1380 path = Cow::Borrowed(value);
1381 }
1382 PAX_LINKPATH => {
1383 self.limits.check_path_len(value.len())?;
1384 link_target = Some(Cow::Borrowed(value));
1385 }
1386 PAX_SIZE => {
1387 if let Some(v) = parse_pax_u64(&ext, PAX_SIZE)? {
1388 entry_size = v;
1389 }
1390 }
1391 PAX_UID => {
1392 if let Some(v) = parse_pax_u64(&ext, PAX_UID)? {
1393 uid = v;
1394 }
1395 }
1396 PAX_GID => {
1397 if let Some(v) = parse_pax_u64(&ext, PAX_GID)? {
1398 gid = v;
1399 }
1400 }
1401 PAX_MTIME => {
1402 let s = match ext.value() {
1405 Ok(s) => s,
1406 Err(_) if ignore_errors => continue,
1407 Err(_) => {
1408 return Err(ParseError::InvalidPaxValue {
1409 key: PAX_MTIME,
1410 value: Cow::Borrowed("<non-UTF-8>"),
1411 })
1412 }
1413 };
1414 let int_part = s.split('.').next().unwrap_or(s);
1415 match int_part.parse::<u64>() {
1416 Ok(v) => mtime = v,
1417 Err(_) if ignore_errors => {}
1418 Err(_) => {
1419 return Err(ParseError::InvalidPaxValue {
1420 key: PAX_MTIME,
1421 value: s.to_owned().into(),
1422 })
1423 }
1424 }
1425 }
1426 PAX_UNAME => {
1427 uname = Some(Cow::Borrowed(value));
1428 }
1429 PAX_GNAME => {
1430 gname = Some(Cow::Borrowed(value));
1431 }
1432
1433 PAX_GNU_SPARSE_OFFSET => {
1435 let v = parse_pax_u64(&ext, PAX_GNU_SPARSE_OFFSET)?;
1436 pax_sparse_pending_offset = v;
1437 }
1438 PAX_GNU_SPARSE_NUMBYTES => {
1439 if let (Some(offset), Some(length)) = (
1440 pax_sparse_pending_offset.take(),
1441 parse_pax_u64(&ext, PAX_GNU_SPARSE_NUMBYTES)?,
1442 ) {
1443 let map = pax_sparse_map.get_or_insert_with(Vec::new);
1444 if map.len() >= self.limits.max_sparse_entries {
1445 return Err(ParseError::TooManySparseEntries {
1446 count: map.len() + 1,
1447 limit: self.limits.max_sparse_entries,
1448 });
1449 }
1450 map.push(SparseEntry { offset, length });
1451 }
1452 }
1453
1454 PAX_GNU_SPARSE_MAP => {
1456 let s = match ext.value() {
1457 Ok(s) => s,
1458 Err(_) if ignore_errors => continue,
1459 Err(_) => {
1460 return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1461 "non-UTF8 sparse map",
1462 )))
1463 }
1464 };
1465 let mut map = Vec::new();
1466 let parts: Vec<&str> = s.split(',').filter(|p| !p.is_empty()).collect();
1467 if parts.len() % 2 != 0 {
1468 return Err(ParseError::InvalidPaxSparseMap(Cow::Borrowed(
1469 "odd number of values in GNU.sparse.map",
1470 )));
1471 }
1472 for pair in parts.chunks(2) {
1473 if map.len() >= self.limits.max_sparse_entries {
1474 return Err(ParseError::TooManySparseEntries {
1475 count: map.len() + 1,
1476 limit: self.limits.max_sparse_entries,
1477 });
1478 }
1479 let offset = pair[0].parse::<u64>().map_err(|_| {
1480 ParseError::InvalidPaxSparseMap(
1481 format!("invalid offset: {:?}", pair[0]).into(),
1482 )
1483 })?;
1484 let length = pair[1].parse::<u64>().map_err(|_| {
1485 ParseError::InvalidPaxSparseMap(
1486 format!("invalid length: {:?}", pair[1]).into(),
1487 )
1488 })?;
1489 map.push(SparseEntry { offset, length });
1490 }
1491 pax_sparse_map = Some(map);
1492 }
1493
1494 PAX_GNU_SPARSE_REALSIZE | PAX_GNU_SPARSE_SIZE => {
1496 if let Some(v) = parse_pax_u64(&ext, PAX_GNU_SPARSE_REALSIZE)? {
1497 pax_sparse_real_size = Some(v);
1498 }
1499 }
1500 PAX_GNU_SPARSE_NAME => {
1501 self.limits.check_path_len(value.len())?;
1502 pax_sparse_name = Some(value);
1503 }
1504
1505 PAX_GNU_SPARSE_MAJOR | PAX_GNU_SPARSE_MINOR => {}
1508
1509 _ => {
1510 if let Some(attr_name) = key.strip_prefix(PAX_SCHILY_XATTR) {
1511 xattrs
1512 .push((Cow::Borrowed(attr_name.as_bytes()), Cow::Borrowed(value)));
1513 }
1514 }
1515 }
1516 }
1517 }
1518
1519 if let Some(name) = pax_sparse_name {
1521 path = Cow::Borrowed(name);
1522 }
1523
1524 if link_target.as_ref().is_some_and(|v| v.is_empty()) {
1528 link_target = None;
1529 }
1530 if uname.as_ref().is_some_and(|v| v.is_empty()) {
1531 uname = None;
1532 }
1533 if gname.as_ref().is_some_and(|v| v.is_empty()) {
1534 gname = None;
1535 }
1536
1537 if path.is_empty() && !self.allow_empty_path {
1539 return Err(ParseError::EmptyPath);
1540 }
1541
1542 self.limits.check_path_len(path.len())?;
1544
1545 let entry = ParsedEntry {
1546 header,
1547 entry_type: header.entry_type(),
1548 path,
1549 link_target,
1550 mode: header.mode()?,
1551 uid,
1552 gid,
1553 mtime,
1554 size: entry_size,
1555 uname,
1556 gname,
1557 dev_major: header.device_major()?,
1558 dev_minor: header.device_minor()?,
1559 xattrs,
1560 pax: raw_pax,
1561 };
1562
1563 let sparse = sparse.or_else(|| {
1567 pax_sparse_map.map(|map| SparseContext {
1568 sparse_map: map,
1569 real_size: pax_sparse_real_size.unwrap_or(entry_size),
1570 ext_consumed: 0, })
1572 });
1573
1574 if let Some(ctx) = sparse {
1575 Ok(ParseEvent::SparseEntry {
1577 consumed: HEADER_SIZE + ctx.ext_consumed,
1578 entry,
1579 sparse_map: ctx.sparse_map,
1580 real_size: ctx.real_size,
1581 })
1582 } else {
1583 Ok(ParseEvent::Entry {
1585 consumed: HEADER_SIZE,
1586 entry,
1587 })
1588 }
1589 }
1590}
1591
1592#[cfg(test)]
1593mod tests {
1594 use super::*;
1595 use crate::{GNU_MAGIC, GNU_VERSION, USTAR_MAGIC, USTAR_VERSION};
1596
1597 #[test]
1598 fn test_default_limits() {
1599 let limits = Limits::default();
1600 assert_eq!(limits.max_metadata_size, 1024 * 1024);
1601 assert_eq!(limits.max_path_len, None);
1602 assert_eq!(limits.max_pending_entries, 16);
1603 }
1604
1605 #[test]
1606 fn test_permissive_limits() {
1607 let limits = Limits::permissive();
1608 assert_eq!(limits.max_metadata_size, u32::MAX);
1609 assert_eq!(limits.max_path_len, None);
1610 }
1611
1612 #[test]
1613 fn test_permissive_limits_relaxed() {
1614 let limits = Limits::permissive();
1615 assert!(limits.max_metadata_size > Limits::default().max_metadata_size);
1616 assert!(limits.max_pending_entries > Limits::default().max_pending_entries);
1617 }
1618
1619 #[test]
1620 fn test_parser_empty_archive() {
1621 let mut parser = Parser::new(Limits::default());
1622
1623 let data = [0u8; 1024];
1625
1626 let event = parser.parse(&data).unwrap();
1627 assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
1628 assert!(parser.is_done());
1629 }
1630
1631 #[test]
1632 fn test_parser_need_data() {
1633 let mut parser = Parser::new(Limits::default());
1634
1635 let data = [0u8; 256];
1637
1638 let event = parser.parse(&data).unwrap();
1639 assert!(matches!(event, ParseEvent::NeedData { min_bytes: 512 }));
1640 }
1641
1642 #[test]
1643 fn test_parser_need_more_for_end() {
1644 let mut parser = Parser::new(Limits::default());
1645
1646 let data = [0u8; 512];
1648
1649 let event = parser.parse(&data).unwrap();
1650 assert!(matches!(event, ParseEvent::NeedData { min_bytes: 1024 }));
1651 }
1652
1653 #[test]
1654 fn test_parser_with_real_header() {
1655 let mut parser = Parser::new(Limits::default());
1656
1657 let mut data = vec![0u8; 2048];
1659
1660 data[0..8].copy_from_slice(b"test.txt");
1663 data[100..107].copy_from_slice(b"0000644");
1665 data[108..115].copy_from_slice(b"0000000");
1667 data[116..123].copy_from_slice(b"0000000");
1669 data[124..135].copy_from_slice(b"00000000000");
1671 data[136..147].copy_from_slice(b"00000000000");
1673 data[156] = b'0';
1675 data[257..263].copy_from_slice(USTAR_MAGIC);
1677 data[263..265].copy_from_slice(USTAR_VERSION);
1679
1680 let header = Header::from_bytes((&data[..512]).try_into().unwrap());
1682 let checksum = header.compute_checksum();
1683 let checksum_str = format!("{checksum:06o}\0 ");
1684 data[148..156].copy_from_slice(checksum_str.as_bytes());
1685
1686 let event = parser.parse(&data).unwrap();
1690 match event {
1691 ParseEvent::Entry { consumed, entry } => {
1692 assert_eq!(consumed, 512);
1693 assert_eq!(entry.path_lossy(), "test.txt");
1694 assert_eq!(entry.size, 0);
1695 assert!(entry.is_file());
1696 }
1697 other => panic!("Expected Entry, got {:?}", other),
1698 }
1699
1700 let event = parser.parse(&data[512..]).unwrap();
1702 assert!(matches!(event, ParseEvent::End { consumed: 1024 }));
1703 }
1704
/// A hand-assembled ustar header announcing a 5-byte body must be reported as
/// an entry of size 5 whose padded size is one full 512-byte block.
#[test]
fn test_parser_entry_with_content() {
    // 512 header + 512 content + 1536 zero bytes.
    let mut image = vec![0u8; 2560];

    // (start, end, bytes) for every fixed header field that is filled in.
    let fields: [(usize, usize, &[u8]); 8] = [
        (0, 8, b"test.txt"),
        (100, 107, b"0000644"),
        (108, 115, b"0000000"),
        (116, 123, b"0000000"),
        (124, 135, b"00000000005"),
        (136, 147, b"00000000000"),
        (257, 263, USTAR_MAGIC),
        (263, 265, USTAR_VERSION),
    ];
    for (start, end, bytes) in fields {
        image[start..end].copy_from_slice(bytes);
    }
    image[156] = b'0';

    // The checksum is computed last, once every other field is in place.
    let checksum = Header::from_bytes((&image[..512]).try_into().unwrap()).compute_checksum();
    image[148..156].copy_from_slice(format!("{checksum:06o}\0 ").as_bytes());

    image[512..517].copy_from_slice(b"hello");

    let mut parser = Parser::new(Limits::default());
    match parser.parse(&image).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(consumed, 512);
            assert_eq!(entry.path_lossy(), "test.txt");
            assert_eq!(entry.size, 5);
            assert_eq!(entry.padded_size(), 512);
        }
        other => panic!("Expected Entry, got {:?}", other),
    }

    // Resume after header + padded content; only zero bytes remain.
    let trailer = parser.parse(&image[1024..]).unwrap();
    assert!(matches!(trailer, ParseEvent::End { consumed: 1024 }));
}
1750
1751 fn make_header(name: &[u8], size: u64, typeflag: u8) -> [u8; HEADER_SIZE] {
1762 let mut header = [0u8; HEADER_SIZE];
1763
1764 let name_len = name.len().min(100);
1766 header[0..name_len].copy_from_slice(&name[..name_len]);
1767
1768 header[100..107].copy_from_slice(b"0000644");
1770
1771 header[108..115].copy_from_slice(b"0001750");
1773
1774 header[116..123].copy_from_slice(b"0001750");
1776
1777 let size_str = format!("{size:011o}");
1779 header[124..135].copy_from_slice(size_str.as_bytes());
1780
1781 header[136..147].copy_from_slice(b"14712345670");
1783
1784 header[156] = typeflag;
1786
1787 header[257..263].copy_from_slice(USTAR_MAGIC);
1789
1790 header[263..265].copy_from_slice(USTAR_VERSION);
1792
1793 let hdr = Header::from_bytes(&header);
1795 let checksum = hdr.compute_checksum();
1796 let checksum_str = format!("{checksum:06o}\0 ");
1797 header[148..156].copy_from_slice(checksum_str.as_bytes());
1798
1799 header
1800 }
1801
1802 fn make_link_header(name: &[u8], link_target: &[u8], typeflag: u8) -> [u8; HEADER_SIZE] {
1804 let mut header = make_header(name, 0, typeflag);
1805
1806 let link_len = link_target.len().min(100);
1808 header[157..157 + link_len].copy_from_slice(&link_target[..link_len]);
1809
1810 let hdr = Header::from_bytes(&header);
1812 let checksum = hdr.compute_checksum();
1813 let checksum_str = format!("{checksum:06o}\0 ");
1814 header[148..156].copy_from_slice(checksum_str.as_bytes());
1815
1816 header
1817 }
1818
1819 fn make_gnu_long_name(name: &[u8]) -> Vec<u8> {
1823 let content_size = name.len() + 1; let padded = content_size.next_multiple_of(HEADER_SIZE);
1826 let header = make_header(b"././@LongLink", content_size as u64, b'L');
1827
1828 let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1829 result.extend_from_slice(&header);
1830 result.extend_from_slice(name);
1831 result.push(0); result.extend(zeroes(padded - content_size));
1833
1834 result
1835 }
1836
1837 fn make_gnu_long_link(link: &[u8]) -> Vec<u8> {
1841 let content_size = link.len() + 1; let padded = content_size.next_multiple_of(HEADER_SIZE);
1843 let header = make_header(b"././@LongLink", content_size as u64, b'K');
1844
1845 let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1846 result.extend_from_slice(&header);
1847 result.extend_from_slice(link);
1848 result.push(0); result.extend(zeroes(padded - content_size));
1850
1851 result
1852 }
1853
1854 fn make_pax_entry(name: &[u8], type_flag: u8, entries: &[(&str, &[u8])]) -> Vec<u8> {
1856 use crate::builder::DecU64;
1857
1858 let mut content = Vec::new();
1860 for (key, value) in entries {
1861 let rest_len = 3 + key.len() + value.len();
1863 let mut len_len = 1;
1864 let mut max_len = 10;
1865 while rest_len + len_len >= max_len {
1866 len_len += 1;
1867 max_len *= 10;
1868 }
1869 let total_len = rest_len + len_len;
1870 let len_dec = DecU64::new(total_len as u64);
1871 content.extend_from_slice(len_dec.as_bytes());
1872 content.push(b' ');
1873 content.extend_from_slice(key.as_bytes());
1874 content.push(b'=');
1875 content.extend_from_slice(value);
1876 content.push(b'\n');
1877 }
1878
1879 let content_size = content.len();
1880 let header = make_header(name, content_size as u64, type_flag);
1881
1882 let padded = content_size.next_multiple_of(HEADER_SIZE);
1883 let mut result = Vec::with_capacity(HEADER_SIZE + padded);
1884 result.extend_from_slice(&header);
1885 result.extend_from_slice(&content);
1886 result.extend(zeroes(padded - content_size));
1887
1888 result
1889 }
1890
1891 fn make_pax_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
1892 make_pax_entry(b"PaxHeader/file", b'x', entries)
1893 }
1894
1895 fn make_pax_global_header(entries: &[(&str, &[u8])]) -> Vec<u8> {
1896 make_pax_entry(b"pax_global_header", b'g', entries)
1897 }
1898
/// Returns an iterator that yields exactly `n` zero bytes.
fn zeroes(n: usize) -> impl Iterator<Item = u8> {
    std::iter::repeat(0u8).take(n)
}
1903
/// A GNU long-name record must replace the short name of the entry that
/// follows it, and the bytes it occupies must be counted in `consumed`.
#[test]
fn test_parser_gnu_long_name() {
    let mut long_name = String::from("very/long/path/that/exceeds/one/hundred/bytes/");
    long_name.push_str(&"x".repeat(60));
    assert!(long_name.len() > 100);

    // Layout: long-name record, real header (5-byte body), one content block,
    // then 1024 zero bytes.
    let mut payload = [0u8; 512];
    payload[..5].copy_from_slice(b"hello");

    let mut archive = make_gnu_long_name(long_name.as_bytes());
    archive.extend_from_slice(&make_header(b"placeholder", 5, b'0'));
    archive.extend_from_slice(&payload);
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let header_bytes = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert!(consumed > 512);
            assert_eq!(entry.path.as_ref(), long_name.as_bytes());
            assert_eq!(entry.size, 5);
            assert!(entry.is_file());
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Skip the padded content block before asking for the next event.
    let trailer = parser.parse(&archive[header_bytes + 512..]).unwrap();
    assert!(matches!(trailer, ParseEvent::End { .. }));
}
1944
/// A GNU long-link record must replace the link target of the symlink entry
/// that follows it while leaving the entry's own path untouched.
#[test]
fn test_parser_gnu_long_link() {
    let mut long_target = String::from("/some/very/long/symlink/target/path/");
    long_target.push_str(&"t".repeat(80));
    assert!(long_target.len() > 100);

    let mut archive = make_gnu_long_link(long_target.as_bytes());
    archive.extend_from_slice(&make_link_header(b"mylink", b"placeholder", b'2'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let used = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(entry.path.as_ref(), b"mylink");
            assert!(entry.is_symlink());
            assert_eq!(
                entry.link_target.as_ref().unwrap().as_ref(),
                long_target.as_bytes()
            );
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // The symlink has no content, so the next event starts right after it.
    let trailer = parser.parse(&archive[used..]).unwrap();
    assert!(matches!(trailer, ParseEvent::End { .. }));
}
1981
/// A PAX `path` record must override the name stored in the header.
#[test]
fn test_parser_pax_path_override() {
    let pax_path = "pax/overridden/path/to/file.txt";

    let mut archive = make_pax_header(&[("path", pax_path.as_bytes())]);
    archive.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
}
2006
/// A PAX `size` record must take precedence over the size in the header.
#[test]
fn test_parser_pax_size_override() {
    // PAX announces 999 bytes while the header itself claims 5.
    let mut archive = make_pax_header(&[("size", b"999")]);
    archive.extend_from_slice(&make_header(b"file.txt", 5, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.size, 999);
}
2027
/// PAX `uid`, `gid` and `mtime` records must override the header values; the
/// fractional part of `mtime` is not reflected in `entry.mtime`.
#[test]
fn test_parser_pax_metadata() {
    let pax_records: &[(&str, &[u8])] = &[
        ("uid", b"65534"),
        ("gid", b"65535"),
        ("mtime", b"1700000000.123456789"),
    ];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.uid, 65534);
    assert_eq!(entry.gid, 65535);
    assert_eq!(entry.mtime, 1700000000);
}
2053
/// `SCHILY.xattr.*` PAX records must surface in `entry.xattrs`, in record
/// order, with the `SCHILY.xattr.` prefix removed from the name.
#[test]
fn test_parser_pax_xattr() {
    let pax_records: &[(&str, &[u8])] = &[
        ("SCHILY.xattr.user.test", b"test_value"),
        (
            "SCHILY.xattr.security.selinux",
            b"system_u:object_r:unlabeled_t:s0",
        ),
    ];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.xattrs.len(), 2);

    assert_eq!(entry.xattrs[0].0.as_ref(), b"user.test");
    assert_eq!(entry.xattrs[0].1.as_ref(), b"test_value");

    assert_eq!(entry.xattrs[1].0.as_ref(), b"security.selinux");
    assert_eq!(
        entry.xattrs[1].1.as_ref(),
        b"system_u:object_r:unlabeled_t:s0"
    );
}
2088
/// Besides applying known keys, the parser must hand back the raw PAX payload
/// in `entry.pax` so that every record, including unknown keys, can be
/// re-read with `PaxExtensions`.
#[test]
fn test_parser_pax_raw_bytes_preserved() {
    let pax_records: &[(&str, &[u8])] = &[
        ("path", b"custom/path.txt"),
        ("SCHILY.xattr.user.key", b"val"),
        ("myfancykey", b"myfancyvalue"),
    ];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"orig.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Known keys were interpreted...
    assert_eq!(entry.path.as_ref(), b"custom/path.txt");
    assert_eq!(entry.xattrs.len(), 1);

    // ...and all three records are still present in the raw payload.
    let raw = entry.pax.expect("pax should be Some");
    let keys: Vec<&str> = PaxExtensions::new(raw)
        .filter_map(Result::ok)
        .filter_map(|ext| ext.key().ok())
        .collect();
    assert_eq!(keys, &["path", "SCHILY.xattr.user.key", "myfancykey"]);
}
2124
/// An entry that is not preceded by a PAX header must report `pax` as `None`.
#[test]
fn test_parser_no_pax_means_none() {
    let mut archive = make_header(b"plain.txt", 0, b'0').to_vec();
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert!(entry.pax.is_none());
}
2142
/// A GNU long-name record overrides the path but must not make `entry.pax`
/// appear populated.
#[test]
fn test_parser_gnu_long_name_no_pax() {
    let mut long_name = String::from("long/path/");
    long_name.push_str(&"x".repeat(100));

    let mut archive = make_gnu_long_name(long_name.as_bytes());
    archive.extend_from_slice(&make_header(b"short", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.path.as_ref(), long_name.as_bytes());
    assert!(entry.pax.is_none());
}
2163
/// A PAX `linkpath` record must override the link target stored in the header.
#[test]
fn test_parser_pax_linkpath() {
    let pax_linkpath = "/a/very/long/symlink/target/from/pax";

    let mut archive = make_pax_header(&[("linkpath", pax_linkpath.as_bytes())]);
    archive.extend_from_slice(&make_link_header(b"mylink", b"short", b'2'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert!(entry.is_symlink());
    assert_eq!(
        entry.link_target.as_ref().unwrap().as_ref(),
        pax_linkpath.as_bytes()
    );
}
2188
/// A global PAX header is reported as its own `GlobalExtensions` event
/// carrying the raw records; the entry after it reports no local `pax` data.
#[test]
fn test_parser_global_pax_header() {
    let global_records: &[(&str, &[u8])] = &[
        ("mtime", b"1700000000"),
        (
            "SCHILY.xattr.security.selinux",
            b"system_u:object_r:default_t:s0",
        ),
    ];

    let mut archive = make_pax_global_header(global_records);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    // First event: the global header itself.
    let skipped = match parser.parse(&archive).unwrap() {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let keys: Vec<&str> = PaxExtensions::new(&pax_data)
                .filter_map(Result::ok)
                .filter_map(|ext| ext.key().ok())
                .collect();
            assert_eq!(keys, &["mtime", "SCHILY.xattr.security.selinux"]);
            consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    // Second event: the regular file that follows.
    let entry = match parser.parse(&archive[skipped..]).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };
    assert_eq!(entry.path_lossy(), "file.txt");
    assert!(entry.pax.is_none());
}
2238
/// With only the header of a global PAX entry available (100 bytes of content
/// announced), the parser must ask for 1024 bytes: header plus padded content.
#[test]
fn test_parser_global_pax_header_need_data() {
    let header_only = make_header(b"pax_global_header", 100, b'g');

    let mut parser = Parser::new(Limits::default());
    let requested = match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => min_bytes,
        other => panic!("Expected NeedData, got {:?}", other),
    };

    assert_eq!(requested, 1024);
}
2254
/// A global PAX header whose payload exceeds `max_metadata_size` must be
/// rejected with `MetadataTooLarge`.
#[test]
fn test_parser_global_pax_header_too_large() {
    let large_value = "x".repeat(1000);

    let mut archive = make_pax_global_header(&[("comment", large_value.as_bytes())]);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    // Limit far below the ~1000-byte record built above.
    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Limits::default()
    });

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
2277
/// Two consecutive global PAX headers must be reported as two separate
/// `GlobalExtensions` events, in order, before the entry itself.
#[test]
fn test_parser_multiple_global_pax_headers() {
    let mut archive = make_pax_global_header(&[("comment", b"first")]);
    archive.extend(make_pax_global_header(&[("comment", b"second")]));
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let first_len = match parser.parse(&archive).unwrap() {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let exts: Vec<_> = PaxExtensions::new(&pax_data)
                .filter_map(Result::ok)
                .collect();
            assert_eq!(exts[0].value_bytes(), b"first");
            consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    let second_len = match parser.parse(&archive[first_len..]).unwrap() {
        ParseEvent::GlobalExtensions { consumed, pax_data } => {
            let exts: Vec<_> = PaxExtensions::new(&pax_data)
                .filter_map(Result::ok)
                .collect();
            assert_eq!(exts[0].value_bytes(), b"second");
            consumed
        }
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    // Only after both global headers does the real entry show up.
    let last = parser.parse(&archive[first_len + second_len..]).unwrap();
    assert!(matches!(last, ParseEvent::Entry { .. }));
}
2321
/// A global PAX header followed by a local PAX header: the local `path`
/// override must still be applied and `entry.pax` must be populated.
#[test]
fn test_parser_global_pax_does_not_interfere_with_local_pax() {
    let mut archive = make_pax_global_header(&[("mtime", b"1000000000")]);
    archive.extend(make_pax_header(&[("path", b"overridden.txt")]));
    archive.extend_from_slice(&make_header(b"original.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let skipped = match parser.parse(&archive).unwrap() {
        ParseEvent::GlobalExtensions { consumed, .. } => consumed,
        other => panic!("Expected GlobalExtensions, got {:?}", other),
    };

    let entry = match parser.parse(&archive[skipped..]).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };
    assert_eq!(entry.path.as_ref(), b"overridden.txt");
    assert!(entry.pax.is_some());
}
2352
/// A GNU long-name record followed directly by the zero blocks, with no entry
/// to attach to, must produce `OrphanedMetadata`.
#[test]
fn test_parser_orphaned_metadata() {
    let mut archive = make_gnu_long_name(b"some/long/name/here");
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::OrphanedMetadata)
    ));
}
2369
/// A PAX header followed directly by the zero blocks, with no entry to attach
/// to, must produce `OrphanedMetadata`.
#[test]
fn test_parser_orphaned_pax_metadata() {
    let mut archive = make_pax_header(&[("path", b"test")]);
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::OrphanedMetadata)
    ));
}
2382
/// Two GNU long-name records in front of the same entry must be rejected with
/// `DuplicateGnuLongName`.
#[test]
fn test_parser_duplicate_gnu_long_name() {
    let mut archive = make_gnu_long_name(b"first/long/name");
    archive.extend(make_gnu_long_name(b"second/long/name"));
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::DuplicateGnuLongName)
    ));
}
2397
/// Two GNU long-link records in front of the same entry must be rejected with
/// `DuplicateGnuLongLink`.
#[test]
fn test_parser_duplicate_gnu_long_link() {
    let mut archive = make_gnu_long_link(b"first/long/target");
    archive.extend(make_gnu_long_link(b"second/long/target"));
    archive.extend_from_slice(&make_link_header(b"link", b"x", b'2'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::DuplicateGnuLongLink)
    ));
}
2412
/// Two local PAX headers in front of the same entry must be rejected with
/// `DuplicatePaxHeader`.
#[test]
fn test_parser_duplicate_pax_header() {
    let mut archive = make_pax_header(&[("path", b"first")]);
    archive.extend(make_pax_header(&[("path", b"second")]));
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::DuplicatePaxHeader)
    ));
}
2427
/// When an entry carries both a GNU long name and a PAX `path`, the PAX value
/// is the one that ends up in `entry.path`.
#[test]
fn test_parser_combined_gnu_pax() {
    let mut gnu_name = String::from("gnu/long/name/");
    gnu_name.push_str(&"g".repeat(100));
    let pax_path = "pax/should/win/file.txt";

    let mut archive = make_gnu_long_name(gnu_name.as_bytes());
    archive.extend(make_pax_header(&[("path", pax_path.as_bytes())]));
    archive.extend_from_slice(&make_header(b"header.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.path.as_ref(), pax_path.as_bytes());
}
2455
/// A GNU long name and a GNU long link in front of one symlink entry must
/// both be applied: one to the path, the other to the link target.
#[test]
fn test_parser_gnu_long_name_and_link_combined() {
    let mut long_name = String::from("long/symlink/name/");
    long_name.push_str(&"n".repeat(100));
    let mut long_target = String::from("long/target/path/");
    long_target.push_str(&"t".repeat(100));

    let mut archive = make_gnu_long_name(long_name.as_bytes());
    archive.extend(make_gnu_long_link(long_target.as_bytes()));
    archive.extend_from_slice(&make_link_header(b"short", b"short", b'2'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.path.as_ref(), long_name.as_bytes());
    assert_eq!(
        entry.link_target.as_ref().unwrap().as_ref(),
        long_target.as_bytes()
    );
    assert!(entry.is_symlink());
}
2483
/// Two entries, each with its own PAX `path` override, must each get their
/// own path: the first override must not leak into the second entry.
#[test]
fn test_parser_pax_multiple_entries() {
    // One 512-byte content block starting with the given 5 bytes.
    let content_block = |text: &[u8; 5]| {
        let mut block = [0u8; 512];
        block[..5].copy_from_slice(text);
        block
    };

    let mut archive = make_pax_header(&[("path", b"first/file.txt")]);
    archive.extend_from_slice(&make_header(b"f1", 5, b'0'));
    archive.extend_from_slice(&content_block(b"hello"));

    archive.extend(make_pax_header(&[("path", b"second/file.txt")]));
    archive.extend_from_slice(&make_header(b"f2", 5, b'0'));
    archive.extend_from_slice(&content_block(b"world"));

    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());

    let first_len = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(entry.path.as_ref(), b"first/file.txt");
            assert_eq!(entry.size, 5);
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Step over the first entry's padded content block.
    let second_start = first_len + 512;
    let second_len = match parser.parse(&archive[second_start..]).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(entry.path.as_ref(), b"second/file.txt");
            assert_eq!(entry.size, 5);
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Step over the second entry's padded content block as well.
    let trailer_start = second_start + second_len + 512;
    let trailer = parser.parse(&archive[trailer_start..]).unwrap();
    assert!(matches!(trailer, ParseEvent::End { .. }));
}
2535
/// PAX `uname` and `gname` records must be exposed on the entry.
#[test]
fn test_parser_pax_uname_gname() {
    let pax_records: &[(&str, &[u8])] = &[("uname", b"testuser"), ("gname", b"testgroup")];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.uname.as_ref().unwrap().as_ref(), b"testuser");
    assert_eq!(entry.gname.as_ref().unwrap().as_ref(), b"testgroup");
}
2558
/// A GNU long-name payload larger than `max_metadata_size` must be rejected
/// with `MetadataTooLarge`.
#[test]
fn test_parser_gnu_long_too_large() {
    let long_name = "x".repeat(200);

    let mut archive = make_gnu_long_name(long_name.as_bytes());
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    // 200-byte name against a 100-byte metadata budget.
    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Limits::default()
    });

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
2581
/// A PAX `path` longer than `max_path_len` must be rejected with
/// `PathTooLong`, reporting both the actual length and the limit.
#[test]
fn test_parser_pax_path_too_long() {
    let long_path = "x".repeat(200);

    let mut archive = make_pax_header(&[("path", long_path.as_bytes())]);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits {
        max_path_len: Some(100),
        ..Limits::default()
    });

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::PathTooLong {
            len: 200,
            limit: 100
        })
    ));
}
2606
/// A local PAX header whose payload exceeds `max_metadata_size` must be
/// rejected with `MetadataTooLarge`.
#[test]
fn test_parser_pax_too_large() {
    let large_value = "x".repeat(1000);

    let mut archive = make_pax_header(&[("path", large_value.as_bytes())]);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    // ~1000-byte record against a 100-byte metadata budget.
    let mut parser = Parser::new(Limits {
        max_metadata_size: 100,
        ..Limits::default()
    });

    assert!(matches!(
        parser.parse(&archive),
        Err(ParseError::MetadataTooLarge { .. })
    ));
}
2626
/// With only a GNU long-name header available (200 bytes of content
/// announced), the parser must ask for 1024 bytes: header plus padded content.
#[test]
fn test_parser_need_data_for_gnu_long_content() {
    let header_only = make_header(b"././@LongLink", 200, b'L');

    let mut parser = Parser::new(Limits::default());
    let requested = match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => min_bytes,
        other => panic!("Expected NeedData, got {:?}", other),
    };

    assert_eq!(requested, 1024);
}
2647
/// With only a PAX header available (100 bytes of content announced), the
/// parser must ask for 1024 bytes: header plus padded content.
#[test]
fn test_parser_need_data_for_pax_content() {
    let header_only = make_header(b"PaxHeader/file", 100, b'x');

    let mut parser = Parser::new(Limits::default());
    let requested = match parser.parse(&header_only).unwrap() {
        ParseEvent::NeedData { min_bytes } => min_bytes,
        other => panic!("Expected NeedData, got {:?}", other),
    };

    assert_eq!(requested, 1024);
}
2664
/// When the input ends right after a complete 1024-byte GNU long-name record,
/// the requested `min_bytes` must include those 1024 bytes plus the 512-byte
/// header still missing, i.e. 1536 measured from the start of the input.
#[test]
fn test_need_data_adjusted_through_extension_headers() {
    let mut long_name = String::from("long/path/name/");
    long_name.push_str(&"x".repeat(90));

    // Header (512) + one padded content block (512).
    let gnu_entry = make_gnu_long_name(long_name.as_bytes());
    assert_eq!(gnu_entry.len(), 1024);

    let mut parser = Parser::new(Limits::default());
    let requested = match parser.parse(&gnu_entry).unwrap() {
        ParseEvent::NeedData { min_bytes } => min_bytes,
        other => panic!("Expected NeedData, got {:?}", other),
    };

    assert_eq!(
        requested, 1536,
        "NeedData.min_bytes must account for bytes consumed by \
         extension headers (1024 + 512 = 1536)"
    );
}
2695
/// Regression test named after CVE-2025-62518.
///
/// The header declares size 0 while the PAX record declares 1024. The 1024
/// content bytes start with `MALICIOUS` and carry `b'0'` at offset 156. The
/// parser must report the PAX size, so that skipping the padded content leads
/// straight to the end of the archive instead of a second entry.
#[test]
fn test_cve_2025_62518_pax_size_overrides_header() {
    let pax_records: &[(&str, &[u8])] = &[("size", b"1024")];

    let mut nested = vec![0u8; 1024];
    nested[0..9].copy_from_slice(b"MALICIOUS");
    nested[156] = b'0';

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"nested.tar", 0, b'0'));
    archive.extend_from_slice(&nested);
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let header_bytes = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { consumed, entry } => {
            assert_eq!(
                entry.size, 1024,
                "CVE-2025-62518: Parser MUST use PAX size (1024), not header size (0)"
            );
            assert_eq!(entry.padded_size(), 1024, "Padded size must match PAX size");
            assert_eq!(entry.path_lossy(), "nested.tar");
            consumed
        }
        other => panic!("Expected Entry, got {:?}", other),
    };

    // Skip the full 1024 bytes of content before parsing on.
    match parser.parse(&archive[header_bytes + 1024..]).unwrap() {
        ParseEvent::Entry { entry, .. } => {
            panic!(
                "CVE-2025-62518 VULNERABLE: Parser found unexpected entry '{}' \
                 because it used header size (0) instead of PAX size (1024)",
                entry.path_lossy()
            );
        }
        ParseEvent::End { .. } => {}
        other => panic!("Expected End, got {:?}", other),
    }
}
2771
/// With a header size of 0 and a PAX `size` of 512, the reported entry size
/// must be the PAX value.
#[test]
fn test_pax_size_affects_parser_state() {
    let pax_records: &[(&str, &[u8])] = &[("size", b"512")];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"test.bin", 0, b'0'));
    // 512 bytes of all-zero file content, then 1024 more zero bytes.
    archive.extend(zeroes(512));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {:?}", other),
    };

    assert_eq!(entry.size, 512, "Entry size must reflect PAX override");
}
2798
2799 fn make_archive_with_pax(key: &str, value: &[u8]) -> Vec<u8> {
2806 let mut archive = Vec::new();
2807 archive.extend(make_pax_header(&[(key, value)]));
2808 archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
2809 archive.extend(zeroes(1024));
2810 archive
2811 }
2812
/// By default a non-numeric PAX `uid` is an error naming the `uid` key.
#[test]
fn test_strict_rejects_invalid_pax_uid() {
    let bad_archive = make_archive_with_pax("uid", b"notanumber");

    let err = Parser::new(Limits::default())
        .parse(&bad_archive)
        .unwrap_err();

    assert!(
        matches!(err, ParseError::InvalidPaxValue { key: "uid", .. }),
        "expected InvalidPaxValue for uid, got {err:?}"
    );
}
2823
/// By default a non-numeric PAX `size` is an error naming the `size` key.
#[test]
fn test_strict_rejects_invalid_pax_size() {
    let bad_archive = make_archive_with_pax("size", b"xyz");

    let err = Parser::new(Limits::default())
        .parse(&bad_archive)
        .unwrap_err();

    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: "size", .. }
    ));
}
2834
/// By default a non-numeric PAX `gid` is an error naming the `gid` key.
#[test]
fn test_strict_rejects_invalid_pax_gid() {
    let bad_archive = make_archive_with_pax("gid", b"bad");

    let err = Parser::new(Limits::default())
        .parse(&bad_archive)
        .unwrap_err();

    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: "gid", .. }
    ));
}
2845
/// By default a non-numeric PAX `mtime` is an error whose key is `PAX_MTIME`.
#[test]
fn test_strict_rejects_invalid_pax_mtime() {
    let bad_archive = make_archive_with_pax("mtime", b"nottime");

    let err = Parser::new(Limits::default())
        .parse(&bad_archive)
        .unwrap_err();

    assert!(matches!(
        err,
        ParseError::InvalidPaxValue { key: PAX_MTIME, .. }
    ));
}
2856
/// With `set_ignore_pax_errors(true)` a bad PAX `uid` is skipped and the uid
/// from the header is kept (`0001750` octal, i.e. 1000).
#[test]
fn test_lenient_ignores_invalid_pax_uid() {
    let bad_archive = make_archive_with_pax("uid", b"notanumber");

    let mut parser = Parser::new(Limits::default());
    parser.set_ignore_pax_errors(true);

    let entry = match parser.parse(&bad_archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {other:?}"),
    };

    assert_eq!(entry.uid, 1000);
}
2871
/// With `set_ignore_pax_errors(true)` a bad PAX `size` is skipped and the
/// size from the header (0) is kept.
#[test]
fn test_lenient_ignores_invalid_pax_size() {
    let bad_archive = make_archive_with_pax("size", b"xyz");

    let mut parser = Parser::new(Limits::default());
    parser.set_ignore_pax_errors(true);

    let entry = match parser.parse(&bad_archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {other:?}"),
    };

    assert_eq!(entry.size, 0);
}
2886
/// Well-formed numeric PAX values are accepted by default and override the
/// corresponding header fields.
#[test]
fn test_strict_accepts_valid_pax_values() {
    let pax_records: &[(&str, &[u8])] = &[
        ("uid", b"2000"),
        ("gid", b"3000"),
        ("size", b"42"),
        ("mtime", b"1700000000"),
    ];

    let mut archive = make_pax_header(pax_records);
    archive.extend_from_slice(&make_header(b"file.txt", 0, b'0'));
    archive.extend(zeroes(1024));

    let mut parser = Parser::new(Limits::default());
    let entry = match parser.parse(&archive).unwrap() {
        ParseEvent::Entry { entry, .. } => entry,
        other => panic!("Expected Entry, got {other:?}"),
    };

    assert_eq!(entry.uid, 2000);
    assert_eq!(entry.gid, 3000);
    assert_eq!(entry.size, 42);
    assert_eq!(entry.mtime, 1700000000);
}
2911
2912 #[test]
2913 fn test_strict_accepts_fractional_mtime() {
2914 let archive = make_archive_with_pax("mtime", b"1234567890.123456");
2915 let mut parser = Parser::new(Limits::default());
2916 let event = parser.parse(&archive).unwrap();
2917 match event {
2918 ParseEvent::Entry { entry, .. } => {
2919 assert_eq!(entry.mtime, 1234567890);
2920 }
2921 other => panic!("Expected Entry, got {other:?}"),
2922 }
2923 }
2924
2925 fn encode_octal_12(value: u64) -> [u8; 12] {
2931 let s = format!("{value:011o}\0");
2932 let mut field = [0u8; 12];
2933 field.copy_from_slice(s.as_bytes());
2934 field
2935 }
2936
2937 fn make_gnu_sparse_header(
2944 name: &[u8],
2945 entries: &[(u64, u64)],
2946 on_disk_size: u64,
2947 real_size: u64,
2948 is_extended: bool,
2949 ) -> [u8; HEADER_SIZE] {
2950 assert!(entries.len() <= 4, "max 4 inline sparse descriptors");
2951
2952 let mut header = [0u8; HEADER_SIZE];
2953
2954 let name_len = name.len().min(100);
2956 header[0..name_len].copy_from_slice(&name[..name_len]);
2957
2958 header[100..107].copy_from_slice(b"0000644");
2960 header[108..115].copy_from_slice(b"0001750");
2962 header[116..123].copy_from_slice(b"0001750");
2964
2965 let size_str = format!("{on_disk_size:011o}");
2967 header[124..135].copy_from_slice(size_str.as_bytes());
2968
2969 header[136..147].copy_from_slice(b"14712345670");
2971
2972 header[156] = b'S';
2974
2975 header[257..263].copy_from_slice(GNU_MAGIC);
2977 header[263..265].copy_from_slice(GNU_VERSION);
2979
2980 for (i, &(offset, length)) in entries.iter().enumerate() {
2982 let base = 386 + i * 24;
2983 header[base..base + 12].copy_from_slice(&encode_octal_12(offset));
2984 header[base + 12..base + 24].copy_from_slice(&encode_octal_12(length));
2985 }
2986
2987 header[482] = if is_extended { 1 } else { 0 };
2989
2990 let real_str = format!("{real_size:011o}");
2992 header[483..494].copy_from_slice(real_str.as_bytes());
2993
2994 let hdr = Header::from_bytes(&header);
2996 let checksum = hdr.compute_checksum();
2997 let checksum_str = format!("{checksum:06o}\0 ");
2998 header[148..156].copy_from_slice(checksum_str.as_bytes());
2999
3000 header
3001 }
3002
3003 fn make_gnu_ext_sparse(entries: &[(u64, u64)], is_extended: bool) -> [u8; HEADER_SIZE] {
3006 assert!(entries.len() <= 21, "max 21 descriptors per ext block");
3007
3008 let mut block = [0u8; HEADER_SIZE];
3009
3010 for (i, &(offset, length)) in entries.iter().enumerate() {
3011 let base = i * 24;
3012 block[base..base + 12].copy_from_slice(&encode_octal_12(offset));
3013 block[base + 12..base + 24].copy_from_slice(&encode_octal_12(length));
3014 }
3015
3016 block[504] = if is_extended { 1 } else { 0 };
3018
3019 block
3020 }
3021
3022 #[test]
3027 fn test_sparse_basic() {
3028 let header = make_gnu_sparse_header(
3031 b"sparse.txt",
3032 &[(0x1000, 5), (0x3000, 5)],
3033 10, 0x3005, false,
3036 );
3037
3038 let mut archive = Vec::new();
3039 archive.extend_from_slice(&header);
3040 let mut content = [0u8; HEADER_SIZE];
3042 content[0..5].copy_from_slice(b"hello");
3043 content[5..10].copy_from_slice(b"world");
3044 archive.extend_from_slice(&content);
3045 archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3048 let event = parser.parse(&archive).unwrap();
3049
3050 match event {
3051 ParseEvent::SparseEntry {
3052 consumed,
3053 entry,
3054 sparse_map,
3055 real_size,
3056 } => {
3057 assert_eq!(consumed, HEADER_SIZE);
3058 assert_eq!(entry.path_lossy(), "sparse.txt");
3059 assert_eq!(entry.size, 10);
3060 assert_eq!(real_size, 0x3005);
3061 assert_eq!(sparse_map.len(), 2);
3062 assert_eq!(
3063 sparse_map[0],
3064 SparseEntry {
3065 offset: 0x1000,
3066 length: 5
3067 }
3068 );
3069 assert_eq!(
3070 sparse_map[1],
3071 SparseEntry {
3072 offset: 0x3000,
3073 length: 5
3074 }
3075 );
3076 }
3077 other => panic!("Expected SparseEntry, got {other:?}"),
3078 }
3079 }
3080
3081 #[test]
3082 fn test_sparse_no_entries() {
3083 let header = make_gnu_sparse_header(b"empty_sparse.txt", &[], 0, 4096, false);
3085
3086 let mut archive = Vec::new();
3087 archive.extend_from_slice(&header);
3088 archive.extend(zeroes(1024));
3089
3090 let mut parser = Parser::new(Limits::default());
3091 let event = parser.parse(&archive).unwrap();
3092
3093 match event {
3094 ParseEvent::SparseEntry {
3095 sparse_map,
3096 real_size,
3097 entry,
3098 ..
3099 } => {
3100 assert!(sparse_map.is_empty());
3101 assert_eq!(real_size, 4096);
3102 assert_eq!(entry.size, 0);
3103 }
3104 other => panic!("Expected SparseEntry, got {other:?}"),
3105 }
3106 }
3107
3108 #[test]
3109 fn test_sparse_four_inline_entries() {
3110 let entries = [(0u64, 512), (1024, 512), (2048, 512), (3072, 512)];
3112 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3113 let real_size = 3072 + 512;
3114 let header = make_gnu_sparse_header(b"four.txt", &entries, on_disk, real_size, false);
3115
3116 let mut archive = Vec::new();
3117 archive.extend_from_slice(&header);
3118 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3119 archive.extend(zeroes(1024));
3120
3121 let mut parser = Parser::new(Limits::default());
3122 let event = parser.parse(&archive).unwrap();
3123
3124 match event {
3125 ParseEvent::SparseEntry {
3126 sparse_map,
3127 real_size: rs,
3128 ..
3129 } => {
3130 assert_eq!(sparse_map.len(), 4);
3131 assert_eq!(rs, real_size);
3132 for (i, &(off, len)) in entries.iter().enumerate() {
3133 assert_eq!(sparse_map[i].offset, off);
3134 assert_eq!(sparse_map[i].length, len);
3135 }
3136 }
3137 other => panic!("Expected SparseEntry, got {other:?}"),
3138 }
3139 }
3140
3141 #[test]
3142 fn test_sparse_with_extension_block() {
3143 let inline_entries = [(0u64, 100), (512, 100), (1024, 100), (1536, 100)];
3145 let ext_entries = [(2048u64, 100), (2560, 100)];
3146 let on_disk: u64 = 600; let real_size = 2660; let header =
3150 make_gnu_sparse_header(b"extended.txt", &inline_entries, on_disk, real_size, true);
3151 let ext = make_gnu_ext_sparse(&ext_entries, false);
3152
3153 let mut archive = Vec::new();
3154 archive.extend_from_slice(&header);
3155 archive.extend_from_slice(&ext);
3156 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3157 archive.extend(zeroes(1024));
3158
3159 let mut parser = Parser::new(Limits::default());
3160 let event = parser.parse(&archive).unwrap();
3161
3162 match event {
3163 ParseEvent::SparseEntry {
3164 consumed,
3165 sparse_map,
3166 real_size: rs,
3167 ..
3168 } => {
3169 assert_eq!(consumed, 2 * HEADER_SIZE);
3171 assert_eq!(rs, real_size);
3172 assert_eq!(sparse_map.len(), 6);
3173 assert_eq!(sparse_map[4].offset, 2048);
3174 assert_eq!(sparse_map[5].offset, 2560);
3175 }
3176 other => panic!("Expected SparseEntry, got {other:?}"),
3177 }
3178 }
3179
3180 #[test]
3181 fn test_sparse_multiple_extension_blocks() {
3182 let inline = [(0u64, 10), (100, 10), (200, 10), (300, 10)];
3184 let mut ext1_entries = Vec::new();
3185 for i in 0..21 {
3186 ext1_entries.push((400 + i * 100, 10u64));
3187 }
3188 let ext2_entries = [(2500u64, 10), (2600, 10), (2700, 10)];
3189 let on_disk = 28 * 10u64;
3190 let real_size = 2710;
3191
3192 let header = make_gnu_sparse_header(b"multi_ext.txt", &inline, on_disk, real_size, true);
3193 let ext1 = make_gnu_ext_sparse(&ext1_entries, true);
3194 let ext2 = make_gnu_ext_sparse(&ext2_entries, false);
3195
3196 let mut archive = Vec::new();
3197 archive.extend_from_slice(&header);
3198 archive.extend_from_slice(&ext1);
3199 archive.extend_from_slice(&ext2);
3200 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3201 archive.extend(zeroes(1024));
3202
3203 let mut parser = Parser::new(Limits::default());
3204 let event = parser.parse(&archive).unwrap();
3205
3206 match event {
3207 ParseEvent::SparseEntry {
3208 consumed,
3209 sparse_map,
3210 real_size: rs,
3211 ..
3212 } => {
3213 assert_eq!(consumed, 3 * HEADER_SIZE);
3214 assert_eq!(rs, real_size);
3215 assert_eq!(sparse_map.len(), 28);
3216 }
3217 other => panic!("Expected SparseEntry, got {other:?}"),
3218 }
3219 }
3220
3221 #[test]
3222 fn test_sparse_need_data_for_extension() {
3223 let header = make_gnu_sparse_header(
3226 b"need_ext.txt",
3227 &[(0, 100)],
3228 100,
3229 100,
3230 true, );
3232
3233 let mut parser = Parser::new(Limits::default());
3234 let event = parser.parse(&header).unwrap();
3235
3236 match event {
3237 ParseEvent::NeedData { min_bytes } => {
3238 assert_eq!(min_bytes, 2 * HEADER_SIZE);
3239 }
3240 other => panic!("Expected NeedData, got {other:?}"),
3241 }
3242 }
3243
3244 #[test]
3245 fn test_sparse_need_data_chained_extensions() {
3246 let header = make_gnu_sparse_header(b"chain.txt", &[(0, 10)], 20, 20, true);
3248 let ext1 = make_gnu_ext_sparse(&[(10, 10)], true); let mut input = Vec::new();
3251 input.extend_from_slice(&header);
3252 input.extend_from_slice(&ext1);
3253
3254 let mut parser = Parser::new(Limits::default());
3255 let event = parser.parse(&input).unwrap();
3256
3257 match event {
3258 ParseEvent::NeedData { min_bytes } => {
3259 assert_eq!(min_bytes, 3 * HEADER_SIZE);
3260 }
3261 other => panic!("Expected NeedData, got {other:?}"),
3262 }
3263 }
3264
3265 #[test]
3266 fn test_sparse_not_gnu_header() {
3267 let header = make_header(b"bad_sparse.txt", 0, b'S');
3269 let mut archive = Vec::new();
3270 archive.extend_from_slice(&header);
3271 archive.extend(zeroes(1024));
3272
3273 let mut parser = Parser::new(Limits::default());
3274 let err = parser.parse(&archive).unwrap_err();
3275 assert!(matches!(err, ParseError::SparseNotGnu));
3276 }
3277
3278 #[test]
3279 fn test_sparse_too_many_entries() {
3280 let header = make_gnu_sparse_header(
3282 b"too_many.txt",
3283 &[(0, 10), (100, 10), (200, 10)],
3284 40,
3285 400,
3286 true,
3287 );
3288 let ext = make_gnu_ext_sparse(&[(300, 10)], false);
3290
3291 let mut archive = Vec::new();
3292 archive.extend_from_slice(&header);
3293 archive.extend_from_slice(&ext);
3294 archive.extend(zeroes(512));
3295 archive.extend(zeroes(1024));
3296
3297 let limits = Limits {
3298 max_sparse_entries: 3,
3299 ..Default::default()
3300 };
3301 let mut parser = Parser::new(limits);
3302 let err = parser.parse(&archive).unwrap_err();
3303 assert!(matches!(
3304 err,
3305 ParseError::TooManySparseEntries { count: 4, limit: 3 }
3306 ));
3307 }
3308
3309 #[test]
3310 fn test_sparse_with_gnu_long_name() {
3311 let long_name = "a/".to_string() + &"x".repeat(200);
3314
3315 let on_disk = 512u64;
3316 let real_size = 8192u64;
3317 let header = make_gnu_sparse_header(b"placeholder", &[(0, 512)], on_disk, real_size, false);
3318
3319 let mut archive = Vec::new();
3320 archive.extend(make_gnu_long_name(long_name.as_bytes()));
3321 archive.extend_from_slice(&header);
3322 archive.extend(zeroes(on_disk as usize)); archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3326 let event = parser.parse(&archive).unwrap();
3327
3328 match event {
3329 ParseEvent::SparseEntry {
3330 entry,
3331 sparse_map,
3332 real_size: rs,
3333 ..
3334 } => {
3335 assert_eq!(entry.path.as_ref(), long_name.as_bytes());
3336 assert_eq!(rs, real_size);
3337 assert_eq!(sparse_map.len(), 1);
3338 assert_eq!(sparse_map[0].length, 512);
3339 }
3340 other => panic!("Expected SparseEntry, got {other:?}"),
3341 }
3342 }
3343
3344 #[test]
3345 fn test_sparse_need_data_is_side_effect_free() {
3346 let header = make_gnu_sparse_header(b"retry.txt", &[(0, 100)], 200, 300, true);
3350 let ext = make_gnu_ext_sparse(&[(100, 100)], false);
3351
3352 let mut parser = Parser::new(Limits::default());
3353
3354 let event = parser.parse(&header).unwrap();
3356 assert!(matches!(event, ParseEvent::NeedData { .. }));
3357
3358 let mut full = Vec::new();
3360 full.extend_from_slice(&header);
3361 full.extend_from_slice(&ext);
3362 full.extend(zeroes(512)); full.extend(zeroes(1024)); let event = parser.parse(&full).unwrap();
3366 match event {
3367 ParseEvent::SparseEntry {
3368 consumed,
3369 sparse_map,
3370 ..
3371 } => {
3372 assert_eq!(consumed, 2 * HEADER_SIZE);
3373 assert_eq!(sparse_map.len(), 2);
3374 }
3375 other => panic!("Expected SparseEntry, got {other:?}"),
3376 }
3377 }
3378
3379 #[test]
3384 fn test_pax_sparse_v01_map() {
3385 let mut archive = Vec::new();
3387 archive.extend(make_pax_header(&[
3388 ("GNU.sparse.map", b"0,100,200,100,400,50"),
3389 ("GNU.sparse.realsize", b"450"),
3390 ("GNU.sparse.name", b"real_name.txt"),
3391 ]));
3392 archive.extend_from_slice(&make_header(b"placeholder.txt", 250, b'0'));
3394 archive.extend(zeroes(512)); archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3398 let event = parser.parse(&archive).unwrap();
3399
3400 match event {
3401 ParseEvent::SparseEntry {
3402 entry,
3403 sparse_map,
3404 real_size,
3405 ..
3406 } => {
3407 assert_eq!(entry.path.as_ref(), b"real_name.txt");
3408 assert_eq!(real_size, 450);
3409 assert_eq!(sparse_map.len(), 3);
3410 assert_eq!(
3411 sparse_map[0],
3412 SparseEntry {
3413 offset: 0,
3414 length: 100
3415 }
3416 );
3417 assert_eq!(
3418 sparse_map[1],
3419 SparseEntry {
3420 offset: 200,
3421 length: 100
3422 }
3423 );
3424 assert_eq!(
3425 sparse_map[2],
3426 SparseEntry {
3427 offset: 400,
3428 length: 50
3429 }
3430 );
3431 }
3432 other => panic!("Expected SparseEntry, got {other:?}"),
3433 }
3434 }
3435
3436 #[test]
3437 fn test_pax_sparse_v00_pairs() {
3438 let mut archive = Vec::new();
3440 archive.extend(make_pax_header(&[
3441 ("GNU.sparse.offset", b"0"),
3442 ("GNU.sparse.numbytes", b"100"),
3443 ("GNU.sparse.offset", b"1024"),
3444 ("GNU.sparse.numbytes", b"200"),
3445 ("GNU.sparse.realsize", b"1224"),
3446 ("GNU.sparse.name", b"v00_sparse.dat"),
3447 ]));
3448 archive.extend_from_slice(&make_header(b"placeholder", 300, b'0'));
3449 archive.extend(zeroes(512)); archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3453 let event = parser.parse(&archive).unwrap();
3454
3455 match event {
3456 ParseEvent::SparseEntry {
3457 entry,
3458 sparse_map,
3459 real_size,
3460 ..
3461 } => {
3462 assert_eq!(entry.path.as_ref(), b"v00_sparse.dat");
3463 assert_eq!(real_size, 1224);
3464 assert_eq!(sparse_map.len(), 2);
3465 assert_eq!(
3466 sparse_map[0],
3467 SparseEntry {
3468 offset: 0,
3469 length: 100
3470 }
3471 );
3472 assert_eq!(
3473 sparse_map[1],
3474 SparseEntry {
3475 offset: 1024,
3476 length: 200
3477 }
3478 );
3479 }
3480 other => panic!("Expected SparseEntry, got {other:?}"),
3481 }
3482 }
3483
3484 #[test]
3485 fn test_pax_sparse_v10_data_prefix() {
3486 let mut archive = Vec::new();
3488 archive.extend(make_pax_header(&[
3489 ("GNU.sparse.major", b"1"),
3490 ("GNU.sparse.minor", b"0"),
3491 ("GNU.sparse.realsize", b"2048"),
3492 ("GNU.sparse.name", b"v10_sparse.bin"),
3493 ]));
3494
3495 let sparse_data = b"2\n0\n100\n1024\n200\n";
3498 let on_disk_content = 300u64; let total_size = 512 + on_disk_content; archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3502 let mut data_block = vec![0u8; 512];
3504 data_block[..sparse_data.len()].copy_from_slice(sparse_data);
3505 archive.extend_from_slice(&data_block);
3506 archive.extend(zeroes(on_disk_content.next_multiple_of(512) as usize));
3507 archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3510 let event = parser.parse(&archive).unwrap();
3511
3512 match event {
3513 ParseEvent::SparseEntry {
3514 consumed,
3515 entry,
3516 sparse_map,
3517 real_size,
3518 } => {
3519 assert_eq!(entry.path.as_ref(), b"v10_sparse.bin");
3520 assert_eq!(real_size, 2048);
3521 assert_eq!(sparse_map.len(), 2);
3522 assert_eq!(
3523 sparse_map[0],
3524 SparseEntry {
3525 offset: 0,
3526 length: 100
3527 }
3528 );
3529 assert_eq!(
3530 sparse_map[1],
3531 SparseEntry {
3532 offset: 1024,
3533 length: 200
3534 }
3535 );
3536 assert_eq!(entry.size, on_disk_content);
3538 let pax_hdr_size = archive.len()
3541 - HEADER_SIZE - 512 - on_disk_content.next_multiple_of(512) as usize
3544 - 1024; let expected_consumed = pax_hdr_size + HEADER_SIZE + 512;
3546 assert_eq!(consumed, expected_consumed);
3547 }
3548 other => panic!("Expected SparseEntry, got {other:?}"),
3549 }
3550 }
3551
3552 #[test]
3553 fn test_pax_sparse_v10_need_data() {
3554 let mut archive = Vec::new();
3556 archive.extend(make_pax_header(&[
3557 ("GNU.sparse.major", b"1"),
3558 ("GNU.sparse.minor", b"0"),
3559 ("GNU.sparse.realsize", b"100"),
3560 ("GNU.sparse.name", b"v10_need.txt"),
3561 ]));
3562
3563 archive.extend_from_slice(&make_header(b"placeholder", 512, b'0'));
3565
3566 let mut parser = Parser::new(Limits::default());
3567 let event = parser.parse(&archive).unwrap();
3568
3569 assert!(
3570 matches!(event, ParseEvent::NeedData { .. }),
3571 "Expected NeedData, got {event:?}"
3572 );
3573 }
3574
3575 #[test]
3576 fn test_pax_sparse_v01_odd_map_values() {
3577 let mut archive = Vec::new();
3579 archive.extend(make_pax_header(&[
3580 ("GNU.sparse.map", b"0,100,200"),
3581 ("GNU.sparse.realsize", b"300"),
3582 ]));
3583 archive.extend_from_slice(&make_header(b"file.txt", 100, b'0'));
3584 archive.extend(zeroes(512));
3585 archive.extend(zeroes(1024));
3586
3587 let mut parser = Parser::new(Limits::default());
3588 let err = parser.parse(&archive).unwrap_err();
3589 assert!(matches!(err, ParseError::InvalidPaxSparseMap(_)));
3590 }
3591
3592 #[test]
3593 fn test_pax_sparse_v10_too_many_entries() {
3594 let mut archive = Vec::new();
3595 archive.extend(make_pax_header(&[
3596 ("GNU.sparse.major", b"1"),
3597 ("GNU.sparse.minor", b"0"),
3598 ("GNU.sparse.realsize", b"100"),
3599 ("GNU.sparse.name", b"toomany.txt"),
3600 ]));
3601
3602 let sparse_data = b"1000\n";
3604 let total_size = 512u64;
3605 archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3606 let mut data_block = vec![0u8; 512];
3607 data_block[..sparse_data.len()].copy_from_slice(sparse_data);
3608 archive.extend_from_slice(&data_block);
3609 archive.extend(zeroes(1024));
3610
3611 let limits = Limits {
3612 max_sparse_entries: 100,
3613 ..Default::default()
3614 };
3615 let mut parser = Parser::new(limits);
3616 let err = parser.parse(&archive).unwrap_err();
3617 assert!(
3618 matches!(
3619 err,
3620 ParseError::TooManySparseEntries {
3621 count: 1000,
3622 limit: 100
3623 }
3624 ),
3625 "got: {err:?}"
3626 );
3627 }
3628
3629 #[test]
3630 fn test_pax_sparse_without_version_is_v00() {
3631 let mut archive = Vec::new();
3634 archive.extend(make_pax_header(&[
3635 ("GNU.sparse.offset", b"0"),
3636 ("GNU.sparse.numbytes", b"50"),
3637 ("GNU.sparse.realsize", b"50"),
3638 ]));
3639 archive.extend_from_slice(&make_header(b"noversion.txt", 50, b'0'));
3640 archive.extend(zeroes(512)); archive.extend(zeroes(1024)); let mut parser = Parser::new(Limits::default());
3644 let event = parser.parse(&archive).unwrap();
3645
3646 match event {
3647 ParseEvent::SparseEntry {
3648 sparse_map,
3649 real_size,
3650 ..
3651 } => {
3652 assert_eq!(sparse_map.len(), 1);
3653 assert_eq!(
3654 sparse_map[0],
3655 SparseEntry {
3656 offset: 0,
3657 length: 50
3658 }
3659 );
3660 assert_eq!(real_size, 50);
3661 }
3662 other => panic!("Expected SparseEntry, got {other:?}"),
3663 }
3664 }
3665
3666 mod sparse_proptests {
3671 use super::*;
3672 use proptest::prelude::*;
3673
3674 fn sparse_map_strategy(max_entries: usize) -> impl Strategy<Value = Vec<(u64, u64)>> {
3677 proptest::collection::vec((0u64..0x10_000, 1u64..0x1000), 0..=max_entries).prop_map(
3678 |raw| {
3679 let mut entries: Vec<(u64, u64)> = Vec::new();
3682 let mut cursor = 0u64;
3683 for (gap, length) in raw {
3684 let offset = cursor.saturating_add(gap);
3685 entries.push((offset, length));
3686 cursor = offset.saturating_add(length);
3687 }
3688 entries
3689 },
3690 )
3691 }
3692
3693 proptest! {
3694 #[test]
3695 fn test_sparse_roundtrip_inline(
3696 entries in sparse_map_strategy(4),
3697 name_len in 1usize..50,
3698 ) {
3699 let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3700 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3701 let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3702
3703 let header = make_gnu_sparse_header(
3704 &name,
3705 &entries,
3706 on_disk,
3707 real_size,
3708 false,
3709 );
3710
3711 let mut archive = Vec::new();
3712 archive.extend_from_slice(&header);
3713 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3714 archive.extend(zeroes(1024));
3715
3716 let mut parser = Parser::new(Limits::default());
3717 let event = parser.parse(&archive).unwrap();
3718
3719 match event {
3720 ParseEvent::SparseEntry {
3721 consumed,
3722 sparse_map,
3723 real_size: rs,
3724 entry,
3725 ..
3726 } => {
3727 prop_assert_eq!(consumed, HEADER_SIZE);
3728 prop_assert_eq!(&entry.path[..], &name[..]);
3729 prop_assert_eq!(rs, real_size);
3730 prop_assert_eq!(sparse_map.len(), entries.len());
3731 for (i, &(off, len)) in entries.iter().enumerate() {
3732 prop_assert_eq!(sparse_map[i].offset, off);
3733 prop_assert_eq!(sparse_map[i].length, len);
3734 }
3735 }
3736 other => {
3737 return Err(proptest::test_runner::TestCaseError::fail(
3738 format!("Expected SparseEntry, got {other:?}")));
3739 }
3740 }
3741 }
3742
3743 #[test]
3744 fn test_sparse_roundtrip_extended(
3745 entries in sparse_map_strategy(25).prop_filter(
3747 "need >4 entries for extension",
3748 |e| e.len() > 4
3749 ),
3750 ) {
3751 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3752 let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3753
3754 let (inline, rest) = entries.split_at(4);
3756 let header = make_gnu_sparse_header(
3757 b"proptest_ext.bin",
3758 inline,
3759 on_disk,
3760 real_size,
3761 !rest.is_empty(),
3762 );
3763
3764 let mut archive = Vec::new();
3765 archive.extend_from_slice(&header);
3766
3767 let chunks: Vec<&[(u64, u64)]> = rest.chunks(21).collect();
3769 for (i, chunk) in chunks.iter().enumerate() {
3770 let is_last = i == chunks.len() - 1;
3771 let ext = make_gnu_ext_sparse(chunk, !is_last);
3772 archive.extend_from_slice(&ext);
3773 }
3774
3775 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3776 archive.extend(zeroes(1024));
3777
3778 let mut parser = Parser::new(Limits::default());
3779 let event = parser.parse(&archive).unwrap();
3780
3781 match event {
3782 ParseEvent::SparseEntry {
3783 consumed,
3784 sparse_map,
3785 real_size: rs,
3786 ..
3787 } => {
3788 let expected_blocks = 1 + chunks.len();
3789 prop_assert_eq!(consumed, expected_blocks * HEADER_SIZE);
3790 prop_assert_eq!(rs, real_size);
3791 prop_assert_eq!(sparse_map.len(), entries.len());
3792 for (i, &(off, len)) in entries.iter().enumerate() {
3793 prop_assert_eq!(sparse_map[i].offset, off);
3794 prop_assert_eq!(sparse_map[i].length, len);
3795 }
3796 }
3797 other => {
3798 return Err(proptest::test_runner::TestCaseError::fail(
3799 format!("Expected SparseEntry, got {other:?}")));
3800 }
3801 }
3802 }
3803
3804 #[test]
3805 fn test_sparse_need_data_then_retry(
3806 n_ext_entries in 1usize..10,
3807 ) {
3808 let inline = [(0u64, 100), (200, 100), (400, 100), (600, 100)];
3812 let ext_entries: Vec<(u64, u64)> = (0..n_ext_entries)
3813 .map(|i| (800 + i as u64 * 200, 100))
3814 .collect();
3815 let total = 4 + n_ext_entries;
3816 let on_disk = total as u64 * 100;
3817 let real_size = ext_entries.last().map(|(o, l)| o + l).unwrap_or(800);
3818
3819 let header = make_gnu_sparse_header(
3820 b"retry_ext.txt",
3821 &inline,
3822 on_disk,
3823 real_size,
3824 true,
3825 );
3826 let ext = make_gnu_ext_sparse(&ext_entries, false);
3827
3828 let mut parser = Parser::new(Limits::default());
3829
3830 let event = parser.parse(&header).unwrap();
3832 assert!(matches!(event, ParseEvent::NeedData { .. }));
3833
3834 let mut full = Vec::new();
3836 full.extend_from_slice(&header);
3837 full.extend_from_slice(&ext);
3838 full.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3839 full.extend(zeroes(1024));
3840
3841 let event = parser.parse(&full).unwrap();
3842 match event {
3843 ParseEvent::SparseEntry { sparse_map, .. } => {
3844 prop_assert_eq!(sparse_map.len(), total);
3845 }
3846 other => {
3847 return Err(proptest::test_runner::TestCaseError::fail(
3848 format!("Expected SparseEntry, got {other:?}")));
3849 }
3850 }
3851 }
3852
3853 #[test]
3858 fn test_pax_sparse_v00_roundtrip(
3859 entries in sparse_map_strategy(15),
3860 name_len in 1usize..50,
3861 ) {
3862 let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3863 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3864 let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3865
3866 let mut pax_kv: Vec<(&str, Vec<u8>)> = Vec::new();
3867 for &(offset, length) in &entries {
3868 pax_kv.push(("GNU.sparse.offset", offset.to_string().into_bytes()));
3869 pax_kv.push(("GNU.sparse.numbytes", length.to_string().into_bytes()));
3870 }
3871 pax_kv.push(("GNU.sparse.realsize", real_size.to_string().into_bytes()));
3872 pax_kv.push(("GNU.sparse.name", name.clone()));
3873
3874 let pax_refs: Vec<(&str, &[u8])> =
3875 pax_kv.iter().map(|(k, v)| (*k, v.as_slice())).collect();
3876
3877 let mut archive = Vec::new();
3878 archive.extend(make_pax_header(&pax_refs));
3879 archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
3880 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3881 archive.extend(zeroes(1024));
3882
3883 let mut parser = Parser::new(Limits::default());
3884 let event = parser.parse(&archive).unwrap();
3885
3886 match event {
3887 ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3888 prop_assert_eq!(&entry.path[..], &name[..]);
3889 prop_assert_eq!(rs, real_size);
3890 prop_assert_eq!(sparse_map.len(), entries.len());
3891 for (i, &(off, len)) in entries.iter().enumerate() {
3892 prop_assert_eq!(sparse_map[i].offset, off);
3893 prop_assert_eq!(sparse_map[i].length, len);
3894 }
3895 }
3896 ParseEvent::Entry { .. } if entries.is_empty() => {}
3897 other => {
3898 return Err(proptest::test_runner::TestCaseError::fail(
3899 format!("Expected SparseEntry, got {other:?}")));
3900 }
3901 }
3902 }
3903
3904 #[test]
3905 fn test_pax_sparse_v01_roundtrip(
3906 entries in sparse_map_strategy(15),
3907 name_len in 1usize..50,
3908 ) {
3909 let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3910 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3911 let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3912
3913 let map_str: String = entries
3914 .iter()
3915 .flat_map(|(o, l)| [o.to_string(), l.to_string()])
3916 .collect::<Vec<_>>()
3917 .join(",");
3918 let map_bytes = map_str.into_bytes();
3919 let rs_bytes = real_size.to_string().into_bytes();
3920
3921 let pax_refs: Vec<(&str, &[u8])> = vec![
3922 ("GNU.sparse.map", &map_bytes),
3923 ("GNU.sparse.realsize", &rs_bytes),
3924 ("GNU.sparse.name", &name),
3925 ];
3926
3927 let mut archive = Vec::new();
3928 archive.extend(make_pax_header(&pax_refs));
3929 archive.extend_from_slice(&make_header(b"placeholder", on_disk, b'0'));
3930 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3931 archive.extend(zeroes(1024));
3932
3933 let mut parser = Parser::new(Limits::default());
3934 let event = parser.parse(&archive).unwrap();
3935
3936 match event {
3937 ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3938 prop_assert_eq!(&entry.path[..], &name[..]);
3939 prop_assert_eq!(rs, real_size);
3940 prop_assert_eq!(sparse_map.len(), entries.len());
3941 for (i, &(off, len)) in entries.iter().enumerate() {
3942 prop_assert_eq!(sparse_map[i].offset, off);
3943 prop_assert_eq!(sparse_map[i].length, len);
3944 }
3945 }
3946 ParseEvent::Entry { .. } if entries.is_empty() => {}
3947 other => {
3948 return Err(proptest::test_runner::TestCaseError::fail(
3949 format!("Expected SparseEntry, got {other:?}")));
3950 }
3951 }
3952 }
3953
3954 #[test]
3955 fn test_pax_sparse_v10_roundtrip(
3956 entries in sparse_map_strategy(20),
3957 name_len in 1usize..50,
3958 ) {
3959 let name: Vec<u8> = (0..name_len).map(|i| b'a' + (i % 26) as u8).collect();
3960 let on_disk: u64 = entries.iter().map(|(_, l)| l).sum();
3961 let real_size = entries.last().map(|(o, l)| o + l).unwrap_or(0);
3962
3963 let mut map_data = format!("{}\n", entries.len());
3964 for &(offset, length) in &entries {
3965 map_data.push_str(&format!("{offset}\n{length}\n"));
3966 }
3967 let map_bytes = map_data.into_bytes();
3968 let map_padded = map_bytes.len().next_multiple_of(HEADER_SIZE);
3969 let total_size = map_padded as u64 + on_disk;
3970 let rs_bytes = real_size.to_string().into_bytes();
3971
3972 let pax_refs: Vec<(&str, &[u8])> = vec![
3973 ("GNU.sparse.major", b"1"),
3974 ("GNU.sparse.minor", b"0"),
3975 ("GNU.sparse.realsize", &rs_bytes),
3976 ("GNU.sparse.name", &name),
3977 ];
3978
3979 let mut archive = Vec::new();
3980 archive.extend(make_pax_header(&pax_refs));
3981 archive.extend_from_slice(&make_header(b"placeholder", total_size, b'0'));
3982 let mut data_block = vec![0u8; map_padded];
3983 data_block[..map_bytes.len()].copy_from_slice(&map_bytes);
3984 archive.extend_from_slice(&data_block);
3985 archive.extend(zeroes(on_disk.next_multiple_of(512) as usize));
3986 archive.extend(zeroes(1024));
3987
3988 let mut parser = Parser::new(Limits::default());
3989 let event = parser.parse(&archive).unwrap();
3990
3991 match event {
3992 ParseEvent::SparseEntry { sparse_map, real_size: rs, entry, .. } => {
3993 prop_assert_eq!(&entry.path[..], &name[..]);
3994 prop_assert_eq!(rs, real_size);
3995 prop_assert_eq!(entry.size, on_disk);
3996 prop_assert_eq!(sparse_map.len(), entries.len());
3997 for (i, &(off, len)) in entries.iter().enumerate() {
3998 prop_assert_eq!(sparse_map[i].offset, off);
3999 prop_assert_eq!(sparse_map[i].length, len);
4000 }
4001 }
4002 other => {
4003 return Err(proptest::test_runner::TestCaseError::fail(
4004 format!("Expected SparseEntry, got {other:?}")));
4005 }
4006 }
4007 }
4008 }
4009 }
4010
4011 #[test]
4023 fn test_add_consumed_no_overflow() {
4024 let long_name = b"a]long/path".to_vec();
4027 let gnu_entry = make_gnu_long_name(&long_name);
4028 let first_entry_size = gnu_entry.len(); let pax_size: u64 = u32::MAX as u64 - long_name.len() as u64 - 512;
4037 let pax_header = make_header(b"PaxHeaders/file", pax_size, b'x');
4038
4039 let mut input = Vec::with_capacity(first_entry_size + HEADER_SIZE);
4042 input.extend_from_slice(&gnu_entry);
4043 input.extend_from_slice(&pax_header);
4044
4045 let mut parser = Parser::new(Limits::permissive());
4046 let result = parser.parse(&input);
4047
4048 match result {
4051 Ok(ParseEvent::NeedData { min_bytes }) => {
4052 assert!(
4056 min_bytes > HEADER_SIZE,
4057 "min_bytes should be large, got {min_bytes}"
4058 );
4059 }
4060 Err(_) => {
4061 }
4064 other => panic!(
4065 "Expected NeedData or Err for truncated extension chain, got {:?}",
4066 other
4067 ),
4068 }
4069 }
4070}