1use std::io::{Cursor, Read, Seek, SeekFrom, Write};
49
50use thiserror::Error;
51
/// Four-byte magic number that must open the file; `DarReader::open` rejects
/// anything else with `DarError::NotADar`.
const DAR_MAGIC: [u8; 4] = [0x00, 0x00, 0x00, 0x7b];

/// Maximum number of compressed catalogue bytes read into memory (512 MiB).
const MAX_CATALOGUE_COMPRESSED: u64 = 512 * 1024 * 1024;
/// Maximum size the catalogue may inflate to (1 GiB); passed to `decompress`
/// as its output bound.
const MAX_CATALOGUE_INFLATED: u64 = 1024 * 1024 * 1024;

/// Byte sequence searched for in the archive body to locate the catalogue.
const SEQT_CATALOGUE: [u8; 6] = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43];

/// Format value for version 11, fix 1, in the `version * 256 + fix` encoding
/// produced by `read_format_value`. Archives at or above this value carry an
/// extra NUL-terminated string in the catalogue header that must be skipped.
const FORMAT_11_1: u32 = 11 * 256 + 1;
68
/// Errors produced while opening a DAR archive or extracting from it.
#[derive(Debug, Error)]
pub enum DarError {
    /// An underlying read or seek failed.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// The input does not start with the DAR magic number.
    #[error("not a DAR archive")]
    NotADar,
    /// The archive structure is malformed or unsupported; the message says why.
    #[error("corrupt archive: {0}")]
    Corrupt(String),
    /// No catalogue entry has the requested path.
    #[error("entry not found: '{0}'")]
    EntryNotFound(String),
}
81
/// Public description of one file listed in the archive catalogue.
#[derive(Debug, Clone)]
pub struct DarEntry {
    /// Path of the file, with directory components joined by `/`.
    pub path: String,
    /// Size of the file as declared by the catalogue.
    pub size: u64,
}
88
/// Internal catalogue record holding everything needed to extract one file.
#[derive(Debug, Clone)]
struct EntryRef {
    /// Path of the file, with directory components joined by `/`.
    path: String,
    /// Size declared by the catalogue; used as the decompression bound.
    size: u64,
    /// Offset of the stored data, relative to the reader's `archive_origin`.
    archive_offset: u64,
    /// Number of bytes the data occupies in the archive.
    stored_size: u64,
    /// Compression algorithm code (see `is_compressed` / `decompress`).
    compression: u8,
    /// True when the catalogue's encryption flag byte is non-zero.
    encrypted: bool,
}
98
/// Reader over a DAR archive whose catalogue has been parsed into memory.
pub struct DarReader<R: Read + Seek> {
    /// Underlying seekable byte source.
    inner: R,
    /// Absolute stream position that entry offsets are measured from.
    archive_origin: u64,
    /// File entries collected from the catalogue by `open`.
    entries: Vec<EntryRef>,
}
107
108impl<R: Read + Seek> DarReader<R> {
109 pub fn open(mut reader: R) -> Result<Self, DarError> {
111 let mut magic = [0u8; 4];
112 reader
113 .read_exact(&mut magic)
114 .map_err(|_| DarError::NotADar)?;
115 if magic != DAR_MAGIC {
116 return Err(DarError::NotADar);
117 }
118
119 let mut label = [0u8; 10];
120 reader.read_exact(&mut label)?; let _flag = read_u8(&mut reader)?; let extension = read_u8(&mut reader)?; let entries;
128 let archive_origin;
129 if extension == b'T' {
130 let tlv_count = read_infinint(&mut reader).map_err(|e| match e {
132 DarError::Io(_) => DarError::Corrupt("truncated TLV block".into()),
133 other => other,
134 })?;
135 for _ in 0..tlv_count {
136 skip(&mut reader, 2)?;
137 let len = read_infinint(&mut reader)?;
138 skip(&mut reader, len)?;
139 }
140
141 archive_origin = reader.stream_position()?;
142 let format_value = read_format_value(&mut reader);
143 let global_comp = read_u8(&mut reader).unwrap_or(b'n');
147 reader.seek(SeekFrom::Start(archive_origin))?;
148
149 let via_escape = find_catalogue(&mut reader, &label)?;
152 let format_major = format_value >> 8;
153 if via_escape && is_compressed(global_comp) {
154 let mut compressed = Vec::new();
160 reader
161 .by_ref()
162 .take(MAX_CATALOGUE_COMPRESSED)
163 .read_to_end(&mut compressed)?;
164 let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
165 let mut cur = Cursor::new(inflated);
166 skip(&mut cur, 10)?; if format_value >= FORMAT_11_1 {
168 skip_nul_string(&mut cur)?;
169 }
170 entries = parse_catalog(&mut cur, format_major)?;
171 } else {
172 if via_escape {
173 skip(&mut reader, 10)?; if format_value >= FORMAT_11_1 {
177 skip_nul_string(&mut reader)?;
178 }
179 }
180 entries = parse_catalog(&mut reader, format_major)?;
181 }
182 } else if extension == b'N' || extension == b'S' {
183 if extension == b'S' {
184 read_infinint(&mut reader)?; }
186 archive_origin = reader.stream_position()?;
187 let format_value = read_format_value(&mut reader); let cat_offset = read_terminateur(&mut reader)?;
189 let cat_start = archive_origin
190 .checked_add(cat_offset)
191 .ok_or_else(|| DarError::Corrupt("catalogue offset overflows".into()))?;
192 let end = reader.seek(SeekFrom::End(0))?;
193 if cat_start >= end {
194 return Err(DarError::Corrupt(format!(
195 "catalogue start {cat_start} past archive end {end}"
196 )));
197 }
198 reader.seek(SeekFrom::Start(cat_start))?;
199 entries = parse_catalog(&mut reader, format_value >> 8)?;
201 } else {
202 return Err(DarError::Corrupt(format!(
203 "unknown slice-header extension {extension:#04x}"
204 )));
205 }
206
207 Ok(Self {
208 inner: reader,
209 archive_origin,
210 entries,
211 })
212 }
213
214 pub fn entries(&self) -> Vec<DarEntry> {
216 self.entries
217 .iter()
218 .map(|e| DarEntry {
219 path: e.path.clone(),
220 size: e.size,
221 })
222 .collect()
223 }
224
225 pub fn extract(&mut self, path: &str) -> Result<Vec<u8>, DarError> {
227 let entry = self
228 .entries
229 .iter()
230 .find(|e| e.path == path)
231 .ok_or_else(|| DarError::EntryNotFound(path.to_string()))?
232 .clone();
233
234 if entry.encrypted {
235 return Err(DarError::Corrupt(format!("'{path}' is encrypted")));
236 }
237
238 let start = self
243 .archive_origin
244 .checked_add(entry.archive_offset)
245 .ok_or_else(|| {
246 DarError::Corrupt(format!("'{path}' archive offset overflows file position"))
247 })?;
248 let end = self.inner.seek(SeekFrom::End(0))?;
249 if start > end {
250 return Err(DarError::Corrupt(format!(
251 "'{path}' starts at {start}, past archive end {end}"
252 )));
253 }
254 let available = end - start;
255 if entry.stored_size > available {
256 return Err(DarError::Corrupt(format!(
257 "'{path}' claims {} stored bytes but only {available} remain",
258 entry.stored_size
259 )));
260 }
261
262 self.inner.seek(SeekFrom::Start(start))?;
263 let mut data = vec![0u8; entry.stored_size as usize];
264 self.inner.read_exact(&mut data)?;
265
266 if !is_compressed(entry.compression) {
267 return Ok(data);
268 }
269 let out = decompress(&data, entry.compression, entry.size)?;
273 if out.len() as u64 != entry.size {
274 return Err(DarError::Corrupt(format!(
275 "'{path}' decompressed to {} bytes but catalog declares {}",
276 out.len(),
277 entry.size
278 )));
279 }
280 Ok(out)
281 }
282}
283
/// Size of the window at the end of the archive that `find_catalogue` scans
/// first (256 MiB) before falling back to a scan from the archive origin.
const TAIL_SCAN: u64 = 256 * 1024 * 1024;

/// Number of bytes `scan_window` requests per read (4 MiB).
const CHUNK: usize = 4 * 1024 * 1024;
/// Bytes carried over between consecutive chunks so that a pattern straddling
/// a chunk boundary is still seen: one less than the longest pattern (the
/// 10-byte label).
const OVERLAP: usize = 9;
294
295fn scan_window<R: Read + Seek>(
302 r: &mut R,
303 label: &[u8; 10],
304 use_label: bool,
305) -> Result<Option<bool>, DarError> {
306 let mut buf = vec![0u8; CHUNK + OVERLAP];
307 let mut overlap_len: usize = 0;
308 loop {
309 let chunk_file_pos = r.stream_position()?;
310 let n = r.read(&mut buf[overlap_len..overlap_len + CHUNK])?;
311 if n == 0 {
312 break;
313 }
314 let total = overlap_len + n;
315 let buf_base = chunk_file_pos - overlap_len as u64;
318
319 if let Some(i) = buf[..total]
320 .windows(SEQT_CATALOGUE.len())
321 .position(|w| w == SEQT_CATALOGUE)
322 {
323 r.seek(SeekFrom::Start(
324 buf_base + i as u64 + SEQT_CATALOGUE.len() as u64,
325 ))?;
326 return Ok(Some(true));
327 }
328 if use_label {
329 if let Some(i) = buf[..total]
330 .windows(label.len())
331 .position(|w| w == label.as_ref())
332 {
333 r.seek(SeekFrom::Start(buf_base + i as u64 + label.len() as u64))?;
334 return Ok(Some(false));
335 }
336 }
337
338 let keep = OVERLAP.min(total);
339 buf.copy_within(total - keep..total, 0);
340 overlap_len = keep;
341 }
342 Ok(None)
343}
344
/// Locates the catalogue starting from the reader's current position, using
/// the default `TAIL_SCAN` window; see `find_catalogue_within` for the result
/// and error contract.
fn find_catalogue<R: Read + Seek>(r: &mut R, label: &[u8; 10]) -> Result<bool, DarError> {
    find_catalogue_within(r, label, TAIL_SCAN)
}
367
368fn find_catalogue_within<R: Read + Seek>(
372 r: &mut R,
373 label: &[u8; 10],
374 tail_scan: u64,
375) -> Result<bool, DarError> {
376 let use_label = !label.iter().all(|&b| b == 0);
379
380 let archive_origin = r.stream_position()?;
381 let file_end = r.seek(SeekFrom::End(0))?;
382
383 if file_end <= archive_origin {
384 return Err(DarError::Corrupt("archive body too short".into()));
385 }
386
387 let tail_start = archive_origin.max(file_end.saturating_sub(tail_scan));
389 r.seek(SeekFrom::Start(tail_start))?;
390
391 if let Some(result) = scan_window(r, label, use_label)? {
392 return Ok(result);
393 }
394
395 if tail_start > archive_origin {
397 r.seek(SeekFrom::Start(archive_origin))?;
398 if let Some(result) = scan_window(r, label, use_label)? {
399 return Ok(result);
400 }
401 }
402
403 Err(DarError::Corrupt("seqt_catalogue not found".into()))
404}
405
406fn read_format_value<R: Read>(r: &mut R) -> u32 {
412 let s = read_nul_string(r).unwrap_or_default();
413 let b = s.as_bytes();
414 if b.len() >= 2 {
415 let major = u32::from(b[0].saturating_sub(48)) * 256 + u32::from(b[1].saturating_sub(48));
416 let fix = if b.len() >= 3 {
417 u32::from(b[2].saturating_sub(48))
418 } else {
419 0
420 };
421 major * 256 + fix
422 } else {
423 u32::MAX
424 }
425}
426
/// Reports whether `algo` is one of the compression codes
/// (`z y x l j k d q`, in either letter case).
fn is_compressed(algo: u8) -> bool {
    const COMPRESSED_CODES: &[u8] = b"zyxljkdq";
    COMPRESSED_CODES.contains(&algo.to_ascii_lowercase())
}
439
440fn decompress(data: &[u8], algo: u8, max_out: u64) -> Result<Vec<u8>, DarError> {
444 match algo.to_ascii_lowercase() {
445 b'z' => read_bounded(flate2::read::ZlibDecoder::new(data), max_out, "zlib"),
447 b'y' => read_bounded(bzip2_rs::DecoderReader::new(data), max_out, "bzip2"),
448 b'x' => {
449 let mut input: &[u8] = data;
452 let mut out = BoundedWriter {
453 buf: Vec::new(),
454 max: max_out,
455 };
456 match lzma_rs::xz_decompress(&mut input, &mut out) {
457 Ok(()) => Ok(out.buf),
458 Err(lzma_rs::error::Error::XzError(ref m))
465 if m == "Unexpected data after last XZ block" =>
466 {
467 Ok(out.buf)
468 }
469 Err(e) => Err(DarError::Corrupt(format!("xz decode failed: {e}"))),
470 }
471 }
472 other => Err(DarError::Corrupt(format!(
473 "unsupported compression '{}'",
474 other as char
475 ))),
476 }
477}
478
/// In-memory sink that refuses to grow beyond `max` bytes.
struct BoundedWriter {
    /// Bytes accepted so far.
    buf: Vec<u8>,
    /// Maximum total number of bytes `buf` may hold.
    max: u64,
}

impl Write for BoundedWriter {
    /// Appends all of `data`, or rejects it entirely (leaving `buf` untouched)
    /// when accepting it would push the total past `max`.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let projected = self.buf.len() as u64 + data.len() as u64;
        if projected <= self.max {
            self.buf.extend_from_slice(data);
            Ok(data.len())
        } else {
            Err(std::io::Error::other("decompressed data exceeds bound"))
        }
    }

    /// Nothing is buffered outside `buf`, so flushing always succeeds.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
499
500fn read_bounded<R: Read>(decoder: R, max_out: u64, what: &str) -> Result<Vec<u8>, DarError> {
503 let mut out = Vec::new();
504 decoder
505 .take(max_out.saturating_add(1))
506 .read_to_end(&mut out)
507 .map_err(|e| DarError::Corrupt(format!("{what} decode failed: {e}")))?;
508 if out.len() as u64 > max_out {
509 return Err(DarError::Corrupt("decompressed data exceeds bound".into()));
510 }
511 Ok(out)
512}
513
514fn read_terminateur<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
524 const BLOCK_SIZE: u64 = 4;
525 const MAX_BITS: u64 = 4096; let mut pos = r.seek(SeekFrom::End(0))?;
528 let mut bits: u64 = 0;
529 let terminal = loop {
530 if pos == 0 {
531 return Err(DarError::Corrupt("terminator underflows archive".into()));
532 }
533 pos -= 1;
534 r.seek(SeekFrom::Start(pos))?;
535 let b = read_u8(r)?;
536 if b == 0xFF {
537 bits += 8;
538 if bits > MAX_BITS {
539 return Err(DarError::Corrupt("terminator padding too long".into()));
540 }
541 } else {
542 break b;
543 }
544 };
545 if terminal & 0x80 == 0 {
547 return Err(DarError::Corrupt(format!(
548 "invalid terminator byte {terminal:#04x}"
549 )));
550 }
551 let mut x = terminal;
552 while x != 0 {
553 if x & 0x80 == 0 {
554 return Err(DarError::Corrupt("malformed terminator bit run".into()));
555 }
556 bits += 1;
557 x <<= 1;
558 }
559 let byte_offset = bits * BLOCK_SIZE;
560 let infinint_start = pos
561 .checked_sub(byte_offset)
562 .ok_or_else(|| DarError::Corrupt("terminator offset underflows".into()))?;
563 r.seek(SeekFrom::Start(infinint_start))?;
564 read_infinint(r)
565}
566
567fn parse_catalog<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<Vec<EntryRef>, DarError> {
572 let mut entries = Vec::new();
573 let mut dir_stack: Vec<String> = Vec::new();
574 let mut depth: u32 = 0;
575
576 loop {
577 let mut buf = [0u8; 1];
578 match r.read_exact(&mut buf) {
579 Ok(()) => {}
580 Err(_) => break,
581 }
582
583 let entry_type = ((buf[0] & 0x1f) | 0x60) as char;
585
586 match entry_type {
587 'z' => {
588 depth = depth.saturating_sub(1);
590 dir_stack.pop();
591 if depth == 0 {
592 break;
593 }
594 }
595 'd' => {
596 let name = read_nul_string(r)?;
597 let flags = read_inode_base(r, format_major)?;
598 if format_major >= 9 && (flags >> 4) & 1 != 0 {
599 skip_fsa(r)?;
600 }
601 depth += 1;
602 if name != "<ROOT>" {
604 dir_stack.push(name);
605 }
606 }
607 'f' => {
608 let name = read_nul_string(r)?;
609 let flags = read_inode_base(r, format_major)?;
610 if format_major >= 9 && (flags >> 4) & 1 != 0 {
611 skip_fsa(r)?;
612 }
613
614 let size = read_infinint(r)?;
615 let archive_offset = read_infinint(r)?;
616 let mut stored_size = read_infinint(r)?;
617 let (encryption_flag, compression) = if format_major >= 8 {
621 (read_u8(r)?, read_u8(r)?)
622 } else {
623 (0u8, b'n')
624 };
625 if format_major >= 8 {
626 let crc_size = read_infinint(r)?;
627 skip(r, crc_size)?;
628 } else {
629 skip(r, 2)?; }
631 if format_major <= 7 && stored_size == 0 {
633 stored_size = size;
634 }
635
636 let path = if dir_stack.is_empty() {
637 name
638 } else {
639 format!("{}/{}", dir_stack.join("/"), name)
640 };
641
642 entries.push(EntryRef {
643 path,
644 size,
645 archive_offset,
646 stored_size,
647 compression,
648 encrypted: encryption_flag != 0,
649 });
650 }
651 'l' => {
652 let _name = read_nul_string(r)?;
654 let flags = read_inode_base(r, format_major)?;
655 if format_major >= 9 && (flags >> 4) & 1 != 0 {
656 skip_fsa(r)?;
657 }
658 skip_nul_string(r)?; }
660 _ => break, }
662 }
663
664 Ok(entries)
665}
666
667fn read_infinint<R: Read>(r: &mut R) -> Result<u64, DarError> {
682 let terminal = read_u8(r)?;
683 if terminal == 0x00 {
684 return Err(DarError::Corrupt(
686 "infinint exceeds 64-bit range (multi-group encoding)".into(),
687 ));
688 }
689 if terminal.count_ones() != 1 {
690 return Err(DarError::Corrupt(format!(
691 "invalid infinint terminal: {terminal:#04x}"
692 )));
693 }
694 let pos = terminal.leading_zeros(); if pos > 1 {
696 return Err(DarError::Corrupt(format!(
698 "infinint exceeds 64-bit range: terminal {terminal:#04x} implies {} bytes",
699 (pos + 1) * 4
700 )));
701 }
702 let data_bytes = (pos + 1) * 4; let mut val: u64 = 0;
704 for _ in 0..data_bytes {
705 val = (val << 8) | u64::from(read_u8(r)?);
706 }
707 Ok(val)
708}
709
710fn read_u8<R: Read>(r: &mut R) -> Result<u8, DarError> {
711 let mut b = [0u8; 1];
712 r.read_exact(&mut b)?;
713 Ok(b[0])
714}
715
/// Longest NUL-terminated string (in bytes, terminator excluded) that
/// `read_nul_string` and `skip_nul_string` accept: 64 KiB.
const MAX_NUL_STRING: usize = 64 * 1024;
720
721fn read_nul_string<R: Read>(r: &mut R) -> Result<String, DarError> {
723 let mut bytes = Vec::new();
724 loop {
725 let b = read_u8(r)?;
726 if b == 0 {
727 break;
728 }
729 if bytes.len() >= MAX_NUL_STRING {
730 return Err(DarError::Corrupt(format!(
731 "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
732 )));
733 }
734 bytes.push(b);
735 }
736 String::from_utf8(bytes).map_err(|e| DarError::Corrupt(e.to_string()))
737}
738
739fn skip_nul_string<R: Read>(r: &mut R) -> Result<(), DarError> {
741 let mut len: usize = 0;
742 loop {
743 if read_u8(r)? == 0 {
744 return Ok(());
745 }
746 len += 1;
747 if len > MAX_NUL_STRING {
748 return Err(DarError::Corrupt(format!(
749 "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
750 )));
751 }
752 }
753}
754
755fn skip<R: Seek>(r: &mut R, n: u64) -> Result<(), DarError> {
757 if n > 0 {
758 let off = i64::try_from(n)
762 .map_err(|_| DarError::Corrupt(format!("skip length {n} exceeds seekable range")))?;
763 r.seek(SeekFrom::Current(off)).map_err(DarError::Io)?;
764 }
765 Ok(())
766}
767
768fn skip_timestamp<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<(), DarError> {
774 if format_major < 9 {
778 read_infinint(r)?;
779 return Ok(());
780 }
781 let ts_type = read_u8(r)?;
782 read_infinint(r)?;
783 if ts_type == b'n' || ts_type == b'u' {
784 read_infinint(r)?;
785 }
786 Ok(())
787}
788
789fn read_inode_base<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<u8, DarError> {
795 let flags = read_u8(r)?;
796 if format_major <= 7 {
798 skip(r, 4)?; } else {
800 read_infinint(r)?; read_infinint(r)?; }
803 skip(r, 2)?; skip_timestamp(r, format_major)?; skip_timestamp(r, format_major)?; if format_major >= 8 {
808 skip_timestamp(r, format_major)?;
809 }
810 if format_major >= 9 && (flags >> 4) & 1 != 0 {
813 read_infinint(r)?;
814 read_infinint(r)?;
815 }
816 Ok(flags)
817}
818
819fn skip_fsa<R: Read + Seek>(r: &mut R) -> Result<(), DarError> {
823 let _tag = read_infinint(r)?;
824 let size = read_infinint(r)?;
825 skip(r, size)
826}
827
/// Unit tests for the low-level decoding helpers, driven by in-memory cursors.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    // ---- read_infinint ----------------------------------------------------

    #[test]
    fn infinint_decodes_value() {
        // 0x80 announces 4 big-endian data bytes.
        let data = [0x80u8, 0x00, 0x00, 0x00, 0x0d];
        assert_eq!(read_infinint(&mut Cursor::new(&data[..])).unwrap(), 13);
    }

    #[test]
    fn infinint_bad_preamble_returns_corrupt() {
        // 0x03 has two bits set, so it is not a valid first byte.
        let data = [0x03u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    #[test]
    fn infinint_truncated_returns_io() {
        // Only one of the four announced data bytes is present.
        let err = read_infinint(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    #[test]
    fn infinint_0x40_preamble_reads_8_data_bytes() {
        let mut data = vec![0x40u8];
        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x5d, 0x15, 0x93, 0x31]);
        assert_eq!(
            read_infinint(&mut Cursor::new(data)).unwrap(),
            0x5d15_9331u64
        );
    }

    #[test]
    fn infinint_multi_bit_terminal_returns_corrupt() {
        let data = [0x60u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    // ---- read_u8 ----------------------------------------------------------

    #[test]
    fn read_u8_reads_single_byte() {
        assert_eq!(read_u8(&mut Cursor::new(&[0x42u8][..])).unwrap(), 0x42);
    }

    #[test]
    fn read_u8_eof_returns_io() {
        let err = read_u8(&mut Cursor::new(&[][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ---- read_nul_string --------------------------------------------------

    #[test]
    fn nul_string_reads_until_nul() {
        let data = b"hello\x00world";
        assert_eq!(
            read_nul_string(&mut Cursor::new(&data[..])).unwrap(),
            "hello"
        );
    }

    #[test]
    fn nul_string_invalid_utf8_returns_corrupt() {
        let data = [0xFF, 0x80, 0x00];
        let err = read_nul_string(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)));
    }

    #[test]
    fn nul_string_eof_before_nul_returns_io() {
        let err = read_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ---- skip_nul_string --------------------------------------------------

    #[test]
    fn skip_nul_string_advances_past_nul() {
        let data = b"skip\x00rest";
        let mut c = Cursor::new(data.to_vec());
        skip_nul_string(&mut c).unwrap();
        // Four payload bytes plus the terminator.
        assert_eq!(c.position(), 5);
    }

    #[test]
    fn skip_nul_string_eof_returns_io() {
        let err = skip_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ---- find_catalogue ---------------------------------------------------

    #[test]
    fn find_catalogue_body_too_short() {
        let label = [0u8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0x01u8, 0x02, 0x03][..]), &label).unwrap_err();
        assert!(
            matches!(&err, DarError::Corrupt(s) if s == "archive body too short"
                || s == "seqt_catalogue not found")
        );
    }

    #[test]
    fn find_catalogue_escape_at_start() {
        let mut data = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43, 0xFF];
        let mut c = Cursor::new(&mut data[..]);
        let via_escape = find_catalogue(&mut c, &[0u8; 10]).unwrap();
        assert!(via_escape);
        // The cursor is left just past the 6-byte escape sequence.
        assert_eq!(c.position(), 6);
    }

    #[test]
    fn find_catalogue_escape_not_found() {
        let label = [0xFFu8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0u8; 10][..]), &label).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_label_fallback() {
        let label: [u8; 10] = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A];
        let mut data = vec![0x00u8; 5];
        data.extend_from_slice(&label);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue(&mut c, &label).unwrap();
        assert!(!via_escape);
        // Five filler bytes plus the 10-byte label.
        assert_eq!(c.position(), 15);
    }

    // ---- skip -------------------------------------------------------------

    #[test]
    fn skip_zero_does_not_move_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 0).unwrap();
        assert_eq!(c.position(), 0);
    }

    #[test]
    fn skip_n_advances_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 7).unwrap();
        assert_eq!(c.position(), 7);
    }

    // ---- read_inode_base --------------------------------------------------

    #[test]
    fn inode_base_bit4_clear_reads_31_bytes() {
        // flags (1) + two infinints (5 + 5) + 2 fixed bytes + 3 timestamps (6 each).
        let mut data = vec![0x00u8];
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x00, 0x00]);
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        // Sentinel byte that must remain unread.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x00);
        assert_eq!(c.position(), 31);
    }

    #[test]
    fn inode_base_bit4_set_reads_41_bytes() {
        // As above, plus the two extra infinints read when bit 4 is set.
        let mut data = vec![0x10u8];
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x00, 0x00]);
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        // Sentinel byte that must remain unread.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x10);
        assert_eq!(c.position(), 41);
    }

    // ---- skip_fsa ---------------------------------------------------------

    #[test]
    fn skip_fsa_consumes_tag_size_and_data() {
        let mut data = Vec::new();
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x05]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x03]);
        data.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
        // Sentinel byte that must remain unread.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        skip_fsa(&mut c).unwrap();
        // Two 5-byte infinints plus the 3-byte payload.
        assert_eq!(c.position(), 13);
    }

    // ---- read_infinint: hostile inputs ------------------------------------

    #[test]
    fn infinint_leading_zero_byte_returns_corrupt() {
        let data = [0x00u8, 0x80, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_12_byte_group_exceeds_u64_returns_corrupt() {
        let mut data = vec![0x20u8];
        data.extend_from_slice(&[0x11; 12]);
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn infinint_all_zero_run_returns_corrupt_without_hanging() {
        let data = vec![0u8; 4096];
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- NUL-string length bounds -----------------------------------------

    #[test]
    fn nul_string_without_terminator_is_length_bounded() {
        // Longer than MAX_NUL_STRING, so the bound trips before end of input.
        let data = vec![b'A'; 200_000];
        let err = read_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn skip_nul_string_without_terminator_is_length_bounded() {
        let data = vec![b'A'; 200_000];
        let err = skip_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- skip: range check ------------------------------------------------

    #[test]
    fn skip_value_above_i64_max_returns_corrupt() {
        let mut c = Cursor::new(vec![0u8; 64]);
        c.set_position(32);
        let err = skip(&mut c, 0x8000_0000_0000_0000).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
        // The failed skip must leave the cursor where it was.
        assert_eq!(c.position(), 32);
    }

    // ---- read_terminateur -------------------------------------------------

    #[test]
    fn terminateur_reads_catalogue_offset() {
        // 5-byte infinint (value 24), 3 zero bytes, then 0xc0 = two blocks.
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xc0];
        assert_eq!(read_terminateur(&mut Cursor::new(data)).unwrap(), 24);
    }

    #[test]
    fn terminateur_all_ff_underflows_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 4])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_excessive_ff_padding_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 600])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_low_terminator_byte_returns_corrupt() {
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x01];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn terminateur_noncontiguous_high_bits_returns_corrupt() {
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0xA0];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- find_catalogue_within: tail window and fallback ------------------

    #[test]
    fn find_catalogue_falls_back_to_full_scan() {
        // The escape sits outside the 4-byte tail window, so only the second
        // pass from the origin can find it.
        let mut data = vec![0x11u8, 0x22];
        data.extend_from_slice(&SEQT_CATALOGUE);
        data.extend_from_slice(&[0x33u8; 12]);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue_within(&mut c, &[0u8; 10], 4).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 2 + SEQT_CATALOGUE.len() as u64);
    }

    #[test]
    fn find_catalogue_full_scan_miss_returns_not_found() {
        let mut c = Cursor::new(vec![0x11u8; 16]);
        let err = find_catalogue_within(&mut c, &[0xABu8; 10], 4).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    #[test]
    fn find_catalogue_body_too_short_when_origin_at_eof() {
        let mut c = Cursor::new(vec![0u8; 6]);
        c.seek(SeekFrom::Start(6)).unwrap();
        let err = find_catalogue(&mut c, &[0u8; 10]).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "archive body too short"));
    }

    // ---- decompress / BoundedWriter ---------------------------------------

    #[test]
    fn decompress_rejects_decompression_bomb() {
        use flate2::{write::ZlibEncoder, Compression};
        use std::io::Write;
        let mut enc = ZlibEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&[0u8; 4096]).unwrap();
        let blob = enc.finish().unwrap();
        // 4096 bytes of output against a 16-byte bound.
        let err = decompress(&blob, b'z', 16).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("exceeds bound")));
    }

    #[test]
    fn decompress_rejects_malformed_zlib() {
        let err = decompress(b"not a zlib stream at all", b'z', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zlib decode failed")));
    }

    #[test]
    fn decompress_rejects_malformed_xz() {
        let err = decompress(b"this is not an xz stream", b'x', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("xz decode failed")));
    }

    #[test]
    fn bounded_writer_caps_output_and_flushes() {
        let mut w = BoundedWriter {
            buf: Vec::new(),
            max: 4,
        };
        assert_eq!(w.write(b"ab").unwrap(), 2);
        w.flush().unwrap();
        // 2 + 3 bytes would exceed the 4-byte cap; the write is rejected whole.
        let err = w.write(b"cde").unwrap_err();
        assert_eq!(err.to_string(), "decompressed data exceeds bound");
        assert_eq!(w.buf, b"ab");
    }
}