1use std::io::{Cursor, Read, Seek, SeekFrom, Write};
49
50use thiserror::Error;
51
/// Four-byte magic number that `DarReader::open` requires at the very start of the input.
const DAR_MAGIC: [u8; 4] = [0x00, 0x00, 0x00, 0x7b];

/// Upper bound on the number of compressed catalogue bytes `DarReader::open` reads into memory.
const MAX_CATALOGUE_COMPRESSED: u64 = 512 * 1024 * 1024;
/// Upper bound on the catalogue size after decompression (passed to `decompress` as `max_out`).
const MAX_CATALOGUE_INFLATED: u64 = 1024 * 1024 * 1024;

/// Byte sequence `scan_window` searches for to locate the catalogue; a hit is
/// reported to callers as "found via escape".
const SEQT_CATALOGUE: [u8; 6] = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43];

/// Packed format value for version 11, fix 1, in the same `version * 256 + fix`
/// layout that `read_format_value` produces. When the catalogue was located via
/// the escape sequence, `DarReader::open` skips one extra NUL-terminated string
/// for formats at or above this value.
const FORMAT_11_1: u32 = 11 * 256 + 1;
68
/// Errors produced while opening a DAR archive or extracting an entry from it.
#[derive(Debug, Error)]
pub enum DarError {
    /// An underlying read or seek failed.
    #[error("I/O error: {0}")]
    Io(#[from] std::io::Error),
    /// The input does not start with the DAR magic number (or is too short to hold it).
    #[error("not a DAR archive")]
    NotADar,
    /// The archive is structurally invalid, or uses something this reader does
    /// not handle (e.g. an unsupported compression byte or an encrypted entry).
    /// The string describes the problem.
    #[error("corrupt archive: {0}")]
    Corrupt(String),
    /// No catalogue entry has the requested path.
    #[error("entry not found: '{0}'")]
    EntryNotFound(String),
}
81
/// Public description of one file entry listed in the archive catalogue.
#[derive(Debug, Clone)]
pub struct DarEntry {
    /// Path of the entry: the names of the enclosing directories and the file
    /// name, joined with `/`.
    pub path: String,
    /// Size declared by the catalogue. For compressed entries
    /// `DarReader::extract` requires the decompressed data to have exactly
    /// this length.
    pub size: u64,
}
88
/// Internal catalogue record holding everything `DarReader::extract` needs to
/// locate and decode one file entry.
#[derive(Debug, Clone)]
struct EntryRef {
    /// Slash-joined path of the entry.
    path: String,
    /// Size declared by the catalogue (expected length after decompression).
    size: u64,
    /// Offset of the stored data, relative to `DarReader::archive_origin`.
    archive_offset: u64,
    /// Number of bytes the entry occupies in the archive.
    stored_size: u64,
    /// Compression algorithm byte, interpreted by `is_compressed` / `decompress`.
    compression: u8,
    /// `true` when the entry's encryption flag byte was non-zero; such entries
    /// are refused by `extract`.
    encrypted: bool,
}
98
/// Reader for a DAR archive whose catalogue has been parsed up front.
///
/// Created with `DarReader::open`; entries are listed with `entries` and
/// their contents retrieved with `extract`.
pub struct DarReader<R: Read + Seek> {
    /// The underlying archive stream.
    inner: R,
    /// Stream position recorded by `open` just before the format string is
    /// read; entry offsets are added to this value.
    archive_origin: u64,
    /// Format value shifted right by eight bits (the version without its fix
    /// number).
    format_major: u32,
    /// File entries collected from the catalogue.
    entries: Vec<EntryRef>,
}
111
112impl<R: Read + Seek> DarReader<R> {
113 pub fn open(mut reader: R) -> Result<Self, DarError> {
115 let mut magic = [0u8; 4];
116 reader
117 .read_exact(&mut magic)
118 .map_err(|_| DarError::NotADar)?;
119 if magic != DAR_MAGIC {
120 return Err(DarError::NotADar);
121 }
122
123 let mut label = [0u8; 10];
124 reader.read_exact(&mut label)?; let _flag = read_u8(&mut reader)?; let extension = read_u8(&mut reader)?; let entries;
132 let archive_origin;
133 let format_major;
134 if extension == b'T' {
135 let tlv_count = read_infinint(&mut reader).map_err(|e| match e {
137 DarError::Io(_) => DarError::Corrupt("truncated TLV block".into()),
138 other => other,
139 })?;
140 for _ in 0..tlv_count {
141 skip(&mut reader, 2)?;
142 let len = read_infinint(&mut reader)?;
143 skip(&mut reader, len)?;
144 }
145
146 archive_origin = reader.stream_position()?;
147 let format_value = read_format_value(&mut reader);
148 let global_comp = read_u8(&mut reader).unwrap_or(b'n');
152 reader.seek(SeekFrom::Start(archive_origin))?;
153
154 let via_escape = find_catalogue(&mut reader, &label)?;
157 format_major = format_value >> 8;
158 if via_escape && is_compressed(global_comp) {
159 let mut compressed = Vec::new();
165 reader
166 .by_ref()
167 .take(MAX_CATALOGUE_COMPRESSED)
168 .read_to_end(&mut compressed)?;
169 let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
170 let mut cur = Cursor::new(inflated);
171 skip(&mut cur, 10)?; if format_value >= FORMAT_11_1 {
173 skip_nul_string(&mut cur)?;
174 }
175 entries = parse_catalog(&mut cur, format_major, global_comp)?;
176 } else {
177 if via_escape {
178 skip(&mut reader, 10)?; if format_value >= FORMAT_11_1 {
182 skip_nul_string(&mut reader)?;
183 }
184 }
185 entries = parse_catalog(&mut reader, format_major, global_comp)?;
186 }
187 } else if extension == b'N' || extension == b'S' {
188 if extension == b'S' {
189 read_infinint(&mut reader)?; }
191 archive_origin = reader.stream_position()?;
192 let format_value = read_format_value(&mut reader); format_major = format_value >> 8;
194 let global_comp = read_u8(&mut reader).unwrap_or(b'n');
198 let cat_offset = read_terminateur(&mut reader)?;
199 let cat_start = archive_origin
200 .checked_add(cat_offset)
201 .ok_or_else(|| DarError::Corrupt("catalogue offset overflows".into()))?;
202 let end = reader.seek(SeekFrom::End(0))?;
203 if cat_start >= end {
204 return Err(DarError::Corrupt(format!(
205 "catalogue start {cat_start} past archive end {end}"
206 )));
207 }
208 reader.seek(SeekFrom::Start(cat_start))?;
209 if is_compressed(global_comp) {
213 let mut compressed = Vec::new();
214 reader
215 .by_ref()
216 .take(MAX_CATALOGUE_COMPRESSED)
217 .read_to_end(&mut compressed)?;
218 let inflated = decompress(&compressed, global_comp, MAX_CATALOGUE_INFLATED)?;
219 entries = parse_catalog(&mut Cursor::new(inflated), format_major, global_comp)?;
220 } else {
221 entries = parse_catalog(&mut reader, format_major, global_comp)?;
222 }
223 } else {
224 return Err(DarError::Corrupt(format!(
225 "unknown slice-header extension {extension:#04x}"
226 )));
227 }
228
229 Ok(Self {
230 inner: reader,
231 archive_origin,
232 format_major,
233 entries,
234 })
235 }
236
237 pub fn entries(&self) -> Vec<DarEntry> {
239 self.entries
240 .iter()
241 .map(|e| DarEntry {
242 path: e.path.clone(),
243 size: e.size,
244 })
245 .collect()
246 }
247
248 pub fn extract(&mut self, path: &str) -> Result<Vec<u8>, DarError> {
250 let entry = self
251 .entries
252 .iter()
253 .find(|e| e.path == path)
254 .ok_or_else(|| DarError::EntryNotFound(path.to_string()))?
255 .clone();
256
257 if entry.encrypted {
258 return Err(DarError::Corrupt(format!("'{path}' is encrypted")));
259 }
260
261 let start = self
266 .archive_origin
267 .checked_add(entry.archive_offset)
268 .ok_or_else(|| {
269 DarError::Corrupt(format!("'{path}' archive offset overflows file position"))
270 })?;
271 let end = self.inner.seek(SeekFrom::End(0))?;
272 if start > end {
273 return Err(DarError::Corrupt(format!(
274 "'{path}' starts at {start}, past archive end {end}"
275 )));
276 }
277
278 if self.format_major == 1 && is_compressed(entry.compression) {
284 self.inner.seek(SeekFrom::Start(start))?;
285 let out = read_bounded(
286 flate2::read::ZlibDecoder::new(&mut self.inner),
287 entry.size,
288 "zlib",
289 )?;
290 if out.len() as u64 != entry.size {
291 return Err(DarError::Corrupt(format!(
292 "'{path}' decompressed to {} bytes but catalog declares {}",
293 out.len(),
294 entry.size
295 )));
296 }
297 return Ok(out);
298 }
299
300 let available = end - start;
301 if entry.stored_size > available {
302 return Err(DarError::Corrupt(format!(
303 "'{path}' claims {} stored bytes but only {available} remain",
304 entry.stored_size
305 )));
306 }
307
308 self.inner.seek(SeekFrom::Start(start))?;
309 let mut data = vec![0u8; entry.stored_size as usize];
310 self.inner.read_exact(&mut data)?;
311
312 if !is_compressed(entry.compression) {
313 return Ok(data);
314 }
315 let out = decompress(&data, entry.compression, entry.size)?;
319 if out.len() as u64 != entry.size {
320 return Err(DarError::Corrupt(format!(
321 "'{path}' decompressed to {} bytes but catalog declares {}",
322 out.len(),
323 entry.size
324 )));
325 }
326 Ok(out)
327 }
328}
329
/// Number of bytes at the end of the archive that `find_catalogue` scans
/// first, before falling back to a scan of the whole body.
const TAIL_SCAN: u64 = 256 * 1024 * 1024;

/// Number of fresh bytes `scan_window` requests per read.
const CHUNK: usize = 4 * 1024 * 1024;
/// Bytes carried over from the end of one chunk to the start of the next so
/// that a match straddling a chunk boundary is still found. 9 is one less
/// than the 10-byte label, the longest pattern `scan_window` searches for.
const OVERLAP: usize = 9;
340
341fn scan_window<R: Read + Seek>(
348 r: &mut R,
349 label: &[u8; 10],
350 use_label: bool,
351) -> Result<Option<bool>, DarError> {
352 let mut buf = vec![0u8; CHUNK + OVERLAP];
353 let mut overlap_len: usize = 0;
354 loop {
355 let chunk_file_pos = r.stream_position()?;
356 let n = r.read(&mut buf[overlap_len..overlap_len + CHUNK])?;
357 if n == 0 {
358 break;
359 }
360 let total = overlap_len + n;
361 let buf_base = chunk_file_pos - overlap_len as u64;
364
365 if let Some(i) = buf[..total]
366 .windows(SEQT_CATALOGUE.len())
367 .position(|w| w == SEQT_CATALOGUE)
368 {
369 r.seek(SeekFrom::Start(
370 buf_base + i as u64 + SEQT_CATALOGUE.len() as u64,
371 ))?;
372 return Ok(Some(true));
373 }
374 if use_label {
375 if let Some(i) = buf[..total]
376 .windows(label.len())
377 .position(|w| w == label.as_ref())
378 {
379 r.seek(SeekFrom::Start(buf_base + i as u64 + label.len() as u64))?;
380 return Ok(Some(false));
381 }
382 }
383
384 let keep = OVERLAP.min(total);
385 buf.copy_within(total - keep..total, 0);
386 overlap_len = keep;
387 }
388 Ok(None)
389}
390
/// Locates the catalogue in the archive body that starts at the current
/// position of `r`, scanning the last `TAIL_SCAN` bytes first.
///
/// Delegates to `find_catalogue_within` with the default tail size. Yields
/// `Ok(true)` when the `SEQT_CATALOGUE` escape sequence was found and
/// `Ok(false)` when the 10-byte `label` was found instead.
fn find_catalogue<R: Read + Seek>(r: &mut R, label: &[u8; 10]) -> Result<bool, DarError> {
    find_catalogue_within(r, label, TAIL_SCAN)
}
413
414fn find_catalogue_within<R: Read + Seek>(
418 r: &mut R,
419 label: &[u8; 10],
420 tail_scan: u64,
421) -> Result<bool, DarError> {
422 let use_label = !label.iter().all(|&b| b == 0);
425
426 let archive_origin = r.stream_position()?;
427 let file_end = r.seek(SeekFrom::End(0))?;
428
429 if file_end <= archive_origin {
430 return Err(DarError::Corrupt("archive body too short".into()));
431 }
432
433 let tail_start = archive_origin.max(file_end.saturating_sub(tail_scan));
435 r.seek(SeekFrom::Start(tail_start))?;
436
437 if let Some(result) = scan_window(r, label, use_label)? {
438 return Ok(result);
439 }
440
441 if tail_start > archive_origin {
443 r.seek(SeekFrom::Start(archive_origin))?;
444 if let Some(result) = scan_window(r, label, use_label)? {
445 return Ok(result);
446 }
447 }
448
449 Err(DarError::Corrupt("seqt_catalogue not found".into()))
450}
451
452fn read_format_value<R: Read>(r: &mut R) -> u32 {
458 let s = read_nul_string(r).unwrap_or_default();
459 let b = s.as_bytes();
460 if b.len() >= 2 {
461 let major = u32::from(b[0].saturating_sub(48)) * 256 + u32::from(b[1].saturating_sub(48));
462 let fix = if b.len() >= 3 {
463 u32::from(b[2].saturating_sub(48))
464 } else {
465 0
466 };
467 major * 256 + fix
468 } else {
469 u32::MAX
470 }
471}
472
/// Reports whether `algo` is one of the compression algorithm bytes this
/// module recognises (`z`, `y`, `x`, `l`, `j`, `k`, `d`, `q`), ignoring ASCII
/// case. Recognised does not imply that `decompress` supports it.
fn is_compressed(algo: u8) -> bool {
    const RECOGNISED: &[u8] = b"zyxljkdq";
    RECOGNISED.contains(&algo.to_ascii_lowercase())
}
485
486fn decompress(data: &[u8], algo: u8, max_out: u64) -> Result<Vec<u8>, DarError> {
490 match algo.to_ascii_lowercase() {
491 b'z' => read_bounded(flate2::read::ZlibDecoder::new(data), max_out, "zlib"),
493 b'y' => read_bounded(bzip2_rs::DecoderReader::new(data), max_out, "bzip2"),
494 b'x' => {
495 let mut input: &[u8] = data;
498 let mut out = BoundedWriter {
499 buf: Vec::new(),
500 max: max_out,
501 };
502 match lzma_rs::xz_decompress(&mut input, &mut out) {
503 Ok(()) => Ok(out.buf),
504 Err(lzma_rs::error::Error::XzError(ref m))
511 if m == "Unexpected data after last XZ block" =>
512 {
513 Ok(out.buf)
514 }
515 Err(e) => Err(DarError::Corrupt(format!("xz decode failed: {e}"))),
516 }
517 }
518 other => Err(DarError::Corrupt(format!(
519 "unsupported compression '{}'",
520 other as char
521 ))),
522 }
523}
524
/// In-memory `Write` sink that refuses to hold more than `max` bytes.
struct BoundedWriter {
    /// Bytes accepted so far.
    buf: Vec<u8>,
    /// Maximum total number of bytes `buf` may hold.
    max: u64,
}

impl Write for BoundedWriter {
    /// Appends `data` in full, or rejects it in full (storing nothing) when
    /// accepting it would push the total past `max`.
    fn write(&mut self, data: &[u8]) -> std::io::Result<usize> {
        let total_after = self.buf.len() as u64 + data.len() as u64;
        if total_after <= self.max {
            self.buf.extend_from_slice(data);
            Ok(data.len())
        } else {
            Err(std::io::Error::other("decompressed data exceeds bound"))
        }
    }

    /// Nothing is buffered outside `buf`, so flushing is a no-op.
    fn flush(&mut self) -> std::io::Result<()> {
        Ok(())
    }
}
545
546fn read_bounded<R: Read>(decoder: R, max_out: u64, what: &str) -> Result<Vec<u8>, DarError> {
549 let mut out = Vec::new();
550 decoder
551 .take(max_out.saturating_add(1))
552 .read_to_end(&mut out)
553 .map_err(|e| DarError::Corrupt(format!("{what} decode failed: {e}")))?;
554 if out.len() as u64 > max_out {
555 return Err(DarError::Corrupt("decompressed data exceeds bound".into()));
556 }
557 Ok(out)
558}
559
560fn read_terminateur<R: Read + Seek>(r: &mut R) -> Result<u64, DarError> {
570 const BLOCK_SIZE: u64 = 4;
571 const MAX_BITS: u64 = 4096; let mut pos = r.seek(SeekFrom::End(0))?;
574 let mut bits: u64 = 0;
575 let terminal = loop {
576 if pos == 0 {
577 return Err(DarError::Corrupt("terminator underflows archive".into()));
578 }
579 pos -= 1;
580 r.seek(SeekFrom::Start(pos))?;
581 let b = read_u8(r)?;
582 if b == 0xFF {
583 bits += 8;
584 if bits > MAX_BITS {
585 return Err(DarError::Corrupt("terminator padding too long".into()));
586 }
587 } else {
588 break b;
589 }
590 };
591 if terminal & 0x80 == 0 {
593 return Err(DarError::Corrupt(format!(
594 "invalid terminator byte {terminal:#04x}"
595 )));
596 }
597 let mut x = terminal;
598 while x != 0 {
599 if x & 0x80 == 0 {
600 return Err(DarError::Corrupt("malformed terminator bit run".into()));
601 }
602 bits += 1;
603 x <<= 1;
604 }
605 let byte_offset = bits * BLOCK_SIZE;
606 let infinint_start = pos
607 .checked_sub(byte_offset)
608 .ok_or_else(|| DarError::Corrupt("terminator offset underflows".into()))?;
609 r.seek(SeekFrom::Start(infinint_start))?;
610 read_infinint(r)
611}
612
613fn parse_catalog<R: Read + Seek>(
618 r: &mut R,
619 format_major: u32,
620 global_comp: u8,
621) -> Result<Vec<EntryRef>, DarError> {
622 let mut entries = Vec::new();
623 let mut dir_stack: Vec<String> = Vec::new();
624 let mut depth: u32 = 0;
625
626 loop {
627 let mut buf = [0u8; 1];
628 match r.read_exact(&mut buf) {
629 Ok(()) => {}
630 Err(_) => break,
631 }
632
633 let entry_type = ((buf[0] & 0x1f) | 0x60) as char;
635
636 match entry_type {
637 'z' => {
638 depth = depth.saturating_sub(1);
640 dir_stack.pop();
641 if depth == 0 {
642 break;
643 }
644 }
645 'd' => {
646 let name = read_nul_string(r)?;
647 let flags = read_inode_base(r, format_major)?;
648 if format_major >= 9 && (flags >> 4) & 1 != 0 {
649 skip_fsa(r)?;
650 }
651 depth += 1;
652 if name != "<ROOT>" {
654 dir_stack.push(name);
655 }
656 }
657 'f' => {
658 let name = read_nul_string(r)?;
659 let flags = read_inode_base(r, format_major)?;
660 if format_major >= 9 && (flags >> 4) & 1 != 0 {
661 skip_fsa(r)?;
662 }
663
664 let size = read_infinint(r)?;
665 let archive_offset = read_infinint(r)?;
666 let (mut stored_size, encryption_flag, compression) = if format_major >= 8 {
673 let ss = read_infinint(r)?;
674 let enc = read_u8(r)?;
675 let comp = read_u8(r)?;
676 let crc_size = read_infinint(r)?;
677 skip(r, crc_size)?;
678 (ss, enc, comp)
679 } else if format_major >= 2 {
680 let ss = read_infinint(r)?;
681 skip(r, 2)?; (ss, 0u8, global_comp)
683 } else {
684 (size, 0u8, global_comp) };
686 if format_major <= 7 && stored_size == 0 {
688 stored_size = size;
689 }
690
691 let path = if dir_stack.is_empty() {
692 name
693 } else {
694 format!("{}/{}", dir_stack.join("/"), name)
695 };
696
697 entries.push(EntryRef {
698 path,
699 size,
700 archive_offset,
701 stored_size,
702 compression,
703 encrypted: encryption_flag != 0,
704 });
705 }
706 'l' => {
707 let _name = read_nul_string(r)?;
709 let flags = read_inode_base(r, format_major)?;
710 if format_major >= 9 && (flags >> 4) & 1 != 0 {
711 skip_fsa(r)?;
712 }
713 skip_nul_string(r)?; }
715 'p' | 's' => {
716 let _name = read_nul_string(r)?;
720 let flags = read_inode_base(r, format_major)?;
721 if format_major >= 9 && (flags >> 4) & 1 != 0 {
722 skip_fsa(r)?;
723 }
724 }
725 _ => break, }
727 }
728
729 Ok(entries)
730}
731
732fn read_infinint<R: Read>(r: &mut R) -> Result<u64, DarError> {
747 let terminal = read_u8(r)?;
748 if terminal == 0x00 {
749 return Err(DarError::Corrupt(
751 "infinint exceeds 64-bit range (multi-group encoding)".into(),
752 ));
753 }
754 if terminal.count_ones() != 1 {
755 return Err(DarError::Corrupt(format!(
756 "invalid infinint terminal: {terminal:#04x}"
757 )));
758 }
759 let pos = terminal.leading_zeros(); if pos > 1 {
761 return Err(DarError::Corrupt(format!(
763 "infinint exceeds 64-bit range: terminal {terminal:#04x} implies {} bytes",
764 (pos + 1) * 4
765 )));
766 }
767 let data_bytes = (pos + 1) * 4; let mut val: u64 = 0;
769 for _ in 0..data_bytes {
770 val = (val << 8) | u64::from(read_u8(r)?);
771 }
772 Ok(val)
773}
774
775fn read_u8<R: Read>(r: &mut R) -> Result<u8, DarError> {
776 let mut b = [0u8; 1];
777 r.read_exact(&mut b)?;
778 Ok(b[0])
779}
780
/// Maximum number of bytes, not counting the terminating NUL, that
/// `read_nul_string` and `skip_nul_string` accept before reporting the input
/// as corrupt.
const MAX_NUL_STRING: usize = 64 * 1024;
785
786fn read_nul_string<R: Read>(r: &mut R) -> Result<String, DarError> {
788 let mut bytes = Vec::new();
789 loop {
790 let b = read_u8(r)?;
791 if b == 0 {
792 break;
793 }
794 if bytes.len() >= MAX_NUL_STRING {
795 return Err(DarError::Corrupt(format!(
796 "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
797 )));
798 }
799 bytes.push(b);
800 }
801 String::from_utf8(bytes).map_err(|e| DarError::Corrupt(e.to_string()))
802}
803
804fn skip_nul_string<R: Read>(r: &mut R) -> Result<(), DarError> {
806 let mut len: usize = 0;
807 loop {
808 if read_u8(r)? == 0 {
809 return Ok(());
810 }
811 len += 1;
812 if len > MAX_NUL_STRING {
813 return Err(DarError::Corrupt(format!(
814 "NUL-terminated string exceeds {MAX_NUL_STRING} bytes"
815 )));
816 }
817 }
818}
819
820fn skip<R: Seek>(r: &mut R, n: u64) -> Result<(), DarError> {
822 if n > 0 {
823 let off = i64::try_from(n)
827 .map_err(|_| DarError::Corrupt(format!("skip length {n} exceeds seekable range")))?;
828 r.seek(SeekFrom::Current(off)).map_err(DarError::Io)?;
829 }
830 Ok(())
831}
832
833fn skip_timestamp<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<(), DarError> {
839 if format_major < 9 {
843 read_infinint(r)?;
844 return Ok(());
845 }
846 let ts_type = read_u8(r)?;
847 read_infinint(r)?;
848 if ts_type == b'n' || ts_type == b'u' {
849 read_infinint(r)?;
850 }
851 Ok(())
852}
853
854fn read_inode_base<R: Read + Seek>(r: &mut R, format_major: u32) -> Result<u8, DarError> {
860 let flags = if format_major >= 2 { read_u8(r)? } else { 0 };
863 if format_major <= 7 {
865 skip(r, 4)?; } else {
867 read_infinint(r)?; read_infinint(r)?; }
870 skip(r, 2)?; skip_timestamp(r, format_major)?; skip_timestamp(r, format_major)?; if format_major >= 8 {
875 skip_timestamp(r, format_major)?;
876 }
877 if format_major >= 9 && (flags >> 4) & 1 != 0 {
880 read_infinint(r)?;
881 read_infinint(r)?;
882 }
883 Ok(flags)
884}
885
886fn skip_fsa<R: Read + Seek>(r: &mut R) -> Result<(), DarError> {
890 let _tag = read_infinint(r)?;
891 let size = read_infinint(r)?;
892 skip(r, size)
893}
894
/// Unit tests for the low-level readers, the catalogue locator, the
/// terminator parser and the bounded decompression helpers.
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;

    // ---- read_infinint -------------------------------------------------

    /// A 0x80 terminal announces four big-endian data bytes.
    #[test]
    fn infinint_decodes_value() {
        let data = [0x80u8, 0x00, 0x00, 0x00, 0x0d];
        assert_eq!(read_infinint(&mut Cursor::new(&data[..])).unwrap(), 13);
    }

    /// 0x03 has two bits set, which is not a valid terminal byte.
    #[test]
    fn infinint_bad_preamble_returns_corrupt() {
        let data = [0x03u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    /// Only one of the four announced data bytes is present.
    #[test]
    fn infinint_truncated_returns_io() {
        let err = read_infinint(&mut Cursor::new(&[0x80u8, 0x00][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    /// A 0x40 terminal announces eight data bytes.
    #[test]
    fn infinint_0x40_preamble_reads_8_data_bytes() {
        let mut data = vec![0x40u8];
        data.extend_from_slice(&[0x00, 0x00, 0x00, 0x00, 0x5d, 0x15, 0x93, 0x31]);
        assert_eq!(
            read_infinint(&mut Cursor::new(data)).unwrap(),
            0x5d15_9331u64
        );
    }

    /// 0x60 has two bits set and must be rejected.
    #[test]
    fn infinint_multi_bit_terminal_returns_corrupt() {
        let data = [0x60u8, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(_)));
    }

    // ---- read_u8 -------------------------------------------------------

    #[test]
    fn read_u8_reads_single_byte() {
        assert_eq!(read_u8(&mut Cursor::new(&[0x42u8][..])).unwrap(), 0x42);
    }

    #[test]
    fn read_u8_eof_returns_io() {
        let err = read_u8(&mut Cursor::new(&[][..])).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ---- read_nul_string / skip_nul_string -----------------------------

    /// Reading stops at the first NUL; the terminator is not part of the result.
    #[test]
    fn nul_string_reads_until_nul() {
        let data = b"hello\x00world";
        assert_eq!(
            read_nul_string(&mut Cursor::new(&data[..])).unwrap(),
            "hello"
        );
    }

    /// 0xFF 0x80 is not valid UTF-8.
    #[test]
    fn nul_string_invalid_utf8_returns_corrupt() {
        let data = [0xFF, 0x80, 0x00];
        let err = read_nul_string(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)));
    }

    #[test]
    fn nul_string_eof_before_nul_returns_io() {
        let err = read_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    #[test]
    fn skip_nul_string_advances_past_nul() {
        let data = b"skip\x00rest";
        let mut c = Cursor::new(data.to_vec());
        skip_nul_string(&mut c).unwrap();
        // Four payload bytes plus the NUL itself.
        assert_eq!(c.position(), 5);
    }

    #[test]
    fn skip_nul_string_eof_returns_io() {
        let err = skip_nul_string(&mut Cursor::new(b"no-nul".to_vec())).unwrap_err();
        assert!(matches!(err, DarError::Io(_)));
    }

    // ---- find_catalogue ------------------------------------------------

    /// A three-byte body with no marker in it; either corruption message is
    /// accepted.
    #[test]
    fn find_catalogue_body_too_short() {
        let label = [0u8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0x01u8, 0x02, 0x03][..]), &label).unwrap_err();
        assert!(
            matches!(&err, DarError::Corrupt(s) if s == "archive body too short"
                || s == "seqt_catalogue not found")
        );
    }

    /// The escape sequence sits at offset 0; the reader ends up just past it.
    #[test]
    fn find_catalogue_escape_at_start() {
        let mut data = [0xAD, 0xFD, 0xEA, 0x77, 0x21, 0x43, 0xFF];
        let mut c = Cursor::new(&mut data[..]);
        let via_escape = find_catalogue(&mut c, &[0u8; 10]).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 6);
    }

    /// Neither the escape sequence nor the (non-zero) label occurs in the body.
    #[test]
    fn find_catalogue_escape_not_found() {
        let label = [0xFFu8; 10];
        let err = find_catalogue(&mut Cursor::new(&[0u8; 10][..]), &label).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    /// Without an escape sequence the label is used as the marker instead.
    #[test]
    fn find_catalogue_label_fallback() {
        let label: [u8; 10] = [0xA1, 0xB2, 0xC3, 0xD4, 0xE5, 0xF6, 0x07, 0x18, 0x29, 0x3A];
        let mut data = vec![0x00u8; 5];
        data.extend_from_slice(&label);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue(&mut c, &label).unwrap();
        assert!(!via_escape);
        // Five filler bytes plus the ten label bytes.
        assert_eq!(c.position(), 15);
    }

    // ---- skip ----------------------------------------------------------

    #[test]
    fn skip_zero_does_not_move_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 0).unwrap();
        assert_eq!(c.position(), 0);
    }

    #[test]
    fn skip_n_advances_cursor() {
        let mut c = Cursor::new(vec![0xFFu8; 10]);
        skip(&mut c, 7).unwrap();
        assert_eq!(c.position(), 7);
    }

    // ---- read_inode_base / skip_fsa ------------------------------------

    /// Format 11 with flag bit 4 clear: 1 + 5 + 5 + 2 + 3 * 6 = 31 bytes.
    #[test]
    fn inode_base_bit4_clear_reads_31_bytes() {
        // Flag byte.
        let mut data = vec![0x00u8];
        // Two infinints.
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        // Two raw bytes.
        data.extend_from_slice(&[0x00, 0x00]);
        // Three timestamps: type byte plus one infinint each.
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        // Sentinel that must not be consumed.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x00);
        assert_eq!(c.position(), 31);
    }

    /// Format 11 with flag bit 4 set: two more infinints, 31 + 10 = 41 bytes.
    #[test]
    fn inode_base_bit4_set_reads_41_bytes() {
        // Flag byte with bit 4 set.
        let mut data = vec![0x10u8];
        // Two infinints.
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        // Two raw bytes.
        data.extend_from_slice(&[0x00, 0x00]);
        // Three timestamps.
        for _ in 0..3 {
            data.push(b's');
            data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        }
        // The two extra infinints read because bit 4 is set.
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x00]);
        // Sentinel that must not be consumed.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        assert_eq!(read_inode_base(&mut c, 11).unwrap(), 0x10);
        assert_eq!(c.position(), 41);
    }

    #[test]
    fn skip_fsa_consumes_tag_size_and_data() {
        let mut data = Vec::new();
        // First infinint (value ignored by skip_fsa).
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x05]);
        // Second infinint: payload length 3.
        data.extend_from_slice(&[0x80, 0x00, 0x00, 0x00, 0x03]);
        // Payload.
        data.extend_from_slice(&[0xAA, 0xBB, 0xCC]);
        // Sentinel that must not be consumed.
        data.push(0xFF);
        let mut c = Cursor::new(data);
        skip_fsa(&mut c).unwrap();
        // 5 + 5 + 3 bytes.
        assert_eq!(c.position(), 13);
    }

    // ---- read_infinint: rejected encodings -----------------------------

    /// A zero terminal byte is rejected rather than treated as a longer encoding.
    #[test]
    fn infinint_leading_zero_byte_returns_corrupt() {
        let data = [0x00u8, 0x80, 0x00, 0x00, 0x00, 0x00];
        let err = read_infinint(&mut Cursor::new(&data[..])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    /// 0x20 would announce 12 data bytes, more than a u64 can hold.
    #[test]
    fn infinint_12_byte_group_exceeds_u64_returns_corrupt() {
        let mut data = vec![0x20u8];
        data.extend_from_slice(&[0x11; 12]);
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    /// A long run of zero bytes fails on the first byte instead of being scanned.
    #[test]
    fn infinint_all_zero_run_returns_corrupt_without_hanging() {
        let data = vec![0u8; 4096];
        let err = read_infinint(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- NUL-string length bound ---------------------------------------

    /// 200,000 bytes without a NUL exceed MAX_NUL_STRING before EOF is reached.
    #[test]
    fn nul_string_without_terminator_is_length_bounded() {
        let data = vec![b'A'; 200_000];
        let err = read_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    #[test]
    fn skip_nul_string_without_terminator_is_length_bounded() {
        let data = vec![b'A'; 200_000];
        let err = skip_nul_string(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- skip: offset range --------------------------------------------

    /// A length above i64::MAX cannot be expressed as a relative seek.
    #[test]
    fn skip_value_above_i64_max_returns_corrupt() {
        let mut c = Cursor::new(vec![0u8; 64]);
        c.set_position(32);
        let err = skip(&mut c, 0x8000_0000_0000_0000).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
        // The failed skip must not have moved the cursor.
        assert_eq!(c.position(), 32);
    }

    // ---- read_terminateur ----------------------------------------------

    /// 0xc0 carries two one-bits: 2 * 4 = 8 bytes back from the terminator
    /// byte at offset 8 is offset 0, where the infinint encodes 24.
    #[test]
    fn terminateur_reads_catalogue_offset() {
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x00, 0x00, 0x00, 0xc0];
        assert_eq!(read_terminateur(&mut Cursor::new(data)).unwrap(), 24);
    }

    /// Only 0xFF bytes: the backwards walk reaches the start of the stream.
    #[test]
    fn terminateur_all_ff_underflows_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 4])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    /// 600 bytes of 0xFF exceed the padding limit before the start is reached.
    #[test]
    fn terminateur_excessive_ff_padding_returns_corrupt() {
        let err = read_terminateur(&mut Cursor::new(vec![0xFFu8; 600])).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    /// 0x01 does not have its most significant bit set.
    #[test]
    fn terminateur_low_terminator_byte_returns_corrupt() {
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0x01];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    /// 0xA0 (1010_0000) has a gap in its run of one-bits.
    #[test]
    fn terminateur_noncontiguous_high_bits_returns_corrupt() {
        let data = vec![0x80u8, 0x00, 0x00, 0x00, 0x18, 0xA0];
        let err = read_terminateur(&mut Cursor::new(data)).unwrap_err();
        assert!(matches!(err, DarError::Corrupt(_)), "got {err:?}");
    }

    // ---- find_catalogue_within -----------------------------------------

    /// With a 4-byte tail the escape sequence at offset 2 lies outside the
    /// tail and is only found by the second, full scan.
    #[test]
    fn find_catalogue_falls_back_to_full_scan() {
        let mut data = vec![0x11u8, 0x22];
        data.extend_from_slice(&SEQT_CATALOGUE);
        data.extend_from_slice(&[0x33u8; 12]);
        let mut c = Cursor::new(data);
        let via_escape = find_catalogue_within(&mut c, &[0u8; 10], 4).unwrap();
        assert!(via_escape);
        assert_eq!(c.position(), 2 + SEQT_CATALOGUE.len() as u64);
    }

    /// Neither the tail scan nor the full scan finds a marker.
    #[test]
    fn find_catalogue_full_scan_miss_returns_not_found() {
        let mut c = Cursor::new(vec![0x11u8; 16]);
        let err = find_catalogue_within(&mut c, &[0xABu8; 10], 4).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "seqt_catalogue not found"));
    }

    /// The reader already sits at end of file, so the body is empty.
    #[test]
    fn find_catalogue_body_too_short_when_origin_at_eof() {
        let mut c = Cursor::new(vec![0u8; 6]);
        c.seek(SeekFrom::Start(6)).unwrap();
        let err = find_catalogue(&mut c, &[0u8; 10]).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s == "archive body too short"));
    }

    // ---- decompress / BoundedWriter ------------------------------------

    /// 4096 zero bytes compressed with zlib must not inflate past a 16-byte bound.
    #[test]
    fn decompress_rejects_decompression_bomb() {
        use flate2::{write::ZlibEncoder, Compression};
        use std::io::Write;
        let mut enc = ZlibEncoder::new(Vec::new(), Compression::default());
        enc.write_all(&[0u8; 4096]).unwrap();
        let blob = enc.finish().unwrap();
        let err = decompress(&blob, b'z', 16).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("exceeds bound")));
    }

    #[test]
    fn decompress_rejects_malformed_zlib() {
        let err = decompress(b"not a zlib stream at all", b'z', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("zlib decode failed")));
    }

    #[test]
    fn decompress_rejects_malformed_xz() {
        let err = decompress(b"this is not an xz stream", b'x', 1024).unwrap_err();
        assert!(matches!(&err, DarError::Corrupt(s) if s.contains("xz decode failed")));
    }

    /// A write that would exceed the bound is rejected as a whole and leaves
    /// the buffer untouched.
    #[test]
    fn bounded_writer_caps_output_and_flushes() {
        let mut w = BoundedWriter {
            buf: Vec::new(),
            max: 4,
        };
        // Two of four bytes used.
        assert_eq!(w.write(b"ab").unwrap(), 2);
        w.flush().unwrap();
        // 2 + 3 > 4, so this write must fail.
        let err = w.write(b"cde").unwrap_err();
        assert_eq!(err.to_string(), "decompressed data exceeds bound");
        assert_eq!(w.buf, b"ab");
    }
}