1use std::{fmt, mem::offset_of};
18
19use positioned_io::ReadAt;
20use xxhash_rust::xxh3::Xxh3Default;
21use zerocopy::{FromBytes, FromZeros, Immutable, IntoBytes, KnownLayout, little_endian as le};
22
23use crate::SUPPORTED_VERSION_RANGE;
24
/// Module-local result alias using this module's [`Error`].
type Result<T> = std::result::Result<T, Error>;
26
/// Error produced while reading or validating sections; the variants are
/// boxed so `Result<T>` stays small.
pub struct Error(Box<ErrorInner>);
29
30impl fmt::Debug for Error {
31 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
32 self.0.fmt(f)
33 }
34}
35
/// Concrete error cases, kept behind a `Box` inside [`Error`].
#[derive(Debug)]
#[cfg_attr(not(feature = "default"), allow(dead_code))]
enum ErrorInner {
    /// Section header magic bytes were not `b"DWARFS"`.
    InvalidMagic([u8; 6]),
    /// Header version `(major, minor)` outside `SUPPORTED_VERSION_RANGE`.
    UnsupportedVersion(u8, u8),
    /// Payload length does not match the header's `payload_size`.
    LengthMismatch,
    /// Fast or slow checksum did not match the header.
    ChecksumMismatch,
    /// Offset arithmetic overflowed `u64`.
    OffsetOverflow,

    /// The algorithm is recognized but no decoder is compiled in / supported.
    UnsupportedCompressAlgo(CompressAlgo),
    /// A section had a different type than the caller required.
    TypeMismatch {
        expect: SectionType,
        got: SectionType,
    },
    /// Payload exceeds a caller-supplied byte limit; `got` is the actual size
    /// when known.
    PayloadTooLong {
        limit: usize,
        got: Option<u64>,
    },
    /// Decompressor reported a failure.
    Decompress(std::io::Error),
    /// The trailing section index failed validation.
    MalformedSectionIndex(String),

    /// Underlying read failed.
    Io(std::io::Error),
}
63
// Human-readable rendering for each error case; wrapped I/O errors are
// rendered via their own `Display` impls.
impl fmt::Display for Error {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &*self.0 {
            ErrorInner::InvalidMagic(magic) => {
                write!(f, "invalid section magic: b\"{}\"", magic.escape_ascii())
            }
            ErrorInner::UnsupportedVersion(maj, min) => {
                write!(f, "unsupported section version: DWARFS{maj}.{min}")
            }
            // `f.pad` (rather than `write!`) honors width/alignment flags for
            // the fixed messages.
            ErrorInner::LengthMismatch => f.pad("section payload length mismatch"),
            ErrorInner::ChecksumMismatch => f.pad("section checksum mismatch"),
            ErrorInner::OffsetOverflow => f.pad("section offset overflow"),

            ErrorInner::UnsupportedCompressAlgo(algo) => {
                write!(f, "unsupported section compress algorithm {algo:?}")
            }
            ErrorInner::TypeMismatch { expect, got } => {
                write!(
                    f,
                    "section type mismatch, expect {expect:?} but got {got:?}"
                )
            }
            ErrorInner::PayloadTooLong {
                limit,
                got: Some(got),
            } => {
                write!(
                    f,
                    "section payload has {got} bytes, exceeding the limit of {limit} bytes"
                )
            }
            ErrorInner::PayloadTooLong { limit, got: None } => {
                write!(f, "section payload exceeds the limit of {limit} bytes")
            }
            ErrorInner::MalformedSectionIndex(msg) => {
                write!(f, "malformed section index: {msg}")
            }

            ErrorInner::Decompress(err) => write!(f, "failed to decompress section payload: {err}"),

            ErrorInner::Io(err) => err.fmt(f),
        }
    }
}
108
109impl std::error::Error for Error {
110 fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
111 match &*self.0 {
112 ErrorInner::Decompress(err) | ErrorInner::Io(err) => Some(err),
113 _ => None,
114 }
115 }
116}
117
118impl From<std::io::Error> for Error {
119 #[cold]
120 fn from(err: std::io::Error) -> Self {
121 Self(Box::new(ErrorInner::Io(err)))
122 }
123}
124
125impl From<ErrorInner> for Error {
126 #[cold]
127 fn from(err: ErrorInner) -> Self {
128 Self(Box::new(err))
129 }
130}
131
/// Size in bytes of the on-disk [`Header`] (6+1+1 magic/version, 32+8 hashes,
/// 4+2+2 number/type/algo, 8 payload size = 64).
pub(crate) const HEADER_SIZE: u64 = size_of::<Header>() as u64;
133
/// On-disk section header, stored little-endian with 8-byte alignment.
#[derive(Clone, Copy, PartialEq, Eq, Hash, FromBytes, IntoBytes, Immutable, KnownLayout)]
#[repr(C, align(8))]
pub struct Header {
    /// Magic bytes plus format major/minor version.
    pub magic_version: MagicVersion,
    /// SHA-512/256 over the header from `fast_hash` onward plus the payload.
    pub slow_hash: [u8; 32],
    /// XXH3-64 (little-endian bytes) over the header from `section_number`
    /// onward plus the payload.
    pub fast_hash: [u8; 8],
    /// Ordinal of this section within the archive (the trailing section index
    /// is validated to carry number `section count - 1`).
    pub section_number: le::U32,
    /// What the payload contains; see [`SectionType`].
    pub section_type: SectionType,
    /// How the payload is compressed; see [`CompressAlgo`].
    pub compress_algo: CompressAlgo,
    /// Size in bytes of the (compressed) payload following this header.
    pub payload_size: le::U64,
}
153
154impl fmt::Debug for Header {
155 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
156 f.debug_struct("BlockHeader")
157 .field("magic_version", &self.magic_version)
158 .field("slow_hash", &format_args!("{:02x?}", self.slow_hash))
159 .field("slow_hash", &format_args!("{:02x?}", self.fast_hash))
160 .field("section_number", &self.section_number.get())
161 .field("section_type", &self.section_type)
162 .field("compress_algo", &self.compress_algo)
163 .field("payload_size", &self.payload_size.get())
164 .finish()
165 }
166}
167
impl Header {
    /// Computes the fast (XXH3-64) checksum over the header fields from
    /// `section_number` onward followed by `payload`, returned as
    /// little-endian bytes.
    ///
    /// # Errors
    ///
    /// `LengthMismatch` when `payload.len()` differs from `payload_size`.
    pub fn calculate_fast_checksum(&self, payload: &[u8]) -> Result<[u8; 8]> {
        if payload.len() as u64 != self.payload_size.get() {
            bail!(ErrorInner::LengthMismatch);
        }
        let mut h = Xxh3Default::new();
        // Skip the magic/version and both hash fields; hash the rest of the
        // header, then the payload.
        h.update(&self.as_bytes()[offset_of!(Self, section_number)..]);
        h.update(payload);
        Ok(h.digest().to_le_bytes())
    }

    /// Verifies `payload` against the stored `fast_hash`.
    ///
    /// # Errors
    ///
    /// `LengthMismatch` on a size mismatch, `ChecksumMismatch` on a bad hash.
    pub fn validate_fast_checksum(&self, payload: &[u8]) -> Result<()> {
        let h = self.calculate_fast_checksum(payload)?;
        if h != self.fast_hash {
            bail!(ErrorInner::ChecksumMismatch);
        }
        Ok(())
    }

    /// Computes the slow (SHA-512/256) checksum over the header fields from
    /// `fast_hash` onward followed by `payload`.
    ///
    /// # Errors
    ///
    /// `LengthMismatch` when `payload.len()` differs from `payload_size`.
    pub fn calculate_slow_checksum(&self, payload: &[u8]) -> Result<[u8; 32]> {
        use sha2::Digest;

        if payload.len() as u64 != self.payload_size.get() {
            bail!(ErrorInner::LengthMismatch);
        }
        let mut h = sha2::Sha512_256::new();
        // Note: the slow hash covers the `fast_hash` field, so when updating
        // a header the fast hash must be written first.
        h.update(&self.as_bytes()[offset_of!(Self, fast_hash)..]);
        h.update(payload);
        Ok(*h.finalize().as_ref())
    }

    /// Verifies `payload` against the stored `slow_hash`.
    ///
    /// # Errors
    ///
    /// `LengthMismatch` on a size mismatch, `ChecksumMismatch` on a bad hash.
    pub fn validate_slow_checksum(&self, payload: &[u8]) -> Result<()> {
        let h = self.calculate_slow_checksum(payload)?;
        if h != self.slow_hash {
            bail!(ErrorInner::ChecksumMismatch);
        }
        Ok(())
    }

    /// Sets `payload_size` from `payload` and recomputes both checksums —
    /// fast first, because the slow hash covers the `fast_hash` field.
    pub fn update_size_and_checksum(&mut self, payload: &[u8]) {
        self.payload_size = u64::try_from(payload.len())
            .expect("payload length overflows u64")
            .into();
        self.fast_hash = self
            .calculate_fast_checksum(payload)
            .expect("length matches");
        self.slow_hash = self
            .calculate_slow_checksum(payload)
            .expect("length matches");
    }

    /// Errors with `TypeMismatch` unless this header's section type equals
    /// `expect`.
    pub(crate) fn check_type(&self, expect: SectionType) -> Result<()> {
        if self.section_type != expect {
            bail!(ErrorInner::TypeMismatch {
                expect,
                got: self.section_type,
            });
        }
        Ok(())
    }

    /// Returns `payload_size` as a `usize`, erroring with `PayloadTooLong`
    /// when it exceeds `limit` or does not fit in `usize`.
    fn payload_size_limited(&self, limit: usize) -> Result<usize> {
        let size = self.payload_size.get();
        if let Some(size) = usize::try_from(size).ok().filter(|&n| n <= limit) {
            Ok(size)
        } else {
            bail!(ErrorInner::PayloadTooLong {
                limit,
                got: Some(size)
            })
        }
    }
}
273
/// The leading 8 bytes of a section header: magic bytes and format version.
#[derive(Clone, Copy, PartialEq, Eq, Hash, FromBytes, IntoBytes, Immutable, KnownLayout)]
#[repr(C)]
pub struct MagicVersion {
    /// Expected to equal [`MagicVersion::MAGIC`] (`b"DWARFS"`).
    pub magic: [u8; 6],
    /// Major format version.
    pub major: u8,
    /// Minor format version.
    pub minor: u8,
}
285
286impl fmt::Debug for MagicVersion {
287 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
288 f.debug_struct("MagicVersion")
289 .field("magic", &format_args!("b\"{}\"", self.magic.escape_ascii()))
290 .field("major", &self.major)
291 .field("minor", &self.minor)
292 .finish()
293 }
294}
295
296impl MagicVersion {
297 pub const MAGIC: [u8; 6] = *b"DWARFS";
299
300 pub const LATEST: Self = Self {
302 magic: Self::MAGIC,
303 major: SUPPORTED_VERSION_RANGE.end().0,
304 minor: SUPPORTED_VERSION_RANGE.end().1,
305 };
306
307 pub fn validate(self) -> Result<()> {
314 let ver = (self.major, self.minor);
315 if self.magic != Self::MAGIC {
316 bail!(ErrorInner::InvalidMagic(self.magic));
317 }
318 if !SUPPORTED_VERSION_RANGE.contains(&ver) {
319 bail!(ErrorInner::UnsupportedVersion(ver.0, ver.1));
320 }
321 Ok(())
322 }
323}
324
/// Open-enum tag identifying what a section's payload contains.
///
/// Stored as a little-endian `u16`; unknown values are preserved rather than
/// rejected. Named constants are generated by `impl_open_enum!` below.
#[derive(Clone, Copy, PartialEq, Eq, Hash, FromBytes, IntoBytes, Immutable, KnownLayout)]
#[repr(C, align(2))]
pub struct SectionType(pub le::U16);
329
/// Generates `Debug`, named constants, and `is_known` for an "open enum":
/// a newtype over a raw little-endian integer that tolerates unknown values.
macro_rules! impl_open_enum {
    ($name:ident; $ctor:path; $($(#[$meta:meta])* $variant:ident = $value:expr,)*) => {
        impl std::fmt::Debug for $name {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                // Known values print as their constant name; unknown values
                // fall back to `Name(raw)` tuple formatting.
                f.pad(match *self {
                    $(Self::$variant => stringify!($variant),)*
                    _ => return f
                        .debug_tuple(stringify!($name))
                        .field(&self.0.get())
                        .finish(),
                })
            }
        }

        impl $name {
            $(
                $(#[$meta])*
                pub const $variant: Self = Self($ctor($value));
            )*

            /// Returns `true` when `self` equals one of the named constants.
            #[must_use]
            #[inline]
            pub fn is_known(self) -> bool {
                matches!(self, $(Self::$variant)|*)
            }
        }
    };
}
359
// Known section type tags; the numeric values must match the on-disk format.
impl_open_enum! {
    SectionType; le::U16::new;

    BLOCK = 0,
    METADATA_V2_SCHEMA = 7,
    METADATA_V2 = 8,
    // Trailing section index; consumed by `SectionReader::read_section_index`.
    SECTION_INDEX = 9,
    HISTORY = 10,
}
375
/// Open-enum tag identifying how a section payload is compressed.
///
/// Stored as a little-endian `u16`; unknown values are preserved rather than
/// rejected. Named constants are generated by `impl_open_enum!` below.
#[derive(Clone, Copy, PartialEq, Eq, Hash, FromBytes, IntoBytes, Immutable, KnownLayout)]
#[repr(C, align(2))]
pub struct CompressAlgo(pub le::U16);
380
// Known compression algorithms. Only NONE, ZSTD, LZMA, LZ4 and LZ4HC are
// decodable (feature-gated) in `read_payload_at_into`; the rest currently
// yield `UnsupportedCompressAlgo`.
impl_open_enum! {
    CompressAlgo; le::U16::new;

    NONE = 0,
    LZMA = 1,
    ZSTD = 2,
    LZ4 = 3,
    LZ4HC = 4,
    BROTLI = 5,
    FLAC = 6,
    RICEPP = 7,
}
402
/// One entry of the trailing section index: the section type tag packed into
/// the top 16 bits and the section's offset in the low 48 bits.
#[derive(Clone, Copy, PartialEq, Eq, Hash, FromBytes, IntoBytes, Immutable, KnownLayout)]
#[repr(C, align(8))]
pub struct SectionIndexEntry(pub le::U64);
407
408impl fmt::Debug for SectionIndexEntry {
409 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
410 f.debug_struct("SectionIndexEntry")
411 .field("section_type", &self.section_type())
412 .field("offset", &self.offset())
413 .finish()
414 }
415}
416
417impl SectionIndexEntry {
418 #[must_use]
424 #[inline]
425 pub fn new(typ: SectionType, offset: u64) -> Option<Self> {
426 if offset < 1u64 << 48 {
427 Some(Self((u64::from(typ.0.get()) << 48 | offset).into()))
428 } else {
429 None
430 }
431 }
432
433 #[must_use]
435 #[inline]
436 #[allow(clippy::missing_panics_doc, reason = "never panics")]
437 pub fn section_type(self) -> SectionType {
438 SectionType((self.0 >> 48).try_into().expect("always in u16 range"))
439 }
440
441 #[must_use]
444 #[inline]
445 pub fn offset(self) -> u64 {
446 self.0.get() & ((1u64 << 48) - 1)
447 }
448}
449
/// Reads and validates archive sections from a [`ReadAt`] source.
pub struct SectionReader<R: ?Sized> {
    /// Offset in the underlying stream where the archive begins; all section
    /// offsets passed to this reader are relative to it.
    archive_start: u64,
    /// Scratch buffer for the raw (compressed) payload, reused across reads.
    raw_buf: Vec<u8>,
    /// Underlying positioned reader (last field so `R` may be unsized).
    rdr: R,
}
469
470impl<R: fmt::Debug + ?Sized> fmt::Debug for SectionReader<R> {
471 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
472 f.debug_struct("SectionReader")
473 .field("archive_start", &self.archive_start)
474 .field(
475 "raw_buf",
476 &format_args!("{}/{}", self.raw_buf.len(), self.raw_buf.capacity()),
477 )
478 .field("rdr", &&self.rdr)
479 .finish()
480 }
481}
482
483impl<R> SectionReader<R> {
484 pub fn new(rdr: R) -> Self {
491 Self::new_with_offset(rdr, 0)
492 }
493
494 pub fn new_with_offset(rdr: R, archive_start: u64) -> Self {
501 SectionReader {
502 archive_start,
503 raw_buf: Vec::new(),
504 rdr,
505 }
506 }
507}
508
509impl<R: ?Sized> SectionReader<R> {
510 #[inline]
512 #[must_use]
513 pub fn get_ref(&self) -> &R {
514 &self.rdr
515 }
516
517 #[inline]
519 #[must_use]
520 pub fn get_mut(&mut self) -> &mut R {
521 &mut self.rdr
522 }
523
524 #[inline]
526 #[must_use]
527 pub fn into_inner(self) -> R
528 where
529 R: Sized,
530 {
531 self.rdr
532 }
533}
534
535impl<R: ReadAt + ?Sized> SectionReader<R> {
536 #[inline]
538 #[must_use]
539 pub fn archive_start(&self) -> u64 {
540 self.archive_start
541 }
542
543 pub fn read_section_at(
552 &mut self,
553 section_offset: u64,
554 payload_size_limit: usize,
555 ) -> Result<(Header, Vec<u8>)> {
556 let header = self.read_header_at(section_offset)?;
557 let payload_offset = section_offset + HEADER_SIZE;
559 let payload = self.read_payload_at(&header, payload_offset, payload_size_limit)?;
560 Ok((header, payload))
561 }
562
563 pub fn read_header_at(&mut self, section_offset: u64) -> Result<Header> {
570 let file_offset = self
571 .archive_start
572 .checked_add(section_offset)
573 .ok_or(ErrorInner::OffsetOverflow)?;
574 let mut header = Header::new_zeroed();
575 self.rdr.read_exact_at(file_offset, header.as_mut_bytes())?;
577 header.magic_version.validate()?;
578 Ok(header)
579 }
580
581 pub fn read_payload_at(
590 &mut self,
591 header: &Header,
592 payload_offset: u64,
593 payload_size_limit: usize,
594 ) -> Result<Vec<u8>> {
595 let mut out = vec![0u8; payload_size_limit];
596 let len = self.read_payload_at_into(header, payload_offset, &mut out)?;
597 out.truncate(len);
598 Ok(out)
599 }
600
601 pub fn read_payload_at_into(
616 &mut self,
617 header: &Header,
618 payload_offset: u64,
619 out: &mut [u8],
620 ) -> Result<usize> {
621 let file_offset = self
622 .archive_start
623 .checked_add(payload_offset)
624 .ok_or(ErrorInner::OffsetOverflow)?;
625
626 let size_limit = out.len();
627 let compressed_size = header.payload_size_limited(size_limit)?;
628 let raw_buf = &mut self.raw_buf;
629 raw_buf.resize(compressed_size, 0);
630 self.rdr.read_exact_at(file_offset, raw_buf)?;
631 header.validate_fast_checksum(raw_buf)?;
632
633 match header.compress_algo {
634 CompressAlgo::NONE => {
635 out[..compressed_size].copy_from_slice(raw_buf);
636 Ok(compressed_size)
637 }
638 #[cfg(feature = "zstd")]
639 CompressAlgo::ZSTD => zstd_safe::decompress(out, raw_buf).map_err(|code| {
640 let msg = zstd_safe::get_error_name(code);
641 ErrorInner::Decompress(std::io::Error::new(std::io::ErrorKind::InvalidData, msg))
642 .into()
643 }),
644 #[cfg(feature = "lzma")]
645 #[expect(
646 clippy::cast_possible_truncation,
647 reason = "will not overflow usize because all data is in memory"
648 )]
649 CompressAlgo::LZMA => (|| {
650 let mut stream = liblzma::stream::Stream::new_stream_decoder(u64::MAX, 0)?;
651 let st = stream.process(raw_buf, out, liblzma::stream::Action::Run)?;
652 if stream.total_in() as usize != raw_buf.len()
653 || st != liblzma::stream::Status::StreamEnd
654 {
655 bail!(std::io::Error::new(
656 std::io::ErrorKind::InvalidData,
657 "LZMA stream did not end cleanly",
658 ));
659 }
660 Ok(stream.total_out() as usize)
661 })()
662 .map_err(|err| ErrorInner::Decompress(err).into()),
663 #[cfg(feature = "lz4")]
664 CompressAlgo::LZ4 | CompressAlgo::LZ4HC => {
665 let len = lz4::block::decompress_to_buffer(raw_buf, None, out)
666 .map_err(ErrorInner::Decompress)?;
667 Ok(len)
668 }
669 algo => Err(ErrorInner::UnsupportedCompressAlgo(algo).into()),
671 }
672 }
673
674 pub fn build_section_index(
689 &mut self,
690 stream_len: u64,
691 size_limit: usize,
692 ) -> Result<Vec<SectionIndexEntry>> {
693 let end_offset = stream_len
694 .checked_sub(self.archive_start())
695 .ok_or(ErrorInner::OffsetOverflow)?;
696
697 let mut offset = 0u64;
698 let mut index = Vec::with_capacity(size_limit / size_of::<SectionIndexEntry>());
699 while offset < end_offset {
700 let header = self.read_header_at(offset)?;
701 let ent = SectionIndexEntry::new(header.section_type, offset)
702 .ok_or(ErrorInner::OffsetOverflow)?;
703 if index.len() == index.capacity() {
704 bail!(ErrorInner::PayloadTooLong {
705 limit: size_limit,
706 got: None,
707 });
708 }
709 index.push(ent);
710
711 offset = (offset + HEADER_SIZE)
713 .checked_add(header.payload_size.get())
714 .ok_or(ErrorInner::OffsetOverflow)?;
715 }
716 if offset != end_offset {
717 bail!(std::io::Error::new(
718 std::io::ErrorKind::UnexpectedEof,
719 "unexpected end of file"
720 ));
721 }
722 Ok(index)
723 }
724
725 #[allow(
768 clippy::missing_panics_doc,
769 reason = "allocation failures are allowed to panic at anytime"
770 )]
771 pub fn read_section_index(
772 &mut self,
773 stream_len: u64,
774 payload_size_limit: usize,
775 ) -> Result<Option<(Header, Vec<SectionIndexEntry>)>> {
776 const INDEX_ENTRY_SIZE64: u64 = size_of::<SectionIndexEntry>() as u64;
777 const SECTION_INDEX_MIN_VERSION: (u8, u8) = (2, 4);
779
780 let first_magic = self.read_header_at(0)?.magic_version;
783 if (first_magic.major, first_magic.minor) < SECTION_INDEX_MIN_VERSION {
784 return Ok(None);
785 }
786
787 let mut last_entry = SectionIndexEntry::new_zeroed();
789 self.rdr
790 .read_exact_at(stream_len - INDEX_ENTRY_SIZE64, last_entry.as_mut_bytes())?;
791 if last_entry.section_type() != SectionType::SECTION_INDEX {
792 return Ok(None);
793 }
794
795 let index_header_offset = last_entry.offset();
798 let Ok(header) = self.read_header_at(index_header_offset) else {
799 return Ok(None);
801 };
802 let payload_size = header.payload_size.get();
803 let num_sections = payload_size / INDEX_ENTRY_SIZE64;
804 if payload_size != stream_len - index_header_offset - HEADER_SIZE
806 || payload_size % INDEX_ENTRY_SIZE64 != 0
807 || header.section_type != SectionType::SECTION_INDEX
808 || header.compress_algo != CompressAlgo::NONE
809 || u64::from(header.section_number.get()) != num_sections - 1
810 {
811 return Ok(None);
812 }
813
814 if payload_size > payload_size_limit as u64 {
816 bail!(ErrorInner::PayloadTooLong {
817 got: Some(payload_size),
818 limit: payload_size_limit
819 });
820 }
821 let mut entries =
824 SectionIndexEntry::new_vec_zeroed(num_sections as usize).expect("alloc failed");
825 let buf_bytes = entries.as_mut_bytes();
826 debug_assert_eq!(buf_bytes.len() as u64, payload_size);
827 self.rdr
829 .read_exact_at(index_header_offset + HEADER_SIZE, buf_bytes)?;
830
831 header.validate_fast_checksum(buf_bytes)?;
841
842 let mut prev = None;
843 for (i, ent) in entries.iter().enumerate() {
844 let (typ, offset) = (ent.section_type(), ent.offset());
845 if !typ.is_known() {
846 bail!(ErrorInner::MalformedSectionIndex(format!(
847 "entry {i} has unknown section type {typ:?}",
848 )))
849 }
850 if prev.is_some_and(|prev| prev >= offset) {
851 bail!(ErrorInner::MalformedSectionIndex(format!(
852 "entry {i} has unsorted offset {offset} >= previous offset {prev:?}",
853 )));
854 }
855 prev = Some(offset)
856 }
857
858 Ok(Some((header, entries)))
859 }
860}