1mod reader;
4#[cfg(feature = "std")]
5mod reader_mt;
6#[cfg(feature = "encoder")]
7mod writer;
8#[cfg(all(feature = "encoder", feature = "std"))]
9mod writer_mt;
10
11use alloc::{boxed::Box, vec, vec::Vec};
12#[cfg(feature = "std")]
13use std::io::{self, Seek, SeekFrom};
14
15pub use reader::XzReader;
16#[cfg(feature = "std")]
17pub use reader_mt::XzReaderMt;
18use sha2::Digest;
19#[cfg(feature = "encoder")]
20pub use writer::{XzOptions, XzWriter};
21#[cfg(all(feature = "encoder", feature = "std"))]
22pub use writer_mt::XzWriterMt;
23
24use crate::{error_invalid_data, error_invalid_input, ByteReader, Read};
25#[cfg(feature = "std")]
26use crate::{
27 filter::{bcj::BcjReader, delta::DeltaReader},
28 Lzma2Reader,
29};
30#[cfg(feature = "encoder")]
31use crate::{ByteWriter, Write};
32
/// CRC-32 engine (ISO-HDLC parameters) used for the stream header, stream footer,
/// block header and index checksums, and for the `Crc32` integrity check.
const CRC32: crc::Crc<u32, crc::Table<16>> =
    crc::Crc::<u32, crc::Table<16>>::new(&crc::CRC_32_ISO_HDLC);
/// CRC-64 engine (XZ parameters) backing the `Crc64` integrity check.
const CRC64: crc::Crc<u64, crc::Table<16>> = crc::Crc::<u64, crc::Table<16>>::new(&crc::CRC_64_XZ);

/// Magic bytes expected at the very start of a stream header.
const XZ_MAGIC: [u8; 6] = [0xFD, b'7', b'z', b'X', b'Z', 0x00];

/// Magic bytes expected at the very end of a stream footer.
const XZ_FOOTER_MAGIC: [u8; 2] = [b'Y', b'Z'];
40
/// One entry of the stream index, describing a single block.
#[derive(Debug, Clone)]
struct IndexRecord {
    /// Size of the block as stored in the index, excluding the padding that
    /// aligns the block to a multiple of four bytes.
    unpadded_size: u64,
    /// Size recorded in the index for the block's uncompressed data.
    uncompressed_size: u64,
}
46
/// Parsed stream index: the declared record count plus the records themselves.
#[derive(Debug)]
struct Index {
    /// Record count as read from the index field.
    pub number_of_records: u64,
    /// Records in the order they appear in the index.
    pub records: Vec<IndexRecord>,
}
52
/// Parsed stream header; only the integrity check type is retained.
#[derive(Debug)]
struct StreamHeader {
    /// Check type decoded from the second stream-flags byte.
    pub check_type: CheckType,
}
57
/// Parsed stream footer.
#[derive(Debug)]
struct StreamFooter {
    /// Encoded index size; the real size in bytes is `(backward_size + 1) * 4`.
    pub backward_size: u32,
    /// Raw stream flags, compared against the header's flags by `scan_blocks`.
    pub stream_flags: [u8; 2],
}
63
/// Parsed block header.
#[derive(Debug)]
struct BlockHeader {
    /// Total header size in bytes, including the size byte and the CRC32.
    header_size: usize,
    /// Compressed size, present only when the block flags announce it.
    compressed_size: Option<u64>,
    /// Uncompressed size, present only when the block flags announce it.
    uncompressed_size: Option<u64>,
    /// Filter chain in header order; unused slots are `None`.
    filters: [Option<FilterType>; 4],
    /// Decoded property for the filter in the same slot: the distance for
    /// Delta, the start offset for BCJ filters, the dictionary size for LZMA2.
    properties: [u32; 4],
}
72
/// Location and sizes of one block, as derived from the index by `scan_blocks`.
#[derive(Debug, Clone)]
struct Block {
    /// Absolute position of the block's first byte in the underlying reader.
    start_pos: u64,
    /// Unpadded block size copied from the index record.
    unpadded_size: u64,
    /// Uncompressed size copied from the index record.
    uncompressed_size: u64,
}
79
/// Configuration of a single filter in an XZ filter chain.
#[derive(Debug, Clone)]
pub struct FilterConfig {
    /// Which filter to apply.
    pub filter_type: FilterType,
    /// Filter-specific property: the distance for the Delta filter, the start
    /// offset for the BCJ filters.
    pub property: u32,
}
88
89impl FilterConfig {
90 pub fn new_delta(distance: u32) -> Self {
92 Self {
93 filter_type: FilterType::Delta,
94 property: distance,
95 }
96 }
97
98 pub fn new_bcj_x86(start_pos: u32) -> Self {
100 Self {
101 filter_type: FilterType::BcjX86,
102 property: start_pos,
103 }
104 }
105
106 pub fn new_bcj_arm(start_pos: u32) -> Self {
108 Self {
109 filter_type: FilterType::BcjArm,
110 property: start_pos,
111 }
112 }
113
114 pub fn new_bcj_arm_thumb(start_pos: u32) -> Self {
116 Self {
117 filter_type: FilterType::BcjArmThumb,
118 property: start_pos,
119 }
120 }
121
122 pub fn new_bcj_arm64(start_pos: u32) -> Self {
124 Self {
125 filter_type: FilterType::BcjArm64,
126 property: start_pos,
127 }
128 }
129
130 pub fn new_bcj_ia64(start_pos: u32) -> Self {
132 Self {
133 filter_type: FilterType::BcjIa64,
134 property: start_pos,
135 }
136 }
137
138 pub fn new_bcj_ppc(start_pos: u32) -> Self {
140 Self {
141 filter_type: FilterType::BcjPpc,
142 property: start_pos,
143 }
144 }
145
146 pub fn new_bcj_sparc(start_pos: u32) -> Self {
148 Self {
149 filter_type: FilterType::BcjSparc,
150 property: start_pos,
151 }
152 }
153
154 pub fn new_bcj_risc_v(start_pos: u32) -> Self {
156 Self {
157 filter_type: FilterType::BcjRiscv,
158 property: start_pos,
159 }
160 }
161}
162
/// Integrity check stored after each block; the discriminant is the value
/// written into the stream flags.
#[derive(Default, Debug, Clone, Copy, PartialEq, Eq)]
pub enum CheckType {
    /// No integrity check.
    None = 0x00,
    /// CRC-32 (4-byte check).
    Crc32 = 0x01,
    /// CRC-64 (8-byte check); this is the default.
    #[default]
    Crc64 = 0x04,
    /// SHA-256 (32-byte check).
    Sha256 = 0x0A,
}
176
177impl CheckType {
178 fn from_byte(byte: u8) -> crate::Result<Self> {
179 match byte {
180 0x00 => Ok(CheckType::None),
181 0x01 => Ok(CheckType::Crc32),
182 0x04 => Ok(CheckType::Crc64),
183 0x0A => Ok(CheckType::Sha256),
184 _ => Err(error_invalid_data("unsupported XZ check type")),
185 }
186 }
187
188 #[cfg(any(feature = "encoder", feature = "xz"))]
189 fn checksum_size(self) -> u64 {
190 match self {
191 CheckType::None => 0,
192 CheckType::Crc32 => 4,
193 CheckType::Crc64 => 8,
194 CheckType::Sha256 => 32,
195 }
196 }
197}
198
/// Filters that may appear in an XZ filter chain.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum FilterType {
    /// Delta filter.
    Delta,
    /// BCJ filter for x86 code.
    BcjX86,
    /// BCJ filter for PowerPC code.
    BcjPpc,
    /// BCJ filter for IA-64 code.
    BcjIa64,
    /// BCJ filter for ARM code.
    BcjArm,
    /// BCJ filter for ARM Thumb code.
    BcjArmThumb,
    /// BCJ filter for SPARC code.
    BcjSparc,
    /// BCJ filter for ARM64 code.
    BcjArm64,
    /// BCJ filter for RISC-V code.
    BcjRiscv,
    /// LZMA2 compression.
    Lzma2,
}

impl TryFrom<u64> for FilterType {
    type Error = ();

    /// Converts a filter ID read from a block header into a [`FilterType`].
    ///
    /// Unknown IDs yield `Err(())`.
    fn try_from(value: u64) -> Result<Self, Self::Error> {
        // Filter IDs paired with the filter they select.
        const FILTER_IDS: [(u64, FilterType); 10] = [
            (0x03, FilterType::Delta),
            (0x04, FilterType::BcjX86),
            (0x05, FilterType::BcjPpc),
            (0x06, FilterType::BcjIa64),
            (0x07, FilterType::BcjArm),
            (0x08, FilterType::BcjArmThumb),
            (0x09, FilterType::BcjSparc),
            (0x0A, FilterType::BcjArm64),
            (0x0B, FilterType::BcjRiscv),
            (0x21, FilterType::Lzma2),
        ];

        FILTER_IDS
            .iter()
            .find(|entry| entry.0 == value)
            .map(|entry| entry.1)
            .ok_or(())
    }
}
243
244fn parse_multibyte_integer(data: &[u8]) -> crate::Result<u64> {
246 let mut result = 0u64;
247 let mut shift = 0;
248
249 for &byte in data {
250 if shift >= 63 {
251 return Err(error_invalid_data("XZ multibyte integer too large"));
252 }
253
254 result |= ((byte & 0x7F) as u64) << shift;
255 shift += 7;
256
257 if (byte & 0x80) == 0 {
258 return Ok(result);
259 }
260 }
261
262 Err(error_invalid_data("incomplete XZ multibyte integer"))
263}
264
/// Returns how many bytes the multibyte integer at the start of `data` spans.
///
/// That is the index of the first byte whose high bit is clear, plus one. When
/// no such byte exists the whole slice length is returned.
fn count_multibyte_integer_size(data: &[u8]) -> usize {
    match data.iter().position(|byte| byte & 0x80 == 0) {
        Some(last) => last + 1,
        None => data.len(),
    }
}
274
275fn parse_multibyte_integer_from_reader<R: Read>(reader: &mut R) -> crate::Result<u64> {
276 let mut result = 0u64;
277 let mut shift = 0;
278
279 for _ in 0..9 {
280 let byte = reader.read_u8()?;
282
283 if shift >= 63 {
284 return Err(error_invalid_data("XZ multibyte integer too large"));
285 }
286
287 result |= ((byte & 0x7F) as u64) << shift;
288 shift += 7;
289
290 if (byte & 0x80) == 0 {
291 return Ok(result);
292 }
293 }
294
295 Err(error_invalid_data("XZ multibyte integer too long"))
296}
297
/// Returns the number of bytes the multibyte encoding of `value` occupies.
///
/// Every byte carries seven bits of the value; zero still takes one byte.
fn count_multibyte_integer_size_for_value(value: u64) -> usize {
    let significant_bits = (u64::BITS - value.leading_zeros()) as usize;
    // Round up to whole 7-bit groups, with a minimum of one byte for zero.
    core::cmp::max(1, (significant_bits + 6) / 7)
}
310
311fn encode_multibyte_integer(mut value: u64, buf: &mut [u8]) -> crate::Result<usize> {
312 if value > (u64::MAX / 2) {
313 return Err(error_invalid_data("value too big to encode"));
314 }
315
316 let mut i = 0;
317 while value >= 0x80 && i < buf.len() {
318 buf[i] = (value as u8) | 0x80;
319 value >>= 7;
320 i += 1;
321 }
322
323 if i < buf.len() {
324 buf[i] = value as u8;
325 i += 1;
326 }
327
328 Ok(i)
329}
330
331impl BlockHeader {
332 fn parse<R: Read>(reader: &mut R) -> crate::Result<Option<Self>> {
333 let header_size_encoded = reader.read_u8()?;
334
335 if header_size_encoded == 0 {
336 return Ok(None);
338 }
339
340 let header_size = (header_size_encoded as usize + 1) * 4;
341 if !(8..=1024).contains(&header_size) {
342 return Err(error_invalid_data("invalid XZ block header size"));
343 }
344
345 let mut header_data = vec![0u8; header_size - 1];
347 reader.read_exact(&mut header_data)?;
348
349 let block_flags = header_data[0];
350 let num_filters = ((block_flags & 0x03) + 1) as usize;
351 let has_compressed_size = (block_flags & 0x40) != 0;
352 let has_uncompressed_size = (block_flags & 0x80) != 0;
353
354 let mut offset = 1;
355 let mut compressed_size = None;
356 let mut uncompressed_size = None;
357
358 if has_compressed_size {
360 if offset + 8 > header_data.len() {
361 return Err(error_invalid_data(
362 "XZ block header too short for compressed size",
363 ));
364 }
365 compressed_size = Some(parse_multibyte_integer(&header_data[offset..])?);
366 offset += count_multibyte_integer_size(&header_data[offset..]);
367 }
368
369 if has_uncompressed_size {
370 if offset >= header_data.len() {
371 return Err(error_invalid_data(
372 "XZ block header too short for uncompressed size",
373 ));
374 }
375 uncompressed_size = Some(parse_multibyte_integer(&header_data[offset..])?);
376 offset += count_multibyte_integer_size(&header_data[offset..]);
377 }
378
379 let mut filters = [None; 4];
380 let mut properties = [0; 4];
381
382 for i in 0..num_filters {
383 if offset >= header_data.len() {
384 return Err(error_invalid_data("XZ block header too short for filters"));
385 }
386
387 let filter_type =
388 FilterType::try_from(parse_multibyte_integer(&header_data[offset..])?)
389 .map_err(|_| error_invalid_input("unsupported filter type found"))?;
390
391 offset += count_multibyte_integer_size(&header_data[offset..]);
392
393 let property = match filter_type {
394 FilterType::Delta => {
395 if offset >= header_data.len() {
396 return Err(error_invalid_data(
397 "XZ block header too short for Delta properties",
398 ));
399 }
400
401 let props_size = parse_multibyte_integer(&header_data[offset..])?;
402 offset += count_multibyte_integer_size(&header_data[offset..]);
403
404 if props_size != 1 {
405 return Err(error_invalid_data("invalid Delta properties size"));
406 }
407
408 if offset >= header_data.len() {
409 return Err(error_invalid_data(
410 "XZ block header too short for Delta properties",
411 ));
412 }
413
414 let distance_prop = header_data[offset];
415 offset += 1;
416
417 (distance_prop as u32) + 1
419 }
420 FilterType::BcjX86
421 | FilterType::BcjPpc
422 | FilterType::BcjIa64
423 | FilterType::BcjArm
424 | FilterType::BcjArmThumb
425 | FilterType::BcjSparc
426 | FilterType::BcjArm64
427 | FilterType::BcjRiscv => {
428 if offset >= header_data.len() {
429 return Err(error_invalid_data(
430 "XZ block header too short for BCJ properties",
431 ));
432 }
433
434 let props_size = parse_multibyte_integer(&header_data[offset..])?;
435 offset += count_multibyte_integer_size(&header_data[offset..]);
436
437 match props_size {
438 0 => {
439 0
441 }
442 4 => {
443 if offset + 4 > header_data.len() {
445 return Err(error_invalid_data(
446 "XZ block header too short for BCJ start offset",
447 ));
448 }
449
450 let start_offset_value = u32::from_le_bytes([
451 header_data[offset],
452 header_data[offset + 1],
453 header_data[offset + 2],
454 header_data[offset + 3],
455 ]);
456 offset += 4;
457
458 let bcj_alignment = match filter_type {
460 FilterType::BcjX86 => 1,
461 FilterType::BcjPpc => 4,
462 FilterType::BcjIa64 => 16,
463 FilterType::BcjArm => 4,
464 FilterType::BcjArmThumb => 2,
465 FilterType::BcjSparc => 4,
466 FilterType::BcjArm64 => 4,
467 FilterType::BcjRiscv => 2,
468 _ => unreachable!(),
469 };
470
471 if start_offset_value % bcj_alignment != 0 {
472 return Err(error_invalid_data(
473 "BCJ start offset not aligned to filter requirements",
474 ));
475 }
476
477 start_offset_value
478 }
479 _ => {
480 return Err(error_invalid_data("invalid BCJ properties size"));
481 }
482 }
483 }
484 FilterType::Lzma2 => {
485 if offset >= header_data.len() {
486 return Err(error_invalid_data(
487 "XZ block header too short for LZMA2 properties",
488 ));
489 }
490
491 let props_size = parse_multibyte_integer(&header_data[offset..])?;
492 offset += count_multibyte_integer_size(&header_data[offset..]);
493
494 if props_size != 1 {
495 return Err(error_invalid_data("invalid LZMA2 properties size"));
496 }
497
498 if offset >= header_data.len() {
499 return Err(error_invalid_data(
500 "XZ block header too short for LZMA2 properties",
501 ));
502 }
503
504 let dict_size_prop = header_data[offset];
505 offset += 1;
506
507 if dict_size_prop > 40 {
508 return Err(error_invalid_data("invalid LZMA2 dictionary size"));
509 }
510
511 if dict_size_prop == 40 {
512 0xFFFFFFFF
513 } else {
514 let base = 2 | ((dict_size_prop & 1) as u32);
515 base << (dict_size_prop / 2 + 11)
516 }
517 }
518 };
519
520 filters[i] = Some(filter_type);
521 properties[i] = property;
522 }
523
524 if filters.iter().filter_map(|x| *x).next_back() != Some(FilterType::Lzma2) {
525 return Err(error_invalid_input(
526 "XZ block's last filter must be a LZMA2 filter",
527 ));
528 }
529
530 let expected_offset = header_size - 1 - 4; while offset < expected_offset {
534 if offset >= header_data.len() || header_data[offset] != 0 {
535 return Err(error_invalid_data("invalid XZ block header padding"));
536 }
537 offset += 1;
538 }
539
540 if offset + 4 != header_data.len() {
542 return Err(error_invalid_data("invalid XZ block header CRC32 position"));
543 }
544
545 let expected_crc = u32::from_le_bytes([
546 header_data[offset],
547 header_data[offset + 1],
548 header_data[offset + 2],
549 header_data[offset + 3],
550 ]);
551
552 let mut crc = CRC32.digest();
554 crc.update(&[header_size_encoded]);
555 crc.update(&header_data[..offset]);
556
557 if expected_crc != crc.finalize() {
558 return Err(error_invalid_data("XZ block header CRC32 mismatch"));
559 }
560
561 Ok(Some(BlockHeader {
562 header_size,
563 compressed_size,
564 uncompressed_size,
565 filters,
566 properties,
567 }))
568 }
569
570 pub fn parse_from_slice(
571 block_data: &[u8],
572 ) -> crate::Result<([Option<FilterType>; 4], [u32; 4], usize)> {
573 if block_data.is_empty() {
574 return Err(error_invalid_data("Empty block data"));
575 }
576
577 let header_size_encoded = block_data[0];
578 if header_size_encoded == 0 {
579 return Err(error_invalid_data("Invalid block header size"));
580 }
581
582 let header_size = (header_size_encoded as usize + 1) * 4;
583 if header_size > block_data.len() {
584 return Err(error_invalid_data("Block data too short for header"));
585 }
586
587 let header_data = &block_data[1..header_size];
588 let block_flags = header_data[0];
589 let num_filters = ((block_flags & 0x03) + 1) as usize;
590 let has_compressed_size = (block_flags & 0x40) != 0;
591 let has_uncompressed_size = (block_flags & 0x80) != 0;
592
593 let mut offset = 1;
594
595 if has_compressed_size {
597 if offset >= header_data.len() {
598 return Err(error_invalid_data(
599 "Block header too short for compressed size",
600 ));
601 }
602 offset += count_multibyte_integer_size(&header_data[offset..]);
603 }
604
605 if has_uncompressed_size {
607 if offset >= header_data.len() {
608 return Err(error_invalid_data(
609 "Block header too short for uncompressed size",
610 ));
611 }
612 offset += count_multibyte_integer_size(&header_data[offset..]);
613 }
614
615 let mut filters = [None; 4];
616 let mut properties = [0; 4];
617
618 for i in 0..num_filters {
620 if offset >= header_data.len() {
621 return Err(error_invalid_data("Block header too short for filters"));
622 }
623
624 let filter_id = parse_multibyte_integer(&header_data[offset..])?;
625 let filter_type = FilterType::try_from(filter_id)
626 .map_err(|_| error_invalid_data("Unsupported filter type"))?;
627
628 offset += count_multibyte_integer_size(&header_data[offset..]);
629
630 let property = match filter_type {
631 FilterType::Delta => {
632 if offset >= header_data.len() {
633 return Err(error_invalid_data(
634 "Block header too short for Delta properties",
635 ));
636 }
637
638 let props_size = parse_multibyte_integer(&header_data[offset..])?;
639 offset += count_multibyte_integer_size(&header_data[offset..]);
640
641 if props_size != 1 {
642 return Err(error_invalid_data("Invalid Delta properties size"));
643 }
644
645 if offset >= header_data.len() {
646 return Err(error_invalid_data(
647 "Block header too short for Delta properties",
648 ));
649 }
650
651 let distance_prop = header_data[offset];
652 offset += 1;
653 (distance_prop as u32) + 1
654 }
655 FilterType::BcjX86
656 | FilterType::BcjPpc
657 | FilterType::BcjIa64
658 | FilterType::BcjArm
659 | FilterType::BcjArmThumb
660 | FilterType::BcjSparc
661 | FilterType::BcjArm64
662 | FilterType::BcjRiscv => {
663 if offset >= header_data.len() {
664 return Err(error_invalid_data(
665 "Block header too short for BCJ properties",
666 ));
667 }
668
669 let props_size = parse_multibyte_integer(&header_data[offset..])?;
670 offset += count_multibyte_integer_size(&header_data[offset..]);
671
672 match props_size {
673 0 => 0,
674 4 => {
675 if offset + 4 > header_data.len() {
676 return Err(error_invalid_data(
677 "Block header too short for BCJ start offset",
678 ));
679 }
680
681 let start_offset = u32::from_le_bytes([
682 header_data[offset],
683 header_data[offset + 1],
684 header_data[offset + 2],
685 header_data[offset + 3],
686 ]);
687 offset += 4;
688 start_offset
689 }
690 _ => return Err(error_invalid_data("Invalid BCJ properties size")),
691 }
692 }
693 FilterType::Lzma2 => {
694 if offset >= header_data.len() {
695 return Err(error_invalid_data(
696 "Block header too short for LZMA2 properties",
697 ));
698 }
699
700 let props_size = parse_multibyte_integer(&header_data[offset..])?;
701 offset += count_multibyte_integer_size(&header_data[offset..]);
702
703 if props_size != 1 {
704 return Err(error_invalid_data("Invalid LZMA2 properties size"));
705 }
706
707 if offset >= header_data.len() {
708 return Err(error_invalid_data(
709 "Block header too short for LZMA2 properties",
710 ));
711 }
712
713 let dict_size_prop = header_data[offset];
714 offset += 1;
715
716 if dict_size_prop > 40 {
717 return Err(error_invalid_data("Invalid LZMA2 dictionary size"));
718 }
719
720 if dict_size_prop == 40 {
721 0xFFFFFFFF
722 } else {
723 let base = 2 | ((dict_size_prop & 1) as u32);
724 base << (dict_size_prop / 2 + 11)
725 }
726 }
727 };
728
729 filters[i] = Some(filter_type);
730 properties[i] = property;
731 }
732
733 Ok((filters, properties, header_size))
734 }
735}
736
/// Running integrity-check state, one variant per [`CheckType`].
enum ChecksumCalculator {
    /// No check is computed.
    None,
    /// In-progress CRC-32 digest.
    Crc32(crc::Digest<'static, u32, crc::Table<16>>),
    /// In-progress CRC-64 digest.
    Crc64(crc::Digest<'static, u64, crc::Table<16>>),
    /// In-progress SHA-256 hasher.
    Sha256(sha2::Sha256),
}
744
745impl ChecksumCalculator {
746 fn new(check_type: CheckType) -> Self {
747 match check_type {
748 CheckType::None => Self::None,
749 CheckType::Crc32 => Self::Crc32(CRC32.digest()),
750 CheckType::Crc64 => Self::Crc64(CRC64.digest()),
751 CheckType::Sha256 => Self::Sha256(sha2::Sha256::new()),
752 }
753 }
754
755 fn update(&mut self, data: &[u8]) {
756 match self {
757 ChecksumCalculator::None => {}
758 ChecksumCalculator::Crc32(crc) => {
759 crc.update(data);
760 }
761 ChecksumCalculator::Crc64(crc) => {
762 crc.update(data);
763 }
764 ChecksumCalculator::Sha256(sha) => {
765 sha.update(data);
766 }
767 }
768 }
769
770 fn verify(self, expected: &[u8]) -> bool {
771 match self {
772 ChecksumCalculator::None => true,
773 ChecksumCalculator::Crc32(crc) => {
774 if expected.len() != 4 {
775 return false;
776 }
777
778 let expected_crc =
779 u32::from_le_bytes([expected[0], expected[1], expected[2], expected[3]]);
780
781 let final_crc = crc.finalize();
782
783 final_crc == expected_crc
784 }
785 ChecksumCalculator::Crc64(crc) => {
786 if expected.len() != 8 {
787 return false;
788 }
789
790 let expected_crc = u64::from_le_bytes([
791 expected[0],
792 expected[1],
793 expected[2],
794 expected[3],
795 expected[4],
796 expected[5],
797 expected[6],
798 expected[7],
799 ]);
800
801 let final_crc = crc.finalize();
802
803 final_crc == expected_crc
804 }
805 ChecksumCalculator::Sha256(sha) => {
806 if expected.len() != 32 {
807 return false;
808 }
809
810 let final_sha = sha.finalize();
811
812 &final_sha[..32] == expected
813 }
814 }
815 }
816
817 #[cfg(feature = "encoder")]
818 fn finalize_to_bytes(self) -> Vec<u8> {
819 match self {
820 ChecksumCalculator::None => Vec::new(),
821 ChecksumCalculator::Crc32(crc) => crc.finalize().to_le_bytes().to_vec(),
822 ChecksumCalculator::Crc64(crc) => crc.finalize().to_le_bytes().to_vec(),
823 ChecksumCalculator::Sha256(sha) => sha.finalize().to_vec(),
824 }
825 }
826}
827
828impl StreamHeader {
829 fn parse<R: Read>(reader: &mut R) -> crate::Result<Self> {
830 let mut magic = [0u8; 6];
831 reader.read_exact(&mut magic)?;
832 if magic != XZ_MAGIC {
833 return Err(error_invalid_data("invalid XZ magic bytes"));
834 }
835
836 Self::parse_stream_header_flags_and_crc(reader)
837 }
838
839 pub(crate) fn parse_stream_header_flags_and_crc<R: Read>(
840 reader: &mut R,
841 ) -> crate::Result<Self> {
842 let mut flags = [0u8; 2];
843 reader.read_exact(&mut flags)?;
844
845 if flags[0] != 0 {
846 return Err(error_invalid_data("invalid XZ stream flags"));
847 }
848
849 let check_type = CheckType::from_byte(flags[1])?;
850
851 let expected_crc = reader.read_u32()?;
852
853 if expected_crc != CRC32.checksum(&flags) {
854 return Err(error_invalid_data("XZ stream header CRC32 mismatch"));
855 }
856
857 Ok(StreamHeader { check_type })
858 }
859}
860
861impl StreamFooter {
862 pub(crate) fn parse<R: Read>(reader: &mut R) -> crate::Result<Self> {
863 let expected_crc = reader.read_u32()?;
864
865 let backward_size = reader.read_u32()?;
866
867 let mut stream_flags = [0u8; 2];
868 reader.read_exact(&mut stream_flags)?;
869
870 let mut crc = CRC32.digest();
872 crc.update(&backward_size.to_le_bytes());
873 crc.update(&stream_flags);
874
875 if expected_crc != crc.finalize() {
876 return Err(error_invalid_data("stream footer CRC32 mismatch"));
877 }
878
879 let mut footer_magic = [0u8; 2];
880 reader.read_exact(&mut footer_magic)?;
881 if footer_magic != XZ_FOOTER_MAGIC {
882 return Err(error_invalid_data("invalid XZ footer magic bytes"));
883 }
884
885 Ok(StreamFooter {
886 backward_size,
887 stream_flags,
888 })
889 }
890}
891
892impl Index {
893 pub(crate) fn parse<R: Read>(reader: &mut R) -> crate::Result<Index> {
894 let number_of_records = parse_multibyte_integer_from_reader(reader)?;
896 let mut records = Vec::new();
897 records.try_reserve_exact(number_of_records as usize)?;
898
899 for _ in 0..number_of_records {
900 let unpadded_size = parse_multibyte_integer_from_reader(reader)?;
901 let uncompressed_size = parse_multibyte_integer_from_reader(reader)?;
902
903 if unpadded_size == 0 {
904 return Err(error_invalid_data("invalid index record unpadded size"));
905 }
906
907 records.push(IndexRecord {
908 unpadded_size,
909 uncompressed_size,
910 });
911 }
912
913 let mut bytes_read = 1;
915 bytes_read += count_multibyte_integer_size_for_value(number_of_records);
916 for record in &records {
917 bytes_read += count_multibyte_integer_size_for_value(record.unpadded_size);
918 bytes_read += count_multibyte_integer_size_for_value(record.uncompressed_size);
919 }
920
921 let padding_needed = (4 - (bytes_read % 4)) % 4;
922
923 if padding_needed > 0 {
924 let mut padding_buf = [0u8; 3];
925 reader.read_exact(&mut padding_buf[..padding_needed])?;
926
927 if !padding_buf[..padding_needed].iter().all(|&b| b == 0) {
928 return Err(error_invalid_data("invalid index padding"));
929 }
930 }
931
932 let expected_crc = reader.read_u32()?;
933
934 let mut crc = CRC32.digest();
936 crc.update(&[0]);
937
938 let mut temp_buf = [0u8; 10];
940 let size = encode_multibyte_integer(number_of_records, &mut temp_buf)?;
941 crc.update(&temp_buf[..size]);
942
943 for record in &records {
945 let size = encode_multibyte_integer(record.unpadded_size, &mut temp_buf)?;
946 crc.update(&temp_buf[..size]);
947 let size = encode_multibyte_integer(record.uncompressed_size, &mut temp_buf)?;
948 crc.update(&temp_buf[..size]);
949 }
950
951 update_crc_with_padding(&mut crc, padding_needed);
952
953 if expected_crc != crc.finalize() {
954 return Err(error_invalid_data("index CRC32 mismatch"));
955 }
956
957 Ok(Index {
958 number_of_records,
959 records,
960 })
961 }
962}
963
/// Writes a stream header: magic bytes, the two stream-flag bytes (a zero byte
/// followed by the check type) and the CRC32 of those flags.
#[cfg(feature = "encoder")]
fn write_xz_stream_header<W: Write>(writer: &mut W, check_type: CheckType) -> crate::Result<()> {
    let flags = [0u8, check_type as u8];
    let flags_crc = CRC32.checksum(&flags);

    writer.write_all(&XZ_MAGIC)?;
    writer.write_all(&flags)?;
    writer.write_u32(flags_crc)?;

    Ok(())
}
976
/// Encodes an LZMA2 dictionary size as the one-byte property stored in a
/// block header.
///
/// Property `p` in `0..40` denotes `(2 | (p & 1)) << (p / 2 + 11)` bytes and
/// property 40 denotes `0xFFFFFFFF`. The smallest property whose size is at
/// least `dict_size` is returned, so sizes that are not exactly representable
/// are rounded up. Sizes above 3 GiB round up to property 40.
///
/// # Errors
///
/// Returns an invalid-input error when `dict_size` is below 4096.
#[cfg(feature = "encoder")]
fn encode_lzma2_dict_size(dict_size: u32) -> crate::Result<u8> {
    if dict_size < 4096 {
        return Err(error_invalid_input("LZMA2 dictionary size too small"));
    }

    for prop in 0u8..40 {
        let base = 2 | ((prop & 1) as u32);
        let size = base << (prop / 2 + 11);

        if size >= dict_size {
            return Ok(prop);
        }
    }

    // Larger than the biggest shifted size (3 GiB): property 40 (4 GiB - 1)
    // is the only value that still covers the requested dictionary.
    Ok(40)
}
999
1000fn update_crc_with_padding(crc: &mut crc::Digest<'_, u32, crc::Table<16>>, padding_needed: usize) {
1001 match padding_needed {
1002 1 => crc.update(&[0]),
1003 2 => crc.update(&[0, 0]),
1004 3 => crc.update(&[0, 0, 0]),
1005 _ => {}
1006 }
1007}
1008
/// Locates every block of a seekable XZ stream by reading its header, footer
/// and index.
///
/// Returns the reader (rewound to position zero), the blocks in stream order,
/// and the stream's check type.
///
/// All sizes taken from the footer and the index are untrusted, so the offset
/// arithmetic is done in `u64` with explicit overflow and range checks.
///
/// # Errors
///
/// Fails on I/O errors, a malformed header, footer or index, a stream shorter
/// than 32 bytes, mismatching header/footer flags, an index or block layout
/// that does not fit inside the stream, or an index without any records.
#[cfg(feature = "std")]
fn scan_blocks<R: Read + Seek>(mut reader: R) -> io::Result<(R, Vec<Block>, CheckType)> {
    let stream_header = StreamHeader::parse(&mut reader)?;
    let check_type = stream_header.check_type;

    let header_end_pos = reader.stream_position()?;

    let file_size = reader.seek(SeekFrom::End(0))?;

    if file_size < 32 {
        return Err(error_invalid_data(
            "File too small to contain a valid XZ stream",
        ));
    }

    // The footer occupies the last 12 bytes.
    reader.seek(SeekFrom::End(-12))?;

    let stream_footer = StreamFooter::parse(&mut reader)?;

    let header_flags = [0, check_type as u8];

    if stream_footer.stream_flags != header_flags {
        return Err(error_invalid_data(
            "stream header and footer flags mismatch",
        ));
    }

    // Widen before multiplying: (u32::MAX + 1) * 4 does not fit in u32.
    let index_size = (u64::from(stream_footer.backward_size) + 1) * 4;

    // The index sits directly before the footer and cannot overlap the header.
    let index_start_pos = match (file_size - 12).checked_sub(index_size) {
        Some(pos) if pos >= header_end_pos => pos,
        _ => {
            return Err(error_invalid_data(
                "XZ index does not fit inside the stream",
            ));
        }
    };

    reader.seek(SeekFrom::Start(index_start_pos))?;

    let index_indicator = reader.read_u8()?;

    if index_indicator != 0 {
        return Err(error_invalid_data("invalid XZ index indicator"));
    }

    let index = Index::parse(&mut reader)?;

    let mut blocks = Vec::new();
    let mut block_start_pos = header_end_pos;

    for record in &index.records {
        blocks.push(Block {
            start_pos: block_start_pos,
            unpadded_size: record.unpadded_size,
            uncompressed_size: record.uncompressed_size,
        });

        // Blocks are padded to a multiple of four bytes.
        let padding_needed = (4 - (record.unpadded_size % 4)) % 4;

        let next_start_pos = record
            .unpadded_size
            .checked_add(padding_needed)
            .and_then(|padded_size| block_start_pos.checked_add(padded_size));

        block_start_pos = match next_start_pos {
            // Every block has to end before the index begins.
            Some(pos) if pos <= index_start_pos => pos,
            _ => {
                return Err(error_invalid_data(
                    "XZ index describes blocks beyond the stream",
                ));
            }
        };
    }

    if blocks.is_empty() {
        return Err(io::Error::new(
            io::ErrorKind::InvalidData,
            "No valid XZ blocks found",
        ));
    }

    reader.seek(SeekFrom::Start(0))?;

    Ok((reader, blocks, check_type))
}
1081
/// Wraps `chain_reader` in one decoder per configured filter.
///
/// `filters` and `properties` are paired by position and empty filter slots
/// are skipped. The filters are applied in reverse order, so the last filter
/// in the chain reads from the original reader and the first filter ends up
/// outermost.
#[cfg(feature = "std")]
fn create_filter_chain<'reader>(
    mut chain_reader: Box<dyn Read + 'reader>,
    filters: &[Option<FilterType>],
    properties: &[u32],
) -> Box<dyn Read + 'reader> {
    let stages = filters
        .iter()
        .copied()
        .zip(properties)
        .filter_map(|(slot, value)| slot.map(|kind| (kind, *value)))
        .rev();

    for (kind, value) in stages {
        chain_reader = match kind {
            FilterType::Lzma2 => Box::new(Lzma2Reader::new(chain_reader, value, None)),
            FilterType::Delta => Box::new(DeltaReader::new(chain_reader, value as usize)),
            FilterType::BcjX86 => Box::new(BcjReader::new_x86(chain_reader, value as usize)),
            FilterType::BcjPpc => Box::new(BcjReader::new_ppc(chain_reader, value as usize)),
            FilterType::BcjIa64 => Box::new(BcjReader::new_ia64(chain_reader, value as usize)),
            FilterType::BcjArm => Box::new(BcjReader::new_arm(chain_reader, value as usize)),
            FilterType::BcjArmThumb => {
                Box::new(BcjReader::new_arm_thumb(chain_reader, value as usize))
            }
            FilterType::BcjSparc => Box::new(BcjReader::new_sparc(chain_reader, value as usize)),
            FilterType::BcjArm64 => Box::new(BcjReader::new_arm64(chain_reader, value as usize)),
            FilterType::BcjRiscv => Box::new(BcjReader::new_riscv(chain_reader, value as usize)),
        };
    }

    chain_reader
}
1141
/// Writes `padding_needed` zero bytes to `writer`.
///
/// Only one to three bytes of padding are ever written; any other count
/// writes nothing and succeeds.
#[cfg(feature = "encoder")]
fn add_padding<W: Write + ?Sized>(writer: &mut W, padding_needed: usize) -> crate::Result<()> {
    const ZEROS: [u8; 3] = [0; 3];

    if (1..=3).contains(&padding_needed) {
        writer.write_all(&ZEROS[..padding_needed])
    } else {
        Ok(())
    }
}
1151
/// Builds the variable part of a block header: the block flags byte followed
/// by every filter's ID, property size and properties.
///
/// The header size byte, padding and CRC32 are not included. No size fields
/// are emitted, so the flags byte only carries the filter count.
/// `lzma_dict_size` supplies the dictionary size for the LZMA2 filter; the
/// `property` of an LZMA2 entry in `filters` is not used.
///
/// # Errors
///
/// Returns an invalid-input error when `filters` is empty or has more than
/// four entries, when a Delta distance is outside `1..=256`, or when the
/// dictionary size cannot be encoded.
#[cfg(feature = "encoder")]
fn generate_block_header_data(
    filters: &[FilterConfig],
    lzma_dict_size: u32,
) -> crate::Result<Vec<u8>> {
    let num_filters = filters.len();

    // The flags byte stores `count - 1`, so an empty chain is unrepresentable.
    if num_filters == 0 {
        return Err(error_invalid_input(
            "filter chain must contain at least one filter",
        ));
    }

    if num_filters > 4 {
        return Err(error_invalid_input("too many filters in chain (maximum 4)"));
    }

    let mut header_data = Vec::new();

    let block_flags = (num_filters - 1) as u8;
    header_data.push(block_flags);

    let mut temp_buf = [0u8; 10];

    for filter_config in filters {
        let filter_id = match filter_config.filter_type {
            FilterType::Delta => 0x03,
            FilterType::BcjX86 => 0x04,
            FilterType::BcjPpc => 0x05,
            FilterType::BcjIa64 => 0x06,
            FilterType::BcjArm => 0x07,
            FilterType::BcjArmThumb => 0x08,
            FilterType::BcjSparc => 0x09,
            FilterType::BcjArm64 => 0x0A,
            FilterType::BcjRiscv => 0x0B,
            FilterType::Lzma2 => 0x21,
        };
        let size = encode_multibyte_integer(filter_id, &mut temp_buf)?;
        header_data.extend_from_slice(&temp_buf[..size]);

        match filter_config.filter_type {
            FilterType::Delta => {
                // The distance is stored as `distance - 1` in a single byte.
                // `wrapping_sub` maps a distance of 0 to u32::MAX, which the
                // conversion rejects together with anything above 256.
                let Ok(distance_prop) = u8::try_from(filter_config.property.wrapping_sub(1))
                else {
                    return Err(error_invalid_input(
                        "Delta filter distance must be between 1 and 256",
                    ));
                };

                let size = encode_multibyte_integer(1, &mut temp_buf)?;
                header_data.extend_from_slice(&temp_buf[..size]);
                header_data.push(distance_prop);
            }
            FilterType::BcjX86
            | FilterType::BcjPpc
            | FilterType::BcjIa64
            | FilterType::BcjArm
            | FilterType::BcjArmThumb
            | FilterType::BcjSparc
            | FilterType::BcjArm64
            | FilterType::BcjRiscv => {
                if filter_config.property == 0 {
                    // A zero start offset is expressed by omitting the properties.
                    let size = encode_multibyte_integer(0, &mut temp_buf)?;
                    header_data.extend_from_slice(&temp_buf[..size]);
                } else {
                    let size = encode_multibyte_integer(4, &mut temp_buf)?;
                    header_data.extend_from_slice(&temp_buf[..size]);
                    header_data.extend_from_slice(&filter_config.property.to_le_bytes());
                }
            }
            FilterType::Lzma2 => {
                let size = encode_multibyte_integer(1, &mut temp_buf)?;
                header_data.extend_from_slice(&temp_buf[..size]);

                let dict_size_prop = encode_lzma2_dict_size(lzma_dict_size)?;
                header_data.push(dict_size_prop);
            }
        }
    }

    Ok(header_data)
}
1228
/// Writes a complete block header for the given filter chain.
///
/// Layout: encoded size byte, flags and filter data, zero padding up to a
/// multiple of four bytes, then the CRC32 of everything before it.
///
/// Returns the total header size in bytes.
#[cfg(feature = "encoder")]
fn write_xz_block_header<W: Write>(
    writer: &mut W,
    filters: &[FilterConfig],
    lzma_dict_size: u32,
) -> crate::Result<u64> {
    let body = generate_block_header_data(filters, lzma_dict_size)?;

    // Size byte + body + CRC32, before alignment.
    let unpadded_len = 1 + body.len() + 4;
    let padding_len = (4 - unpadded_len % 4) % 4;
    let header_size = unpadded_len + padding_len;
    let size_byte = (header_size / 4 - 1) as u8;

    let checksum = {
        let mut digest = CRC32.digest();
        digest.update(&[size_byte]);
        digest.update(&body);
        update_crc_with_padding(&mut digest, padding_len);
        digest.finalize()
    };

    writer.write_u8(size_byte)?;
    writer.write_all(&body)?;
    add_padding(writer, padding_len)?;
    writer.write_u32(checksum)?;

    Ok(header_size as u64)
}
1260
/// Writes the stream index for the given records.
///
/// Layout: zero indicator byte, record count, each record's unpadded and
/// uncompressed size as multibyte integers, zero padding up to a multiple of
/// four bytes, then the CRC32 of everything before it.
#[cfg(feature = "encoder")]
fn write_xz_index<W: Write>(writer: &mut W, index_records: &[IndexRecord]) -> crate::Result<()> {
    let mut payload = Vec::new();
    let mut scratch = [0u8; 10];

    let record_count = index_records.len() as u64;
    let fields = core::iter::once(record_count).chain(
        index_records
            .iter()
            .flat_map(|record| [record.unpadded_size, record.uncompressed_size]),
    );
    for field in fields {
        let len = encode_multibyte_integer(field, &mut scratch)?;
        payload.extend_from_slice(&scratch[..len]);
    }

    // Indicator byte plus payload, aligned to four bytes.
    let unpadded_len = 1 + payload.len();
    let padding_len = (4 - (unpadded_len % 4)) % 4;

    let checksum = {
        let mut digest = CRC32.digest();
        digest.update(&[0x00]);
        digest.update(&payload);
        update_crc_with_padding(&mut digest, padding_len);
        digest.finalize()
    };

    writer.write_u8(0x00)?;
    writer.write_all(&payload)?;
    add_padding(writer, padding_len)?;
    writer.write_u32(checksum)?;

    Ok(())
}
1295
/// Writes the stream footer: CRC32, backward size, stream flags and footer
/// magic.
///
/// The backward size is derived from the size the index for `index_records`
/// occupies (indicator byte, fields, padding and CRC32) as `size / 4 - 1`.
/// The CRC32 covers the backward size and the stream flags.
#[cfg(feature = "encoder")]
fn write_xz_stream_footer<W: Write>(
    writer: &mut W,
    index_records: &[IndexRecord],
    check_type: CheckType,
) -> crate::Result<()> {
    let records_len: usize = index_records
        .iter()
        .map(|record| {
            count_multibyte_integer_size_for_value(record.unpadded_size)
                + count_multibyte_integer_size_for_value(record.uncompressed_size)
        })
        .sum();

    // Indicator byte + record count + records.
    let unpadded_len =
        1 + count_multibyte_integer_size_for_value(index_records.len() as u64) + records_len;
    let padding_len = (4 - (unpadded_len % 4)) % 4;
    // Padded index plus its trailing CRC32.
    let index_size = unpadded_len + padding_len + 4;

    let backward_size = ((index_size / 4) - 1) as u32;
    let stream_flags = [0u8, check_type as u8];

    // Byte sequence covered by the footer checksum.
    let mut covered = [0u8; 6];
    covered[..4].copy_from_slice(&backward_size.to_le_bytes());
    covered[4..].copy_from_slice(&stream_flags);

    writer.write_u32(CRC32.checksum(&covered))?;
    writer.write_u32(backward_size)?;
    writer.write_all(&stream_flags)?;
    writer.write_all(&XZ_FOOTER_MAGIC)?;

    Ok(())
}
1332
// Unit tests for the multibyte-integer helpers and the checksum calculator.
#[cfg(test)]
mod tests {
    use super::*;

    // Values on both sides of each 7-bit group boundary must round-trip, and
    // the predicted size must match the number of bytes actually written.
    #[test]
    fn test_encode_decode_multibyte_integer() {
        let values = [0, 127, 128, 16383, 16384, 2097151, 2097152];

        for &value in &values {
            let mut buf = [0u8; 9];
            let encoded_size = encode_multibyte_integer(value, &mut buf).unwrap();

            let decoded = parse_multibyte_integer(&buf[..encoded_size]).unwrap();
            assert_eq!(decoded, value);

            let size_for_value = count_multibyte_integer_size_for_value(value);
            assert_eq!(size_for_value, encoded_size);
        }
    }

    // The largest encodable value round-trips; anything above it is rejected.
    #[test]
    fn test_multibyte_integer_limits() {
        // Largest value the encoder accepts.
        let max_value = u64::MAX / 2;
        let mut buf = [0u8; 9];
        let encoded_size = encode_multibyte_integer(max_value, &mut buf).unwrap();

        let decoded = parse_multibyte_integer(&buf[..encoded_size]).unwrap();
        assert_eq!(decoded, max_value);

        // One bit too wide for the encoding.
        let too_large = u64::MAX;
        let encoded_size = encode_multibyte_integer(too_large, &mut buf);
        assert!(encoded_size.is_err());
    }

    // Sanity check that the record keeps the values it was built with.
    #[test]
    fn test_index_record_creation() {
        let record = IndexRecord {
            unpadded_size: 1024,
            uncompressed_size: 2048,
        };

        assert_eq!(record.unpadded_size, 1024);
        assert_eq!(record.uncompressed_size, 2048);
    }

    #[test]
    fn test_checksum_calculator_crc32() {
        let mut calc = ChecksumCalculator::new(CheckType::Crc32);
        calc.update(b"123456789");

        // Little-endian bytes of 0xCBF43926.
        let expected = [0x26, 0x39, 0xF4, 0xCB];
        assert!(calc.verify(&expected));
    }

    #[test]
    fn test_checksum_calculator_crc64() {
        let mut calc = ChecksumCalculator::new(CheckType::Crc64);
        calc.update(b"123456789");

        // Little-endian bytes of 0x995DC9BBDF1939FA.
        let expected = [250, 57, 25, 223, 187, 201, 93, 153];
        assert!(calc.verify(&expected));
    }

    #[test]
    fn test_checksum_calculator_sha256() {
        let mut calc = ChecksumCalculator::new(CheckType::Sha256);
        calc.update(b"123456789");

        // Expected SHA-256 digest of the ASCII string "123456789".
        let expected = [
            21, 226, 176, 211, 195, 56, 145, 235, 176, 241, 239, 96, 158, 196, 25, 66, 12, 32, 227,
            32, 206, 148, 198, 95, 188, 140, 51, 18, 68, 142, 178, 37,
        ];
        assert!(calc.verify(&expected));
    }
}