mla/layers/
compress.rs

use std::fmt;
use std::io::{self, Read, Seek, SeekFrom, Take, Write};

use bincode::Options;
use brotli::writer::StandardAlloc;
use brotli::BrotliState;
use byteorder::{LittleEndian, ReadBytesExt, WriteBytesExt};
use serde::{Deserialize, Serialize};

use crate::layers::traits::{
    InnerWriterTrait, InnerWriterType, LayerFailSafeReader, LayerReader, LayerWriter,
};
use crate::{Error, BINCODE_MAX_DESERIALIZE};

use crate::config::{ArchiveWriterConfig, ConfigResult};
use crate::errors::ConfigError;

// ---------- Config ----------

/// A bigger value means a better compression ratio and fewer indexes to keep in
/// memory, but also slower random access. In the worst case, an access may
/// imply decompressing a whole block to obtain just its last byte.
///
/// According to benchmarking on compression of representative data, 4MB seems
/// to be a good choice
const UNCOMPRESSED_DATA_SIZE: u32 = 4 * 1024 * 1024;

/// A bigger value means a better compression ratio, but slower compression
///
/// According to benchmarking on compression of representative data, level 5
/// seems to be a good choice
const DEFAULT_COMPRESSION_LEVEL: u32 = 5;

/// Default value advised by the brotli libraries
const BROTLI_LOG_WINDOW: u32 = 22;

pub struct CompressionConfig {
    compression_level: u32,
}

impl std::default::Default for CompressionConfig {
    fn default() -> Self {
        CompressionConfig {
            compression_level: DEFAULT_COMPRESSION_LEVEL,
        }
    }
}

impl ArchiveWriterConfig {
    /// Set the compression level (0-11); bigger values cause denser but
    /// slower compression
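    ///
    /// A minimal usage sketch (mirroring the `compress_config` test below):
    /// ```ignore
    /// let mut config = ArchiveWriterConfig::new();
    /// config
    ///     .enable_layer(Layers::COMPRESS)
    ///     .with_compression_level(5)
    ///     .unwrap();
    /// // Values above 11 are rejected
    /// assert!(config.with_compression_level(12).is_err());
    /// ```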
    pub fn with_compression_level(&mut self, compression_level: u32) -> ConfigResult {
        if compression_level > 11 {
            Err(ConfigError::CompressionLevelOutOfRange)
        } else {
            self.compress.compression_level = compression_level;
            Ok(self)
        }
    }
}

// ---------- Reader ----------

/// See `CompressionLayerWriter` for more information
enum CompressionLayerReaderState<R: Read> {
    /// Ready contains the real inner source
    Ready(R),
    /// How many uncompressed bytes have already been read for the current
    /// block
    InData {
        read: u32,
        uncompressed_size: u32,
        /// Use a Box to avoid a too big enum
        /// Use a `Take` to instantiate the `Decompressor` only on the current block's compressed bytes
        decompressor: Box<brotli::Decompressor<Take<R>>>,
    },
    /// Empty is a placeholder to allow state replacement
    Empty,
}

impl<R: Read> fmt::Debug for CompressionLayerReaderState<R> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            CompressionLayerReaderState::Ready(_inner) => write!(f, "Ready"),
            CompressionLayerReaderState::InData { .. } => write!(f, "InData"),
            CompressionLayerReaderState::Empty => write!(f, "Empty"),
        }
    }
}

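/// Sizes index, appended as the layer footer, enabling seekable access
///
/// A worked sketch of the index arithmetic (illustrative values, mirroring the
/// `sizes_info` test below):
/// ```ignore
/// let info = SizesInfo { compressed_sizes: vec![1, 2, 5], last_block_size: 42 };
/// // Every block but the last holds UNCOMPRESSED_DATA_SIZE uncompressed bytes
/// assert_eq!(info.max_uncompressed_pos(), 2 * UNCOMPRESSED_DATA_SIZE as u64 + 42);
/// // An uncompressed position inside block 1 maps to compressed size 2
/// assert_eq!(info.compressed_block_size_at(UNCOMPRESSED_DATA_SIZE as u64 + 1), 2);
/// ```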
#[derive(Serialize, Deserialize, Debug)]
pub struct SizesInfo {
    /// Ordered list of chunk compressed sizes; only set at init
    pub compressed_sizes: Vec<u32>,
    /// Last block uncompressed size
    last_block_size: u32,
}

impl SizesInfo {
    /// Get the uncompressed block size of block `block_num`
    fn uncompressed_block_size_at(&self, block_num: usize) -> u32 {
        if block_num < self.compressed_sizes.len() - 1 {
            UNCOMPRESSED_DATA_SIZE
        } else {
            self.last_block_size
        }
    }

    /// Get the compressed block size at position `uncompressed_pos`
    fn compressed_block_size_at(&self, uncompressed_pos: u64) -> u32 {
        let block_num = uncompressed_pos / (UNCOMPRESSED_DATA_SIZE as u64);
        self.compressed_sizes[block_num as usize]
    }

    /// Maximum uncompressed available position
    fn max_uncompressed_pos(&self) -> u64 {
        (self.compressed_sizes.len() as u64 - 1) * UNCOMPRESSED_DATA_SIZE as u64
            + self.last_block_size as u64
    }

    // Sum of the compressed sizes
    pub fn get_compressed_size(&self) -> u64 {
        self.compressed_sizes.iter().map(|v| *v as u64).sum()
    }
}

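/// Seekable reader over the compression layer
///
/// A minimal read-back sketch (mirroring the `compress_layer` test below;
/// `buf` is a `Cursor` over the archive bytes):
/// ```ignore
/// let mut decomp = CompressionLayerReader::new(Box::new(RawLayerReader::new(buf)))?;
/// decomp.initialize()?; // loads the SizesInfo footer
/// let mut out = Vec::new();
/// decomp.read_to_end(&mut out)?;
/// ```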
pub struct CompressionLayerReader<'a, R: 'a + Read> {
    state: CompressionLayerReaderState<Box<dyn 'a + LayerReader<'a, R>>>,
    pub sizes_info: Option<SizesInfo>,
    /// Position in the under-layer (uncompressed stream)
    // /!\ Due to the decompressor having a block size of the compressed size,
    // any read on it may forward the inner layer to the beginning of the next
    // block
    //
    // [compressed block][compressed block]
    //      ^            ^
    //      |            The inner layer is here
    //      We're actually here
    //
    // Additionally, the `brotli` implementation may consume more or fewer bytes
    // than presumed. For instance, the compression may dump n bytes, while the
    // decompressor is able to recover the decompressed part with only n -
    // epsilon bytes.
    //
    // As a result, `underlayer_pos` and the `inner` position
    // corrected with `sizes_info` may seem out of sync; `underlayer_pos` is the
    // one to trust.
    underlayer_pos: u64,
}

impl<R: Read> CompressionLayerReaderState<R> {
    fn into_inner(self) -> R {
        match self {
            CompressionLayerReaderState::Ready(inner) => inner,
            CompressionLayerReaderState::InData { decompressor, .. } => {
                decompressor.into_inner().into_inner()
            }
            // `panic!` explicitly called to avoid propagating an error which
            // must never happen (i.e., calling `into_inner` in an inconsistent
            // internal state)
            _ => panic!("[Reader] Empty type to inner is impossible"),
        }
    }
}

impl<'a, R: 'a + Read> CompressionLayerReader<'a, R> {
    pub fn new(mut inner: Box<dyn 'a + LayerReader<'a, R>>) -> Result<Self, Error> {
        let underlayer_pos = inner.stream_position()?;
        Ok(Self {
            state: CompressionLayerReaderState::Ready(inner),
            sizes_info: None,
            underlayer_pos,
        })
    }

    /// Returns whether `uncompressed_pos` is in the data stream
    /// If no index is used, always returns `true`
    fn pos_in_stream(&self, uncompressed_pos: u64) -> bool {
        match &self.sizes_info {
            Some(sizes_info) => {
                let pos_max = sizes_info.max_uncompressed_pos();
                uncompressed_pos < pos_max
            }
            None => true,
        }
    }

    /// Instantiate a new decompressor at position `uncompressed_pos`
    /// `uncompressed_pos` must be a compressed block's starting position
    fn new_decompressor_at<S: Read + Seek>(
        &self,
        inner: S,
        uncompressed_pos: u64,
    ) -> Result<brotli::Decompressor<Take<S>>, Error> {
        // Ensure it's a starting position
        if uncompressed_pos % (UNCOMPRESSED_DATA_SIZE as u64) != 0 {
            return Err(Error::BadAPIArgument(
                "[new_decompressor_at] not a starting position".to_string(),
            ));
        }

        // Check we are still in the stream
        if !self.pos_in_stream(uncompressed_pos) {
            // No more in the compressed stream -> nothing to read
            return Err(Error::EndOfStream);
        }

        match &self.sizes_info {
            Some(sizes_info) => {
                // Use index for faster decompression
                let compressed_block_size =
                    sizes_info.compressed_block_size_at(uncompressed_pos) as usize;
                Ok(brotli::Decompressor::new(
                    // Make the Decompressor work only on the compressed block's bytes, no more
                    inner.take(compressed_block_size as u64),
                    compressed_block_size,
                ))
            }
            None => Err(Error::MissingMetadata),
        }
    }

    // TODO add regression test
    /// Get the uncompressed block size at position `uncompressed_pos`
    /// `uncompressed_pos` must be a compressed block's starting position
    fn uncompressed_block_size_at(&self, uncompressed_pos: u64) -> Result<u32, Error> {
        // Ensure it's a starting position
        if uncompressed_pos % (UNCOMPRESSED_DATA_SIZE as u64) != 0 {
            return Err(Error::BadAPIArgument(
                "[uncompressed_block_size_at] not a starting position".to_string(),
            ));
        }

        // Check we are still in the stream
        if !self.pos_in_stream(uncompressed_pos) {
            // No more in the compressed stream -> nothing to read
            return Err(Error::EndOfStream);
        }

        match &self.sizes_info {
            Some(sizes_info) => {
                // Use index for faster decompression

                // Get the uncompressed block size
                let block_num = uncompressed_pos / (UNCOMPRESSED_DATA_SIZE as u64);
                Ok(sizes_info.uncompressed_block_size_at(block_num as usize))
            }
            None => Err(Error::MissingMetadata),
        }
    }

    // TODO add regression test
    /// Resynchronize the inner layer with `uncompressed_pos` (i.e., seek inner to the expected position)
    /// `uncompressed_pos` must be a compressed block's starting position
    fn sync_inner_with_uncompressed_pos<S: Read + Seek>(
        &self,
        inner: &mut S,
        uncompressed_pos: u64,
    ) -> Result<(), Error> {
        // Ensure it's a starting position
        if uncompressed_pos % (UNCOMPRESSED_DATA_SIZE as u64) != 0 {
            return Err(Error::BadAPIArgument(
                "[sync_inner_with_uncompressed_pos] not a starting position".to_string(),
            ));
        }

        // Check we are still in the stream
        if !self.pos_in_stream(uncompressed_pos) {
            // No more in the compressed stream -> nothing to read
            return Err(Error::EndOfStream);
        }

        // Find the right block
        let block_num = uncompressed_pos / (UNCOMPRESSED_DATA_SIZE as u64);
        match &self.sizes_info {
            Some(SizesInfo {
                compressed_sizes, ..
            }) => {
                // Move the underlayer to the start of the block
                let start_position = compressed_sizes
                    .iter()
                    .take(block_num as usize)
                    .map(|size| *size as u64)
                    .sum();
                inner.seek(SeekFrom::Start(start_position))?;
            }
            None => {
                return Err(Error::MissingMetadata);
            }
        }
        Ok(())
    }
}

impl<'a, R: 'a + Read + Seek> LayerReader<'a, R> for CompressionLayerReader<'a, R> {
    fn into_inner(self) -> Option<Box<dyn 'a + LayerReader<'a, R>>> {
        Some(self.state.into_inner())
    }

    fn into_raw(self: Box<Self>) -> R {
        self.state.into_inner().into_raw()
    }

    fn initialize(&mut self) -> Result<(), Error> {
        match &mut self.state {
            CompressionLayerReaderState::Ready(inner) => {
                // Recursive call
                inner.initialize()?;

                // Read the footer: [SizesInfo][SizesInfo length, on 4 bytes]
                let pos = inner.seek(SeekFrom::End(-4))?;
                let len = inner.read_u32::<LittleEndian>()? as u64;

                // Read SizesInfo
                inner.seek(SeekFrom::Start(pos - len))?;
                self.sizes_info = match bincode::options()
                    .with_limit(BINCODE_MAX_DESERIALIZE)
                    .with_fixint_encoding()
                    .deserialize_from(inner.take(len))
                {
                    Ok(sinfo) => Some(sinfo),
                    _ => {
                        return Err(Error::DeserializationError);
                    }
                };

                Ok(())
            }
            _ => {
                // At init, should not be in this state
                Err(Error::WrongReaderState(
                    "[Compression Layer]: on initialization, must be in Ready state".to_string(),
                ))
            }
        }
    }
}

impl<'a, R: 'a + Read + Seek> Read for CompressionLayerReader<'a, R> {
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        if !self.pos_in_stream(self.underlayer_pos) {
            // No more in the compressed stream -> nothing to read
            return Ok(0);
        }

        // Use this mem::replace trick to be able to get back the decompressor
        // inner and freely move from one CompressionLayerReaderState to another
        let old_state = std::mem::replace(&mut self.state, CompressionLayerReaderState::Empty);
        match old_state {
            CompressionLayerReaderState::Ready(mut inner) => {
                self.sync_inner_with_uncompressed_pos(&mut inner, self.underlayer_pos)?;
                let decompressor = Box::new(self.new_decompressor_at(inner, self.underlayer_pos)?);
                let uncompressed_size = self.uncompressed_block_size_at(self.underlayer_pos)?;
                self.state = CompressionLayerReaderState::InData {
                    read: 0,
                    uncompressed_size,
                    decompressor,
                };
                self.read(buf)
            }
            CompressionLayerReaderState::InData {
                read,
                uncompressed_size,
                mut decompressor,
            } => {
                if read > uncompressed_size {
                    return Err(Error::WrongReaderState(
                        "[Compression Layer] Too much data read".to_string(),
                    )
                    .into());
                }
                if read == uncompressed_size {
                    self.state =
                        CompressionLayerReaderState::Ready(decompressor.into_inner().into_inner());
                    // Start a new block, fill it with new values!
                    return self.read(buf);
                }
                let size = std::cmp::min((uncompressed_size - read) as usize, buf.len());
                let read_add = decompressor.read(&mut buf[..size])?;
                self.underlayer_pos += read_add as u64;
                self.state = CompressionLayerReaderState::InData {
                    read: read + read_add as u32,
                    uncompressed_size,
                    decompressor,
                };
                Ok(read_add)
            }
            CompressionLayerReaderState::Empty => Err(Error::WrongReaderState(
                "[Compression Layer] Should never happen, unless an error already occurred before"
                    .to_string(),
            )
            .into()),
        }
    }
}

impl<'a, R: Read + Seek> Seek for CompressionLayerReader<'a, R> {
    /// Seek to the position `pos` in the uncompressed stream
    fn seek(&mut self, pos: SeekFrom) -> io::Result<u64> {
        // Seeking may instantiate a decompressor, and therefore position the
        // inner layer at the end of the requested position's compressed block
        match &self.sizes_info {
            Some(_sizes_info) => {
                match pos {
                    SeekFrom::Start(pos) => {
                        // Find the right block
                        let inside_block = pos % (UNCOMPRESSED_DATA_SIZE as u64);
                        let rounded_pos = pos - inside_block;

                        // Move the underlayer to the start of the block
                        let old_state =
                            std::mem::replace(&mut self.state, CompressionLayerReaderState::Empty);
                        let mut inner = old_state.into_inner();
                        self.sync_inner_with_uncompressed_pos(&mut inner, rounded_pos)?;

                        // New decompressor at the start of the block
                        let mut decompressor = self.new_decompressor_at(inner, rounded_pos)?;
                        let uncompressed_size = self.uncompressed_block_size_at(rounded_pos)?;

                        // Move forward inside the block to reach the expected position
                        io::copy(&mut (&mut decompressor).take(inside_block), &mut io::sink())?;
                        self.state = CompressionLayerReaderState::InData {
                            read: inside_block as u32,
                            uncompressed_size,
                            decompressor: Box::new(decompressor),
                        };
                        self.underlayer_pos = pos;
                        Ok(pos)
                    }
                    SeekFrom::Current(pos) => {
                        // Get the position and do nothing
                        if pos == 0 {
                            Ok(self.underlayer_pos)
                        } else {
                            self.seek(SeekFrom::Start((pos + self.underlayer_pos as i64) as u64))
                        }

                        // TODO: Possible optimization:
                        // - if pos is positive and inside the current block,
                        // just advance the decompressor
                    }
                    SeekFrom::End(pos) => {
                        if pos > 0 {
                            // Seeking past the end is unsupported
                            return Err(Error::EndOfStream.into());
                        }

                        let end_pos = self.sizes_info.as_ref().unwrap().max_uncompressed_pos();
                        let distance_from_end = -pos;
                        self.seek(SeekFrom::Start(end_pos - distance_from_end as u64))
                    }
                }
            }
            None => Err(Error::MissingMetadata.into()),
        }
    }
}

// ---------- Writer ----------

/// Wrap a Writer with counting of written bytes
struct WriterWithCount<W: Write> {
    inner: W,
    pos: u32,
}

impl<W: Write> WriterWithCount<W> {
    fn new(inner: W) -> Self {
        Self { inner, pos: 0 }
    }

    fn into_inner(self) -> W {
        self.inner
    }
}

impl<W: Write> Write for WriterWithCount<W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        self.inner.write(buf).map(|i| {
            self.pos += i as u32;
            i
        })
    }

    fn flush(&mut self) -> io::Result<()> {
        self.inner.flush()
    }
}

enum CompressionLayerWriterState<W: Write> {
    /// Ready contains the real inner destination
    Ready(W),
    /// How many uncompressed bytes have already been written for the current
    /// block
    // Use a Box to avoid a too big enum
    InData(u32, Box<brotli::CompressorWriter<WriterWithCount<W>>>),
    /// Empty is a placeholder to allow state replacement
    Empty,
}

/// The compression layer is made of independent CompressedBlocks, ending with an index for seekable access
/// [CompressedBlock][CompressedBlock]...[CompressedBlock][Index]
///
/// Compression is made of consecutive independent compressed blocks of a fixed
/// uncompressed size
/// Pros:
/// * no need to store the compressed size
/// * compression can be streamed (storing the compressed size before the
/// compressed block leads to either a seekable stream, which is not an option
/// here, or full-memory compression before the actual write, which adds limits
/// to the memory footprint)
/// Cons:
/// * if the index is lost, a slow decompression with a block size of 1 is
/// needed to find the CompressedBlock boundaries
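///
/// A minimal write-then-finalize sketch (mirroring the tests below;
/// `RawLayerWriter` is this crate's innermost layer):
/// ```ignore
/// let mut comp = CompressionLayerWriter::new(
///     Box::new(RawLayerWriter::new(Vec::new())),
///     &CompressionConfig::default(),
/// );
/// comp.write_all(b"some data")?;
/// comp.finalize()?; // appends the [SizesInfo][SizesInfo length] footer
/// let file = Box::new(comp).into_raw();
/// ```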
pub struct CompressionLayerWriter<'a, W: 'a + InnerWriterTrait> {
    state: CompressionLayerWriterState<InnerWriterType<'a, W>>,
    // Ordered list of compressed sizes of blocks of `UNCOMPRESSED_DATA_SIZE`
    // bytes
    //
    // Thus, accessing the `n`th byte in the sublayer is accessing the `n %
    // C`th uncompressed byte in the chunk beginning at `sum(compressed_sizes[:n
    // / C])`, with `C = UNCOMPRESSED_DATA_SIZE`
    compressed_sizes: Vec<u32>,
    // From config
    compression_level: u32,
}

impl<W: InnerWriterTrait> CompressionLayerWriterState<W> {
    fn into_inner(self) -> W {
        match self {
            CompressionLayerWriterState::Ready(inner) => inner,
            CompressionLayerWriterState::InData(_written, compress) => {
                compress.into_inner().into_inner()
            }
            // `panic!` explicitly called to avoid propagating an error which
            // must never happen (i.e., calling `into_inner` in an inconsistent
            // internal state)
            _ => panic!("[Writer] Empty type to inner is impossible"),
        }
    }
}

impl<'a, W: 'a + InnerWriterTrait> CompressionLayerWriter<'a, W> {
    pub fn new(
        inner: InnerWriterType<'a, W>,
        config: &CompressionConfig,
    ) -> CompressionLayerWriter<'a, W> {
        Self {
            state: CompressionLayerWriterState::Ready(inner),
            compressed_sizes: Vec::new(),
            compression_level: config.compression_level,
        }
    }
}

impl<'a, W: 'a + InnerWriterTrait> LayerWriter<'a, W> for CompressionLayerWriter<'a, W> {
    fn into_inner(self) -> Option<InnerWriterType<'a, W>> {
        Some(self.state.into_inner())
    }

    fn into_raw(self: Box<Self>) -> W {
        self.state.into_inner().into_raw()
    }

    fn finalize(&mut self) -> Result<(), Error> {
        // Use this mem::replace trick to be able to get back the compressor
        // inner and freely move from one CompressionLayerWriterState to another
        let old_state = std::mem::replace(&mut self.state, CompressionLayerWriterState::Empty);
        let mut last_block_size = 0;
        let mut inner = match old_state {
            CompressionLayerWriterState::Ready(inner) => inner,
            CompressionLayerWriterState::InData(written, compress) => {
                let inner_count = compress.into_inner();
                self.compressed_sizes.push(inner_count.pos);
                last_block_size = written;
                inner_count.into_inner()
            }
            CompressionLayerWriterState::Empty => {
                // Should never happen, unless an error already occurred before
                return Err(Error::WrongReaderState("[Compression Layer] bad state in finalization, an error may already have occurred before".to_string()));
            }
        };

        // Footer:
        // [SizesInfo][SizesInfo length]

        // `std::mem::take` used to perform zero-copy serialization of `self.compressed_sizes`
        // The value is restored just after the operation (not thread-safe, but
        // in a multi-threaded environment, a lock would already be required for
        // the writing)
        let compressed_sizes = std::mem::take(&mut self.compressed_sizes);
        let sinfo = SizesInfo {
            compressed_sizes,
            last_block_size,
        };
        if bincode::options()
            .with_limit(BINCODE_MAX_DESERIALIZE)
            .with_fixint_encoding()
            .serialize_into(&mut inner, &sinfo)
            .is_err()
        {
            return Err(Error::SerializationError);
        };
        match bincode::serialized_size(&sinfo) {
            Ok(size) => {
                inner.write_u32::<LittleEndian>(size as u32)?;
            }
            Err(_) => {
                return Err(Error::SerializationError);
            }
        };
        self.compressed_sizes = sinfo.compressed_sizes;

        // Recursive call
        inner.finalize()?;
        // Store inner, for further into_inner / into_raw calls
        self.state = CompressionLayerWriterState::Ready(inner);
        Ok(())
    }
}

impl<'a, W: 'a + InnerWriterTrait> Write for CompressionLayerWriter<'a, W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // Use this mem::replace trick to be able to get back the compressor
        // inner and freely move from one CompressionLayerWriterState to another
        let old_state = std::mem::replace(&mut self.state, CompressionLayerWriterState::Empty);
        match old_state {
            CompressionLayerWriterState::Ready(inner) => {
                let inner_count = WriterWithCount::new(inner);
                let mut compress = brotli::CompressorWriter::new(
                    inner_count,
                    0,
                    self.compression_level,
                    BROTLI_LOG_WINDOW,
                );
                let size = std::cmp::min(UNCOMPRESSED_DATA_SIZE as usize, buf.len());
                let written = compress.write(&buf[..size])?;
                self.state = CompressionLayerWriterState::InData(written as u32, Box::new(compress));
                Ok(written)
            }
            CompressionLayerWriterState::InData(written, mut compress) => {
                if written > UNCOMPRESSED_DATA_SIZE {
                    return Err(Error::WrongReaderState(
                        "[Compression Layer] Too much written".to_string(),
                    ).into());
                }
                if written == UNCOMPRESSED_DATA_SIZE {
                    let inner_count = compress.into_inner();
                    self.compressed_sizes.push(inner_count.pos);
                    self.state = CompressionLayerWriterState::Ready(inner_count.into_inner());
                    // Start a new block, fill it with new values!
                    return self.write(buf);
                }
                let size = std::cmp::min((UNCOMPRESSED_DATA_SIZE - written) as usize, buf.len());
                let written_add = compress.write(&buf[..size])?;
                self.state =
                    CompressionLayerWriterState::InData(written + written_add as u32, compress);
                Ok(written_add)
            }
            CompressionLayerWriterState::Empty => {
                Err(Error::WrongReaderState("[Compression Layer] On write, should never happen, unless an error already occurred before".to_string()).into())
            }
        }
    }

    fn flush(&mut self) -> io::Result<()> {
        match &mut self.state {
            CompressionLayerWriterState::Ready(inner) => inner.flush(),
            CompressionLayerWriterState::InData(_written, compress) => compress.flush(),
            CompressionLayerWriterState::Empty => {
                // Should never happen, unless an error already occurred before
                Err(Error::WrongReaderState("[Compression Layer] On flush, should never happen, unless an error already occurred before".to_string()).into())
            }
        }
    }
}

// ---------- Fail-Safe Reader ----------

/// Internal state for the `CompressionLayerFailSafeReader`
enum CompressionLayerFailSafeReaderState<R: Read> {
    /// Ready contains the real inner source
    /// Only used for the initialization
    Ready(R),
    /// Inside a decompression stream
    InData {
        /// While decompressing, one doesn't know in advance the number of compressed bytes
        /// As a result, the following is done:
        /// 1. read from the source inside a buffer
        /// 2. decompress the data from the buffer
        ///     - if there is still data to decompress, go to 1.
        ///     - if this is the end of the stream, continue to 3.
        /// 3. the decompressor may have read too many bytes, i.e. `[end of stream n-1][start of stream n]`
        ///                                                                            ^                 ^
        ///                                                                       input_offset    last read position
        /// 4. rewind, using the cache, to `input_offset`
        ///
        /// A cache must be used, as the source is `Read` but not `Seek`.
        /// `input_offset` is guaranteed to be in the cache because it must be in the decompressor working buffer,
        /// and the working buffer is contained in the cache (in the worst case, it is the whole cache)
        ///
        /// Cache management:
        /// ```ascii
        ///                cache_filled_offset
        ///                        v
        /// cache: [................    ]
        ///            ^
        ///        read_offset
        /// ```
        /// Data read from the source, not yet used
        /// Invariant:
        ///     - cache.len() == FAIL_SAFE_BUFFER_SIZE (cache always allocated)
        cache: Vec<u8>,
        /// Bytes valid in the cache: [0..`cache_filled_offset`[ (0 -> no valid data)
        cache_filled_offset: usize,
        /// Next offset to read from the cache
        /// Invariant:
        ///     - `read_offset` <= `cache_filled_offset`
        read_offset: usize,
        /// Internal decompressor state
        state: Box<BrotliState<StandardAlloc, StandardAlloc, StandardAlloc>>,
        /// Number of bytes decompressed and returned for the current stream
        uncompressed_read: u32,
        /// Inner layer (data source)
        inner: R,
    },
    /// Empty is a placeholder to allow state replacement
    Empty,
}

impl<R: Read> CompressionLayerFailSafeReaderState<R> {
    fn into_inner(self) -> R {
        match self {
            CompressionLayerFailSafeReaderState::Ready(inner) => inner,
            CompressionLayerFailSafeReaderState::InData { inner, .. } => inner,
            // `panic!` explicitly called to avoid propagating an error which
            // must never happen (i.e., calling `into_inner` in an inconsistent
            // internal state)
            _ => panic!("[Reader] Empty type to inner is impossible"),
        }
    }
}

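/// Fail-safe reader: recovers as much data as possible from a possibly
/// truncated archive, without relying on the `SizesInfo` footer
///
/// A usage sketch (mirroring the `compress_failsafe_truncated` test below):
/// ```ignore
/// let mut decomp = CompressionLayerFailSafeReader::new(Box::new(
///     RawLayerFailSafeReader::new(&file[..file.len() / 2]),
/// ))?;
/// let mut buf = Vec::new();
/// // Expected to end with an error; `buf` holds everything recovered so far
/// let _ = decomp.read_to_end(&mut buf);
/// ```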
pub struct CompressionLayerFailSafeReader<'a, R: 'a + Read> {
    state: CompressionLayerFailSafeReaderState<Box<dyn 'a + LayerFailSafeReader<'a, R>>>,
}

impl<'a, R: 'a + Read> CompressionLayerFailSafeReader<'a, R> {
    pub fn new(inner: Box<dyn 'a + LayerFailSafeReader<'a, R>>) -> Result<Self, Error> {
        Ok(Self {
            state: CompressionLayerFailSafeReaderState::Ready(inner),
        })
    }
}

impl<'a, R: 'a + Read> LayerFailSafeReader<'a, R> for CompressionLayerFailSafeReader<'a, R> {
    fn into_inner(self) -> Option<Box<dyn 'a + LayerFailSafeReader<'a, R>>> {
        Some(self.state.into_inner())
    }

    fn into_raw(self: Box<Self>) -> R {
        self.state.into_inner().into_raw()
    }
}

const FAIL_SAFE_BUFFER_SIZE: usize = 4096;

impl<'a, R: 'a + Read> Read for CompressionLayerFailSafeReader<'a, R> {
    /// This `read` is expected to end by failing
    ///
    /// Even in the best configuration, when the inner layer is not broken, the
    /// decompression will fail while reading non-compressed data such as the
    /// CompressionLayerReader footer
    fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
        // Use this mem::replace trick to be able to get back the decompressor
        // inner and freely move from one CompressionLayerFailSafeReaderState to another
        let old_state =
            std::mem::replace(&mut self.state, CompressionLayerFailSafeReaderState::Empty);
        match old_state {
            CompressionLayerFailSafeReaderState::Ready(inner) => {
                self.state = CompressionLayerFailSafeReaderState::InData {
                    cache: vec![0u8; FAIL_SAFE_BUFFER_SIZE],
                    read_offset: 0,
                    cache_filled_offset: 0,
                    state: Box::new(BrotliState::new(
                        StandardAlloc::default(),
                        StandardAlloc::default(),
                        StandardAlloc::default(),
                    )),
                    uncompressed_read: 0,
                    inner,
                };
                self.read(buf)
            }
            CompressionLayerFailSafeReaderState::InData {
                mut cache,
                mut read_offset,
                mut cache_filled_offset,
                mut state,
                mut uncompressed_read,
                mut inner,
            } => {
                if uncompressed_read > UNCOMPRESSED_DATA_SIZE {
                    return Err(Error::WrongReaderState(
                        "[Compress FailSafe Layer] Too much data read".to_string(),
                    )
                    .into());
                }

                if read_offset == cache_filled_offset
                    && cache_filled_offset == FAIL_SAFE_BUFFER_SIZE
                {
                    // Cache is full and there is no more data to read from it
                    // -> cache must be reset
                    cache.fill(0);
                    cache_filled_offset = 0;
                    read_offset = 0;
                }

                // Try to fill the cache from the inner source
                match inner.read(&mut cache[cache_filled_offset..]) {
                    Ok(read) => {
                        if read == 0 && read_offset == cache_filled_offset {
                            // No more data from inner and the cache has been fully read
                            // -> return either an error or Ok(0)
                            if uncompressed_read > 0 {
                                // Inside a stream and no more data available
                                return Err(io::Error::new(
                                    io::ErrorKind::UnexpectedEof,
                                    "No more data from the inner layer",
                                ));
                            } else {
                                // No more data available but not in a stream
                                return Ok(0);
                            }
                        }
                        cache_filled_offset += read;
                    }
                    error => {
                        if read_offset == cache_filled_offset {
                            // No more data in the cache
                            return error;
                        }
                        // There is still data in the cache to read
                        // Will fail and return the error on the next .read()
                    }
                }

                // Number of bytes available in the source
                let mut available_in = cache_filled_offset - read_offset;
                // IN: Offset in the source
                // OUT: Offset in the source after the decompression pass
                let mut input_offset = 0;
                // Available space in the output
                let mut available_out = std::cmp::min(
                    buf.len(),
                    (UNCOMPRESSED_DATA_SIZE - uncompressed_read) as usize,
                );
                // IN: Offset in the output
                // OUT: number of bytes written in the output
                let mut output_offset = 0;
                // OUT: total number of bytes written for the current stream (cumulative)
                let mut written = 0;

                let ret = match brotli::BrotliDecompressStream(
                    &mut available_in,
                    &mut input_offset,
                    &cache[read_offset..cache_filled_offset],
                    &mut available_out,
                    &mut output_offset,
                    buf,
                    &mut written,
                    &mut state,
                ) {
                    brotli::BrotliResult::ResultSuccess => {
                        // End of stream reached

                        // Rewind the cache to the actual start of the new block
                        // input_offset \in [0; cache_filled_offset - read_offset[
                        read_offset += input_offset;

                        // Reset the others
                        state = Box::new(BrotliState::new(
                            StandardAlloc::default(),
                            StandardAlloc::default(),
                            StandardAlloc::default(),
                        ));
                        uncompressed_read = 0;

                        Ok(output_offset)
                    }
                    brotli::BrotliResult::NeedsMoreInput => {
                        // Bytes may have been read and produced
                        read_offset += input_offset;
                        uncompressed_read += output_offset as u32;

                        Ok(output_offset)
                    }
                    brotli::BrotliResult::NeedsMoreOutput => {
                        // Bytes may have been read and produced
                        read_offset += input_offset;
                        uncompressed_read += output_offset as u32;

                        Ok(output_offset)
                    }
                    brotli::BrotliResult::ResultFailure => Err(io::Error::new(
                        io::ErrorKind::InvalidData,
                        "Invalid Data while decompressing",
                    )),
                };

                self.state = CompressionLayerFailSafeReaderState::InData {
                    cache,
                    cache_filled_offset,
                    read_offset,
                    state,
                    uncompressed_read,
                    inner,
                };

                ret
            }
            CompressionLayerFailSafeReaderState::Empty => Err(Error::WrongReaderState(
                "[Compression Layer] Should never happen, unless an error already occurred before"
                    .to_string(),
            )
            .into()),
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::Layers;

    use crate::layers::raw::{RawLayerFailSafeReader, RawLayerReader, RawLayerWriter};
    use brotli::writer::StandardAlloc;
    use rand::distributions::{Alphanumeric, Distribution, Standard};
    use rand::SeedableRng;
    use std::io::{Cursor, Read, Write};
    use std::time::Instant;

    // Use a few multiples of UNCOMPRESSED_DATA_SIZE to force several blocks,
    // and UNCOMPRESSED_DATA_SIZE / 2 to add an incomplete one
    static SIZE: usize = (UNCOMPRESSED_DATA_SIZE * 2 + UNCOMPRESSED_DATA_SIZE / 2) as usize;

    // Return a vector of data of size SIZE
    fn get_data() -> Vec<u8> {
        // Use only an alphanumeric charset to allow for compression
        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
        let data: Vec<u8> = Alphanumeric.sample_iter(&mut rng).take(SIZE).collect();
        assert_eq!(data.len(), SIZE);
        data
    }

    // Return a vector of uncompressable data (i.e. purely random) of size SIZE
    fn get_uncompressable_data() -> Vec<u8> {
        // Use purely random bytes to defeat compression
        let mut rng = rand_chacha::ChaCha8Rng::seed_from_u64(0);
        let data: Vec<u8> = Standard.sample_iter(&mut rng).take(SIZE).collect();
        assert_eq!(data.len(), SIZE);
        data
    }

    #[test]
    fn compress_layer_writer() {
        // Test with one "CompressedBlock"
        let file = Vec::new();
        let mut comp = Box::new(CompressionLayerWriter::new(
            Box::new(RawLayerWriter::new(file)),
            &CompressionConfig::default(),
        ));
        let mut fake_data = vec![1, 2, 3, 4];
        let fake_data2 = vec![5, 6, 7, 8];
        comp.write_all(fake_data.as_slice()).unwrap();
        comp.write_all(fake_data2.as_slice()).unwrap();
        let file = comp.into_raw();

        let mut src = Cursor::new(file.as_slice());
        let mut reader = brotli::Decompressor::new(&mut src, 0);
        let mut buf = Vec::new();
        reader.read_to_end(&mut buf).unwrap();
        println!("{:?}", buf);
        fake_data.extend(fake_data2);
        assert_eq!(fake_data, buf);
    }

    #[test]
    fn compress_layer_several() {
        // Test with several CompressedBlocks - ensure that having only
        // compressed blocks without header is enough to be able to distinguish
        // them at decompression, knowing the uncompressed block size

        let data = get_data();
        let bytes = data.as_slice();

        let file = Vec::new();
        let mut comp = Box::new(CompressionLayerWriter::new(
            Box::new(RawLayerWriter::new(file)),
            &CompressionConfig::default(),
        ));
        let now = Instant::now();
        comp.write_all(bytes).unwrap();
        println!(
            "Compression: {} us for {} bytes",
            now.elapsed().as_micros(),
            bytes.len()
        );

        let file = comp.into_raw();
        println!("{}", file.len());
        let mut src = Cursor::new(file.as_slice());
        // Highlight the use of BrotliDecompressStream
        let now = Instant::now();
        let mut buf = vec![0; UNCOMPRESSED_DATA_SIZE as usize];

        // A similar result can be obtained by using a buffer_size of 1, as demonstrated below
        //
        // Using a Decompressor with a bigger buffer size leads to an over-read of the inner source:
        // let mut reader = brotli::Decompressor::new(&mut src, 4096);
        // reader.read_exact(&mut buf).expect("First buffer");
        //
        // reader.__0.__0.input_offset -> current offset in the underlying buffer
        // reader.__0.__0: DecompressorCustomIo
        // src.position() - buffer_size + input_offset -> actual last byte read
        //
        // But this information is not exposed by the API

        let mut brotli_state = BrotliState::new(
            StandardAlloc::default(),
            StandardAlloc::default(),
            StandardAlloc::default(),
        );

        // At this point the decompressor simply needs an input and output buffer and the ability to track
        // the available data left in each buffer
        let mut available_in = file.len();
        let mut input_offset = 0;
        let mut available_out = buf.len();
        let mut output_offset = 0;
        let mut written = 0;

        match brotli::BrotliDecompressStream(
            &mut available_in,
            &mut input_offset,
            src.get_ref(),
            &mut available_out,
            &mut output_offset,
            &mut buf,
            &mut written,
            &mut brotli_state,
        ) {
            brotli::BrotliResult::ResultSuccess => {}
            _ => panic!(),
        };

        // Ensure the decompression is correct
        assert_eq!(written, buf.len());
        assert_eq!(buf.len(), UNCOMPRESSED_DATA_SIZE as usize);
        assert_eq!(buf.as_slice(), &bytes[..(UNCOMPRESSED_DATA_SIZE as usize)]);

        // Use the `input_offset` information to seek to the beginning of the next compressed block
        src.set_position(input_offset as u64);

        // Use a Decompressor with a buffer size of 1, as a replacement for the above optimization (must be compatible)
        let mut reader = brotli::Decompressor::new(&mut src, 1);
        let mut buf2 = vec![0; UNCOMPRESSED_DATA_SIZE as usize];
        reader.read_exact(&mut buf2).expect("Second buffer");
        assert_eq!(buf2.len(), UNCOMPRESSED_DATA_SIZE as usize);
        assert_eq!(
            buf2.as_slice(),
            &bytes[(UNCOMPRESSED_DATA_SIZE as usize)..((UNCOMPRESSED_DATA_SIZE * 2) as usize)]
        );

        let mut reader = brotli::Decompressor::new(&mut src, 1);
        let mut buf3 = vec![0; SIZE - buf.len() - buf2.len()];
        reader.read_exact(&mut buf3).expect("Last buffer");
        assert_eq!(buf.len() + buf2.len() + buf3.len(), SIZE);
        assert_eq!(
            buf3.as_slice(),
            &bytes[(buf.len() + buf2.len())..(buf.len() + buf2.len() + buf3.len())]
        );

        println!(
            "Decompression: {} us for {} bytes",
            now.elapsed().as_micros(),
            buf.len() + buf2.len() + buf3.len()
        );
        println!("Buf sizes {} {} {}", buf.len(), buf2.len(), buf3.len());
    }

    #[test]
    fn compress_layer() {
        // Compress then decompress with dedicated Layer structs

        for data in [get_data(), get_uncompressable_data()] {
            let bytes = data.as_slice();

            let file = Vec::new();
            let mut comp = Box::new(CompressionLayerWriter::new(
                Box::new(RawLayerWriter::new(file)),
                &CompressionConfig::default(),
            ));
            let now = Instant::now();
            comp.write_all(bytes).unwrap();
            comp.finalize().unwrap();
            let file = comp.into_raw();
            let buf = Cursor::new(file.as_slice());
            let mut decomp =
                Box::new(CompressionLayerReader::new(Box::new(RawLayerReader::new(buf))).unwrap());
            decomp.initialize().unwrap();
            let mut buf = Vec::new();
            decomp.read_to_end(&mut buf).unwrap();
            println!(
                "Compression / Decompression: {} us for {} bytes ({} compressed)",
                now.elapsed().as_micros(),
                bytes.len(),
                file.len()
            );
            assert_eq!(buf.len(), bytes.len());
            assert_eq!(buf.as_slice(), bytes);
        }
    }

    #[test]
    fn compress_failsafe_layer() {
        // Compress then decompress with Fail-Safe Layer structs

        for data in [get_data(), get_uncompressable_data()] {
            let bytes = data.as_slice();

            let file = Vec::new();
            let mut comp = Box::new(CompressionLayerWriter::new(
                Box::new(RawLayerWriter::new(file)),
                &CompressionConfig::default(),
            ));
            let now = Instant::now();
            comp.write_all(bytes).unwrap();
            comp.finalize().unwrap();
            let file = comp.into_raw();
            let mut decomp = Box::new(
                CompressionLayerFailSafeReader::new(Box::new(RawLayerFailSafeReader::new(
                    file.as_slice(),
                )))
                .unwrap(),
            );
            let mut buf = Vec::new();
            // This must end with an error, when we start reading the footer (invalid for decompression)
            decomp.read_to_end(&mut buf).unwrap_err();
            println!(
                "Compression / Decompression (fail-safe): {} us for {} bytes ({} compressed)",
                now.elapsed().as_micros(),
                bytes.len(),
                file.len()
            );
            assert_eq!(buf.len(), bytes.len());
            assert_eq!(buf.as_slice(), bytes);
        }
    }

    #[test]
    fn compress_failsafe_truncated() {
        // Compress then decompress with Fail-Safe Layer structs, while truncating the intermediate buffer

        for data in [get_data(), get_uncompressable_data()] {
            let bytes = data.as_slice();

            let file = Vec::new();
            let mut comp = Box::new(CompressionLayerWriter::new(
                Box::new(RawLayerWriter::new(file)),
                &CompressionConfig::default(),
            ));
            let now = Instant::now();
            comp.write_all(bytes).unwrap();
            comp.finalize().unwrap();
            let file = comp.into_raw();

            // Truncate at the middle
            let stop = file.len() / 2;

            let mut decomp = Box::new(
                CompressionLayerFailSafeReader::new(Box::new(RawLayerFailSafeReader::new(
                    &file[..stop],
                )))
                .unwrap(),
            );
            let mut buf = Vec::new();
            // This is expected to end with an error
            decomp.read_to_end(&mut buf).unwrap_err();
            println!(
                "Compression / Decompression (fail-safe): {} us for {} bytes ({} compressed, {} kept)",
                now.elapsed().as_micros(),
                bytes.len(),
                file.len(),
                buf.len(),
            );
            // Ensure the obtained bytes are correct
            assert_eq!(buf.as_slice(), &bytes[..buf.len()]);
            // We still hope to get enough data (keeping half of the compressed
            // stream should give us at least a third of the uncompressed stream)
            assert!(buf.len() >= bytes.len() / 3);
        }
    }

    #[test]
    fn compress_layer_with_footer() {
        // Inspect the footer written on compression and re-read on decompression

        let data = get_data();
        let bytes = data.as_slice();

        let file = Vec::new();
        let mut comp = Box::new(CompressionLayerWriter::new(
            Box::new(RawLayerWriter::new(file)),
            &CompressionConfig::default(),
        ));
        comp.write_all(bytes).unwrap();
        comp.finalize().unwrap();

        let mut compressed_sizes = Vec::new();
        compressed_sizes.extend_from_slice(&comp.compressed_sizes);

        let file = comp.into_raw();
        let buf = Cursor::new(file.as_slice());
        let mut decomp =
            Box::new(CompressionLayerReader::new(Box::new(RawLayerReader::new(buf))).unwrap());
        decomp.initialize().unwrap();

        // Check the footer has been correctly re-read
        assert_eq!(
            compressed_sizes,
            decomp.sizes_info.unwrap().compressed_sizes
        );
    }

    #[test]
    fn seek_with_footer() {
        for data in [get_data(), get_uncompressable_data()] {
            let bytes = data.as_slice();

            let file = Vec::new();
            let mut comp = Box::new(CompressionLayerWriter::new(
                Box::new(RawLayerWriter::new(file)),
                &CompressionConfig::default(),
            ));
            comp.write_all(bytes).unwrap();
            comp.finalize().unwrap();

            let file = comp.into_raw();
            let buf = Cursor::new(file.as_slice());
            let mut decomp =
                Box::new(CompressionLayerReader::new(Box::new(RawLayerReader::new(buf))).unwrap());
            decomp.initialize().unwrap();

            // Seek in the first block
            let pos = decomp.seek(SeekFrom::Start(5)).unwrap();
            assert_eq!(pos, 5);
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[5..10]);

            // Seek in the second block
            let pos = decomp
                .seek(SeekFrom::Start((UNCOMPRESSED_DATA_SIZE + 4).into()))
                .unwrap();
            assert_eq!(pos, (UNCOMPRESSED_DATA_SIZE + 4).into());
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[pos as usize..(pos + 5) as usize]);

            // Seek relatively (same block)
            let pos = decomp.seek(SeekFrom::Current(2)).unwrap();
            assert_eq!(pos, (UNCOMPRESSED_DATA_SIZE + 4 + 5 + 2).into());
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[pos as usize..(pos + 5) as usize]);

            // Seek relatively (next block)
            let pos = decomp
                .seek(SeekFrom::Current(UNCOMPRESSED_DATA_SIZE.into()))
                .unwrap();
            assert_eq!(pos, (UNCOMPRESSED_DATA_SIZE * 2 + 4 + 5 + 2 + 5).into());
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[pos as usize..(pos + 5) as usize]);

            // Seek relatively (backward)
            let pos = decomp.seek(SeekFrom::Current(-5)).unwrap();
            assert_eq!(pos, (UNCOMPRESSED_DATA_SIZE * 2 + 4 + 5 + 2 + 5).into());
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[pos as usize..(pos + 5) as usize]);

            // Seek from the end
            let pos = decomp.seek(SeekFrom::End(-5)).unwrap();
            assert_eq!(pos, (SIZE - 5) as u64);
            let mut buf = [0u8; 5];
            decomp.read_exact(&mut buf).unwrap();
            assert_eq!(&buf, &bytes[pos as usize..(pos + 5) as usize]);
        }
    }

    #[test]
    fn sizes_info() {
        let sizes_info = SizesInfo {
            compressed_sizes: vec![1, 2, 5],
            last_block_size: 42,
        };

        assert_eq!(
            sizes_info.uncompressed_block_size_at(1),
            UNCOMPRESSED_DATA_SIZE
        );
        assert_eq!(sizes_info.uncompressed_block_size_at(3), 42);

        assert_eq!(
            sizes_info.max_uncompressed_pos(),
            2 * UNCOMPRESSED_DATA_SIZE as u64 + 42
        );

        assert_eq!(
            sizes_info.compressed_block_size_at(UNCOMPRESSED_DATA_SIZE as u64 + 1),
            2
        );
    }

    #[test]
    fn compress_config() {
        // Check the compression level is indeed used
        let data = get_data();
        let bytes = data.as_slice();

        let file = Vec::new();
        let mut config = ArchiveWriterConfig::new();
        config
            .enable_layer(Layers::COMPRESS)
            .with_compression_level(0)
            .unwrap();
        let mut comp = Box::new(CompressionLayerWriter::new(
            Box::new(RawLayerWriter::new(file)),
            &config.compress,
        ));
        comp.write_all(bytes).unwrap();
        comp.finalize().unwrap();

        let file2 = Vec::new();
        let mut config2 = ArchiveWriterConfig::new();
        config2
            .enable_layer(Layers::COMPRESS)
            .with_compression_level(5)
            .unwrap();
        let mut comp2 = Box::new(CompressionLayerWriter::new(
            Box::new(RawLayerWriter::new(file2)),
            &config2.compress,
        ));
        comp2.write_all(bytes).unwrap();
        comp2.finalize().unwrap();

        // file2 must be better compressed than file
        let file = comp.into_raw();
        let file2 = comp2.into_raw();
        assert!(file.len() > file2.len());

        // Check content
        let buf = Cursor::new(file.as_slice());
        let mut buf_out = Vec::new();
        let mut decomp =
            Box::new(CompressionLayerReader::new(Box::new(RawLayerReader::new(buf))).unwrap());
        decomp.initialize().unwrap();
        decomp.read_to_end(&mut buf_out).unwrap();
        let buf2 = Cursor::new(file2.as_slice());
        let mut buf2_out = Vec::new();
        let mut decomp =
            Box::new(CompressionLayerReader::new(Box::new(RawLayerReader::new(buf2))).unwrap());
        decomp.initialize().unwrap();
        decomp.read_to_end(&mut buf2_out).unwrap();
        assert_eq!(buf_out, buf2_out);
    }
}