mla/helpers.rs

/// Helpers for common operations with MLA Archives
use super::layers::traits::InnerWriterTrait;
use super::{ArchiveFileBlock, ArchiveFileID, ArchiveReader, ArchiveWriter, Error};
use std::collections::HashMap;
use std::hash::BuildHasher;
use std::io::{self, Read, Seek, Write};

/// Extract an Archive linearly.
///
/// `export` maps filenames to `Write` objects, which will receive the
/// corresponding file's content. If a file is in the archive but not in
/// `export`, it will be silently ignored.
///
/// This is an effective way to extract all elements from an MLA Archive. It
/// avoids seeking for each file, and for each file part if files are
/// interleaved. In an MLA Archive, seeking can be a costly operation: it might
/// involve reading data to `Sink` (seeking in decompression) or additional
/// computation (getting a whole encrypted block to check its encryption tag).
/// Linear extraction avoids these costs by reading each byte once and only
/// once, and by reducing the number of seeks.
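///
/// A minimal usage sketch (not a compiled doctest), assuming an already
/// configured `ArchiveReader` named `archive` and extracting every file to
/// in-memory buffers:
///
/// ```ignore
/// let names: Vec<String> = archive.list_files()?.cloned().collect();
/// let mut export: HashMap<&String, Vec<u8>> =
///     names.iter().map(|name| (name, Vec::new())).collect();
/// linear_extract(&mut archive, &mut export)?;
/// ```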
pub fn linear_extract<W1: InnerWriterTrait, R: Read + Seek, S: BuildHasher>(
    archive: &mut ArchiveReader<R>,
    export: &mut HashMap<&String, W1, S>,
) -> Result<(), Error> {
    // Seek back to the beginning of the archive
    archive.src.rewind()?;

    // Use a BufReader to merge small read calls (like the ones made while
    // parsing ArchiveFileBlock) into fewer, larger reads
    let mut src = io::BufReader::new(&mut archive.src);

    // Associate each ID in the archive with the corresponding filename.
    // Do not associate directly with the writer, to keep a simpler fn API
    let mut id2filename: HashMap<ArchiveFileID, String> = HashMap::new();

    'read_block: loop {
        match ArchiveFileBlock::from(&mut src)? {
            ArchiveFileBlock::FileStart { filename, id } => {
                // If the starting file is meant to be extracted, record its
                // name so the corresponding writer can be found later
                if export.contains_key(&filename) {
                    id2filename.insert(id, filename.clone());
                }
            }
            ArchiveFileBlock::EndOfFile { id, .. } => {
                // Forget the ID-to-filename association
                id2filename.remove(&id);
            }
            ArchiveFileBlock::FileContent { length, id, .. } => {
                // Write a block to the corresponding output, if any

                let copy_src = &mut (&mut src).take(length);
                // Is the file considered?
                let mut extracted: bool = false;
                if let Some(fname) = id2filename.get(&id) {
                    if let Some(writer) = export.get_mut(fname) {
                        io::copy(copy_src, writer)?;
                        extracted = true;
                    }
                };
                if !extracted {
                    // Exhaust the block to Sink to advance the reader
                    io::copy(copy_src, &mut io::sink())?;
                }
            }
            ArchiveFileBlock::EndOfArchiveData {} => {
                // Proper termination
                break 'read_block;
            }
        }
    }
    Ok(())
}

/// Provides a `Write` interface on an `ArchiveWriter` file
///
/// This interface is meant to be used in situations where the length of the
/// data source is unknown, such as a stream. One can then use the `io::copy`
/// facilities to perform multiple block additions to the archive
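///
/// A minimal usage sketch (not a compiled doctest), assuming an `ArchiveWriter`
/// named `mla` and some `Read` source named `stream` whose length is unknown:
///
/// ```ignore
/// let id = mla.start_file("my_file")?;
/// let mut sw = StreamWriter::new(&mut mla, id);
/// io::copy(&mut stream, &mut sw)?;
/// mla.end_file(id)?;
/// ```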
pub struct StreamWriter<'a, 'b, W: InnerWriterTrait> {
    archive: &'b mut ArchiveWriter<'a, W>,
    file_id: ArchiveFileID,
}

impl<'a, 'b, W: InnerWriterTrait> StreamWriter<'a, 'b, W> {
    pub fn new(archive: &'b mut ArchiveWriter<'a, W>, file_id: ArchiveFileID) -> Self {
        Self { archive, file_id }
    }
}

impl<'a, 'b, W: InnerWriterTrait> Write for StreamWriter<'a, 'b, W> {
    fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
        // Each write call appends the buffer as one file content block in the archive
        self.archive
            .append_file_content(self.file_id, buf.len() as u64, buf)?;
        Ok(buf.len())
    }

    fn flush(&mut self) -> io::Result<()> {
        self.archive.flush()
    }
}

#[cfg(test)]
mod tests {
    use rand::distributions::Standard;
    use rand::prelude::Distribution;
    use rand::{RngCore, SeedableRng};
    use rand_chacha::ChaChaRng;
    use x25519_dalek::{PublicKey, StaticSecret};

    use super::*;
    use crate::tests::build_archive;
    use crate::*;
    use std::io::Cursor;

    // From mla.layers.compress
    const UNCOMPRESSED_DATA_SIZE: u32 = 4 * 1024 * 1024;

    #[test]
    fn full_linear_extract() {
        // Build an archive with 3 files
        let (mla, key, files) = build_archive(None, false);

        // Prepare the reader
        let dest = Cursor::new(mla.into_raw());
        let mut config = ArchiveReaderConfig::new();
        config.add_private_keys(std::slice::from_ref(&key));
        let mut mla_read = ArchiveReader::from_config(dest, config).unwrap();

        // Prepare writers
        let file_list: Vec<String> = mla_read
            .list_files()
            .expect("reader.list_files")
            .cloned()
            .collect();
        let mut export: HashMap<&String, Vec<u8>> =
            file_list.iter().map(|fname| (fname, Vec::new())).collect();
        linear_extract(&mut mla_read, &mut export).expect("Extract error");

        // Check file per file
        for (fname, content) in files.iter() {
            assert_eq!(export.get(fname).unwrap(), content);
        }
    }

    #[test]
    fn one_linear_extract() {
        // Build an archive with 3 files
        let (mla, key, files) = build_archive(None, false);

        // Prepare the reader
        let dest = Cursor::new(mla.into_raw());
        let mut config = ArchiveReaderConfig::new();
        config.add_private_keys(std::slice::from_ref(&key));
        let mut mla_read = ArchiveReader::from_config(dest, config).unwrap();

        // Prepare writers
        let mut export: HashMap<&String, Vec<u8>> = HashMap::new();
        export.insert(&files[0].0, Vec::new());
        linear_extract(&mut mla_read, &mut export).expect("Extract error");

        // Check file
        assert_eq!(export.get(&files[0].0).unwrap(), &files[0].1);
    }

    #[test]
    /// Linear extraction of a file big enough to use several blocks
    ///
    /// This test is different from the layers' compress ones:
    /// - in the standard use, a `Seek` operation is made between each block
    /// - `linear_extract` avoids that repetitive `Seek` usage, as layers are "raw"-read
    ///
    /// Regression test for `brotli-decompressor` 2.3.3 to 2.3.4 (issue #146)
    fn linear_extract_big_file() {
        let file_length = 4 * UNCOMPRESSED_DATA_SIZE as usize;

        // --------- SETUP ----------
        let file = Vec::new();
        // Use a deterministic RNG in tests, for reproducibility. DO NOT DO THIS IN ANY RELEASED BINARY!
        let mut rng = ChaChaRng::seed_from_u64(0);
        let mut bytes = [0u8; 32];
        rng.fill_bytes(&mut bytes);
        let key = StaticSecret::from(bytes);
        let mut config = ArchiveWriterConfig::new();
        let layers = Layers::ENCRYPT | Layers::COMPRESS;
        config
            .set_layers(layers)
            .add_public_keys(&[PublicKey::from(&key)]);
        let mut mla = ArchiveWriter::from_config(file, config).expect("Writer init failed");

        let fname = "my_file".to_string();
        let data: Vec<u8> = Standard.sample_iter(&mut rng).take(file_length).collect();
        assert_eq!(data.len(), file_length);
        mla.add_file(&fname, data.len() as u64, data.as_slice())
            .unwrap();

        mla.finalize().unwrap();

        // --------------------------

        // Prepare the reader
        let dest = Cursor::new(mla.into_raw());
        let mut config = ArchiveReaderConfig::new();
        config.add_private_keys(std::slice::from_ref(&key));
        let mut mla_read = ArchiveReader::from_config(dest, config).unwrap();

        // Prepare writers
        let mut export: HashMap<&String, Vec<u8>> = HashMap::new();
        export.insert(&fname, Vec::new());
        linear_extract(&mut mla_read, &mut export).expect("Extract error");

        // Check file
        assert_eq!(export.get(&fname).unwrap(), &data);
    }

    #[test]
    fn stream_writer() {
        let file = Vec::new();
        let mut mla = ArchiveWriter::from_config(file, ArchiveWriterConfig::new())
            .expect("Writer init failed");

        let fake_file = vec![1, 2, 3, 4, 5, 6, 7, 8, 9, 10];

        // Using write API
        let id = mla.start_file("my_file").unwrap();
        let mut sw = StreamWriter::new(&mut mla, id);
        sw.write_all(&fake_file[..5]).unwrap();
        sw.write_all(&fake_file[5..]).unwrap();
        mla.end_file(id).unwrap();

        // Using io::copy
        let id = mla.start_file("my_file2").unwrap();
        let mut sw = StreamWriter::new(&mut mla, id);
        assert_eq!(
            io::copy(&mut fake_file.as_slice(), &mut sw).unwrap(),
            fake_file.len() as u64
        );
        mla.end_file(id).unwrap();

        mla.finalize().unwrap();

        // Read the obtained stream
        let dest = mla.into_raw();
        let buf = Cursor::new(dest.as_slice());
        let mut mla_read = ArchiveReader::from_config(buf, ArchiveReaderConfig::new()).unwrap();
        let mut content1 = Vec::new();
        mla_read
            .get_file("my_file".to_string())
            .unwrap()
            .unwrap()
            .data
            .read_to_end(&mut content1)
            .unwrap();
        assert_eq!(content1.as_slice(), fake_file.as_slice());
        let mut content2 = Vec::new();
        mla_read
            .get_file("my_file2".to_string())
            .unwrap()
            .unwrap()
            .data
            .read_to_end(&mut content2)
            .unwrap();
        assert_eq!(content2.as_slice(), fake_file.as_slice());
    }
}