sevenz_rust2/
writer.rs

1mod counting;
2mod pack_info;
3mod seq_reader;
4mod unpack_info;
5
6pub use self::counting::CountingWriter;
7pub use self::seq_reader::*;
8use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
9use crate::{Error, SevenZArchiveEntry, archive::*, encoders};
10use bit_set::BitSet;
11use byteorder::*;
12use crc32fast::Hasher;
13use std::{
14    cell::Cell,
15    io::{Read, Seek, Write},
16    rc::Rc,
17    sync::Arc,
18};
19
20#[cfg(not(target_arch = "wasm32"))]
21use std::{fs::File, path::Path};
22
/// Generates a `SevenZWriter` method that writes one optional per-file property
/// (a timestamp or an attribute word) into the files-info header.
///
/// * `$fn_name`  - name of the generated method.
/// * `$nid`      - property id byte written in front of the property.
/// * `$has_time` - boolean entry field telling whether the value is defined.
/// * `$time`     - entry field holding the value; converted with `.into()`.
/// * `$write_fn` - little-endian writer method used for each value
///   (defaults to `write_u64`).
///
/// Nothing is written when no entry defines the property.
macro_rules! write_times {
    // Short form: values are written with `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            if defined == 0 {
                return Ok(());
            }

            header.write_u8($nid)?;
            // The property body is staged so its length can be written first.
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == self.files.len() {
                // Every entry has a value: a single "all defined" marker suffices.
                payload.write_u8(1)?;
            } else {
                // Otherwise a bit vector names the entries that carry a value.
                payload.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut payload, &defined_bits)?;
            }
            // A zero byte precedes the values themselves.
            payload.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                payload.$write_fn::<LittleEndian>((entry.$time).into())?;
            }

            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
66
67type Result<T> = std::result::Result<T, Error>;
68
69/// Writes a 7z file.
70#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
71pub struct SevenZWriter<W: Write> {
72    output: W,
73    files: Vec<SevenZArchiveEntry>,
74    content_methods: Arc<Vec<SevenZMethodConfiguration>>,
75    pack_info: PackInfo,
76    unpack_info: UnpackInfo,
77    encrypt_header: bool,
78}
79
80#[cfg(not(target_arch = "wasm32"))]
81impl SevenZWriter<File> {
82    /// Creates a file to write a 7z archive to.
83    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
84        let file = File::create(path.as_ref())
85            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
86        Self::new(file)
87    }
88}
89
90impl<W: Write + Seek> SevenZWriter<W> {
91    /// Prepares writer to write a 7z archive to.
92    pub fn new(mut writer: W) -> Result<Self> {
93        writer
94            .seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))
95            .map_err(Error::io)?;
96
97        Ok(Self {
98            output: writer,
99            files: Default::default(),
100            content_methods: Arc::new(vec![SevenZMethodConfiguration::new(SevenZMethod::LZMA2)]),
101            pack_info: Default::default(),
102            unpack_info: Default::default(),
103            encrypt_header: true,
104        })
105    }
106
107    /// Sets the default compression methods to use for entry data. Default is LZMA2.
108    pub fn set_content_methods(
109        &mut self,
110        content_methods: Vec<SevenZMethodConfiguration>,
111    ) -> &mut Self {
112        if content_methods.is_empty() {
113            return self;
114        }
115        self.content_methods = Arc::new(content_methods);
116        self
117    }
118
119    /// Whether to enable the encryption of the -header. Default is `true`.
120    pub fn set_encrypt_header(&mut self, enabled: bool) {
121        self.encrypt_header = enabled;
122    }
123
124    /// Adds an archive `entry` with data from `reader`.
125    ///
126    /// # Example
127    /// ```no_run
128    /// use sevenz_rust2::*;
129    /// use std::fs::File;
130    /// use std::path::Path;
131    /// let mut sz = SevenZWriter::create("path/to/dest.7z").expect("create writer ok");
132    /// let src = Path::new("path/to/source.txt");
133    /// let name = "source.txt".to_string();
134    /// let entry = sz.push_archive_entry(
135    ///               SevenZArchiveEntry::from_path(&src, name),
136    ///               Some(File::open(src).unwrap()),
137    ///           )
138    ///           .expect("ok");
139    /// let compressed_size = entry.compressed_size;
140    /// sz.finish().expect("done");
141    /// ```
142    pub fn push_archive_entry<R: Read>(
143        &mut self,
144        mut entry: SevenZArchiveEntry,
145        reader: Option<R>,
146    ) -> Result<&SevenZArchiveEntry> {
147        if !entry.is_directory {
148            if let Some(mut r) = reader {
149                let mut compressed_len = 0;
150                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
151
152                let mut more_sizes: Vec<Rc<Cell<usize>>> =
153                    Vec::with_capacity(self.content_methods.len() - 1);
154
155                let (crc, size) = {
156                    let mut w = Self::create_writer(
157                        &self.content_methods,
158                        &mut compressed,
159                        &mut more_sizes,
160                    )?;
161                    let mut write_len = 0;
162                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
163                    let mut buf = [0u8; 4096];
164                    loop {
165                        match r.read(&mut buf) {
166                            Ok(n) => {
167                                if n == 0 {
168                                    break;
169                                }
170                                w.write_all(&buf[..n]).map_err(|e| {
171                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
172                                })?;
173                            }
174                            Err(e) => {
175                                return Err(Error::io_msg(
176                                    e,
177                                    format!("Encode entry:{}", entry.name()),
178                                ));
179                            }
180                        }
181                    }
182                    w.flush()
183                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
184                    w.write(&[])
185                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
186
187                    (w.crc_value(), write_len)
188                };
189                let compressed_crc = compressed.crc_value();
190                entry.has_stream = true;
191                entry.size = size as u64;
192                entry.crc = crc as u64;
193                entry.has_crc = true;
194                entry.compressed_crc = compressed_crc as u64;
195                entry.compressed_size = compressed_len as u64;
196                self.pack_info
197                    .add_stream(compressed_len as u64, compressed_crc);
198
199                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
200                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
201                sizes.push(size as u64);
202
203                self.unpack_info
204                    .add(self.content_methods.clone(), sizes, crc);
205
206                self.files.push(entry);
207                return Ok(self.files.last().unwrap());
208            }
209        }
210        entry.has_stream = false;
211        entry.size = 0;
212        entry.compressed_size = 0;
213        entry.has_crc = false;
214        self.files.push(entry);
215        Ok(self.files.last().unwrap())
216    }
217
218    /// Solid compression - packs `entries` into one pack.
219    ///
220    /// # Panics
221    /// * If `entries`'s length not equals to `reader.reader_len()`
222    pub fn push_archive_entries<R: Read>(
223        &mut self,
224        mut entries: Vec<SevenZArchiveEntry>,
225        reader: SeqReader<SourceReader<R>>,
226    ) -> Result<&mut Self> {
227        let mut r = reader;
228        assert_eq!(r.reader_len(), entries.len());
229        let mut compressed_len = 0;
230        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
231        let content_methods = &self.content_methods;
232        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
233
234        let (crc, size) = {
235            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
236            let mut write_len = 0;
237            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
238            let mut buf = [0u8; 4096];
239
240            fn entries_names(entries: &[SevenZArchiveEntry]) -> String {
241                let mut names = String::with_capacity(512);
242                for ele in entries.iter() {
243                    names.push_str(&ele.name);
244                    names.push(';');
245                    if names.len() > 512 {
246                        break;
247                    }
248                }
249                names
250            }
251
252            loop {
253                match r.read(&mut buf) {
254                    Ok(n) => {
255                        if n == 0 {
256                            break;
257                        }
258                        w.write_all(&buf[..n]).map_err(|e| {
259                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
260                        })?;
261                    }
262                    Err(e) => {
263                        return Err(Error::io_msg(
264                            e,
265                            format!("Encode entries:{}", entries_names(&entries)),
266                        ));
267                    }
268                }
269            }
270            w.flush().map_err(|e| {
271                let mut names = String::with_capacity(512);
272                for ele in entries.iter() {
273                    names.push_str(&ele.name);
274                    names.push(';');
275                    if names.len() > 512 {
276                        break;
277                    }
278                }
279                Error::io_msg(e, format!("Encode entry:{}", names))
280            })?;
281            w.write(&[]).map_err(|e| {
282                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
283            })?;
284
285            (w.crc_value(), write_len)
286        };
287        let compressed_crc = compressed.crc_value();
288        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
289        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
290        for i in 0..entries.len() {
291            let entry = &mut entries[i];
292            let ri = &r[i];
293            entry.crc = ri.crc_value() as u64;
294            entry.size = ri.read_count() as u64;
295            sub_stream_crcs.push(entry.crc as u32);
296            sub_stream_sizes.push(entry.size);
297            entry.has_crc = true;
298        }
299
300        self.pack_info
301            .add_stream(compressed_len as u64, compressed_crc);
302
303        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
304        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
305        sizes.push(size as u64);
306
307        self.unpack_info.add_multiple(
308            content_methods.clone(),
309            sizes,
310            crc,
311            entries.len() as u64,
312            sub_stream_sizes,
313            sub_stream_crcs,
314        );
315
316        self.files.extend(entries);
317        Ok(self)
318    }
319
320    fn create_writer<'a, O: Write + 'a>(
321        methods: &[SevenZMethodConfiguration],
322        out: O,
323        more_sized: &mut Vec<Rc<Cell<usize>>>,
324    ) -> Result<Box<dyn Write + 'a>> {
325        let mut encoder: Box<dyn Write> = Box::new(out);
326        let mut first = true;
327        for mc in methods.iter() {
328            if !first {
329                let counting = CountingWriter::new(encoder);
330                more_sized.push(counting.counting());
331                encoder = Box::new(encoders::add_encoder(counting, mc)?);
332            } else {
333                let counting = CountingWriter::new(encoder);
334                encoder = Box::new(encoders::add_encoder(counting, mc)?);
335            }
336            first = false;
337        }
338        Ok(encoder)
339    }
340
341    /// Finishes the compression.
342    pub fn finish(mut self) -> std::io::Result<W> {
343        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
344        self.write_encoded_header(&mut header)?;
345        let header_pos = self.output.stream_position()?;
346        self.output.write_all(&header)?;
347        let crc32 = crc32fast::hash(&header);
348        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
349        {
350            let mut hhw = hh.as_mut_slice();
351            //sig
352            hhw.write_all(SEVEN_Z_SIGNATURE)?;
353            //version
354            hhw.write_u8(0)?;
355            hhw.write_u8(2)?;
356            //placeholder for crc: index = 8
357            hhw.write_u32::<LittleEndian>(0)?;
358
359            // start header
360            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
361            hhw.write_u64::<LittleEndian>(0xffffffff & header.len() as u64)?;
362            hhw.write_u32::<LittleEndian>(crc32)?;
363        }
364        let crc32 = crc32fast::hash(&hh[12..]);
365        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
366
367        self.output.seek(std::io::SeekFrom::Start(0))?;
368        self.output.write_all(&hh)?;
369        self.output.flush()?;
370        Ok(self.output)
371    }
372
373    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
374        header.write_u8(K_HEADER)?;
375        header.write_u8(K_MAIN_STREAMS_INFO)?;
376        self.write_streams_info(header)?;
377        self.write_files_info(header)?;
378        header.write_u8(K_END)?;
379        Ok(())
380    }
381
382    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
383        let mut raw_header = Vec::with_capacity(64 * 1024);
384        self.write_header(&mut raw_header)?;
385        let mut pack_info = PackInfo::default();
386
387        let position = self.output.stream_position()?;
388        let pos = position - SIGNATURE_HEADER_SIZE;
389        pack_info.pos = pos;
390
391        let mut more_sizes = vec![];
392        let size = raw_header.len() as u64;
393        let crc32 = crc32fast::hash(&raw_header);
394        let mut methods = vec![];
395
396        if self.encrypt_header {
397            for conf in self.content_methods.iter() {
398                if conf.method.id() == SevenZMethod::AES256SHA256.id() {
399                    methods.push(conf.clone());
400                    break;
401                }
402            }
403        }
404
405        methods.push(SevenZMethodConfiguration::new(SevenZMethod::LZMA));
406
407        let methods = Arc::new(methods);
408
409        let mut encoded_data = Vec::with_capacity(size as usize / 2);
410
411        let mut compress_size = 0;
412        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
413        {
414            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
415                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
416            encoder.write_all(&raw_header)?;
417            encoder.flush()?;
418            let _ = encoder.write(&[])?;
419        }
420
421        let compress_crc = compressed.crc_value();
422        let compress_size = *compressed.bytes_written;
423        if compress_size as u64 + 20 >= size {
424            // compression made it worse. Write raw data
425            header.write_all(&raw_header)?;
426            return Ok(());
427        }
428        self.output.write_all(&encoded_data[..compress_size])?;
429
430        pack_info.add_stream(compress_size as u64, compress_crc);
431
432        let mut unpack_info = UnpackInfo::default();
433        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
434        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
435        sizes.push(size);
436        unpack_info.add(methods, sizes, crc32);
437
438        header.write_u8(K_ENCODED_HEADER)?;
439
440        pack_info.write_to(header)?;
441        unpack_info.write_to(header)?;
442        unpack_info.write_substreams(header)?;
443
444        header.write_u8(K_END)?;
445
446        Ok(())
447    }
448
449    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
450        if self.pack_info.len() > 0 {
451            self.pack_info.write_to(header)?;
452            self.unpack_info.write_to(header)?;
453        }
454        self.unpack_info.write_substreams(header)?;
455
456        header.write_u8(K_END)?;
457        Ok(())
458    }
459
460    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
461        header.write_u8(K_FILES_INFO)?;
462        write_u64(header, self.files.len() as u64)?;
463        self.write_file_empty_streams(header)?;
464        self.write_file_empty_files(header)?;
465        self.write_file_anti_items(header)?;
466        self.write_file_names(header)?;
467        self.write_file_ctimes(header)?;
468        self.write_file_atimes(header)?;
469        self.write_file_mtimes(header)?;
470        self.write_file_windows_attrs(header)?;
471        header.write_u8(K_END)?;
472        Ok(())
473    }
474
475    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
476        let mut has_empty = false;
477        for entry in self.files.iter() {
478            if !entry.has_stream {
479                has_empty = true;
480                break;
481            }
482        }
483        if has_empty {
484            header.write_u8(K_EMPTY_STREAM)?;
485            let mut bitset = BitSet::with_capacity(self.files.len());
486            for (i, entry) in self.files.iter().enumerate() {
487                if !entry.has_stream {
488                    bitset.insert(i);
489                }
490            }
491            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
492            write_bit_set(&mut temp, &bitset)?;
493            write_u64(header, temp.len() as u64)?;
494            header.write_all(temp.as_slice())?;
495        }
496        Ok(())
497    }
498
499    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
500        let mut has_empty = false;
501        let mut empty_stream_counter = 0;
502        let mut bitset = BitSet::new();
503        for entry in self.files.iter() {
504            if !entry.has_stream {
505                let is_dir = entry.is_directory();
506                has_empty |= !is_dir;
507                if !is_dir {
508                    bitset.insert(empty_stream_counter);
509                }
510                empty_stream_counter += 1;
511            }
512        }
513        if has_empty {
514            header.write_u8(K_EMPTY_FILE)?;
515
516            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
517            write_bit_set(&mut temp, &bitset)?;
518            write_u64(header, temp.len() as u64)?;
519            header.write_all(&temp)?;
520        }
521        Ok(())
522    }
523
524    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
525        let mut has_anti = false;
526        let mut counter = 0;
527        let mut bitset = BitSet::new();
528        for entry in self.files.iter() {
529            if !entry.has_stream {
530                let is_anti = entry.is_anti_item();
531                has_anti |= !is_anti;
532                if !is_anti {
533                    bitset.insert(counter);
534                }
535                counter += 1;
536            }
537        }
538        if has_anti {
539            header.write_u8(K_ANTI)?;
540
541            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
542            write_bit_set(&mut temp, &bitset)?;
543            write_u64(header, temp.len() as u64)?;
544            header.write_all(temp.as_slice())?;
545        }
546        Ok(())
547    }
548
549    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
550        header.write_u8(K_NAME)?;
551        let mut temp: Vec<u8> = Vec::with_capacity(128);
552        let out = &mut temp;
553        out.write_u8(0)?;
554        for file in self.files.iter() {
555            for c in file.name().encode_utf16() {
556                let buf = c.to_le_bytes();
557                out.write_all(&buf)?;
558            }
559            out.write_all(&[0u8; 2])?;
560        }
561        write_u64(header, temp.len() as u64)?;
562        header.write_all(temp.as_slice())?;
563        Ok(())
564    }
565
566    write_times!(
567        write_file_ctimes,
568        K_C_TIME,
569        has_creation_date,
570        creation_date
571    );
572    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
573    write_times!(
574        write_file_mtimes,
575        K_M_TIME,
576        has_last_modified_date,
577        last_modified_date
578    );
579    write_times!(
580        write_file_windows_attrs,
581        K_WIN_ATTRIBUTES,
582        has_windows_attributes,
583        windows_attributes,
584        write_u32
585    );
586}
587
/// Writes `value` in the variable-length number encoding used by the headers.
///
/// The lead byte starts with one `1` bit per extra byte that follows; its
/// remaining low bits hold the most significant bits of `value`. The extra
/// bytes carry the low-order bytes of `value` in little-endian order. Values
/// below `0x80` take a single byte; the largest values take nine.
pub(crate) fn write_u64<W: Write>(header: &mut W, value: u64) -> std::io::Result<()> {
    let mut extra_bytes = 0usize;
    let mut lead = 0u64;
    let mut marker = 0x80u64;
    while extra_bytes < 8 {
        // With `extra_bytes` trailing bytes, 7 * (extra_bytes + 1) bits fit.
        if value < (1u64 << (7 * (extra_bytes + 1))) {
            lead |= value >> (8 * extra_bytes);
            break;
        }
        lead |= marker;
        marker >>= 1;
        extra_bytes += 1;
    }

    header.write_all(&[(lead & 0xff) as u8])?;
    header.write_all(&value.to_le_bytes()[..extra_bytes])
}
609
610fn write_bit_set<W: Write>(mut write: W, bs: &BitSet) -> std::io::Result<()> {
611    let mut cache = 0;
612    let mut shift = 7;
613    for i in 0..bs.get_ref().len() {
614        let set = if bs.contains(i) { 1 } else { 0 };
615        cache |= set << shift;
616        shift -= 1;
617        if shift < 0 {
618            write.write_u8(cache)?;
619            shift = 7;
620            cache = 0;
621        }
622    }
623    if shift != 7 {
624        write.write_u8(cache)?;
625    }
626    Ok(())
627}
628
629struct CompressWrapWriter<'a, W> {
630    writer: W,
631    crc: Hasher,
632    cache: Vec<u8>,
633    bytes_written: &'a mut usize,
634}
635
636impl<'a, W: Write> CompressWrapWriter<'a, W> {
637    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
638        Self {
639            writer,
640            crc: Hasher::new(),
641            cache: Vec::with_capacity(8192),
642            bytes_written,
643        }
644    }
645
646    pub fn crc_value(&mut self) -> u32 {
647        let crc = std::mem::replace(&mut self.crc, Hasher::new());
648        crc.finalize()
649    }
650}
651
652impl<W: Write> Write for CompressWrapWriter<'_, W> {
653    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
654        self.cache.resize(buf.len(), Default::default());
655        let len = self.writer.write(buf)?;
656        self.crc.update(&buf[..len]);
657        *self.bytes_written += len;
658        Ok(len)
659    }
660
661    fn flush(&mut self) -> std::io::Result<()> {
662        self.writer.flush()
663    }
664}