sevenz_rust2/
writer.rs

1mod counting;
2mod pack_info;
3mod seq_reader;
4mod unpack_info;
5
6pub use self::counting::CountingWriter;
7pub use self::seq_reader::*;
8use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
9use crate::{Error, SevenZArchiveEntry, archive::*, encoders};
10use bit_set::BitSet;
11use byteorder::*;
12use crc32fast::Hasher;
13use std::{
14    cell::Cell,
15    fs::File,
16    io::{Read, Seek, Write},
17    path::Path,
18    rc::Rc,
19    sync::Arc,
20};
21
/// Generates a `SevenZWriter` method that serializes one optional per-file property
/// (a timestamp or the Windows attributes) into the files-info section of the header.
///
/// Arguments:
/// * `$fn_name`  - name of the generated method.
/// * `$nid`      - property id byte written in front of the property.
/// * `$has_time` - `bool` field of the entry telling whether the value is defined.
/// * `$time`     - field of the entry holding the value.
/// * `$write_fn` - `byteorder` writer used for each value; defaults to `write_u64`.
macro_rules! write_times {
    // Four-argument form: values are written with `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let total = self.files.len();
            let defined = self.files.iter().filter(|f| f.$has_time).count();
            // The property is omitted entirely when no entry defines it.
            if defined == 0 {
                return Ok(());
            }

            header.write_u8($nid)?;
            // The payload is built separately because its byte length has to precede it.
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == total {
                // 1 = every entry defines the value, no bit vector follows.
                payload.write_u8(1)?;
            } else {
                // 0 = a bit vector follows, telling which entries define the value.
                payload.write_u8(0)?;
                let mut mask = BitSet::with_capacity(total);
                for (idx, f) in self.files.iter().enumerate() {
                    if f.$has_time {
                        mask.insert(idx);
                    }
                }
                write_bit_set(&mut payload, &mask)?;
            }
            // Single zero byte between the "defined" information and the values.
            payload.write_u8(0)?;
            for f in self.files.iter().filter(|f| f.$has_time) {
                payload.$write_fn::<LittleEndian>((f.$time).into())?;
            }
            payload.flush()?;

            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
65
66type Result<T> = std::result::Result<T, Error>;
67
68/// Writes a 7z file.
69#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
70pub struct SevenZWriter<W: Write> {
71    output: W,
72    files: Vec<SevenZArchiveEntry>,
73    content_methods: Arc<Vec<SevenZMethodConfiguration>>,
74    pack_info: PackInfo,
75    unpack_info: UnpackInfo,
76    encrypt_header: bool,
77}
78
79#[cfg(not(target_arch = "wasm32"))]
80impl SevenZWriter<File> {
81    /// Creates a file to write a 7z archive to.
82    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
83        let file = File::create(path.as_ref())
84            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
85        Self::new(file)
86    }
87}
88
89impl<W: Write + Seek> SevenZWriter<W> {
90    /// Prepares writer to write a 7z archive to.
91    pub fn new(mut writer: W) -> Result<Self> {
92        writer
93            .seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))
94            .map_err(Error::io)?;
95
96        Ok(Self {
97            output: writer,
98            files: Default::default(),
99            content_methods: Arc::new(vec![SevenZMethodConfiguration::new(SevenZMethod::LZMA2)]),
100            pack_info: Default::default(),
101            unpack_info: Default::default(),
102            encrypt_header: true,
103        })
104    }
105
106    /// Sets the default compression methods to use for entry data. Default is LZMA2.
107    pub fn set_content_methods(
108        &mut self,
109        content_methods: Vec<SevenZMethodConfiguration>,
110    ) -> &mut Self {
111        if content_methods.is_empty() {
112            return self;
113        }
114        self.content_methods = Arc::new(content_methods);
115        self
116    }
117
118    /// Whether to enable the encryption of the -header. Default is `true`.
119    pub fn set_encrypt_header(&mut self, enabled: bool) {
120        self.encrypt_header = enabled;
121    }
122
123    /// Adds an archive `entry` with data from `reader`.
124    ///
125    /// # Example
126    /// ```no_run
127    /// use sevenz_rust2::*;
128    /// use std::fs::File;
129    /// use std::path::Path;
130    /// let mut sz = SevenZWriter::create("path/to/dest.7z").expect("create writer ok");
131    /// let src = Path::new("path/to/source.txt");
132    /// let name = "source.txt".to_string();
133    /// let entry = sz.push_archive_entry(
134    ///               SevenZArchiveEntry::from_path(&src, name),
135    ///               Some(File::open(src).unwrap()),
136    ///           )
137    ///           .expect("ok");
138    /// let compressed_size = entry.compressed_size;
139    /// sz.finish().expect("done");
140    /// ```
141    pub fn push_archive_entry<R: Read>(
142        &mut self,
143        mut entry: SevenZArchiveEntry,
144        reader: Option<R>,
145    ) -> Result<&SevenZArchiveEntry> {
146        if !entry.is_directory {
147            if let Some(mut r) = reader {
148                let mut compressed_len = 0;
149                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
150
151                let mut more_sizes: Vec<Rc<Cell<usize>>> =
152                    Vec::with_capacity(self.content_methods.len() - 1);
153
154                let (crc, size) = {
155                    let mut w = Self::create_writer(
156                        &self.content_methods,
157                        &mut compressed,
158                        &mut more_sizes,
159                    )?;
160                    let mut write_len = 0;
161                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
162                    let mut buf = [0u8; 4096];
163                    loop {
164                        match r.read(&mut buf) {
165                            Ok(n) => {
166                                if n == 0 {
167                                    break;
168                                }
169                                w.write_all(&buf[..n]).map_err(|e| {
170                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
171                                })?;
172                            }
173                            Err(e) => {
174                                return Err(Error::io_msg(
175                                    e,
176                                    format!("Encode entry:{}", entry.name()),
177                                ));
178                            }
179                        }
180                    }
181                    w.flush()
182                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
183                    w.write(&[])
184                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
185
186                    (w.crc_value(), write_len)
187                };
188                let compressed_crc = compressed.crc_value();
189                entry.has_stream = true;
190                entry.size = size as u64;
191                entry.crc = crc as u64;
192                entry.has_crc = true;
193                entry.compressed_crc = compressed_crc as u64;
194                entry.compressed_size = compressed_len as u64;
195                self.pack_info
196                    .add_stream(compressed_len as u64, compressed_crc);
197
198                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
199                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
200                sizes.push(size as u64);
201
202                self.unpack_info
203                    .add(self.content_methods.clone(), sizes, crc);
204
205                self.files.push(entry);
206                return Ok(self.files.last().unwrap());
207            }
208        }
209        entry.has_stream = false;
210        entry.size = 0;
211        entry.compressed_size = 0;
212        entry.has_crc = false;
213        self.files.push(entry);
214        Ok(self.files.last().unwrap())
215    }
216
217    /// Solid compression - packs `entries` into one pack.
218    ///
219    /// # Panics
220    /// * If `entries`'s length not equals to `reader.reader_len()`
221    pub fn push_archive_entries<R: Read>(
222        &mut self,
223        mut entries: Vec<SevenZArchiveEntry>,
224        reader: SeqReader<SourceReader<R>>,
225    ) -> Result<&mut Self> {
226        let mut r = reader;
227        assert_eq!(r.reader_len(), entries.len());
228        let mut compressed_len = 0;
229        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
230        let content_methods = &self.content_methods;
231        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
232
233        let (crc, size) = {
234            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
235            let mut write_len = 0;
236            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
237            let mut buf = [0u8; 4096];
238
239            fn entries_names(entries: &[SevenZArchiveEntry]) -> String {
240                let mut names = String::with_capacity(512);
241                for ele in entries.iter() {
242                    names.push_str(&ele.name);
243                    names.push(';');
244                    if names.len() > 512 {
245                        break;
246                    }
247                }
248                names
249            }
250
251            loop {
252                match r.read(&mut buf) {
253                    Ok(n) => {
254                        if n == 0 {
255                            break;
256                        }
257                        w.write_all(&buf[..n]).map_err(|e| {
258                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
259                        })?;
260                    }
261                    Err(e) => {
262                        return Err(Error::io_msg(
263                            e,
264                            format!("Encode entries:{}", entries_names(&entries)),
265                        ));
266                    }
267                }
268            }
269            w.flush().map_err(|e| {
270                let mut names = String::with_capacity(512);
271                for ele in entries.iter() {
272                    names.push_str(&ele.name);
273                    names.push(';');
274                    if names.len() > 512 {
275                        break;
276                    }
277                }
278                Error::io_msg(e, format!("Encode entry:{}", names))
279            })?;
280            w.write(&[]).map_err(|e| {
281                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
282            })?;
283
284            (w.crc_value(), write_len)
285        };
286        let compressed_crc = compressed.crc_value();
287        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
288        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
289        for i in 0..entries.len() {
290            let entry = &mut entries[i];
291            let ri = &r[i];
292            entry.crc = ri.crc_value() as u64;
293            entry.size = ri.read_count() as u64;
294            sub_stream_crcs.push(entry.crc as u32);
295            sub_stream_sizes.push(entry.size);
296            entry.has_crc = true;
297        }
298
299        self.pack_info
300            .add_stream(compressed_len as u64, compressed_crc);
301
302        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
303        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
304        sizes.push(size as u64);
305
306        self.unpack_info.add_multiple(
307            content_methods.clone(),
308            sizes,
309            crc,
310            entries.len() as u64,
311            sub_stream_sizes,
312            sub_stream_crcs,
313        );
314
315        self.files.extend(entries);
316        Ok(self)
317    }
318
319    fn create_writer<'a, O: Write + 'a>(
320        methods: &[SevenZMethodConfiguration],
321        out: O,
322        more_sized: &mut Vec<Rc<Cell<usize>>>,
323    ) -> Result<Box<dyn Write + 'a>> {
324        let mut encoder: Box<dyn Write> = Box::new(out);
325        let mut first = true;
326        for mc in methods.iter() {
327            if !first {
328                let counting = CountingWriter::new(encoder);
329                more_sized.push(counting.counting());
330                encoder = Box::new(encoders::add_encoder(counting, mc)?);
331            } else {
332                let counting = CountingWriter::new(encoder);
333                encoder = Box::new(encoders::add_encoder(counting, mc)?);
334            }
335            first = false;
336        }
337        Ok(encoder)
338    }
339
340    /// Finishes the compression.
341    pub fn finish(mut self) -> std::io::Result<W> {
342        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
343        self.write_encoded_header(&mut header)?;
344        let header_pos = self.output.stream_position()?;
345        self.output.write_all(&header)?;
346        let crc32 = crc32fast::hash(&header);
347        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
348        {
349            let mut hhw = hh.as_mut_slice();
350            //sig
351            hhw.write_all(SEVEN_Z_SIGNATURE)?;
352            //version
353            hhw.write_u8(0)?;
354            hhw.write_u8(2)?;
355            //placeholder for crc: index = 8
356            hhw.write_u32::<LittleEndian>(0)?;
357
358            // start header
359            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
360            hhw.write_u64::<LittleEndian>(0xffffffff & header.len() as u64)?;
361            hhw.write_u32::<LittleEndian>(crc32)?;
362        }
363        let crc32 = crc32fast::hash(&hh[12..]);
364        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
365
366        self.output.seek(std::io::SeekFrom::Start(0))?;
367        self.output.write_all(&hh)?;
368        self.output.flush()?;
369        Ok(self.output)
370    }
371
372    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
373        header.write_u8(K_HEADER)?;
374        header.write_u8(K_MAIN_STREAMS_INFO)?;
375        self.write_streams_info(header)?;
376        self.write_files_info(header)?;
377        header.write_u8(K_END)?;
378        Ok(())
379    }
380
381    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
382        let mut raw_header = Vec::with_capacity(64 * 1024);
383        self.write_header(&mut raw_header)?;
384        let mut pack_info = PackInfo::default();
385
386        let position = self.output.stream_position()?;
387        let pos = position - SIGNATURE_HEADER_SIZE;
388        pack_info.pos = pos;
389
390        let mut more_sizes = vec![];
391        let size = raw_header.len() as u64;
392        let crc32 = crc32fast::hash(&raw_header);
393        let mut methods = vec![];
394
395        if self.encrypt_header {
396            for conf in self.content_methods.iter() {
397                if conf.method.id() == SevenZMethod::AES256SHA256.id() {
398                    methods.push(conf.clone());
399                    break;
400                }
401            }
402        }
403
404        methods.push(SevenZMethodConfiguration::new(SevenZMethod::LZMA));
405
406        let methods = Arc::new(methods);
407
408        let mut encoded_data = Vec::with_capacity(size as usize / 2);
409
410        let mut compress_size = 0;
411        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
412        {
413            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
414                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
415            encoder.write_all(&raw_header)?;
416            encoder.flush()?;
417            let _ = encoder.write(&[])?;
418        }
419
420        let compress_crc = compressed.crc_value();
421        let compress_size = *compressed.bytes_written;
422        if compress_size as u64 + 20 >= size {
423            // compression made it worse. Write raw data
424            header.write_all(&raw_header)?;
425            return Ok(());
426        }
427        self.output.write_all(&encoded_data[..compress_size])?;
428
429        pack_info.add_stream(compress_size as u64, compress_crc);
430
431        let mut unpack_info = UnpackInfo::default();
432        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
433        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
434        sizes.push(size);
435        unpack_info.add(methods, sizes, crc32);
436
437        header.write_u8(K_ENCODED_HEADER)?;
438
439        pack_info.write_to(header)?;
440        unpack_info.write_to(header)?;
441        unpack_info.write_substreams(header)?;
442
443        header.write_u8(K_END)?;
444
445        Ok(())
446    }
447
448    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
449        if self.pack_info.len() > 0 {
450            self.pack_info.write_to(header)?;
451            self.unpack_info.write_to(header)?;
452        }
453        self.unpack_info.write_substreams(header)?;
454
455        header.write_u8(K_END)?;
456        Ok(())
457    }
458
459    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
460        header.write_u8(K_FILES_INFO)?;
461        write_u64(header, self.files.len() as u64)?;
462        self.write_file_empty_streams(header)?;
463        self.write_file_empty_files(header)?;
464        self.write_file_anti_items(header)?;
465        self.write_file_names(header)?;
466        self.write_file_ctimes(header)?;
467        self.write_file_atimes(header)?;
468        self.write_file_mtimes(header)?;
469        self.write_file_windows_attrs(header)?;
470        header.write_u8(K_END)?;
471        Ok(())
472    }
473
474    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
475        let mut has_empty = false;
476        for entry in self.files.iter() {
477            if !entry.has_stream {
478                has_empty = true;
479                break;
480            }
481        }
482        if has_empty {
483            header.write_u8(K_EMPTY_STREAM)?;
484            let mut bitset = BitSet::with_capacity(self.files.len());
485            for (i, entry) in self.files.iter().enumerate() {
486                if !entry.has_stream {
487                    bitset.insert(i);
488                }
489            }
490            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
491            write_bit_set(&mut temp, &bitset)?;
492            write_u64(header, temp.len() as u64)?;
493            header.write_all(temp.as_slice())?;
494        }
495        Ok(())
496    }
497
498    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
499        let mut has_empty = false;
500        let mut empty_stream_counter = 0;
501        let mut bitset = BitSet::new();
502        for entry in self.files.iter() {
503            if !entry.has_stream {
504                let is_dir = entry.is_directory();
505                has_empty |= !is_dir;
506                if !is_dir {
507                    bitset.insert(empty_stream_counter);
508                }
509                empty_stream_counter += 1;
510            }
511        }
512        if has_empty {
513            header.write_u8(K_EMPTY_FILE)?;
514
515            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
516            write_bit_set(&mut temp, &bitset)?;
517            write_u64(header, temp.len() as u64)?;
518            header.write_all(&temp)?;
519        }
520        Ok(())
521    }
522
523    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
524        let mut has_anti = false;
525        let mut counter = 0;
526        let mut bitset = BitSet::new();
527        for entry in self.files.iter() {
528            if !entry.has_stream {
529                let is_anti = entry.is_anti_item();
530                has_anti |= !is_anti;
531                if !is_anti {
532                    bitset.insert(counter);
533                }
534                counter += 1;
535            }
536        }
537        if has_anti {
538            header.write_u8(K_ANTI)?;
539
540            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
541            write_bit_set(&mut temp, &bitset)?;
542            write_u64(header, temp.len() as u64)?;
543            header.write_all(temp.as_slice())?;
544        }
545        Ok(())
546    }
547
548    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
549        header.write_u8(K_NAME)?;
550        let mut temp: Vec<u8> = Vec::with_capacity(128);
551        let out = &mut temp;
552        out.write_u8(0)?;
553        for file in self.files.iter() {
554            for c in file.name().encode_utf16() {
555                let buf = c.to_le_bytes();
556                out.write_all(&buf)?;
557            }
558            out.write_all(&[0u8; 2])?;
559        }
560        write_u64(header, temp.len() as u64)?;
561        header.write_all(temp.as_slice())?;
562        Ok(())
563    }
564
565    write_times!(
566        write_file_ctimes,
567        K_C_TIME,
568        has_creation_date,
569        creation_date
570    );
571    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
572    write_times!(
573        write_file_mtimes,
574        K_M_TIME,
575        has_last_modified_date,
576        last_modified_date
577    );
578    write_times!(
579        write_file_windows_attrs,
580        K_WIN_ATTRIBUTES,
581        has_windows_attributes,
582        windows_attributes,
583        write_u32
584    );
585}
586
/// Writes `value` to `header` as a variable-length number of one to nine bytes.
///
/// The number of leading one-bits in the first byte equals the number of bytes that follow.
/// The following bytes hold the low-order bytes of `value` in little-endian order, and the
/// remaining low bits of the first byte hold the bits of `value` above them.
pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
    let mut lead: u8 = 0;
    let mut trailing: u32 = 0;
    for k in 0..8u32 {
        // With `k` trailing bytes, 7 * (k + 1) bits of payload fit in total.
        if value < (1u64 << (7 * (k + 1))) {
            lead |= (value >> (8 * k)) as u8;
            break;
        }
        lead |= 0x80u8 >> k;
        trailing = k + 1;
    }

    header.write_all(&[lead])?;
    for _ in 0..trailing {
        header.write_all(&[value as u8])?;
        value >>= 8;
    }
    Ok(())
}
608
609fn write_bit_set<W: Write>(mut write: W, bs: &BitSet) -> std::io::Result<()> {
610    let mut cache = 0;
611    let mut shift = 7;
612    for i in 0..bs.get_ref().len() {
613        let set = if bs.contains(i) { 1 } else { 0 };
614        cache |= set << shift;
615        shift -= 1;
616        if shift < 0 {
617            write.write_u8(cache)?;
618            shift = 7;
619            cache = 0;
620        }
621    }
622    if shift != 7 {
623        write.write_u8(cache)?;
624    }
625    Ok(())
626}
627
628struct CompressWrapWriter<'a, W> {
629    writer: W,
630    crc: Hasher,
631    cache: Vec<u8>,
632    bytes_written: &'a mut usize,
633}
634
635impl<'a, W: Write> CompressWrapWriter<'a, W> {
636    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
637        Self {
638            writer,
639            crc: Hasher::new(),
640            cache: Vec::with_capacity(8192),
641            bytes_written,
642        }
643    }
644
645    pub fn crc_value(&mut self) -> u32 {
646        let crc = std::mem::replace(&mut self.crc, Hasher::new());
647        crc.finalize()
648    }
649}
650
651impl<W: Write> Write for CompressWrapWriter<'_, W> {
652    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
653        self.cache.resize(buf.len(), Default::default());
654        let len = self.writer.write(buf)?;
655        self.crc.update(&buf[..len]);
656        *self.bytes_written += len;
657        Ok(len)
658    }
659
660    fn flush(&mut self) -> std::io::Result<()> {
661        self.writer.flush()
662    }
663}