sevenz_rust2/
writer.rs

1mod pack_info;
2mod seq_reader;
3mod unpack_info;
4
5pub use self::seq_reader::*;
6use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
7use crate::{Error, SevenZArchiveEntry, archive::*, encoders, lzma::*};
8use bit_set::BitSet;
9use byteorder::*;
10use crc32fast::Hasher;
11use std::{
12    cell::Cell,
13    fs::File,
14    io::{Read, Seek, Write},
15    path::Path,
16    rc::Rc,
17    sync::Arc,
18};
19
/// Generates a `SevenZWriter` method that writes one optional per-file property
/// (timestamps or attributes) into the files-info section of the header.
///
/// * `$fn_name`  - name of the generated method.
/// * `$nid`      - property id byte written before the payload.
/// * `$has_time` - boolean entry field telling whether the value is defined.
/// * `$time`     - entry field holding the value itself.
/// * `$write_fn` - little-endian writer method used per value; defaults to `write_u64`.
macro_rules! write_times {
    // Short form: values are written as 64-bit integers.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let total = self.files.len();
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            // The property is omitted entirely when no entry defines it.
            if defined == 0 {
                return Ok(());
            }

            header.write_u8($nid)?;
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == total {
                // "All defined" marker: no bit vector follows.
                payload.write_u8(1)?;
            } else {
                // Not all defined: a bit vector marks which entries carry a value.
                payload.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(total);
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut payload, &defined_bits)?;
            }
            // A zero byte precedes the values themselves.
            payload.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                payload.$write_fn::<LittleEndian>((entry.$time).into())?;
            }
            payload.flush()?;

            // The payload is length-prefixed so it is assembled in memory first.
            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
63
64type Result<T> = std::result::Result<T, Error>;
65
66/// Writes a 7z file.
67#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
68pub struct SevenZWriter<W: Write> {
69    output: W,
70    files: Vec<SevenZArchiveEntry>,
71    content_methods: Arc<Vec<SevenZMethodConfiguration>>,
72    pack_info: PackInfo,
73    unpack_info: UnpackInfo,
74    encrypt_header: bool,
75}
76
77#[cfg(not(target_arch = "wasm32"))]
78impl SevenZWriter<File> {
79    /// Creates a file to write a 7z archive to.
80    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
81        let file = File::create(path.as_ref())
82            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
83        Self::new(file)
84    }
85}
86
87impl<W: Write + Seek> SevenZWriter<W> {
88    /// Prepares writer to write a 7z archive to.
89    pub fn new(mut writer: W) -> Result<Self> {
90        writer
91            .seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))
92            .map_err(Error::io)?;
93
94        Ok(Self {
95            output: writer,
96            files: Default::default(),
97            content_methods: Arc::new(vec![SevenZMethodConfiguration::new(SevenZMethod::LZMA2)]),
98            pack_info: Default::default(),
99            unpack_info: Default::default(),
100            encrypt_header: true,
101        })
102    }
103
104    /// Sets the default compression methods to use for entry data. Default is LZMA2.
105    pub fn set_content_methods(
106        &mut self,
107        content_methods: Vec<SevenZMethodConfiguration>,
108    ) -> &mut Self {
109        if content_methods.is_empty() {
110            return self;
111        }
112        self.content_methods = Arc::new(content_methods);
113        self
114    }
115
116    /// Whether to enable the encryption of the -header. Default is `true`.
117    pub fn set_encrypt_header(&mut self, enabled: bool) {
118        self.encrypt_header = enabled;
119    }
120
121    /// Adds an archive `entry` with data from `reader`.
122    ///
123    /// # Example
124    /// ```no_run
125    /// use sevenz_rust2::*;
126    /// use std::fs::File;
127    /// use std::path::Path;
128    /// let mut sz = SevenZWriter::create("path/to/dest.7z").expect("create writer ok");
129    /// let src = Path::new("path/to/source.txt");
130    /// let name = "source.txt".to_string();
131    /// let entry = sz.push_archive_entry(
132    ///               SevenZArchiveEntry::from_path(&src, name),
133    ///               Some(File::open(src).unwrap()),
134    ///           )
135    ///           .expect("ok");
136    /// let compressed_size = entry.compressed_size;
137    /// sz.finish().expect("done");
138    /// ```
139    pub fn push_archive_entry<R: Read>(
140        &mut self,
141        mut entry: SevenZArchiveEntry,
142        reader: Option<R>,
143    ) -> Result<&SevenZArchiveEntry> {
144        if !entry.is_directory {
145            if let Some(mut r) = reader {
146                let mut compressed_len = 0;
147                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
148
149                let mut more_sizes: Vec<Rc<Cell<usize>>> =
150                    Vec::with_capacity(self.content_methods.len() - 1);
151
152                let (crc, size) = {
153                    let mut w = Self::create_writer(
154                        &self.content_methods,
155                        &mut compressed,
156                        &mut more_sizes,
157                    )?;
158                    let mut write_len = 0;
159                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
160                    let mut buf = [0u8; 4096];
161                    loop {
162                        match r.read(&mut buf) {
163                            Ok(n) => {
164                                if n == 0 {
165                                    break;
166                                }
167                                w.write_all(&buf[..n]).map_err(|e| {
168                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
169                                })?;
170                            }
171                            Err(e) => {
172                                return Err(Error::io_msg(
173                                    e,
174                                    format!("Encode entry:{}", entry.name()),
175                                ));
176                            }
177                        }
178                    }
179                    w.flush()
180                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
181                    w.write(&[])
182                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
183
184                    (w.crc_value(), write_len)
185                };
186                let compressed_crc = compressed.crc_value();
187                entry.has_stream = true;
188                entry.size = size as u64;
189                entry.crc = crc as u64;
190                entry.has_crc = true;
191                entry.compressed_crc = compressed_crc as u64;
192                entry.compressed_size = compressed_len as u64;
193                self.pack_info
194                    .add_stream(compressed_len as u64, compressed_crc);
195
196                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
197                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
198                sizes.push(size as u64);
199
200                self.unpack_info
201                    .add(self.content_methods.clone(), sizes, crc);
202
203                self.files.push(entry);
204                return Ok(self.files.last().unwrap());
205            }
206        }
207        entry.has_stream = false;
208        entry.size = 0;
209        entry.compressed_size = 0;
210        entry.has_crc = false;
211        self.files.push(entry);
212        Ok(self.files.last().unwrap())
213    }
214
215    /// Solid compression - packs `entries` into one pack.
216    ///
217    /// # Panics
218    /// * If `entries`'s length not equals to `reader.reader_len()`
219    pub fn push_archive_entries<R: Read>(
220        &mut self,
221        mut entries: Vec<SevenZArchiveEntry>,
222        reader: SeqReader<SourceReader<R>>,
223    ) -> Result<&mut Self> {
224        let mut r = reader;
225        assert_eq!(r.reader_len(), entries.len());
226        let mut compressed_len = 0;
227        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
228        let content_methods = &self.content_methods;
229        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
230
231        let (crc, size) = {
232            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
233            let mut write_len = 0;
234            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
235            let mut buf = [0u8; 4096];
236            fn entries_names(entries: &[SevenZArchiveEntry]) -> String {
237                let mut names = String::with_capacity(512);
238                for ele in entries.iter() {
239                    names.push_str(&ele.name);
240                    names.push(';');
241                    if names.len() > 512 {
242                        break;
243                    }
244                }
245                names
246            }
247            loop {
248                match r.read(&mut buf) {
249                    Ok(n) => {
250                        if n == 0 {
251                            break;
252                        }
253                        w.write_all(&buf[..n]).map_err(|e| {
254                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
255                        })?;
256                    }
257                    Err(e) => {
258                        return Err(Error::io_msg(
259                            e,
260                            format!("Encode entries:{}", entries_names(&entries)),
261                        ));
262                    }
263                }
264            }
265            w.flush().map_err(|e| {
266                let mut names = String::with_capacity(512);
267                for ele in entries.iter() {
268                    names.push_str(&ele.name);
269                    names.push(';');
270                    if names.len() > 512 {
271                        break;
272                    }
273                }
274                Error::io_msg(e, format!("Encode entry:{}", names))
275            })?;
276            w.write(&[]).map_err(|e| {
277                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
278            })?;
279
280            (w.crc_value(), write_len)
281        };
282        let compressed_crc = compressed.crc_value();
283        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
284        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
285        for i in 0..entries.len() {
286            let entry = &mut entries[i];
287            let ri = &r[i];
288            entry.crc = ri.crc_value() as u64;
289            entry.size = ri.read_count() as u64;
290            sub_stream_crcs.push(entry.crc as u32);
291            sub_stream_sizes.push(entry.size);
292            entry.has_crc = true;
293        }
294
295        self.pack_info
296            .add_stream(compressed_len as u64, compressed_crc);
297
298        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
299        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
300        sizes.push(size as u64);
301
302        self.unpack_info.add_multiple(
303            content_methods.clone(),
304            sizes,
305            crc,
306            entries.len() as u64,
307            sub_stream_sizes,
308            sub_stream_crcs,
309        );
310
311        self.files.extend(entries);
312        Ok(self)
313    }
314
315    fn create_writer<'a, O: Write + 'a>(
316        methods: &[SevenZMethodConfiguration],
317        out: O,
318        more_sized: &mut Vec<Rc<Cell<usize>>>,
319    ) -> Result<Box<dyn Write + 'a>> {
320        let mut encoder: Box<dyn Write> = Box::new(out);
321        let mut first = true;
322        for mc in methods.iter() {
323            if !first {
324                let counting = CountingWriter::new(encoder);
325                more_sized.push(counting.counting());
326                encoder = Box::new(encoders::add_encoder(counting, mc)?);
327            } else {
328                let counting = CountingWriter::new(encoder);
329                encoder = Box::new(encoders::add_encoder(counting, mc)?);
330            }
331            first = false;
332        }
333        Ok(encoder)
334    }
335
336    /// Finishes the compression.
337    pub fn finish(mut self) -> std::io::Result<W> {
338        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
339        self.write_encoded_header(&mut header)?;
340        let header_pos = self.output.stream_position()?;
341        self.output.write_all(&header)?;
342        let crc32 = crc32fast::hash(&header);
343        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
344        {
345            let mut hhw = hh.as_mut_slice();
346            //sig
347            hhw.write_all(SEVEN_Z_SIGNATURE)?;
348            //version
349            hhw.write_u8(0)?;
350            hhw.write_u8(2)?;
351            //placeholder for crc: index = 8
352            hhw.write_u32::<LittleEndian>(0)?;
353
354            // start header
355            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
356            hhw.write_u64::<LittleEndian>(0xffffffff & header.len() as u64)?;
357            hhw.write_u32::<LittleEndian>(crc32)?;
358        }
359        let crc32 = crc32fast::hash(&hh[12..]);
360        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
361
362        self.output.seek(std::io::SeekFrom::Start(0))?;
363        self.output.write_all(&hh)?;
364        Ok(self.output)
365    }
366
367    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
368        header.write_u8(K_HEADER)?;
369        header.write_u8(K_MAIN_STREAMS_INFO)?;
370        self.write_streams_info(header)?;
371        self.write_files_info(header)?;
372        header.write_u8(K_END)?;
373        Ok(())
374    }
375
376    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
377        let mut raw_header = Vec::with_capacity(64 * 1024);
378        self.write_header(&mut raw_header)?;
379        let mut pack_info = PackInfo::default();
380
381        let position = self.output.stream_position()?;
382        let pos = position - SIGNATURE_HEADER_SIZE;
383        pack_info.pos = pos;
384
385        let mut more_sizes = vec![];
386        let size = raw_header.len() as u64;
387        let crc32 = crc32fast::hash(&raw_header);
388        let mut methods = vec![];
389
390        if self.encrypt_header {
391            for conf in self.content_methods.iter() {
392                if conf.method.id() == SevenZMethod::AES256SHA256.id() {
393                    methods.push(conf.clone());
394                    break;
395                }
396            }
397        }
398
399        methods.push(SevenZMethodConfiguration::new(SevenZMethod::LZMA));
400
401        let methods = Arc::new(methods);
402
403        let mut encoded_data = Vec::with_capacity(size as usize / 2);
404
405        let mut compress_size = 0;
406        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
407        {
408            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
409                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
410            encoder.write_all(&raw_header)?;
411            let _ = encoder.write(&[])?;
412        }
413
414        let compress_crc = compressed.crc_value();
415        let compress_size = *compressed.bytes_written;
416        if compress_size as u64 + 20 >= size {
417            // compression made it worse. Write raw data
418            header.write_all(&raw_header)?;
419            return Ok(());
420        }
421        self.output.write_all(&encoded_data[..compress_size])?;
422
423        pack_info.add_stream(compress_size as u64, compress_crc);
424
425        let mut unpack_info = UnpackInfo::default();
426        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
427        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
428        sizes.push(size);
429        unpack_info.add(methods, sizes, crc32);
430
431        header.write_u8(K_ENCODED_HEADER)?;
432
433        pack_info.write_to(header)?;
434        unpack_info.write_to(header)?;
435        unpack_info.write_substreams(header)?;
436
437        header.write_u8(K_END)?;
438
439        Ok(())
440    }
441
442    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
443        if self.pack_info.len() > 0 {
444            self.pack_info.write_to(header)?;
445            self.unpack_info.write_to(header)?;
446        }
447        self.unpack_info.write_substreams(header)?;
448
449        header.write_u8(K_END)?;
450        Ok(())
451    }
452
453    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
454        header.write_u8(K_FILES_INFO)?;
455        write_u64(header, self.files.len() as u64)?;
456        self.write_file_empty_streams(header)?;
457        self.write_file_empty_files(header)?;
458        self.write_file_anti_items(header)?;
459        self.write_file_names(header)?;
460        self.write_file_ctimes(header)?;
461        self.write_file_atimes(header)?;
462        self.write_file_mtimes(header)?;
463        self.write_file_windows_attrs(header)?;
464        header.write_u8(K_END)?;
465        Ok(())
466    }
467
468    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
469        let mut has_empty = false;
470        for entry in self.files.iter() {
471            if !entry.has_stream {
472                has_empty = true;
473                break;
474            }
475        }
476        if has_empty {
477            header.write_u8(K_EMPTY_STREAM)?;
478            let mut bitset = BitSet::with_capacity(self.files.len());
479            for (i, entry) in self.files.iter().enumerate() {
480                if !entry.has_stream {
481                    bitset.insert(i);
482                }
483            }
484            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
485            write_bit_set(&mut temp, &bitset)?;
486            write_u64(header, temp.len() as u64)?;
487            header.write_all(temp.as_slice())?;
488        }
489        Ok(())
490    }
491
492    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
493        let mut has_empty = false;
494        let mut empty_stream_counter = 0;
495        let mut bitset = BitSet::new();
496        for entry in self.files.iter() {
497            if !entry.has_stream {
498                let is_dir = entry.is_directory();
499                has_empty |= !is_dir;
500                if !is_dir {
501                    bitset.insert(empty_stream_counter);
502                }
503                empty_stream_counter += 1;
504            }
505        }
506        if has_empty {
507            header.write_u8(K_EMPTY_FILE)?;
508
509            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
510            write_bit_set(&mut temp, &bitset)?;
511            write_u64(header, temp.len() as u64)?;
512            header.write_all(&temp)?;
513        }
514        Ok(())
515    }
516
517    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
518        let mut has_anti = false;
519        let mut counter = 0;
520        let mut bitset = BitSet::new();
521        for entry in self.files.iter() {
522            if !entry.has_stream {
523                let is_anti = entry.is_anti_item();
524                has_anti |= !is_anti;
525                if !is_anti {
526                    bitset.insert(counter);
527                }
528                counter += 1;
529            }
530        }
531        if has_anti {
532            header.write_u8(K_ANTI)?;
533
534            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
535            write_bit_set(&mut temp, &bitset)?;
536            write_u64(header, temp.len() as u64)?;
537            header.write_all(temp.as_slice())?;
538        }
539        Ok(())
540    }
541
542    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
543        header.write_u8(K_NAME)?;
544        let mut temp: Vec<u8> = Vec::with_capacity(128);
545        let out = &mut temp;
546        out.write_u8(0)?;
547        for file in self.files.iter() {
548            for c in file.name().encode_utf16() {
549                let buf = c.to_le_bytes();
550                out.write_all(&buf)?;
551            }
552            out.write_all(&[0u8; 2])?;
553        }
554        write_u64(header, temp.len() as u64)?;
555        header.write_all(temp.as_slice())?;
556        Ok(())
557    }
558
559    write_times!(
560        write_file_ctimes,
561        K_C_TIME,
562        has_creation_date,
563        creation_date
564    );
565    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
566    write_times!(
567        write_file_mtimes,
568        K_M_TIME,
569        has_last_modified_date,
570        last_modified_date
571    );
572    write_times!(
573        write_file_windows_attrs,
574        K_WIN_ATTRIBUTES,
575        has_windows_attributes,
576        windows_attributes,
577        write_u32
578    );
579}
580
/// Writes `value` using the variable-length integer encoding of the 7z header.
///
/// The first byte carries one leading 1-bit per extra byte that follows,
/// and its remaining low bits hold the most significant bits of the value.
/// The extra bytes hold the low bytes of the value in little-endian order.
/// Values below 128 take a single byte; the largest values take nine.
pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
    let mut lead: u64 = 0;
    let mut marker: u64 = 0x80;
    let mut extra_bytes: u32 = 0;
    while extra_bytes < 8 {
        // With `extra_bytes` trailing bytes, 7 * (extra_bytes + 1) bits are available.
        if value < (1u64 << (7 * (extra_bytes + 1))) {
            lead |= value >> (8 * extra_bytes);
            break;
        }
        lead |= marker;
        marker >>= 1;
        extra_bytes += 1;
    }

    header.write_all(&[(lead & 0xff) as u8])?;
    for _ in 0..extra_bytes {
        header.write_all(&[(value & 0xff) as u8])?;
        value >>= 8;
    }
    Ok(())
}
602
603fn write_bit_set<W: Write>(mut write: W, bs: &BitSet) -> std::io::Result<()> {
604    let mut cache = 0;
605    let mut shift = 7;
606    for i in 0..bs.get_ref().len() {
607        let set = if bs.contains(i) { 1 } else { 0 };
608        cache |= set << shift;
609        shift -= 1;
610        if shift < 0 {
611            write.write_u8(cache)?;
612            shift = 7;
613            cache = 0;
614        }
615    }
616    if shift != 7 {
617        write.write_u8(cache)?;
618    }
619    Ok(())
620}
621
622struct CompressWrapWriter<'a, W> {
623    writer: W,
624    crc: Hasher,
625    cache: Vec<u8>,
626    bytes_written: &'a mut usize,
627}
628
629impl<'a, W: Write> CompressWrapWriter<'a, W> {
630    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
631        Self {
632            writer,
633            crc: Hasher::new(),
634            cache: Vec::with_capacity(8192),
635            bytes_written,
636        }
637    }
638
639    pub fn crc_value(&mut self) -> u32 {
640        let crc = std::mem::replace(&mut self.crc, Hasher::new());
641        crc.finalize()
642    }
643}
644
645impl<W: Write> Write for CompressWrapWriter<'_, W> {
646    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
647        self.cache.resize(buf.len(), Default::default());
648        let len = self.writer.write(buf)?;
649        self.crc.update(&buf[..len]);
650        *self.bytes_written += len;
651        Ok(len)
652    }
653
654    fn flush(&mut self) -> std::io::Result<()> {
655        self.writer.flush()
656    }
657}