sevenz_rust/
writer.rs

1mod pack_info;
2mod seq_reader;
3mod unpack_info;
4
5use crate::{archive::*, encoders, lzma::*, reader::CRC32, Error, SevenZArchiveEntry};
6use bit_set::BitSet;
7use byteorder::*;
8use std::{
9    cell::Cell,
10    fs::File,
11    io::{Read, Seek, Write},
12    path::Path,
13    rc::Rc,
14    sync::Arc,
15};
16
17pub use self::seq_reader::*;
18use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
19
/// Generates a `SevenZWriter` method that writes one optional per-file
/// property into the files-info section of the header.
///
/// Arguments, in order: the name of the generated method, the property id
/// byte, the `bool` entry field telling whether the value is present, the
/// entry field holding the value and, optionally, the `byteorder` method used
/// to write one value (`write_u64` when omitted).
///
/// The generated method writes nothing when no entry defines the property.
/// Otherwise it writes the property id, the payload length (via the
/// variable-length `write_u64` helper) and the payload:
/// a flag byte (`1` when every entry defines the value, else `0` followed by a
/// bit vector with one bit per entry), a zero byte, then the little-endian
/// values of the entries that define the property, in entry order.
macro_rules! write_times {
    // Short form: values are written with `write_u64::<LittleEndian>`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            if defined == 0 {
                return Ok(());
            }

            header.write_u8($nid)?;
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == self.files.len() {
                // Every entry carries the value: no bit vector needed.
                payload.write_u8(1)?;
            } else {
                payload.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut payload, &defined_bits)?;
            }
            // NOTE(review): in the 7z property layout this zero byte is the
            // `External` marker (values follow inline) — confirm against the spec.
            payload.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                payload.$write_fn::<LittleEndian>((entry.$time).into())?;
            }
            payload.flush()?;
            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
63
/// Module-local result alias whose error type is [`crate::Error`].
type Result<T> = std::result::Result<T, crate::Error>;
65
/// Writes a 7z file
///
/// Entry data is encoded and written to `output` as entries are pushed; the
/// archive headers are assembled and written by `finish`.
pub struct SevenZWriter<W: Write> {
    /// Destination that receives the archive bytes.
    output: W,
    /// Entries pushed so far, in push order.
    files: Vec<SevenZArchiveEntry>,
    /// Methods used for entry contents when an entry does not bring its own.
    content_methods: Arc<Vec<SevenZMethodConfiguration>>,
    /// Size and CRC of every packed stream written so far.
    pack_info: PackInfo,
    /// Methods, unpacked sizes and CRCs recorded for every packed stream.
    unpack_info: UnpackInfo,
    /// Whether the header may be encrypted (see `set_encrypt_header`).
    encrypt_header: bool,
}
75
76#[cfg(not(target_arch = "wasm32"))]
77impl SevenZWriter<File> {
78    /// Creates a file to write a 7z archive to
79    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
80        let file = std::fs::File::create(path.as_ref())
81            .map_err(|e| crate::Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
82        Self::new(file)
83    }
84}
85impl<W: Write + Seek> SevenZWriter<W> {
86    /// Prepares writer to write a 7z archive to
87    pub fn new(mut writer: W) -> Result<Self> {
88        writer
89            .seek(std::io::SeekFrom::Start(
90                crate::archive::SIGNATURE_HEADER_SIZE,
91            ))
92            .map_err(Error::io)?;
93
94        Ok(Self {
95            output: writer,
96            files: Default::default(),
97            content_methods: Arc::new(vec![SevenZMethodConfiguration::new(SevenZMethod::LZMA2)]),
98            pack_info: Default::default(),
99            unpack_info: Default::default(),
100            encrypt_header: true,
101        })
102    }
103
104    /// Sets the default compression methods to use for entry contents.
105    /// The default is LZMA2.
106    /// And currently only support LZMA2
107    ///
108    pub fn set_content_methods(
109        &mut self,
110        content_methods: Vec<SevenZMethodConfiguration>,
111    ) -> &mut Self {
112        if content_methods.is_empty() {
113            return self;
114        }
115        self.content_methods = Arc::new(content_methods);
116        self
117    }
118
    /// Enables or disables header encryption.
    ///
    /// Enabled by default. The flag only takes effect when the writer's
    /// content methods contain an AES256SHA256 configuration, which is then
    /// also applied to the encoded header.
    pub fn set_encrypt_header(&mut self, enabled: bool) {
        self.encrypt_header = enabled;
    }
124
125    /// Create an archive entry using the file in `path` and entry_name provided.
126    /// #deprecated use SevenZArchiveEntry::from_path instead
127    #[deprecated]
128    pub fn create_archive_entry(path: impl AsRef<Path>, entry_name: String) -> SevenZArchiveEntry {
129        let path = path.as_ref();
130
131        let mut entry = SevenZArchiveEntry {
132            name: entry_name,
133            has_stream: path.is_file(),
134            is_directory: path.is_dir(),
135            ..Default::default()
136        };
137
138        if let Ok(meta) = path.metadata() {
139            if let Ok(modified) = meta.modified() {
140                entry.last_modified_date = modified
141                    .try_into()
142                    .expect("last modified date should be in the range of file time");
143                entry.has_last_modified_date = entry.last_modified_date.to_raw() > 0;
144            }
145        }
146        entry
147    }
148
149    /// Adds an archive `entry` with data from `reader`
150    /// # Examples
151    /// ```no_run
152    /// use sevenz_rust::*;
153    /// use std::fs::File;
154    /// use std::path::Path;
155    /// let mut sz = SevenZWriter::create("path/to/dest.7z").expect("create writer ok");
156    /// let src = Path::new("path/to/source.txt");
157    /// let name = "source.txt".to_string();
158    /// let entry = sz.push_archive_entry(
159    ///               SevenZArchiveEntry::from_path(&src, name),
160    ///               Some(File::open(src).unwrap()),
161    ///           )
162    ///           .expect("ok");
163    /// let compressed_size = entry.compressed_size;
164    /// sz.finish().expect("done");
165    /// ```
166    pub fn push_archive_entry<R: Read>(
167        &mut self,
168        mut entry: SevenZArchiveEntry,
169        reader: Option<R>,
170    ) -> Result<&SevenZArchiveEntry> {
171        if !entry.is_directory {
172            if let Some(mut r) = reader {
173                let mut compressed_len = 0;
174                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
175                let content_methods = if entry.content_methods.is_empty() {
176                    &self.content_methods
177                } else {
178                    &entry.content_methods
179                };
180                let mut more_sizes: Vec<Rc<Cell<usize>>> =
181                    Vec::with_capacity(content_methods.len() - 1);
182
183                let (crc, size) = {
184                    let mut w =
185                        Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
186                    let mut write_len = 0;
187                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
188                    let mut buf = [0u8; 4096];
189                    loop {
190                        match r.read(&mut buf) {
191                            Ok(n) => {
192                                if n == 0 {
193                                    break;
194                                }
195                                w.write_all(&buf[..n]).map_err(|e| {
196                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
197                                })?;
198                            }
199                            Err(e) => {
200                                return Err(Error::io_msg(
201                                    e,
202                                    format!("Encode entry:{}", entry.name()),
203                                ));
204                            }
205                        }
206                    }
207                    w.flush()
208                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
209                    w.write(&[])
210                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
211
212                    (w.crc_value(), write_len)
213                };
214                let compressed_crc = compressed.crc_value();
215                entry.has_stream = true;
216                entry.size = size as u64;
217                entry.crc = crc as u64;
218                entry.has_crc = true;
219                entry.compressed_crc = compressed_crc as u64;
220                entry.compressed_size = compressed_len as u64;
221                self.pack_info
222                    .add_stream(compressed_len as u64, compressed_crc);
223
224                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
225                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
226                sizes.push(size as u64);
227
228                self.unpack_info.add(content_methods.clone(), sizes, crc);
229
230                self.files.push(entry);
231                return Ok(self.files.last().unwrap());
232            }
233        }
234        entry.has_stream = false;
235        entry.size = 0;
236        entry.compressed_size = 0;
237        entry.has_crc = false;
238        self.files.push(entry);
239        Ok(self.files.last().unwrap())
240    }
241
242    /// [Solid compression](https://en.wikipedia.org/wiki/Solid_compression)
243    /// pack [entries] into one pack
244    /// # Panics
245    /// Panics if `entries`'s length not equals to `reader.reader_len()`
246    pub fn push_archive_entries<R: Read>(
247        &mut self,
248        mut entries: Vec<SevenZArchiveEntry>,
249        reader: SeqReader<SourceReader<R>>,
250    ) -> Result<&mut Self> {
251        let mut r = reader;
252        assert_eq!(r.reader_len(), entries.len());
253        let mut compressed_len = 0;
254        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
255        let content_methods = &self.content_methods;
256        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
257
258        let (crc, size) = {
259            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
260            let mut write_len = 0;
261            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
262            let mut buf = [0u8; 4096];
263            fn entries_names(entries: &[SevenZArchiveEntry]) -> String {
264                let mut names = String::with_capacity(512);
265                for ele in entries.iter() {
266                    names.push_str(&ele.name);
267                    names.push(';');
268                    if names.len() > 512 {
269                        break;
270                    }
271                }
272                names
273            }
274            loop {
275                match r.read(&mut buf) {
276                    Ok(n) => {
277                        if n == 0 {
278                            break;
279                        }
280                        w.write_all(&buf[..n]).map_err(|e| {
281                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
282                        })?;
283                    }
284                    Err(e) => {
285                        return Err(Error::io_msg(
286                            e,
287                            format!("Encode entries:{}", entries_names(&entries)),
288                        ));
289                    }
290                }
291            }
292            w.flush().map_err(|e| {
293                let mut names = String::with_capacity(512);
294                for ele in entries.iter() {
295                    names.push_str(&ele.name);
296                    names.push(';');
297                    if names.len() > 512 {
298                        break;
299                    }
300                }
301                Error::io_msg(e, format!("Encode entry:{}", names))
302            })?;
303            w.write(&[]).map_err(|e| {
304                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
305            })?;
306
307            (w.crc_value(), write_len)
308        };
309        let compressed_crc = compressed.crc_value();
310        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
311        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
312        for i in 0..entries.len() {
313            let entry = &mut entries[i];
314            let ri = &r[i];
315            entry.crc = ri.crc_value() as u64;
316            entry.size = ri.read_count() as u64;
317            sub_stream_crcs.push(entry.crc as u32);
318            sub_stream_sizes.push(entry.size);
319            entry.has_crc = true;
320        }
321
322        self.pack_info
323            .add_stream(compressed_len as u64, compressed_crc);
324
325        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
326        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
327        sizes.push(size as u64);
328
329        self.unpack_info.add_multiple(
330            content_methods.clone(),
331            sizes,
332            crc,
333            entries.len() as u64,
334            sub_stream_sizes,
335            sub_stream_crcs,
336        );
337
338        self.files.extend(entries);
339        Ok(self)
340    }
341
342    fn create_writer<'a, O: Write + 'a>(
343        methods: &[SevenZMethodConfiguration],
344        out: O,
345        more_sized: &mut Vec<Rc<Cell<usize>>>,
346    ) -> Result<Box<dyn Write + 'a>> {
347        let mut encoder: Box<dyn Write> = Box::new(out);
348        let mut first = true;
349        for mc in methods.iter() {
350            if !first {
351                let counting = CountingWriter::new(encoder);
352                more_sized.push(counting.counting());
353                encoder = Box::new(encoders::add_encoder(counting, mc)?);
354            } else {
355                let counting = CountingWriter::new(encoder);
356                encoder = Box::new(encoders::add_encoder(counting, mc)?);
357            }
358            first = false;
359        }
360        Ok(encoder)
361    }
362
363    /// Finishes the compression.
364    pub fn finish(mut self) -> std::io::Result<W> {
365        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
366        self.write_encoded_header(&mut header)?;
367        let header_pos = self.output.stream_position()?;
368        self.output.write_all(&header)?;
369        let crc32 = CRC32.checksum(&header);
370        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
371        {
372            let mut hhw = hh.as_mut_slice();
373            //sig
374            hhw.write_all(SEVEN_Z_SIGNATURE)?;
375            //version
376            hhw.write_u8(0)?;
377            hhw.write_u8(2)?;
378            //placeholder for crc: index = 8
379            hhw.write_u32::<LittleEndian>(0)?;
380
381            // start header
382            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
383            hhw.write_u64::<LittleEndian>(0xffffffff & header.len() as u64)?;
384            hhw.write_u32::<LittleEndian>(crc32)?;
385        }
386        let crc32 = CRC32.checksum(&hh[12..]);
387        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
388
389        self.output.seek(std::io::SeekFrom::Start(0))?;
390        self.output.write_all(&hh)?;
391        Ok(self.output)
392    }
393
394    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
395        header.write_u8(K_HEADER)?;
396        header.write_u8(K_MAIN_STREAMS_INFO)?;
397        self.write_streams_info(header)?;
398        self.write_files_info(header)?;
399        header.write_u8(K_END)?;
400        Ok(())
401    }
402
403    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
404        let mut raw_header = Vec::with_capacity(64 * 1024);
405        self.write_header(&mut raw_header)?;
406        let mut pack_info = PackInfo::default();
407
408        let position = self.output.stream_position()?;
409        let pos = position - SIGNATURE_HEADER_SIZE;
410        pack_info.pos = pos;
411
412        let mut more_sizes = vec![];
413        let size = raw_header.len() as u64;
414        let crc = CRC32.checksum(&raw_header);
415        let mut methods = vec![];
416
417        if self.encrypt_header {
418            for conf in self.content_methods.iter() {
419                if conf.method.id() == SevenZMethod::AES256SHA256.id() {
420                    methods.push(conf.clone());
421                    break;
422                }
423            }
424        }
425        methods.push(SevenZMethodConfiguration::new(SevenZMethod::LZMA));
426
427        let methods = Arc::new(methods);
428
429        let mut encoded_data = Vec::with_capacity(size as usize / 2);
430
431        let mut compress_size = 0;
432        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
433        {
434            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
435                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
436            encoder.write_all(&raw_header)?;
437            let _ = encoder.write(&[])?;
438        }
439
440        let compress_crc = compressed.crc_value();
441        let compress_size = *compressed.bytes_written;
442        if compress_size as u64 + 20 >= size {
443            // compression made it worse. Write raw data
444            header.write_all(&raw_header)?;
445            return Ok(());
446        }
447        self.output.write_all(&encoded_data[..compress_size])?;
448
449        pack_info.add_stream(compress_size as u64, compress_crc);
450
451        let mut unpack_info = UnpackInfo::default();
452        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
453        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
454        sizes.push(size);
455        unpack_info.add(methods, sizes, crc);
456
457        header.write_u8(K_ENCODED_HEADER)?;
458
459        pack_info.write_to(header)?;
460        unpack_info.write_to(header)?;
461        unpack_info.write_substreams(header)?;
462
463        header.write_u8(K_END)?;
464
465        Ok(())
466    }
467
468    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
469        if self.pack_info.len() > 0 {
470            self.pack_info.write_to(header)?;
471            self.unpack_info.write_to(header)?;
472        }
473        self.unpack_info.write_substreams(header)?;
474
475        header.write_u8(K_END)?;
476        Ok(())
477    }
478
479    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
480        header.write_u8(K_FILES_INFO)?;
481        write_u64(header, self.files.len() as u64)?;
482        self.write_file_empty_streams(header)?;
483        self.write_file_empty_files(header)?;
484        self.write_file_anti_items(header)?;
485        self.write_file_names(header)?;
486        self.write_file_ctimes(header)?;
487        self.write_file_atimes(header)?;
488        self.write_file_mtimes(header)?;
489        self.write_file_windows_attrs(header)?;
490        header.write_u8(K_END)?;
491        Ok(())
492    }
493
494    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
495        let mut has_empty = false;
496        for entry in self.files.iter() {
497            if !entry.has_stream {
498                has_empty = true;
499                break;
500            }
501        }
502        if has_empty {
503            header.write_u8(K_EMPTY_STREAM)?;
504            let mut bitset = BitSet::with_capacity(self.files.len());
505            for (i, entry) in self.files.iter().enumerate() {
506                if !entry.has_stream {
507                    bitset.insert(i);
508                }
509            }
510            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
511            write_bit_set(&mut temp, &bitset)?;
512            write_u64(header, temp.len() as u64)?;
513            header.write_all(temp.as_slice())?;
514        }
515        Ok(())
516    }
517    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
518        let mut has_empty = false;
519        let mut empty_stream_counter = 0;
520        let mut bitset = BitSet::new();
521        for entry in self.files.iter() {
522            if !entry.has_stream {
523                let is_dir = entry.is_directory();
524                has_empty |= !is_dir;
525                if !is_dir {
526                    bitset.insert(empty_stream_counter);
527                }
528                empty_stream_counter += 1;
529            }
530        }
531        if has_empty {
532            header.write_u8(K_EMPTY_FILE)?;
533
534            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
535            write_bit_set(&mut temp, &bitset)?;
536            write_u64(header, temp.len() as u64)?;
537            header.write_all(&temp)?;
538        }
539        Ok(())
540    }
541
542    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
543        let mut has_anti = false;
544        let mut counter = 0;
545        let mut bitset = BitSet::new();
546        for entry in self.files.iter() {
547            if !entry.has_stream {
548                let is_anti = entry.is_anti_item();
549                has_anti |= !is_anti;
550                if !is_anti {
551                    bitset.insert(counter);
552                }
553                counter += 1;
554            }
555        }
556        if has_anti {
557            header.write_u8(K_ANTI)?;
558
559            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
560            write_bit_set(&mut temp, &bitset)?;
561            write_u64(header, temp.len() as u64)?;
562            header.write_all(temp.as_slice())?;
563        }
564        Ok(())
565    }
566    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
567        header.write_u8(K_NAME)?;
568        let mut temp: Vec<u8> = Vec::with_capacity(128);
569        let out = &mut temp;
570        out.write_u8(0)?;
571        for file in self.files.iter() {
572            for c in file.name().encode_utf16() {
573                let buf = c.to_le_bytes();
574                out.write_all(&buf)?;
575            }
576            out.write_all(&[0u8; 2])?;
577        }
578        write_u64(header, temp.len() as u64)?;
579        header.write_all(temp.as_slice())?;
580        Ok(())
581    }
582
    // Property writers generated by `write_times!`. The three timestamp
    // writers use the macro's default value writer (little-endian u64).
    write_times!(
        write_file_ctimes,
        K_C_TIME,
        has_creation_date,
        creation_date
    );
    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
    write_times!(
        write_file_mtimes,
        K_M_TIME,
        has_last_modified_date,
        last_modified_date
    );
    // Windows attributes reuse the same layout but are written as
    // little-endian u32 values.
    write_times!(
        write_file_windows_attrs,
        K_WIN_ATTRIBUTES,
        has_windows_attributes,
        windows_attributes,
        write_u32
    );
603}
604
605pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
606    let mut first = 0;
607    let mut mask = 0x80;
608    let mut i = 0;
609    while i < 8 {
610        if value < (1u64 << (7 * (i + 1))) {
611            first |= value >> (8 * i);
612            break;
613        }
614        first |= mask;
615        mask >>= 1;
616        i += 1;
617    }
618    header.write_u8((first & 0xff) as u8)?;
619    while i > 0 {
620        header.write_u8((value & 0xff) as u8)?;
621        value >>= 8;
622        i -= 1;
623    }
624    Ok(())
625}
626
627fn write_bit_set<W: Write>(mut write: W, bs: &BitSet) -> std::io::Result<()> {
628    let mut cache = 0;
629    let mut shift = 7;
630    for i in 0..bs.get_ref().len() {
631        let set = if bs.contains(i) { 1 } else { 0 };
632        cache |= set << shift;
633        shift -= 1;
634        if shift < 0 {
635            write.write_u8(cache)?;
636            shift = 7;
637            cache = 0;
638        }
639    }
640    if shift != 7 {
641        write.write_u8(cache)?;
642    }
643    Ok(())
644}
645
646struct CompressWrapWriter<'a, W> {
647    writer: W,
648    crc: crc::Digest<'static, u32>,
649    cache: Vec<u8>,
650    bytes_written: &'a mut usize,
651}
652impl<'a, W: Write> CompressWrapWriter<'a, W> {
653    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
654        Self {
655            writer,
656            crc: crate::reader::CRC32.digest(),
657            cache: Vec::with_capacity(8192),
658            bytes_written,
659        }
660    }
661
662    pub fn crc_value(&mut self) -> u32 {
663        let crc = std::mem::replace(&mut self.crc, crate::reader::CRC32.digest());
664        crc.finalize()
665    }
666}
667
668impl<'a, W: Write> Write for CompressWrapWriter<'a, W> {
669    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
670        self.cache.resize(buf.len(), Default::default());
671        let len = self.writer.write(buf)?;
672        self.crc.update(&buf[..len]);
673        *self.bytes_written += len;
674        Ok(len)
675    }
676
677    fn flush(&mut self) -> std::io::Result<()> {
678        self.writer.flush()
679    }
680}