sevenz_rust2/
writer.rs

1mod pack_info;
2mod seq_reader;
3mod unpack_info;
4
5pub use self::seq_reader::*;
6use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
7use crate::{archive::*, encoders, lzma::*, Error, SevenZArchiveEntry};
8use bit_set::BitSet;
9use byteorder::*;
10use crc32fast::Hasher;
11use std::{
12    cell::Cell,
13    fs::File,
14    io::{Read, Seek, Write},
15    path::Path,
16    rc::Rc,
17    sync::Arc,
18};
19
macro_rules! write_times {
    // Short form: every value is serialized with byteorder's `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    // Full form: `$write_fn` names the byteorder method used to serialize each value.
    //
    // Generates `fn $fn_name(&self, header)` which emits the property `$nid` for all files
    // whose `$has_time` flag is set. Nothing is written when no file has the flag set.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            if defined == 0 {
                return Ok(());
            }
            header.write_u8($nid)?;

            // The property body is built in memory first because its byte length has to be
            // written before the body itself.
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == self.files.len() {
                // "all defined" marker: no per-file bit vector follows
                payload.write_u8(1)?;
            } else {
                payload.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut payload, &defined_bits)?;
            }
            // A zero byte precedes the values (NOTE(review): presumably the 7z "external"
            // flag — confirm against the format description).
            payload.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                payload.$write_fn::<LittleEndian>((entry.$time).into())?;
            }

            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)
        }
    };
}
63
// Shorthand for results whose error type is this crate's `Error`.
type Result<T> = std::result::Result<T, Error>;
65
/// Writes a 7z file
///
/// Entry data is written to the output as entries are pushed; the archive header is
/// produced by `finish`.
pub struct SevenZWriter<W: Write> {
    /// Destination the archive is written to.
    output: W,
    /// Entries added so far, in the order they were pushed.
    files: Vec<SevenZArchiveEntry>,
    /// Methods used for entries that do not carry their own `content_methods`.
    content_methods: Arc<Vec<SevenZMethodConfiguration>>,
    /// Packed (compressed) stream bookkeeping collected while entries are pushed.
    pack_info: PackInfo,
    /// Unpacked stream bookkeeping (methods, sizes, CRCs) collected while entries are pushed.
    unpack_info: UnpackInfo,
    /// Whether an AES method found in `content_methods` is also applied to the header.
    encrypt_header: bool,
}
75
76#[cfg(not(target_arch = "wasm32"))]
77impl SevenZWriter<File> {
78    /// Creates a file to write a 7z archive to
79    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
80        let file = File::create(path.as_ref())
81            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
82        Self::new(file)
83    }
84}
85
86impl<W: Write + Seek> SevenZWriter<W> {
87    /// Prepares writer to write a 7z archive to
88    pub fn new(mut writer: W) -> Result<Self> {
89        writer
90            .seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))
91            .map_err(Error::io)?;
92
93        Ok(Self {
94            output: writer,
95            files: Default::default(),
96            content_methods: Arc::new(vec![SevenZMethodConfiguration::new(SevenZMethod::LZMA2)]),
97            pack_info: Default::default(),
98            unpack_info: Default::default(),
99            encrypt_header: true,
100        })
101    }
102
103    /// Sets the default compression methods to use for entry contents.
104    /// The default is LZMA2.
105    /// And currently only support LZMA2
106    pub fn set_content_methods(
107        &mut self,
108        content_methods: Vec<SevenZMethodConfiguration>,
109    ) -> &mut Self {
110        if content_methods.is_empty() {
111            return self;
112        }
113        self.content_methods = Arc::new(content_methods);
114        self
115    }
116
    /// Enables or disables header encryption.
    ///
    /// Defaults to `true`. The flag only has an effect when the configured content
    /// methods contain an AES256SHA256 method: that configuration is then also applied
    /// when the header is encoded.
    pub fn set_encrypt_header(&mut self, enabled: bool) {
        self.encrypt_header = enabled;
    }
122
123    /// Adds an archive `entry` with data from `reader`
124    /// # Examples
125    /// ```no_run
126    /// use sevenz_rust2::*;
127    /// use std::fs::File;
128    /// use std::path::Path;
129    /// let mut sz = SevenZWriter::create("path/to/dest.7z").expect("create writer ok");
130    /// let src = Path::new("path/to/source.txt");
131    /// let name = "source.txt".to_string();
132    /// let entry = sz.push_archive_entry(
133    ///               SevenZArchiveEntry::from_path(&src, name),
134    ///               Some(File::open(src).unwrap()),
135    ///           )
136    ///           .expect("ok");
137    /// let compressed_size = entry.compressed_size;
138    /// sz.finish().expect("done");
139    /// ```
140    pub fn push_archive_entry<R: Read>(
141        &mut self,
142        mut entry: SevenZArchiveEntry,
143        reader: Option<R>,
144    ) -> Result<&SevenZArchiveEntry> {
145        if !entry.is_directory {
146            if let Some(mut r) = reader {
147                let mut compressed_len = 0;
148                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
149                let content_methods = if entry.content_methods.is_empty() {
150                    &self.content_methods
151                } else {
152                    &entry.content_methods
153                };
154                let mut more_sizes: Vec<Rc<Cell<usize>>> =
155                    Vec::with_capacity(content_methods.len() - 1);
156
157                let (crc, size) = {
158                    let mut w =
159                        Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
160                    let mut write_len = 0;
161                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
162                    let mut buf = [0u8; 4096];
163                    loop {
164                        match r.read(&mut buf) {
165                            Ok(n) => {
166                                if n == 0 {
167                                    break;
168                                }
169                                w.write_all(&buf[..n]).map_err(|e| {
170                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
171                                })?;
172                            }
173                            Err(e) => {
174                                return Err(Error::io_msg(
175                                    e,
176                                    format!("Encode entry:{}", entry.name()),
177                                ));
178                            }
179                        }
180                    }
181                    w.flush()
182                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
183                    w.write(&[])
184                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
185
186                    (w.crc_value(), write_len)
187                };
188                let compressed_crc = compressed.crc_value();
189                entry.has_stream = true;
190                entry.size = size as u64;
191                entry.crc = crc as u64;
192                entry.has_crc = true;
193                entry.compressed_crc = compressed_crc as u64;
194                entry.compressed_size = compressed_len as u64;
195                self.pack_info
196                    .add_stream(compressed_len as u64, compressed_crc);
197
198                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
199                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
200                sizes.push(size as u64);
201
202                self.unpack_info.add(content_methods.clone(), sizes, crc);
203
204                self.files.push(entry);
205                return Ok(self.files.last().unwrap());
206            }
207        }
208        entry.has_stream = false;
209        entry.size = 0;
210        entry.compressed_size = 0;
211        entry.has_crc = false;
212        self.files.push(entry);
213        Ok(self.files.last().unwrap())
214    }
215
216    /// [Solid compression](https://en.wikipedia.org/wiki/Solid_compression)
217    /// pack [entries] into one pack
218    ///
219    /// # Panics
220    /// Panics if `entries`'s length not equals to `reader.reader_len()`
221    pub fn push_archive_entries<R: Read>(
222        &mut self,
223        mut entries: Vec<SevenZArchiveEntry>,
224        reader: SeqReader<SourceReader<R>>,
225    ) -> Result<&mut Self> {
226        let mut r = reader;
227        assert_eq!(r.reader_len(), entries.len());
228        let mut compressed_len = 0;
229        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
230        let content_methods = &self.content_methods;
231        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
232
233        let (crc, size) = {
234            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
235            let mut write_len = 0;
236            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
237            let mut buf = [0u8; 4096];
238            fn entries_names(entries: &[SevenZArchiveEntry]) -> String {
239                let mut names = String::with_capacity(512);
240                for ele in entries.iter() {
241                    names.push_str(&ele.name);
242                    names.push(';');
243                    if names.len() > 512 {
244                        break;
245                    }
246                }
247                names
248            }
249            loop {
250                match r.read(&mut buf) {
251                    Ok(n) => {
252                        if n == 0 {
253                            break;
254                        }
255                        w.write_all(&buf[..n]).map_err(|e| {
256                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
257                        })?;
258                    }
259                    Err(e) => {
260                        return Err(Error::io_msg(
261                            e,
262                            format!("Encode entries:{}", entries_names(&entries)),
263                        ));
264                    }
265                }
266            }
267            w.flush().map_err(|e| {
268                let mut names = String::with_capacity(512);
269                for ele in entries.iter() {
270                    names.push_str(&ele.name);
271                    names.push(';');
272                    if names.len() > 512 {
273                        break;
274                    }
275                }
276                Error::io_msg(e, format!("Encode entry:{}", names))
277            })?;
278            w.write(&[]).map_err(|e| {
279                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
280            })?;
281
282            (w.crc_value(), write_len)
283        };
284        let compressed_crc = compressed.crc_value();
285        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
286        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
287        for i in 0..entries.len() {
288            let entry = &mut entries[i];
289            let ri = &r[i];
290            entry.crc = ri.crc_value() as u64;
291            entry.size = ri.read_count() as u64;
292            sub_stream_crcs.push(entry.crc as u32);
293            sub_stream_sizes.push(entry.size);
294            entry.has_crc = true;
295        }
296
297        self.pack_info
298            .add_stream(compressed_len as u64, compressed_crc);
299
300        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
301        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
302        sizes.push(size as u64);
303
304        self.unpack_info.add_multiple(
305            content_methods.clone(),
306            sizes,
307            crc,
308            entries.len() as u64,
309            sub_stream_sizes,
310            sub_stream_crcs,
311        );
312
313        self.files.extend(entries);
314        Ok(self)
315    }
316
317    fn create_writer<'a, O: Write + 'a>(
318        methods: &[SevenZMethodConfiguration],
319        out: O,
320        more_sized: &mut Vec<Rc<Cell<usize>>>,
321    ) -> Result<Box<dyn Write + 'a>> {
322        let mut encoder: Box<dyn Write> = Box::new(out);
323        let mut first = true;
324        for mc in methods.iter() {
325            if !first {
326                let counting = CountingWriter::new(encoder);
327                more_sized.push(counting.counting());
328                encoder = Box::new(encoders::add_encoder(counting, mc)?);
329            } else {
330                let counting = CountingWriter::new(encoder);
331                encoder = Box::new(encoders::add_encoder(counting, mc)?);
332            }
333            first = false;
334        }
335        Ok(encoder)
336    }
337
338    /// Finishes the compression.
339    pub fn finish(mut self) -> std::io::Result<W> {
340        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
341        self.write_encoded_header(&mut header)?;
342        let header_pos = self.output.stream_position()?;
343        self.output.write_all(&header)?;
344        let crc32 = crc32fast::hash(&header);
345        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
346        {
347            let mut hhw = hh.as_mut_slice();
348            //sig
349            hhw.write_all(SEVEN_Z_SIGNATURE)?;
350            //version
351            hhw.write_u8(0)?;
352            hhw.write_u8(2)?;
353            //placeholder for crc: index = 8
354            hhw.write_u32::<LittleEndian>(0)?;
355
356            // start header
357            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
358            hhw.write_u64::<LittleEndian>(0xffffffff & header.len() as u64)?;
359            hhw.write_u32::<LittleEndian>(crc32)?;
360        }
361        let crc32 = crc32fast::hash(&hh[12..]);
362        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
363
364        self.output.seek(std::io::SeekFrom::Start(0))?;
365        self.output.write_all(&hh)?;
366        Ok(self.output)
367    }
368
369    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
370        header.write_u8(K_HEADER)?;
371        header.write_u8(K_MAIN_STREAMS_INFO)?;
372        self.write_streams_info(header)?;
373        self.write_files_info(header)?;
374        header.write_u8(K_END)?;
375        Ok(())
376    }
377
378    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
379        let mut raw_header = Vec::with_capacity(64 * 1024);
380        self.write_header(&mut raw_header)?;
381        let mut pack_info = PackInfo::default();
382
383        let position = self.output.stream_position()?;
384        let pos = position - SIGNATURE_HEADER_SIZE;
385        pack_info.pos = pos;
386
387        let mut more_sizes = vec![];
388        let size = raw_header.len() as u64;
389        let crc32 = crc32fast::hash(&raw_header);
390        let mut methods = vec![];
391
392        if self.encrypt_header {
393            for conf in self.content_methods.iter() {
394                if conf.method.id() == SevenZMethod::AES256SHA256.id() {
395                    methods.push(conf.clone());
396                    break;
397                }
398            }
399        }
400        methods.push(SevenZMethodConfiguration::new(SevenZMethod::LZMA));
401
402        let methods = Arc::new(methods);
403
404        let mut encoded_data = Vec::with_capacity(size as usize / 2);
405
406        let mut compress_size = 0;
407        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
408        {
409            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
410                .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e))?;
411            encoder.write_all(&raw_header)?;
412            let _ = encoder.write(&[])?;
413        }
414
415        let compress_crc = compressed.crc_value();
416        let compress_size = *compressed.bytes_written;
417        if compress_size as u64 + 20 >= size {
418            // compression made it worse. Write raw data
419            header.write_all(&raw_header)?;
420            return Ok(());
421        }
422        self.output.write_all(&encoded_data[..compress_size])?;
423
424        pack_info.add_stream(compress_size as u64, compress_crc);
425
426        let mut unpack_info = UnpackInfo::default();
427        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
428        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
429        sizes.push(size);
430        unpack_info.add(methods, sizes, crc32);
431
432        header.write_u8(K_ENCODED_HEADER)?;
433
434        pack_info.write_to(header)?;
435        unpack_info.write_to(header)?;
436        unpack_info.write_substreams(header)?;
437
438        header.write_u8(K_END)?;
439
440        Ok(())
441    }
442
443    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
444        if self.pack_info.len() > 0 {
445            self.pack_info.write_to(header)?;
446            self.unpack_info.write_to(header)?;
447        }
448        self.unpack_info.write_substreams(header)?;
449
450        header.write_u8(K_END)?;
451        Ok(())
452    }
453
454    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
455        header.write_u8(K_FILES_INFO)?;
456        write_u64(header, self.files.len() as u64)?;
457        self.write_file_empty_streams(header)?;
458        self.write_file_empty_files(header)?;
459        self.write_file_anti_items(header)?;
460        self.write_file_names(header)?;
461        self.write_file_ctimes(header)?;
462        self.write_file_atimes(header)?;
463        self.write_file_mtimes(header)?;
464        self.write_file_windows_attrs(header)?;
465        header.write_u8(K_END)?;
466        Ok(())
467    }
468
469    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
470        let mut has_empty = false;
471        for entry in self.files.iter() {
472            if !entry.has_stream {
473                has_empty = true;
474                break;
475            }
476        }
477        if has_empty {
478            header.write_u8(K_EMPTY_STREAM)?;
479            let mut bitset = BitSet::with_capacity(self.files.len());
480            for (i, entry) in self.files.iter().enumerate() {
481                if !entry.has_stream {
482                    bitset.insert(i);
483                }
484            }
485            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
486            write_bit_set(&mut temp, &bitset)?;
487            write_u64(header, temp.len() as u64)?;
488            header.write_all(temp.as_slice())?;
489        }
490        Ok(())
491    }
492
493    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
494        let mut has_empty = false;
495        let mut empty_stream_counter = 0;
496        let mut bitset = BitSet::new();
497        for entry in self.files.iter() {
498            if !entry.has_stream {
499                let is_dir = entry.is_directory();
500                has_empty |= !is_dir;
501                if !is_dir {
502                    bitset.insert(empty_stream_counter);
503                }
504                empty_stream_counter += 1;
505            }
506        }
507        if has_empty {
508            header.write_u8(K_EMPTY_FILE)?;
509
510            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
511            write_bit_set(&mut temp, &bitset)?;
512            write_u64(header, temp.len() as u64)?;
513            header.write_all(&temp)?;
514        }
515        Ok(())
516    }
517
518    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
519        let mut has_anti = false;
520        let mut counter = 0;
521        let mut bitset = BitSet::new();
522        for entry in self.files.iter() {
523            if !entry.has_stream {
524                let is_anti = entry.is_anti_item();
525                has_anti |= !is_anti;
526                if !is_anti {
527                    bitset.insert(counter);
528                }
529                counter += 1;
530            }
531        }
532        if has_anti {
533            header.write_u8(K_ANTI)?;
534
535            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
536            write_bit_set(&mut temp, &bitset)?;
537            write_u64(header, temp.len() as u64)?;
538            header.write_all(temp.as_slice())?;
539        }
540        Ok(())
541    }
542
543    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
544        header.write_u8(K_NAME)?;
545        let mut temp: Vec<u8> = Vec::with_capacity(128);
546        let out = &mut temp;
547        out.write_u8(0)?;
548        for file in self.files.iter() {
549            for c in file.name().encode_utf16() {
550                let buf = c.to_le_bytes();
551                out.write_all(&buf)?;
552            }
553            out.write_all(&[0u8; 2])?;
554        }
555        write_u64(header, temp.len() as u64)?;
556        header.write_all(temp.as_slice())?;
557        Ok(())
558    }
559
    // Creation times, serialized with the macro's default `write_u64`.
    write_times!(
        write_file_ctimes,
        K_C_TIME,
        has_creation_date,
        creation_date
    );
    // Last-access times, serialized with the macro's default `write_u64`.
    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
    // Last-modification times, serialized with the macro's default `write_u64`.
    write_times!(
        write_file_mtimes,
        K_M_TIME,
        has_last_modified_date,
        last_modified_date
    );
    // Windows attributes reuse the same property layout but are serialized with `write_u32`.
    write_times!(
        write_file_windows_attrs,
        K_WIN_ATTRIBUTES,
        has_windows_attributes,
        windows_attributes,
        write_u32
    );
580}
581
/// Writes `value` in the variable-length integer encoding used by the header.
///
/// The first byte starts with one `1` bit per extra byte that follows (0 to 8 of
/// them); its remaining low bits hold the most significant bits of `value`. The
/// extra bytes carry the low-order bytes of `value`, least significant first.
pub(crate) fn write_u64<W: Write>(header: &mut W, value: u64) -> std::io::Result<()> {
    // Smallest number of extra bytes k such that `value` fits in 7 * (k + 1) bits;
    // 8 when it needs more than 56 bits.
    let extra = (0..8u32)
        .find(|&k| value < (1u64 << (7 * (k + 1))))
        .unwrap_or(8);
    // Top `extra` bits set, e.g. 0b1100_0000 for two extra bytes.
    let marker = (0xff00u16 >> extra) as u8;
    // Bits of `value` above the extra bytes; with eight extra bytes none remain.
    let high_bits = if extra < 8 {
        (value >> (8 * extra)) as u8
    } else {
        0
    };
    header.write_all(&[marker | high_bits])?;
    header.write_all(&value.to_le_bytes()[..extra as usize])
}
603
604fn write_bit_set<W: Write>(mut write: W, bs: &BitSet) -> std::io::Result<()> {
605    let mut cache = 0;
606    let mut shift = 7;
607    for i in 0..bs.get_ref().len() {
608        let set = if bs.contains(i) { 1 } else { 0 };
609        cache |= set << shift;
610        shift -= 1;
611        if shift < 0 {
612            write.write_u8(cache)?;
613            shift = 7;
614            cache = 0;
615        }
616    }
617    if shift != 7 {
618        write.write_u8(cache)?;
619    }
620    Ok(())
621}
622
623struct CompressWrapWriter<'a, W> {
624    writer: W,
625    crc: Hasher,
626    cache: Vec<u8>,
627    bytes_written: &'a mut usize,
628}
629
630impl<'a, W: Write> CompressWrapWriter<'a, W> {
631    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
632        Self {
633            writer,
634            crc: Hasher::new(),
635            cache: Vec::with_capacity(8192),
636            bytes_written,
637        }
638    }
639
640    pub fn crc_value(&mut self) -> u32 {
641        let crc = std::mem::replace(&mut self.crc, Hasher::new());
642        crc.finalize()
643    }
644}
645
646impl<W: Write> Write for CompressWrapWriter<'_, W> {
647    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
648        self.cache.resize(buf.len(), Default::default());
649        let len = self.writer.write(buf)?;
650        self.crc.update(&buf[..len]);
651        *self.bytes_written += len;
652        Ok(len)
653    }
654
655    fn flush(&mut self) -> std::io::Result<()> {
656        self.writer.flush()
657    }
658}