sevenz_rust2/
writer.rs

1mod counting_writer;
2#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
3mod lazy_file_reader;
4mod pack_info;
5mod seq_reader;
6mod source_reader;
7mod unpack_info;
8
9use std::{
10    cell::Cell,
11    io::{Read, Seek, Write},
12    rc::Rc,
13    sync::Arc,
14};
15#[cfg(not(target_arch = "wasm32"))]
16use std::{fs::File, path::Path};
17
18pub(crate) use counting_writer::CountingWriter;
19use crc32fast::Hasher;
20
21#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
22pub(crate) use self::lazy_file_reader::LazyFileReader;
23pub(crate) use self::seq_reader::SeqReader;
24pub use self::source_reader::SourceReader;
25use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
26use crate::{
27    ArchiveEntry, AutoFinish, AutoFinisher, ByteWriter, Error,
28    archive::*,
29    bitset::{BitSet, write_bit_set},
30    encoder,
31};
32
/// Generates a method that writes one optional per-file property (a timestamp or attribute
/// word) to the header.
///
/// * `$fn_name`  - name of the generated method.
/// * `$nid`      - property id byte written before the payload.
/// * `$has_time` - boolean entry field telling whether the value is defined for that entry.
/// * `$time`     - entry field holding the value; it is converted with `.into()`.
/// * `$write_fn` - method called on the temporary buffer to emit each value
///   (defaults to `write_u64` in the four-argument form).
macro_rules! write_times {
    // Four-argument form: values are emitted with `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            // Nothing is written at all when no entry defines the value.
            if defined == 0 {
                return Ok(());
            }

            header.write_u8($nid)?;
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            let mut out = &mut payload;
            if defined == self.files.len() {
                // Every entry defines the value: a single marker byte replaces the bit set.
                out.write_u8(1)?;
            } else {
                out.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut out, &defined_bits)?;
            }
            out.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                out.$write_fn((entry.$time).into())?;
            }
            out.flush()?;

            // The payload is length-prefixed, so it is assembled in memory first.
            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
76
/// Module-local result alias that uses the crate's [`Error`] as the error type.
type Result<T> = std::result::Result<T, Error>;
78
/// Writes a 7z archive file.
///
/// Entry data is encoded and written to the output as entries are pushed; the archive
/// header is produced when the writer is finished.
pub struct ArchiveWriter<W: Write> {
    /// Destination the archive bytes are written to.
    output: W,
    /// Entries pushed so far, in the order they were added.
    files: Vec<ArchiveEntry>,
    /// Encoder configurations applied to entry data (LZMA2 unless changed).
    content_methods: Arc<Vec<EncoderConfiguration>>,
    /// Size and CRC of every packed stream written so far.
    pack_info: PackInfo,
    /// Methods, unpacked sizes and CRCs recorded for every pushed stream.
    unpack_info: UnpackInfo,
    /// Whether header encryption was requested.
    encrypt_header: bool,
}
88
89#[cfg(not(target_arch = "wasm32"))]
90impl ArchiveWriter<File> {
91    /// Creates a file to write a 7z archive to.
92    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
93        let file = File::create(path.as_ref())
94            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
95        Self::new(file)
96    }
97}
98
99impl<W: Write + Seek> ArchiveWriter<W> {
100    /// Prepares writer to write a 7z archive to.
101    pub fn new(mut writer: W) -> Result<Self> {
102        writer.seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))?;
103
104        Ok(Self {
105            output: writer,
106            files: Default::default(),
107            content_methods: Arc::new(vec![EncoderConfiguration::new(EncoderMethod::LZMA2)]),
108            pack_info: Default::default(),
109            unpack_info: Default::default(),
110            encrypt_header: true,
111        })
112    }
113
    /// Returns a wrapper around `self` that will finish the stream on drop.
    ///
    /// The writer is moved into the returned [`AutoFinisher`].
    pub fn auto_finish(self) -> AutoFinisher<Self> {
        AutoFinisher(Some(self))
    }
118
119    /// Sets the default compression methods to use for entry data. Default is LZMA2.
120    pub fn set_content_methods(&mut self, content_methods: Vec<EncoderConfiguration>) -> &mut Self {
121        if content_methods.is_empty() {
122            return self;
123        }
124        self.content_methods = Arc::new(content_methods);
125        self
126    }
127
    /// Whether to enable the encryption of the header. Default is `true`.
    ///
    /// The header is only encrypted when the content methods contain an
    /// `EncoderMethod::AES256_SHA256` configuration.
    pub fn set_encrypt_header(&mut self, enabled: bool) {
        self.encrypt_header = enabled;
    }
132
133    /// Non-solid compression - Adds an archive `entry` with data from `reader`.
134    ///
135    /// # Example
136    /// ```no_run
137    /// use std::{fs::File, path::Path};
138    ///
139    /// use sevenz_rust2::*;
140    /// let mut sz = ArchiveWriter::create("path/to/dest.7z").expect("create writer ok");
141    /// let src = Path::new("path/to/source.txt");
142    /// let name = "source.txt".to_string();
143    /// let entry = sz
144    ///     .push_archive_entry(
145    ///         ArchiveEntry::from_path(&src, name),
146    ///         Some(File::open(src).unwrap()),
147    ///     )
148    ///     .expect("ok");
149    /// let compressed_size = entry.compressed_size;
150    /// sz.finish().expect("done");
151    /// ```
152    pub fn push_archive_entry<R: Read>(
153        &mut self,
154        mut entry: ArchiveEntry,
155        reader: Option<R>,
156    ) -> Result<&ArchiveEntry> {
157        if !entry.is_directory {
158            if let Some(mut r) = reader {
159                let mut compressed_len = 0;
160                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
161
162                let mut more_sizes: Vec<Rc<Cell<usize>>> =
163                    Vec::with_capacity(self.content_methods.len() - 1);
164
165                let (crc, size) = {
166                    let mut w = Self::create_writer(
167                        &self.content_methods,
168                        &mut compressed,
169                        &mut more_sizes,
170                    )?;
171                    let mut write_len = 0;
172                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
173                    let mut buf = [0u8; 4096];
174                    loop {
175                        match r.read(&mut buf) {
176                            Ok(n) => {
177                                if n == 0 {
178                                    break;
179                                }
180                                w.write_all(&buf[..n]).map_err(|e| {
181                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
182                                })?;
183                            }
184                            Err(e) => {
185                                return Err(Error::io_msg(
186                                    e,
187                                    format!("Encode entry:{}", entry.name()),
188                                ));
189                            }
190                        }
191                    }
192                    w.flush()
193                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
194                    w.write(&[])
195                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
196
197                    (w.crc_value(), write_len)
198                };
199                let compressed_crc = compressed.crc_value();
200                entry.has_stream = true;
201                entry.size = size as u64;
202                entry.crc = crc as u64;
203                entry.has_crc = true;
204                entry.compressed_crc = compressed_crc as u64;
205                entry.compressed_size = compressed_len as u64;
206                self.pack_info
207                    .add_stream(compressed_len as u64, compressed_crc);
208
209                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
210                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
211                sizes.push(size as u64);
212
213                self.unpack_info
214                    .add(self.content_methods.clone(), sizes, crc);
215
216                self.files.push(entry);
217                return Ok(self.files.last().unwrap());
218            }
219        }
220        entry.has_stream = false;
221        entry.size = 0;
222        entry.compressed_size = 0;
223        entry.has_crc = false;
224        self.files.push(entry);
225        Ok(self.files.last().unwrap())
226    }
227
228    /// Solid compression - packs `entries` into one pack.
229    ///
230    /// # Panics
231    /// * If `entries`'s length not equals to `reader.reader_len()`
232    pub fn push_archive_entries<R: Read>(
233        &mut self,
234        entries: Vec<ArchiveEntry>,
235        reader: Vec<SourceReader<R>>,
236    ) -> Result<&mut Self> {
237        let mut entries = entries;
238        let mut r = SeqReader::new(reader);
239        assert_eq!(r.reader_len(), entries.len());
240        let mut compressed_len = 0;
241        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
242        let content_methods = &self.content_methods;
243        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
244
245        let (crc, size) = {
246            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
247            let mut write_len = 0;
248            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
249            let mut buf = [0u8; 4096];
250
251            fn entries_names(entries: &[ArchiveEntry]) -> String {
252                let mut names = String::with_capacity(512);
253                for ele in entries.iter() {
254                    names.push_str(&ele.name);
255                    names.push(';');
256                    if names.len() > 512 {
257                        break;
258                    }
259                }
260                names
261            }
262
263            loop {
264                match r.read(&mut buf) {
265                    Ok(n) => {
266                        if n == 0 {
267                            break;
268                        }
269                        w.write_all(&buf[..n]).map_err(|e| {
270                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
271                        })?;
272                    }
273                    Err(e) => {
274                        return Err(Error::io_msg(
275                            e,
276                            format!("Encode entries:{}", entries_names(&entries)),
277                        ));
278                    }
279                }
280            }
281            w.flush().map_err(|e| {
282                let mut names = String::with_capacity(512);
283                for ele in entries.iter() {
284                    names.push_str(&ele.name);
285                    names.push(';');
286                    if names.len() > 512 {
287                        break;
288                    }
289                }
290                Error::io_msg(e, format!("Encode entry:{names}"))
291            })?;
292            w.write(&[]).map_err(|e| {
293                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
294            })?;
295
296            (w.crc_value(), write_len)
297        };
298        let compressed_crc = compressed.crc_value();
299        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
300        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
301        for i in 0..entries.len() {
302            let entry = &mut entries[i];
303            let ri = &r[i];
304            entry.crc = ri.crc_value() as u64;
305            entry.size = ri.read_count() as u64;
306            sub_stream_crcs.push(entry.crc as u32);
307            sub_stream_sizes.push(entry.size);
308            entry.has_crc = true;
309        }
310
311        self.pack_info
312            .add_stream(compressed_len as u64, compressed_crc);
313
314        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
315        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
316        sizes.push(size as u64);
317
318        self.unpack_info.add_multiple(
319            content_methods.clone(),
320            sizes,
321            crc,
322            entries.len() as u64,
323            sub_stream_sizes,
324            sub_stream_crcs,
325        );
326
327        self.files.extend(entries);
328        Ok(self)
329    }
330
331    fn create_writer<'a, O: Write + 'a>(
332        methods: &[EncoderConfiguration],
333        out: O,
334        more_sized: &mut Vec<Rc<Cell<usize>>>,
335    ) -> Result<Box<dyn Write + 'a>> {
336        let mut encoder: Box<dyn Write> = Box::new(out);
337        let mut first = true;
338        for mc in methods.iter() {
339            if !first {
340                let counting = CountingWriter::new(encoder);
341                more_sized.push(counting.counting());
342                encoder = Box::new(encoder::add_encoder(counting, mc)?);
343            } else {
344                let counting = CountingWriter::new(encoder);
345                encoder = Box::new(encoder::add_encoder(counting, mc)?);
346            }
347            first = false;
348        }
349        Ok(encoder)
350    }
351
352    /// Finishes the compression.
353    pub fn finish(mut self) -> std::io::Result<W> {
354        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
355        self.write_encoded_header(&mut header)?;
356        let header_pos = self.output.stream_position()?;
357        self.output.write_all(&header)?;
358        let crc32 = crc32fast::hash(&header);
359        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
360        {
361            let mut hhw = hh.as_mut_slice();
362            //sig
363            hhw.write_all(SEVEN_Z_SIGNATURE)?;
364            //version
365            hhw.write_u8(0)?;
366            hhw.write_u8(4)?;
367            //placeholder for crc: index = 8
368            hhw.write_u32(0)?;
369
370            // start header
371            hhw.write_u64(header_pos - SIGNATURE_HEADER_SIZE)?;
372            hhw.write_u64(0xFFFFFFFF & header.len() as u64)?;
373            hhw.write_u32(crc32)?;
374        }
375        let crc32 = crc32fast::hash(&hh[12..]);
376        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
377
378        self.output.seek(std::io::SeekFrom::Start(0))?;
379        self.output.write_all(&hh)?;
380        self.output.flush()?;
381        Ok(self.output)
382    }
383
    /// Writes the plain (unencoded) archive header into `header`.
    ///
    /// Emits `K_HEADER`, then `K_MAIN_STREAMS_INFO` followed by the streams info, then the
    /// files info, and a closing `K_END`.
    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
        header.write_u8(K_HEADER)?;
        header.write_u8(K_MAIN_STREAMS_INFO)?;
        self.write_streams_info(header)?;
        self.write_files_info(header)?;
        header.write_u8(K_END)?;
        Ok(())
    }
392
393    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
394        let mut raw_header = Vec::with_capacity(64 * 1024);
395        self.write_header(&mut raw_header)?;
396        let mut pack_info = PackInfo::default();
397
398        let position = self.output.stream_position()?;
399        let pos = position - SIGNATURE_HEADER_SIZE;
400        pack_info.pos = pos;
401
402        let mut more_sizes = vec![];
403        let size = raw_header.len() as u64;
404        let crc32 = crc32fast::hash(&raw_header);
405        let mut methods = vec![];
406
407        if self.encrypt_header {
408            for conf in self.content_methods.iter() {
409                if conf.method.id() == EncoderMethod::AES256_SHA256.id() {
410                    methods.push(conf.clone());
411                    break;
412                }
413            }
414        }
415
416        methods.push(EncoderConfiguration::new(EncoderMethod::LZMA));
417
418        let methods = Arc::new(methods);
419
420        let mut encoded_data = Vec::with_capacity(size as usize / 2);
421
422        let mut compress_size = 0;
423        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
424        {
425            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
426                .map_err(std::io::Error::other)?;
427            encoder.write_all(&raw_header)?;
428            encoder.flush()?;
429            let _ = encoder.write(&[])?;
430        }
431
432        let compress_crc = compressed.crc_value();
433        let compress_size = *compressed.bytes_written;
434        if compress_size as u64 + 20 >= size {
435            // compression made it worse. Write raw data
436            header.write_all(&raw_header)?;
437            return Ok(());
438        }
439        self.output.write_all(&encoded_data[..compress_size])?;
440
441        pack_info.add_stream(compress_size as u64, compress_crc);
442
443        let mut unpack_info = UnpackInfo::default();
444        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
445        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
446        sizes.push(size);
447        unpack_info.add(methods, sizes, crc32);
448
449        header.write_u8(K_ENCODED_HEADER)?;
450
451        pack_info.write_to(header)?;
452        unpack_info.write_to(header)?;
453        unpack_info.write_substreams(header)?;
454
455        header.write_u8(K_END)?;
456
457        Ok(())
458    }
459
    /// Writes the streams-info section into `header`.
    ///
    /// Pack info and unpack info are written only when at least one packed stream has been
    /// recorded; the substreams info and the closing `K_END` are always written.
    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
        if self.pack_info.len() > 0 {
            self.pack_info.write_to(header)?;
            self.unpack_info.write_to(header)?;
        }
        self.unpack_info.write_substreams(header)?;

        header.write_u8(K_END)?;
        Ok(())
    }
470
    /// Writes the files-info section into `header`.
    ///
    /// Emits `K_FILES_INFO`, the number of entries, then each per-file property block in a
    /// fixed order (empty streams, empty files, anti items, names, creation/access/modified
    /// times, Windows attributes) and a closing `K_END`.
    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
        header.write_u8(K_FILES_INFO)?;
        write_u64(header, self.files.len() as u64)?;
        self.write_file_empty_streams(header)?;
        self.write_file_empty_files(header)?;
        self.write_file_anti_items(header)?;
        self.write_file_names(header)?;
        self.write_file_ctimes(header)?;
        self.write_file_atimes(header)?;
        self.write_file_mtimes(header)?;
        self.write_file_windows_attrs(header)?;
        header.write_u8(K_END)?;
        Ok(())
    }
485
486    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
487        let mut has_empty = false;
488        for entry in self.files.iter() {
489            if !entry.has_stream {
490                has_empty = true;
491                break;
492            }
493        }
494        if has_empty {
495            header.write_u8(K_EMPTY_STREAM)?;
496            let mut bitset = BitSet::with_capacity(self.files.len());
497            for (i, entry) in self.files.iter().enumerate() {
498                if !entry.has_stream {
499                    bitset.insert(i);
500                }
501            }
502            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
503            write_bit_set(&mut temp, &bitset)?;
504            write_u64(header, temp.len() as u64)?;
505            header.write_all(temp.as_slice())?;
506        }
507        Ok(())
508    }
509
510    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
511        let mut has_empty = false;
512        let mut empty_stream_counter = 0;
513        let mut bitset = BitSet::new();
514        for entry in self.files.iter() {
515            if !entry.has_stream {
516                let is_dir = entry.is_directory();
517                has_empty |= !is_dir;
518                if !is_dir {
519                    bitset.insert(empty_stream_counter);
520                }
521                empty_stream_counter += 1;
522            }
523        }
524        if has_empty {
525            header.write_u8(K_EMPTY_FILE)?;
526
527            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
528            write_bit_set(&mut temp, &bitset)?;
529            write_u64(header, temp.len() as u64)?;
530            header.write_all(&temp)?;
531        }
532        Ok(())
533    }
534
535    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
536        let mut has_anti = false;
537        let mut counter = 0;
538        let mut bitset = BitSet::new();
539        for entry in self.files.iter() {
540            if !entry.has_stream {
541                let is_anti = entry.is_anti_item();
542                has_anti |= !is_anti;
543                if !is_anti {
544                    bitset.insert(counter);
545                }
546                counter += 1;
547            }
548        }
549        if has_anti {
550            header.write_u8(K_ANTI)?;
551
552            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
553            write_bit_set(&mut temp, &bitset)?;
554            write_u64(header, temp.len() as u64)?;
555            header.write_all(temp.as_slice())?;
556        }
557        Ok(())
558    }
559
560    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
561        header.write_u8(K_NAME)?;
562        let mut temp: Vec<u8> = Vec::with_capacity(128);
563        let out = &mut temp;
564        out.write_u8(0)?;
565        for file in self.files.iter() {
566            for c in file.name().encode_utf16() {
567                let buf = c.to_le_bytes();
568                out.write_all(&buf)?;
569            }
570            out.write_all(&[0u8; 2])?;
571        }
572        write_u64(header, temp.len() as u64)?;
573        header.write_all(temp.as_slice())?;
574        Ok(())
575    }
576
    // Property writers generated by `write_times!`. Each generated method emits its property
    // only when at least one entry has the corresponding `has_*` flag set.

    // Creation times (`K_C_TIME`).
    write_times!(
        write_file_ctimes,
        K_C_TIME,
        has_creation_date,
        creation_date
    );
    // Access times (`K_A_TIME`).
    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
    // Last-modified times (`K_M_TIME`).
    write_times!(
        write_file_mtimes,
        K_M_TIME,
        has_last_modified_date,
        last_modified_date
    );
    // Windows attributes (`K_WIN_ATTRIBUTES`), emitted with `write_u32` instead of the
    // default `write_u64`.
    write_times!(
        write_file_windows_attrs,
        K_WIN_ATTRIBUTES,
        has_windows_attributes,
        windows_attributes,
        write_u32
    );
597}
598
599impl<W: Write + Seek> AutoFinish for ArchiveWriter<W> {
600    fn finish_ignore_error(self) {
601        let _ = self.finish();
602    }
603}
604
605pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
606    let mut first = 0;
607    let mut mask = 0x80;
608    let mut i = 0;
609    while i < 8 {
610        if value < (1u64 << (7 * (i + 1))) {
611            first |= value >> (8 * i);
612            break;
613        }
614        first |= mask;
615        mask >>= 1;
616        i += 1;
617    }
618    header.write_u8((first & 0xFF) as u8)?;
619    while i > 0 {
620        header.write_u8((value & 0xFF) as u8)?;
621        value >>= 8;
622        i -= 1;
623    }
624    Ok(())
625}
626
627struct CompressWrapWriter<'a, W> {
628    writer: W,
629    crc: Hasher,
630    cache: Vec<u8>,
631    bytes_written: &'a mut usize,
632}
633
634impl<'a, W: Write> CompressWrapWriter<'a, W> {
635    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
636        Self {
637            writer,
638            crc: Hasher::new(),
639            cache: Vec::with_capacity(8192),
640            bytes_written,
641        }
642    }
643
644    pub fn crc_value(&mut self) -> u32 {
645        let crc = std::mem::replace(&mut self.crc, Hasher::new());
646        crc.finalize()
647    }
648}
649
650impl<W: Write> Write for CompressWrapWriter<'_, W> {
651    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
652        self.cache.resize(buf.len(), Default::default());
653        let len = self.writer.write(buf)?;
654        self.crc.update(&buf[..len]);
655        *self.bytes_written += len;
656        Ok(len)
657    }
658
659    fn flush(&mut self) -> std::io::Result<()> {
660        self.writer.flush()
661    }
662}