sevenz_rust2/
writer.rs

1mod counting_writer;
2#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
3mod lazy_file_reader;
4mod pack_info;
5mod seq_reader;
6mod source_reader;
7mod unpack_info;
8
9use std::{
10    cell::Cell,
11    io::{Read, Seek, Write},
12    rc::Rc,
13    sync::Arc,
14};
15#[cfg(not(target_arch = "wasm32"))]
16use std::{fs::File, path::Path};
17
18use byteorder::{LittleEndian, WriteBytesExt};
19pub(crate) use counting_writer::CountingWriter;
20use crc32fast::Hasher;
21
22#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
23pub(crate) use self::lazy_file_reader::LazyFileReader;
24pub(crate) use self::seq_reader::SeqReader;
25pub use self::source_reader::SourceReader;
26use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
27use crate::{
28    ArchiveEntry, Error,
29    archive::*,
30    bitset::{BitSet, write_bit_set},
31    encoder,
32};
33
// Generates a `&self` method that writes one optional per-file property (a timestamp or the
// Windows attributes) into the files-info section of the header.
//
// Arguments: method name, property id, name of the `bool` field saying whether an entry
// carries the value, name of the value field, and (optionally) the `byteorder` write method
// used for each value.
macro_rules! write_times {
    // Short form: values are written with `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            if defined == 0 {
                // No entry carries this property: the record is omitted entirely.
                return Ok(());
            }

            header.write_u8($nid)?;
            // The payload is buffered first because its length has to precede it.
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            let mut out = &mut payload;
            if defined == self.files.len() {
                // Flag 1: every entry has a value, no bit set needed.
                out.write_u8(1)?;
            } else {
                // Flag 0, followed by a bit set marking the entries that have a value.
                out.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut out, &defined_bits)?;
            }
            // A zero byte separates the "defined" information from the values.
            out.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                out.$write_fn::<LittleEndian>((entry.$time).into())?;
            }
            out.flush()?;

            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
77
/// Result alias used throughout this module, with the crate's [`Error`] as the error type.
type Result<T> = std::result::Result<T, Error>;
79
/// Writes a 7z archive file.
///
/// Entries are added with `push_archive_entry` / `push_archive_entries`; `finish` then writes
/// the header and the signature header and returns the underlying writer.
#[cfg_attr(docsrs, doc(cfg(feature = "compress")))]
pub struct ArchiveWriter<W: Write> {
    /// Destination of the archive; `new` positions it just past the signature header.
    output: W,
    /// Entries pushed so far, in the order they are listed in the header.
    files: Vec<ArchiveEntry>,
    /// Encoder configurations applied to entry data (LZMA2 unless changed).
    content_methods: Arc<Vec<EncoderConfiguration>>,
    /// Length and CRC of every packed stream written to `output`.
    pack_info: PackInfo,
    /// Methods, unpacked sizes and CRCs recorded for every packed stream.
    unpack_info: UnpackInfo,
    /// Whether the header may be encrypted; only used if an AES256-SHA256 method is configured.
    encrypt_header: bool,
}
90
91#[cfg(not(target_arch = "wasm32"))]
92impl ArchiveWriter<File> {
93    /// Creates a file to write a 7z archive to.
94    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
95        let file = File::create(path.as_ref())
96            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
97        Self::new(file)
98    }
99}
100
101impl<W: Write + Seek> ArchiveWriter<W> {
102    /// Prepares writer to write a 7z archive to.
103    pub fn new(mut writer: W) -> Result<Self> {
104        writer
105            .seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))
106            .map_err(Error::io)?;
107
108        Ok(Self {
109            output: writer,
110            files: Default::default(),
111            content_methods: Arc::new(vec![EncoderConfiguration::new(EncoderMethod::LZMA2)]),
112            pack_info: Default::default(),
113            unpack_info: Default::default(),
114            encrypt_header: true,
115        })
116    }
117
118    /// Sets the default compression methods to use for entry data. Default is LZMA2.
119    pub fn set_content_methods(&mut self, content_methods: Vec<EncoderConfiguration>) -> &mut Self {
120        if content_methods.is_empty() {
121            return self;
122        }
123        self.content_methods = Arc::new(content_methods);
124        self
125    }
126
    /// Sets whether the archive header should be encrypted. Default is `true`.
    ///
    /// The flag only has an effect when the content methods contain an AES256-SHA256
    /// configuration; that configuration is then also applied to the header.
    pub fn set_encrypt_header(&mut self, enabled: bool) {
        self.encrypt_header = enabled;
    }
131
132    /// Adds an archive `entry` with data from `reader`.
133    ///
134    /// # Example
135    /// ```no_run
136    /// use std::{fs::File, path::Path};
137    ///
138    /// use sevenz_rust2::*;
139    /// let mut sz = ArchiveWriter::create("path/to/dest.7z").expect("create writer ok");
140    /// let src = Path::new("path/to/source.txt");
141    /// let name = "source.txt".to_string();
142    /// let entry = sz
143    ///     .push_archive_entry(
144    ///         ArchiveEntry::from_path(&src, name),
145    ///         Some(File::open(src).unwrap()),
146    ///     )
147    ///     .expect("ok");
148    /// let compressed_size = entry.compressed_size;
149    /// sz.finish().expect("done");
150    /// ```
151    pub fn push_archive_entry<R: Read>(
152        &mut self,
153        mut entry: ArchiveEntry,
154        reader: Option<R>,
155    ) -> Result<&ArchiveEntry> {
156        if !entry.is_directory {
157            if let Some(mut r) = reader {
158                let mut compressed_len = 0;
159                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
160
161                let mut more_sizes: Vec<Rc<Cell<usize>>> =
162                    Vec::with_capacity(self.content_methods.len() - 1);
163
164                let (crc, size) = {
165                    let mut w = Self::create_writer(
166                        &self.content_methods,
167                        &mut compressed,
168                        &mut more_sizes,
169                    )?;
170                    let mut write_len = 0;
171                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
172                    let mut buf = [0u8; 4096];
173                    loop {
174                        match r.read(&mut buf) {
175                            Ok(n) => {
176                                if n == 0 {
177                                    break;
178                                }
179                                w.write_all(&buf[..n]).map_err(|e| {
180                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
181                                })?;
182                            }
183                            Err(e) => {
184                                return Err(Error::io_msg(
185                                    e,
186                                    format!("Encode entry:{}", entry.name()),
187                                ));
188                            }
189                        }
190                    }
191                    w.flush()
192                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
193                    w.write(&[])
194                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
195
196                    (w.crc_value(), write_len)
197                };
198                let compressed_crc = compressed.crc_value();
199                entry.has_stream = true;
200                entry.size = size as u64;
201                entry.crc = crc as u64;
202                entry.has_crc = true;
203                entry.compressed_crc = compressed_crc as u64;
204                entry.compressed_size = compressed_len as u64;
205                self.pack_info
206                    .add_stream(compressed_len as u64, compressed_crc);
207
208                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
209                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
210                sizes.push(size as u64);
211
212                self.unpack_info
213                    .add(self.content_methods.clone(), sizes, crc);
214
215                self.files.push(entry);
216                return Ok(self.files.last().unwrap());
217            }
218        }
219        entry.has_stream = false;
220        entry.size = 0;
221        entry.compressed_size = 0;
222        entry.has_crc = false;
223        self.files.push(entry);
224        Ok(self.files.last().unwrap())
225    }
226
227    /// Solid compression - packs `entries` into one pack.
228    ///
229    /// # Panics
230    /// * If `entries`'s length not equals to `reader.reader_len()`
231    pub fn push_archive_entries<R: Read>(
232        &mut self,
233        entries: Vec<ArchiveEntry>,
234        reader: Vec<SourceReader<R>>,
235    ) -> Result<&mut Self> {
236        let mut entries = entries;
237        let mut r = SeqReader::new(reader);
238        assert_eq!(r.reader_len(), entries.len());
239        let mut compressed_len = 0;
240        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
241        let content_methods = &self.content_methods;
242        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
243
244        let (crc, size) = {
245            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
246            let mut write_len = 0;
247            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
248            let mut buf = [0u8; 4096];
249
250            fn entries_names(entries: &[ArchiveEntry]) -> String {
251                let mut names = String::with_capacity(512);
252                for ele in entries.iter() {
253                    names.push_str(&ele.name);
254                    names.push(';');
255                    if names.len() > 512 {
256                        break;
257                    }
258                }
259                names
260            }
261
262            loop {
263                match r.read(&mut buf) {
264                    Ok(n) => {
265                        if n == 0 {
266                            break;
267                        }
268                        w.write_all(&buf[..n]).map_err(|e| {
269                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
270                        })?;
271                    }
272                    Err(e) => {
273                        return Err(Error::io_msg(
274                            e,
275                            format!("Encode entries:{}", entries_names(&entries)),
276                        ));
277                    }
278                }
279            }
280            w.flush().map_err(|e| {
281                let mut names = String::with_capacity(512);
282                for ele in entries.iter() {
283                    names.push_str(&ele.name);
284                    names.push(';');
285                    if names.len() > 512 {
286                        break;
287                    }
288                }
289                Error::io_msg(e, format!("Encode entry:{names}"))
290            })?;
291            w.write(&[]).map_err(|e| {
292                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
293            })?;
294
295            (w.crc_value(), write_len)
296        };
297        let compressed_crc = compressed.crc_value();
298        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
299        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
300        for i in 0..entries.len() {
301            let entry = &mut entries[i];
302            let ri = &r[i];
303            entry.crc = ri.crc_value() as u64;
304            entry.size = ri.read_count() as u64;
305            sub_stream_crcs.push(entry.crc as u32);
306            sub_stream_sizes.push(entry.size);
307            entry.has_crc = true;
308        }
309
310        self.pack_info
311            .add_stream(compressed_len as u64, compressed_crc);
312
313        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
314        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
315        sizes.push(size as u64);
316
317        self.unpack_info.add_multiple(
318            content_methods.clone(),
319            sizes,
320            crc,
321            entries.len() as u64,
322            sub_stream_sizes,
323            sub_stream_crcs,
324        );
325
326        self.files.extend(entries);
327        Ok(self)
328    }
329
330    fn create_writer<'a, O: Write + 'a>(
331        methods: &[EncoderConfiguration],
332        out: O,
333        more_sized: &mut Vec<Rc<Cell<usize>>>,
334    ) -> Result<Box<dyn Write + 'a>> {
335        let mut encoder: Box<dyn Write> = Box::new(out);
336        let mut first = true;
337        for mc in methods.iter() {
338            if !first {
339                let counting = CountingWriter::new(encoder);
340                more_sized.push(counting.counting());
341                encoder = Box::new(encoder::add_encoder(counting, mc)?);
342            } else {
343                let counting = CountingWriter::new(encoder);
344                encoder = Box::new(encoder::add_encoder(counting, mc)?);
345            }
346            first = false;
347        }
348        Ok(encoder)
349    }
350
351    /// Finishes the compression.
352    pub fn finish(mut self) -> std::io::Result<W> {
353        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
354        self.write_encoded_header(&mut header)?;
355        let header_pos = self.output.stream_position()?;
356        self.output.write_all(&header)?;
357        let crc32 = crc32fast::hash(&header);
358        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
359        {
360            let mut hhw = hh.as_mut_slice();
361            //sig
362            hhw.write_all(SEVEN_Z_SIGNATURE)?;
363            //version
364            hhw.write_u8(0)?;
365            hhw.write_u8(2)?;
366            //placeholder for crc: index = 8
367            hhw.write_u32::<LittleEndian>(0)?;
368
369            // start header
370            hhw.write_u64::<LittleEndian>(header_pos - SIGNATURE_HEADER_SIZE)?;
371            hhw.write_u64::<LittleEndian>(0xFFFFFFFF & header.len() as u64)?;
372            hhw.write_u32::<LittleEndian>(crc32)?;
373        }
374        let crc32 = crc32fast::hash(&hh[12..]);
375        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
376
377        self.output.seek(std::io::SeekFrom::Start(0))?;
378        self.output.write_all(&hh)?;
379        self.output.flush()?;
380        Ok(self.output)
381    }
382
383    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
384        header.write_u8(K_HEADER)?;
385        header.write_u8(K_MAIN_STREAMS_INFO)?;
386        self.write_streams_info(header)?;
387        self.write_files_info(header)?;
388        header.write_u8(K_END)?;
389        Ok(())
390    }
391
392    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
393        let mut raw_header = Vec::with_capacity(64 * 1024);
394        self.write_header(&mut raw_header)?;
395        let mut pack_info = PackInfo::default();
396
397        let position = self.output.stream_position()?;
398        let pos = position - SIGNATURE_HEADER_SIZE;
399        pack_info.pos = pos;
400
401        let mut more_sizes = vec![];
402        let size = raw_header.len() as u64;
403        let crc32 = crc32fast::hash(&raw_header);
404        let mut methods = vec![];
405
406        if self.encrypt_header {
407            for conf in self.content_methods.iter() {
408                if conf.method.id() == EncoderMethod::AES256SHA256.id() {
409                    methods.push(conf.clone());
410                    break;
411                }
412            }
413        }
414
415        methods.push(EncoderConfiguration::new(EncoderMethod::LZMA));
416
417        let methods = Arc::new(methods);
418
419        let mut encoded_data = Vec::with_capacity(size as usize / 2);
420
421        let mut compress_size = 0;
422        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
423        {
424            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
425                .map_err(std::io::Error::other)?;
426            encoder.write_all(&raw_header)?;
427            encoder.flush()?;
428            let _ = encoder.write(&[])?;
429        }
430
431        let compress_crc = compressed.crc_value();
432        let compress_size = *compressed.bytes_written;
433        if compress_size as u64 + 20 >= size {
434            // compression made it worse. Write raw data
435            header.write_all(&raw_header)?;
436            return Ok(());
437        }
438        self.output.write_all(&encoded_data[..compress_size])?;
439
440        pack_info.add_stream(compress_size as u64, compress_crc);
441
442        let mut unpack_info = UnpackInfo::default();
443        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
444        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
445        sizes.push(size);
446        unpack_info.add(methods, sizes, crc32);
447
448        header.write_u8(K_ENCODED_HEADER)?;
449
450        pack_info.write_to(header)?;
451        unpack_info.write_to(header)?;
452        unpack_info.write_substreams(header)?;
453
454        header.write_u8(K_END)?;
455
456        Ok(())
457    }
458
459    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
460        if self.pack_info.len() > 0 {
461            self.pack_info.write_to(header)?;
462            self.unpack_info.write_to(header)?;
463        }
464        self.unpack_info.write_substreams(header)?;
465
466        header.write_u8(K_END)?;
467        Ok(())
468    }
469
470    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
471        header.write_u8(K_FILES_INFO)?;
472        write_u64(header, self.files.len() as u64)?;
473        self.write_file_empty_streams(header)?;
474        self.write_file_empty_files(header)?;
475        self.write_file_anti_items(header)?;
476        self.write_file_names(header)?;
477        self.write_file_ctimes(header)?;
478        self.write_file_atimes(header)?;
479        self.write_file_mtimes(header)?;
480        self.write_file_windows_attrs(header)?;
481        header.write_u8(K_END)?;
482        Ok(())
483    }
484
485    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
486        let mut has_empty = false;
487        for entry in self.files.iter() {
488            if !entry.has_stream {
489                has_empty = true;
490                break;
491            }
492        }
493        if has_empty {
494            header.write_u8(K_EMPTY_STREAM)?;
495            let mut bitset = BitSet::with_capacity(self.files.len());
496            for (i, entry) in self.files.iter().enumerate() {
497                if !entry.has_stream {
498                    bitset.insert(i);
499                }
500            }
501            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
502            write_bit_set(&mut temp, &bitset)?;
503            write_u64(header, temp.len() as u64)?;
504            header.write_all(temp.as_slice())?;
505        }
506        Ok(())
507    }
508
509    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
510        let mut has_empty = false;
511        let mut empty_stream_counter = 0;
512        let mut bitset = BitSet::new();
513        for entry in self.files.iter() {
514            if !entry.has_stream {
515                let is_dir = entry.is_directory();
516                has_empty |= !is_dir;
517                if !is_dir {
518                    bitset.insert(empty_stream_counter);
519                }
520                empty_stream_counter += 1;
521            }
522        }
523        if has_empty {
524            header.write_u8(K_EMPTY_FILE)?;
525
526            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
527            write_bit_set(&mut temp, &bitset)?;
528            write_u64(header, temp.len() as u64)?;
529            header.write_all(&temp)?;
530        }
531        Ok(())
532    }
533
534    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
535        let mut has_anti = false;
536        let mut counter = 0;
537        let mut bitset = BitSet::new();
538        for entry in self.files.iter() {
539            if !entry.has_stream {
540                let is_anti = entry.is_anti_item();
541                has_anti |= !is_anti;
542                if !is_anti {
543                    bitset.insert(counter);
544                }
545                counter += 1;
546            }
547        }
548        if has_anti {
549            header.write_u8(K_ANTI)?;
550
551            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
552            write_bit_set(&mut temp, &bitset)?;
553            write_u64(header, temp.len() as u64)?;
554            header.write_all(temp.as_slice())?;
555        }
556        Ok(())
557    }
558
559    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
560        header.write_u8(K_NAME)?;
561        let mut temp: Vec<u8> = Vec::with_capacity(128);
562        let out = &mut temp;
563        out.write_u8(0)?;
564        for file in self.files.iter() {
565            for c in file.name().encode_utf16() {
566                let buf = c.to_le_bytes();
567                out.write_all(&buf)?;
568            }
569            out.write_all(&[0u8; 2])?;
570        }
571        write_u64(header, temp.len() as u64)?;
572        header.write_all(temp.as_slice())?;
573        Ok(())
574    }
575
    // Generates `write_file_ctimes`: creation dates, written under `K_C_TIME`.
    write_times!(
        write_file_ctimes,
        K_C_TIME,
        has_creation_date,
        creation_date
    );
    // Generates `write_file_atimes`: access dates, written under `K_A_TIME`.
    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
    // Generates `write_file_mtimes`: last-modified dates, written under `K_M_TIME`.
    write_times!(
        write_file_mtimes,
        K_M_TIME,
        has_last_modified_date,
        last_modified_date
    );
    // Generates `write_file_windows_attrs`: Windows attributes, written under
    // `K_WIN_ATTRIBUTES` with `write_u32` per value instead of the default `write_u64`.
    write_times!(
        write_file_windows_attrs,
        K_WIN_ATTRIBUTES,
        has_windows_attributes,
        windows_attributes,
        write_u32
    );
596}
597
598pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
599    let mut first = 0;
600    let mut mask = 0x80;
601    let mut i = 0;
602    while i < 8 {
603        if value < (1u64 << (7 * (i + 1))) {
604            first |= value >> (8 * i);
605            break;
606        }
607        first |= mask;
608        mask >>= 1;
609        i += 1;
610    }
611    header.write_u8((first & 0xFF) as u8)?;
612    while i > 0 {
613        header.write_u8((value & 0xFF) as u8)?;
614        value >>= 8;
615        i -= 1;
616    }
617    Ok(())
618}
619
620struct CompressWrapWriter<'a, W> {
621    writer: W,
622    crc: Hasher,
623    cache: Vec<u8>,
624    bytes_written: &'a mut usize,
625}
626
627impl<'a, W: Write> CompressWrapWriter<'a, W> {
628    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
629        Self {
630            writer,
631            crc: Hasher::new(),
632            cache: Vec::with_capacity(8192),
633            bytes_written,
634        }
635    }
636
637    pub fn crc_value(&mut self) -> u32 {
638        let crc = std::mem::replace(&mut self.crc, Hasher::new());
639        crc.finalize()
640    }
641}
642
643impl<W: Write> Write for CompressWrapWriter<'_, W> {
644    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
645        self.cache.resize(buf.len(), Default::default());
646        let len = self.writer.write(buf)?;
647        self.crc.update(&buf[..len]);
648        *self.bytes_written += len;
649        Ok(len)
650    }
651
652    fn flush(&mut self) -> std::io::Result<()> {
653        self.writer.flush()
654    }
655}