Skip to main content

sevenz_rust2/
writer.rs

1mod counting_writer;
2#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
3mod lazy_file_reader;
4mod pack_info;
5mod seq_reader;
6mod source_reader;
7mod unpack_info;
8
9use std::{
10    cell::Cell,
11    io::{Read, Seek, Write},
12    rc::Rc,
13    sync::Arc,
14};
15#[cfg(not(target_arch = "wasm32"))]
16use std::{fs::File, path::Path};
17
18pub(crate) use counting_writer::CountingWriter;
19use crc32fast::Hasher;
20
21#[cfg(all(feature = "util", not(target_arch = "wasm32")))]
22pub(crate) use self::lazy_file_reader::LazyFileReader;
23pub(crate) use self::seq_reader::SeqReader;
24pub use self::source_reader::SourceReader;
25use self::{pack_info::PackInfo, unpack_info::UnpackInfo};
26use crate::{
27    ArchiveEntry, AutoFinish, AutoFinisher, ByteWriter, Error,
28    archive::*,
29    bitset::{BitSet, write_bit_set},
30    encoder,
31};
32
/// Generates a `&self` method that writes one optional per-file property
/// (a timestamp or the Windows attributes) into the files-info header.
///
/// Arguments: method name, property id byte, the `ArchiveEntry` flag field that
/// says whether the value is defined, the field holding the value, and
/// (optionally) the `ByteWriter` method used to emit each value.
macro_rules! write_times {
    // Four-argument form: values are emitted with `write_u64`.
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt) => {
        write_times!($fn_name, $nid, $has_time, $time, write_u64);
    };
    ($fn_name:tt, $nid:expr, $has_time:tt, $time:tt, $write_fn:tt) => {
        fn $fn_name<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
            let defined = self.files.iter().filter(|entry| entry.$has_time).count();
            if defined == 0 {
                // No entry defines this property: omit it from the header entirely.
                return Ok(());
            }

            header.write_u8($nid)?;
            let mut payload: Vec<u8> = Vec::with_capacity(128);
            if defined == self.files.len() {
                // "All defined" marker; no bit vector follows.
                payload.write_u8(1)?;
            } else {
                // Not all defined: a bit vector tells which entries carry a value.
                payload.write_u8(0)?;
                let mut defined_bits = BitSet::with_capacity(self.files.len());
                for (index, entry) in self.files.iter().enumerate() {
                    if entry.$has_time {
                        defined_bits.insert(index);
                    }
                }
                write_bit_set(&mut payload, &defined_bits)?;
            }
            // A single zero byte precedes the values.
            payload.write_u8(0)?;
            for entry in self.files.iter().filter(|entry| entry.$has_time) {
                payload.$write_fn((entry.$time).into())?;
            }
            payload.flush()?;

            // The property payload is length-prefixed in the header.
            write_u64(header, payload.len() as u64)?;
            header.write_all(&payload)?;
            Ok(())
        }
    };
}
76
77type Result<T> = std::result::Result<T, Error>;
78
/// Writes a 7z archive file.
///
/// Entry data is written to `output` as entries are pushed; the archive headers
/// are only written when [`ArchiveWriter::finish`] is called.
pub struct ArchiveWriter<W: Write> {
    /// Destination the archive bytes are written to.
    output: W,
    /// Entries pushed so far, in the order they were added.
    files: Vec<ArchiveEntry>,
    /// Encoder chain applied to entry data (defaults to LZMA2).
    content_methods: Arc<Vec<EncoderConfiguration>>,
    /// Size and CRC of every packed stream written so far.
    pack_info: PackInfo,
    /// Coder and unpacked-size information matching the packed streams.
    unpack_info: UnpackInfo,
    /// Whether the header should be encrypted when an AES method is configured.
    encrypt_header: bool,
}
88
89#[cfg(not(target_arch = "wasm32"))]
90impl ArchiveWriter<File> {
91    /// Creates a file to write a 7z archive to.
92    pub fn create(path: impl AsRef<Path>) -> Result<Self> {
93        let file = File::create(path.as_ref())
94            .map_err(|e| Error::file_open(e, path.as_ref().to_string_lossy().to_string()))?;
95        Self::new(file)
96    }
97}
98
99impl<W: Write + Seek> ArchiveWriter<W> {
100    /// Prepares writer to write a 7z archive to.
101    pub fn new(mut writer: W) -> Result<Self> {
102        writer.seek(std::io::SeekFrom::Start(SIGNATURE_HEADER_SIZE))?;
103
104        Ok(Self {
105            output: writer,
106            files: Default::default(),
107            content_methods: Arc::new(vec![EncoderConfiguration::new(EncoderMethod::LZMA2)]),
108            pack_info: Default::default(),
109            unpack_info: Default::default(),
110            encrypt_header: true,
111        })
112    }
113
    /// Returns a wrapper around `self` that will finish the stream on drop.
    ///
    /// The wrapper is an [`AutoFinisher`] holding this writer.
    pub fn auto_finish(self) -> AutoFinisher<Self> {
        AutoFinisher(Some(self))
    }
118
119    /// Sets the default compression methods to use for entry data. Default is LZMA2.
120    pub fn set_content_methods(&mut self, content_methods: Vec<EncoderConfiguration>) -> &mut Self {
121        if content_methods.is_empty() {
122            return self;
123        }
124        self.content_methods = Arc::new(content_methods);
125        self
126    }
127
    /// Whether to enable the encryption of the header. Default is `true`.
    ///
    /// The header is only encrypted when the configured content methods contain
    /// an AES256-SHA256 encoder; otherwise this flag has no effect.
    pub fn set_encrypt_header(&mut self, enabled: bool) {
        self.encrypt_header = enabled;
    }
132
133    /// Non-solid compression - Adds an archive `entry` with data from `reader`.
134    ///
135    /// # Example
136    /// ```no_run
137    /// use std::{fs::File, path::Path};
138    ///
139    /// use sevenz_rust2::*;
140    /// let mut sz = ArchiveWriter::create("path/to/dest.7z").expect("create writer ok");
141    /// let src = Path::new("path/to/source.txt");
142    /// let name = "source.txt".to_string();
143    /// let entry = sz
144    ///     .push_archive_entry(
145    ///         ArchiveEntry::from_path(&src, name),
146    ///         Some(File::open(src).unwrap()),
147    ///     )
148    ///     .expect("ok");
149    /// let compressed_size = entry.compressed_size;
150    /// sz.finish().expect("done");
151    /// ```
152    pub fn push_archive_entry<R: Read>(
153        &mut self,
154        mut entry: ArchiveEntry,
155        reader: Option<R>,
156    ) -> Result<&ArchiveEntry> {
157        if !entry.is_directory {
158            if let Some(mut r) = reader {
159                let mut compressed_len = 0;
160                let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
161
162                let mut more_sizes: Vec<Rc<Cell<usize>>> =
163                    Vec::with_capacity(self.content_methods.len() - 1);
164
165                let (crc, size) = {
166                    let mut w = Self::create_writer(
167                        &self.content_methods,
168                        &mut compressed,
169                        &mut more_sizes,
170                    )?;
171                    let mut write_len = 0;
172                    let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
173                    let mut buf = [0u8; 4096];
174                    loop {
175                        match r.read(&mut buf) {
176                            Ok(n) => {
177                                if n == 0 {
178                                    break;
179                                }
180                                w.write_all(&buf[..n]).map_err(|e| {
181                                    Error::io_msg(e, format!("Encode entry:{}", entry.name()))
182                                })?;
183                            }
184                            Err(e) => {
185                                return Err(Error::io_msg(
186                                    e,
187                                    format!("Encode entry:{}", entry.name()),
188                                ));
189                            }
190                        }
191                    }
192                    w.flush()
193                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
194                    w.write(&[])
195                        .map_err(|e| Error::io_msg(e, format!("Encode entry:{}", entry.name())))?;
196
197                    (w.crc_value(), write_len)
198                };
199                let compressed_crc = compressed.crc_value();
200                entry.has_stream = true;
201                entry.size = size as u64;
202                entry.crc = crc as u64;
203                entry.has_crc = true;
204                entry.compressed_crc = compressed_crc as u64;
205                entry.compressed_size = compressed_len as u64;
206                self.pack_info
207                    .add_stream(compressed_len as u64, compressed_crc);
208
209                let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
210                sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
211                sizes.push(size as u64);
212
213                self.unpack_info
214                    .add(self.content_methods.clone(), sizes, crc);
215
216                self.files.push(entry);
217                return Ok(self.files.last().unwrap());
218            }
219        }
220        entry.has_stream = false;
221        entry.size = 0;
222        entry.compressed_size = 0;
223        entry.has_crc = false;
224        self.files.push(entry);
225        Ok(self.files.last().unwrap())
226    }
227
228    /// Solid compression - packs `entries` into one pack.
229    ///
230    /// # Panics
231    /// * If `entries`'s length not equals to `reader.reader_len()`
232    pub fn push_archive_entries<R: Read>(
233        &mut self,
234        entries: Vec<ArchiveEntry>,
235        reader: Vec<SourceReader<R>>,
236    ) -> Result<&mut Self> {
237        let mut entries = entries;
238        let mut r = SeqReader::new(reader);
239        assert_eq!(r.reader_len(), entries.len());
240        let mut compressed_len = 0;
241        let mut compressed = CompressWrapWriter::new(&mut self.output, &mut compressed_len);
242        let content_methods = &self.content_methods;
243        let mut more_sizes: Vec<Rc<Cell<usize>>> = Vec::with_capacity(content_methods.len() - 1);
244
245        let (crc, size) = {
246            let mut w = Self::create_writer(content_methods, &mut compressed, &mut more_sizes)?;
247            let mut write_len = 0;
248            let mut w = CompressWrapWriter::new(&mut w, &mut write_len);
249            let mut buf = [0u8; 4096];
250
251            fn entries_names(entries: &[ArchiveEntry]) -> String {
252                let mut names = String::with_capacity(512);
253                for ele in entries.iter() {
254                    names.push_str(&ele.name);
255                    names.push(';');
256                    if names.len() > 512 {
257                        break;
258                    }
259                }
260                names
261            }
262
263            loop {
264                match r.read(&mut buf) {
265                    Ok(n) => {
266                        if n == 0 {
267                            break;
268                        }
269                        w.write_all(&buf[..n]).map_err(|e| {
270                            Error::io_msg(e, format!("Encode entries:{}", entries_names(&entries)))
271                        })?;
272                    }
273                    Err(e) => {
274                        return Err(Error::io_msg(
275                            e,
276                            format!("Encode entries:{}", entries_names(&entries)),
277                        ));
278                    }
279                }
280            }
281            w.flush().map_err(|e| {
282                let mut names = String::with_capacity(512);
283                for ele in entries.iter() {
284                    names.push_str(&ele.name);
285                    names.push(';');
286                    if names.len() > 512 {
287                        break;
288                    }
289                }
290                Error::io_msg(e, format!("Encode entry:{names}"))
291            })?;
292            w.write(&[]).map_err(|e| {
293                Error::io_msg(e, format!("Encode entry:{}", entries_names(&entries)))
294            })?;
295
296            (w.crc_value(), write_len)
297        };
298        let compressed_crc = compressed.crc_value();
299        let mut sub_stream_crcs = Vec::with_capacity(entries.len());
300        let mut sub_stream_sizes = Vec::with_capacity(entries.len());
301        for i in 0..entries.len() {
302            let entry = &mut entries[i];
303            let ri = &r[i];
304            entry.crc = ri.crc_value() as u64;
305            entry.size = ri.read_count() as u64;
306            sub_stream_crcs.push(entry.crc as u32);
307            sub_stream_sizes.push(entry.size);
308            entry.has_crc = true;
309        }
310
311        self.pack_info
312            .add_stream(compressed_len as u64, compressed_crc);
313
314        let mut sizes = Vec::with_capacity(more_sizes.len() + 1);
315        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
316        sizes.push(size as u64);
317
318        self.unpack_info.add_multiple(
319            content_methods.clone(),
320            sizes,
321            crc,
322            entries.len() as u64,
323            sub_stream_sizes,
324            sub_stream_crcs,
325        );
326
327        self.files.extend(entries);
328        Ok(self)
329    }
330
331    fn create_writer<'a, O: Write + 'a>(
332        methods: &[EncoderConfiguration],
333        out: O,
334        more_sized: &mut Vec<Rc<Cell<usize>>>,
335    ) -> Result<Box<dyn Write + 'a>> {
336        let mut encoder: Box<dyn Write> = Box::new(out);
337        let mut first = true;
338        for mc in methods.iter() {
339            if !first {
340                let counting = CountingWriter::new(encoder);
341                more_sized.push(counting.counting());
342                encoder = Box::new(encoder::add_encoder(counting, mc)?);
343            } else {
344                let counting = CountingWriter::new(encoder);
345                encoder = Box::new(encoder::add_encoder(counting, mc)?);
346            }
347            first = false;
348        }
349        Ok(encoder)
350    }
351
352    /// Finishes the compression.
353    pub fn finish(mut self) -> std::io::Result<W> {
354        let mut header: Vec<u8> = Vec::with_capacity(64 * 1024);
355        self.write_encoded_header(&mut header)?;
356        let header_pos = self.output.stream_position()?;
357        self.output.write_all(&header)?;
358        let crc32 = crc32fast::hash(&header);
359        let mut hh = [0u8; SIGNATURE_HEADER_SIZE as usize];
360        {
361            let mut hhw = hh.as_mut_slice();
362            //sig
363            hhw.write_all(SEVEN_Z_SIGNATURE)?;
364            //version
365            hhw.write_u8(0)?;
366            hhw.write_u8(4)?;
367            //placeholder for crc: index = 8
368            hhw.write_u32(0)?;
369
370            // start header
371            hhw.write_u64(header_pos - SIGNATURE_HEADER_SIZE)?;
372            hhw.write_u64(0xFFFFFFFF & header.len() as u64)?;
373            hhw.write_u32(crc32)?;
374        }
375        let crc32 = crc32fast::hash(&hh[12..]);
376        hh[8..12].copy_from_slice(&crc32.to_le_bytes());
377
378        self.output.seek(std::io::SeekFrom::Start(0))?;
379        self.output.write_all(&hh)?;
380        self.output.flush()?;
381        Ok(self.output)
382    }
383
384    fn write_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
385        header.write_u8(K_HEADER)?;
386        header.write_u8(K_MAIN_STREAMS_INFO)?;
387        self.write_streams_info(header)?;
388        self.write_files_info(header)?;
389        header.write_u8(K_END)?;
390        Ok(())
391    }
392
393    fn write_encoded_header<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
394        let mut raw_header = Vec::with_capacity(64 * 1024);
395        self.write_header(&mut raw_header)?;
396        let mut pack_info = PackInfo::default();
397
398        let position = self.output.stream_position()?;
399        let pos = position - SIGNATURE_HEADER_SIZE;
400        pack_info.pos = pos;
401
402        let mut more_sizes = vec![];
403        let size = raw_header.len() as u64;
404        let crc32 = crc32fast::hash(&raw_header);
405        let mut methods = vec![];
406
407        let mut must_encrypt_header = false;
408
409        if self.encrypt_header {
410            for conf in self.content_methods.iter() {
411                if conf.method.id() == EncoderMethod::AES256_SHA256.id() {
412                    methods.push(conf.clone());
413                    must_encrypt_header = true;
414                    break;
415                }
416            }
417        }
418
419        methods.push(EncoderConfiguration::new(EncoderMethod::LZMA));
420
421        let methods = Arc::new(methods);
422
423        let mut encoded_data = Vec::with_capacity(size as usize / 2);
424
425        let mut compress_size = 0;
426        let mut compressed = CompressWrapWriter::new(&mut encoded_data, &mut compress_size);
427        {
428            let mut encoder = Self::create_writer(&methods, &mut compressed, &mut more_sizes)
429                .map_err(std::io::Error::other)?;
430            encoder.write_all(&raw_header)?;
431            encoder.flush()?;
432            let _ = encoder.write(&[])?;
433        }
434
435        let compress_crc = compressed.crc_value();
436        let compress_size = *compressed.bytes_written;
437
438        if !must_encrypt_header && compress_size as u64 + 20 >= size {
439            // We have an unencrypted header and the compression made increased the data size,
440            // so we write the raw header data without compressing it to save space.
441            header.write_all(&raw_header)?;
442            return Ok(());
443        }
444        self.output.write_all(&encoded_data[..compress_size])?;
445
446        pack_info.add_stream(compress_size as u64, compress_crc);
447
448        let mut unpack_info = UnpackInfo::default();
449        let mut sizes = Vec::with_capacity(1 + more_sizes.len());
450        sizes.extend(more_sizes.iter().map(|s| s.get() as u64));
451        sizes.push(size);
452        unpack_info.add(methods, sizes, crc32);
453
454        header.write_u8(K_ENCODED_HEADER)?;
455
456        pack_info.write_to(header)?;
457        unpack_info.write_to(header)?;
458        unpack_info.write_substreams(header)?;
459
460        header.write_u8(K_END)?;
461
462        Ok(())
463    }
464
465    fn write_streams_info<H: Write>(&mut self, header: &mut H) -> std::io::Result<()> {
466        if self.pack_info.len() > 0 {
467            self.pack_info.write_to(header)?;
468            self.unpack_info.write_to(header)?;
469        }
470        self.unpack_info.write_substreams(header)?;
471
472        header.write_u8(K_END)?;
473        Ok(())
474    }
475
476    fn write_files_info<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
477        header.write_u8(K_FILES_INFO)?;
478        write_u64(header, self.files.len() as u64)?;
479        self.write_file_empty_streams(header)?;
480        self.write_file_empty_files(header)?;
481        self.write_file_anti_items(header)?;
482        self.write_file_names(header)?;
483        self.write_file_ctimes(header)?;
484        self.write_file_atimes(header)?;
485        self.write_file_mtimes(header)?;
486        self.write_file_windows_attrs(header)?;
487        header.write_u8(K_END)?;
488        Ok(())
489    }
490
491    fn write_file_empty_streams<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
492        let mut has_empty = false;
493        for entry in self.files.iter() {
494            if !entry.has_stream {
495                has_empty = true;
496                break;
497            }
498        }
499        if has_empty {
500            header.write_u8(K_EMPTY_STREAM)?;
501            let mut bitset = BitSet::with_capacity(self.files.len());
502            for (i, entry) in self.files.iter().enumerate() {
503                if !entry.has_stream {
504                    bitset.insert(i);
505                }
506            }
507            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
508            write_bit_set(&mut temp, &bitset)?;
509            write_u64(header, temp.len() as u64)?;
510            header.write_all(temp.as_slice())?;
511        }
512        Ok(())
513    }
514
515    fn write_file_empty_files<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
516        let mut has_empty = false;
517        let mut empty_stream_counter = 0;
518        let mut bitset = BitSet::new();
519        for entry in self.files.iter() {
520            if !entry.has_stream {
521                let is_dir = entry.is_directory();
522                has_empty |= !is_dir;
523                if !is_dir {
524                    bitset.insert(empty_stream_counter);
525                }
526                empty_stream_counter += 1;
527            }
528        }
529        if has_empty {
530            header.write_u8(K_EMPTY_FILE)?;
531
532            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
533            write_bit_set(&mut temp, &bitset)?;
534            write_u64(header, temp.len() as u64)?;
535            header.write_all(&temp)?;
536        }
537        Ok(())
538    }
539
540    fn write_file_anti_items<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
541        let mut has_anti = false;
542        let mut counter = 0;
543        let mut bitset = BitSet::new();
544        for entry in self.files.iter() {
545            if !entry.has_stream {
546                let is_anti = entry.is_anti_item();
547                has_anti |= !is_anti;
548                if !is_anti {
549                    bitset.insert(counter);
550                }
551                counter += 1;
552            }
553        }
554        if has_anti {
555            header.write_u8(K_ANTI)?;
556
557            let mut temp: Vec<u8> = Vec::with_capacity(bitset.len() / 8 + 1);
558            write_bit_set(&mut temp, &bitset)?;
559            write_u64(header, temp.len() as u64)?;
560            header.write_all(temp.as_slice())?;
561        }
562        Ok(())
563    }
564
565    fn write_file_names<H: Write>(&self, header: &mut H) -> std::io::Result<()> {
566        header.write_u8(K_NAME)?;
567        let mut temp: Vec<u8> = Vec::with_capacity(128);
568        let out = &mut temp;
569        out.write_u8(0)?;
570        for file in self.files.iter() {
571            for c in file.name().encode_utf16() {
572                let buf = c.to_le_bytes();
573                out.write_all(&buf)?;
574            }
575            out.write_all(&[0u8; 2])?;
576        }
577        write_u64(header, temp.len() as u64)?;
578        header.write_all(temp.as_slice())?;
579        Ok(())
580    }
581
    // Generates `write_file_ctimes`: creation dates under `K_C_TIME`, written with `write_u64`.
    write_times!(
        write_file_ctimes,
        K_C_TIME,
        has_creation_date,
        creation_date
    );
    // Generates `write_file_atimes`: access dates under `K_A_TIME`, written with `write_u64`.
    write_times!(write_file_atimes, K_A_TIME, has_access_date, access_date);
    // Generates `write_file_mtimes`: last-modified dates under `K_M_TIME`, written with `write_u64`.
    write_times!(
        write_file_mtimes,
        K_M_TIME,
        has_last_modified_date,
        last_modified_date
    );
    // Generates `write_file_windows_attrs`: Windows attributes under `K_WIN_ATTRIBUTES`,
    // written with `write_u32` instead of the default `write_u64`.
    write_times!(
        write_file_windows_attrs,
        K_WIN_ATTRIBUTES,
        has_windows_attributes,
        windows_attributes,
        write_u32
    );
602}
603
/// Lets an [`ArchiveWriter`] be finished without the caller handling the result.
impl<W: Write + Seek> AutoFinish for ArchiveWriter<W> {
    /// Finishes the archive and discards both the returned writer and any error.
    fn finish_ignore_error(self) {
        let _ = self.finish();
    }
}
609
610pub(crate) fn write_u64<W: Write>(header: &mut W, mut value: u64) -> std::io::Result<()> {
611    let mut first = 0;
612    let mut mask = 0x80;
613    let mut i = 0;
614    while i < 8 {
615        if value < (1u64 << (7 * (i + 1))) {
616            first |= value >> (8 * i);
617            break;
618        }
619        first |= mask;
620        mask >>= 1;
621        i += 1;
622    }
623    header.write_u8((first & 0xFF) as u8)?;
624    while i > 0 {
625        header.write_u8((value & 0xFF) as u8)?;
626        value >>= 8;
627        i -= 1;
628    }
629    Ok(())
630}
631
632struct CompressWrapWriter<'a, W> {
633    writer: W,
634    crc: Hasher,
635    cache: Vec<u8>,
636    bytes_written: &'a mut usize,
637}
638
639impl<'a, W: Write> CompressWrapWriter<'a, W> {
640    pub fn new(writer: W, bytes_written: &'a mut usize) -> Self {
641        Self {
642            writer,
643            crc: Hasher::new(),
644            cache: Vec::with_capacity(8192),
645            bytes_written,
646        }
647    }
648
649    pub fn crc_value(&mut self) -> u32 {
650        let crc = std::mem::replace(&mut self.crc, Hasher::new());
651        crc.finalize()
652    }
653}
654
655impl<W: Write> Write for CompressWrapWriter<'_, W> {
656    fn write(&mut self, buf: &[u8]) -> std::io::Result<usize> {
657        self.cache.resize(buf.len(), Default::default());
658        let len = self.writer.write(buf)?;
659        self.crc.update(&buf[..len]);
660        *self.bytes_written += len;
661        Ok(len)
662    }
663
664    fn flush(&mut self) -> std::io::Result<()> {
665        self.writer.flush()
666    }
667}