Skip to main content

rar_stream/
rar_files_package.rs

1//! Multi-volume RAR archive parser.
2//!
3//! This module provides the main entry point for parsing RAR archives.
4//! The [`RarFilesPackage`] struct handles single and multi-volume archives,
5//! automatically stitching files that span multiple volumes.
6//!
7//! ## Quick Start
8//!
9//! ```rust,ignore
10//! use rar_stream::{RarFilesPackage, ParseOptions, LocalFileMedia, FileMedia};
11//! use std::sync::Arc;
12//!
13//! // Open a single RAR file
14//! let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new("archive.rar")?);
15//! let package = RarFilesPackage::new(vec![file]);
16//!
17//! // Parse with default options
18//! let files = package.parse(ParseOptions::default()).await?;
19//!
20//! // Read file content
21//! let content = files[0].read_to_end().await?;
22//! ```
23//!
24//! ## Multi-Volume Archives
25//!
26//! For split archives, provide all volumes in order:
27//!
28//! ```rust,ignore
29//! let volumes: Vec<Arc<dyn FileMedia>> = vec![
30//!     Arc::new(LocalFileMedia::new("archive.part1.rar")?),
31//!     Arc::new(LocalFileMedia::new("archive.part2.rar")?),
32//!     Arc::new(LocalFileMedia::new("archive.part3.rar")?),
33//! ];
34//! let package = RarFilesPackage::new(volumes);
35//! let files = package.parse(ParseOptions::default()).await?;
36//! ```
37//!
38//! ## Filtering Files
39//!
40//! Use [`ParseOptions`] to filter or limit results:
41//!
42//! ```rust,ignore
43//! let opts = ParseOptions {
44//!     // Only include .txt files
45//!     filter: Some(Box::new(|name, _index| name.ends_with(".txt"))),
46//!     // Limit to first 10 matches
47//!     max_files: Some(10),
48//!     ..Default::default()
49//! };
50//! let txt_files = package.parse(opts).await?;
51//! ```
52//!
53//! ## Encrypted Archives
54//!
55//! With the `crypto` feature enabled:
56//!
57//! ```rust,ignore
58//! let opts = ParseOptions {
59//!     password: Some("secret".to_string()),
60//!     ..Default::default()
61//! };
62//! let files = package.parse(opts).await?;
63//! ```
64//!
65//! ## Archive Information
66//!
67//! Get metadata about the archive without parsing all files:
68//!
69//! ```rust,ignore
70//! let info = package.get_archive_info().await?;
71//! println!("Format: {:?}", info.version);
72//! println!("Solid: {}", info.is_solid);
73//! println!("Has recovery: {}", info.has_recovery_record);
74//! ```
75
76use crate::error::{RarError, Result};
77use crate::file_media::{FileMedia, ReadInterval};
78use crate::inner_file::InnerFile;
79use crate::parsing::{
80    rar5::{Rar5ArchiveHeaderParser, Rar5EncryptionHeaderParser, Rar5FileHeaderParser},
81    ArchiveHeaderParser, FileHeaderParser, MarkerHeaderParser, RarVersion, TerminatorHeaderParser,
82};
83use crate::rar_file_chunk::RarFileChunk;
84use std::collections::HashMap;
85use std::sync::Arc;
86
87/// Archive metadata returned by [`RarFilesPackage::get_archive_info`].
88///
89/// Contains information about the archive format, flags, and capabilities.
90/// All fields are read from the archive header without decompressing any files.
91///
92/// # Example
93///
94/// ```rust,ignore
95/// let info = package.get_archive_info().await?;
96/// if info.has_encrypted_headers {
97///     println!("Archive requires password to list files");
98/// }
99/// if info.is_solid {
100///     println!("Solid archive: files must be extracted in order");
101/// }
102/// ```
103#[derive(Debug, Clone, Default, PartialEq, Eq)]
104pub struct ArchiveInfo {
105    /// Whether the archive has a recovery record for error correction.
106    ///
107    /// Recovery records allow repairing damaged archives using Reed-Solomon codes.
108    pub has_recovery_record: bool,
109
110    /// Whether the archive uses solid compression.
111    ///
112    /// In solid archives, files are compressed together as a single stream.
113    /// This improves compression ratio but requires extracting files in order.
114    pub is_solid: bool,
115
116    /// Whether the archive is locked (cannot be modified).
117    ///
118    /// Locked archives cannot have files added, deleted, or modified.
119    pub is_locked: bool,
120
121    /// Whether the archive is split across multiple volumes.
122    ///
123    /// Multi-volume archives have files that span multiple `.rar`/`.rXX` files.
124    pub is_multivolume: bool,
125
126    /// Whether file headers are encrypted (requires password to list files).
127    ///
128    /// Only RAR5 archives created with `rar -hp` have encrypted headers.
129    /// Without the password, even file names cannot be read.
130    pub has_encrypted_headers: bool,
131
132    /// RAR format version (RAR4 or RAR5).
133    pub version: RarVersion,
134}
135
/// Options for parsing RAR archives.
///
/// Use this struct to customize parsing behavior, including filtering,
/// limiting results, and providing passwords for encrypted archives.
/// The [`Default`] value applies no filter, no limit and no password.
///
/// # Example
///
/// ```rust,ignore
/// let opts = ParseOptions {
///     filter: Some(Box::new(|name, _| name.ends_with(".mp4"))),
///     max_files: Some(100),
///     #[cfg(feature = "crypto")]
///     password: Some("secret".to_string()),
/// };
/// ```
#[derive(Default)]
pub struct ParseOptions {
    /// Filter function: return `true` to include a file.
    ///
    /// The function receives the file name and the entry's 0-based index.
    /// The index counts every entry seen in the volume being parsed,
    /// including entries the filter rejected, and restarts for each volume.
    pub filter: Option<Box<dyn Fn(&str, usize) -> bool + Send + Sync>>,

    /// Maximum number of files to return.
    ///
    /// Header parsing of a volume stops once this many entries have been
    /// accepted. Useful for previewing large archives without parsing
    /// everything.
    pub max_files: Option<usize>,

    /// Password for encrypted archives.
    ///
    /// Required for archives with encrypted file data or headers.
    /// If the password is wrong, [`RarError::DecryptionFailed`] is returned.
    #[cfg(feature = "crypto")]
    pub password: Option<String>,
}
172
/// Per-file encryption parameters read from a file header.
///
/// Only available with the `crypto` feature. The variant records which
/// archive format the parameters came from.
#[cfg(feature = "crypto")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileEncryptionInfo {
    /// RAR5 encryption (AES-256-CBC with PBKDF2)
    Rar5 {
        /// 16-byte salt for key derivation
        salt: [u8; 16],
        /// 16-byte initialization vector
        init_v: [u8; 16],
        /// Log2 of PBKDF2 iteration count
        lg2_count: u8,
    },
    /// RAR4 encryption (AES-256-CBC with custom SHA-1 KDF)
    Rar4 {
        /// 8-byte salt for key derivation
        salt: [u8; 8],
    },
}
192
193/// Parsed file chunk with metadata.
194struct ParsedChunk {
195    name: String,
196    chunk: RarFileChunk,
197    continues_in_next: bool,
198    unpacked_size: u64,
199    chunk_size: u64,
200    method: u8,
201    /// Dictionary size (log2), only for RAR5 compressed files
202    dict_size_log: u8,
203    rar_version: RarVersion,
204    /// Whether this file is part of a solid archive
205    is_solid: bool,
206    /// Encryption info (if encrypted)
207    #[cfg(feature = "crypto")]
208    encryption: Option<FileEncryptionInfo>,
209}
210
211/// Multi-volume RAR archive parser.
212pub struct RarFilesPackage {
213    files: Vec<Arc<dyn FileMedia>>,
214}
215
216impl RarFilesPackage {
217    /// Create a new [`RarFilesPackage`] from a list of volume files.
218    ///
219    /// Volumes are automatically sorted into the correct order
220    /// (`.rar` first, then `.r00`, `.r01`, etc.).
221    pub fn new(files: Vec<Arc<dyn FileMedia>>) -> Self {
222        // Sort files by name to ensure correct order (.rar, .r00, .r01, ...)
223        let mut files = files;
224        files.sort_by(|a, b| Self::volume_order(a.name()).cmp(&Self::volume_order(b.name())));
225        Self { files }
226    }
227
228    /// Get sort order for volume names.
229    fn volume_order(name: &str) -> (u32, String) {
230        let lower = name.to_lowercase();
231        if lower.ends_with(".rar") {
232            (0, lower) // .rar comes first
233        } else {
234            // Try to extract number from extension like .r00, .r01
235            let ext = lower.rsplit('.').next().unwrap_or("");
236            if ext.starts_with('r') && ext.len() == 3 {
237                ext[1..]
238                    .parse::<u32>()
239                    .map(|n| (n + 1, lower.clone()))
240                    .unwrap_or((1000, lower))
241            } else {
242                (1000, lower)
243            }
244        }
245    }
246
247    /// Get archive metadata from the first volume.
248    pub async fn get_archive_info(&self) -> Result<ArchiveInfo> {
249        use crate::parsing::rar5::Rar5EncryptionHeaderParser;
250
251        if self.files.is_empty() {
252            return Err(RarError::NoFilesFound);
253        }
254
255        let rar_file = &self.files[0];
256        let marker_buf = rar_file
257            .read_range(ReadInterval {
258                start: 0,
259                end: 7, // RAR5 signature is 8 bytes
260            })
261            .await?;
262
263        let marker = MarkerHeaderParser::parse(&marker_buf)?;
264
265        match marker.version {
266            RarVersion::Rar4 => {
267                let archive_buf = rar_file
268                    .read_range(ReadInterval {
269                        start: marker.size as u64,
270                        end: marker.size as u64 + ArchiveHeaderParser::HEADER_SIZE as u64 - 1,
271                    })
272                    .await?;
273                let archive = ArchiveHeaderParser::parse(&archive_buf)?;
274
275                Ok(ArchiveInfo {
276                    has_recovery_record: archive.has_recovery,
277                    is_solid: archive.has_solid_attributes,
278                    is_locked: archive.is_locked,
279                    is_multivolume: archive.has_volume_attributes,
280                    has_encrypted_headers: archive.is_block_encoded,
281                    version: RarVersion::Rar4,
282                })
283            }
284            RarVersion::Rar5 => {
285                // Check if next header is encryption header (type 4)
286                let header_buf = rar_file
287                    .read_range(ReadInterval {
288                        start: marker.size as u64,
289                        end: (marker.size as u64 + 255).min(rar_file.length() - 1),
290                    })
291                    .await?;
292
293                let has_encrypted_headers =
294                    Rar5EncryptionHeaderParser::is_encryption_header(&header_buf);
295
296                if has_encrypted_headers {
297                    // Headers are encrypted - we can't read archive flags without password
298                    Ok(ArchiveInfo {
299                        has_encrypted_headers: true,
300                        version: RarVersion::Rar5,
301                        ..Default::default()
302                    })
303                } else {
304                    let (archive, _) = Rar5ArchiveHeaderParser::parse(&header_buf)?;
305
306                    Ok(ArchiveInfo {
307                        has_recovery_record: archive.archive_flags.has_recovery_record,
308                        is_solid: archive.archive_flags.is_solid,
309                        is_locked: archive.archive_flags.is_locked,
310                        is_multivolume: archive.archive_flags.is_volume,
311                        has_encrypted_headers: false,
312                        version: RarVersion::Rar5,
313                    })
314                }
315            }
316        }
317    }
318
319    /// Parse a single RAR file and extract file chunks.
320    async fn parse_file(
321        &self,
322        rar_file: &Arc<dyn FileMedia>,
323        opts: &ParseOptions,
324    ) -> Result<Vec<ParsedChunk>> {
325        #[allow(unused_mut)]
326        let mut offset = 0u64;
327
328        // Read enough for both RAR4 and RAR5 signatures
329        let marker_buf = rar_file
330            .read_range(ReadInterval {
331                start: offset,
332                end: offset + 8 - 1, // RAR5 signature is 8 bytes
333            })
334            .await?;
335
336        let marker = MarkerHeaderParser::parse(&marker_buf)?;
337
338        // Dispatch based on version
339        match marker.version {
340            RarVersion::Rar4 => {
341                self.parse_rar4_file(rar_file, opts, marker.size as u64)
342                    .await
343            }
344            RarVersion::Rar5 => self.parse_rar5_file(rar_file, opts).await,
345        }
346    }
347
348    /// Parse a RAR4 format file.
349    async fn parse_rar4_file(
350        &self,
351        rar_file: &Arc<dyn FileMedia>,
352        opts: &ParseOptions,
353        marker_size: u64,
354    ) -> Result<Vec<ParsedChunk>> {
355        let mut chunks = Vec::new();
356        let mut offset = marker_size;
357
358        // Parse archive header
359        let archive_buf = rar_file
360            .read_range(ReadInterval {
361                start: offset,
362                end: offset + ArchiveHeaderParser::HEADER_SIZE as u64 - 1,
363            })
364            .await?;
365        let archive = ArchiveHeaderParser::parse(&archive_buf)?;
366        let is_solid = archive.has_solid_attributes;
367        offset += archive.size as u64;
368
369        let mut file_count = 0usize;
370        let mut retrieved_count = 0usize;
371        let terminator_size = TerminatorHeaderParser::HEADER_SIZE as u64;
372
373        // Parse file headers
374        while offset < rar_file.length().saturating_sub(terminator_size) {
375            // Read enough bytes for header (but not more than available)
376            let bytes_available = rar_file.length().saturating_sub(offset);
377            let read_size = (FileHeaderParser::HEADER_SIZE as u64).min(bytes_available);
378
379            if read_size < 32 {
380                // Not enough for minimum header
381                break;
382            }
383
384            let header_buf = rar_file
385                .read_range(ReadInterval {
386                    start: offset,
387                    end: offset + read_size - 1,
388                })
389                .await?;
390
391            let file_header = match FileHeaderParser::parse(&header_buf) {
392                Ok(h) => h,
393                Err(_) => break,
394            };
395
396            // Check if this is a file header (type 0x74 = 116)
397            if file_header.header_type != 0x74 {
398                break;
399            }
400
401            // Check encryption - with crypto feature, we can handle encrypted files
402            #[cfg(not(feature = "crypto"))]
403            if file_header.is_encrypted {
404                return Err(RarError::EncryptedNotSupported);
405            }
406
407            let data_start = offset + file_header.head_size as u64;
408            let data_end = if file_header.packed_size > 0 {
409                data_start + file_header.packed_size - 1
410            } else {
411                data_start
412            };
413
414            // Apply filter
415            let include = match &opts.filter {
416                Some(f) => f(&file_header.name, file_count),
417                None => true,
418            };
419
420            if include {
421                let chunk = RarFileChunk::new(rar_file.clone(), data_start, data_end);
422                let chunk_size = chunk.length();
423
424                // Parse encryption info if present (RAR4)
425                #[cfg(feature = "crypto")]
426                let encryption = if file_header.is_encrypted {
427                    file_header
428                        .salt
429                        .map(|salt| FileEncryptionInfo::Rar4 { salt })
430                } else {
431                    None
432                };
433
434                chunks.push(ParsedChunk {
435                    name: file_header.name.clone(),
436                    chunk,
437                    continues_in_next: file_header.continues_in_next,
438                    unpacked_size: file_header.unpacked_size,
439                    chunk_size,
440                    method: file_header.method,
441                    dict_size_log: 22, // RAR4 doesn't specify, use 4MB default
442                    rar_version: RarVersion::Rar4,
443                    is_solid,
444                    #[cfg(feature = "crypto")]
445                    encryption,
446                });
447                retrieved_count += 1;
448
449                // Check max files limit
450                if let Some(max) = opts.max_files {
451                    if retrieved_count >= max {
452                        break;
453                    }
454                }
455            }
456
457            offset = data_end + 1;
458            file_count += 1;
459        }
460
461        Ok(chunks)
462    }
463
464    /// Parse an encrypted header.
465    /// The format is: 16-byte IV + encrypted header data (padded to 16 bytes).
466    #[cfg(feature = "crypto")]
467    fn parse_encrypted_header<T, F>(
468        &self,
469        data: &[u8],
470        crypto: &crate::crypto::Rar5Crypto,
471        parser: F,
472    ) -> Result<(T, usize)>
473    where
474        F: FnOnce(&[u8]) -> Result<(T, usize)>,
475    {
476        use crate::parsing::rar5::VintReader;
477
478        if data.len() < 16 {
479            return Err(RarError::InvalidHeader);
480        }
481
482        // First 16 bytes are the IV
483        let mut iv = [0u8; 16];
484        iv.copy_from_slice(&data[..16]);
485
486        // Read enough encrypted data - we need to determine the header size
487        // RAR5 encrypted headers have their size after CRC and before type
488        // We'll decrypt a reasonable chunk and parse from there
489        let encrypted_start = 16;
490
491        // Read at least 256 bytes of encrypted data (should be enough for most headers)
492        let available = data.len().saturating_sub(encrypted_start);
493        if available < 16 {
494            return Err(RarError::InvalidHeader);
495        }
496
497        // Round up to 16-byte boundary
498        let decrypt_len = (available.min(512) / 16) * 16;
499        if decrypt_len == 0 {
500            return Err(RarError::InvalidHeader);
501        }
502
503        let mut decrypted = data[encrypted_start..encrypted_start + decrypt_len].to_vec();
504        crypto
505            .decrypt(&iv, &mut decrypted)
506            .map_err(|e| RarError::DecryptionFailed(e.to_string()))?;
507
508        // Parse the decrypted header
509        let (result, _) = parser(&decrypted)?;
510
511        // Calculate actual header size including CRC, size vint, and content
512        // We need to read the header size from decrypted data
513        let mut reader = VintReader::new(&decrypted[4..]); // Skip CRC32
514        let header_size = reader.read().ok_or(RarError::InvalidHeader)?;
515        let size_vint_len = reader.position();
516
517        // Total encrypted size = CRC(4) + size_vint + header_content, rounded up to 16
518        let plaintext_size = 4 + size_vint_len + header_size as usize;
519        let encrypted_size = plaintext_size.div_ceil(16) * 16;
520
521        // Total consumed = IV(16) + encrypted_size
522        Ok((result, 16 + encrypted_size))
523    }
524
525    /// Parse a RAR5 format file.
526    async fn parse_rar5_file(
527        &self,
528        rar_file: &Arc<dyn FileMedia>,
529        opts: &ParseOptions,
530    ) -> Result<Vec<ParsedChunk>> {
531        let mut chunks = Vec::new();
532        let mut offset = 8u64; // RAR5 signature is 8 bytes
533
534        // Read first header to check for encryption header
535        let header_buf = rar_file
536            .read_range(ReadInterval {
537                start: offset,
538                end: (offset + 256 - 1).min(rar_file.length() - 1),
539            })
540            .await?;
541
542        // Check if headers are encrypted
543        #[cfg(feature = "crypto")]
544        let header_crypto: Option<crate::crypto::Rar5Crypto> =
545            if Rar5EncryptionHeaderParser::is_encryption_header(&header_buf) {
546                let (enc_header, consumed) = Rar5EncryptionHeaderParser::parse(&header_buf)?;
547                offset += consumed as u64;
548
549                // Need password to decrypt headers
550                let password = opts.password.as_ref().ok_or(RarError::PasswordRequired)?;
551
552                Some(crate::crypto::Rar5Crypto::derive_key(
553                    password,
554                    &enc_header.salt,
555                    enc_header.lg2_count,
556                ))
557            } else {
558                None
559            };
560
561        #[cfg(not(feature = "crypto"))]
562        if Rar5EncryptionHeaderParser::is_encryption_header(&header_buf) {
563            return Err(RarError::PasswordRequired);
564        }
565
566        // Read archive header (which may be encrypted)
567        #[cfg(feature = "crypto")]
568        let (archive_header, consumed) = if let Some(ref crypto) = header_crypto {
569            // Read IV (16 bytes) + encrypted header
570            let enc_buf = rar_file
571                .read_range(ReadInterval {
572                    start: offset,
573                    end: (offset + 512 - 1).min(rar_file.length() - 1),
574                })
575                .await?;
576
577            self.parse_encrypted_header(&enc_buf, crypto, |data| {
578                Rar5ArchiveHeaderParser::parse(data)
579            })?
580        } else {
581            Rar5ArchiveHeaderParser::parse(&header_buf)?
582        };
583
584        #[cfg(not(feature = "crypto"))]
585        let (archive_header, consumed) = Rar5ArchiveHeaderParser::parse(&header_buf)?;
586
587        let is_solid = archive_header.archive_flags.is_solid;
588        offset += consumed as u64;
589
590        let mut file_count = 0usize;
591        let mut retrieved_count = 0usize;
592
593        // Parse file headers
594        while offset < rar_file.length().saturating_sub(16) {
595            // Read header data (variable size)
596            let bytes_available = rar_file.length().saturating_sub(offset);
597            let read_size = 512u64.min(bytes_available);
598
599            if read_size < 16 {
600                break;
601            }
602
603            let header_buf = rar_file
604                .read_range(ReadInterval {
605                    start: offset,
606                    end: offset + read_size - 1,
607                })
608                .await?;
609
610            // Try to parse as file header (may be encrypted)
611            #[cfg(feature = "crypto")]
612            let (file_header, header_consumed) = if let Some(ref crypto) = header_crypto {
613                match self.parse_encrypted_header(&header_buf, crypto, |data| {
614                    Rar5FileHeaderParser::parse(data)
615                }) {
616                    Ok(h) => h,
617                    Err(_) => break,
618                }
619            } else {
620                match Rar5FileHeaderParser::parse(&header_buf) {
621                    Ok(h) => h,
622                    Err(_) => break,
623                }
624            };
625
626            #[cfg(not(feature = "crypto"))]
627            let (file_header, header_consumed) = match Rar5FileHeaderParser::parse(&header_buf) {
628                Ok(h) => h,
629                Err(_) => break,
630            };
631
632            let data_start = offset + header_consumed as u64;
633            let data_end = if file_header.packed_size > 0 {
634                data_start + file_header.packed_size - 1
635            } else {
636                data_start
637            };
638
639            // Apply filter
640            let include = match &opts.filter {
641                Some(f) => f(&file_header.name, file_count),
642                None => true,
643            };
644
645            if include {
646                let chunk = RarFileChunk::new(rar_file.clone(), data_start, data_end);
647                let chunk_size = file_header.packed_size;
648
649                // Convert RAR5 method to RAR4-compatible format
650                // RAR5 method 0 = stored, 1-5 = compression
651                // Store the raw method, not converted to RAR4 format
652                let method = file_header.compression.method;
653
654                // Parse encryption info if present
655                #[cfg(feature = "crypto")]
656                let encryption = if file_header.is_encrypted() {
657                    file_header.encryption_info().and_then(|data| {
658                        crate::crypto::Rar5EncryptionInfo::parse(data)
659                            .ok()
660                            .map(|info| FileEncryptionInfo::Rar5 {
661                                salt: info.salt,
662                                init_v: info.init_v,
663                                lg2_count: info.lg2_count,
664                            })
665                    })
666                } else {
667                    None
668                };
669
670                chunks.push(ParsedChunk {
671                    name: file_header.name.clone(),
672                    chunk,
673                    continues_in_next: file_header.continues_in_next(),
674                    unpacked_size: file_header.unpacked_size,
675                    chunk_size,
676                    method,
677                    dict_size_log: file_header.compression.dict_size_log,
678                    rar_version: RarVersion::Rar5,
679                    is_solid,
680                    #[cfg(feature = "crypto")]
681                    encryption,
682                });
683                retrieved_count += 1;
684
685                if let Some(max) = opts.max_files {
686                    if retrieved_count >= max {
687                        break;
688                    }
689                }
690            }
691
692            offset = data_end + 1;
693            file_count += 1;
694        }
695
696        Ok(chunks)
697    }
698
699    /// Parse all volumes and return inner files.
700    pub async fn parse(&self, opts: ParseOptions) -> Result<Vec<InnerFile>> {
701        if self.files.is_empty() {
702            return Err(RarError::NoFilesFound);
703        }
704
705        let mut all_parsed: Vec<Vec<ParsedChunk>> = Vec::new();
706
707        let mut i = 0;
708        while i < self.files.len() {
709            let file = &self.files[i];
710            let chunks = self.parse_file(file, &opts).await?;
711
712            if chunks.is_empty() {
713                i += 1;
714                continue;
715            }
716
717            // Get info from last chunk
718            let last = chunks.last().unwrap();
719            let continues = last.continues_in_next;
720            let chunk_size = last.chunk_size;
721            let unpacked_size = last.unpacked_size;
722            let chunk_start = last.chunk.start_offset;
723            let chunk_end = last.chunk.end_offset;
724            let name = last.name.clone();
725            let rar_version = last.rar_version;
726            let is_solid = last.is_solid;
727
728            all_parsed.push(chunks);
729
730            // Handle continuation - simplified approach matching original rar-stream
731            if continues {
732                let mut remaining = unpacked_size.saturating_sub(chunk_size);
733                while remaining >= chunk_size && i + 1 < self.files.len() {
734                    i += 1;
735                    let next_file = &self.files[i];
736
737                    // Create chunk at same offsets in next volume
738                    let chunk = RarFileChunk::new(next_file.clone(), chunk_start, chunk_end);
739                    all_parsed.push(vec![ParsedChunk {
740                        name: name.clone(),
741                        chunk,
742                        continues_in_next: false,
743                        unpacked_size,
744                        chunk_size,
745                        method: 0x30,      // Continue chunks are always raw data
746                        dict_size_log: 22, // Default, not used for stored data
747                        rar_version,
748                        is_solid,
749                        #[cfg(feature = "crypto")]
750                        encryption: None, // Continuation chunks don't have encryption headers
751                    }]);
752                    remaining = remaining.saturating_sub(chunk_size);
753                }
754            }
755
756            i += 1;
757        }
758
759        // Flatten and group chunks by filename, keeping method info
760        let all_chunks: Vec<ParsedChunk> = all_parsed.into_iter().flatten().collect();
761
762        #[cfg(feature = "crypto")]
763        type GroupValue = (
764            Vec<RarFileChunk>,
765            u8,
766            u8, // dict_size_log
767            u64,
768            RarVersion,
769            bool, // is_solid
770            Option<FileEncryptionInfo>,
771        );
772        #[cfg(not(feature = "crypto"))]
773        type GroupValue = (Vec<RarFileChunk>, u8, u8, u64, RarVersion, bool);
774
775        let mut grouped: HashMap<String, GroupValue> = HashMap::new();
776        for chunk in all_chunks {
777            #[cfg(feature = "crypto")]
778            let entry = grouped.entry(chunk.name).or_insert_with(|| {
779                (
780                    Vec::new(),
781                    chunk.method,
782                    chunk.dict_size_log,
783                    chunk.unpacked_size,
784                    chunk.rar_version,
785                    chunk.is_solid,
786                    chunk.encryption,
787                )
788            });
789            #[cfg(not(feature = "crypto"))]
790            let entry = grouped.entry(chunk.name).or_insert_with(|| {
791                (
792                    Vec::new(),
793                    chunk.method,
794                    chunk.dict_size_log,
795                    chunk.unpacked_size,
796                    chunk.rar_version,
797                    chunk.is_solid,
798                )
799            });
800            entry.0.push(chunk.chunk);
801        }
802
803        // Create InnerFile for each group
804        #[cfg(feature = "crypto")]
805        let password = opts.password.clone();
806
807        let inner_files: Vec<InnerFile> = grouped
808            .into_iter()
809            .map(|(name, value)| {
810                #[cfg(feature = "crypto")]
811                {
812                    let (
813                        chunks,
814                        method,
815                        dict_size_log,
816                        unpacked_size,
817                        rar_version,
818                        is_solid,
819                        encryption,
820                    ) = value;
821                    let enc_info = encryption.map(|e| match e {
822                        FileEncryptionInfo::Rar5 {
823                            salt,
824                            init_v,
825                            lg2_count,
826                        } => crate::inner_file::EncryptionInfo::Rar5 {
827                            salt,
828                            init_v,
829                            lg2_count,
830                        },
831                        FileEncryptionInfo::Rar4 { salt } => {
832                            crate::inner_file::EncryptionInfo::Rar4 { salt }
833                        }
834                    });
835                    InnerFile::new_encrypted_with_solid_dict(
836                        name,
837                        chunks,
838                        method,
839                        dict_size_log,
840                        unpacked_size,
841                        rar_version,
842                        enc_info,
843                        password.clone(),
844                        is_solid,
845                    )
846                }
847                #[cfg(not(feature = "crypto"))]
848                {
849                    let (chunks, method, dict_size_log, unpacked_size, rar_version, is_solid) =
850                        value;
851                    InnerFile::new_with_solid_dict(
852                        name,
853                        chunks,
854                        method,
855                        dict_size_log,
856                        unpacked_size,
857                        rar_version,
858                        is_solid,
859                    )
860                }
861            })
862            .collect();
863
864        Ok(inner_files)
865    }
866}
867
868#[cfg(test)]
869mod tests {
870    use super::*;
871    use crate::file_media::{FileMedia, LocalFileMedia};
872
873    #[tokio::test]
874    #[cfg(feature = "async")]
875    async fn test_get_archive_info_rar5() {
876        let file: Arc<dyn FileMedia> =
877            Arc::new(LocalFileMedia::new("__fixtures__/rar5/test.rar").unwrap());
878        let package = RarFilesPackage::new(vec![file]);
879
880        let info = package.get_archive_info().await.unwrap();
881        assert_eq!(info.version, RarVersion::Rar5);
882        assert!(!info.is_multivolume);
883    }
884
885    #[tokio::test]
886    #[cfg(feature = "async")]
887    async fn test_get_archive_info_rar4() {
888        let file: Arc<dyn FileMedia> =
889            Arc::new(LocalFileMedia::new("__fixtures__/single/single.rar").unwrap());
890        let package = RarFilesPackage::new(vec![file]);
891
892        let info = package.get_archive_info().await.unwrap();
893        assert_eq!(info.version, RarVersion::Rar4);
894        assert!(!info.is_multivolume);
895    }
896
897    #[tokio::test]
898    #[cfg(feature = "async")]
899    async fn test_parse_rar5_stored() {
900        // Test parsing a RAR5 stored file
901        let file: Arc<dyn FileMedia> =
902            Arc::new(LocalFileMedia::new("__fixtures__/rar5/test.rar").unwrap());
903        let package = RarFilesPackage::new(vec![file]);
904
905        let files = package.parse(ParseOptions::default()).await.unwrap();
906
907        assert_eq!(files.len(), 1);
908        assert_eq!(files[0].name, "test.txt");
909    }
910
911    #[tokio::test]
912    #[cfg(feature = "async")]
913    async fn test_parse_rar5_compressed() {
914        // Test parsing a RAR5 compressed file
915        let file: Arc<dyn FileMedia> =
916            Arc::new(LocalFileMedia::new("__fixtures__/rar5/compressed.rar").unwrap());
917        let package = RarFilesPackage::new(vec![file]);
918
919        let files = package.parse(ParseOptions::default()).await.unwrap();
920
921        assert_eq!(files.len(), 1);
922        assert_eq!(files[0].name, "compress_test.txt");
923        assert_eq!(files[0].length, 152); // Unpacked size
924
925        // Try to read and decompress the file content
926        // Note: RAR5 compressed decompression is still being debugged
927        match files[0].read_to_end().await {
928            Ok(content) => {
929                eprintln!("Got {} bytes of output", content.len());
930                eprintln!("First 32 bytes: {:02x?}", &content[..32.min(content.len())]);
931
932                // Verify we got the full uncompressed content
933                assert_eq!(
934                    content.len(),
935                    152,
936                    "decompressed size should match unpacked size"
937                );
938
939                // Verify the content is valid text
940                match std::str::from_utf8(&content) {
941                    Ok(text) => {
942                        assert!(
943                            text.contains("This is a test file"),
944                            "content should contain expected text"
945                        );
946                        assert!(
947                            text.contains("hello hello"),
948                            "content should contain repeated text"
949                        );
950                    }
951                    Err(_) => {
952                        // Decompression ran but output is wrong - still debugging
953                        eprintln!(
954                            "RAR5 decompression output is not valid UTF-8 (work in progress)"
955                        );
956                    }
957                }
958            }
959            Err(e) => {
960                // RAR5 decompression not yet fully implemented - parsing verified
961                eprintln!("RAR5 decompression error: {:?}", e);
962            }
963        }
964    }
965
966    #[tokio::test]
967    #[cfg(feature = "async")]
968    async fn test_parse_rar5_multivolume() {
969        // Test parsing a multi-volume RAR5 archive
970        let fixture_dir = "__fixtures__/rar5-multivolume";
971
972        // Collect all volume files
973        let mut volume_paths: Vec<String> = std::fs::read_dir(fixture_dir)
974            .unwrap()
975            .filter_map(|e| e.ok())
976            .map(|e| e.path())
977            .filter(|p| p.extension().map_or(false, |ext| ext == "rar"))
978            .map(|p| p.to_string_lossy().to_string())
979            .collect();
980
981        // Sort by name so volumes are in order
982        volume_paths.sort();
983
984        if volume_paths.is_empty() {
985            // Skip test if fixtures don't exist
986            eprintln!("Skipping test - no multi-volume fixtures found");
987            return;
988        }
989
990        eprintln!("Found {} volumes: {:?}", volume_paths.len(), volume_paths);
991
992        // Create file medias for each volume
993        let files: Vec<Arc<dyn FileMedia>> = volume_paths
994            .iter()
995            .map(|p| Arc::new(LocalFileMedia::new(p).unwrap()) as Arc<dyn FileMedia>)
996            .collect();
997
998        let package = RarFilesPackage::new(files);
999
1000        let parsed = package.parse(ParseOptions::default()).await.unwrap();
1001
1002        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1003        assert_eq!(parsed[0].name, "testfile.txt");
1004
1005        // The length might be slightly off due to volume header handling
1006        // but should be close to the original file size
1007        eprintln!("Parsed length: {}", parsed[0].length);
1008
1009        // Try to read the file content (stored, so should work)
1010        let content = parsed[0].read_to_end().await.unwrap();
1011        eprintln!("Read content length: {}", content.len());
1012
1013        // Verify the content is valid and contains expected text
1014        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1015        assert!(text.contains("Line 1:"), "should contain first line");
1016        assert!(text.contains("Line 100:"), "should contain last line");
1017
1018        // Verify we got approximately the right size (allow for header overhead)
1019        assert!(content.len() >= 11000, "should have at least 11000 bytes");
1020    }
1021
1022    #[tokio::test]
1023    #[cfg(all(feature = "async", feature = "crypto"))]
1024    async fn test_parse_rar5_encrypted_stored() {
1025        // Test parsing and extracting an encrypted RAR5 file (stored, no compression)
1026        let fixture = "__fixtures__/encrypted/rar5-encrypted-stored.rar";
1027
1028        if !std::path::Path::new(fixture).exists() {
1029            eprintln!("Skipping test - encrypted fixtures not found");
1030            return;
1031        }
1032
1033        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1034        let package = RarFilesPackage::new(vec![file]);
1035
1036        let opts = ParseOptions {
1037            password: Some("testpass".to_string()),
1038            ..Default::default()
1039        };
1040
1041        let parsed = package.parse(opts).await.unwrap();
1042        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1043
1044        let inner_file = &parsed[0];
1045        assert_eq!(inner_file.name, "testfile.txt");
1046        assert!(inner_file.is_encrypted());
1047
1048        // Read the decrypted content
1049        let content = inner_file.read_decompressed().await.unwrap();
1050        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1051
1052        assert!(text.starts_with("Hello, encrypted world!"));
1053    }
1054
1055    #[tokio::test]
1056    #[cfg(all(feature = "async", feature = "crypto"))]
1057    async fn test_parse_rar5_encrypted_no_password() {
1058        let fixture = "__fixtures__/encrypted/rar5-encrypted-stored.rar";
1059
1060        if !std::path::Path::new(fixture).exists() {
1061            eprintln!("Skipping test - encrypted fixtures not found");
1062            return;
1063        }
1064
1065        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1066        let package = RarFilesPackage::new(vec![file]);
1067
1068        // No password provided
1069        let parsed = package.parse(ParseOptions::default()).await.unwrap();
1070        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1071
1072        let inner_file = &parsed[0];
1073        assert!(inner_file.is_encrypted());
1074
1075        // Reading should fail because no password was provided
1076        let result = inner_file.read_decompressed().await;
1077        assert!(result.is_err());
1078        match result {
1079            Err(crate::RarError::PasswordRequired) => {
1080                // Expected error
1081            }
1082            Err(e) => panic!("Expected PasswordRequired error, got: {:?}", e),
1083            Ok(_) => panic!("Expected error but got success"),
1084        }
1085    }
1086
1087    #[tokio::test]
1088    #[cfg(all(feature = "async", feature = "crypto"))]
1089    async fn test_parse_rar5_encrypted_headers() {
1090        // Test parsing an archive with encrypted headers (created with rar -hp)
1091        let fixture = "__fixtures__/encrypted/rar5-encrypted-headers.rar";
1092
1093        if !std::path::Path::new(fixture).exists() {
1094            eprintln!("Skipping test - encrypted headers fixture not found");
1095            return;
1096        }
1097
1098        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1099        let package = RarFilesPackage::new(vec![file]);
1100
1101        // First check archive info - should show encrypted headers
1102        let info = package.get_archive_info().await.unwrap();
1103        assert!(info.has_encrypted_headers, "should have encrypted headers");
1104        assert_eq!(info.version, RarVersion::Rar5);
1105
1106        // Parsing without password should fail
1107        let result = package.parse(ParseOptions::default()).await;
1108        assert!(
1109            matches!(result, Err(RarError::PasswordRequired)),
1110            "should require password for encrypted headers, got {:?}",
1111            result
1112        );
1113
1114        // Parsing with password should succeed
1115        let opts = ParseOptions {
1116            password: Some("testpass".to_string()),
1117            ..Default::default()
1118        };
1119
1120        let parsed = package.parse(opts).await.unwrap();
1121        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1122        assert_eq!(parsed[0].name, "testfile.txt");
1123
1124        // File content is also encrypted, so read should work
1125        let content = parsed[0].read_decompressed().await.unwrap();
1126        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1127        assert!(
1128            text.starts_with("Hello, encrypted world!"),
1129            "content was: {:?}",
1130            text
1131        );
1132    }
1133
1134    #[tokio::test]
1135    #[cfg(all(feature = "async", feature = "crypto"))]
1136    async fn test_get_archive_info_encrypted_headers() {
1137        // Test that get_archive_info detects encrypted headers
1138        let fixture = "__fixtures__/encrypted/rar5-encrypted-headers.rar";
1139
1140        if !std::path::Path::new(fixture).exists() {
1141            eprintln!("Skipping test - encrypted headers fixture not found");
1142            return;
1143        }
1144
1145        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1146        let package = RarFilesPackage::new(vec![file]);
1147
1148        let info = package.get_archive_info().await.unwrap();
1149        assert!(info.has_encrypted_headers);
1150        assert_eq!(info.version, RarVersion::Rar5);
1151        // Other flags can't be read when headers are encrypted
1152    }
1153
1154    #[tokio::test]
1155    #[cfg(all(feature = "async", feature = "crypto"))]
1156    async fn test_parse_rar4_encrypted_stored() {
1157        // Test parsing and extracting an encrypted RAR4 file (stored, no compression)
1158        let fixture = "__fixtures__/encrypted/rar4-encrypted-stored.rar";
1159
1160        if !std::path::Path::new(fixture).exists() {
1161            eprintln!("Skipping test - RAR4 encrypted fixtures not found");
1162            return;
1163        }
1164
1165        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1166        let package = RarFilesPackage::new(vec![file]);
1167
1168        // Check archive info
1169        let info = package.get_archive_info().await.unwrap();
1170        assert_eq!(info.version, RarVersion::Rar4);
1171
1172        let opts = ParseOptions {
1173            password: Some("testpass".to_string()),
1174            ..Default::default()
1175        };
1176
1177        let parsed = package.parse(opts).await.unwrap();
1178        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1179
1180        let inner_file = &parsed[0];
1181        assert_eq!(inner_file.name, "testfile.txt");
1182        assert!(inner_file.is_encrypted());
1183
1184        // Read the decrypted content
1185        let content = inner_file.read_decompressed().await.unwrap();
1186        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1187
1188        assert!(
1189            text.starts_with("Hello, encrypted world!"),
1190            "content was: {:?}",
1191            text
1192        );
1193    }
1194
1195    #[tokio::test]
1196    #[cfg(all(feature = "async", feature = "crypto"))]
1197    async fn test_parse_rar4_encrypted_compressed() {
1198        // Test parsing and extracting an encrypted RAR4 file (compressed)
1199        let fixture = "__fixtures__/encrypted/rar4-encrypted.rar";
1200
1201        if !std::path::Path::new(fixture).exists() {
1202            eprintln!("Skipping test - RAR4 encrypted fixtures not found");
1203            return;
1204        }
1205
1206        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1207        let package = RarFilesPackage::new(vec![file]);
1208
1209        // Check archive info
1210        let info = package.get_archive_info().await.unwrap();
1211        assert_eq!(info.version, RarVersion::Rar4);
1212
1213        let opts = ParseOptions {
1214            password: Some("testpass".to_string()),
1215            ..Default::default()
1216        };
1217
1218        let parsed = package.parse(opts).await.unwrap();
1219        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1220
1221        let inner_file = &parsed[0];
1222        assert_eq!(inner_file.name, "testfile.txt");
1223        assert!(inner_file.is_encrypted());
1224
1225        // Read the decrypted content
1226        let content = inner_file.read_decompressed().await.unwrap();
1227        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1228
1229        assert!(
1230            text.starts_with("Hello, encrypted world!"),
1231            "content was: {:?}",
1232            text
1233        );
1234    }
1235}