Skip to main content

rar_stream/
rar_files_package.rs

1//! Multi-volume RAR archive parser.
2//!
3//! This module provides the main entry point for parsing RAR archives.
4//! The [`RarFilesPackage`] struct handles single and multi-volume archives,
5//! automatically stitching files that span multiple volumes.
6//!
7//! ## Quick Start
8//!
9//! ```rust,ignore
10//! use rar_stream::{RarFilesPackage, ParseOptions, LocalFileMedia, FileMedia};
11//! use std::sync::Arc;
12//!
13//! // Open a single RAR file
14//! let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new("archive.rar")?);
15//! let package = RarFilesPackage::new(vec![file]);
16//!
17//! // Parse with default options
18//! let files = package.parse(ParseOptions::default()).await?;
19//!
20//! // Read file content
21//! let content = files[0].read_to_end().await?;
22//! ```
23//!
24//! ## Multi-Volume Archives
25//!
26//! For split archives, provide all volumes in order:
27//!
28//! ```rust,ignore
29//! let volumes: Vec<Arc<dyn FileMedia>> = vec![
30//!     Arc::new(LocalFileMedia::new("archive.part1.rar")?),
31//!     Arc::new(LocalFileMedia::new("archive.part2.rar")?),
32//!     Arc::new(LocalFileMedia::new("archive.part3.rar")?),
33//! ];
34//! let package = RarFilesPackage::new(volumes);
35//! let files = package.parse(ParseOptions::default()).await?;
36//! ```
37//!
38//! ## Filtering Files
39//!
40//! Use [`ParseOptions`] to filter or limit results:
41//!
42//! ```rust,ignore
43//! let opts = ParseOptions {
44//!     // Only include .txt files
45//!     filter: Some(Box::new(|name, _index| name.ends_with(".txt"))),
46//!     // Limit to first 10 matches
47//!     max_files: Some(10),
48//!     ..Default::default()
49//! };
50//! let txt_files = package.parse(opts).await?;
51//! ```
52//!
53//! ## Encrypted Archives
54//!
55//! With the `crypto` feature enabled:
56//!
57//! ```rust,ignore
58//! let opts = ParseOptions {
59//!     password: Some("secret".to_string()),
60//!     ..Default::default()
61//! };
62//! let files = package.parse(opts).await?;
63//! ```
64//!
65//! ## Archive Information
66//!
67//! Get metadata about the archive without parsing all files:
68//!
69//! ```rust,ignore
70//! let info = package.get_archive_info().await?;
71//! println!("Format: {:?}", info.version);
72//! println!("Solid: {}", info.is_solid);
73//! println!("Has recovery: {}", info.has_recovery_record);
74//! ```
75
76use crate::error::{RarError, Result};
77use crate::file_media::{FileMedia, ReadInterval};
78use crate::inner_file::InnerFile;
79use crate::parsing::{
80    rar5::{Rar5ArchiveHeaderParser, Rar5EncryptionHeaderParser, Rar5FileHeaderParser},
81    ArchiveHeaderParser, FileHeaderParser, MarkerHeaderParser, RarVersion, TerminatorHeaderParser,
82};
83use crate::rar_file_chunk::RarFileChunk;
84use std::collections::HashMap;
85use std::sync::Arc;
86
/// Default number of bytes prefetched for header parsing (32 KiB).
///
/// Used by `get_archive_info`, and by `parse_file` whenever
/// `ParseOptions::header_prefetch_size` is `None`. One read of this size is
/// issued at the start of a volume and the marker, archive header and file
/// headers are then served from that buffer; a follow-up read happens only
/// when a requested range falls outside it. RAR headers are typically small
/// (roughly 50-500 bytes each), so 32 KiB usually covers hundreds of entries
/// in one I/O round trip.
const HEADER_PREFETCH_SIZE: u64 = 32 << 10;
95
/// Read-ahead window over a volume, used to keep header parsing cheap.
///
/// A large chunk is fetched from the underlying media once; later sub-range
/// reads are answered from `data` whenever they fall entirely inside the
/// window. A request that reaches outside the window triggers a fresh media
/// read, which then becomes the new window.
struct HeaderBuffer {
    /// Bytes currently held in the window.
    data: Vec<u8>,
    /// Absolute byte offset in the file that corresponds to `data[0]`.
    file_offset: u64,
    /// Number of bytes requested each time the window is (re)filled.
    prefetch_size: u64,
}
108
109impl HeaderBuffer {
110    /// Create a new header buffer by prefetching from the given offset.
111    async fn new(media: &Arc<dyn FileMedia>, start: u64, prefetch_size: u64) -> Result<Self> {
112        let end = (start + prefetch_size - 1).min(media.length().saturating_sub(1));
113        if start > end {
114            return Ok(Self {
115                data: Vec::new(),
116                file_offset: start,
117                prefetch_size,
118            });
119        }
120        let data = media.read_range(ReadInterval { start, end }).await?;
121        Ok(Self {
122            data,
123            file_offset: start,
124            prefetch_size,
125        })
126    }
127
128    /// Read a range from the buffer, falling back to media if needed.
129    async fn read(&mut self, media: &Arc<dyn FileMedia>, start: u64, end: u64) -> Result<Vec<u8>> {
130        if end < start {
131            return Ok(Vec::new());
132        }
133        let buf_start = self.file_offset;
134        let buf_end = buf_start + self.data.len() as u64;
135
136        // Fast path: fully within prefetched buffer
137        if start >= buf_start && end < buf_end {
138            let local_start = (start - buf_start) as usize;
139            let local_end = (end - buf_start) as usize;
140            return Ok(self.data[local_start..=local_end].to_vec());
141        }
142
143        // Slow path: need a fresh read from media (and refresh buffer from here)
144        let prefetch_end = (start + self.prefetch_size - 1).min(media.length().saturating_sub(1));
145        let read_end = end.max(prefetch_end);
146        let data = media
147            .read_range(ReadInterval {
148                start,
149                end: read_end,
150            })
151            .await?;
152        let result = data[..(end - start + 1) as usize].to_vec();
153        // Update buffer to serve future reads from this new position
154        self.data = data;
155        self.file_offset = start;
156        Ok(result)
157    }
158}
159
160/// Archive metadata returned by [`RarFilesPackage::get_archive_info`].
161///
162/// Contains information about the archive format, flags, and capabilities.
163/// All fields are read from the archive header without decompressing any files.
164///
165/// # Example
166///
167/// ```rust,ignore
168/// let info = package.get_archive_info().await?;
169/// if info.has_encrypted_headers {
170///     println!("Archive requires password to list files");
171/// }
172/// if info.is_solid {
173///     println!("Solid archive: files must be extracted in order");
174/// }
175/// ```
176#[derive(Debug, Clone, Default, PartialEq, Eq)]
177pub struct ArchiveInfo {
178    /// Whether the archive has a recovery record for error correction.
179    ///
180    /// Recovery records allow repairing damaged archives using Reed-Solomon codes.
181    pub has_recovery_record: bool,
182
183    /// Whether the archive uses solid compression.
184    ///
185    /// In solid archives, files are compressed together as a single stream.
186    /// This improves compression ratio but requires extracting files in order.
187    pub is_solid: bool,
188
189    /// Whether the archive is locked (cannot be modified).
190    ///
191    /// Locked archives cannot have files added, deleted, or modified.
192    pub is_locked: bool,
193
194    /// Whether the archive is split across multiple volumes.
195    ///
196    /// Multi-volume archives have files that span multiple `.rar`/`.rXX` files.
197    pub is_multivolume: bool,
198
199    /// Whether file headers are encrypted (requires password to list files).
200    ///
201    /// Only RAR5 archives created with `rar -hp` have encrypted headers.
202    /// Without the password, even file names cannot be read.
203    pub has_encrypted_headers: bool,
204
205    /// RAR format version (RAR4 or RAR5).
206    pub version: RarVersion,
207}
208
/// Knobs that control how [`RarFilesPackage::parse`] walks an archive.
///
/// All fields are optional; `ParseOptions::default()` applies no filter, no
/// file limit, the built-in prefetch size and (with the `crypto` feature) no
/// password.
///
/// # Example
///
/// ```rust,ignore
/// let opts = ParseOptions {
///     filter: Some(Box::new(|name, _| name.ends_with(".mp4"))),
///     max_files: Some(100),
///     ..Default::default()
/// };
/// ```
#[derive(Default)]
pub struct ParseOptions {
    /// Predicate deciding whether a file is included in the results.
    ///
    /// Called with the file name and the 0-based index of the file header
    /// within the volume being parsed. Files for which it returns `false`
    /// are skipped.
    pub filter: Option<Box<dyn Fn(&str, usize) -> bool + Send + Sync>>,

    /// Upper bound on the number of files collected.
    ///
    /// Header parsing stops once this many files have been accepted, which
    /// is handy for previewing large archives.
    pub max_files: Option<usize>,

    /// Size in bytes of the header prefetch buffer (default: 32KB).
    ///
    /// Headers in a volume are served from one prefetched buffer of this
    /// size to minimise I/O round-trips on slow media (e.g. HTTP range
    /// requests). Raise it for archives with many files or large headers;
    /// lower it when memory is tight.
    pub header_prefetch_size: Option<u64>,

    /// Password for encrypted archives.
    ///
    /// Required for archives with encrypted file data or headers.
    /// If the password is wrong, [`RarError::DecryptionFailed`] is returned.
    #[cfg(feature = "crypto")]
    pub password: Option<String>,
}
253
/// Per-file encryption parameters collected while parsing file headers.
///
/// Only available with the `crypto` feature.
#[cfg(feature = "crypto")]
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileEncryptionInfo {
    /// Parameters for RAR5 encryption (AES-256-CBC, key derived via PBKDF2).
    Rar5 {
        /// Salt used for key derivation (16 bytes).
        salt: [u8; 16],
        /// Initialization vector (16 bytes).
        init_v: [u8; 16],
        /// Base-2 logarithm of the PBKDF2 iteration count.
        lg2_count: u8,
    },
    /// Parameters for RAR4 encryption (AES-256-CBC, custom SHA-1 based KDF).
    Rar4 {
        /// Salt used for key derivation (8 bytes).
        salt: [u8; 8],
    },
}
273
274/// Parsed file chunk with metadata.
275struct ParsedChunk {
276    name: String,
277    chunk: RarFileChunk,
278    continues_in_next: bool,
279    unpacked_size: u64,
280    method: u8,
281    /// Dictionary size (log2), only for RAR5 compressed files
282    dict_size_log: u8,
283    rar_version: RarVersion,
284    /// Whether this file is part of a solid archive
285    is_solid: bool,
286    /// Encryption info (if encrypted)
287    #[cfg(feature = "crypto")]
288    encryption: Option<FileEncryptionInfo>,
289}
290
291/// Multi-volume RAR archive parser.
292pub struct RarFilesPackage {
293    files: Vec<Arc<dyn FileMedia>>,
294}
295
296impl RarFilesPackage {
297    /// Create a new [`RarFilesPackage`] from a list of volume files.
298    ///
299    /// Volumes are automatically sorted into the correct order
300    /// (`.rar` first, then `.r00`, `.r01`, etc.).
301    pub fn new(files: Vec<Arc<dyn FileMedia>>) -> Self {
302        // Sort files by name to ensure correct order (.rar, .r00, .r01, ...)
303        let mut files = files;
304        files.sort_by(|a, b| Self::volume_order(a.name()).cmp(&Self::volume_order(b.name())));
305        Self { files }
306    }
307
/// Compute the sort key for a volume file name.
///
/// The key is `(rank, lowercased_name)`:
/// - `*.partN.rar` ranks as `N`;
/// - any other `*.rar` ranks as `0` (first volume in old-style naming);
/// - a three-character extension `rNN` (e.g. `.r00`) ranks as `NN + 1`;
/// - everything else ranks as `1000` and is ordered by name.
fn volume_order(name: &str) -> (u32, String) {
    /// Rank given to names that match no known volume pattern.
    const UNKNOWN_RANK: u32 = 1000;

    let lower = name.to_lowercase();

    let rank = match lower.strip_suffix(".rar") {
        // New-style `.partN.rar`; a missing or non-numeric part falls back to 0.
        Some(stem) => stem
            .rfind(".part")
            .and_then(|pos| stem[pos + 5..].parse::<u32>().ok())
            .unwrap_or(0),
        // Old-style continuation volumes: `.r00`, `.r01`, ...
        None => {
            let ext = lower.rsplit('.').next().unwrap_or("");
            if ext.len() == 3 && ext.starts_with('r') {
                ext[1..].parse::<u32>().map_or(UNKNOWN_RANK, |n| n + 1)
            } else {
                UNKNOWN_RANK
            }
        }
    };

    (rank, lower)
}
334
335    /// Get archive metadata from the first volume.
336    ///
337    /// This performs a single I/O read to fetch all headers needed for metadata.
338    pub async fn get_archive_info(&self) -> Result<ArchiveInfo> {
339        use crate::parsing::rar5::Rar5EncryptionHeaderParser;
340
341        if self.files.is_empty() {
342            return Err(RarError::NoFilesFound);
343        }
344
345        let rar_file = &self.files[0];
346        let mut buf = HeaderBuffer::new(rar_file, 0, HEADER_PREFETCH_SIZE).await?;
347
348        let marker_buf = buf.read(rar_file, 0, 7).await?;
349        let marker = MarkerHeaderParser::parse(&marker_buf)?;
350
351        match marker.version {
352            RarVersion::Rar4 => {
353                let archive_buf = buf
354                    .read(
355                        rar_file,
356                        marker.size as u64,
357                        marker.size as u64 + ArchiveHeaderParser::HEADER_SIZE as u64 - 1,
358                    )
359                    .await?;
360                let archive = ArchiveHeaderParser::parse(&archive_buf)?;
361
362                Ok(ArchiveInfo {
363                    has_recovery_record: archive.has_recovery,
364                    is_solid: archive.has_solid_attributes,
365                    is_locked: archive.is_locked,
366                    is_multivolume: archive.has_volume_attributes,
367                    has_encrypted_headers: archive.is_block_encoded,
368                    version: RarVersion::Rar4,
369                })
370            }
371            RarVersion::Rar5 => {
372                let header_buf = buf
373                    .read(
374                        rar_file,
375                        marker.size as u64,
376                        (marker.size as u64 + 255).min(rar_file.length() - 1),
377                    )
378                    .await?;
379
380                let has_encrypted_headers =
381                    Rar5EncryptionHeaderParser::is_encryption_header(&header_buf);
382
383                if has_encrypted_headers {
384                    Ok(ArchiveInfo {
385                        has_encrypted_headers: true,
386                        version: RarVersion::Rar5,
387                        ..Default::default()
388                    })
389                } else {
390                    let (archive, _) = Rar5ArchiveHeaderParser::parse(&header_buf)?;
391
392                    Ok(ArchiveInfo {
393                        has_recovery_record: archive.archive_flags.has_recovery_record,
394                        is_solid: archive.archive_flags.is_solid,
395                        is_locked: archive.archive_flags.is_locked,
396                        is_multivolume: archive.archive_flags.is_volume,
397                        has_encrypted_headers: false,
398                        version: RarVersion::Rar5,
399                    })
400                }
401            }
402        }
403    }
404
405    /// Parse a single RAR file and extract file chunks.
406    ///
407    /// Prefetches a 32KB buffer to minimize I/O round-trips. All header
408    /// parsing is served from this buffer; a follow-up read only happens
409    /// if headers exceed 32KB.
410    async fn parse_file(
411        &self,
412        rar_file: &Arc<dyn FileMedia>,
413        opts: &ParseOptions,
414    ) -> Result<Vec<ParsedChunk>> {
415        // Prefetch headers with a single I/O read — this is the key
416        // optimization for slow/remote media (HTTP range requests, etc.)
417        let prefetch = opts.header_prefetch_size.unwrap_or(HEADER_PREFETCH_SIZE);
418        let mut buf = HeaderBuffer::new(rar_file, 0, prefetch).await?;
419
420        let marker_buf = buf.read(rar_file, 0, 7).await?;
421        let marker = MarkerHeaderParser::parse(&marker_buf)?;
422
423        match marker.version {
424            RarVersion::Rar4 => {
425                self.parse_rar4_file(rar_file, opts, marker.size as u64, &mut buf)
426                    .await
427            }
428            RarVersion::Rar5 => self.parse_rar5_file(rar_file, opts, &mut buf).await,
429        }
430    }
431
432    /// Parse a RAR4 format file.
433    async fn parse_rar4_file(
434        &self,
435        rar_file: &Arc<dyn FileMedia>,
436        opts: &ParseOptions,
437        marker_size: u64,
438        buf: &mut HeaderBuffer,
439    ) -> Result<Vec<ParsedChunk>> {
440        let mut chunks = Vec::new();
441        let mut offset = marker_size;
442
443        // Parse archive header
444        let archive_buf = buf
445            .read(
446                rar_file,
447                offset,
448                offset + ArchiveHeaderParser::HEADER_SIZE as u64 - 1,
449            )
450            .await?;
451        let archive = ArchiveHeaderParser::parse(&archive_buf)?;
452        let is_solid = archive.has_solid_attributes;
453        offset += archive.size as u64;
454
455        let mut file_count = 0usize;
456        let mut retrieved_count = 0usize;
457        let terminator_size = TerminatorHeaderParser::HEADER_SIZE as u64;
458
459        // Parse file headers
460        while offset < rar_file.length().saturating_sub(terminator_size) {
461            // Read enough bytes for header (but not more than available)
462            let bytes_available = rar_file.length().saturating_sub(offset);
463            let read_size = (FileHeaderParser::HEADER_SIZE as u64).min(bytes_available);
464
465            if read_size < 32 {
466                // Not enough for minimum header
467                break;
468            }
469
470            let header_buf = buf.read(rar_file, offset, offset + read_size - 1).await?;
471
472            let file_header = match FileHeaderParser::parse(&header_buf) {
473                Ok(h) => h,
474                Err(_) => break,
475            };
476
477            // Check if this is a file header (type 0x74 = 116)
478            if file_header.header_type != 0x74 {
479                break;
480            }
481
482            // Check encryption - with crypto feature, we can handle encrypted files
483            #[cfg(not(feature = "crypto"))]
484            if file_header.is_encrypted {
485                return Err(RarError::EncryptedNotSupported);
486            }
487
488            let data_start = offset
489                .checked_add(file_header.head_size as u64)
490                .ok_or_else(|| RarError::InvalidOffset {
491                    offset,
492                    length: rar_file.length(),
493                })?;
494            let data_end = if file_header.packed_size > 0 {
495                data_start
496                    .checked_add(file_header.packed_size - 1)
497                    .ok_or_else(|| RarError::InvalidOffset {
498                        offset: data_start,
499                        length: rar_file.length(),
500                    })?
501            } else {
502                // Empty range: end < start so RarFileChunk::length() returns 0
503                data_start.saturating_sub(1)
504            };
505
506            // Apply filter
507            let include = match &opts.filter {
508                Some(f) => f(&file_header.name, file_count),
509                None => true,
510            };
511
512            if include {
513                let chunk = RarFileChunk::new(rar_file.clone(), data_start, data_end);
514
515                // Parse encryption info if present (RAR4)
516                #[cfg(feature = "crypto")]
517                let encryption = if file_header.is_encrypted {
518                    file_header
519                        .salt
520                        .map(|salt| FileEncryptionInfo::Rar4 { salt })
521                } else {
522                    None
523                };
524
525                chunks.push(ParsedChunk {
526                    name: file_header.name.clone(),
527                    chunk,
528                    continues_in_next: file_header.continues_in_next,
529                    unpacked_size: file_header.unpacked_size,
530                    method: file_header.method,
531                    dict_size_log: 22, // RAR4 doesn't specify, use 4MB default
532                    rar_version: RarVersion::Rar4,
533                    is_solid,
534                    #[cfg(feature = "crypto")]
535                    encryption,
536                });
537                retrieved_count += 1;
538
539                // Check max files limit
540                if let Some(max) = opts.max_files {
541                    if retrieved_count >= max {
542                        break;
543                    }
544                }
545            }
546
547            offset = data_end + 1;
548            file_count += 1;
549        }
550
551        Ok(chunks)
552    }
553
/// Decrypt and parse one encrypted RAR5 header.
///
/// `data` must start with the 16-byte IV, followed by the encrypted header
/// (padded to a multiple of 16 bytes). At most 512 bytes of ciphertext are
/// decrypted, rounded down to whole 16-byte blocks, and handed to `parser`.
///
/// Returns the parsed value together with the number of bytes of `data`
/// the header occupies: `16` (IV) plus the encrypted header size, which is
/// `CRC32 (4) + size vint + header content` rounded up to 16 bytes. The
/// consumed count reported by `parser` itself is ignored because it does
/// not account for the padding.
///
/// # Errors
///
/// - [`RarError::InvalidHeader`] if `data` is too short to hold the IV plus
///   one cipher block, if the header size cannot be read from the
///   decrypted bytes, or if the size does not fit in `usize`.
/// - [`RarError::DecryptionFailed`] if decryption fails.
/// - Whatever error `parser` returns.
#[cfg(feature = "crypto")]
fn parse_encrypted_header<T, F>(
    &self,
    data: &[u8],
    crypto: &crate::crypto::Rar5Crypto,
    parser: F,
) -> Result<(T, usize)>
where
    F: FnOnce(&[u8]) -> Result<(T, usize)>,
{
    use crate::parsing::rar5::VintReader;

    /// Length of the IV that prefixes every encrypted header.
    const IV_LEN: usize = 16;
    /// Cipher block size; ciphertext is processed in multiples of this.
    const BLOCK_LEN: usize = 16;
    /// Upper bound on how much ciphertext is decrypted for one header.
    const MAX_DECRYPT_LEN: usize = 512;

    // Need the IV and at least one full cipher block after it.
    if data.len() < IV_LEN + BLOCK_LEN {
        return Err(RarError::InvalidHeader);
    }

    // First 16 bytes are the IV
    let (iv_bytes, ciphertext) = data.split_at(IV_LEN);
    let mut iv = [0u8; IV_LEN];
    iv.copy_from_slice(iv_bytes);

    // The real header size is only known after decryption, so decrypt a
    // bounded prefix, rounded DOWN to a whole number of blocks. This is at
    // least one block because of the length check above.
    let decrypt_len = (ciphertext.len().min(MAX_DECRYPT_LEN) / BLOCK_LEN) * BLOCK_LEN;

    let mut decrypted = ciphertext[..decrypt_len].to_vec();
    crypto
        .decrypt(&iv, &mut decrypted)
        .map_err(|e| RarError::DecryptionFailed(e.to_string()))?;

    // Parse the decrypted header
    let (result, _) = parser(&decrypted)?;

    // Re-read the header size from the plaintext to work out how many
    // encrypted bytes this header really occupies.
    let mut reader = VintReader::new(&decrypted[4..]); // Skip CRC32
    let header_size = reader.read().ok_or(RarError::InvalidHeader)?;
    let size_vint_len = reader.position();

    // Total encrypted size = CRC(4) + size_vint + header_content, rounded
    // up to 16. The size comes from the archive, so every step is checked
    // rather than cast/added blindly.
    let consumed = usize::try_from(header_size)
        .ok()
        .and_then(|content_len| content_len.checked_add(4 + size_vint_len))
        .and_then(|plaintext_len| plaintext_len.checked_next_multiple_of(BLOCK_LEN))
        // Total consumed = IV(16) + encrypted_size
        .and_then(|encrypted_len| encrypted_len.checked_add(IV_LEN))
        .ok_or(RarError::InvalidHeader)?;

    Ok((result, consumed))
}
614
615    /// Parse a RAR5 format file.
616    async fn parse_rar5_file(
617        &self,
618        rar_file: &Arc<dyn FileMedia>,
619        opts: &ParseOptions,
620        buf: &mut HeaderBuffer,
621    ) -> Result<Vec<ParsedChunk>> {
622        let mut chunks = Vec::new();
623        let mut offset = 8u64; // RAR5 signature is 8 bytes
624
625        // Read first header to check for encryption header
626        let header_buf = buf
627            .read(
628                rar_file,
629                offset,
630                (offset + 256 - 1).min(rar_file.length() - 1),
631            )
632            .await?;
633
634        // Check if headers are encrypted
635        #[cfg(feature = "crypto")]
636        let header_crypto: Option<crate::crypto::Rar5Crypto> =
637            if Rar5EncryptionHeaderParser::is_encryption_header(&header_buf) {
638                let (enc_header, consumed) = Rar5EncryptionHeaderParser::parse(&header_buf)?;
639                offset += consumed as u64;
640
641                // Need password to decrypt headers
642                let password = opts.password.as_ref().ok_or(RarError::PasswordRequired)?;
643
644                Some(crate::crypto::Rar5Crypto::derive_key(
645                    password,
646                    &enc_header.salt,
647                    enc_header.lg2_count,
648                ))
649            } else {
650                None
651            };
652
653        #[cfg(not(feature = "crypto"))]
654        if Rar5EncryptionHeaderParser::is_encryption_header(&header_buf) {
655            return Err(RarError::PasswordRequired);
656        }
657
658        // Read archive header (which may be encrypted)
659        #[cfg(feature = "crypto")]
660        let (archive_header, consumed) = if let Some(ref crypto) = header_crypto {
661            // Read IV (16 bytes) + encrypted header
662            let enc_buf = buf
663                .read(
664                    rar_file,
665                    offset,
666                    (offset + 512 - 1).min(rar_file.length() - 1),
667                )
668                .await?;
669
670            self.parse_encrypted_header(&enc_buf, crypto, |data| {
671                Rar5ArchiveHeaderParser::parse(data)
672            })?
673        } else {
674            Rar5ArchiveHeaderParser::parse(&header_buf)?
675        };
676
677        #[cfg(not(feature = "crypto"))]
678        let (archive_header, consumed) = Rar5ArchiveHeaderParser::parse(&header_buf)?;
679
680        let is_solid = archive_header.archive_flags.is_solid;
681        offset += consumed as u64;
682
683        let mut file_count = 0usize;
684        let mut retrieved_count = 0usize;
685
686        // Parse file headers
687        while offset < rar_file.length().saturating_sub(16) {
688            // Read header data (variable size)
689            let bytes_available = rar_file.length().saturating_sub(offset);
690            let read_size = 512u64.min(bytes_available);
691
692            if read_size < 16 {
693                break;
694            }
695
696            let header_buf = buf.read(rar_file, offset, offset + read_size - 1).await?;
697
698            // Try to parse as file header (may be encrypted)
699            #[cfg(feature = "crypto")]
700            let (file_header, header_consumed) = if let Some(ref crypto) = header_crypto {
701                match self.parse_encrypted_header(&header_buf, crypto, |data| {
702                    Rar5FileHeaderParser::parse(data)
703                }) {
704                    Ok(h) => h,
705                    Err(_) => break,
706                }
707            } else {
708                match Rar5FileHeaderParser::parse(&header_buf) {
709                    Ok(h) => h,
710                    Err(_) => break,
711                }
712            };
713
714            #[cfg(not(feature = "crypto"))]
715            let (file_header, header_consumed) = match Rar5FileHeaderParser::parse(&header_buf) {
716                Ok(h) => h,
717                Err(_) => break,
718            };
719
720            let data_start = offset.checked_add(header_consumed as u64).ok_or_else(|| {
721                RarError::InvalidOffset {
722                    offset,
723                    length: rar_file.length(),
724                }
725            })?;
726            let data_end = if file_header.packed_size > 0 {
727                data_start
728                    .checked_add(file_header.packed_size - 1)
729                    .ok_or_else(|| RarError::InvalidOffset {
730                        offset: data_start,
731                        length: rar_file.length(),
732                    })?
733            } else {
734                // Empty range: end < start so RarFileChunk::length() returns 0
735                data_start.saturating_sub(1)
736            };
737
738            // Apply filter
739            let include = match &opts.filter {
740                Some(f) => f(&file_header.name, file_count),
741                None => true,
742            };
743
744            if include {
745                let chunk = RarFileChunk::new(rar_file.clone(), data_start, data_end);
746
747                // Convert RAR5 method to RAR4-compatible format
748                // RAR5 method 0 = stored, 1-5 = compression
749                // Store the raw method, not converted to RAR4 format
750                let method = file_header.compression.method;
751
752                // Parse encryption info if present
753                #[cfg(feature = "crypto")]
754                let encryption = if file_header.is_encrypted() {
755                    file_header.encryption_info().and_then(|data| {
756                        crate::crypto::Rar5EncryptionInfo::parse(data)
757                            .ok()
758                            .map(|info| FileEncryptionInfo::Rar5 {
759                                salt: info.salt,
760                                init_v: info.init_v,
761                                lg2_count: info.lg2_count,
762                            })
763                    })
764                } else {
765                    None
766                };
767
768                chunks.push(ParsedChunk {
769                    name: file_header.name.clone(),
770                    chunk,
771                    continues_in_next: file_header.continues_in_next(),
772                    unpacked_size: file_header.unpacked_size,
773                    method,
774                    dict_size_log: file_header.compression.dict_size_log,
775                    rar_version: RarVersion::Rar5,
776                    is_solid,
777                    #[cfg(feature = "crypto")]
778                    encryption,
779                });
780                retrieved_count += 1;
781
782                if let Some(max) = opts.max_files {
783                    if retrieved_count >= max {
784                        break;
785                    }
786                }
787            }
788
789            offset = data_end + 1;
790            file_count += 1;
791        }
792
793        Ok(chunks)
794    }
795
796    /// Parse all volumes and return inner files.
797    pub async fn parse(&self, opts: ParseOptions) -> Result<Vec<InnerFile>> {
798        if self.files.is_empty() {
799            return Err(RarError::NoFilesFound);
800        }
801
802        let mut all_parsed: Vec<Vec<ParsedChunk>> = Vec::new();
803
804        let mut i = 0;
805        while i < self.files.len() {
806            let file = &self.files[i];
807            let chunks = self.parse_file(file, &opts).await?;
808
809            if chunks.is_empty() {
810                i += 1;
811                continue;
812            }
813
814            // Get info from last chunk
815            let continues = chunks.last().unwrap().continues_in_next;
816
817            all_parsed.push(chunks);
818
819            // Handle continuation - parse each continuation volume's headers
820            // to find actual data offsets (they may differ between volumes)
821            if continues {
822                while i + 1 < self.files.len() {
823                    i += 1;
824                    let next_file = &self.files[i];
825
826                    let cont_chunks = self.parse_file(next_file, &opts).await?;
827                    if cont_chunks.is_empty() {
828                        break;
829                    }
830
831                    let cont_continues = cont_chunks.last().unwrap().continues_in_next;
832                    all_parsed.push(cont_chunks);
833
834                    if !cont_continues {
835                        break;
836                    }
837                }
838            }
839
840            i += 1;
841        }
842
843        // Flatten and group chunks by filename, keeping method info
844        let all_chunks: Vec<ParsedChunk> = all_parsed.into_iter().flatten().collect();
845
846        #[cfg(feature = "crypto")]
847        type GroupValue = (
848            Vec<RarFileChunk>,
849            u8,
850            u8, // dict_size_log
851            u64,
852            RarVersion,
853            bool, // is_solid
854            Option<FileEncryptionInfo>,
855        );
856        #[cfg(not(feature = "crypto"))]
857        type GroupValue = (Vec<RarFileChunk>, u8, u8, u64, RarVersion, bool);
858
859        let mut grouped: HashMap<String, GroupValue> = HashMap::new();
860        for chunk in all_chunks {
861            #[cfg(feature = "crypto")]
862            let entry = grouped.entry(chunk.name).or_insert_with(|| {
863                (
864                    Vec::new(),
865                    chunk.method,
866                    chunk.dict_size_log,
867                    chunk.unpacked_size,
868                    chunk.rar_version,
869                    chunk.is_solid,
870                    chunk.encryption,
871                )
872            });
873            #[cfg(not(feature = "crypto"))]
874            let entry = grouped.entry(chunk.name).or_insert_with(|| {
875                (
876                    Vec::new(),
877                    chunk.method,
878                    chunk.dict_size_log,
879                    chunk.unpacked_size,
880                    chunk.rar_version,
881                    chunk.is_solid,
882                )
883            });
884            entry.0.push(chunk.chunk);
885        }
886
887        // Create InnerFile for each group
888        #[cfg(feature = "crypto")]
889        let password = opts.password.clone();
890
891        let inner_files: Vec<InnerFile> = grouped
892            .into_iter()
893            .map(|(name, value)| {
894                #[cfg(feature = "crypto")]
895                {
896                    let (
897                        chunks,
898                        method,
899                        dict_size_log,
900                        unpacked_size,
901                        rar_version,
902                        is_solid,
903                        encryption,
904                    ) = value;
905                    let enc_info = encryption.map(|e| match e {
906                        FileEncryptionInfo::Rar5 {
907                            salt,
908                            init_v,
909                            lg2_count,
910                        } => crate::inner_file::EncryptionInfo::Rar5 {
911                            salt,
912                            init_v,
913                            lg2_count,
914                        },
915                        FileEncryptionInfo::Rar4 { salt } => {
916                            crate::inner_file::EncryptionInfo::Rar4 { salt }
917                        }
918                    });
919                    InnerFile::new_encrypted_with_solid_dict(
920                        name,
921                        chunks,
922                        method,
923                        dict_size_log,
924                        unpacked_size,
925                        rar_version,
926                        enc_info,
927                        password.clone(),
928                        is_solid,
929                    )
930                }
931                #[cfg(not(feature = "crypto"))]
932                {
933                    let (chunks, method, dict_size_log, unpacked_size, rar_version, is_solid) =
934                        value;
935                    InnerFile::new_with_solid_dict(
936                        name,
937                        chunks,
938                        method,
939                        dict_size_log,
940                        unpacked_size,
941                        rar_version,
942                        is_solid,
943                    )
944                }
945            })
946            .collect();
947
948        Ok(inner_files)
949    }
950}
951
952#[cfg(test)]
953mod tests {
954    use super::*;
955    use crate::file_media::{FileMedia, LocalFileMedia};
956
957    #[test]
958    fn test_volume_order_old_style() {
959        // .rar comes first
960        assert_eq!(RarFilesPackage::volume_order("archive.rar").0, 0);
961        // .r00 = order 1, .r01 = order 2
962        assert_eq!(RarFilesPackage::volume_order("archive.r00").0, 1);
963        assert_eq!(RarFilesPackage::volume_order("archive.r01").0, 2);
964        assert_eq!(RarFilesPackage::volume_order("archive.r99").0, 100);
965    }
966
967    #[test]
968    fn test_volume_order_part_naming() {
969        // .partN.rar naming should sort numerically
970        let mut names = vec![
971            "archive.part10.rar",
972            "archive.part2.rar",
973            "archive.part1.rar",
974            "archive.part3.rar",
975        ];
976        names.sort_by_key(|n| RarFilesPackage::volume_order(n));
977        assert_eq!(
978            names,
979            vec![
980                "archive.part1.rar",
981                "archive.part2.rar",
982                "archive.part3.rar",
983                "archive.part10.rar",
984            ]
985        );
986    }
987
988    #[test]
989    fn test_volume_order_unknown() {
990        // Unknown extensions get high order
991        assert_eq!(RarFilesPackage::volume_order("archive.zip").0, 1000);
992    }
993
994    #[tokio::test]
995    #[cfg(feature = "async")]
996    async fn test_get_archive_info_rar5() {
997        let file: Arc<dyn FileMedia> =
998            Arc::new(LocalFileMedia::new("__fixtures__/rar5/test.rar").unwrap());
999        let package = RarFilesPackage::new(vec![file]);
1000
1001        let info = package.get_archive_info().await.unwrap();
1002        assert_eq!(info.version, RarVersion::Rar5);
1003        assert!(!info.is_multivolume);
1004    }
1005
1006    #[tokio::test]
1007    #[cfg(feature = "async")]
1008    async fn test_get_archive_info_rar4() {
1009        let file: Arc<dyn FileMedia> =
1010            Arc::new(LocalFileMedia::new("__fixtures__/single/single.rar").unwrap());
1011        let package = RarFilesPackage::new(vec![file]);
1012
1013        let info = package.get_archive_info().await.unwrap();
1014        assert_eq!(info.version, RarVersion::Rar4);
1015        assert!(!info.is_multivolume);
1016    }
1017
1018    #[tokio::test]
1019    #[cfg(feature = "async")]
1020    async fn test_parse_rar5_stored() {
1021        // Test parsing a RAR5 stored file
1022        let file: Arc<dyn FileMedia> =
1023            Arc::new(LocalFileMedia::new("__fixtures__/rar5/test.rar").unwrap());
1024        let package = RarFilesPackage::new(vec![file]);
1025
1026        let files = package.parse(ParseOptions::default()).await.unwrap();
1027
1028        assert_eq!(files.len(), 1);
1029        assert_eq!(files[0].name, "test.txt");
1030    }
1031
1032    #[tokio::test]
1033    #[cfg(feature = "async")]
1034    async fn test_parse_rar5_compressed() {
1035        // Test parsing a RAR5 compressed file
1036        let file: Arc<dyn FileMedia> =
1037            Arc::new(LocalFileMedia::new("__fixtures__/rar5/compressed.rar").unwrap());
1038        let package = RarFilesPackage::new(vec![file]);
1039
1040        let files = package.parse(ParseOptions::default()).await.unwrap();
1041
1042        assert_eq!(files.len(), 1);
1043        assert_eq!(files[0].name, "compress_test.txt");
1044        assert_eq!(files[0].length, 152); // Unpacked size
1045
1046        // Try to read and decompress the file content
1047        // Note: RAR5 compressed decompression is still being debugged
1048        match files[0].read_to_end().await {
1049            Ok(content) => {
1050                eprintln!("Got {} bytes of output", content.len());
1051                eprintln!("First 32 bytes: {:02x?}", &content[..32.min(content.len())]);
1052
1053                // Verify we got the full uncompressed content
1054                assert_eq!(
1055                    content.len(),
1056                    152,
1057                    "decompressed size should match unpacked size"
1058                );
1059
1060                // Verify the content is valid text
1061                match std::str::from_utf8(&content) {
1062                    Ok(text) => {
1063                        assert!(
1064                            text.contains("This is a test file"),
1065                            "content should contain expected text"
1066                        );
1067                        assert!(
1068                            text.contains("hello hello"),
1069                            "content should contain repeated text"
1070                        );
1071                    }
1072                    Err(_) => {
1073                        // Decompression ran but output is wrong - still debugging
1074                        eprintln!(
1075                            "RAR5 decompression output is not valid UTF-8 (work in progress)"
1076                        );
1077                    }
1078                }
1079            }
1080            Err(e) => {
1081                // RAR5 decompression not yet fully implemented - parsing verified
1082                eprintln!("RAR5 decompression error: {:?}", e);
1083            }
1084        }
1085    }
1086
1087    #[tokio::test]
1088    #[cfg(feature = "async")]
1089    async fn test_parse_rar5_multivolume() {
1090        // Test parsing a multi-volume RAR5 archive
1091        let fixture_dir = "__fixtures__/rar5-multivolume";
1092
1093        // Collect all volume files
1094        let mut volume_paths: Vec<String> = std::fs::read_dir(fixture_dir)
1095            .unwrap()
1096            .filter_map(|e| e.ok())
1097            .map(|e| e.path())
1098            .filter(|p| p.extension().map_or(false, |ext| ext == "rar"))
1099            .map(|p| p.to_string_lossy().to_string())
1100            .collect();
1101
1102        // Sort by name so volumes are in order
1103        volume_paths.sort();
1104
1105        if volume_paths.is_empty() {
1106            // Skip test if fixtures don't exist
1107            eprintln!("Skipping test - no multi-volume fixtures found");
1108            return;
1109        }
1110
1111        eprintln!("Found {} volumes: {:?}", volume_paths.len(), volume_paths);
1112
1113        // Create file medias for each volume
1114        let files: Vec<Arc<dyn FileMedia>> = volume_paths
1115            .iter()
1116            .map(|p| Arc::new(LocalFileMedia::new(p).unwrap()) as Arc<dyn FileMedia>)
1117            .collect();
1118
1119        let package = RarFilesPackage::new(files);
1120
1121        let parsed = package.parse(ParseOptions::default()).await.unwrap();
1122
1123        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1124        assert_eq!(parsed[0].name, "testfile.txt");
1125
1126        // The length might be slightly off due to volume header handling
1127        // but should be close to the original file size
1128        eprintln!("Parsed length: {}", parsed[0].length);
1129
1130        // Try to read the file content (stored, so should work)
1131        let content = parsed[0].read_to_end().await.unwrap();
1132        eprintln!("Read content length: {}", content.len());
1133
1134        // Verify the content is valid and contains expected text
1135        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1136        assert!(text.contains("Line 1:"), "should contain first line");
1137        assert!(text.contains("Line 100:"), "should contain last line");
1138
1139        // Verify we got approximately the right size (allow for header overhead)
1140        assert!(content.len() >= 11000, "should have at least 11000 bytes");
1141    }
1142
1143    #[tokio::test]
1144    #[cfg(all(feature = "async", feature = "crypto"))]
1145    async fn test_parse_rar5_encrypted_stored() {
1146        // Test parsing and extracting an encrypted RAR5 file (stored, no compression)
1147        let fixture = "__fixtures__/encrypted/rar5-encrypted-stored.rar";
1148
1149        if !std::path::Path::new(fixture).exists() {
1150            eprintln!("Skipping test - encrypted fixtures not found");
1151            return;
1152        }
1153
1154        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1155        let package = RarFilesPackage::new(vec![file]);
1156
1157        let opts = ParseOptions {
1158            password: Some("testpass".to_string()),
1159            ..Default::default()
1160        };
1161
1162        let parsed = package.parse(opts).await.unwrap();
1163        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1164
1165        let inner_file = &parsed[0];
1166        assert_eq!(inner_file.name, "testfile.txt");
1167        assert!(inner_file.is_encrypted());
1168
1169        // Read the decrypted content
1170        let content = inner_file.read_decompressed().await.unwrap();
1171        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1172
1173        assert!(text.starts_with("Hello, encrypted world!"));
1174    }
1175
1176    #[tokio::test]
1177    #[cfg(all(feature = "async", feature = "crypto"))]
1178    async fn test_parse_rar5_encrypted_no_password() {
1179        let fixture = "__fixtures__/encrypted/rar5-encrypted-stored.rar";
1180
1181        if !std::path::Path::new(fixture).exists() {
1182            eprintln!("Skipping test - encrypted fixtures not found");
1183            return;
1184        }
1185
1186        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1187        let package = RarFilesPackage::new(vec![file]);
1188
1189        // No password provided
1190        let parsed = package.parse(ParseOptions::default()).await.unwrap();
1191        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1192
1193        let inner_file = &parsed[0];
1194        assert!(inner_file.is_encrypted());
1195
1196        // Reading should fail because no password was provided
1197        let result = inner_file.read_decompressed().await;
1198        assert!(result.is_err());
1199        match result {
1200            Err(crate::RarError::PasswordRequired) => {
1201                // Expected error
1202            }
1203            Err(e) => panic!("Expected PasswordRequired error, got: {:?}", e),
1204            Ok(_) => panic!("Expected error but got success"),
1205        }
1206    }
1207
1208    #[tokio::test]
1209    #[cfg(all(feature = "async", feature = "crypto"))]
1210    async fn test_parse_rar5_encrypted_headers() {
1211        // Test parsing an archive with encrypted headers (created with rar -hp)
1212        let fixture = "__fixtures__/encrypted/rar5-encrypted-headers.rar";
1213
1214        if !std::path::Path::new(fixture).exists() {
1215            eprintln!("Skipping test - encrypted headers fixture not found");
1216            return;
1217        }
1218
1219        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1220        let package = RarFilesPackage::new(vec![file]);
1221
1222        // First check archive info - should show encrypted headers
1223        let info = package.get_archive_info().await.unwrap();
1224        assert!(info.has_encrypted_headers, "should have encrypted headers");
1225        assert_eq!(info.version, RarVersion::Rar5);
1226
1227        // Parsing without password should fail
1228        let result = package.parse(ParseOptions::default()).await;
1229        assert!(
1230            matches!(result, Err(RarError::PasswordRequired)),
1231            "should require password for encrypted headers, got {:?}",
1232            result
1233        );
1234
1235        // Parsing with password should succeed
1236        let opts = ParseOptions {
1237            password: Some("testpass".to_string()),
1238            ..Default::default()
1239        };
1240
1241        let parsed = package.parse(opts).await.unwrap();
1242        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1243        assert_eq!(parsed[0].name, "testfile.txt");
1244
1245        // File content is also encrypted, so read should work
1246        let content = parsed[0].read_decompressed().await.unwrap();
1247        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1248        assert!(
1249            text.starts_with("Hello, encrypted world!"),
1250            "content was: {:?}",
1251            text
1252        );
1253    }
1254
1255    #[tokio::test]
1256    #[cfg(all(feature = "async", feature = "crypto"))]
1257    async fn test_get_archive_info_encrypted_headers() {
1258        // Test that get_archive_info detects encrypted headers
1259        let fixture = "__fixtures__/encrypted/rar5-encrypted-headers.rar";
1260
1261        if !std::path::Path::new(fixture).exists() {
1262            eprintln!("Skipping test - encrypted headers fixture not found");
1263            return;
1264        }
1265
1266        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1267        let package = RarFilesPackage::new(vec![file]);
1268
1269        let info = package.get_archive_info().await.unwrap();
1270        assert!(info.has_encrypted_headers);
1271        assert_eq!(info.version, RarVersion::Rar5);
1272        // Other flags can't be read when headers are encrypted
1273    }
1274
1275    #[tokio::test]
1276    #[cfg(all(feature = "async", feature = "crypto"))]
1277    async fn test_parse_rar4_encrypted_stored() {
1278        // Test parsing and extracting an encrypted RAR4 file (stored, no compression)
1279        let fixture = "__fixtures__/encrypted/rar4-encrypted-stored.rar";
1280
1281        if !std::path::Path::new(fixture).exists() {
1282            eprintln!("Skipping test - RAR4 encrypted fixtures not found");
1283            return;
1284        }
1285
1286        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1287        let package = RarFilesPackage::new(vec![file]);
1288
1289        // Check archive info
1290        let info = package.get_archive_info().await.unwrap();
1291        assert_eq!(info.version, RarVersion::Rar4);
1292
1293        let opts = ParseOptions {
1294            password: Some("testpass".to_string()),
1295            ..Default::default()
1296        };
1297
1298        let parsed = package.parse(opts).await.unwrap();
1299        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1300
1301        let inner_file = &parsed[0];
1302        assert_eq!(inner_file.name, "testfile.txt");
1303        assert!(inner_file.is_encrypted());
1304
1305        // Read the decrypted content
1306        let content = inner_file.read_decompressed().await.unwrap();
1307        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1308
1309        assert!(
1310            text.starts_with("Hello, encrypted world!"),
1311            "content was: {:?}",
1312            text
1313        );
1314    }
1315
1316    #[tokio::test]
1317    #[cfg(all(feature = "async", feature = "crypto"))]
1318    async fn test_parse_rar4_encrypted_compressed() {
1319        // Test parsing and extracting an encrypted RAR4 file (compressed)
1320        let fixture = "__fixtures__/encrypted/rar4-encrypted.rar";
1321
1322        if !std::path::Path::new(fixture).exists() {
1323            eprintln!("Skipping test - RAR4 encrypted fixtures not found");
1324            return;
1325        }
1326
1327        let file: Arc<dyn FileMedia> = Arc::new(LocalFileMedia::new(fixture).unwrap());
1328        let package = RarFilesPackage::new(vec![file]);
1329
1330        // Check archive info
1331        let info = package.get_archive_info().await.unwrap();
1332        assert_eq!(info.version, RarVersion::Rar4);
1333
1334        let opts = ParseOptions {
1335            password: Some("testpass".to_string()),
1336            ..Default::default()
1337        };
1338
1339        let parsed = package.parse(opts).await.unwrap();
1340        assert_eq!(parsed.len(), 1, "should have 1 inner file");
1341
1342        let inner_file = &parsed[0];
1343        assert_eq!(inner_file.name, "testfile.txt");
1344        assert!(inner_file.is_encrypted());
1345
1346        // Read the decrypted content
1347        let content = inner_file.read_decompressed().await.unwrap();
1348        let text = std::str::from_utf8(&content).expect("should be valid UTF-8");
1349
1350        assert!(
1351            text.starts_with("Hello, encrypted world!"),
1352            "content was: {:?}",
1353            text
1354        );
1355    }
1356}