Skip to main content

rars_format/
detect.rs

1use crate::version::ArchiveFamily;
2use aho_corasick::AhoCorasick;
3use std::sync::OnceLock;
4
/// Magic bytes identifying an [`ArchiveFamily::Rar13`] archive.
pub const RAR13_SIGNATURE: &[u8; 4] = b"RE~^";
/// Magic bytes identifying an [`ArchiveFamily::Rar15To40`] archive.
pub const RAR15_SIGNATURE: &[u8; 7] = b"Rar!\x1a\x07\x00";
/// Magic bytes identifying an [`ArchiveFamily::Rar50Plus`] archive.
///
/// Shares its first six bytes with [`RAR15_SIGNATURE`]; the two differ from
/// the seventh byte onwards.
pub const RAR50_SIGNATURE: &[u8; 8] = b"Rar!\x1a\x07\x01\x00";
8
/// Default cap, in bytes, on how far past a self-extracting (SFX) stub the
/// scan for a RAR signature may go (8 MiB).
///
/// Archives embedded in typical installers start within a few hundred KiB,
/// but a large SFX module (for example WinRAR's own installer shipped with a
/// bundled runtime) can move the archive start beyond 1 MiB, so the default
/// leaves generous headroom.
pub const SFX_SCAN_LIMIT: usize = 8 << 20;
14
15#[derive(Debug, Clone, Copy, PartialEq, Eq)]
16#[non_exhaustive]
17pub struct ArchiveSignature {
18    pub family: ArchiveFamily,
19    pub offset: usize,
20    pub length: usize,
21}
22
23pub fn detect_archive_family(input: &[u8]) -> Option<ArchiveSignature> {
24    detect_at(input, 0)
25}
26
27const SIGNATURES: &[&[u8]] = &[RAR50_SIGNATURE, RAR15_SIGNATURE, RAR13_SIGNATURE];
28const MAX_SIGNATURE_LEN: usize = {
29    let mut max_len = 0;
30    let mut i = 0;
31    while i < SIGNATURES.len() {
32        if SIGNATURES[i].len() > max_len {
33            max_len = SIGNATURES[i].len();
34        }
35        i += 1;
36    }
37    max_len
38};
39
40fn signatures_searcher() -> &'static AhoCorasick {
41    static INSTANCE: OnceLock<AhoCorasick> = OnceLock::new();
42    INSTANCE.get_or_init(|| AhoCorasick::new(SIGNATURES).unwrap())
43}
44
45fn signatures_searcher_only_15_plus() -> &'static AhoCorasick {
46    static INSTANCE: OnceLock<AhoCorasick> = OnceLock::new();
47    INSTANCE.get_or_init(|| AhoCorasick::new(&SIGNATURES[..2]).unwrap())
48}
49
50pub fn find_archive_start(input: &[u8], max_scan: usize) -> Option<ArchiveSignature> {
51    // The limit is the maximum offset to find the signature, so we need to look
52    // up to the longest signature after that point.
53    let limit = input.len().min(max_scan.saturating_add(MAX_SIGNATURE_LEN));
54    let input = &input[..limit];
55    let mut first_rar13 = None;
56    let mut it = signatures_searcher().find_iter(input);
57    while let Some(m) = it.next() {
58        let family = match m.pattern().as_u32() {
59            0 => ArchiveFamily::Rar50Plus,
60            1 => ArchiveFamily::Rar15To40,
61            2 => {
62                // Only find the first rar13 signature
63                debug_assert!(first_rar13.is_none());
64                first_rar13 = Some(ArchiveSignature {
65                    family: ArchiveFamily::Rar13,
66                    offset: m.start(),
67                    length: m.len(),
68                });
69                // Switch to a searcher that doesn't have the rar13 signature anymore
70                it = signatures_searcher_only_15_plus()
71                    .find_iter(aho_corasick::Input::new(input).range(m.end()..));
72                continue;
73            }
74            _ => unreachable!(),
75        };
76        if m.start() > max_scan {
77            break;
78        }
79        return Some(ArchiveSignature {
80            family,
81            offset: m.start(),
82            length: m.len(),
83        });
84    }
85    first_rar13.filter(|sig| sig.offset <= max_scan)
86}
87
88fn detect_at(input: &[u8], offset: usize) -> Option<ArchiveSignature> {
89    let tail = input.get(offset..)?;
90
91    if tail.starts_with(RAR50_SIGNATURE) {
92        Some(ArchiveSignature {
93            family: ArchiveFamily::Rar50Plus,
94            offset,
95            length: RAR50_SIGNATURE.len(),
96        })
97    } else if tail.starts_with(RAR15_SIGNATURE) {
98        Some(ArchiveSignature {
99            family: ArchiveFamily::Rar15To40,
100            offset,
101            length: RAR15_SIGNATURE.len(),
102        })
103    } else if tail.starts_with(RAR13_SIGNATURE) {
104        Some(ArchiveSignature {
105            family: ArchiveFamily::Rar13,
106            offset,
107            length: RAR13_SIGNATURE.len(),
108        })
109    } else {
110        None
111    }
112}
113
#[cfg(test)]
mod tests {
    use super::*;

    /// Each signature, placed at offset 0, maps to its archive family.
    #[test]
    fn detects_all_known_signatures() {
        let cases: [(&[u8], ArchiveFamily); 3] = [
            (&b"RE~^"[..], ArchiveFamily::Rar13),
            (&b"Rar!\x1a\x07\x00"[..], ArchiveFamily::Rar15To40),
            (&b"Rar!\x1a\x07\x01\x00"[..], ArchiveFamily::Rar50Plus),
        ];
        for &(bytes, expected) in cases.iter() {
            let detected = detect_archive_family(bytes).unwrap();
            assert_eq!(detected.family, expected);
        }
    }

    /// A signature behind a short stub is found at its real offset.
    #[test]
    fn finds_sfx_prefixed_archive() {
        let found = find_archive_start(b"stub bytes RE~^payload", 128).unwrap();
        assert_eq!(found.family, ArchiveFamily::Rar13);
        assert_eq!(found.offset, 11);
    }

    /// A later RAR 1.5+ signature wins over earlier RAR 1.3 magic bytes.
    #[test]
    fn sfx_scan_prefers_stronger_rar15_signature_over_earlier_rar13_bytes() {
        let haystack = b"stub RE~^ bytes Rar!\x1a\x07\x00payload";
        let found = find_archive_start(haystack, 128).unwrap();
        assert_eq!(found.family, ArchiveFamily::Rar15To40);
        assert_eq!(found.offset, 16);
    }

    /// Empty, wrongly-cased and truncated signatures are not recognised.
    #[test]
    fn rejects_unknown_and_truncated_signatures() {
        assert_eq!(detect_archive_family(b""), None);
        assert_eq!(detect_archive_family(b"RAR!"), None);
        assert_eq!(detect_archive_family(b"Rar!\x1a\x07"), None);
        assert_eq!(find_archive_start(b"not an archive", 128), None);
    }

    /// The scan limit is an inclusive bound on the signature's start offset.
    #[test]
    fn scan_limit_bounds_sfx_detection() {
        let haystack = b"stub bytes RE~^payload";

        // One byte short of the signature's offset: nothing is reported.
        assert_eq!(find_archive_start(haystack, 10), None);

        // Exactly at the signature's offset: it is reported.
        let found = find_archive_start(haystack, 11).unwrap();
        assert_eq!(found.family, ArchiveFamily::Rar13);
        assert_eq!(found.offset, 11);
        assert_eq!(found.length, RAR13_SIGNATURE.len());
    }

    /// Real SFX installers routinely place the RAR payload past 128 KiB
    /// (modern WinRAR-built SFXes, Nero, anti-virus installers, etc.), so the
    /// default limit must reach a signature behind a 300 KiB stub.
    #[test]
    fn sfx_scan_limit_finds_signature_past_128kib_stub() {
        let stub_len = 300 * 1024;
        let mut haystack = vec![0u8; stub_len];
        haystack.extend_from_slice(RAR15_SIGNATURE);

        let found = find_archive_start(&haystack, SFX_SCAN_LIMIT).unwrap();
        assert_eq!(found.family, ArchiveFamily::Rar15To40);
        assert_eq!(found.offset, stub_len);
    }
}