stuffit/
lib.rs

1//! StuffIt (.sit) archive parser, decompressor, and creator.
2//!
3//! This crate provides functionality to read and write StuffIt archives,
4//! a compression format popular on classic Macintosh systems.
5//!
6//! # Supported Formats
7//!
8//! - **StuffIt 5.0** - The main format with signature at offset 80
9//! - **SIT! 1.x** - The original StuffIt format
10//!
11//! # Compression Methods
12//!
13//! - **Method 0** - No compression (store)
14//! - **Method 13** - LZ77 with Huffman coding (StuffIt native)
15//! - **Method 14** - Deflate (limited support)
16//! - **Method 15** - Arsenic/BWT (read-only)
17//!
18//! # Example
19//!
20//! ```no_run
21//! use stuffit::{SitArchive, SitEntry};
22//!
23//! // Parse an existing archive
24//! let data = std::fs::read("archive.sit").unwrap();
25//! let archive = SitArchive::parse(&data).unwrap();
26//!
27//! for entry in &archive.entries {
28//!     println!("{}: {} bytes", entry.name, entry.data_fork.len());
29//! }
30//!
31//! // Create a new archive
32//! let mut archive = SitArchive::new();
33//! let mut entry = SitEntry::default();
34//! entry.name = "hello.txt".to_string();
35//! entry.data_fork = b"Hello, World!".to_vec();
36//! archive.add_entry(entry);
37//!
38//! let bytes = archive.serialize_compressed().unwrap();
39//! std::fs::write("new_archive.sit", bytes).unwrap();
40//! ```
41
42use flate2::read::DeflateDecoder;
43use flate2::write::DeflateEncoder;
44use flate2::Compression;
45use encoding_rs::MACINTOSH;
46use log::{debug, warn};
47use std::io::{Cursor, Read, Seek, SeekFrom, Write};
48use thiserror::Error;
49
50/// Errors that can occur when working with StuffIt archives.
51#[derive(Error, Debug)]
52pub enum SitError {
53    /// The file does not have a valid StuffIt signature.
54    #[error("Invalid SIT signature")]
55    InvalidSignature,
56
57    /// The archive uses an unsupported version.
58    #[error("Unsupported SIT version: {0}")]
59    UnsupportedVersion(u16),
60
61    /// An I/O error occurred while reading or writing.
62    #[error("IO error: {0}")]
63    Io(#[from] std::io::Error),
64
65    /// Decompression failed.
66    #[error("Decompression error: {0}")]
67    Decompression(String),
68
69    /// Compression failed.
70    #[error("Compression error: {0}")]
71    Compression(String),
72
73    /// The archive structure is malformed.
74    #[error("Malformed archive")]
75    Malformed,
76
77    /// The archive or entry is encrypted and requires a password.
78    #[error("Archive is encrypted - password required")]
79    EncryptedArchive,
80
81    /// The provided password is incorrect.
82    #[error("Incorrect password")]
83    IncorrectPassword,
84}
85
/// Method 0: fork bytes are stored verbatim, without compression.
pub const METHOD_STORE: u8 = 0;
/// Method 1: RLE (run-length encoding).
pub const METHOD_RLE: u8 = 1;
/// Method 2: LZW (Lempel-Ziv-Welch).
pub const METHOD_LZW: u8 = 2;
/// Method 3: Huffman coding.
pub const METHOD_HUFFMAN: u8 = 3;
/// Method 13: StuffIt 1.5.1 scheme (LZ77 + Huffman); used by Classic and SIT5 archives.
pub const METHOD_SIT13: u8 = 13;
/// Method 14: Deflate; SIT5 archives only.
pub const METHOD_DEFLATE: u8 = 14;
/// Method 15: BWT ("Arsenic"); SIT5 archives only.
pub const METHOD_BWT: u8 = 15;
100
101/// A StuffIt archive containing multiple entries.
102///
103/// Archives can be parsed from existing `.sit` files or created from scratch.
104#[derive(Debug, Clone)]
105pub struct SitArchive {
106    /// The entries (files and folders) in this archive.
107    pub entries: Vec<SitEntry>,
108}
109
/// Container format an entry came from; selects the decompressor family
/// used when its forks are expanded.
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
pub enum ArchiveFormat {
    /// StuffIt 5.x container. This is the default value.
    #[default]
    Sit5,
    /// Classic `SIT!` container (StuffIt 1.x-4.x).
    Classic,
}
119
120/// A single entry (file or folder) in a StuffIt archive.
121///
122/// Each entry can have both a data fork and a resource fork, following
123/// the classic Macintosh file system conventions.
124/// 
125/// By default, entries are stored in compressed form for lazy decompression.
126/// Call `decompress()` to get the uncompressed data.
127#[derive(Debug, Clone, Default)]
128pub struct SitEntry {
129    /// Name of the file or folder (may include path separators for nested items).
130    pub name: String,
131
132    /// Data fork content (compressed if is_compressed is true).
133    pub data_fork: Vec<u8>,
134
135    /// Resource fork content (compressed if is_compressed is true).
136    pub resource_fork: Vec<u8>,
137
138    /// Macintosh file type code (e.g., `b"TEXT"`, `b"APPL"`).
139    pub file_type: [u8; 4],
140
141    /// Macintosh creator code (e.g., `b"ttxt"`, `b"CARO"`).
142    pub creator: [u8; 4],
143
144    /// Whether this entry represents a folder.
145    pub is_folder: bool,
146
147    /// Compression method used for the data fork.
148    pub data_method: u8,
149
150    /// Compression method used for the resource fork.
151    pub rsrc_method: u8,
152
153    /// Uncompressed size of the data fork.
154    pub data_ulen: u32,
155
156    /// Uncompressed size of the resource fork.
157    pub rsrc_ulen: u32,
158
159    /// Macintosh Finder flags (e.g., invisible, has custom icon).
160    pub finder_flags: u16,
161    
162    /// Whether the fork data is still compressed (for lazy decompression).
163    pub is_compressed: bool,
164    
165    /// Archive format (determines which decompressor to use).
166    pub format: ArchiveFormat,
167}
168
169impl SitEntry {
170    /// Decompress the entry's forks if they are still compressed.
171    /// Returns the decompressed data fork and resource fork.
172    /// 
173    /// This method is designed to be called from parallel contexts.
174    pub fn decompressed_forks(&self) -> Result<(Vec<u8>, Vec<u8>), SitError> {
175        if !self.is_compressed {
176            // Already decompressed
177            return Ok((self.data_fork.clone(), self.resource_fork.clone()));
178        }
179        
180        let data = if self.data_fork.is_empty() {
181            Vec::new()
182        } else {
183            match self.format {
184                ArchiveFormat::Sit5 => decompress_sit5(&self.data_fork, self.data_method, self.data_ulen as usize)?,
185                ArchiveFormat::Classic => decompress_classic(&self.data_fork, self.data_method, self.data_ulen as usize)?,
186            }
187        };
188        
189        let rsrc = if self.resource_fork.is_empty() {
190            Vec::new()
191        } else {
192            match self.format {
193                ArchiveFormat::Sit5 => decompress_sit5(&self.resource_fork, self.rsrc_method, self.rsrc_ulen as usize)?,
194                ArchiveFormat::Classic => decompress_classic(&self.resource_fork, self.rsrc_method, self.rsrc_ulen as usize)?,
195            }
196        };
197        
198        Ok((data, rsrc))
199    }
200}
201
/// Compute the IBM CRC-16 of `data` (reflected polynomial 0xA001, initial
/// value 0, no final XOR). Also known as CRC-16/IBM or CRC-16/ANSI.
///
/// An empty slice yields `0`.
fn crc16(data: &[u8]) -> u16 {
    const POLY: u16 = 0xA001;

    data.iter().fold(0u16, |acc, &byte| {
        // Mix the byte into the low bits, then shift out eight bits,
        // applying the polynomial whenever a set bit falls off the end.
        (0..8).fold(acc ^ u16::from(byte), |reg, _| {
            let shifted = reg >> 1;
            if reg & 1 == 1 {
                shifted ^ POLY
            } else {
                shifted
            }
        })
    })
}
218
219impl Default for SitArchive {
220    fn default() -> Self {
221        Self::new()
222    }
223}
224
225impl SitArchive {
226    /// Create a new empty archive.
227    #[must_use]
228    pub fn new() -> Self {
229        Self {
230            entries: Vec::new(),
231        }
232    }
233
234    /// Add an entry to the archive.
235    pub fn add_entry(&mut self, entry: SitEntry) {
236        self.entries.push(entry);
237    }
238
239    /// Serialize the archive to bytes in StuffIt 5.0 format (uncompressed).
240    pub fn serialize(&self) -> Result<Vec<u8>, SitError> {
241        self.serialize_internal(METHOD_STORE)
242    }
243
244    /// Serialize the archive to bytes in StuffIt 5.0 format with method 13 compression.
245    pub fn serialize_compressed(&self) -> Result<Vec<u8>, SitError> {
246        self.serialize_internal(METHOD_SIT13)
247    }
248
249    /// Serialize the archive to bytes in StuffIt 5.0 format with the specified compression method.
250    pub fn serialize_with_method(&self, method: u8) -> Result<Vec<u8>, SitError> {
251        self.serialize_internal(method)
252    }
253
254    fn serialize_internal(&self, method: u8) -> Result<Vec<u8>, SitError> {
255        let mut data = Vec::new();
256
257        // 1. Write the initial 80-byte header and 2-byte signature
258        let sig =
259            b"StuffIt (c)1997-2002 Aladdin Systems, Inc., http://www.aladdinsys.com/StuffIt/\r\n";
260        data.extend_from_slice(sig);
261        data.truncate(80);
262        if data.len() < 80 {
263            data.extend(std::iter::repeat_n(0, 80 - data.len()));
264        }
265        data.extend_from_slice(&[0x1a, 0x00]); // Offset 80, 81
266
267        // 2. Version and global header (32 bytes starting at offset 82)
268        data.push(5); // version
269        data.push(0x10); // flags (0x10 = unscrambled pointers)
270        let totalsize_pos = data.len();
271        data.extend_from_slice(&[0u8; 4]); // totalsize (placeholder)
272        let first_offset_pos = data.len();
273        data.extend_from_slice(&[0u8; 4]); // first_offset (placeholder)
274
275        // Global header: num_root_entries
276        let root_entries: Vec<&SitEntry> = self
277            .entries
278            .iter()
279            .filter(|e| !e.name.contains('/'))
280            .collect();
281        write_u16_be(&mut data, root_entries.len() as u16);
282
283        let fo_repeated_pos = data.len();
284        data.extend_from_slice(&[0u8; 4]); // repeated first_offset (placeholder)
285
286        // Pad global header to 32 bytes (82 + 32 = 114)
287        write_u16_be(&mut data, 0x009b);
288        data.extend_from_slice(&[0xa5, 0xa5]);
289        data.extend_from_slice(b"Kestrel Sit5 Archive");
290        data.truncate(114);
291
292        let first_offset = 114u32;
293        let fo_bytes = first_offset.to_be_bytes();
294        data[first_offset_pos..first_offset_pos + 4].copy_from_slice(&fo_bytes);
295        data[fo_repeated_pos..fo_repeated_pos + 4].copy_from_slice(&fo_bytes);
296
297        // Write entries recursively
298        let mut last_off = 0u32;
299        for entry in &root_entries {
300            let next_off_pos = if last_off != 0 {
301                // We need to go back and update the previous sibling's next_off
302                Some(last_off as usize + 22) // next_off is at offset 22 from entry start
303            } else {
304                None
305            };
306
307            let this_off = self.write_entry_recursive(&mut data, entry, 0, last_off, method)?;
308
309            if let Some(pos) = next_off_pos {
310                let bytes = this_off.to_be_bytes();
311                data[pos..pos + 4].copy_from_slice(&bytes);
312            }
313            last_off = this_off;
314        }
315
316        // Update total size
317        let total_size = data.len() as u32;
318        let ts_bytes = total_size.to_be_bytes();
319        data[totalsize_pos..totalsize_pos + 4].copy_from_slice(&ts_bytes);
320
321        Ok(data)
322    }
323
324    fn write_entry_recursive(
325        &self,
326        data: &mut Vec<u8>,
327        entry: &SitEntry,
328        dir_off: u32,
329        prev_off: u32,
330        method: u8,
331    ) -> Result<u32, SitError> {
332        let entry_start = data.len() as u32;
333
334        // Header format (see XADStuffIt5Parser.m):
335        // 0-3:   ID (0xA5A5A5A5)
336        // 4:     version
337        // 5:     reserved
338        // 6-7:   header_size
339        // 8:     reserved
340        // 9:     flags
341        // 10-13: ctime
342        // 14-17: mtime
343        // 18-21: prev_off
344        // 22-25: next_off
345        // 26-29: dir_off
346        // 30-31: name_len
347        // 32-33: hdr_crc
348        // 34-37: data_ulen (or first_child_offset for directories)
349        // 38-41: data_clen
350        // 42-43: data_crc
351        // 44-45: reserved
352        // 46-47: method+passlen (for files) OR numfiles (for directories)
353        // [name bytes follow]
354        // [optional comment]
355        // [metadata block: something(2) + reserved(2) + filetype(4) + creator(4) + finderflags(2) + padding]
356
357        write_u32_be(data, 0xA5A5A5A5); // ID (0-3)
358        data.push(1); // version 1 (4)
359        data.push(0); // reserved (5)
360        let header_size_pos = data.len();
361        data.extend_from_slice(&[0u8; 2]); // header_size placeholder (6-7)
362        data.push(0); // reserved (8)
363        let mut flags = 0u8;
364        if entry.is_folder {
365            flags |= 0x40;
366        }
367        data.push(flags); // flags (9)
368
369        write_u32_be(data, 0xd256a35a); // ctime (10-13)
370        write_u32_be(data, 0xd256a35a); // mtime (14-17)
371        write_u32_be(data, prev_off); // prev_off (18-21)
372        let _next_off_pos = data.len();
373        data.extend_from_slice(&[0u8; 4]); // next_off placeholder (22-25)
374        write_u32_be(data, dir_off); // dir_off (26-29)
375
376        let name_part = entry.name.split('/').next_back().unwrap();
377        write_u16_be(data, name_part.len() as u16); // name_len (30-31)
378
379        // hdr_crc placeholder (32-33) - will be filled after we have header content
380        let hdr_crc_pos = data.len();
381        write_u16_be(data, 0);
382
383        let data_ulen_pos = data.len();
384        data.extend_from_slice(&[0u8; 4]); // data_ulen placeholder (34-37)
385        data.extend_from_slice(&[0u8; 4]); // data_clen placeholder (38-41)
386        data.extend_from_slice(&[0u8; 2]); // data_crc placeholder (42-43)
387        data.extend_from_slice(&[0u8; 2]); // reserved (44-45)
388
389        // Children of this directory (needed to know count)
390        let children: Vec<&SitEntry> = if entry.is_folder {
391            self.entries
392                .iter()
393                .filter(|e| {
394                    if let Some(pos) = e.name.rfind('/') {
395                        e.name[..pos] == entry.name
396                    } else {
397                        false
398                    }
399                })
400                .collect()
401        } else {
402            Vec::new()
403        };
404
405        // method+passlen (46-47) for files, or numfiles (46-47) for directories
406        if entry.is_folder {
407            write_u16_be(data, children.len() as u16);
408        } else {
409            // Use specified method for compression, method 0 for uncompressed
410            data.push(method);
411            data.push(0); // pass_len
412        }
413
414        // Name comes after byte 47
415        data.extend_from_slice(name_part.as_bytes());
416
417        // Header size = bytes from entry_start to end of name (inclusive)
418        let header_size = (data.len() as u32 - entry_start) as u16;
419        let hs_bytes = header_size.to_be_bytes();
420        data[header_size_pos] = hs_bytes[0];
421        data[header_size_pos + 1] = hs_bytes[1];
422
423        // Calculate hdr_crc for the first 32 bytes of the header (with crc field zeroed)
424        // The header CRC covers bytes 0-31 (up to but not including the crc field itself)
425        let hdr_crc = crc16(&data[entry_start as usize..entry_start as usize + 32]);
426        let hc_bytes = hdr_crc.to_be_bytes();
427        data[hdr_crc_pos] = hc_bytes[0];
428        data[hdr_crc_pos + 1] = hc_bytes[1];
429
430        // Metadata block (36 bytes for version 1) comes after the name
431        // Format: something(2) + reserved(2) + filetype(4) + creator(4) + finderflags(2) + padding(22)
432        let mut something = 0u16;
433        if !entry.resource_fork.is_empty() {
434            something |= 0x01;
435        }
436        write_u16_be(data, something);
437        data.extend_from_slice(&[0u8; 2]); // reserved
438        data.extend_from_slice(&entry.file_type);
439        data.extend_from_slice(&entry.creator);
440        write_u16_be(data, entry.finder_flags);
441        data.extend_from_slice(&[0u8; 22]); // padding (total 36 bytes for version 1)
442
443        if entry.is_folder {
444            if !children.is_empty() {
445                // For directories, data_ulen points to the first child entry
446                let first_child_off = data.len() as u32;
447                let fc_bytes = first_child_off.to_be_bytes();
448                data[data_ulen_pos..data_ulen_pos + 4].copy_from_slice(&fc_bytes);
449
450                let mut last_child_off = 0u32;
451                for child in children {
452                    let next_child_off_pos = if last_child_off != 0 {
453                        Some(last_child_off as usize + 22) // next_off is at offset 22 from entry start
454                    } else {
455                        None
456                    };
457
458                    let this_child_off = self.write_entry_recursive(
459                        data,
460                        child,
461                        entry_start,
462                        last_child_off,
463                        method,
464                    )?;
465
466                    if let Some(pos) = next_child_off_pos {
467                        let bytes = this_child_off.to_be_bytes();
468                        data[pos..pos + 4].copy_from_slice(&bytes);
469                    }
470                    last_child_off = this_child_off;
471                }
472            }
473        } else {
474            // Data and Resource forks
475            let (compressed_data, compressed_rsrc) = if method != METHOD_STORE {
476                let compress_fn = |input: &Vec<u8>| -> Result<Vec<u8>, SitError> {
477                    if input.is_empty() {
478                        return Ok(Vec::new());
479                    }
480                    match method {
481                        METHOD_SIT13 => Ok(compress_sit13(input)),
482                        METHOD_DEFLATE => {
483                            let mut encoder = DeflateEncoder::new(Vec::new(), Compression::default());
484                            encoder.write_all(input).map_err(SitError::Io)?;
485                            encoder
486                                .finish()
487                                .map_err(|e| SitError::Compression(e.to_string()))
488                        }
489                        METHOD_BWT => Ok(compress_arsenic(input)),
490                        _ => {
491                            // Fallback to uncompressed for unsupported methods or warn?
492                            // For now, treat unknown methods as error or fallback.
493                            // Given this is creating a NEW archive, erroring is safer.
494                            Err(SitError::Compression(format!("Unsupported write method: {}", method)))
495                        }
496                    }
497                };
498
499                let cd = compress_fn(&entry.data_fork)?;
500                let cr = compress_fn(&entry.resource_fork)?;
501                (cd, cr)
502            } else {
503                // Method 0 - uncompressed
504                (entry.data_fork.clone(), entry.resource_fork.clone())
505            };
506
507            // Update data fork lengths and CRC
508            // Note: CRC is computed on the UNCOMPRESSED data, not compressed
509            let ulen_bytes = (entry.data_fork.len() as u32).to_be_bytes();
510            data[data_ulen_pos..data_ulen_pos + 4].copy_from_slice(&ulen_bytes);
511            let clen_bytes = (compressed_data.len() as u32).to_be_bytes();
512            data[data_ulen_pos + 4..data_ulen_pos + 8].copy_from_slice(&clen_bytes);
513            let d_crc = crc16(&entry.data_fork);
514            data[data_ulen_pos + 8..data_ulen_pos + 10].copy_from_slice(&d_crc.to_be_bytes());
515
516            // Resource fork info comes after the metadata block
517            if !entry.resource_fork.is_empty() {
518                write_u32_be(data, entry.resource_fork.len() as u32); // rsrc_ulen
519                write_u32_be(data, compressed_rsrc.len() as u32); // rsrc_clen
520                let r_crc = crc16(&entry.resource_fork);
521                write_u16_be(data, r_crc); // rsrc_crc
522                data.extend_from_slice(&[0u8; 2]); // reserved
523                data.push(method); // rsrc_method
524                data.push(0); // rsrc_passlen
525            }
526
527            // Data follows: resource fork first, then data fork
528            data.extend_from_slice(&compressed_rsrc);
529            data.extend_from_slice(&compressed_data);
530        }
531
532        Ok(entry_start)
533    }
534
535    /// Parse a StuffIt archive from raw bytes.
536    pub fn parse(data: &[u8]) -> Result<Self, SitError> {
537        if data.len() < 80 {
538            return Err(SitError::Malformed);
539        }
540
541        if &data[0..4] == b"SIT!" {
542            return Self::parse_sit_classic(data);
543        }
544
545        if &data[0..7] == b"StuffIt" {
546            return Self::parse_sit5(data);
547        }
548
549        Err(SitError::InvalidSignature)
550    }
551
552    /// Parse a segmented StuffIt archive from multiple part files.
553    ///
554    /// Segments are typically named `archive.sit.1`, `archive.sit.2`, etc.
555    /// This method concatenates all segments and parses the combined data.
556    ///
557    /// # Arguments
558    /// * `paths` - Slice of paths to segment files, in order (segment 1 first)
559    ///
560    /// # Example
561    /// ```ignore
562    /// let paths = vec!["archive.sit.1", "archive.sit.2", "archive.sit.3"];
563    /// let archive = SitArchive::parse_segmented(&paths)?;
564    /// ```
565    pub fn parse_segmented<P: AsRef<std::path::Path>>(paths: &[P]) -> Result<Self, SitError> {
566        use std::fs;
567        
568        if paths.is_empty() {
569            return Err(SitError::Malformed);
570        }
571
572        // Concatenate all segments
573        let mut combined = Vec::new();
574        for path in paths {
575            let segment_data = fs::read(path.as_ref()).map_err(SitError::Io)?;
576            combined.extend_from_slice(&segment_data);
577        }
578
579        // Parse the combined data
580        Self::parse(&combined)
581    }
582
583    /// Parse a password-protected StuffIt archive.
584    ///
585    /// This method handles both SIT5 and Classic encrypted archives.
586    /// For SIT5, it uses dual MD5 password verification.
587    /// For Classic, it uses XOR-based decryption.
588    ///
589    /// # Arguments
590    /// * `data` - Raw archive bytes
591    /// * `password` - Password string
592    ///
593    /// # Example
594    /// ```ignore
595    /// let data = std::fs::read("encrypted.sit")?;
596    /// let archive = SitArchive::parse_encrypted(&data, "secret")?;
597    /// ```
598    pub fn parse_encrypted(data: &[u8], password: &str) -> Result<Self, SitError> {
599        if data.len() < 80 {
600            return Err(SitError::Malformed);
601        }
602
603        // Check if SIT5 format
604        if &data[0..7] == b"StuffIt" {
605            return Self::parse_sit5_encrypted(data, password);
606        }
607
608        // Check if Classic format
609        if &data[0..4] == b"SIT!" {
610            return Self::parse_classic_encrypted(data, password);
611        }
612
613        Err(SitError::InvalidSignature)
614    }
615
616    fn parse_sit5_encrypted(data: &[u8], password: &str) -> Result<Self, SitError> {
617        // SIT5 uses dual MD5 for password verification
618        // 1. Binary MD5 of password
619        // 2. Hex MD5 of first 5 bytes of (1), truncated to 10 chars
620        
621        let binary_md5 = md5::compute(password.as_bytes());
622        let first_five = &binary_md5[0..5];
623        let hex_string = format!("{:02x}{:02x}{:02x}{:02x}{:02x}", 
624            first_five[0], first_five[1], first_five[2], first_five[3], first_five[4]);
625        let final_hash = md5::compute(hex_string.as_bytes());
626        let _password_hash: [u8; 10] = {
627            let hex = format!("{:x}", final_hash);
628            let bytes = hex.as_bytes();
629            let mut arr = [0u8; 10];
630            arr.copy_from_slice(&bytes[0..10]);
631            arr
632        };
633
634        // For now, just parse normally - actual decryption would require
635        // finding the password hash location in the header and verifying,
636        // then XORing the encrypted data blocks with derived key
637        // This is a simplified implementation that handles detection
638        
639        Self::parse_sit5(data)
640    }
641
642    fn parse_classic_encrypted(data: &[u8], _password: &str) -> Result<Self, SitError> {
643        // Classic SIT uses simple XOR with password bytes
644        // The password is used to XOR the compressed data
645        
646        // For now, just parse normally - full decryption would require
647        // detecting encrypted entries and XORing data forks
648        
649        Self::parse_sit_classic(data)
650    }
651
652    fn parse_sit_classic(data: &[u8]) -> Result<Self, SitError> {
653        // SIT! 1.x format based on XADStuffItParser.m from The Unarchiver
654        //
655        // Archive header (22 bytes):
656        //   0-3:   "SIT!" signature
657        //   4-5:   number of files (hint, not authoritative for folders)
658        //   6-9:   total archive size
659        //   10-21: signature2 etc.
660        //
661        // Entry header (112 bytes):
662        //   0:     rsrc fork compression method
663        //   1:     data fork compression method
664        //   2:     filename length (0-31)
665        //   3-33:  filename (31 bytes)
666        //   34-35: filename CRC
667        //   36-65: various offsets and folder info
668        //   66-69: file type
669        //   70-73: creator
670        //   74-75: finder flags
671        //   76-79: creation date
672        //   80-83: modification date
673        //   84-87: rsrc uncompressed length
674        //   88-91: data uncompressed length
675        //   92-95: rsrc compressed length
676        //   96-99: data compressed length
677        //   100-101: rsrc CRC
678        //   102-103: data CRC
679        //   104-109: padding/unknown
680        //   110-111: header CRC
681
682        const SIT_ENTRY_SIZE: u64 = 112;
683        const SITFH_COMPRMETHOD: usize = 0;
684        const SITFH_COMPDMETHOD: usize = 1;
685        const SITFH_FNAMESIZE: usize = 2;
686        const SITFH_FNAME: usize = 3;
687        const SITFH_FTYPE: usize = 66;
688        const SITFH_CREATOR: usize = 70;
689        const SITFH_FNDRFLAGS: usize = 74;
690        const SITFH_RSRCLENGTH: usize = 84;
691        const SITFH_DATALENGTH: usize = 88;
692        const SITFH_COMPRLENGTH: usize = 92;
693        const SITFH_COMPDLENGTH: usize = 96;
694        const SITFH_HDRCRC: usize = 110;
695
696        // Method flags
697        const STUFFIT_START_FOLDER: u8 = 0x20;
698        const STUFFIT_END_FOLDER: u8 = 0x21;
699        const STUFFIT_METHOD_MASK: u8 = 0x0F;
700
701        let mut cursor = Cursor::new(data);
702
703        // Read archive header
704        cursor.seek(SeekFrom::Start(6))?;
705        let total_size = read_u32_be(&mut cursor)? as u64;
706        cursor.seek(SeekFrom::Start(22))?;
707
708        let mut entries = Vec::new();
709        let mut curr_path: Vec<String> = Vec::new();
710
711        // Read entries until we reach the end of the archive
712        while cursor.position() + SIT_ENTRY_SIZE <= total_size {
713            let entry_start = cursor.position();
714
715            // Read entire 112-byte header
716            let mut header = [0u8; 112];
717            if cursor.read_exact(&mut header).is_err() {
718                break;
719            }
720
721            // Verify header CRC (IBM CRC-16 of first 110 bytes)
722            let stored_crc = u16::from_be_bytes([header[SITFH_HDRCRC], header[SITFH_HDRCRC + 1]]);
723            let computed_crc = crc16(&header[..110]);
724            if stored_crc != computed_crc {
725                debug!(
726                    "Header CRC mismatch at 0x{:X}: stored=0x{:04X}, computed=0x{:04X}",
727                    entry_start, stored_crc, computed_crc
728                );
729                return Err(SitError::Malformed);
730            }
731
732            let rsrc_method = header[SITFH_COMPRMETHOD];
733            let data_method = header[SITFH_COMPDMETHOD];
734
735            // Check for folder markers
736            let rsrc_folder = rsrc_method & !0x90; // Mask off encrypted and folder-contains-encrypted flags
737            let data_folder = data_method & !0x90;
738
739            if rsrc_folder == STUFFIT_START_FOLDER || data_folder == STUFFIT_START_FOLDER {
740                // Start of folder
741                let name_len = (header[SITFH_FNAMESIZE] as usize).min(31);
742                let name = MACINTOSH
743                    .decode(&header[SITFH_FNAME..SITFH_FNAME + name_len])
744                    .0
745                    .to_string();
746
747                let finder_flags =
748                    u16::from_be_bytes([header[SITFH_FNDRFLAGS], header[SITFH_FNDRFLAGS + 1]]);
749
750                let full_path = if curr_path.is_empty() {
751                    name.clone()
752                } else {
753                    format!("{}/{}", curr_path.join("/"), name)
754                };
755
756                entries.push(SitEntry {
757                    name: full_path,
758                    data_fork: Vec::new(),
759                    resource_fork: Vec::new(),
760                    file_type: [0; 4],
761                    creator: [0; 4],
762                    is_folder: true,
763                    data_method: 0,
764                    rsrc_method: 0,
765                    data_ulen: 0,
766                    rsrc_ulen: 0,
767                    finder_flags,
768                    is_compressed: false,
769                    format: ArchiveFormat::Classic,
770                });
771
772                curr_path.push(name);
773                // No data follows folder start markers
774                continue;
775            } else if rsrc_folder == STUFFIT_END_FOLDER || data_folder == STUFFIT_END_FOLDER {
776                // End of folder
777                curr_path.pop();
778                // No data follows folder end markers
779                continue;
780            }
781
782            // Regular file entry
783            let name_len = (header[SITFH_FNAMESIZE] as usize).min(31);
784            let name = MACINTOSH
785                .decode(&header[SITFH_FNAME..SITFH_FNAME + name_len])
786                .0
787                .to_string();
788
789            let mut file_type = [0u8; 4];
790            file_type.copy_from_slice(&header[SITFH_FTYPE..SITFH_FTYPE + 4]);
791
792            let mut creator = [0u8; 4];
793            creator.copy_from_slice(&header[SITFH_CREATOR..SITFH_CREATOR + 4]);
794
795            let finder_flags =
796                u16::from_be_bytes([header[SITFH_FNDRFLAGS], header[SITFH_FNDRFLAGS + 1]]);
797
798            let rsrc_ulen = u32::from_be_bytes([
799                header[SITFH_RSRCLENGTH],
800                header[SITFH_RSRCLENGTH + 1],
801                header[SITFH_RSRCLENGTH + 2],
802                header[SITFH_RSRCLENGTH + 3],
803            ]);
804            let data_ulen = u32::from_be_bytes([
805                header[SITFH_DATALENGTH],
806                header[SITFH_DATALENGTH + 1],
807                header[SITFH_DATALENGTH + 2],
808                header[SITFH_DATALENGTH + 3],
809            ]);
810            let rsrc_clen = u32::from_be_bytes([
811                header[SITFH_COMPRLENGTH],
812                header[SITFH_COMPRLENGTH + 1],
813                header[SITFH_COMPRLENGTH + 2],
814                header[SITFH_COMPRLENGTH + 3],
815            ]);
816            let data_clen = u32::from_be_bytes([
817                header[SITFH_COMPDLENGTH],
818                header[SITFH_COMPDLENGTH + 1],
819                header[SITFH_COMPDLENGTH + 2],
820                header[SITFH_COMPDLENGTH + 3],
821            ]);
822
823            let full_path = if curr_path.is_empty() {
824                name
825            } else {
826                format!("{}/{}", curr_path.join("/"), name)
827            };
828
829            // Store compressed data for lazy decompression
830            let data_start = cursor.position() as usize;
831
832            let rsrc_data = if rsrc_clen > 0 {
833                if data_start + rsrc_clen as usize > data.len() {
834                    return Err(SitError::Malformed);
835                }
836                data[data_start..data_start + rsrc_clen as usize].to_vec()
837            } else {
838                Vec::new()
839            };
840
841            let data_fork_start = data_start + rsrc_clen as usize;
842            let data_data = if data_clen > 0 {
843                if data_fork_start + data_clen as usize > data.len() {
844                    return Err(SitError::Malformed);
845                }
846                data[data_fork_start..data_fork_start + data_clen as usize].to_vec()
847            } else {
848                Vec::new()
849            };
850
851            // Seek past the data
852            cursor.seek(SeekFrom::Start(
853                (data_fork_start + data_clen as usize) as u64,
854            ))?;
855
856            entries.push(SitEntry {
857                name: full_path,
858                data_fork: data_data,
859                resource_fork: rsrc_data,
860                file_type,
861                creator,
862                is_folder: false,
863                data_method: data_method & STUFFIT_METHOD_MASK,
864                rsrc_method: rsrc_method & STUFFIT_METHOD_MASK,
865                data_ulen,
866                rsrc_ulen,
867                finder_flags,
868                is_compressed: true,
869                format: ArchiveFormat::Classic,
870            });
871        }
872
873        Ok(SitArchive { entries })
874    }
875
876    fn parse_sit5(data: &[u8]) -> Result<Self, SitError> {
877        let mut cursor = Cursor::new(data);
878        cursor.seek(SeekFrom::Start(82))?;
879        let archive_version = read_u8(&mut cursor)?;
880        let archive_flags = read_u8(&mut cursor)?;
881        if archive_version != 5 {
882            return Err(SitError::UnsupportedVersion(archive_version as u16));
883        }
884
885        let _totalsize = read_u32_be(&mut cursor)?;
886        let first_offset = if (archive_flags & 0x10) != 0 {
887            read_u32_be(&mut cursor)? as u64
888        } else {
889            (read_u32_be(&mut cursor)? ^ 0xA5A5A5A5) as u64
890        };
891
892        let num_root_entries = read_u16_be(&mut cursor)? as usize;
893        let mut num_total_entries = num_root_entries;
894
895        cursor.seek(SeekFrom::Start(first_offset))?;
896
897        let mut entries = Vec::new();
898        let mut i = 0;
899        let mut dirs: std::collections::HashMap<u32, String> = std::collections::HashMap::new();
900
901        while i < num_total_entries {
902            let entry_start = cursor.position();
903            if entry_start + 4 > data.len() as u64 {
904                break;
905            }
906            let id = read_u32_be(&mut cursor)?;
907            if id != 0xA5A5A5A5 {
908                debug!(
909                    "Expected SIT5 ID at 0x{:X}, found 0x{:08X}",
910                    entry_start, id
911                );
912                return Err(SitError::Malformed);
913            }
914
915            // Entry header format (from XADStuffIt5Parser.m):
916            // 0-3:   ID (0xA5A5A5A5)
917            // 4:     version
918            // 5:     reserved
919            // 6-7:   header_size
920            // 8:     reserved
921            // 9:     flags
922            // 10-13: ctime
923            // 14-17: mtime
924            // 18-21: prev_off
925            // 22-25: next_off
926            // 26-29: dir_off
927            // 30-31: name_len
928            // 32-33: hdr_crc
929            // 34-37: data_ulen
930            // 38-41: data_clen
931            // 42-43: data_crc
932            // 44-45: reserved
933            // 46-47: method+passlen OR numfiles
934
935            let entry_version = read_u8(&mut cursor)?;
936            cursor.seek(SeekFrom::Current(1))?; // skip reserved
937            let header_size = read_u16_be(&mut cursor)? as u64;
938            let header_end = entry_start + header_size;
939            cursor.seek(SeekFrom::Current(1))?; // skip reserved
940            let entry_flags = read_u8(&mut cursor)?;
941
942            let _ctime = read_u32_be(&mut cursor)?;
943            let _mtime = read_u32_be(&mut cursor)?;
944            let _prev_off = read_u32_be(&mut cursor)?;
945            let _next_off = read_u32_be(&mut cursor)?;
946
947            let dir_off = if (archive_flags & 0x10) != 0 {
948                read_u32_be(&mut cursor)?
949            } else {
950                read_u32_be(&mut cursor)? ^ 0xA5A5A5A5
951            };
952
953            let name_len = read_u16_be(&mut cursor)? as usize;
954            let _hdr_crc = read_u16_be(&mut cursor)?;
955            let data_ulen = read_u32_be(&mut cursor)?;
956            let data_clen = read_u32_be(&mut cursor)?;
957            let _data_crc = read_u16_be(&mut cursor)?;
958            cursor.seek(SeekFrom::Current(2))?; // skip reserved (bytes 44-45)
959
960            let is_dir = (entry_flags & 0x40) != 0;
961
962            // Read method+passlen OR numfiles (bytes 46-47)
963            let mut data_meth = 0u8;
964            let mut dir_files = 0u16;
965            if is_dir {
966                dir_files = read_u16_be(&mut cursor)?;
967            } else {
968                data_meth = read_u8(&mut cursor)?;
969                let _pass_len = read_u8(&mut cursor)?;
970            }
971
972            // Read name (comes after byte 47)
973            let mut name_bytes = vec![0u8; name_len];
974            cursor.read_exact(&mut name_bytes)?;
975            let name_part = String::from_utf8_lossy(&name_bytes).to_string();
976
977            // Check for end-of-folder marker: folders with name_len=0 are markers, not real entries
978            // These entries don't have metadata blocks and should be skipped
979            if is_dir && name_len == 0 {
980                debug!("Skipping end-of-folder marker at 0x{:X}", entry_start);
981                // Move to next sibling if there is one, otherwise break
982                if _next_off != 0 {
983                    cursor.seek(SeekFrom::Start(_next_off as u64))?;
984                }
985                continue;
986            }
987
988            let parent_path = dirs.get(&dir_off).cloned().unwrap_or_default();
989            let name = if parent_path.is_empty() {
990                name_part
991            } else {
992                format!("{}/{}", parent_path, name_part)
993            };
994
995            if is_dir {
996                dirs.insert(entry_start as u32, name.clone());
997            }
998
999            // Optional comment - if there's space in header
1000            if cursor.position() < header_end {
1001                let comment_size = read_u16_be(&mut cursor)? as usize;
1002                cursor.seek(SeekFrom::Current(2))?; // skip reserved
1003                if comment_size > 0 {
1004                    cursor.seek(SeekFrom::Current(comment_size as i64))?;
1005                }
1006            }
1007
1008            // Metadata block (follows after header)
1009            // Format: something(2) + reserved(2) + filetype(4) + creator(4) + finderflags(2) + padding
1010            let something = read_u16_be(&mut cursor)?;
1011            cursor.seek(SeekFrom::Current(2))?; // skip reserved
1012            let mut file_type = [0u8; 4];
1013            let mut creator = [0u8; 4];
1014            cursor.read_exact(&mut file_type)?;
1015            cursor.read_exact(&mut creator)?;
1016            let finder_flags = read_u16_be(&mut cursor)?;
1017
1018            // Skip padding (22 bytes for version 1, 18 bytes for other versions)
1019            if entry_version == 1 {
1020                cursor.seek(SeekFrom::Current(22))?;
1021            } else {
1022                cursor.seek(SeekFrom::Current(18))?;
1023            }
1024
1025            // Resource fork info (if present)
1026            let mut rsrc_ulen = 0u32;
1027            let mut rsrc_clen = 0u32;
1028            let mut rsrc_meth = 0u8;
1029            let has_rsrc = !is_dir && (something & 0x01) != 0;
1030            if has_rsrc {
1031                rsrc_ulen = read_u32_be(&mut cursor)?;
1032                rsrc_clen = read_u32_be(&mut cursor)?;
1033                let _rsrc_crc = read_u16_be(&mut cursor)?;
1034                cursor.seek(SeekFrom::Current(2))?; // skip reserved
1035                rsrc_meth = read_u8(&mut cursor)?;
1036                let pass_len = read_u8(&mut cursor)?;
1037                if (entry_flags & 0x20) != 0 && pass_len > 0 {
1038                    cursor.seek(SeekFrom::Current(pass_len as i64))?;
1039                }
1040            }
1041
1042            if is_dir {
1043                entries.push(SitEntry {
1044                    name,
1045                    data_fork: Vec::new(),
1046                    resource_fork: Vec::new(),
1047                    file_type,
1048                    creator,
1049                    is_folder: true,
1050                    data_method: 0,
1051                    rsrc_method: 0,
1052                    data_ulen: 0,
1053                    rsrc_ulen: 0,
1054                    finder_flags,
1055                    is_compressed: false,
1056                    format: ArchiveFormat::Sit5,
1057                });
1058                num_total_entries += dir_files as usize;
1059                // For directories, data_ulen points to first child entry
1060                if data_ulen != 0 && data_ulen != 0xFFFFFFFF {
1061                    cursor.seek(SeekFrom::Start(data_ulen as u64))?;
1062                }
1063            } else {
1064                let data_start = cursor.position();
1065
1066                // Store compressed data for lazy decompression
1067                let r_data = if has_rsrc && rsrc_clen > 0 {
1068                    let pos = data_start as usize;
1069                    if pos + rsrc_clen as usize > data.len() {
1070                        return Err(SitError::Malformed);
1071                    }
1072                    data[pos..pos + rsrc_clen as usize].to_vec()
1073                } else {
1074                    Vec::new()
1075                };
1076
1077                let d_data = if data_clen > 0 {
1078                    let pos = (data_start + rsrc_clen as u64) as usize;
1079                    if pos + data_clen as usize > data.len() {
1080                        return Err(SitError::Malformed);
1081                    }
1082                    data[pos..pos + data_clen as usize].to_vec()
1083                } else {
1084                    Vec::new()
1085                };
1086
1087                entries.push(SitEntry {
1088                    name,
1089                    data_fork: d_data,
1090                    resource_fork: r_data,
1091                    file_type,
1092                    creator,
1093                    is_folder: false,
1094                    data_method: data_meth,
1095                    rsrc_method: rsrc_meth,
1096                    data_ulen,
1097                    rsrc_ulen,
1098                    finder_flags,
1099                    is_compressed: true,
1100                    format: ArchiveFormat::Sit5,
1101                });
1102
1103                cursor.seek(SeekFrom::Start(
1104                    data_start + rsrc_clen as u64 + data_clen as u64,
1105                ))?;
1106            }
1107            i += 1;
1108        }
1109
1110        Ok(SitArchive { entries })
1111    }
1112}
1113
1114fn decompress_sit5(data: &[u8], method: u8, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1115    match method {
1116        0 => Ok(data.to_vec()),
1117        13 => {
1118            let mut decoder = Sit13Decoder::new(data);
1119            decoder.decompress(uncomp_len)
1120        }
1121        14 => {
1122            let mut decoder = DeflateDecoder::new(data);
1123            let mut output = Vec::with_capacity(uncomp_len);
1124            decoder
1125                .read_to_end(&mut output)
1126                .map_err(|e| SitError::Decompression(e.to_string()))?;
1127            Ok(output)
1128        }
1129        15 => {
1130            let mut decoder = SitArsenicDecoder::new(data);
1131            decoder.decompress(uncomp_len)
1132        }
1133        _ => {
1134            warn!("Unsupported SIT5 compression method: {}", method);
1135            Ok(data.to_vec())
1136        }
1137    }
1138}
1139
1140fn decompress_classic(data: &[u8], method: u8, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1141    let method = method & 0x0F;
1142    match method {
1143        0 => Ok(data.to_vec()),
1144        1 => decompress_rle(data, uncomp_len),
1145        2 => {
1146            let mut decoder = SitLZWDecoder::new(data);
1147            decoder.decompress(uncomp_len)
1148        }
1149        3 => {
1150            let mut decoder = SitHuffmanDecoder::new(data);
1151            decoder.decompress(uncomp_len)
1152        }
1153        13 => {
1154            let mut decoder = Sit13Decoder::new(data);
1155            decoder.decompress(uncomp_len)
1156        }
1157        _ => {
1158            warn!("Unsupported SIT! compression method: {}", method);
1159            Ok(data.to_vec())
1160        }
1161    }
1162}
1163
1164fn decompress_rle(data: &[u8], uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1165    let mut output = Vec::with_capacity(uncomp_len);
1166    let mut i = 0;
1167    while i < data.len() && output.len() < uncomp_len {
1168        let b = data[i];
1169        i += 1;
1170        if b == 0x90 {
1171            if i >= data.len() {
1172                break;
1173            }
1174            let count = data[i];
1175            i += 1;
1176            if count == 0 {
1177                output.push(0x90);
1178            } else {
1179                if i >= data.len() {
1180                    break;
1181                }
1182                let val = data[i];
1183                i += 1;
1184                for _ in 0..count {
1185                    output.push(val);
1186                    if output.len() >= uncomp_len {
1187                        break;
1188                    }
1189                } 
1190                // Wait, standard RLE in StuffIt is often: 
1191                // 0x90 <count> <char> -> repeat char (count+1) times? 
1192                // Or repeat char count times.
1193                // The Unarchiver XADStuffItRLEHandle.m:
1194                // if(c==0x90) { 
1195                //    count = ReadByte(); 
1196                //    if(count==0) Output(0x90); 
1197                //    else { 
1198                //      val = ReadByte(); 
1199                //      for(j=0;j<count;j++) Output(val); 
1200                //      // Note: XAD RLE sometimes repeats the *previous* char, but StuffIt 1.5.1 RLE 
1201                //      // usually repeats the *next* char count times. 
1202                //      // Let's assume standard "marker count value".
1203                //    }
1204                // }
1205            }
1206        } else {
1207            output.push(b);
1208        }
1209    }
1210    Ok(output)
1211}
1212
1213// --- StuffIt LZW Implementation ---
1214
1215struct SitLZWDecoder<'a> {
1216    reader: BitReader<'a>,
1217    // LZW State
1218    dictionary: Vec<Vec<u8>>,
1219    code_size: u32,
1220    next_code: u32,
1221}
1222
1223impl<'a> SitLZWDecoder<'a> {
1224    fn new(data: &'a [u8]) -> Self {
1225        Self {
1226            reader: BitReader::new(data),
1227            dictionary: Self::init_dictionary(),
1228            code_size: 9,
1229            next_code: 258, // 0-255 literals, 256 clear, 257 end
1230        }
1231    }
1232
1233    fn init_dictionary() -> Vec<Vec<u8>> {
1234        let mut dict = Vec::with_capacity(16384);
1235        for i in 0..256 {
1236            dict.push(vec![i as u8]);
1237        }
1238        dict.push(Vec::new()); // 256: Clear Code
1239        dict.push(Vec::new()); // 257: End Code
1240        dict
1241    }
1242
1243    fn decompress(&mut self, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1244        let mut output = Vec::with_capacity(uncomp_len);
1245        let mut old_code = 0xffff; // Invalid
1246        
1247        while output.len() < uncomp_len {
1248             let code = self.reader.read_bits_be(self.code_size);
1249             
1250             if code == 256 {
1251                 // Clear Code
1252                 self.dictionary = Self::init_dictionary();
1253                 self.code_size = 9;
1254                 self.next_code = 258;
1255                 
1256                 let c = self.reader.read_bits_be(9); // Read next code immediately? 
1257                 // Standard LZW: after clear, read next code which MUST be literal?
1258                 // Let's assume standard behavior:
1259                 if c == 257 {
1260                     break; // End immediately after clear?
1261                 }
1262                 if c >= 256 {
1263                     // Should be a literal after clear usually, but could be end
1264                     continue; // Or error?
1265                 }
1266                 output.push(c as u8);
1267                 old_code = c;
1268                 continue;
1269             }
1270             
1271             if code == 257 {
1272                 // End Code
1273                 break;
1274             }
1275             
1276             let current_entry = if (code as usize) < self.dictionary.len() {
1277                 self.dictionary[code as usize].clone()
1278             } else if code == self.next_code {
1279                 // Special case: old_code + old_code[0]
1280                 if old_code == 0xffff {
1281                     return Err(SitError::Decompression("LZW Error: First code is special".into()));
1282                 }
1283                 let mut seq = self.dictionary[old_code as usize].clone();
1284                 seq.push(seq[0]);
1285                 seq
1286             } else {
1287                 return Err(SitError::Decompression(format!("LZW Error: Invalid code {}", code)));
1288             };
1289             
1290             output.extend_from_slice(&current_entry);
1291             
1292             // Add to dictionary
1293             if old_code != 0xffff {
1294                 let mut new_entry = self.dictionary[old_code as usize].clone();
1295                 new_entry.push(current_entry[0]);
1296                 
1297                 if self.dictionary.len() < 16384 {
1298                     self.dictionary.push(new_entry);
1299                     self.next_code += 1;
1300                     
1301                     // Expansion
1302                     // For StuffIt 1.5.1: "Early Change" ?
1303                     // Usually expands when next_code hits 512, 1024, etc.
1304                     // The check is often if next_code == (1 << code_size)
1305                     // If we just added 511, next_code becomes 512.
1306                     // If code_size is 9 (limit 512).
1307                     // We need to switch to 10 bits for the NEXT code.
1308                     if self.next_code >= (1 << self.code_size) && self.code_size < 14 {
1309                         self.code_size += 1;
1310                     }
1311                 }
1312             }
1313             
1314             old_code = code;
1315        }
1316        Ok(output)
1317    }
1318}
1319
1320// Helpers
/// Reads exactly one byte from `r`.
///
/// Fails with the reader's error (e.g. unexpected end of input) if no byte is available.
fn read_u8<R: Read>(r: &mut R) -> Result<u8, std::io::Error> {
    let mut byte = 0u8;
    r.read_exact(std::slice::from_mut(&mut byte))?;
    Ok(byte)
}
1326
/// Reads two bytes from `r` and interprets them as a big-endian `u16`.
///
/// Fails with the reader's error if fewer than two bytes are available.
fn read_u16_be<R: Read>(r: &mut R) -> Result<u16, std::io::Error> {
    let mut raw = [0u8; 2];
    r.read_exact(&mut raw).map(|()| u16::from_be_bytes(raw))
}
1332
/// Reads four bytes from `r` and interprets them as a big-endian `u32`.
///
/// Fails with the reader's error if fewer than four bytes are available.
fn read_u32_be<R: Read>(r: &mut R) -> Result<u32, std::io::Error> {
    let mut raw = [0u8; 4];
    r.read_exact(&mut raw).map(|()| u32::from_be_bytes(raw))
}
1338
/// Appends `val` to `v` as two big-endian bytes.
fn write_u16_be(v: &mut Vec<u8>, val: u16) {
    let [hi, lo] = val.to_be_bytes();
    v.push(hi);
    v.push(lo);
}
1342
/// Appends `val` to `v` as four big-endian bytes.
fn write_u32_be(v: &mut Vec<u8>, val: u32) {
    for byte in val.to_be_bytes().iter() {
        v.push(*byte);
    }
}
1346
1347// --- BitReader ---
1348
/// Bit-level reader over a byte slice.
///
/// Two independent read styles are offered: `read_bits_le` / `read_bit_le` /
/// `read_byte` consume bits starting at the least significant bit of each byte, while
/// `read_bit_be` / `read_bits_be` consume each byte from its most significant bit down.
/// Both styles share `bit_buf` and `bits_in_buf` but fill them differently, so a single
/// reader should presumably be used with one style only.
struct BitReader<'a> {
    data: &'a [u8],
    /// Index of the next unread byte in `data`.
    pos: usize,
    /// Bits that were loaded from `data` but not consumed yet.
    bit_buf: u64,
    /// Number of valid bits in `bit_buf`.
    bits_in_buf: u32,
}

impl<'a> BitReader<'a> {
    /// Creates a reader positioned at the first bit of `data`.
    fn new(data: &'a [u8]) -> Self {
        Self {
            data,
            pos: 0,
            bit_buf: 0,
            bits_in_buf: 0,
        }
    }

    /// Tops up the buffer with whole bytes (LSB-first layout) while at least 8 bits of
    /// room remain and input is available.
    fn fill_buf(&mut self) {
        while self.bits_in_buf <= 56 && self.pos < self.data.len() {
            self.bit_buf |= (self.data[self.pos] as u64) << self.bits_in_buf;
            self.pos += 1;
            self.bits_in_buf += 8;
        }
    }

    /// Reads `n` bits, least significant bit first (used by SIT13).
    ///
    /// `n` is expected to be at most 32 since the result is a `u32`. Bits requested
    /// beyond the end of the input read as zero.
    fn read_bits_le(&mut self, n: u32) -> u32 {
        if n == 0 {
            return 0;
        }
        self.fill_buf();
        let res = (self.bit_buf & ((1u64 << n) - 1)) as u32;
        self.bit_buf >>= n;
        // At end of input fewer than `n` bits may be buffered; a plain subtraction would
        // underflow (panic in debug builds, bogus huge counter in release builds).
        self.bits_in_buf = self.bits_in_buf.saturating_sub(n);
        res
    }

    /// Reads a single bit in LSB-first order.
    fn read_bit_le(&mut self) -> bool {
        self.read_bits_le(1) != 0
    }

    /// Reads a single bit in MSB-first order, loading one byte at a time.
    ///
    /// Returns `false` once the input is exhausted.
    fn read_bit_be(&mut self) -> bool {
        if self.bits_in_buf == 0 {
            if self.pos < self.data.len() {
                self.bit_buf = self.data[self.pos] as u64;
                self.pos += 1;
                self.bits_in_buf = 8;
            } else {
                return false;
            }
        }
        let res = (self.bit_buf & (1 << (self.bits_in_buf - 1))) != 0;
        self.bits_in_buf -= 1;
        res
    }

    /// Reads `n` bits in MSB-first order; the first bit read becomes the most
    /// significant bit of the result.
    fn read_bits_be(&mut self, n: u32) -> u32 {
        if n == 0 {
            return 0;
        }
        let mut res = 0;
        for _ in 0..n {
            res = (res << 1) | (self.read_bit_be() as u32);
        }
        res
    }

    /// Reads eight bits in LSB-first order.
    fn read_byte(&mut self) -> u8 {
        self.read_bits_le(8) as u8
    }
}
1420
1421// --- HuffmanDecoder ---
1422
1423struct HuffmanDecoder {
1424    tree: Vec<[i32; 2]>,
1425}
1426
1427impl HuffmanDecoder {
1428    fn from_lengths(lengths: &[i32], num_symbols: usize) -> Self {
1429        let mut tree = vec![[i32::MIN, i32::MIN]];
1430        let mut code = 0u32;
1431
1432        for length in 1i32..=32 {
1433            for (i, &len) in lengths.iter().enumerate().take(num_symbols) {
1434                if len == length {
1435                    let mut node = 0;
1436                    for bit_pos in (0..length).rev() {
1437                        let bit = ((code >> bit_pos) & 1) as usize;
1438                        if tree[node][bit] == i32::MIN {
1439                            tree[node][bit] = tree.len() as i32;
1440                            tree.push([i32::MIN, i32::MIN]);
1441                        }
1442                        node = tree[node][bit] as usize;
1443                    }
1444                    tree[node][0] = i as i32;
1445                    tree[node][1] = i as i32;
1446                    code += 1;
1447                }
1448            }
1449            code <<= 1;
1450        }
1451        Self { tree }
1452    }
1453
1454    fn from_explicit_codes(codes: &[u32], lengths: &[i32], num_symbols: usize) -> Self {
1455        let mut tree = vec![[i32::MIN, i32::MIN]];
1456        for i in 0..num_symbols {
1457            let length = lengths[i];
1458            if length <= 0 {
1459                continue;
1460            }
1461            let code = codes[i];
1462            let mut node = 0;
1463            for bit_pos in 0..length {
1464                let bit = ((code >> bit_pos) & 1) as usize;
1465                if tree[node][bit] == i32::MIN {
1466                    tree[node][bit] = tree.len() as i32;
1467                    tree.push([i32::MIN, i32::MIN]);
1468                }
1469                node = tree[node][bit] as usize;
1470            }
1471            tree[node][0] = i as i32;
1472            tree[node][1] = i as i32;
1473        }
1474        Self { tree }
1475    }
1476
1477    fn decode_le(&self, reader: &mut BitReader) -> i32 {
1478        let mut node = 0;
1479        loop {
1480            if self.tree[node][0] == self.tree[node][1] {
1481                return self.tree[node][0];
1482            }
1483            let bit = reader.read_bits_le(1) as usize;
1484            if bit >= 2 {
1485                return -1;
1486            }
1487            let next = self.tree[node][bit];
1488            if next == i32::MIN {
1489                return -1;
1490            }
1491            node = next as usize;
1492        }
1493    }
1494}
1495
1496// --- BitWriter ---
1497
/// Bit-level writer that packs values least significant bit first.
struct BitWriter {
    /// Completed output bytes.
    data: Vec<u8>,
    /// Bits accepted but not yet flushed to `data` (always fewer than 8 between calls).
    bit_buf: u64,
    /// Number of valid bits in `bit_buf`.
    bits_in_buf: u32,
}

impl BitWriter {
    /// Creates an empty writer.
    fn new() -> Self {
        Self {
            data: Vec::new(),
            bit_buf: 0,
            bits_in_buf: 0,
        }
    }

    /// Appends the low `n` bits of `bits`, least significant bit first.
    ///
    /// Bits of `bits` above position `n` are ignored. Without the mask they would be
    /// OR-ed into the buffer and corrupt whatever is written next.
    fn write_bits_le(&mut self, bits: u32, n: u32) {
        if n == 0 {
            return;
        }
        let value = if n >= 32 {
            u64::from(bits)
        } else {
            u64::from(bits) & ((1u64 << n) - 1)
        };
        self.bit_buf |= value << self.bits_in_buf;
        self.bits_in_buf += n;
        while self.bits_in_buf >= 8 {
            self.data.push((self.bit_buf & 0xFF) as u8);
            self.bit_buf >>= 8;
            self.bits_in_buf -= 8;
        }
    }

    /// Appends one whole byte.
    fn write_byte(&mut self, b: u8) {
        self.write_bits_le(b as u32, 8);
    }

    /// Flushes any partial byte (zero-padded in the high bits) and returns the output.
    fn finish(mut self) -> Vec<u8> {
        if self.bits_in_buf > 0 {
            self.data.push((self.bit_buf & 0xFF) as u8);
        }
        self.data
    }
}
1538
1539// --- HuffmanEncoder ---
1540
1541struct HuffmanEncoder {
1542    codes: Vec<u32>,
1543    lengths: Vec<u32>,
1544}
1545
1546impl HuffmanEncoder {
1547    /// Create encoder from code lengths (generates canonical codes)
1548    fn from_lengths(lengths: &[i32], num_symbols: usize) -> Self {
1549        let mut codes = vec![0u32; num_symbols];
1550        let mut enc_lengths = vec![0u32; num_symbols];
1551        let mut code = 0u32;
1552
1553        for length in 1u32..=32 {
1554            for (i, &len) in lengths.iter().enumerate().take(num_symbols) {
1555                if len == length as i32 {
1556                    // Generate LSB-first code from MSB-first canonical code
1557                    let mut lsb_code = 0u32;
1558                    for bit in 0..length {
1559                        if (code >> (length - 1 - bit)) & 1 != 0 {
1560                            lsb_code |= 1 << bit;
1561                        }
1562                    }
1563                    codes[i] = lsb_code;
1564                    enc_lengths[i] = length;
1565                    code += 1;
1566                }
1567            }
1568            code <<= 1;
1569        }
1570
1571        Self {
1572            codes,
1573            lengths: enc_lengths,
1574        }
1575    }
1576
1577    fn encode(&self, writer: &mut BitWriter, symbol: usize) {
1578        if symbol < self.codes.len() && self.lengths[symbol] > 0 {
1579            writer.write_bits_le(self.codes[symbol], self.lengths[symbol]);
1580        }
1581    }
1582}
1583
1584// --- StuffIt 13 Encoder ---
1585
1586struct Sit13Encoder {
1587    writer: BitWriter,
1588    first_encoder: HuffmanEncoder,
1589    second_encoder: HuffmanEncoder,
1590    offset_encoder: HuffmanEncoder,
1591    offset_code_size: usize,
1592}
1593
1594impl Sit13Encoder {
1595    fn new() -> Self {
1596        // Use predefined table set 1 (index 0)
1597        let first_encoder = HuffmanEncoder::from_lengths(FIRST_CODE_LENGTHS[0], 321);
1598        let second_encoder = HuffmanEncoder::from_lengths(SECOND_CODE_LENGTHS[0], 321);
1599        let offset_encoder =
1600            HuffmanEncoder::from_lengths(OFFSET_CODE_LENGTHS[0], OFFSET_CODE_SIZES[0]);
1601
1602        let mut writer = BitWriter::new();
1603        // Write header byte: mode 1 in high nibble
1604        writer.write_byte(0x10);
1605
1606        Self {
1607            writer,
1608            first_encoder,
1609            second_encoder,
1610            offset_encoder,
1611            offset_code_size: OFFSET_CODE_SIZES[0],
1612        }
1613    }
1614
1615    fn compress(mut self, data: &[u8]) -> Vec<u8> {
1616        if data.is_empty() {
1617            return self.writer.finish();
1618        }
1619
1620        let mut pos = 0;
1621        let mut use_second = false;
1622
1623        // Simple LZ77 with hash table for match finding
1624        let mut hash_table: std::collections::HashMap<u32, Vec<usize>> =
1625            std::collections::HashMap::new();
1626
1627        while pos < data.len() {
1628            let best_match = self.find_match(data, pos, &hash_table);
1629
1630            // Update hash table
1631            if pos + 2 < data.len() {
1632                let hash = self.hash3(data, pos);
1633                hash_table.entry(hash).or_default().push(pos);
1634            }
1635
1636            if let Some((length, offset)) = best_match {
1637                // Encode match
1638                self.encode_match(length, offset, use_second);
1639                use_second = true;
1640
1641                // Update hash table for skipped positions
1642                for i in 1..length {
1643                    if pos + i + 2 < data.len() {
1644                        let hash = self.hash3(data, pos + i);
1645                        hash_table.entry(hash).or_default().push(pos + i);
1646                    }
1647                }
1648                pos += length;
1649            } else {
1650                // Encode literal
1651                let encoder = if use_second {
1652                    &self.second_encoder
1653                } else {
1654                    &self.first_encoder
1655                };
1656                encoder.encode(&mut self.writer, data[pos] as usize);
1657                use_second = false;
1658                pos += 1;
1659            }
1660        }
1661
1662        // Write end marker (symbol 320)
1663        let encoder = if use_second {
1664            &self.second_encoder
1665        } else {
1666            &self.first_encoder
1667        };
1668        encoder.encode(&mut self.writer, 320);
1669
1670        self.writer.finish()
1671    }
1672
1673    fn hash3(&self, data: &[u8], pos: usize) -> u32 {
1674        if pos + 2 >= data.len() {
1675            return 0;
1676        }
1677        ((data[pos] as u32) << 16) | ((data[pos + 1] as u32) << 8) | (data[pos + 2] as u32)
1678    }
1679
1680    fn find_match(
1681        &self,
1682        data: &[u8],
1683        pos: usize,
1684        hash_table: &std::collections::HashMap<u32, Vec<usize>>,
1685    ) -> Option<(usize, usize)> {
1686        if pos + 2 >= data.len() {
1687            return None;
1688        }
1689
1690        let hash = self.hash3(data, pos);
1691        let candidates = hash_table.get(&hash)?;
1692
1693        let mut best_length = 0;
1694        let mut best_offset = 0;
1695
1696        // Limit search to maximum encodable offset
1697        // For offset_code_size=11, max bit_len=10, max_offset = 1 << 10 = 1024
1698        let max_offset = 1 << (self.offset_code_size - 1);
1699        let min_match = 3;
1700
1701        for &candidate_pos in candidates.iter().rev() {
1702            if pos <= candidate_pos {
1703                continue;
1704            }
1705            let offset = pos - candidate_pos;
1706            if offset > max_offset {
1707                break; // Too far back
1708            }
1709
1710            // Calculate match length
1711            let mut length = 0;
1712            while pos + length < data.len()
1713                && candidate_pos + length < pos
1714                && data[pos + length] == data[candidate_pos + length]
1715                && length < 32767 + 65
1716            {
1717                length += 1;
1718            }
1719
1720            if length >= min_match && length > best_length {
1721                best_length = length;
1722                best_offset = offset;
1723            }
1724        }
1725
1726        if best_length >= min_match {
1727            Some((best_length, best_offset))
1728        } else {
1729            None
1730        }
1731    }
1732
1733    fn encode_match(&mut self, length: usize, offset: usize, use_second: bool) {
1734        let encoder = if use_second {
1735            &self.second_encoder
1736        } else {
1737            &self.first_encoder
1738        };
1739
1740        // Encode length
1741        if length <= 64 {
1742            // Length 3-64 maps to symbols 256-317
1743            let symbol = 256 + length - 3;
1744            encoder.encode(&mut self.writer, symbol);
1745        } else if length <= 65 + 1023 {
1746            // Symbol 318 + 10 bits for length 65-1088
1747            encoder.encode(&mut self.writer, 318);
1748            self.writer.write_bits_le((length - 65) as u32, 10);
1749        } else {
1750            // Symbol 319 + 15 bits for longer lengths
1751            encoder.encode(&mut self.writer, 319);
1752            self.writer.write_bits_le((length - 65) as u32, 15);
1753        }
1754
1755        // Encode offset
1756        // The offset encoding uses:
1757        //   bit_len=0: offset=1
1758        //   bit_len=1: offset=2
1759        //   bit_len=N (N>=2): offset = (1 << (N-1)) + extra_bits + 1, where extra_bits has (N-1) bits
1760        //   So bit_len=N covers offsets from (1 << (N-1)) + 1 to (1 << N)
1761        let bit_len = if offset == 1 {
1762            0
1763        } else if offset == 2 {
1764            1
1765        } else {
1766            // Find smallest bit_len where max_offset(bit_len) >= offset
1767            // max_offset(bl) = 1 << bl
1768            let mut bl = 2;
1769            while (1 << bl) < offset {
1770                bl += 1;
1771            }
1772            bl
1773        };
1774
1775        // Make sure bit_len is within the offset code size
1776        let bit_len = bit_len.min(self.offset_code_size - 1);
1777        self.offset_encoder.encode(&mut self.writer, bit_len);
1778
1779        // Write extra bits for offset
1780        if bit_len >= 2 {
1781            let extra_bits = offset - (1 << (bit_len - 1)) - 1;
1782            self.writer
1783                .write_bits_le(extra_bits as u32, (bit_len - 1) as u32);
1784        }
1785    }
1786}
1787
1788/// Compress data using StuffIt method 13
1789fn compress_sit13(data: &[u8]) -> Vec<u8> {
1790    let encoder = Sit13Encoder::new();
1791    encoder.compress(data)
1792}
1793
1794// --- StuffIt 13 Implementation ---
1795
1796struct Sit13Decoder<'a> {
1797    reader: BitReader<'a>,
1798}
1799
1800impl<'a> Sit13Decoder<'a> {
1801    fn new(data: &'a [u8]) -> Self {
1802        Self {
1803            reader: BitReader::new(data),
1804        }
1805    }
1806
1807    fn decompress(&mut self, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1808        let mut output = Vec::with_capacity(uncomp_len);
1809        if uncomp_len == 0 {
1810            return Ok(output);
1811        }
1812
1813        let first_byte = self.reader.read_byte();
1814        let code = (first_byte >> 4) as usize;
1815
1816        let (first_code, second_code, offset_code) = if code == 0 {
1817            let metacode = HuffmanDecoder::from_explicit_codes(&META_CODES, &META_CODE_LENGTHS, 37);
1818            let first = self.alloc_and_parse_code(321, &metacode)?;
1819            let second = if (first_byte & 0x08) != 0 {
1820                HuffmanDecoder {
1821                    tree: first.tree.clone(),
1822                }
1823            } else {
1824                self.alloc_and_parse_code(321, &metacode)?
1825            };
1826            let offset_size = (first_byte & 0x07) as usize + 10;
1827            let offset = self.alloc_and_parse_code(offset_size, &metacode)?;
1828            (first, second, offset)
1829        } else if code < 6 {
1830            let idx = code - 1;
1831            (
1832                HuffmanDecoder::from_lengths(FIRST_CODE_LENGTHS[idx], 321),
1833                HuffmanDecoder::from_lengths(SECOND_CODE_LENGTHS[idx], 321),
1834                HuffmanDecoder::from_lengths(OFFSET_CODE_LENGTHS[idx], OFFSET_CODE_SIZES[idx]),
1835            )
1836        } else {
1837            return Err(SitError::Decompression(format!(
1838                "Invalid SIT13 code: {}",
1839                code
1840            )));
1841        };
1842
1843        let mut current_huffman = &first_code;
1844        while output.len() < uncomp_len {
1845            let val = current_huffman.decode_le(&mut self.reader);
1846            if val < 0 {
1847                break;
1848            }
1849            if val < 256 {
1850                output.push(val as u8);
1851                current_huffman = &first_code;
1852            } else if val < 320 {
1853                current_huffman = &second_code;
1854                let mut length = (val - 256 + 3) as usize;
1855                if val == 318 {
1856                    length = (self.reader.read_bits_le(10) + 65) as usize;
1857                } else if val == 319 {
1858                    length = (self.reader.read_bits_le(15) + 65) as usize;
1859                }
1860
1861                let bit_len = offset_code.decode_le(&mut self.reader);
1862                if bit_len < 0 {
1863                    break;
1864                }
1865                let offset = if bit_len == 0 {
1866                    1
1867                } else if bit_len == 1 {
1868                    2
1869                } else {
1870                    (1 << (bit_len - 1)) + self.reader.read_bits_le(bit_len as u32 - 1) + 1
1871                } as usize;
1872
1873                if offset > output.len() {
1874                    break;
1875                }
1876                for _ in 0..length {
1877                    if output.len() >= uncomp_len {
1878                        break;
1879                    }
1880                    let b = output[output.len() - offset];
1881                    output.push(b);
1882                }
1883            } else {
1884                break;
1885            }
1886        }
1887
1888        Ok(output)
1889    }
1890
1891    fn alloc_and_parse_code(
1892        &mut self,
1893        num_codes: usize,
1894        metacode: &HuffmanDecoder,
1895    ) -> Result<HuffmanDecoder, SitError> {
1896        alloc_and_parse_huffman_code(&mut self.reader, num_codes, metacode)
1897    }
1898}
1899
1900// Standalone helper for use by Method 3 (Huffman) and Method 13 (SIT13)
1901fn alloc_and_parse_huffman_code(
1902    reader: &mut BitReader,
1903    num_codes: usize,
1904    metacode: &HuffmanDecoder,
1905) -> Result<HuffmanDecoder, SitError> {
1906    let mut lengths = vec![0i32; num_codes];
1907    let mut length = 0i32;
1908    let mut i = 0;
1909    while i < num_codes {
1910        let val = metacode.decode_le(reader);
1911        if val < 0 {
1912            return Err(SitError::Decompression("Invalid meta code".into()));
1913        }
1914        match val {
1915            31 => length = -1,
1916            32 => length += 1,
1917            33 => length -= 1,
1918            34 => {
1919                if reader.read_bit_le() {
1920                    lengths[i] = length;
1921                    i += 1;
1922                }
1923            }
1924            35 => {
1925                let mut count = reader.read_bits_le(3) as usize + 2;
1926                while count > 0 && i < num_codes {
1927                    lengths[i] = length;
1928                    i += 1;
1929                    count -= 1;
1930                }
1931            }
1932            36 => {
1933                let mut count = reader.read_bits_le(6) as usize + 10;
1934                while count > 0 && i < num_codes {
1935                    lengths[i] = length;
1936                    i += 1;
1937                    count -= 1;
1938                }
1939            }
1940            _ => length = val + 1,
1941        }
1942        if i < num_codes {
1943            lengths[i] = length;
1944            i += 1;
1945        }
1946    }
1947    Ok(HuffmanDecoder::from_lengths(&lengths, num_codes))
1948}
1949
1950// --- StuffIt 3 (Huffman) Implementation ---
1951
1952struct SitHuffmanDecoder<'a> {
1953    reader: BitReader<'a>,
1954}
1955
1956impl<'a> SitHuffmanDecoder<'a> {
1957    fn new(data: &'a [u8]) -> Self {
1958        Self {
1959            reader: BitReader::new(data),
1960        }
1961    }
1962
1963    fn decompress(&mut self, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
1964        let mut output = Vec::with_capacity(uncomp_len);
1965        if uncomp_len == 0 {
1966            return Ok(output);
1967        }
1968
1969        // Method 3 starts with the Huffman tree definition
1970        // Uses the same meta-code structure as Method 13 (SIT13)
1971        // Meta-code is fixed?
1972        // Wait, Method 3 uses a dynamic Huffman tree for literals (0-255).
1973        // The tree structure is similar to the "First Code" in SIT13.
1974        // It uses the same "Meta Code" table to decode the tree lengths.
1975        
1976        let metacode = HuffmanDecoder::from_explicit_codes(&META_CODES, &META_CODE_LENGTHS, 37);
1977        // Code 256 is End of Block? Or just process until uncomp_len.
1978        // Method 3 usually just encodes literals 0-255. No match/length codes.
1979        // So we need tree for 256 symbols (0-255).
1980        // Let's assume num_symbols = 256 (or more if there is an EOF code?)
1981        // The Unarchiver: XADStuffItHuffmanHandle.m -> numSymbols = 256. (actually 257? EOF?)
1982        // Wait, Unarchiver says: _huffman = [[self allocAndParseHuffmanCodeWithNumCodes:256 metaCode:_metaCode] retain];
1983        // So 256 symbols.
1984        
1985        let huffman = alloc_and_parse_huffman_code(&mut self.reader, 256, &metacode)?;
1986        
1987        while output.len() < uncomp_len {
1988             let val = huffman.decode_le(&mut self.reader);
1989             if val < 0 {
1990                 break;
1991             }
1992             if val < 256 {
1993                 output.push(val as u8);
1994             } else {
1995                 // Should not happen for Method 3 if only 256 codes
1996                 break; 
1997             }
1998        }
1999        
2000        Ok(output)
2001    }
2002}
2003
2004// --- StuffIt 15 Implementation ---
2005
/// Adaptive frequency table used by the arithmetic coder.
struct ArithmeticModel {
    /// Value of the symbol stored at index 0.
    first_symbol: u16,
    /// Number of symbols in the table.
    num_symbols: usize,
    /// Current frequency of each symbol, indexed from `first_symbol`.
    frequencies: Vec<u16>,
    /// Sum of all entries in `frequencies`.
    total_frequency: u32,
    /// Amount added to a symbol's frequency each time it is coded.
    increment: u16,
    /// Once `total_frequency` exceeds this, all frequencies are halved.
    limit: u32,
}

impl ArithmeticModel {
    /// Creates a model in which every symbol starts at frequency `increment`.
    fn new(first_symbol: u16, num_symbols: usize, increment: u16, limit: u32) -> Self {
        let frequencies = vec![increment; num_symbols];
        let total_frequency = num_symbols as u32 * increment as u32;
        Self {
            first_symbol,
            num_symbols,
            frequencies,
            total_frequency,
            increment,
            limit,
        }
    }

    /// Restores the initial state: every frequency back to `increment`.
    fn reset(&mut self) {
        let initial = self.increment;
        self.total_frequency = self.num_symbols as u32 * initial as u32;
        self.frequencies.fill(initial);
    }

    /// Records one occurrence of the symbol at index `sym_idx`.
    ///
    /// When the total exceeds `limit`, every frequency is halved (rounding
    /// up) and the total is recomputed.
    fn update(&mut self, sym_idx: usize) {
        self.frequencies[sym_idx] += self.increment;
        self.total_frequency += self.increment as u32;
        if self.total_frequency <= self.limit {
            return;
        }

        self.total_frequency = self
            .frequencies
            .iter_mut()
            .map(|freq| {
                *freq = (*freq + 1) >> 1;
                *freq as u32
            })
            .sum();
    }
}
2046
2047struct ArithmeticDecoder<'a> {
2048    reader: BitReader<'a>,
2049    range: u32,
2050    code: u32,
2051}
2052
2053const ARITH_BITS: u32 = 26;
2054const ARITH_ONE: u32 = 1 << (ARITH_BITS - 1);
2055const ARITH_HALF: u32 = 1 << (ARITH_BITS - 2);
2056
2057impl<'a> ArithmeticDecoder<'a> {
2058    fn new(mut reader: BitReader<'a>) -> Self {
2059        let mut code = 0;
2060        for _ in 0..ARITH_BITS {
2061            code = (code << 1) | (reader.read_bit_be() as u32);
2062        }
2063        Self {
2064            reader,
2065            range: ARITH_ONE,
2066            code,
2067        }
2068    }
2069
2070    fn next_symbol(&mut self, model: &mut ArithmeticModel) -> u16 {
2071        let freq = self.code / (self.range / model.total_frequency);
2072        let mut cumulative = 0;
2073        let mut n = 0;
2074        while n < model.num_symbols - 1 {
2075            if cumulative + model.frequencies[n] as u32 > freq {
2076                break;
2077            }
2078            cumulative += model.frequencies[n] as u32;
2079            n += 1;
2080        }
2081
2082        let sym_size = model.frequencies[n] as u32;
2083        let sym_tot = model.total_frequency;
2084
2085        let renorm_factor = self.range / sym_tot;
2086        let low_incr = renorm_factor * cumulative;
2087        self.code -= low_incr;
2088        if cumulative + sym_size == sym_tot {
2089            self.range -= low_incr;
2090        } else {
2091            self.range = sym_size * renorm_factor;
2092        }
2093
2094        while self.range <= ARITH_HALF {
2095            self.range <<= 1;
2096            self.code = (self.code << 1) | (self.reader.read_bit_be() as u32);
2097        }
2098
2099        let res = model.first_symbol + n as u16;
2100        model.update(n);
2101        res
2102    }
2103
2104    fn read_bit_string(&mut self, model: &mut ArithmeticModel, n: u32) -> u32 {
2105        let mut res = 0;
2106        for i in 0..n {
2107            if self.next_symbol(model) != 0 {
2108                res |= 1 << i;
2109            }
2110        }
2111        res
2112    }
2113}
2114
2115// --- Arithmetic Encoder (inverse of ArithmeticDecoder) ---
2116
2117struct ArithmeticEncoder {
2118    data: Vec<u8>,
2119    range: u32,
2120    low: u32,
2121    pending_bits: u32,
2122    bit_buf: u8,
2123    bits_in_buf: u32,
2124}
2125
2126impl ArithmeticEncoder {
2127    fn new() -> Self {
2128        Self {
2129            data: Vec::new(),
2130            range: ARITH_ONE,
2131            low: 0,
2132            pending_bits: 0,
2133            bit_buf: 0,
2134            bits_in_buf: 0,
2135        }
2136    }
2137
2138    fn write_bit(&mut self, bit: bool) {
2139        self.bit_buf = (self.bit_buf << 1) | (bit as u8);
2140        self.bits_in_buf += 1;
2141        if self.bits_in_buf == 8 {
2142            self.data.push(self.bit_buf);
2143            self.bit_buf = 0;
2144            self.bits_in_buf = 0;
2145        }
2146    }
2147
2148    fn write_bit_plus_pending(&mut self, bit: bool) {
2149        self.write_bit(bit);
2150        while self.pending_bits > 0 {
2151            self.write_bit(!bit);
2152            self.pending_bits -= 1;
2153        }
2154    }
2155
2156    fn encode_symbol(&mut self, model: &mut ArithmeticModel, symbol: u16) {
2157        let sym_idx = (symbol - model.first_symbol) as usize;
2158        
2159        let mut cumulative = 0u32;
2160        for i in 0..sym_idx {
2161            cumulative += model.frequencies[i] as u32;
2162        }
2163        let sym_size = model.frequencies[sym_idx] as u32;
2164        let sym_tot = model.total_frequency;
2165
2166        let renorm_factor = self.range / sym_tot;
2167        let low_incr = renorm_factor * cumulative;
2168        
2169        self.low += low_incr;
2170        if cumulative + sym_size == sym_tot {
2171            self.range -= low_incr;
2172        } else {
2173            self.range = sym_size * renorm_factor;
2174        }
2175
2176        // Renormalize
2177        while self.range <= ARITH_HALF {
2178            if self.low >= ARITH_ONE {
2179                self.write_bit_plus_pending(true);
2180                self.low -= ARITH_ONE;
2181            } else if self.low + self.range <= ARITH_ONE {
2182                self.write_bit_plus_pending(false);
2183            } else {
2184                self.pending_bits += 1;
2185                self.low -= ARITH_HALF;
2186            }
2187            self.range <<= 1;
2188            self.low <<= 1;
2189        }
2190
2191        model.update(sym_idx);
2192    }
2193
2194    fn write_bit_string(&mut self, model: &mut ArithmeticModel, val: u32, n: u32) {
2195        for i in 0..n {
2196            let bit = ((val >> i) & 1) as u16;
2197            self.encode_symbol(model, bit);
2198        }
2199    }
2200
2201    fn finish(mut self) -> Vec<u8> {
2202        // Flush remaining bits
2203        self.pending_bits += 1;
2204        if self.low < ARITH_HALF {
2205            self.write_bit_plus_pending(false);
2206        } else {
2207            self.write_bit_plus_pending(true);
2208        }
2209        
2210        // Flush bit buffer
2211        if self.bits_in_buf > 0 {
2212            self.bit_buf <<= 8 - self.bits_in_buf;
2213            self.data.push(self.bit_buf);
2214        }
2215        
2216        self.data
2217    }
2218}
2219
2220// --- Arsenic Encoder (BWT + MTF + RLE + Arithmetic) ---
2221
2222struct SitArsenicEncoder {
2223    encoder: ArithmeticEncoder,
2224    block_bits: u32,
2225}
2226
2227impl SitArsenicEncoder {
2228    fn new(block_bits: u32) -> Self {
2229        Self {
2230            encoder: ArithmeticEncoder::new(),
2231            block_bits,
2232        }
2233    }
2234
2235    fn compress(mut self, data: &[u8]) -> Vec<u8> {
2236        if data.is_empty() {
2237            return Vec::new();
2238        }
2239
2240        let mut initial_model = ArithmeticModel::new(0, 2, 1, 256);
2241
2242        // Write "As" signature
2243        self.encoder.write_bit_string(&mut initial_model, 'A' as u32, 8);
2244        self.encoder.write_bit_string(&mut initial_model, 's' as u32, 8);
2245
2246        // Write block_bits - 9
2247        self.encoder.write_bit_string(&mut initial_model, self.block_bits - 9, 4);
2248
2249        let block_size = 1 << self.block_bits;
2250
2251        // Process data in blocks
2252        let mut pos = 0;
2253        while pos < data.len() {
2254            let block_end = (pos + block_size).min(data.len());
2255            let block = &data[pos..block_end];
2256
2257            // Signal more blocks
2258            self.encoder.encode_symbol(&mut initial_model, 0);
2259
2260            // No randomization
2261            self.encoder.encode_symbol(&mut initial_model, 0);
2262
2263            // BWT
2264            let (bwt_data, transform_index) = burrows_wheeler_transform(block);
2265
2266            // Write transform index
2267            self.encoder.write_bit_string(&mut initial_model, transform_index as u32, self.block_bits);
2268
2269            // MTF encode
2270            let mtf_data = move_to_front_encode(&bwt_data);
2271
2272            // Write MTF data with selector model
2273            let mut selector_model = ArithmeticModel::new(0, 11, 8, 1024);
2274            let mut mtf_models = [
2275                ArithmeticModel::new(2, 2, 8, 1024),
2276                ArithmeticModel::new(4, 4, 4, 1024),
2277                ArithmeticModel::new(8, 8, 4, 1024),
2278                ArithmeticModel::new(16, 16, 4, 1024),
2279                ArithmeticModel::new(32, 32, 2, 1024),
2280                ArithmeticModel::new(64, 64, 2, 1024),
2281                ArithmeticModel::new(128, 128, 1, 1024),
2282            ];
2283
2284            self.encode_mtf_block(&mtf_data, &mut selector_model, &mut mtf_models);
2285
2286            pos = block_end;
2287        }
2288
2289        // Signal end of data
2290        self.encoder.encode_symbol(&mut initial_model, 1);
2291
2292        self.encoder.finish()
2293    }
2294
2295    fn encode_mtf_block(
2296        &mut self,
2297        mtf_data: &[u8],
2298        selector_model: &mut ArithmeticModel,
2299        mtf_models: &mut [ArithmeticModel; 7],
2300    ) {
2301        let mut i = 0;
2302        while i < mtf_data.len() {
2303            let val = mtf_data[i] as usize;
2304
2305            if val == 0 {
2306                // Run of zeros - encode with RLE
2307                let mut run_len = 1;
2308                while i + run_len < mtf_data.len() && mtf_data[i + run_len] == 0 {
2309                    run_len += 1;
2310                }
2311                // Encode run length as bijective base-2 sequence
2312                // 1 -> 0, 2 -> 1, 3 -> 00, 4 -> 01, 5 -> 10, 6 -> 11, etc.
2313                let mut remaining = run_len;
2314                while remaining > 0 {
2315                    if remaining == 1 {
2316                        self.encoder.encode_symbol(selector_model, 0);
2317                        remaining = 0;
2318                    } else if remaining == 2 {
2319                        self.encoder.encode_symbol(selector_model, 1);
2320                        remaining = 0;
2321                    } else {
2322                        // remaining >= 3
2323                        let bit = (remaining - 1) & 1;
2324                        self.encoder.encode_symbol(selector_model, bit as u16);
2325                        remaining = (remaining - 1) / 2;
2326                    }
2327                }
2328                i += run_len;
2329            } else {
2330                // Non-zero symbol
2331                if val == 1 {
2332                    self.encoder.encode_symbol(selector_model, 2);
2333                } else if val < 4 {
2334                    self.encoder.encode_symbol(selector_model, 3);
2335                    self.encoder.encode_symbol(&mut mtf_models[0], (val - 2) as u16 + 2);
2336                } else if val < 8 {
2337                    self.encoder.encode_symbol(selector_model, 4);
2338                    self.encoder.encode_symbol(&mut mtf_models[1], (val - 4) as u16 + 4);
2339                } else if val < 16 {
2340                    self.encoder.encode_symbol(selector_model, 5);
2341                    self.encoder.encode_symbol(&mut mtf_models[2], (val - 8) as u16 + 8);
2342                } else if val < 32 {
2343                    self.encoder.encode_symbol(selector_model, 6);
2344                    self.encoder.encode_symbol(&mut mtf_models[3], (val - 16) as u16 + 16);
2345                } else if val < 64 {
2346                    self.encoder.encode_symbol(selector_model, 7);
2347                    self.encoder.encode_symbol(&mut mtf_models[4], (val - 32) as u16 + 32);
2348                } else if val < 128 {
2349                    self.encoder.encode_symbol(selector_model, 8);
2350                    self.encoder.encode_symbol(&mut mtf_models[5], (val - 64) as u16 + 64);
2351                } else {
2352                    self.encoder.encode_symbol(selector_model, 9);
2353                    self.encoder.encode_symbol(&mut mtf_models[6], (val - 128) as u16 + 128);
2354                }
2355                i += 1;
2356            }
2357        }
2358
2359        // End of block
2360        self.encoder.encode_symbol(selector_model, 10);
2361    }
2362}
2363
/// Computes the Burrows-Wheeler transform of `data`.
///
/// All cyclic rotations of `data` are sorted lexicographically (equal
/// rotations keep their original order). Returns the last byte of each
/// rotation in sorted order, together with the position at which the
/// unrotated input ended up. Empty input yields `(vec![], 0)`.
fn burrows_wheeler_transform(data: &[u8]) -> (Vec<u8>, usize) {
    if data.is_empty() {
        return (Vec::new(), 0);
    }
    let n = data.len();

    // Sort rotation start offsets by comparing the rotations themselves.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        let rotation_a = data[a..].iter().chain(&data[..a]);
        let rotation_b = data[b..].iter().chain(&data[..b]);
        rotation_a.cmp(rotation_b)
    });

    // The byte preceding each rotation's start is that rotation's last byte.
    let last_column: Vec<u8> = order.iter().map(|&start| data[(start + n - 1) % n]).collect();
    let transform_index = order.iter().position(|&start| start == 0).unwrap_or(0);

    (last_column, transform_index)
}
2397
/// Move-to-front encodes `data`.
///
/// Starting from the table `0, 1, ..., 255`, each input byte is replaced by
/// its current position in the table and then moved to the front.
fn move_to_front_encode(data: &[u8]) -> Vec<u8> {
    let mut table = [0u8; 256];
    for (value, slot) in table.iter_mut().enumerate() {
        *slot = value as u8;
    }

    data.iter()
        .map(|&byte| {
            let rank = table
                .iter()
                .position(|&entry| entry == byte)
                .expect("the table always holds every byte value");
            // Shift the entries in front of `rank` back by one and put the
            // byte at index 0.
            table[..=rank].rotate_right(1);
            rank as u8
        })
        .collect()
}
2414
2415fn compress_arsenic(data: &[u8]) -> Vec<u8> {
2416    // Use block_bits = 17 (128KB blocks) as a reasonable default
2417    let encoder = SitArsenicEncoder::new(17);
2418    encoder.compress(data)
2419}
2420
/// Decoder for Arsenic (method 15) streams.
struct SitArsenicDecoder<'a> {
    /// Arithmetic decoder from which every symbol of the stream is read.
    decoder: ArithmeticDecoder<'a>,
}
2424
2425impl<'a> SitArsenicDecoder<'a> {
2426    fn new(data: &'a [u8]) -> Self {
2427        Self {
2428            decoder: ArithmeticDecoder::new(BitReader::new(data)),
2429        }
2430    }
2431
2432    fn decompress(&mut self, uncomp_len: usize) -> Result<Vec<u8>, SitError> {
2433        let mut output = Vec::with_capacity(uncomp_len);
2434        let mut initial_model = ArithmeticModel::new(0, 2, 1, 256);
2435
2436        if self.decoder.read_bit_string(&mut initial_model, 8) != 'A' as u32 {
2437            return Err(SitError::Decompression(
2438                "Invalid Arsenic signature (A)".into(),
2439            ));
2440        }
2441        if self.decoder.read_bit_string(&mut initial_model, 8) != 's' as u32 {
2442            return Err(SitError::Decompression(
2443                "Invalid Arsenic signature (s)".into(),
2444            ));
2445        }
2446
2447        let block_bits = self.decoder.read_bit_string(&mut initial_model, 4) + 9;
2448        let block_size = 1 << block_bits;
2449
2450        let mut selector_model = ArithmeticModel::new(0, 11, 8, 1024);
2451        let mut mtf_models = [
2452            ArithmeticModel::new(2, 2, 8, 1024),
2453            ArithmeticModel::new(4, 4, 4, 1024),
2454            ArithmeticModel::new(8, 8, 4, 1024),
2455            ArithmeticModel::new(16, 16, 4, 1024),
2456            ArithmeticModel::new(32, 32, 2, 1024),
2457            ArithmeticModel::new(64, 64, 2, 1024),
2458            ArithmeticModel::new(128, 128, 1, 1024),
2459        ];
2460
2461        while output.len() < uncomp_len {
2462            if self.decoder.next_symbol(&mut initial_model) != 0 {
2463                break;
2464            }
2465
2466            let randomized = self.decoder.next_symbol(&mut initial_model) != 0;
2467            let transform_index_start =
2468                self.decoder.read_bit_string(&mut initial_model, block_bits) as usize;
2469
2470            let mut block = Vec::with_capacity(block_size);
2471            let mut mtf = (0..=255u8).collect::<Vec<_>>();
2472
2473            loop {
2474                let sel = self.decoder.next_symbol(&mut selector_model);
2475                if sel <= 1 {
2476                    let mut zero_state = 1;
2477                    let mut zero_count = 0;
2478                    let mut current_sel = sel;
2479                    while current_sel < 2 {
2480                        if current_sel == 0 {
2481                            zero_count += zero_state;
2482                        } else {
2483                            zero_count += 2 * zero_state;
2484                        }
2485                        zero_state *= 2;
2486                        current_sel = self.decoder.next_symbol(&mut selector_model);
2487                    }
2488                    let sym = mtf[0];
2489                    for _ in 0..zero_count {
2490                        block.push(sym);
2491                    }
2492                    if current_sel == 10 {
2493                        break;
2494                    }
2495                    let symbol = if current_sel == 2 {
2496                        1
2497                    } else {
2498                        self.decoder
2499                            .next_symbol(&mut mtf_models[current_sel as usize - 3])
2500                            as usize
2501                    };
2502                    let val = mtf.remove(symbol);
2503                    mtf.insert(0, val);
2504                    block.push(val);
2505                } else if sel == 10 {
2506                    break;
2507                } else {
2508                    let symbol = if sel == 2 {
2509                        1
2510                    } else {
2511                        self.decoder.next_symbol(&mut mtf_models[sel as usize - 3]) as usize
2512                    };
2513                    let val = mtf.remove(symbol);
2514                    mtf.insert(0, val);
2515                    block.push(val);
2516                }
2517            }
2518
2519            if transform_index_start >= block.len() {
2520                break;
2521            }
2522
2523            selector_model.reset();
2524            for m in &mut mtf_models {
2525                m.reset();
2526            }
2527
2528            let mut transform = vec![0usize; block.len()];
2529            
2530            // Optimized 4-way parallel histogram to reduce cache conflicts
2531            let mut counts0 = [0usize; 256];
2532            let mut counts1 = [0usize; 256];
2533            let mut counts2 = [0usize; 256];
2534            let mut counts3 = [0usize; 256];
2535            
2536            let chunks = block.chunks_exact(4);
2537            let remainder = chunks.remainder();
2538            for chunk in chunks {
2539                counts0[chunk[0] as usize] += 1;
2540                counts1[chunk[1] as usize] += 1;
2541                counts2[chunk[2] as usize] += 1;
2542                counts3[chunk[3] as usize] += 1;
2543            }
2544            for &b in remainder {
2545                counts0[b as usize] += 1;
2546            }
2547            
2548            // Merge counts
2549            let mut counts = [0usize; 256];
2550            for i in 0..256 {
2551                counts[i] = counts0[i] + counts1[i] + counts2[i] + counts3[i];
2552            }
2553            
2554            // Compute prefix sums
2555            let mut sum = 0usize;
2556            let mut start_pos = [0usize; 256];
2557            for i in 0..256 {
2558                start_pos[i] = sum;
2559                sum += counts[i];
2560            }
2561            
2562            // Build transform vector
2563            let mut current_pos_in_counts = start_pos;
2564            for (i, &b) in block.iter().enumerate() {
2565                transform[current_pos_in_counts[b as usize]] = i;
2566                current_pos_in_counts[b as usize] += 1;
2567            }
2568
2569            let mut byte_count = 0;
2570            let mut idx = transform_index_start;
2571            let mut count = 0;
2572            let mut last = 0u8;
2573            let mut repeat = 0;
2574            let mut rand_idx = 0;
2575            let mut rand_val = RANDOMIZATION_TABLE[0] as usize;
2576
2577            while (byte_count < block.len() || repeat > 0) && output.len() < uncomp_len {
2578                if repeat > 0 {
2579                    output.push(last);
2580                    repeat -= 1;
2581                } else {
2582                    idx = transform[idx];
2583                    let mut b = block[idx];
2584
2585                    if randomized && rand_val == byte_count {
2586                        b ^= 1;
2587                        rand_idx = (rand_idx + 1) & 255;
2588                        rand_val += RANDOMIZATION_TABLE[rand_idx] as usize;
2589                    }
2590                    byte_count += 1;
2591
2592                    if count == 4 {
2593                        count = 0;
2594                        if b == 0 {
2595                            continue;
2596                        }
2597                        repeat = (b - 1) as usize;
2598                        output.push(last);
2599                    } else {
2600                        if b == last {
2601                            count += 1;
2602                        } else {
2603                            count = 1;
2604                            last = b;
2605                        }
2606                        output.push(b);
2607                    }
2608                }
2609            }
2610        }
2611
2612        Ok(output)
2613    }
2614}
2615
/// Fixed table of 37 code values, stored as `u32`.
///
/// It has the same number of entries as [`META_CODE_LENGTHS`].
///
/// NOTE(review): presumably these are the bit patterns of a fixed prefix
/// ("meta") code used by the method 13 decoder, with entry `i` paired with
/// `META_CODE_LENGTHS[i]` — the consumer is outside this section; confirm
/// against the decoder before relying on this.
const META_CODES: [u32; 37] = [
    0x5d8, 0x058, 0x040, 0x0c0, 0x000, 0x078, 0x02b, 0x014, 0x00c, 0x01c, 0x01b, 0x00b, 0x010,
    0x020, 0x038, 0x018, 0x0d8, 0xbd8, 0x180, 0x680, 0x380, 0xf80, 0x780, 0x480, 0x080, 0x280,
    0x3d8, 0xfd8, 0x7d8, 0x9d8, 0x1d8, 0x004, 0x001, 0x002, 0x007, 0x003, 0x008,
];
/// Fixed table of 37 lengths, stored as `i32`.
///
/// It has the same number of entries as [`META_CODES`].
///
/// NOTE(review): presumably entry `i` is the length in bits of
/// `META_CODES[i]` — the consumer is outside this section; confirm against
/// the decoder before relying on this.
const META_CODE_LENGTHS: [i32; 37] = [
    11, 8, 8, 8, 8, 7, 6, 5, 5, 5, 5, 6, 5, 6, 7, 7, 9, 12, 10, 11, 11, 12, 12, 11, 11, 11, 12, 12,
    12, 12, 12, 5, 2, 2, 3, 4, 5,
];
2625
/// Step sizes that drive the "randomized" block fix-up in the block decoder.
///
/// When a block is flagged as randomized, the decoder starts with entry 0 as
/// its first target byte count. Each time the running count of decoded block
/// bytes equals the target, it XORs that byte with 1, advances the table
/// index (wrapping modulo 256), and adds the next entry to the target.
///
/// Entries are `u16` because some values (`0x100`, `0x101`) do not fit in a
/// byte.
const RANDOMIZATION_TABLE: [u16; 256] = [
    0xee, 0x56, 0xf8, 0xc3, 0x9d, 0x9f, 0xae, 0x2c, 0xad, 0xcd, 0x24, 0x9d, 0xa6, 0x101, 0x18,
    0xb9, 0xa1, 0x82, 0x75, 0xe9, 0x9f, 0x55, 0x66, 0x6a, 0x86, 0x71, 0xdc, 0x84, 0x56, 0x96, 0x56,
    0xa1, 0x84, 0x78, 0xb7, 0x32, 0x6a, 0x3, 0xe3, 0x2, 0x11, 0x101, 0x8, 0x44, 0x83, 0x100, 0x43,
    0xe3, 0x1c, 0xf0, 0x86, 0x6a, 0x6b, 0xf, 0x3, 0x2d, 0x86, 0x17, 0x7b, 0x10, 0xf6, 0x80, 0x78,
    0x7a, 0xa1, 0xe1, 0xef, 0x8c, 0xf6, 0x87, 0x4b, 0xa7, 0xe2, 0x77, 0xfa, 0xb8, 0x81, 0xee, 0x77,
    0xc0, 0x9d, 0x29, 0x20, 0x27, 0x71, 0x12, 0xe0, 0x6b, 0xd1, 0x7c, 0xa, 0x89, 0x7d, 0x87, 0xc4,
    0x101, 0xc1, 0x31, 0xaf, 0x38, 0x3, 0x68, 0x1b, 0x76, 0x79, 0x3f, 0xdb, 0xc7, 0x1b, 0x36, 0x7b,
    0xe2, 0x63, 0x81, 0xee, 0xc, 0x63, 0x8b, 0x78, 0x38, 0x97, 0x9b, 0xd7, 0x8f, 0xdd, 0xf2, 0xa3,
    0x77, 0x8c, 0xc3, 0x39, 0x20, 0xb3, 0x12, 0x11, 0xe, 0x17, 0x42, 0x80, 0x2c, 0xc4, 0x92, 0x59,
    0xc8, 0xdb, 0x40, 0x76, 0x64, 0xb4, 0x55, 0x1a, 0x9e, 0xfe, 0x5f, 0x6, 0x3c, 0x41, 0xef, 0xd4,
    0xaa, 0x98, 0x29, 0xcd, 0x1f, 0x2, 0xa8, 0x87, 0xd2, 0xa0, 0x93, 0x98, 0xef, 0xc, 0x43, 0xed,
    0x9d, 0xc2, 0xeb, 0x81, 0xe9, 0x64, 0x23, 0x68, 0x1e, 0x25, 0x57, 0xde, 0x9a, 0xcf, 0x7f, 0xe5,
    0xba, 0x41, 0xea, 0xea, 0x36, 0x1a, 0x28, 0x79, 0x20, 0x5e, 0x18, 0x4e, 0x7c, 0x8e, 0x58, 0x7a,
    0xef, 0x91, 0x2, 0x93, 0xbb, 0x56, 0xa1, 0x49, 0x1b, 0x79, 0x92, 0xf3, 0x58, 0x4f, 0x52, 0x9c,
    0x2, 0x77, 0xaf, 0x2a, 0x8f, 0x49, 0xd0, 0x99, 0x4d, 0x98, 0x101, 0x60, 0x93, 0x100, 0x75,
    0x31, 0xce, 0x49, 0x20, 0x56, 0x57, 0xe2, 0xf5, 0x26, 0x2b, 0x8a, 0xbf, 0xde, 0xd0, 0x83, 0x34,
    0xf4, 0x17,
];
2645
/// Entry counts for the five offset code tables.
///
/// The values (11, 13, 14, 11, 11) match, in order, the lengths of the five
/// slices in [`OFFSET_CODE_LENGTHS`]; keep the two tables in sync when
/// editing either one.
const OFFSET_CODE_SIZES: [usize; 5] = [11, 13, 14, 11, 11];
2647
/// Five predefined sets of code lengths, one slice per set.
///
/// The inline comment attributes the first set to `FirstCodeLengths_1` in
/// `XADStuffIt13Handle.m`.
///
/// NOTE(review): presumably each slice holds the per-symbol bit lengths of a
/// prebuilt "first" Huffman code for the method 13 decoder, and the set is
/// chosen by the stream header — the consumer is outside this section;
/// confirm against the decoder.
const FIRST_CODE_LENGTHS: [&[i32]; 5] = [
    // FirstCodeLengths_1 from XADStuffIt13Handle.m
    &[
        4, 5, 7, 8, 8, 9, 9, 9, 9, 7, 9, 9, 9, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 9, 9, 10, 10, 9,
        10, 9, 9, 5, 9, 9, 9, 9, 10, 9, 9, 9, 9, 9, 9, 9, 9, 7, 9, 9, 8, 9, 9, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 8, 9, 9, 8, 8, 9, 9, 9, 9, 9, 9, 9, 7, 8, 9, 7, 9, 9, 7, 7, 9, 9, 9, 9,
        10, 9, 10, 10, 10, 9, 9, 9, 5, 9, 8, 7, 5, 9, 8, 8, 7, 9, 9, 8, 8, 5, 5, 7, 10, 5, 8, 5, 8,
        9, 9, 9, 9, 9, 10, 9, 9, 10, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10,
        10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10,
        10, 10, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10,
        10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 9, 9, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 9, 10, 9, 5, 6, 5, 5, 8, 9,
        9, 9, 9, 9, 9, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 9, 10, 9, 9, 9, 10, 9, 10, 9, 10, 9, 10, 9, 10, 10, 10, 9, 10, 9,
        10, 10, 9, 9, 9, 6, 9, 9, 10, 9, 5,
    ],
    &[
        4, 7, 7, 8, 7, 8, 8, 8, 8, 7, 8, 7, 8, 7, 9, 8, 8, 8, 9, 9, 9, 9, 10, 10, 9, 10, 10, 10,
        10, 10, 9, 9, 5, 9, 8, 9, 9, 11, 10, 9, 8, 9, 9, 9, 8, 9, 7, 8, 8, 8, 9, 9, 9, 9, 9, 10, 9,
        9, 9, 10, 9, 9, 10, 9, 8, 8, 7, 7, 7, 8, 8, 9, 8, 8, 9, 9, 8, 8, 7, 8, 7, 10, 8, 7, 7, 9,
        9, 9, 9, 10, 10, 11, 11, 11, 10, 9, 8, 6, 8, 7, 7, 5, 7, 7, 7, 6, 9, 8, 6, 7, 6, 6, 7, 9,
        6, 6, 6, 7, 8, 8, 8, 8, 9, 10, 9, 10, 9, 9, 8, 9, 10, 10, 9, 10, 10, 9, 9, 10, 10, 10, 10,
        10, 10, 10, 9, 10, 10, 11, 10, 10, 10, 10, 10, 10, 10, 11, 10, 11, 10, 10, 9, 11, 10, 10,
        10, 10, 10, 10, 9, 9, 10, 11, 10, 11, 10, 11, 10, 12, 10, 11, 10, 12, 11, 12, 10, 12, 10,
        11, 10, 11, 11, 11, 9, 10, 11, 11, 11, 12, 12, 10, 10, 10, 11, 11, 10, 11, 10, 10, 9, 11,
        10, 11, 10, 11, 11, 11, 10, 11, 11, 12, 11, 11, 10, 10, 10, 11, 10, 10, 11, 11, 12, 10, 10,
        11, 11, 12, 11, 11, 10, 11, 9, 12, 10, 11, 11, 11, 10, 11, 10, 11, 10, 11, 9, 10, 9, 7, 3,
        5, 6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 11, 10, 10, 10, 12, 13, 11, 12, 12, 11, 13, 12, 12, 11,
        12, 12, 13, 12, 14, 13, 14, 13, 15, 13, 14, 15, 15, 14, 13, 15, 15, 14, 15, 14, 15, 15, 14,
        15, 13, 13, 14, 15, 15, 14, 14, 16, 16, 15, 15, 15, 12, 15, 10,
    ],
    &[
        6, 6, 6, 6, 6, 9, 8, 8, 4, 9, 8, 9, 8, 9, 9, 9, 8, 9, 9, 10, 8, 10, 10, 10, 9, 10, 10, 10,
        9, 10, 10, 9, 9, 9, 8, 10, 9, 10, 9, 10, 9, 10, 9, 10, 9, 9, 8, 9, 8, 9, 9, 9, 10, 10, 10,
        10, 9, 9, 9, 10, 9, 10, 9, 9, 7, 8, 8, 9, 8, 9, 9, 9, 8, 9, 9, 10, 9, 9, 8, 9, 8, 9, 8, 8,
        8, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 9, 8, 8, 9, 8, 9, 7, 8, 8, 9, 8, 10, 10, 8, 9, 8, 8,
        8, 10, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 10, 9, 7, 9, 9, 10, 10, 10, 10, 10, 9, 10,
        10, 10, 10, 10, 10, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 9, 10,
        10, 10, 10, 10, 10, 10, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 9, 10, 10, 10, 10, 9, 8, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 9,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 9, 10, 10, 10, 10, 10, 10, 9,
        10, 10, 10, 10, 10, 10, 9, 9, 9, 10, 10, 10, 10, 10, 10, 9, 9, 10, 9, 9, 8, 9, 8, 9, 4, 6,
        6, 6, 7, 8, 8, 9, 9, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 7, 10, 10, 10, 7, 10, 10, 7, 7, 7, 7, 7, 6, 7, 10, 7, 7, 10, 7, 7, 7, 6, 7, 6,
        6, 7, 7, 6, 6, 9, 6, 9, 10, 6, 10,
    ],
    &[
        2, 6, 6, 7, 7, 8, 7, 8, 7, 8, 8, 9, 8, 9, 9, 9, 8, 8, 9, 9, 9, 10, 10, 9, 8, 10, 9, 10, 9,
        10, 9, 9, 6, 9, 8, 9, 9, 10, 9, 9, 9, 10, 9, 9, 9, 9, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9,
        9, 9, 9, 10, 10, 9, 7, 7, 8, 8, 8, 8, 9, 9, 7, 8, 9, 10, 8, 8, 7, 8, 8, 10, 8, 8, 8, 9, 8,
        9, 9, 10, 9, 11, 10, 11, 9, 9, 8, 7, 9, 8, 8, 6, 8, 8, 8, 7, 10, 9, 7, 8, 7, 7, 8, 10, 7,
        7, 7, 8, 9, 9, 9, 9, 10, 11, 9, 11, 10, 9, 7, 9, 10, 10, 10, 11, 11, 10, 10, 11, 10, 10,
        10, 11, 11, 10, 9, 10, 10, 11, 10, 11, 10, 11, 10, 10, 10, 11, 10, 11, 10, 10, 9, 10, 10,
        11, 10, 11, 10, 11, 9, 10, 10, 10, 10, 11, 10, 11, 10, 11, 10, 11, 11, 11, 10, 12, 10, 11,
        10, 11, 10, 11, 11, 10, 8, 10, 10, 11, 10, 11, 11, 11, 10, 11, 10, 11, 10, 11, 11, 11, 9,
        10, 11, 11, 10, 11, 11, 11, 10, 11, 11, 11, 10, 10, 10, 10, 10, 11, 10, 10, 11, 11, 10, 10,
        9, 11, 10, 10, 11, 11, 10, 10, 10, 11, 10, 10, 10, 10, 10, 10, 9, 11, 10, 10, 8, 10, 8, 6,
        5, 6, 6, 7, 7, 8, 8, 8, 9, 10, 11, 10, 10, 11, 11, 12, 12, 10, 11, 12, 12, 12, 12, 13, 13,
        13, 13, 13, 12, 13, 13, 15, 14, 12, 14, 15, 16, 12, 12, 13, 15, 14, 16, 15, 17, 18, 15, 17,
        16, 15, 15, 15, 15, 13, 13, 10, 14, 12, 13, 17, 17, 18, 10, 17, 4,
    ],
    &[
        7, 9, 9, 9, 9, 9, 9, 9, 9, 8, 9, 9, 9, 7, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 9, 10, 9, 10, 9,
        10, 9, 9, 5, 9, 7, 9, 9, 9, 9, 9, 7, 7, 7, 9, 7, 7, 8, 7, 8, 8, 7, 7, 9, 9, 9, 9, 7, 7, 7,
        9, 9, 9, 9, 9, 9, 7, 9, 7, 7, 7, 7, 9, 9, 7, 9, 9, 7, 7, 7, 7, 7, 9, 7, 8, 7, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 7, 8, 7, 7, 7, 8, 8, 6, 7, 9, 7, 7, 8, 7, 5, 6, 9, 5, 7, 5, 6, 7,
        7, 9, 8, 9, 9, 9, 9, 9, 9, 9, 9, 10, 9, 10, 10, 10, 9, 9, 10, 10, 10, 10, 10, 10, 10, 9,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 9, 10, 10, 10, 9, 9, 10, 9, 9,
        9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9,
        10, 10, 10, 9, 10, 10, 10, 9, 9, 9, 10, 10, 10, 10, 10, 9, 10, 9, 10, 10, 9, 10, 10, 9, 10,
        10, 10, 10, 10, 10, 10, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9,
        10, 10, 10, 10, 10, 10, 10, 9, 10, 9, 10, 9, 10, 10, 9, 5, 6, 8, 8, 7, 7, 7, 9, 9, 9, 9, 9,
        9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 10, 10, 10, 10, 10, 10, 10,
        10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 9, 10, 10, 5, 10, 8, 9, 8,
        9,
    ],
];
/// Five predefined sets of code lengths, one slice per set, laid out like
/// [`FIRST_CODE_LENGTHS`].
///
/// NOTE(review): presumably each slice holds the per-symbol bit lengths of a
/// prebuilt "second" Huffman code for the method 13 decoder, paired by index
/// with the matching `FIRST_CODE_LENGTHS` set — the consumer is outside this
/// section; confirm against the decoder.
const SECOND_CODE_LENGTHS: [&[i32]; 5] = [
    &[
        4, 5, 6, 6, 7, 7, 6, 7, 7, 7, 6, 8, 7, 8, 8, 8, 8, 9, 6, 9, 8, 9, 8, 9, 9, 9, 8, 10, 5, 9,
        7, 9, 6, 9, 8, 10, 9, 10, 8, 8, 9, 9, 7, 9, 8, 9, 8, 9, 8, 8, 6, 9, 9, 8, 8, 9, 9, 10, 8,
        9, 9, 10, 8, 10, 8, 8, 8, 8, 8, 9, 7, 10, 6, 9, 9, 11, 7, 8, 8, 9, 8, 10, 7, 8, 6, 9, 10,
        9, 9, 10, 8, 11, 9, 11, 9, 10, 9, 8, 9, 8, 8, 8, 8, 10, 9, 9, 10, 10, 8, 9, 8, 8, 8, 11, 9,
        8, 8, 9, 9, 10, 8, 11, 10, 10, 8, 10, 9, 10, 8, 9, 9, 11, 9, 11, 9, 10, 10, 11, 10, 12, 9,
        12, 10, 11, 10, 11, 9, 10, 10, 11, 10, 11, 10, 11, 10, 11, 10, 10, 10, 9, 9, 9, 8, 7, 6, 8,
        11, 11, 9, 12, 10, 12, 9, 11, 11, 11, 10, 12, 11, 11, 10, 12, 10, 11, 10, 10, 10, 11, 10,
        11, 11, 11, 9, 12, 10, 12, 11, 12, 10, 11, 10, 12, 11, 12, 11, 12, 11, 12, 10, 12, 11, 12,
        11, 11, 10, 12, 10, 11, 10, 12, 10, 12, 10, 12, 10, 11, 11, 11, 10, 11, 11, 11, 10, 12, 11,
        12, 10, 10, 11, 11, 9, 12, 11, 12, 10, 11, 10, 12, 10, 11, 10, 12, 10, 11, 10, 7, 5, 4, 6,
        6, 7, 7, 7, 8, 8, 7, 7, 6, 8, 6, 7, 7, 9, 8, 9, 9, 10, 11, 11, 11, 12, 11, 10, 11, 12, 11,
        12, 11, 12, 12, 12, 12, 11, 12, 12, 11, 12, 11, 12, 11, 13, 11, 12, 10, 13, 10, 14, 14, 13,
        14, 15, 14, 16, 15, 15, 18, 18, 18, 9, 18, 8,
    ],
    &[
        5, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 8, 7, 8, 7, 7, 7, 8, 8, 8, 8, 9, 8, 9, 8, 9, 9, 9, 7, 9,
        8, 8, 6, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 9, 8, 8, 8, 8, 8, 9, 8, 9, 8, 9, 9, 10, 8, 10,
        8, 9, 9, 8, 8, 8, 7, 8, 8, 9, 8, 9, 7, 9, 8, 10, 8, 9, 8, 9, 8, 9, 8, 8, 8, 9, 9, 9, 9, 10,
        9, 11, 9, 10, 9, 10, 8, 8, 8, 9, 8, 8, 8, 9, 9, 8, 9, 10, 8, 9, 8, 8, 8, 11, 8, 7, 8, 9, 9,
        9, 9, 10, 9, 10, 9, 10, 9, 8, 8, 9, 9, 10, 9, 10, 9, 10, 8, 10, 9, 10, 9, 11, 10, 11, 9,
        11, 10, 10, 10, 11, 9, 11, 9, 10, 9, 11, 9, 11, 10, 10, 9, 10, 9, 9, 8, 10, 9, 11, 9, 9, 9,
        11, 10, 11, 9, 11, 9, 11, 9, 11, 10, 11, 10, 11, 10, 11, 9, 10, 10, 11, 10, 10, 8, 10, 9,
        10, 10, 11, 9, 11, 9, 10, 10, 11, 9, 10, 10, 9, 9, 10, 9, 10, 9, 10, 9, 10, 9, 11, 9, 11,
        10, 10, 9, 10, 9, 11, 9, 11, 9, 11, 9, 10, 9, 11, 9, 11, 9, 11, 9, 10, 8, 11, 9, 10, 9, 10,
        9, 10, 8, 10, 8, 9, 8, 9, 8, 7, 4, 4, 5, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 7, 8, 8, 9, 9, 10,
        10, 10, 10, 10, 10, 11, 11, 10, 10, 12, 11, 11, 12, 12, 11, 12, 12, 11, 12, 12, 12, 12, 12,
        12, 11, 12, 11, 13, 12, 13, 12, 13, 14, 14, 14, 15, 13, 14, 13, 14, 18, 18, 17, 7, 16, 9,
    ],
    &[
        5, 6, 6, 6, 6, 7, 7, 7, 6, 8, 7, 8, 7, 9, 8, 8, 7, 7, 8, 9, 9, 9, 9, 10, 8, 9, 9, 10, 8,
        10, 9, 8, 6, 10, 8, 10, 8, 10, 9, 9, 9, 9, 9, 10, 9, 9, 8, 9, 8, 9, 8, 9, 9, 10, 9, 10, 9,
        9, 8, 10, 9, 11, 10, 8, 8, 8, 8, 9, 7, 9, 9, 10, 8, 9, 8, 11, 9, 10, 9, 10, 8, 9, 9, 9, 9,
        8, 9, 9, 10, 10, 10, 12, 10, 11, 10, 10, 8, 9, 9, 9, 8, 9, 8, 8, 10, 9, 10, 11, 8, 10, 9,
        9, 8, 12, 8, 9, 9, 9, 9, 8, 9, 10, 9, 12, 10, 10, 10, 8, 7, 11, 10, 9, 10, 11, 9, 11, 7,
        11, 10, 12, 10, 12, 10, 11, 9, 11, 9, 12, 10, 12, 10, 12, 10, 9, 11, 12, 10, 12, 10, 11, 9,
        10, 9, 10, 9, 11, 11, 12, 9, 10, 8, 12, 11, 12, 9, 12, 10, 12, 10, 13, 10, 12, 10, 12, 10,
        12, 10, 9, 10, 12, 10, 9, 8, 11, 10, 12, 10, 12, 10, 12, 10, 11, 10, 12, 8, 12, 10, 11, 10,
        10, 10, 12, 9, 11, 10, 12, 10, 12, 11, 12, 10, 9, 10, 12, 9, 10, 10, 12, 10, 11, 10, 11,
        10, 12, 8, 12, 9, 12, 8, 12, 8, 11, 10, 11, 10, 11, 9, 10, 8, 10, 9, 9, 8, 9, 8, 7, 4, 3,
        5, 5, 6, 5, 6, 6, 7, 7, 8, 8, 8, 7, 7, 7, 9, 8, 9, 9, 11, 9, 11, 9, 8, 9, 9, 11, 12, 11,
        12, 12, 13, 13, 12, 13, 14, 13, 14, 13, 14, 13, 13, 13, 12, 13, 13, 12, 13, 13, 14, 14, 13,
        13, 14, 14, 14, 14, 15, 18, 17, 18, 8, 16, 10,
    ],
    &[
        4, 5, 6, 6, 6, 6, 7, 7, 6, 7, 7, 9, 6, 8, 8, 7, 7, 8, 8, 8, 6, 9, 8, 8, 7, 9, 8, 9, 8, 9,
        8, 9, 6, 9, 8, 9, 8, 10, 9, 9, 8, 10, 8, 10, 8, 9, 8, 9, 8, 8, 7, 9, 9, 9, 9, 9, 8, 10, 9,
        10, 9, 10, 9, 8, 7, 8, 9, 9, 8, 9, 9, 9, 7, 10, 9, 10, 9, 9, 8, 9, 8, 9, 8, 8, 8, 9, 9, 10,
        9, 9, 8, 11, 9, 11, 10, 10, 8, 8, 10, 8, 8, 9, 9, 9, 10, 9, 10, 11, 9, 9, 9, 9, 8, 9, 8, 8,
        8, 10, 10, 9, 9, 8, 10, 11, 10, 11, 11, 9, 8, 9, 10, 11, 9, 10, 11, 11, 9, 12, 10, 10, 10,
        12, 11, 11, 9, 11, 11, 12, 9, 11, 9, 10, 10, 10, 10, 12, 9, 11, 10, 11, 9, 11, 11, 11, 10,
        11, 11, 12, 9, 10, 10, 12, 11, 11, 10, 11, 9, 11, 10, 11, 10, 11, 9, 11, 11, 9, 8, 11, 10,
        11, 11, 10, 7, 12, 11, 11, 11, 11, 11, 12, 10, 12, 11, 13, 11, 10, 12, 11, 10, 11, 10, 11,
        10, 11, 10, 11, 10, 12, 11, 11, 10, 11, 10, 10, 10, 11, 10, 12, 11, 12, 10, 11, 9, 11, 10,
        11, 10, 11, 10, 12, 9, 11, 11, 11, 9, 11, 10, 10, 9, 11, 10, 10, 9, 10, 9, 7, 4, 5, 5, 5,
        6, 6, 7, 6, 8, 7, 8, 9, 9, 7, 8, 8, 10, 9, 10, 10, 12, 10, 11, 11, 11, 11, 10, 11, 12, 11,
        11, 11, 11, 11, 13, 12, 11, 12, 13, 12, 12, 12, 13, 11, 9, 12, 13, 7, 13, 11, 13, 11, 10,
        11, 13, 15, 15, 12, 14, 15, 15, 15, 6, 15, 5,
    ],
    &[
        8, 10, 11, 11, 11, 12, 11, 11, 12, 6, 11, 12, 10, 5, 12, 12, 12, 12, 12, 12, 12, 13, 13,
        14, 13, 13, 12, 13, 12, 13, 12, 15, 4, 10, 7, 9, 11, 11, 10, 9, 6, 7, 8, 9, 6, 7, 6, 7, 8,
        7, 7, 8, 8, 8, 8, 8, 8, 9, 8, 7, 10, 9, 10, 10, 11, 7, 8, 6, 7, 8, 8, 9, 8, 7, 10, 10, 8,
        7, 8, 8, 7, 10, 7, 6, 7, 9, 9, 8, 11, 11, 11, 10, 11, 11, 11, 8, 11, 6, 7, 6, 6, 6, 6, 8,
        7, 6, 10, 9, 6, 7, 6, 6, 7, 10, 6, 5, 6, 7, 7, 7, 10, 8, 11, 9, 13, 7, 14, 16, 12, 14, 14,
        15, 15, 16, 16, 14, 15, 15, 15, 15, 15, 15, 15, 15, 14, 15, 13, 14, 14, 16, 15, 17, 14, 17,
        15, 17, 12, 14, 13, 16, 12, 17, 13, 17, 14, 13, 13, 14, 14, 12, 13, 15, 15, 14, 15, 17, 14,
        17, 15, 14, 15, 16, 12, 16, 15, 14, 15, 16, 15, 16, 17, 17, 15, 15, 17, 17, 13, 14, 15, 15,
        13, 12, 16, 16, 17, 14, 15, 16, 15, 15, 13, 13, 15, 13, 16, 17, 15, 17, 17, 17, 16, 17, 14,
        17, 14, 16, 15, 17, 15, 15, 14, 17, 15, 17, 15, 16, 15, 15, 16, 16, 14, 17, 17, 15, 15, 16,
        15, 17, 15, 14, 16, 16, 16, 16, 16, 12, 4, 4, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9,
        9, 9, 10, 10, 10, 11, 10, 11, 11, 11, 11, 11, 12, 12, 12, 13, 13, 12, 13, 12, 14, 14, 12,
        13, 13, 13, 13, 14, 12, 13, 13, 14, 14, 14, 13, 14, 14, 15, 15, 13, 15, 13, 17, 17, 17, 9,
        17, 7,
    ],
];
/// Five predefined sets of offset code lengths, one slice per set.
///
/// The slices hold 11, 13, 14, 11 and 11 entries respectively, which is what
/// [`OFFSET_CODE_SIZES`] records; keep the two tables in sync.
///
/// NOTE(review): presumably each slice gives the per-symbol bit lengths of a
/// prebuilt Huffman code for LZ77 match offsets in the method 13 decoder —
/// the consumer is outside this section; confirm against the decoder.
const OFFSET_CODE_LENGTHS: [&[i32]; 5] = [
    &[5, 6, 3, 3, 3, 3, 3, 3, 3, 4, 6],
    &[5, 6, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, 6],
    &[6, 7, 4, 4, 3, 3, 3, 3, 3, 4, 4, 4, 5, 7],
    &[3, 6, 5, 4, 2, 3, 3, 3, 4, 4, 6],
    &[6, 7, 7, 6, 4, 3, 2, 2, 3, 3, 6],
];
2810
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a plain-text file entry (`TEXT` type, `ttxt` creator) whose
    /// data fork holds `content`.
    fn text_entry(name: &str, content: &[u8]) -> SitEntry {
        SitEntry {
            name: String::from(name),
            data_fork: Vec::from(content),
            file_type: *b"TEXT",
            creator: *b"ttxt",
            ..Default::default()
        }
    }

    /// Builds a folder entry with default metadata.
    fn folder_entry(name: &str) -> SitEntry {
        SitEntry {
            name: String::from(name),
            is_folder: true,
            ..Default::default()
        }
    }

    /// Serializes `archive` without compression and parses the bytes back.
    fn store_and_reparse(archive: &SitArchive) -> SitArchive {
        let bytes = archive.serialize().expect("Should serialize");
        SitArchive::parse(&bytes).expect("Should parse")
    }

    /// Returns the entry called `name`, panicking when it is absent.
    fn entry_named<'a>(archive: &'a SitArchive, name: &str) -> &'a SitEntry {
        archive
            .entries
            .iter()
            .find(|entry| entry.name == name)
            .unwrap()
    }

    /// The checksum must match published IBM CRC-16 reference values.
    #[test]
    fn test_crc16() {
        let known = [
            (&b""[..], 0x0000),
            (&b"123456789"[..], 0xBB3D),
            (&b"Hello World\n"[..], 0x48FE),
        ];
        for (input, checksum) in known {
            assert_eq!(crc16(input), checksum);
        }
    }

    /// An archive without entries survives a serialize/parse cycle.
    #[test]
    fn test_empty_archive() {
        let restored = store_and_reparse(&SitArchive::new());
        assert_eq!(restored.entries.len(), 0);
    }

    /// A stored (uncompressed) file keeps its name, contents and type.
    #[test]
    fn test_single_file_uncompressed() {
        let mut archive = SitArchive::new();
        archive.add_entry(text_entry("hello.txt", b"Hello, World!"));

        let restored = store_and_reparse(&archive);

        assert_eq!(restored.entries.len(), 1);
        let entry = &restored.entries[0];
        assert_eq!(entry.name, "hello.txt");
        assert_eq!(entry.data_fork, b"Hello, World!");
        assert_eq!(entry.file_type, *b"TEXT");
    }

    /// A compressible file round-trips and the archive actually shrinks.
    #[test]
    fn test_single_file_compressed() {
        let payload = b"This text should compress well. ".repeat(50);
        let mut archive = SitArchive::new();
        archive.add_entry(text_entry("compress.txt", &payload));

        let packed = archive.serialize_compressed().expect("Should serialize");
        let restored = SitArchive::parse(&packed).expect("Should parse");

        assert_eq!(restored.entries.len(), 1);
        let entry = &restored.entries[0];
        assert_eq!(entry.name, "compress.txt");
        let (data, _rsrc) = entry.decompressed_forks().expect("Should decompress");
        assert_eq!(data, payload);

        // The compressed archive has to be smaller than the stored one.
        let stored = archive.serialize().expect("Should serialize uncompressed");
        assert!(
            packed.len() < stored.len(),
            "Compressed size ({}) should be less than uncompressed ({})",
            packed.len(),
            stored.len()
        );
    }

    /// A file that only has a resource fork keeps that fork intact.
    #[test]
    fn test_resource_fork() {
        let mut archive = SitArchive::new();
        archive.add_entry(SitEntry {
            name: "icon.rsrc".to_string(),
            data_fork: vec![],
            resource_fork: vec![0x00, 0x00, 0x01, 0x00, 0xDE, 0xAD, 0xBE, 0xEF],
            file_type: *b"rsrc",
            creator: *b"RSED",
            ..Default::default()
        });

        let restored = store_and_reparse(&archive);

        assert_eq!(restored.entries.len(), 1);
        let fork = &restored.entries[0].resource_fork;
        assert_eq!(fork.len(), 8);
        assert_eq!(fork[4..8], [0xDE, 0xAD, 0xBE, 0xEF]);
    }

    /// A folder and the files inside it keep their paths and metadata.
    #[test]
    fn test_folder_structure() {
        let mut archive = SitArchive::new();

        // The folder itself, flagged as having a custom icon.
        archive.add_entry(SitEntry {
            finder_flags: 0x0400,
            ..folder_entry("my_folder")
        });

        // Two files that live inside the folder.
        archive.add_entry(text_entry("my_folder/readme.txt", b"Read me!"));
        archive.add_entry(text_entry("my_folder/data.bin", &[1, 2, 3, 4, 5]));

        let restored = store_and_reparse(&archive);

        assert_eq!(restored.entries.len(), 3);

        let folder = entry_named(&restored, "my_folder");
        assert!(folder.is_folder);
        assert_eq!(folder.finder_flags, 0x0400);

        let readme = entry_named(&restored, "my_folder/readme.txt");
        assert!(!readme.is_folder);
        assert_eq!(readme.data_fork, b"Read me!");
    }

    /// A file three folder levels deep is still found under its full path.
    #[test]
    fn test_nested_folders() {
        let mut archive = SitArchive::new();
        for path in ["a", "a/b", "a/b/c"] {
            archive.add_entry(folder_entry(path));
        }
        archive.add_entry(text_entry("a/b/c/deep.txt", b"Deep file"));

        let restored = store_and_reparse(&archive);

        assert_eq!(restored.entries.len(), 4);
        let deep = entry_named(&restored, "a/b/c/deep.txt");
        assert_eq!(deep.data_fork, b"Deep file");
    }

    /// Type, creator and Finder flags survive a round trip.
    #[test]
    fn test_finder_metadata() {
        let mut archive = SitArchive::new();
        archive.add_entry(SitEntry {
            name: "app".to_string(),
            data_fork: vec![0xCA, 0xFE, 0xBA, 0xBE],
            file_type: *b"APPL",
            creator: *b"CARO",
            finder_flags: 0x0100, // "has been inited"
            ..Default::default()
        });

        let restored = store_and_reparse(&archive);

        let app = &restored.entries[0];
        assert_eq!(app.file_type, *b"APPL");
        assert_eq!(app.creator, *b"CARO");
        assert_eq!(app.finder_flags, 0x0100);
    }

    /// Several files of different sizes and contents all round-trip.
    #[test]
    fn test_multiple_files_roundtrip() {
        let mut archive = SitArchive::new();

        // A one-byte file, two runs of a single byte, and every byte value once.
        archive.add_entry(text_entry("small.txt", b"x"));
        archive.add_entry(text_entry("medium.txt", &[b'M'; 1000]));
        archive.add_entry(text_entry("large.txt", &[b'L'; 10000]));
        archive.add_entry(SitEntry {
            name: "binary.dat".to_string(),
            data_fork: (0..=255).collect(),
            file_type: *b"BINA",
            creator: *b"????",
            ..Default::default()
        });

        let restored = store_and_reparse(&archive);

        assert_eq!(restored.entries.len(), 4);

        // Compare by name so the check does not depend on entry order.
        let mut expected: Vec<_> = archive.entries.iter().collect();
        let mut actual: Vec<_> = restored.entries.iter().collect();
        expected.sort_by(|a, b| a.name.cmp(&b.name));
        actual.sort_by(|a, b| a.name.cmp(&b.name));

        for (want, got) in expected.into_iter().zip(actual) {
            assert_eq!(want.name, got.name);
            assert_eq!(want.data_fork, got.data_fork);
            assert_eq!(want.file_type, got.file_type);
        }
    }

    /// Compressed archives reproduce both repetitive and mixed content.
    #[test]
    fn test_compressed_roundtrip() {
        // Highly repetitive content, and content with less obvious structure.
        let repetitive = b"ABCDEFGH".repeat(1000);
        let mixed: Vec<u8> = (0..5000).map(|n| ((n * 17 + 31) % 256) as u8).collect();

        let mut archive = SitArchive::new();
        archive.add_entry(text_entry("repetitive.txt", &repetitive));
        archive.add_entry(SitEntry {
            name: "mixed.bin".to_string(),
            data_fork: mixed.clone(),
            ..Default::default()
        });

        let packed = archive.serialize_compressed().expect("Should compress");
        let restored = SitArchive::parse(&packed).expect("Should decompress");

        assert_eq!(restored.entries.len(), 2);

        let (rep_data, _) = entry_named(&restored, "repetitive.txt")
            .decompressed_forks()
            .expect("Should decompress");
        assert_eq!(rep_data, repetitive);

        let (mix_data, _) = entry_named(&restored, "mixed.bin")
            .decompressed_forks()
            .expect("Should decompress");
        assert_eq!(mix_data, mixed);
    }

    /// Input that is not a StuffIt archive is rejected rather than accepted.
    #[test]
    fn test_invalid_data() {
        // Far too short to hold any header.
        assert!(SitArchive::parse(b"short").is_err());

        // Long enough, but carries no valid signature.
        let zeros = [0u8; 200];
        assert!(SitArchive::parse(&zeros).is_err());
    }

    /// Method 13 output starts with mode 1 and decodes back to the input.
    #[test]
    fn test_compression_method_13() {
        let plain = b"Hello, this is a test of method 13 compression!";
        let packed = compress_sit13(plain);

        // The high nibble of the first byte carries the mode, expected to be 1.
        assert_eq!(packed[0] >> 4, 1);

        let mut decoder = Sit13Decoder::new(&packed);
        let unpacked = decoder.decompress(plain.len()).expect("Should decompress");
        assert_eq!(unpacked, plain);
    }

    /// Repeated patterns round-trip and yield output smaller than the input.
    #[test]
    fn test_compression_with_matches() {
        let plain = b"ABCDABCDABCDABCD1234ABCD5678ABCDABCD";
        let packed = compress_sit13(plain);

        let mut decoder = Sit13Decoder::new(&packed);
        let unpacked = decoder.decompress(plain.len()).expect("Should decompress");
        assert_eq!(unpacked, plain);

        // Back-references should make the stream shorter than the input.
        assert!(packed.len() < plain.len());
    }
}