1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
//! Snappy decompression for iWork IWA files
//!
//! iWork uses a custom Snappy framing format that differs from the standard:
//! - No stream identifier chunk
//! - No CRC-32C checksums
//! - Custom chunk header format (4 bytes: type + 24-bit length)
use std::io::{self, Read, Cursor};
use snap::raw::Decoder;
use crate::iwa::Error;
/// Holds the fully decompressed contents of one iWork IWA stream.
///
/// The only field is the byte buffer produced by decompression; it is private
/// to this module.
#[derive(Debug)]
pub struct SnappyStream {
    /// Decompressed bytes of the stream.
    decompressed: Vec<u8>,
}
impl SnappyStream {
/// Decompress an IWA file from a reader
///
/// iWork IWA files use a custom Snappy framing format:
/// - 4-byte header: [chunk_type, length_byte1, length_byte2, length_byte3]
/// - chunk_type is always 0 for compressed chunks
/// - length is a 24-bit little-endian integer
/// - No stream identifier, no CRC checksums
pub fn decompress<R: Read>(reader: &mut R) -> Result<Self, Error> {
let mut decompressed = Vec::new();
let mut decoder = Decoder::new();
loop {
// Read 4-byte header
let mut header = [0u8; 4];
match reader.read_exact(&mut header) {
Ok(_) => {}
Err(ref e) if e.kind() == io::ErrorKind::UnexpectedEof => {
// End of stream
break;
}
Err(e) => return Err(Error::Io(e)),
}
let chunk_type = header[0];
if chunk_type != 0 {
return Err(Error::Snappy(format!(
"Unexpected chunk type: {}, expected 0",
chunk_type
)));
}
// Extract 24-bit length (little-endian)
let length = u32::from_le_bytes([header[1], header[2], header[3], 0]);
if length == 0 {
continue;
}
// Read compressed chunk
let mut compressed = vec![0u8; length as usize];
reader.read_exact(&mut compressed)
.map_err(Error::Io)?;
let mut chunk_decompressed = Vec::new();
let mut buffer_size = 1024; // Start with 1KB
loop {
chunk_decompressed.resize(buffer_size, 0);
match decoder.decompress(&compressed, &mut chunk_decompressed) {
Ok(decompressed_size) => {
// Success - truncate to actual size and break
chunk_decompressed.truncate(decompressed_size);
break;
}
Err(_) if buffer_size < 10 * 1024 * 1024 => {
// Buffer too small, try with larger buffer (up to 10MB)
buffer_size *= 2;
continue;
}
Err(e) => {
return Err(Error::Snappy(format!("Decompression failed: {}", e)));
}
}
}
decompressed.extend(chunk_decompressed);
}
Ok(SnappyStream { decompressed })
}
/// Get the decompressed data as a slice
pub fn data(&self) -> &[u8] {
&self.decompressed
}
/// Get the decompressed data as a mutable slice
pub fn data_mut(&mut self) -> &mut [u8] {
&mut self.decompressed
}
/// Consume self and return the decompressed data
pub fn into_data(self) -> Vec<u8> {
self.decompressed
}
/// Create a reader for the decompressed data
pub fn reader(&self) -> Cursor<&[u8]> {
Cursor::new(&self.decompressed)
}
}
/// Lets a `SnappyStream` be passed wherever a borrowed byte slice is accepted;
/// the slice is the decompressed data.
impl AsRef<[u8]> for SnappyStream {
    fn as_ref(&self) -> &[u8] {
        self.decompressed.as_slice()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Cursor;
    use std::fs::File;
    use zip::ZipArchive;

    /// Builds one IWA chunk around `payload`: a zero type byte, the 24-bit
    /// little-endian length of the compressed body, then the raw Snappy body.
    fn iwa_chunk(payload: &[u8]) -> Vec<u8> {
        let body = snap::raw::Encoder::new()
            .compress_vec(payload)
            .expect("Failed to compress test payload");
        assert!(
            body.len() < (1 << 24),
            "test payload does not fit a 24-bit chunk length"
        );
        let mut chunk = vec![0u8];
        chunk.extend_from_slice(&(body.len() as u32).to_le_bytes()[..3]);
        chunk.extend_from_slice(&body);
        chunk
    }

    #[test]
    fn test_empty_stream() {
        let empty_data: [u8; 0] = [];
        let mut cursor = Cursor::new(&empty_data[..]);
        let result = SnappyStream::decompress(&mut cursor);
        assert!(result.is_ok());
        let stream = result.unwrap();
        assert_eq!(stream.data().len(), 0);
    }

    #[test]
    fn test_invalid_chunk_type() {
        // Create a header with invalid chunk type (1 instead of 0)
        let invalid_data = [1u8, 0, 0, 0]; // chunk_type=1, length=0
        let mut cursor = Cursor::new(&invalid_data[..]);
        let result = SnappyStream::decompress(&mut cursor);
        assert!(result.is_err());
        match result.unwrap_err() {
            Error::Snappy(msg) => assert!(msg.contains("Unexpected chunk type")),
            _ => panic!("Expected Snappy error"),
        }
    }

    #[test]
    fn test_zero_length_chunk_is_skipped() {
        // A valid header declaring an empty body contributes no output.
        let data = [0u8, 0, 0, 0];
        let mut cursor = Cursor::new(&data[..]);
        let stream = SnappyStream::decompress(&mut cursor).expect("Failed to decompress");
        assert!(stream.data().is_empty());
    }

    #[test]
    fn test_single_chunk_round_trip() {
        // Larger than 1 KiB so the output does not fit a small initial buffer.
        let payload: Vec<u8> = (0..5000u32).map(|i| (i % 251) as u8).collect();
        let data = iwa_chunk(&payload);
        let mut cursor = Cursor::new(data.as_slice());
        let stream = SnappyStream::decompress(&mut cursor).expect("Failed to decompress");
        assert_eq!(stream.data(), payload.as_slice());
    }

    #[test]
    fn test_multiple_chunks_are_concatenated() {
        let first = b"hello, ".to_vec();
        let second = b"iwork".to_vec();
        let mut data = iwa_chunk(&first);
        data.extend_from_slice(&[0, 0, 0, 0]); // empty chunk in between
        data.extend_from_slice(&iwa_chunk(&second));

        let mut cursor = Cursor::new(data.as_slice());
        let stream = SnappyStream::decompress(&mut cursor).expect("Failed to decompress");
        assert_eq!(stream.data(), b"hello, iwork".as_ref());
        assert_eq!(stream.as_ref(), b"hello, iwork".as_ref());
        assert_eq!(stream.into_data(), b"hello, iwork".to_vec());
    }

    #[test]
    fn test_truncated_chunk_body() {
        // Header declares 5 body bytes but only 2 follow.
        let data = [0u8, 5, 0, 0, 0xAA, 0xBB];
        let mut cursor = Cursor::new(&data[..]);
        match SnappyStream::decompress(&mut cursor) {
            Err(Error::Io(e)) => assert_eq!(e.kind(), std::io::ErrorKind::UnexpectedEof),
            other => panic!("Expected Io error, got {:?}", other),
        }
    }

    #[test]
    fn test_real_iwa_decompression() {
        // Test decompression with real IWA files from test bundles.
        // NOTE: bundles that are not present are skipped, so this test passes
        // vacuously when no fixture is available.
        for test_file in ["test.pages", "test.numbers"] {
            if !std::path::Path::new(test_file).exists() {
                continue; // Skip if test file doesn't exist
            }
            let file = File::open(test_file).expect("Failed to open test file");
            let mut zip_archive = ZipArchive::new(file).expect("Failed to read zip archive");

            // Find an IWA file to test with
            for i in 0..zip_archive.len() {
                let mut zip_file = zip_archive.by_index(i).expect("Failed to read zip entry");
                if !zip_file.name().ends_with(".iwa") {
                    continue;
                }

                let mut compressed_data = Vec::new();
                zip_file
                    .read_to_end(&mut compressed_data)
                    .expect("Failed to read IWA file");

                let mut cursor = Cursor::new(compressed_data.as_slice());
                let result = SnappyStream::decompress(&mut cursor);
                assert!(
                    result.is_ok(),
                    "Failed to decompress {} from {}: {:?}",
                    zip_file.name(),
                    test_file,
                    result.err()
                );

                // Only non-emptiness is checked; the content is not parsed.
                let decompressed = result.unwrap();
                assert!(
                    !decompressed.data().is_empty(),
                    "Decompressed data should not be empty for {}",
                    zip_file.name()
                );
                break; // Test with first IWA file found
            }
        }
    }
}