1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
//! Sparse/RLE compression and decompression (StormLib compatible)
//!
//! This implements the exact sparse compression format used by StormLib and MPQ archives.
//! Based on StormLib's src/sparse/sparse.cpp implementation.
use crate::{Error, Result};
/// Decompress sparse/RLE compressed data (StormLib format).
///
/// The stream starts with the decompressed length as a 4-byte big-endian integer, followed
/// by a sequence of chunks. Each chunk begins with one control byte:
///
/// * high bit set: the next `(control & 0x7F) + 1` input bytes are copied verbatim;
/// * high bit clear: `(control & 0x7F) + 3` zero bytes are produced.
///
/// Every chunk is clamped so that the output never grows beyond the declared length, and
/// decoding stops as soon as that length has been produced; any input left over at that
/// point is ignored. If the input runs out first, the returned buffer is shorter than the
/// declared length.
///
/// A literal chunk that claims more bytes than the input still holds is not treated as
/// fatal: a warning is logged and the bytes that are available are copied instead.
///
/// # Errors
///
/// Returns a compression error when `data` is shorter than 5 bytes, or when the length
/// stored in the header is larger than `expected_size`.
pub(crate) fn decompress(data: &[u8], expected_size: usize) -> Result<Vec<u8>> {
    // A valid stream needs the 4-byte header plus at least one control byte.
    if data.len() < 5 {
        return Err(Error::compression(
            "Sparse decompression: input too small (< 5 bytes)",
        ));
    }

    // The declared output size is stored big-endian in the first four bytes.
    let declared_size = u32::from_be_bytes([data[0], data[1], data[2], data[3]]) as usize;

    // The caller's expectation is the upper bound for what we are willing to allocate.
    if declared_size > expected_size {
        return Err(Error::compression(
            "Sparse decompression: stored size exceeds expected size",
        ));
    }

    let mut output = Vec::with_capacity(declared_size);
    let mut pos = 4; // Skip the size header

    // Stop once the output is complete: every further chunk would be clamped to zero
    // bytes, so parsing the rest could only produce misleading warnings.
    while pos < data.len() && output.len() < declared_size {
        let control = data[pos];
        pos += 1;

        let remaining = declared_size - output.len();

        if control & 0x80 != 0 {
            // Literal chunk: 1..=0x80 bytes follow the control byte.
            let mut chunk_len = usize::from(control & 0x7F) + 1;
            let available = data.len() - pos;

            if chunk_len > available {
                // StormLib fails here, but some MPQ files carry a final chunk whose marker
                // overstates its length, so recover with the bytes that are present.
                log::warn!(
                    "Sparse decompression: not enough data for copy (need {}, have {})",
                    chunk_len,
                    available
                );
                chunk_len = available;
                if chunk_len == 0 {
                    break; // No more data to process
                }
            }

            // Never write past the declared output size.
            let chunk_len = chunk_len.min(remaining);
            output.extend_from_slice(&data[pos..pos + chunk_len]);
            pos += chunk_len;
        } else {
            // Zero chunk: 3..=0x82 zero bytes, clamped to the declared output size.
            let zero_len = (usize::from(control & 0x7F) + 3).min(remaining);
            output.resize(output.len() + zero_len, 0);
        }
    }

    Ok(output)
}
/// Compress using sparse/RLE compression (StormLib format).
///
/// The output starts with the input length as a 4-byte big-endian integer (only the low
/// 32 bits of `data.len()` are stored), followed by a sequence of chunks:
///
/// * `0x80 | (n - 1)` followed by `n` bytes: a literal run of `n` bytes (`1..=0x80`);
/// * `n - 3`: a run of `n` zero bytes (`3..=0x82`).
///
/// Runs of fewer than three zeros are kept inside the surrounding literal run. The last
/// (up to three) bytes of the input are handled separately: they are written as one literal
/// chunk if any of them is non-zero, otherwise as a single `0x7F` marker whose length the
/// decoder clamps to the declared size.
///
/// Empty input produces the header followed by that same `0x7F` marker, so the stream
/// reaches the 5-byte minimum that `decompress` requires.
///
/// # Errors
///
/// This implementation always returns `Ok`.
pub(crate) fn compress(data: &[u8]) -> Result<Vec<u8>> {
    let len = data.len();

    // Reserve enough space for worst case (data doesn't compress)
    let mut output = Vec::with_capacity(len + 4 + (len / 128) + 1);

    // Original data length, big-endian. The cast keeps the low 32 bits only.
    output.extend_from_slice(&(len as u32).to_be_bytes());

    if data.is_empty() {
        // A bare header is rejected by the decoder as too short; the zero-run terminator
        // decodes to nothing because the declared size is 0.
        output.push(0x7F);
        return Ok(output);
    }

    // The main loop only starts a scan while more than three bytes are left.
    let tail_start = len.saturating_sub(3);
    let mut cursor = 0usize;

    while cursor < tail_start {
        // Scan forward for a literal run followed by a run of zeros. The scan stops at the
        // first non-zero byte that comes after three or more zeros, or at end of input.
        let mut literal_end = cursor;
        let mut zero_run = 0usize;
        for (offset, &byte) in data[cursor..].iter().enumerate() {
            if byte == 0 {
                zero_run += 1;
            } else {
                if zero_run >= 3 {
                    break;
                }
                // Short zero runs are absorbed into the literal run.
                literal_end = cursor + offset + 1;
                zero_run = 0;
            }
        }

        emit_literal_run(&mut output, &data[cursor..literal_end]);
        cursor = literal_end;

        // Fewer than three trailing zeros are left for the tail flush below.
        if zero_run >= 3 {
            emit_zero_run(&mut output, zero_run);
            cursor += zero_run;
        }
    }

    // Flush the last (at most three) bytes.
    let tail = &data[cursor..];
    if !tail.is_empty() {
        if tail.iter().any(|&byte| byte != 0) {
            output.push(0x80 | (tail.len() - 1) as u8);
            output.extend_from_slice(tail);
        } else {
            // Terminate with a chunk that means "0x82 of zeros"; the decoder clamps it.
            output.push(0x7F);
        }
    }

    Ok(output)
}

/// Appends `literal` to `output` as one or more literal chunks.
fn emit_literal_run(output: &mut Vec<u8>, mut literal: &[u8]) {
    // Blocks longer than 0x81 bytes are split off 0x80 bytes at a time.
    while literal.len() > 0x81 {
        output.push(0xFF);
        output.extend_from_slice(&literal[..0x80]);
        literal = &literal[0x80..];
    }

    // Exactly 0x81 bytes left: StormLib (and Blizzard's code) emits a one-byte chunk first.
    // Kept as is so the output stays byte-identical to StormLib's.
    if literal.len() > 0x80 {
        output.push(0x80);
        output.push(literal[0]);
        literal = &literal[1..];
    }

    if !literal.is_empty() {
        output.push(0x80 | (literal.len() - 1) as u8);
        output.extend_from_slice(literal);
    }
}

/// Appends markers to `output` that encode a run of `zero_count` (at least 3) zero bytes.
fn emit_zero_run(output: &mut Vec<u8>, mut zero_count: usize) {
    // Each 0x7F marker stands for 0x82 zeros.
    while zero_count > 0x85 {
        output.push(0x7F);
        zero_count -= 0x82;
    }

    // 0x83..=0x85 zeros do not fit one marker: flush three of them separately.
    if zero_count > 0x82 {
        output.push(0);
        zero_count -= 0x03;
    }

    if zero_count >= 3 {
        output.push((zero_count - 3) as u8);
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decodes a hand-assembled stream: 4-byte big-endian size, then chunks introduced by
    /// a control byte (0x80 and above = literal bytes follow, 0x00-0x7F = run of zeros).
    #[test]
    fn test_decompress() {
        let parts: [&[u8]; 6] = [
            &[0x00, 0x00, 0x00, 0x0F], // declared size: 15 bytes
            &[0x84],                   // literal chunk, 0x80 | (5 - 1)
            b"Hello",
            &[0x02], // zero chunk, 5 - 3
            &[0x84], // literal chunk, 0x80 | (5 - 1)
            b"World",
        ];
        let stream = parts.concat();

        let unpacked = decompress(&stream, 15).expect("Decompression failed");

        let expected = b"Hello\0\0\0\0\0World";
        assert_eq!(unpacked, expected);
    }

    /// Mixed literal and zero runs must survive a compress/decompress cycle unchanged.
    #[test]
    fn test_round_trip() {
        let original = b"Hello\0\0\0\0\0World\0\0\0!!!";
        let input_len = original.len();

        let packed = compress(original).expect("Compression failed");
        println!("Original len: {}, Compressed: {:?}", input_len, packed);

        let unpacked = decompress(&packed, input_len).expect("Decompression failed");
        assert_eq!(unpacked, original);
    }

    /// A buffer of zeros must shrink when compressed and decode back to the same bytes.
    #[test]
    fn test_all_zeros() {
        let original = [0u8; 100];

        let packed = compress(&original).expect("Compression failed");
        assert!(packed.len() < original.len()); // Should compress well

        let unpacked = decompress(&packed, original.len()).expect("Decompression failed");
        assert_eq!(unpacked, original);
    }
}