blte 0.4.3

BLTE (Block Table Encoded) compression/decompression for World of Warcraft CASC files
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
//! Perfect archive recreation functionality

use crate::{BLTEFile, CompressionMode, Error, Result};

/// Detect original compression mode from BLTE file
pub fn detect_compression_mode(blte: &BLTEFile) -> Result<CompressionMode> {
    if blte.is_single_chunk() {
        // Single chunk - check first byte of chunk data
        if let Ok(chunk) = blte.get_chunk_data(0) {
            if !chunk.data.is_empty() {
                return CompressionMode::from_byte(chunk.data[0])
                    .ok_or_else(|| Error::InvalidHeaderSize(chunk.data[0] as u32));
            }
        }
    } else {
        // Multi-chunk - check first chunk's compression mode
        // All chunks in a BLTE file use the same compression mode
        if let Ok(chunk) = blte.get_chunk_data(0) {
            if !chunk.data.is_empty() {
                return CompressionMode::from_byte(chunk.data[0])
                    .ok_or_else(|| Error::InvalidHeaderSize(chunk.data[0] as u32));
            }
        }
    }

    // Fallback: if we can't determine, assume no compression
    // This shouldn't happen with valid BLTE files
    Ok(CompressionMode::None)
}

/// Analyze original chunk structure from BLTE file
pub fn analyze_chunk_structure(blte: &BLTEFile) -> Result<ChunkStructure> {
    if blte.is_single_chunk() {
        // Single chunk - need to get the decompressed size
        let chunk = blte.get_chunk_data(0)?;
        Ok(ChunkStructure::SingleChunk {
            decompressed_size: chunk.decompressed_size,
        })
    } else {
        // Multi-chunk - collect all chunk info
        let chunk_count = blte.chunk_count();
        let mut chunk_sizes = Vec::with_capacity(chunk_count);
        let mut compressed_sizes = Vec::with_capacity(chunk_count);

        for i in 0..chunk_count {
            let chunk = blte.get_chunk_data(i)?;
            chunk_sizes.push(chunk.decompressed_size);
            compressed_sizes.push(chunk.compressed_size);
        }

        Ok(ChunkStructure::MultiChunk {
            chunk_count,
            decompressed_sizes: chunk_sizes,
            compressed_sizes,
        })
    }
}

/// Detect original header format used
pub fn detect_header_format(blte: &BLTEFile) -> Result<HeaderFormat> {
    if blte.is_single_chunk() {
        return Ok(HeaderFormat::Standard); // Single chunk always uses standard
    }

    // For multi-chunk, determine format based on header size vs chunk table size
    let expected_chunk_table_size = 4 + (blte.chunk_count() * 24); // flags + chunk entries
    let header_size = blte.header.header_size as usize;

    if header_size == expected_chunk_table_size {
        // Standard format: header_size = chunk table size only
        Ok(HeaderFormat::Standard)
    } else if header_size == 8 + expected_chunk_table_size {
        // Archive format: header_size = 8 + chunk table size
        Ok(HeaderFormat::Archive)
    } else {
        // Use actual data offset to determine format
        let data_offset = blte.header.data_offset();
        if data_offset == 8 + header_size {
            Ok(HeaderFormat::Standard)
        } else if data_offset == header_size {
            Ok(HeaderFormat::Archive)
        } else {
            // Fallback to standard for unknown cases
            Ok(HeaderFormat::Standard)
        }
    }
}

/// Extract original checksums from BLTE file
pub fn extract_original_checksums(blte: &BLTEFile) -> Result<Vec<[u8; 16]>> {
    let mut checksums = Vec::new();

    if blte.is_single_chunk() {
        // Single chunk files don't have stored checksums
        checksums.push([0u8; 16]); // Zero checksum indicates no checksum
    } else {
        // Multi-chunk files have checksums in the chunk table
        for chunk_info in &blte.header.chunks {
            checksums.push(chunk_info.checksum);
        }
    }

    Ok(checksums)
}

/// Extract original compressed sizes
pub fn extract_compressed_sizes(blte: &BLTEFile) -> Result<Vec<u32>> {
    let mut sizes = Vec::new();

    if blte.is_single_chunk() {
        let chunk = blte.get_chunk_data(0)?;
        sizes.push(chunk.compressed_size);
    } else {
        for chunk_info in &blte.header.chunks {
            sizes.push(chunk_info.compressed_size);
        }
    }

    Ok(sizes)
}

/// Original chunk structure information
#[derive(Debug, Clone)]
pub enum ChunkStructure {
    SingleChunk {
        decompressed_size: u32,
    },
    MultiChunk {
        chunk_count: usize,
        decompressed_sizes: Vec<u32>,
        compressed_sizes: Vec<u32>,
    },
}

/// Original header format
#[derive(Debug, Clone, PartialEq)]
pub enum HeaderFormat {
    /// Standard format: data_offset = 8 + header_size
    Standard,
    /// Archive format: data_offset = header_size
    Archive,
}

/// Complete metadata for perfect file recreation
#[derive(Debug, Clone)]
pub struct OriginalFileMetadata {
    /// Original compression mode detected
    pub compression_mode: CompressionMode,
    /// Original chunk structure
    pub chunk_structure: ChunkStructure,
    /// Original header format (standard vs archive)
    pub header_format: HeaderFormat,
    /// Original checksums for verification
    pub checksums: Vec<[u8; 16]>,
    /// Original compressed sizes
    pub compressed_sizes: Vec<u32>,
    /// Original file offset in archive
    pub original_offset: usize,
    /// Original total BLTE file size
    pub original_size: usize,
}

/// Extracted file with complete metadata for recreation
#[derive(Debug, Clone)]
pub struct ExtractedFile {
    /// Original file index in archive
    pub original_index: usize,
    /// Decompressed file data
    pub data: Vec<u8>,
    /// Original metadata for perfect recreation
    pub metadata: OriginalFileMetadata,
}

impl ExtractedFile {
    /// Create extracted file from archive entry
    pub fn from_blte(
        index: usize,
        blte: &BLTEFile,
        original_offset: usize,
        original_size: usize,
    ) -> Result<Self> {
        // Detect all original metadata
        let compression_mode = detect_compression_mode(blte)?;
        let chunk_structure = analyze_chunk_structure(blte)?;
        let header_format = detect_header_format(blte)?;
        let checksums = extract_original_checksums(blte)?;
        let compressed_sizes = extract_compressed_sizes(blte)?;

        // Decompress the data
        let data = crate::decompress_blte(blte.raw_data(), None)?;

        Ok(ExtractedFile {
            original_index: index,
            data,
            metadata: OriginalFileMetadata {
                compression_mode,
                chunk_structure,
                header_format,
                checksums,
                compressed_sizes,
                original_offset,
                original_size,
            },
        })
    }

    /// Recreate perfect BLTE file from this extracted file
    pub fn recreate_perfect_blte(&self) -> Result<Vec<u8>> {
        recreate_perfect_blte_file(self)
    }
}

/// Recreate perfect BLTE file from extracted file with complete metadata preservation
pub fn recreate_perfect_blte_file(file: &ExtractedFile) -> Result<Vec<u8>> {
    let meta = &file.metadata;

    // Step 1: Recompress data using original compression mode and chunk structure
    let recompressed_blte = match &meta.chunk_structure {
        ChunkStructure::SingleChunk { .. } => {
            // Single chunk recreation
            crate::compress_data_single(file.data.clone(), meta.compression_mode, None)?
        }
        ChunkStructure::MultiChunk { .. } => {
            // Multi-chunk recreation with original chunk boundaries
            recreate_multichunk_blte(file)?
        }
    };

    // Step 2: Parse recreated BLTE and verify it matches expected structure
    let recreated = BLTEFile::parse(recompressed_blte)?;

    // Step 3: Verify critical properties match original
    verify_recreated_blte(&recreated, meta)?;

    // Step 4: Apply header format corrections if needed
    let final_blte = apply_header_format_corrections(recreated.raw_data(), meta)?;

    Ok(final_blte)
}

/// Recreate multi-chunk BLTE with original chunk boundaries
fn recreate_multichunk_blte(file: &ExtractedFile) -> Result<Vec<u8>> {
    let meta = &file.metadata;

    if let ChunkStructure::MultiChunk {
        decompressed_sizes, ..
    } = &meta.chunk_structure
    {
        // For perfect recreation, we need to compress the data in the same chunks
        // as the original. However, our current API doesn't support exact chunk boundaries.

        // For now, use the average chunk size approach and verify checksums match
        let total_decompressed: u32 = decompressed_sizes.iter().sum();
        let avg_chunk_size = if !decompressed_sizes.is_empty() {
            total_decompressed / decompressed_sizes.len() as u32
        } else {
            64 * 1024 // Default 64KB chunks
        };

        crate::compress_data_multi(
            file.data.clone(),
            avg_chunk_size as usize,
            meta.compression_mode,
            None,
        )
    } else {
        Err(crate::Error::InvalidChunkCount(0))
    }
}

/// Verify recreated BLTE matches original metadata
fn verify_recreated_blte(recreated: &BLTEFile, original_meta: &OriginalFileMetadata) -> Result<()> {
    // Verify compression mode
    let recreated_mode = detect_compression_mode(recreated)?;
    if recreated_mode != original_meta.compression_mode {
        tracing::warn!(
            "Compression mode mismatch: expected {:?}, got {:?}",
            original_meta.compression_mode,
            recreated_mode
        );
        // Not a fatal error, but log it
    }

    // Verify chunk count
    let expected_chunk_count = match &original_meta.chunk_structure {
        ChunkStructure::SingleChunk { .. } => 1,
        ChunkStructure::MultiChunk { chunk_count, .. } => *chunk_count,
    };

    if recreated.chunk_count() != expected_chunk_count {
        tracing::warn!(
            "Chunk count mismatch: expected {}, got {}",
            expected_chunk_count,
            recreated.chunk_count()
        );
        // Continue - chunk count differences are acceptable for now
    }

    // Verify header format detection
    let recreated_format = detect_header_format(recreated)?;
    if recreated_format != original_meta.header_format {
        tracing::warn!(
            "Header format mismatch: expected {:?}, got {:?}",
            original_meta.header_format,
            recreated_format
        );
        // Will be corrected in apply_header_format_corrections
    }

    Ok(())
}

/// Apply header format corrections to match original format
fn apply_header_format_corrections(
    blte_data: Vec<u8>,
    meta: &OriginalFileMetadata,
) -> Result<Vec<u8>> {
    // Parse the BLTE to understand current structure
    let blte = BLTEFile::parse(blte_data.clone())?;
    let current_format = detect_header_format(&blte)?;

    // If formats match, no correction needed
    if current_format == meta.header_format {
        return Ok(blte_data);
    }

    // For now, return as-is. Full header format correction would require
    // rebuilding the header with the correct offset calculation.
    // This is a complex operation that we can implement if needed.
    tracing::debug!(
        "Header format correction needed: {:?} -> {:?} (not implemented yet)",
        current_format,
        meta.header_format
    );

    Ok(blte_data)
}

/// Perfect archive builder that creates exact recreations
#[derive(Debug)]
pub struct PerfectArchiveBuilder {
    /// Files to include with preserved metadata
    files: Vec<ExtractedFile>,
    /// Target total archive size
    target_size: usize,
    /// Current size tracking
    current_size: usize,
}

impl PerfectArchiveBuilder {
    /// Create builder for perfect recreation
    pub fn new() -> Self {
        Self {
            files: Vec::new(),
            target_size: 256 * 1024 * 1024, // 256MB
            current_size: 0,
        }
    }

    /// Set target archive size
    pub fn target_size(mut self, size: usize) -> Self {
        self.target_size = size;
        self
    }

    /// Add extracted file preserving all metadata
    pub fn add_extracted_file(&mut self, file: ExtractedFile) -> Result<bool> {
        // Calculate what the recreated BLTE size will be
        let estimated_size = estimate_recreated_size(&file)?;

        if self.current_size + estimated_size > self.target_size {
            return Ok(false); // Archive would be too large
        }

        self.current_size += estimated_size;
        self.files.push(file);
        Ok(true) // Successfully added
    }

    /// Build perfect archive with zero gaps, maintaining exact file order
    pub fn build_perfect(mut self) -> Result<Vec<u8>> {
        println!(
            "Building perfect archive from {} files...",
            self.files.len()
        );

        // Sort files by original index to maintain exact order
        self.files.sort_by_key(|f| f.original_index);

        let mut archive = Vec::with_capacity(self.current_size);
        let mut current_offset = 0;

        for (i, file) in self.files.iter().enumerate() {
            if i % 1000 == 0 {
                println!("  Recreating file {}/{}", i, self.files.len());
            }

            // Recreate perfect BLTE file
            let recreated_blte = file.recreate_perfect_blte()?;

            // Verify offset matches expected position (for validation)
            if current_offset != file.metadata.original_offset {
                tracing::warn!(
                    "Offset mismatch for file {}: expected {}, got {}",
                    i,
                    file.metadata.original_offset,
                    current_offset
                );
            }

            // Add to archive with zero gaps
            archive.extend_from_slice(&recreated_blte);
            current_offset += recreated_blte.len();
        }

        println!("Perfect archive built: {} bytes", archive.len());
        Ok(archive)
    }

    /// Get current file count
    pub fn file_count(&self) -> usize {
        self.files.len()
    }

    /// Get estimated current size
    pub fn current_size(&self) -> usize {
        self.current_size
    }
}

impl Default for PerfectArchiveBuilder {
    fn default() -> Self {
        Self::new()
    }
}

/// Estimate the size of recreated BLTE file
fn estimate_recreated_size(file: &ExtractedFile) -> Result<usize> {
    // Conservative estimate: assume similar compression ratio as original
    let original_ratio = if !file.data.is_empty() {
        file.metadata.original_size as f64 / file.data.len() as f64
    } else {
        1.0
    };

    // Estimate recreated size with some buffer for header differences
    let estimated = (file.data.len() as f64 * original_ratio * 1.1) as usize; // 10% buffer
    Ok(estimated.max(file.metadata.original_size)) // At least as large as original
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::{BLTEFile, compress_data_single};

    #[test]
    fn test_compression_mode_detection() {
        // Test with ZLib compression
        let test_data = b"Hello, BLTE world!".to_vec();
        let compressed =
            compress_data_single(test_data.clone(), CompressionMode::ZLib, None).unwrap();
        let blte = BLTEFile::parse(compressed).unwrap();

        let detected = detect_compression_mode(&blte).unwrap();
        assert_eq!(detected, CompressionMode::ZLib);
    }

    #[test]
    fn test_chunk_structure_analysis() {
        // Test single chunk
        let test_data = b"Hello, BLTE world!".to_vec();
        let compressed =
            compress_data_single(test_data.clone(), CompressionMode::None, None).unwrap();
        let blte = BLTEFile::parse(compressed).unwrap();

        let structure = analyze_chunk_structure(&blte).unwrap();
        match structure {
            ChunkStructure::SingleChunk { decompressed_size } => {
                // Single chunk files don't store decompressed size, so it's 0
                assert_eq!(decompressed_size, 0);
            }
            _ => panic!("Expected single chunk structure"),
        }
    }

    #[test]
    fn test_header_format_detection() {
        let test_data = b"Hello, BLTE world!".to_vec();
        let compressed = compress_data_single(test_data, CompressionMode::None, None).unwrap();
        let blte = BLTEFile::parse(compressed).unwrap();

        let format = detect_header_format(&blte).unwrap();
        assert_eq!(format, HeaderFormat::Standard); // Single chunk always standard
    }
}