casc_storage/manifest/
tact_integration.rs

1//! TACT manifest integration implementation
2
3use crate::error::{CascError, Result};
4use crate::types::EKey;
5use parking_lot::RwLock;
6use std::collections::HashMap;
7use std::io::{Cursor, Read};
8use std::path::Path;
9use std::sync::Arc;
10use tact_parser::{
11    encoding::EncodingFile,
12    wow_root::{ContentFlags, LocaleFlags, WowRoot},
13};
14use tracing::{debug, info};
15
16/// Configuration for manifest loading
17#[derive(Debug, Clone)]
18pub struct ManifestConfig {
19    /// Locale to use for filtering files
20    pub locale: LocaleFlags,
21    /// Content flags to require (e.g., Windows, x86_64)
22    pub content_flags: Option<ContentFlags>,
23    /// Whether to cache manifests in memory
24    pub cache_manifests: bool,
25    /// Enable lazy loading for large manifest sections
26    pub lazy_loading: bool,
27    /// Maximum number of entries to cache when lazy loading
28    pub lazy_cache_limit: usize,
29}
30
31impl Default for ManifestConfig {
32    fn default() -> Self {
33        Self {
34            locale: LocaleFlags::any_locale(),
35            content_flags: None,
36            cache_manifests: true,
37            lazy_loading: true,
38            lazy_cache_limit: 10_000,
39        }
40    }
41}
42
43/// Represents a file mapping from FileDataID to EKey
44#[derive(Debug, Clone)]
45pub struct FileMapping {
46    /// FileDataID (game's internal file identifier)
47    pub file_data_id: u32,
48    /// Content key (MD5 hash from root manifest)
49    pub content_key: [u8; 16],
50    /// Encoding key (from encoding manifest)
51    pub encoding_key: Option<EKey>,
52    /// Content flags for this file
53    pub flags: Option<ContentFlags>,
54}
55
56/// Lazy-loaded root manifest for memory efficiency
57#[allow(dead_code)] // Infrastructure for future full lazy loading implementation
58struct LazyRootManifest {
59    /// Raw decompressed data
60    data: Vec<u8>,
61    /// Partial cache of FileDataID mappings
62    fdid_cache: HashMap<
63        u32,
64        std::collections::BTreeMap<tact_parser::wow_root::LocaleContentFlags, [u8; 16]>,
65    >,
66    /// Filename hash cache
67    hash_cache: HashMap<u64, u32>,
68    /// Configuration
69    config: ManifestConfig,
70    /// Approximate file count for memory management
71    approx_file_count: u32,
72}
73
74/// Lazy-loaded encoding manifest for memory efficiency
75#[allow(dead_code)] // Infrastructure for future full lazy loading implementation
76struct LazyEncodingManifest {
77    /// Raw decompressed data
78    data: Vec<u8>,
79    /// Partial cache of CKey mappings
80    ckey_cache: HashMap<Vec<u8>, tact_parser::encoding::EncodingEntry>,
81    /// EKey to CKey reverse mapping cache
82    ekey_cache: HashMap<Vec<u8>, Vec<u8>>,
83    /// Maximum cache size
84    cache_limit: usize,
85}
86
87/// Manages TACT manifests and their integration with CASC storage
88pub struct TactManifests {
89    /// Configuration
90    config: ManifestConfig,
91
92    /// Root manifest (FileDataID -> CKey)
93    root: Arc<RwLock<Option<WowRoot>>>,
94
95    /// Lazy root manifest for memory efficiency
96    lazy_root: Arc<RwLock<Option<LazyRootManifest>>>,
97
98    /// Encoding manifest (CKey -> EKey)
99    encoding: Arc<RwLock<Option<EncodingFile>>>,
100
101    /// Lazy encoding manifest for memory efficiency
102    lazy_encoding: Arc<RwLock<Option<LazyEncodingManifest>>>,
103
104    /// Cached FileDataID -> EKey mappings
105    fdid_cache: Arc<RwLock<HashMap<u32, FileMapping>>>,
106
107    /// Cached filename -> FileDataID mappings (from listfile)
108    filename_cache: Arc<RwLock<HashMap<String, u32>>>,
109}
110
111impl TactManifests {
112    /// Create a new TACT manifest manager
113    pub fn new(config: ManifestConfig) -> Self {
114        Self {
115            config,
116            root: Arc::new(RwLock::new(None)),
117            lazy_root: Arc::new(RwLock::new(None)),
118            encoding: Arc::new(RwLock::new(None)),
119            lazy_encoding: Arc::new(RwLock::new(None)),
120            fdid_cache: Arc::new(RwLock::new(HashMap::new())),
121            filename_cache: Arc::new(RwLock::new(HashMap::new())),
122        }
123    }
124
125    /// Load root manifest from raw data
126    pub fn load_root_from_data(&self, data: Vec<u8>) -> Result<()> {
127        info!("Loading root manifest from data ({} bytes)", data.len());
128
129        if self.config.lazy_loading {
130            return self.load_root_lazy(data);
131        }
132
133        // Check if data is BLTE compressed
134        let decompressed = if data.starts_with(b"BLTE") {
135            debug!("Root manifest is BLTE compressed, decompressing with streaming");
136            use std::io::{Cursor, Read};
137            let cursor = Cursor::new(data);
138            let mut stream = blte::create_streaming_reader(cursor, None)
139                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
140
141            let mut result = Vec::new();
142            stream
143                .read_to_end(&mut result)
144                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
145            result
146        } else {
147            data
148        };
149
150        // Parse root manifest
151        let mut cursor = Cursor::new(decompressed);
152        let root = WowRoot::parse(&mut cursor, self.config.locale)
153            .map_err(|e| CascError::InvalidFormat(format!("Failed to parse root: {e}")))?;
154
155        info!(
156            "Loaded root manifest: {} FileDataIDs, {} name hashes",
157            root.fid_md5.len(),
158            root.name_hash_fid.len()
159        );
160
161        // Store in cache
162        *self.root.write() = Some(root);
163
164        // Clear FileDataID cache as it's now outdated
165        self.fdid_cache.write().clear();
166
167        Ok(())
168    }
169
170    /// Load root manifest with lazy loading (memory efficient)
171    fn load_root_lazy(&self, data: Vec<u8>) -> Result<()> {
172        info!(
173            "Loading root manifest with lazy loading ({} bytes)",
174            data.len()
175        );
176
177        // Decompress if needed
178        let decompressed = if data.starts_with(b"BLTE") {
179            debug!("Root manifest is BLTE compressed, decompressing");
180            use std::io::{Cursor, Read};
181            let cursor = Cursor::new(&data);
182            let mut stream = blte::create_streaming_reader(cursor, None)
183                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
184
185            let mut result = Vec::new();
186            stream
187                .read_to_end(&mut result)
188                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
189            result
190        } else {
191            data
192        };
193
194        // Parse just the header to get an estimate
195        let mut cursor = Cursor::new(&decompressed);
196        let header = tact_parser::wow_root::WowRootHeader::parse(&mut cursor)
197            .map_err(|e| CascError::InvalidFormat(format!("Failed to parse root header: {e}")))?;
198
199        info!(
200            "Parsed root header for lazy loading: {} total files, {} named files",
201            header.total_file_count, header.named_file_count
202        );
203
204        // Create lazy manifest
205        let lazy_manifest = LazyRootManifest {
206            data: decompressed,
207            fdid_cache: HashMap::new(),
208            hash_cache: HashMap::new(),
209            config: self.config.clone(),
210            approx_file_count: header.total_file_count,
211        };
212
213        // Store lazy manifest
214        *self.lazy_root.write() = Some(lazy_manifest);
215
216        // Clear caches
217        self.fdid_cache.write().clear();
218        *self.root.write() = None; // Clear fully loaded version if any
219
220        Ok(())
221    }
222
223    /// Load encoding manifest from raw data
224    pub fn load_encoding_from_data(&self, data: Vec<u8>) -> Result<()> {
225        info!("Loading encoding manifest from data ({} bytes)", data.len());
226
227        if self.config.lazy_loading {
228            return self.load_encoding_lazy(data);
229        }
230
231        // Check if data is BLTE compressed
232        let decompressed = if data.starts_with(b"BLTE") {
233            debug!("Encoding manifest is BLTE compressed, decompressing with streaming");
234            use std::io::{Cursor, Read};
235            let cursor = Cursor::new(data);
236            let mut stream = blte::create_streaming_reader(cursor, None)
237                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
238
239            let mut result = Vec::new();
240            stream
241                .read_to_end(&mut result)
242                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
243            result
244        } else {
245            data
246        };
247
248        // Parse encoding manifest
249        let encoding = EncodingFile::parse(&decompressed)
250            .map_err(|e| CascError::InvalidFormat(format!("Failed to parse encoding: {e}")))?;
251
252        info!(
253            "Loaded encoding manifest: {} CKey entries",
254            encoding.ckey_count()
255        );
256
257        // Store in cache
258        *self.encoding.write() = Some(encoding);
259
260        // Clear FileDataID cache as it's now outdated
261        self.fdid_cache.write().clear();
262
263        Ok(())
264    }
265
266    /// Load encoding manifest with lazy loading (memory efficient)
267    fn load_encoding_lazy(&self, data: Vec<u8>) -> Result<()> {
268        info!(
269            "Loading encoding manifest with lazy loading ({} bytes)",
270            data.len()
271        );
272
273        // Decompress if needed
274        let decompressed = if data.starts_with(b"BLTE") {
275            debug!("Encoding manifest is BLTE compressed, decompressing");
276            use std::io::{Cursor, Read};
277            let cursor = Cursor::new(&data);
278            let mut stream = blte::create_streaming_reader(cursor, None)
279                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
280
281            let mut result = Vec::new();
282            stream
283                .read_to_end(&mut result)
284                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
285            result
286        } else {
287            data
288        };
289
290        info!(
291            "Stored encoding manifest data for lazy loading ({} bytes)",
292            decompressed.len()
293        );
294
295        // Create lazy manifest
296        let lazy_manifest = LazyEncodingManifest {
297            data: decompressed,
298            ckey_cache: HashMap::new(),
299            ekey_cache: HashMap::new(),
300            cache_limit: self.config.lazy_cache_limit,
301        };
302
303        // Store lazy manifest
304        *self.lazy_encoding.write() = Some(lazy_manifest);
305
306        // Clear caches
307        self.fdid_cache.write().clear();
308        *self.encoding.write() = None; // Clear fully loaded version if any
309
310        Ok(())
311    }
312
313    /// Load root manifest from streaming reader (memory-efficient)
314    pub fn load_root_from_reader<R: std::io::Read + std::io::Seek>(
315        &self,
316        mut reader: R,
317    ) -> Result<()> {
318        info!("Loading root manifest from streaming reader");
319
320        // Check if data is BLTE compressed by reading magic
321        let mut magic = [0u8; 4];
322        reader.read_exact(&mut magic)?;
323
324        let root = if &magic == b"BLTE" {
325            debug!("Root manifest is BLTE compressed, decompressing with streaming");
326            // Seek back to beginning for BLTE parser
327            reader.seek(std::io::SeekFrom::Start(0))?;
328
329            // Create streaming BLTE reader
330            let mut blte_stream = blte::create_streaming_reader(reader, None)
331                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
332
333            // Root parser needs Read+Seek, so we need to decompress to memory first
334            // This is still better than loading the compressed file entirely into memory
335            let mut decompressed = Vec::new();
336            blte_stream
337                .read_to_end(&mut decompressed)
338                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
339
340            // Parse from decompressed data
341            let mut cursor = Cursor::new(decompressed);
342            WowRoot::parse(&mut cursor, self.config.locale)
343                .map_err(|e| CascError::InvalidFormat(format!("Failed to parse root: {e}")))?
344        } else {
345            debug!("Root manifest is uncompressed, parsing directly");
346            // Seek back to beginning for direct parsing
347            reader.seek(std::io::SeekFrom::Start(0))?;
348
349            // Parse directly from reader
350            WowRoot::parse(&mut reader, self.config.locale)
351                .map_err(|e| CascError::InvalidFormat(format!("Failed to parse root: {e}")))?
352        };
353
354        info!(
355            "Loaded root manifest: {} FileDataIDs, {} name hashes",
356            root.fid_md5.len(),
357            root.name_hash_fid.len()
358        );
359
360        // Store in cache
361        *self.root.write() = Some(root);
362
363        // Clear FileDataID cache as it's now outdated
364        self.fdid_cache.write().clear();
365
366        Ok(())
367    }
368
369    /// Load encoding manifest from streaming reader (memory-efficient)
370    pub fn load_encoding_from_reader<R: std::io::Read + std::io::Seek>(
371        &self,
372        mut reader: R,
373    ) -> Result<()> {
374        info!("Loading encoding manifest from streaming reader");
375
376        // Check if data is BLTE compressed by reading magic
377        let mut magic = [0u8; 4];
378        reader.read_exact(&mut magic)?;
379
380        let encoding = if &magic == b"BLTE" {
381            debug!("Encoding manifest is BLTE compressed, decompressing with streaming");
382            // Seek back to beginning for BLTE parser
383            reader.seek(std::io::SeekFrom::Start(0))?;
384
385            // Create streaming BLTE reader
386            let mut blte_stream = blte::create_streaming_reader(reader, None)
387                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
388
389            // For encoding files, we need to read all data since the parser expects &[u8]
390            // TODO: This could be further optimized if EncodingFile gets streaming support
391            let mut decompressed = Vec::new();
392            blte_stream
393                .read_to_end(&mut decompressed)
394                .map_err(|e| CascError::DecompressionError(e.to_string()))?;
395
396            EncodingFile::parse(&decompressed)
397                .map_err(|e| CascError::InvalidFormat(format!("Failed to parse encoding: {e}")))?
398        } else {
399            debug!("Encoding manifest is uncompressed");
400            // Read all data for parsing (EncodingFile needs &[u8])
401            reader.seek(std::io::SeekFrom::Start(0))?;
402            let mut data = Vec::new();
403            reader.read_to_end(&mut data)?;
404
405            EncodingFile::parse(&data)
406                .map_err(|e| CascError::InvalidFormat(format!("Failed to parse encoding: {e}")))?
407        };
408
409        info!(
410            "Loaded encoding manifest: {} CKey entries",
411            encoding.ckey_count()
412        );
413
414        // Store in cache
415        *self.encoding.write() = Some(encoding);
416
417        // Clear FileDataID cache as it's now outdated
418        self.fdid_cache.write().clear();
419
420        Ok(())
421    }
422
423    /// Load root manifest from file
424    pub fn load_root_from_file(&self, path: &Path) -> Result<()> {
425        info!("Loading root manifest from file: {:?}", path);
426        let file = std::fs::File::open(path)?;
427        self.load_root_from_reader(file)
428    }
429
430    /// Load encoding manifest from file
431    pub fn load_encoding_from_file(&self, path: &Path) -> Result<()> {
432        info!("Loading encoding manifest from file: {:?}", path);
433        let file = std::fs::File::open(path)?;
434        self.load_encoding_from_reader(file)
435    }
436
437    /// Load a listfile for filename -> FileDataID mappings
438    pub fn load_listfile(&self, path: &Path) -> Result<usize> {
439        info!("Loading listfile from: {:?}", path);
440        let file = std::fs::File::open(path)?;
441        self.load_listfile_from_reader(file)
442    }
443
444    /// Load listfile from streaming reader (memory-efficient for large listfiles)
445    pub fn load_listfile_from_reader<R: std::io::Read>(&self, reader: R) -> Result<usize> {
446        info!("Loading listfile from streaming reader");
447
448        let mut cache = self.filename_cache.write();
449        cache.clear();
450
451        // Use BufReader for efficient line-by-line reading
452        use std::io::{BufRead, BufReader};
453        let buf_reader = BufReader::new(reader);
454
455        let mut count = 0;
456        for line_result in buf_reader.lines() {
457            let line = line_result?;
458
459            // Parse CSV format: "FileDataID;Filename"
460            if let Some(sep_pos) = line.find(';') {
461                if let Ok(fdid) = line[..sep_pos].parse::<u32>() {
462                    let filename = line[sep_pos + 1..].to_string();
463                    cache.insert(filename, fdid);
464                    count += 1;
465                }
466            }
467        }
468
469        info!("Loaded {} filename mappings from listfile", count);
470        Ok(count)
471    }
472
473    /// Lookup a file by FileDataID
474    pub fn lookup_by_fdid(&self, fdid: u32) -> Result<FileMapping> {
475        // Check cache first
476        {
477            let cache = self.fdid_cache.read();
478            if let Some(mapping) = cache.get(&fdid) {
479                return Ok(mapping.clone());
480            }
481        }
482
483        // Try lazy loading first if enabled
484        if self.config.lazy_loading {
485            if let Some(result) = self.lookup_fdid_lazy(fdid)? {
486                return Ok(result);
487            }
488        }
489
490        // Fallback to fully loaded manifests
491        let root = self.root.read();
492        let encoding = self.encoding.read();
493
494        let root = root
495            .as_ref()
496            .ok_or_else(|| CascError::ManifestNotLoaded("root".to_string()))?;
497        let encoding = encoding
498            .as_ref()
499            .ok_or_else(|| CascError::ManifestNotLoaded("encoding".to_string()))?;
500
501        // Get content key from root manifest
502        let content_entries = root
503            .fid_md5
504            .get(&fdid)
505            .ok_or_else(|| CascError::EntryNotFound(format!("FileDataID {fdid}")))?;
506
507        // Find the best matching content entry based on locale/content flags
508        let (flags, content_key) = self.select_best_content(content_entries)?;
509
510        // Get encoding key from encoding manifest
511        let encoding_entry = encoding.lookup_by_ckey(content_key).ok_or_else(|| {
512            CascError::EntryNotFound(format!("CKey {} in encoding", hex::encode(content_key)))
513        })?;
514
515        // Get the first EKey (usually there's only one)
516        let ekey = encoding_entry
517            .encoding_keys
518            .first()
519            .ok_or_else(|| CascError::EntryNotFound("EKey in encoding entry".to_string()))?;
520
521        let mapping = FileMapping {
522            file_data_id: fdid,
523            content_key: *content_key,
524            encoding_key: Some(EKey::from_slice(ekey).unwrap()),
525            flags: Some(*flags),
526        };
527
528        // Cache the result
529        if self.config.cache_manifests {
530            self.fdid_cache.write().insert(fdid, mapping.clone());
531        }
532
533        Ok(mapping)
534    }
535
536    /// Lookup a file by filename
537    pub fn lookup_by_filename(&self, filename: &str) -> Result<FileMapping> {
538        // First try the filename cache
539        let fdid = {
540            let cache = self.filename_cache.read();
541            cache.get(filename).copied()
542        };
543
544        if let Some(fdid) = fdid {
545            return self.lookup_by_fdid(fdid);
546        }
547
548        // Try using jenkins hash from root manifest
549        let root = self.root.read();
550        let root = root
551            .as_ref()
552            .ok_or_else(|| CascError::ManifestNotLoaded("root".to_string()))?;
553
554        let fdid = root
555            .get_fid(filename)
556            .ok_or_else(|| CascError::EntryNotFound(format!("Filename: {filename}")))?;
557
558        self.lookup_by_fdid(fdid)
559    }
560
561    /// Get all FileDataIDs
562    pub fn get_all_fdids(&self) -> Result<Vec<u32>> {
563        let root = self.root.read();
564        let root = root
565            .as_ref()
566            .ok_or_else(|| CascError::ManifestNotLoaded("root".to_string()))?;
567
568        Ok(root.fid_md5.keys().copied().collect())
569    }
570
571    /// Get FileDataID for a filename (if known)
572    pub fn get_fdid_for_filename(&self, filename: &str) -> Option<u32> {
573        // Check filename cache first
574        {
575            let cache = self.filename_cache.read();
576            if let Some(&fdid) = cache.get(filename) {
577                return Some(fdid);
578            }
579        }
580
581        // Try root manifest's jenkins hash lookup
582        let root = self.root.read();
583        root.as_ref()?.get_fid(filename)
584    }
585
586    /// Get EKey for a FileDataID (if manifests are loaded)
587    pub fn get_ekey_for_fdid(&self, fdid: u32) -> Result<EKey> {
588        let mapping = self.lookup_by_fdid(fdid)?;
589        mapping
590            .encoding_key
591            .ok_or_else(|| CascError::EntryNotFound(format!("EKey for FDID {fdid}")))
592    }
593
594    /// Check if manifests are loaded
595    pub fn is_loaded(&self) -> bool {
596        let has_root = self.root.read().is_some() || self.lazy_root.read().is_some();
597        let has_encoding = self.encoding.read().is_some() || self.lazy_encoding.read().is_some();
598        has_root && has_encoding
599    }
600
601    /// Clear all cached data
602    pub fn clear_cache(&self) {
603        self.fdid_cache.write().clear();
604
605        // Clear lazy manifest caches
606        if let Some(lazy_root) = self.lazy_root.write().as_mut() {
607            lazy_root.fdid_cache.clear();
608            lazy_root.hash_cache.clear();
609        }
610
611        if let Some(lazy_encoding) = self.lazy_encoding.write().as_mut() {
612            lazy_encoding.ckey_cache.clear();
613            lazy_encoding.ekey_cache.clear();
614        }
615
616        debug!("Cleared FileDataID and lazy manifest caches");
617    }
618
619    /// Lazy lookup implementation for FileDataID
620    fn lookup_fdid_lazy(&self, _fdid: u32) -> Result<Option<FileMapping>> {
621        let lazy_root = self.lazy_root.read();
622        let lazy_encoding = self.lazy_encoding.read();
623
624        if lazy_root.is_none() || lazy_encoding.is_none() {
625            return Ok(None); // Fallback to full loading
626        }
627
628        // For now, fallback to full loading until we implement on-demand parsing
629        // This provides the infrastructure for lazy loading while maintaining compatibility
630        debug!("Lazy lookup infrastructure ready, falling back to full loading for now");
631        Ok(None)
632    }
633
634    /// Select the best content entry based on locale and content flags
635    fn select_best_content<'a>(
636        &self,
637        entries: &'a std::collections::BTreeMap<
638            tact_parser::wow_root::LocaleContentFlags,
639            [u8; 16],
640        >,
641    ) -> Result<(&'a ContentFlags, &'a [u8; 16])> {
642        // If only one entry, use it
643        if entries.len() == 1 {
644            let (flags, key) = entries.iter().next().unwrap();
645            return Ok((&flags.content, key));
646        }
647
648        // Filter by locale first
649        let locale_matches: Vec<_> = entries
650            .iter()
651            .filter(|(flags, _)| (flags.locale & self.config.locale).any() || flags.locale.all())
652            .collect();
653
654        if locale_matches.is_empty() {
655            // No locale match, use first available
656            let (flags, key) = entries.iter().next().unwrap();
657            return Ok((&flags.content, key));
658        }
659
660        // If content flags are specified, try to match them
661        if let Some(required_flags) = self.config.content_flags {
662            for (flags, key) in &locale_matches {
663                // Check if the entry matches required flags
664                if self.content_flags_match(&flags.content, &required_flags) {
665                    return Ok((&flags.content, key));
666                }
667            }
668        }
669
670        // Use first locale match
671        let (flags, key) = locale_matches[0];
672        Ok((&flags.content, key))
673    }
674
675    /// Check if content flags match requirements
676    fn content_flags_match(&self, flags: &ContentFlags, required: &ContentFlags) -> bool {
677        // Check platform requirements
678        if required.windows() && !flags.windows() {
679            return false;
680        }
681        if required.macos() && !flags.macos() {
682            return false;
683        }
684
685        // Check architecture
686        if required.x86_64() && !flags.x86_64() {
687            return false;
688        }
689        if required.x86_32() && !flags.x86_32() {
690            return false;
691        }
692        if required.aarch64() && !flags.aarch64() {
693            return false;
694        }
695
696        true
697    }
698}