tact_parser/
tvfs.rs

1//! TVFS (TACT Virtual File System) parser
2//!
3//! TVFS is a modern manifest format that defines a virtual filesystem
4//! for game assets, used in newer Blizzard games.
5
6use std::collections::HashMap;
7use std::io::{Cursor, Read, Seek, SeekFrom};
8
9use byteorder::{BigEndian, ReadBytesExt};
10use tracing::{debug, trace};
11
12use crate::utils::read_uint40_be_from;
13use crate::{Error, Result};
14
/// Bit masks for the TVFS header `flags` byte (FileManifestFlags)
pub mod flags {
    /// Bit 0: content records include a CKey
    pub const INCLUDE_CKEY: u8 = 0b0000_0001;
    /// Bit 1: write support is enabled
    pub const WRITE_SUPPORT: u8 = 0b0000_0010;
    /// Bit 2: patch file records are included
    pub const PATCH_SUPPORT: u8 = 0b0000_0100;
    /// Bit 3: paths are forced to lowercase
    pub const LOWERCASE: u8 = 0b0000_1000;
}
26
/// TVFS header structure
///
/// Fields are declared in the order `TVFSHeader::parse` reads them from the
/// stream. Multi-byte values are read big-endian.
// NOTE(review): with six 40-bit table fields, `parse` consumes 45 bytes in
// total, which is more than the 38-byte minimum quoted on `header_size` —
// confirm the field widths against the format specification.
#[derive(Debug, Clone)]
pub struct TVFSHeader {
    /// Magic bytes "TVFS" (0x53465654)
    pub magic: [u8; 4],
    /// Version (typically 1); other values are only logged by the parser,
    /// not rejected
    pub version: u8,
    /// Header size in bytes (minimum 0x26 = 38 bytes); stored as read, the
    /// parser does not act on it
    pub header_size: u8,
    /// EKey size (usually 9)
    pub ekey_size: u8,
    /// Patch key size (usually 9)
    pub patch_key_size: u8,
    /// Flags (FileManifestFlags), a combination of the masks in [`flags`]
    pub flags: u8,
    /// Path table offset (40-bit integer); used as an absolute position in
    /// the manifest data
    pub path_table_offset: u64,
    /// Path table size (40-bit integer)
    pub path_table_size: u64,
    /// VFS table offset (40-bit integer); used as an absolute position in
    /// the manifest data
    pub vfs_table_offset: u64,
    /// VFS table size (40-bit integer)
    pub vfs_table_size: u64,
    /// Container file table offset (40-bit integer); used as an absolute
    /// position in the manifest data
    pub cft_table_offset: u64,
    /// Container file table size (40-bit integer)
    pub cft_table_size: u64,
    /// Maximum metafile size
    pub max_metafile_size: u16,
    /// Build version number
    pub build_version: u32,
}
59
60impl TVFSHeader {
61    /// Parse TVFS header
62    pub fn parse<R: Read>(reader: &mut R) -> Result<Self> {
63        let mut magic = [0u8; 4];
64        reader.read_exact(&mut magic)?;
65
66        // Check for correct TVFS magic bytes (0x53465654)
67        if &magic != b"TVFS" {
68            return Err(Error::IOError(std::io::Error::new(
69                std::io::ErrorKind::InvalidData,
70                format!("Invalid TVFS magic: {magic:?}, expected TVFS"),
71            )));
72        }
73
74        let version = reader.read_u8()?;
75        if version != 1 {
76            debug!("Unexpected TVFS version: {}", version);
77        }
78
79        let header_size = reader.read_u8()?;
80        let ekey_size = reader.read_u8()?;
81        let patch_key_size = reader.read_u8()?;
82        let flags = reader.read_u8()?;
83
84        // Read 40-bit offsets and sizes (big-endian)
85        let path_table_offset = read_uint40_be_from(reader)?;
86        let path_table_size = read_uint40_be_from(reader)?;
87        let vfs_table_offset = read_uint40_be_from(reader)?;
88        let vfs_table_size = read_uint40_be_from(reader)?;
89        let cft_table_offset = read_uint40_be_from(reader)?;
90        let cft_table_size = read_uint40_be_from(reader)?;
91
92        let max_metafile_size = reader.read_u16::<BigEndian>()?;
93        let build_version = reader.read_u32::<BigEndian>()?;
94
95        Ok(TVFSHeader {
96            magic,
97            version,
98            header_size,
99            ekey_size,
100            patch_key_size,
101            flags,
102            path_table_offset,
103            path_table_size,
104            vfs_table_offset,
105            vfs_table_size,
106            cft_table_offset,
107            cft_table_size,
108            max_metafile_size,
109            build_version,
110        })
111    }
112
113    /// Check if TVFS includes CKeys
114    pub fn has_ckey(&self) -> bool {
115        self.flags & flags::INCLUDE_CKEY != 0
116    }
117
118    /// Check if TVFS has write support
119    pub fn has_write_support(&self) -> bool {
120        self.flags & flags::WRITE_SUPPORT != 0
121    }
122
123    /// Check if TVFS has patch support
124    pub fn has_patch_support(&self) -> bool {
125        self.flags & flags::PATCH_SUPPORT != 0
126    }
127
128    /// Check if TVFS forces lowercase paths
129    pub fn has_lowercase_paths(&self) -> bool {
130        self.flags & flags::LOWERCASE != 0
131    }
132}
133
/// Path table entry
///
/// One entry is produced per length-prefixed string read from the path table.
#[derive(Debug, Clone)]
pub struct PathEntry {
    /// Path string (decoded as UTF-8 by the parser)
    pub path: String,
    /// Path hash, computed by the parser with `crate::utils::jenkins3_hashpath`
    /// rather than read from the file
    pub hash: u64,
}
142
/// VFS entry type
///
/// The parser derives the type from the low two bits of each entry's first
/// byte: 0 = `File`, 1 = `Deleted`, 2 = `Inline`, 3 = `Link`. The variants are
/// declared in that same order.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum VFSEntryType {
    /// Regular file (type value 0); the only type that carries span info
    File,
    /// Deleted file (type value 1)
    Deleted,
    /// Inline data (type value 2); carries a file offset and size
    Inline,
    /// Link to another entry (type value 3)
    Link,
}
155
/// VFS table entry
#[derive(Debug, Clone)]
pub struct VFSEntry {
    /// Entry type
    pub entry_type: VFSEntryType,
    /// Span offset in CFT table: the manifest uses it as the index of the
    /// entry's first span in `cft_table`. Parsed only for `File` entries,
    /// 0 otherwise.
    pub span_offset: u32,
    /// Span count: number of consecutive CFT entries starting at
    /// `span_offset`. Parsed only for `File` entries, 0 otherwise.
    pub span_count: u32,
    /// Path index: index into the manifest's `path_table`
    pub path_index: u32,
    /// File offset (for inline data); `Some` only for `Inline` entries
    pub file_offset: Option<u64>,
    /// File size (for inline data); `Some` only for `Inline` entries
    pub file_size: Option<u32>,
}
172
/// Container file table entry (file span)
#[derive(Debug, Clone)]
pub struct CFTEntry {
    /// Encoding key (or content key); the parser currently reads a fixed
    /// 16 bytes for it
    pub ekey: Vec<u8>,
    /// File size, read as a 40-bit big-endian integer
    pub file_size: u64,
    /// ESpec index (optional); `None` when the table is parsed without ESpec
    /// indices, which is what `TVFSManifest::parse` currently does
    pub espec_index: Option<u32>,
}
183
/// TVFS manifest
///
/// Holds the parsed header and tables plus a lookup map from path string to
/// VFS entry.
#[derive(Debug, Clone)]
pub struct TVFSManifest {
    /// Header information
    pub header: TVFSHeader,
    /// Path table
    pub path_table: Vec<PathEntry>,
    /// VFS table
    pub vfs_table: Vec<VFSEntry>,
    /// Container file table
    pub cft_table: Vec<CFTEntry>,
    /// ESpec table (optional); `TVFSManifest::parse` currently always leaves
    /// this as `None`
    pub espec_table: Option<Vec<String>>,
    /// Path to VFS entry mapping: path string -> index into `vfs_table`
    path_map: HashMap<String, usize>,
}
200
201impl TVFSManifest {
202    /// Parse a TVFS manifest from bytes
203    pub fn parse(data: &[u8]) -> Result<Self> {
204        let mut cursor = Cursor::new(data);
205
206        // Parse header
207        let header = TVFSHeader::parse(&mut cursor)?;
208
209        debug!(
210            "Parsing TVFS v{} with {} bytes, flags: {:#04x}",
211            header.version,
212            data.len(),
213            header.flags
214        );
215
216        // Parse path table
217        cursor.seek(SeekFrom::Start(header.path_table_offset))?;
218        let path_table = Self::parse_path_table(&mut cursor, header.path_table_size as usize)?;
219
220        // Parse VFS table
221        cursor.seek(SeekFrom::Start(header.vfs_table_offset))?;
222        let vfs_table = Self::parse_vfs_table(&mut cursor, header.vfs_table_size as usize)?;
223
224        // Parse CFT table
225        cursor.seek(SeekFrom::Start(header.cft_table_offset))?;
226        let cft_table = Self::parse_cft_table(
227            &mut cursor,
228            header.cft_table_size as usize,
229            false, // ESpec support - currently not implemented
230        )?;
231
232        // ESpec table parsing not yet implemented
233        let espec_table = None;
234
235        // Build path map for quick lookups
236        let mut path_map = HashMap::new();
237        for (idx, entry) in vfs_table.iter().enumerate() {
238            if entry.path_index < path_table.len() as u32 {
239                let path = &path_table[entry.path_index as usize].path;
240                path_map.insert(path.clone(), idx);
241            }
242        }
243
244        Ok(TVFSManifest {
245            header,
246            path_table,
247            vfs_table,
248            cft_table,
249            espec_table,
250            path_map,
251        })
252    }
253
254    /// Parse path table
255    fn parse_path_table<R: Read>(reader: &mut R, size: usize) -> Result<Vec<PathEntry>> {
256        let mut entries = Vec::new();
257        let mut bytes_read = 0usize;
258
259        debug!("Parsing path table with size: {}", size);
260
261        while bytes_read < size {
262            // In TFVS format, path entries use a simple structure:
263            // - 0x00 byte indicates path separator '/' before
264            // - Length byte (1-255) for path component
265            // - Path component string
266            // - 0x00 byte indicates path separator '/' after
267            // - 0xFF followed by 4 bytes for node value
268
269            // For now, use simplified parsing - read length byte directly
270            let path_len = reader.read_u8()? as usize;
271            bytes_read += 1;
272
273            if path_len == 0 || bytes_read >= size {
274                break; // End of table or separator
275            }
276
277            // Read path string
278            let mut path_bytes = vec![0u8; path_len];
279            reader.read_exact(&mut path_bytes)?;
280            bytes_read += path_len;
281
282            let path = String::from_utf8(path_bytes).map_err(|e| {
283                Error::IOError(std::io::Error::new(
284                    std::io::ErrorKind::InvalidData,
285                    format!("Invalid UTF-8 in path: {e}"),
286                ))
287            })?;
288
289            // Calculate path hash (Jenkins3)
290            let hash = crate::utils::jenkins3_hashpath(&path);
291
292            trace!("Path entry: {} (hash: {:#x})", path, hash);
293
294            entries.push(PathEntry { path, hash });
295        }
296
297        debug!("Parsed {} path entries", entries.len());
298        Ok(entries)
299    }
300
301    /// Parse VFS table
302    fn parse_vfs_table<R: Read>(reader: &mut R, size: usize) -> Result<Vec<VFSEntry>> {
303        let mut entries = Vec::new();
304        let mut bytes_read = 0usize;
305
306        while bytes_read < size {
307            if bytes_read >= size {
308                break;
309            }
310
311            // Read entry type and flags
312            let type_byte = reader.read_u8()?;
313            bytes_read += 1;
314
315            let entry_type = match type_byte & 0x03 {
316                0 => VFSEntryType::File,
317                1 => VFSEntryType::Deleted,
318                2 => VFSEntryType::Inline,
319                3 => VFSEntryType::Link,
320                _ => unreachable!(),
321            };
322
323            // Read span info for files
324            let (span_offset, span_count) = if entry_type == VFSEntryType::File {
325                // Read varint for span offset directly
326                let mut offset = 0u32;
327                let mut shift = 0;
328                for _ in 0..5 {
329                    let byte = reader.read_u8()?;
330                    bytes_read += 1;
331                    let value = (byte & 0x7F) as u32;
332                    offset |= value << shift;
333                    if byte & 0x80 == 0 {
334                        break;
335                    }
336                    shift += 7;
337                }
338
339                // Read varint for span count directly
340                let mut count = 0u32;
341                shift = 0;
342                for _ in 0..5 {
343                    let byte = reader.read_u8()?;
344                    bytes_read += 1;
345                    let value = (byte & 0x7F) as u32;
346                    count |= value << shift;
347                    if byte & 0x80 == 0 {
348                        break;
349                    }
350                    shift += 7;
351                }
352
353                (offset, count)
354            } else {
355                (0, 0)
356            };
357
358            // Read path index varint directly
359            let mut path_index = 0u32;
360            let mut shift = 0;
361            for _ in 0..5 {
362                let byte = reader.read_u8()?;
363                bytes_read += 1;
364                let value = (byte & 0x7F) as u32;
365                path_index |= value << shift;
366                if byte & 0x80 == 0 {
367                    break;
368                }
369                shift += 7;
370            }
371
372            // Read inline data info if applicable
373            let (file_offset, file_size) = if entry_type == VFSEntryType::Inline {
374                let offset = read_uint40_be_from(reader)?;
375                bytes_read += 5;
376                let size = reader.read_u32::<BigEndian>()?;
377                bytes_read += 4;
378                (Some(offset), Some(size))
379            } else {
380                (None, None)
381            };
382
383            entries.push(VFSEntry {
384                entry_type,
385                span_offset,
386                span_count,
387                path_index,
388                file_offset,
389                file_size,
390            });
391        }
392
393        debug!("Parsed {} VFS entries", entries.len());
394        Ok(entries)
395    }
396
397    /// Parse container file table
398    fn parse_cft_table<R: Read>(
399        reader: &mut R,
400        size: usize,
401        has_est_table: bool,
402    ) -> Result<Vec<CFTEntry>> {
403        let mut entries = Vec::new();
404        let mut bytes_read = 0usize;
405
406        while bytes_read < size {
407            // Read encoding key (16 bytes MD5)
408            let mut ekey = vec![0u8; 16];
409            reader.read_exact(&mut ekey)?;
410            bytes_read += 16;
411
412            // Read file size (40-bit, big-endian)
413            let file_size = read_uint40_be_from(reader)?;
414            bytes_read += 5;
415
416            // Read ESpec index if EST table is present (1 byte)
417            let espec_index = if has_est_table {
418                let index = reader.read_u8()?;
419                bytes_read += 1;
420                Some(index as u32)
421            } else {
422                None
423            };
424
425            entries.push(CFTEntry {
426                ekey,
427                file_size,
428                espec_index,
429            });
430        }
431
432        debug!("Parsed {} CFT entries", entries.len());
433        Ok(entries)
434    }
435
436    // Note: ESpec table parsing would be added here when needed
437    // The parse_espec_table function has been removed as it's not currently used
438    // It can be re-added when ESpec support is fully implemented
439
440    /// Resolve a file path to its file information
441    pub fn resolve_path(&self, path: &str) -> Option<FileInfo> {
442        // Look up VFS entry by path
443        let vfs_index = *self.path_map.get(path)?;
444        let vfs_entry = &self.vfs_table[vfs_index];
445
446        match vfs_entry.entry_type {
447            VFSEntryType::File => {
448                // Collect file spans
449                let mut spans = Vec::new();
450                for i in 0..vfs_entry.span_count {
451                    let cft_index = (vfs_entry.span_offset + i) as usize;
452                    if cft_index < self.cft_table.len() {
453                        let cft_entry = &self.cft_table[cft_index];
454                        spans.push(FileSpan {
455                            ekey: cft_entry.ekey.clone(),
456                            file_size: cft_entry.file_size,
457                            espec: cft_entry.espec_index.and_then(|idx| {
458                                self.espec_table.as_ref()?.get(idx as usize).cloned()
459                            }),
460                        });
461                    }
462                }
463
464                Some(FileInfo {
465                    path: path.to_string(),
466                    entry_type: vfs_entry.entry_type,
467                    spans,
468                    inline_data: None,
469                })
470            }
471            VFSEntryType::Inline => Some(FileInfo {
472                path: path.to_string(),
473                entry_type: vfs_entry.entry_type,
474                spans: Vec::new(),
475                inline_data: Some((vfs_entry.file_offset?, vfs_entry.file_size?)),
476            }),
477            _ => None,
478        }
479    }
480
481    /// List all files in a directory
482    pub fn list_directory(&self, dir_path: &str) -> Vec<DirEntry> {
483        let mut entries = Vec::new();
484        let dir_prefix = if dir_path.ends_with('/') {
485            dir_path.to_string()
486        } else if dir_path.is_empty() {
487            String::new()
488        } else {
489            format!("{dir_path}/")
490        };
491
492        for path_entry in &self.path_table {
493            if path_entry.path.starts_with(&dir_prefix) {
494                let relative_path = &path_entry.path[dir_prefix.len()..];
495
496                // Check if it's a direct child (no additional slashes)
497                if !relative_path.contains('/') && !relative_path.is_empty() {
498                    if let Some(vfs_index) = self.path_map.get(&path_entry.path) {
499                        let vfs_entry = &self.vfs_table[*vfs_index];
500
501                        let is_directory = false; // TVFS doesn't have explicit directories
502                        let size = if vfs_entry.entry_type == VFSEntryType::File {
503                            self.calculate_file_size(*vfs_index)
504                        } else {
505                            0
506                        };
507
508                        entries.push(DirEntry {
509                            name: relative_path.to_string(),
510                            path: path_entry.path.clone(),
511                            is_directory,
512                            size,
513                        });
514                    }
515                }
516            }
517        }
518
519        entries
520    }
521
522    /// Calculate total size of a file (sum of all spans)
523    fn calculate_file_size(&self, vfs_index: usize) -> u64 {
524        let vfs_entry = &self.vfs_table[vfs_index];
525        let mut total_size = 0u64;
526
527        for i in 0..vfs_entry.span_count {
528            let cft_index = (vfs_entry.span_offset + i) as usize;
529            if cft_index < self.cft_table.len() {
530                total_size += self.cft_table[cft_index].file_size;
531            }
532        }
533
534        total_size
535    }
536
537    /// Get file count
538    pub fn file_count(&self) -> usize {
539        self.vfs_table
540            .iter()
541            .filter(|e| e.entry_type == VFSEntryType::File || e.entry_type == VFSEntryType::Inline)
542            .count()
543    }
544
545    /// Get deleted file count
546    pub fn deleted_count(&self) -> usize {
547        self.vfs_table
548            .iter()
549            .filter(|e| e.entry_type == VFSEntryType::Deleted)
550            .count()
551    }
552
553    /// Get total size of all files
554    pub fn total_size(&self) -> u64 {
555        self.cft_table.iter().map(|e| e.file_size).sum()
556    }
557}
558
/// File span information
///
/// Built by `TVFSManifest::resolve_path` from one container file table entry.
#[derive(Debug, Clone)]
pub struct FileSpan {
    /// Encoding key for this span (copied from the CFT entry)
    pub ekey: Vec<u8>,
    /// Size of this span
    pub file_size: u64,
    /// ESpec string (optional); `None` when the CFT entry has no ESpec index
    /// or the manifest has no ESpec table
    pub espec: Option<String>,
}
569
/// File information
///
/// Result of `TVFSManifest::resolve_path` for `File` and `Inline` entries.
#[derive(Debug, Clone)]
pub struct FileInfo {
    /// File path (the path that was looked up)
    pub path: String,
    /// Entry type
    pub entry_type: VFSEntryType,
    /// File spans (for regular files); empty for inline entries
    pub spans: Vec<FileSpan>,
    /// Inline data location (offset, size) for inline entries; `None` for
    /// regular files
    pub inline_data: Option<(u64, u32)>,
}
582
/// Directory entry
///
/// Produced by `TVFSManifest::list_directory`.
#[derive(Debug, Clone)]
pub struct DirEntry {
    /// Entry name (relative to directory)
    pub name: String,
    /// Full path
    pub path: String,
    /// Whether this is a directory; `list_directory` currently always sets
    /// this to `false`
    pub is_directory: bool,
    /// File size (0 for directories); `list_directory` reports the summed
    /// span sizes for `File` entries and 0 for every other entry type
    pub size: u64,
}
595
#[cfg(test)]
mod tests {
    use super::*;

    /// The flag accessors report exactly the bits set in `flags`.
    #[test]
    fn test_tvfs_header_flags() {
        let header = TVFSHeader {
            magic: *b"TVFS",
            version: 1,
            header_size: 38,
            ekey_size: 9,
            patch_key_size: 9,
            flags: flags::INCLUDE_CKEY | flags::WRITE_SUPPORT,
            path_table_offset: 100,
            path_table_size: 200,
            vfs_table_offset: 300,
            vfs_table_size: 400,
            cft_table_offset: 700,
            cft_table_size: 500,
            max_metafile_size: 1024,
            build_version: 42000,
        };

        // Set bits
        assert!(header.has_ckey());
        assert!(header.has_write_support());
        // Clear bits
        assert!(!header.has_patch_support());
        assert!(!header.has_lowercase_paths());
    }

    /// `File` has a different discriminant from every other entry type.
    #[test]
    fn test_vfs_entry_type() {
        // VFSEntryType values are encoded in 2 bits
        let file = VFSEntryType::File as u8;
        let others = [
            VFSEntryType::Deleted,
            VFSEntryType::Inline,
            VFSEntryType::Link,
        ];

        for &other in &others {
            assert_ne!(file, other as u8);
        }
    }

    /// 40-bit big-endian values survive an encode/decode round trip.
    #[test]
    fn test_tvfs_40bit_offsets() {
        use crate::utils::{read_uint40_be, write_uint40_be};

        // The largest 40-bit value is one byte short of 1TB
        let one_tb = 1_099_511_627_776u64;
        let max_40bit = (1u64 << 40) - 1;
        assert_eq!(max_40bit, one_tb - 1);

        // Round trip of the maximum value (big-endian for TVFS)
        let encoded_max = write_uint40_be(max_40bit);
        assert_eq!(encoded_max.len(), 5);
        assert_eq!(read_uint40_be(&encoded_max).unwrap(), max_40bit);

        // Round trip of a more typical large file size (100GB)
        let hundred_gb = 100u64 << 30;
        let encoded_100gb = write_uint40_be(hundred_gb);
        assert_eq!(read_uint40_be(&encoded_100gb).unwrap(), hundred_gb);
    }
}