unity_asset_binary/bundle/
parser.rs

1//! Bundle parser implementation
2//!
3//! This module provides the main parsing logic for Unity AssetBundles,
4//! inspired by UnityPy/files/BundleFile.py
5
6use super::compression::BundleCompression;
7use super::header::BundleHeader;
8use super::types::{AssetBundle, BundleFileInfo, BundleLoadOptions, DirectoryNode};
9use crate::compression::CompressionType;
10use crate::error::{BinaryError, Result};
11use crate::reader::{BinaryReader, ByteOrder};
12
/// Main bundle parser.
///
/// A stateless, zero-sized entry point for parsing Unity AssetBundle files.
/// All functionality is exposed through associated functions; both the
/// UnityFS format and the legacy `UnityWeb` / `UnityRaw` formats are handled.
///
/// The common traits are derived so the type can be freely copied, embedded
/// in other `Debug` / `Default` types, and printed in diagnostics.
#[derive(Debug, Clone, Copy, Default)]
pub struct BundleParser;
18
19impl BundleParser {
20    /// Parse an AssetBundle from binary data
21    pub fn from_bytes(data: Vec<u8>) -> Result<AssetBundle> {
22        Self::from_bytes_with_options(data, BundleLoadOptions::default())
23    }
24
25    /// Parse an AssetBundle from binary data with options
26    pub fn from_bytes_with_options(
27        data: Vec<u8>,
28        options: BundleLoadOptions,
29    ) -> Result<AssetBundle> {
30        let data_clone = data.clone();
31        let mut reader = BinaryReader::new(&data, ByteOrder::Big);
32
33        // Parse header
34        let header = BundleHeader::from_reader(&mut reader)?;
35
36        if options.validate {
37            header.validate()?;
38        }
39
40        let mut bundle = AssetBundle::new(header, data_clone);
41
42        // Parse based on bundle format
43        match bundle.header.signature.as_str() {
44            "UnityFS" => {
45                Self::parse_unity_fs(&mut bundle, &mut reader, &options)?;
46            }
47            "UnityWeb" | "UnityRaw" => {
48                Self::parse_legacy(&mut bundle, &mut reader, &options)?;
49            }
50            _ => {
51                return Err(BinaryError::unsupported(format!(
52                    "Unsupported bundle format: {}",
53                    bundle.header.signature
54                )));
55            }
56        }
57
58        if options.validate {
59            bundle.validate()?;
60        }
61
62        Ok(bundle)
63    }
64
65    /// Parse UnityFS format bundle
66    fn parse_unity_fs(
67        bundle: &mut AssetBundle,
68        reader: &mut BinaryReader,
69        options: &BundleLoadOptions,
70    ) -> Result<()> {
71        // Read blocks info
72        Self::read_blocks_info(bundle, reader)?;
73
74        // Decompress data blocks if requested OR if we need to load assets
75        if options.decompress_blocks || options.load_assets {
76            let blocks_data = Self::read_blocks(bundle, reader)?;
77            Self::parse_files(bundle, &blocks_data)?;
78
79            // Load assets if requested
80            if options.load_assets {
81                Self::load_assets(bundle)?;
82            }
83        } else {
84            // Just parse directory structure without decompressing all data
85            Self::parse_directory_lazy(bundle, reader)?;
86        }
87
88        Ok(())
89    }
90
91    /// Parse legacy format bundle
92    fn parse_legacy(
93        bundle: &mut AssetBundle,
94        reader: &mut BinaryReader,
95        options: &BundleLoadOptions,
96    ) -> Result<()> {
97        // Legacy bundles have a simpler structure
98        let header_size = bundle.header.header_size() as usize;
99
100        // Skip to after header
101        reader.set_position(header_size as u64)?;
102
103        // Read compression information
104        let compressed_size = reader.read_u32()?;
105        let _uncompressed_size = reader.read_u32()?;
106
107        // Skip some bytes based on version
108        let skip_bytes = if bundle.header.version >= 2 { 4 } else { 0 };
109        if skip_bytes > 0 {
110            reader.read_bytes(skip_bytes)?;
111        }
112
113        // Move to the data section
114        reader.set_position(header_size as u64)?;
115
116        // Read and decompress the directory data
117        let compressed_data = reader.read_bytes(compressed_size as usize)?;
118        let directory_data = if bundle.header.signature == "UnityWeb" {
119            // UnityWeb uses LZMA compression
120            crate::compression::decompress(
121                &compressed_data,
122                CompressionType::Lzma,
123                compressed_data.len() * 4, // Estimate uncompressed size
124            )?
125        } else {
126            // UnityRaw is uncompressed
127            compressed_data
128        };
129
130        // Parse directory information from decompressed data
131        Self::parse_legacy_directory(bundle, &directory_data, header_size)?;
132
133        // Load assets if requested
134        if options.load_assets {
135            Self::load_assets(bundle)?;
136        }
137
138        Ok(())
139    }
140
141    /// Read compression blocks information
142    fn read_blocks_info(bundle: &mut AssetBundle, reader: &mut BinaryReader) -> Result<()> {
143        // Apply version-specific alignment
144        if bundle.header.version >= 7 {
145            reader.align()?;
146        }
147
148        // TEMPORARY FIX: Always read blocks info from after header, ignore the flag
149        // The BLOCK_INFO_AT_END flag seems to be misunderstood - Python UnityPy always reads from header
150        let blocks_info_data =
151            reader.read_bytes(bundle.header.compressed_blocks_info_size as usize)?;
152
153        // Decompress blocks info
154        let uncompressed_data =
155            BundleCompression::decompress_blocks_info(&bundle.header, &blocks_info_data)?;
156
157        // Parse compression blocks
158        bundle.blocks = BundleCompression::parse_compression_blocks(&uncompressed_data)?;
159
160        // Validate blocks
161        BundleCompression::validate_blocks(&bundle.blocks)?;
162
163        // Parse directory information from the same blocks info data
164        Self::parse_directory_from_blocks_info(bundle, &uncompressed_data)?;
165
166        Ok(())
167    }
168
169    /// Read and decompress all blocks
170    fn read_blocks(bundle: &AssetBundle, reader: &mut BinaryReader) -> Result<Vec<u8>> {
171        BundleCompression::decompress_data_blocks(&bundle.header, &bundle.blocks, reader)
172    }
173
174    /// Parse files from decompressed block data
175    fn parse_files(bundle: &mut AssetBundle, blocks_data: &[u8]) -> Result<()> {
176        // Store the decompressed data
177        *bundle.data_mut() = blocks_data.to_vec();
178
179        // Create file info for each node
180        for node in &bundle.nodes {
181            let file_info = BundleFileInfo::new(node.name.clone(), node.offset, node.size);
182            bundle.files.push(file_info);
183        }
184
185        Ok(())
186    }
187
    /// Lazy-loading path for UnityFS bundles; intentionally a no-op.
    ///
    /// Called by `parse_unity_fs` when neither block decompression nor asset
    /// loading was requested. Both parameters are unused and the function
    /// always returns `Ok(())`.
    fn parse_directory_lazy(_bundle: &mut AssetBundle, _reader: &mut BinaryReader) -> Result<()> {
        // For lazy loading only the directory structure is needed, and that
        // was already parsed by read_blocks_info(), which runs before this
        // function and fills bundle.nodes.
        //
        // No data block is decompressed here, so there is nothing left to do.
        Ok(())
    }
199
200    /// Parse directory structure from blocks info data
201    fn parse_directory_from_blocks_info(
202        bundle: &mut AssetBundle,
203        blocks_info_data: &[u8],
204    ) -> Result<()> {
205        let mut reader = BinaryReader::new(blocks_info_data, ByteOrder::Big);
206
207        // Skip uncompressed data hash (16 bytes)
208        reader.read_bytes(16)?;
209
210        // Skip compression blocks information
211        let block_count = reader.read_i32()? as usize;
212        for _ in 0..block_count {
213            reader.read_u32()?; // uncompressed_size
214            reader.read_u32()?; // compressed_size
215            reader.read_u16()?; // flags
216        }
217
218        // Now read directory information
219        let node_count = reader.read_i32()? as usize;
220
221        // Read directory nodes (UnityFS format)
222        for _i in 0..node_count {
223            let offset = reader.read_i64()? as u64; // UnityFS uses i64 for offset
224            let size = reader.read_i64()? as u64; // UnityFS uses i64 for size
225            let flags = reader.read_u32()?;
226            let name = reader.read_cstring()?;
227
228            let node = DirectoryNode::new(name, offset, size, flags);
229            bundle.nodes.push(node);
230        }
231
232        Ok(())
233    }
234
235    /// Parse directory structure from data (legacy method, kept for compatibility)
236    #[allow(dead_code)]
237    fn parse_directory_from_data(bundle: &mut AssetBundle, data: &[u8]) -> Result<()> {
238        let mut reader = BinaryReader::new(data, ByteOrder::Big);
239
240        // Skip to directory info (this offset varies by bundle version)
241        // This is a simplified implementation
242        reader.set_position(0)?;
243
244        // Read directory node count
245        let node_count = reader.read_i32()? as usize;
246
247        // Read directory nodes
248        for _ in 0..node_count {
249            let offset = reader.read_u64()?;
250            let size = reader.read_u64()?;
251            let flags = reader.read_u32()?;
252            let name = reader.read_cstring()?;
253
254            let node = DirectoryNode::new(name, offset, size, flags);
255            bundle.nodes.push(node);
256        }
257
258        Ok(())
259    }
260
261    /// Parse legacy bundle directory
262    fn parse_legacy_directory(
263        bundle: &mut AssetBundle,
264        directory_data: &[u8],
265        header_size: usize,
266    ) -> Result<()> {
267        let mut dir_reader = BinaryReader::new(directory_data, ByteOrder::Big);
268        dir_reader.set_position(header_size as u64)?; // Skip header in directory data
269
270        // Read file count
271        let file_count = dir_reader.read_i32()? as usize;
272
273        // Read file entries
274        for _ in 0..file_count {
275            let name = dir_reader.read_cstring()?;
276            let offset = dir_reader.read_u32()? as u64;
277            let size = dir_reader.read_u32()? as u64;
278
279            let file_info = BundleFileInfo::new(name.clone(), offset, size);
280            bundle.files.push(file_info);
281
282            // Also create a directory node for consistency
283            let node = DirectoryNode::new(name, offset, size, 1); // Flag 1 = file
284            bundle.nodes.push(node);
285        }
286
287        Ok(())
288    }
289
290    /// Load assets from the bundle files
291    fn load_assets(bundle: &mut AssetBundle) -> Result<()> {
292        // Clone the data to avoid borrowing issues
293        let bundle_data = bundle.data().to_vec();
294        let mut data_reader = BinaryReader::new(&bundle_data, ByteOrder::Big);
295
296        // Clone nodes to avoid borrowing issues
297        let nodes = bundle.nodes.clone();
298
299        for node in &nodes {
300            if node.is_file() {
301                // Skip non-asset files (like .resS files)
302                if node.name.ends_with(".resS") || node.name.ends_with(".resource") {
303                    continue;
304                }
305
306                // Set position to the file's offset in decompressed data
307                data_reader.set_position(node.offset)?;
308
309                // Read the file data
310                let file_data = data_reader.read_bytes(node.size as usize)?;
311
312                // Try to parse as SerializedFile
313                match crate::asset::SerializedFileParser::from_bytes(file_data) {
314                    Ok(serialized_file) => {
315                        // Add the SerializedFile as an asset
316                        bundle.assets.push(serialized_file);
317                    }
318                    Err(_e) => {
319                        // If it's not a valid SerializedFile, skip or handle differently
320                        // For now, we'll skip non-serialized files
321                        continue;
322                    }
323                }
324            }
325        }
326
327        Ok(())
328    }
329
330    /// Estimate parsing complexity
331    pub fn estimate_complexity(data: &[u8]) -> Result<ParsingComplexity> {
332        let mut reader = BinaryReader::new(data, ByteOrder::Big);
333        let header = BundleHeader::from_reader(&mut reader)?;
334
335        let complexity = match header.signature.as_str() {
336            "UnityFS" => {
337                let compression_type = header.compression_type()?;
338                let has_compression = compression_type != CompressionType::None;
339
340                ParsingComplexity {
341                    format: "UnityFS".to_string(),
342                    estimated_time: if has_compression { "Medium" } else { "Fast" }.to_string(),
343                    memory_usage: header.size,
344                    has_compression,
345                    block_count: 0, // Would need to parse blocks info to get accurate count
346                }
347            }
348            "UnityWeb" | "UnityRaw" => ParsingComplexity {
349                format: header.signature.clone(),
350                estimated_time: "Fast".to_string(),
351                memory_usage: header.size,
352                has_compression: header.signature == "UnityWeb",
353                block_count: 1,
354            },
355            _ => {
356                return Err(BinaryError::unsupported(format!(
357                    "Unknown bundle format: {}",
358                    header.signature
359                )));
360            }
361        };
362
363        Ok(complexity)
364    }
365}
366
/// Parsing complexity information.
///
/// A rough, header-only estimate of how expensive a bundle will be to parse,
/// as produced by `BundleParser::estimate_complexity`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct ParsingComplexity {
    /// Bundle signature the estimate applies to (e.g. `"UnityFS"`).
    pub format: String,
    /// Coarse speed label; the estimator emits `"Fast"` or `"Medium"`.
    pub estimated_time: String,
    /// Size value taken from the bundle header's `size` field.
    pub memory_usage: u64,
    /// Whether the bundle payload is reported as compressed.
    pub has_compression: bool,
    /// Number of data blocks; the estimator reports 0 for UnityFS because it
    /// does not parse the blocks info.
    pub block_count: usize,
}
376
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: it only checks that the test harness runs.
    /// Exercising the parser itself requires real bundle data.
    #[test]
    fn test_parser_creation() {
        let sum = 1 + 1;
        assert_eq!(sum, 2);
    }
}
386}