unity_asset_binary/typetree/
parser.rs

1//! TypeTree parser implementation
2//!
3//! This module provides parsing functionality for Unity TypeTree structures,
4//! inspired by UnityPy/classes/TypeTree.py
5
6use super::common_strings;
7use super::types::{TypeTree, TypeTreeNode};
8use crate::error::{BinaryError, Result};
9use crate::reader::BinaryReader;
10
/// Stateless entry point for TypeTree parsing.
///
/// The type carries no data; all functionality is exposed through associated
/// functions that read TypeTree structures from a `BinaryReader`, with separate
/// entry points for the different on-disk formats.
pub struct TypeTreeParser;
16
17impl TypeTreeParser {
18    /// Parse TypeTree from binary data
19    pub fn from_reader(reader: &mut BinaryReader, version: u32) -> Result<TypeTree> {
20        let mut tree = TypeTree::new();
21        tree.version = version;
22
23        // Read number of nodes
24        let node_count = reader.read_u32()? as usize;
25
26        // Read string buffer size
27        let string_buffer_size = reader.read_u32()? as usize;
28
29        // Read nodes
30        for _ in 0..node_count {
31            let node = Self::read_node(reader, version)?;
32            tree.nodes.push(node);
33        }
34
35        // Read string buffer
36        tree.string_buffer = reader.read_bytes(string_buffer_size)?;
37
38        // Resolve string references
39        Self::resolve_strings(&mut tree)?;
40
41        // Build tree hierarchy
42        Self::build_hierarchy(&mut tree)?;
43
44        Ok(tree)
45    }
46
47    /// Parse TypeTree from binary data using blob format (Unity version >= 12 or == 10)
48    pub fn from_reader_blob(reader: &mut BinaryReader, version: u32) -> Result<TypeTree> {
49        let mut tree = TypeTree::new();
50        tree.version = version;
51
52        // Read number of nodes
53        let node_count = reader.read_i32()? as usize;
54
55        // Read string buffer size
56        let string_buffer_size = reader.read_i32()? as usize;
57
58        // Read nodes in blob format
59        for _ in 0..node_count {
60            let mut node = TypeTreeNode::new();
61
62            // Read node data in blob format (based on unity-rs)
63            node.version = reader.read_u16()? as i32;
64            node.level = reader.read_u8()? as i32;
65            node.type_flags = reader.read_u8()? as i32;
66            node.type_str_offset = reader.read_u32()?;
67            node.name_str_offset = reader.read_u32()?;
68            node.byte_size = reader.read_i32()?;
69            node.index = reader.read_i32()?;
70            node.meta_flags = reader.read_i32()?;
71
72            if version >= 19 {
73                node.ref_type_hash = reader.read_u64()?;
74            }
75
76            tree.nodes.push(node);
77        }
78
79        // Read string buffer
80        tree.string_buffer = reader.read_bytes(string_buffer_size)?;
81
82        // Resolve string references
83        Self::resolve_strings(&mut tree)?;
84
85        // Build tree hierarchy
86        Self::build_hierarchy(&mut tree)?;
87
88        Ok(tree)
89    }
90
91    /// Read a single TypeTree node
92    fn read_node(reader: &mut BinaryReader, version: u32) -> Result<TypeTreeNode> {
93        let mut node = TypeTreeNode::new();
94
95        if version >= 10 {
96            node.version = reader.read_i16()? as i32;
97            node.level = reader.read_u8()? as i32;
98            node.type_flags = reader.read_u8()? as i32;
99            node.type_str_offset = reader.read_u32()?;
100            node.name_str_offset = reader.read_u32()?;
101            node.byte_size = reader.read_i32()?;
102            node.index = reader.read_i32()?;
103            node.meta_flags = reader.read_i32()?;
104
105            if version >= 12 {
106                node.ref_type_hash = reader.read_u64()?;
107            }
108        } else {
109            // Legacy format
110            node.type_str_offset = reader.read_u32()?;
111            node.name_str_offset = reader.read_u32()?;
112            node.byte_size = reader.read_i32()?;
113            node.index = reader.read_i32()?;
114            node.type_flags = reader.read_i32()?;
115            node.version = reader.read_i32()?;
116            node.meta_flags = reader.read_i32()?;
117            node.level = reader.read_i32()?;
118        }
119
120        Ok(node)
121    }
122
123    /// Resolve string references in the TypeTree
124    fn resolve_strings(tree: &mut TypeTree) -> Result<()> {
125        for node in &mut tree.nodes {
126            Self::resolve_node_strings(node, &tree.string_buffer)?;
127        }
128        Ok(())
129    }
130
131    /// Resolve string references for a single node and its children
132    fn resolve_node_strings(node: &mut TypeTreeNode, string_buffer: &[u8]) -> Result<()> {
133        // Resolve type name
134        node.type_name = Self::resolve_string(string_buffer, node.type_str_offset)?;
135
136        // Resolve field name
137        node.name = Self::resolve_string(string_buffer, node.name_str_offset)?;
138
139        // Resolve children
140        for child in &mut node.children {
141            Self::resolve_node_strings(child, string_buffer)?;
142        }
143
144        Ok(())
145    }
146
147    /// Resolve TypeTree strings which can either reference the local string buffer or a global
148    /// common string buffer (signaled via the high bit in blob TypeTrees).
149    fn resolve_string(buffer: &[u8], offset: u32) -> Result<String> {
150        const COMMON_STRING_FLAG: u32 = 0x8000_0000;
151
152        if (offset & COMMON_STRING_FLAG) != 0 {
153            let common_offset = offset & !COMMON_STRING_FLAG;
154            return Ok(common_strings::get_common_string(common_offset)
155                .unwrap_or_default()
156                .to_string());
157        }
158
159        Self::get_string_from_buffer(buffer, offset)
160    }
161
162    /// Get string from buffer at offset
163    fn get_string_from_buffer(buffer: &[u8], offset: u32) -> Result<String> {
164        if offset as usize >= buffer.len() {
165            return Ok(String::new());
166        }
167
168        let start = offset as usize;
169        let end = buffer[start..]
170            .iter()
171            .position(|&b| b == 0)
172            .map(|pos| start + pos)
173            .unwrap_or(buffer.len());
174
175        String::from_utf8(buffer[start..end].to_vec())
176            .map_err(|e| BinaryError::generic(format!("Invalid UTF-8 string: {}", e)))
177    }
178
179    /// Build hierarchical structure from flat node list
180    fn build_hierarchy(tree: &mut TypeTree) -> Result<()> {
181        if tree.nodes.is_empty() {
182            return Ok(());
183        }
184
185        // Create a working copy of nodes
186        let mut nodes = std::mem::take(&mut tree.nodes);
187
188        // Build hierarchy using a stack-based approach
189        let mut stack: Vec<(i32, usize)> = Vec::new(); // (level, index)
190        let mut root_nodes = Vec::new();
191
192        for (i, node) in nodes.iter().enumerate() {
193            let current_level = node.level;
194
195            // Pop stack until we find the parent level
196            while let Some(&(level, _)) = stack.last() {
197                if level < current_level {
198                    break;
199                }
200                stack.pop();
201            }
202
203            if let Some(&(_, _parent_idx)) = stack.last() {
204                // This node is a child of the node at parent_idx
205                // We'll handle this in the second pass
206            } else {
207                // This is a root node
208                root_nodes.push(i);
209            }
210
211            stack.push((current_level, i));
212        }
213
214        // Second pass: actually build the hierarchy
215        let mut processed = vec![false; nodes.len()];
216        let mut result_nodes = Vec::new();
217
218        for &root_idx in &root_nodes {
219            if !processed[root_idx] {
220                let root_node = Self::build_node_hierarchy(&mut nodes, &mut processed, root_idx)?;
221                result_nodes.push(root_node);
222            }
223        }
224
225        tree.nodes = result_nodes;
226        Ok(())
227    }
228
229    /// Build hierarchy for a single node and its children
230    fn build_node_hierarchy(
231        nodes: &mut [TypeTreeNode],
232        processed: &mut [bool],
233        node_idx: usize,
234    ) -> Result<TypeTreeNode> {
235        if processed[node_idx] {
236            return Err(BinaryError::generic("Node already processed"));
237        }
238
239        let mut node = nodes[node_idx].clone();
240        processed[node_idx] = true;
241
242        let current_level = node.level;
243        node.children.clear();
244
245        // Find children (nodes with level = current_level + 1 that come after this node)
246        for i in (node_idx + 1)..nodes.len() {
247            if processed[i] {
248                continue;
249            }
250
251            let child_level = nodes[i].level;
252
253            if child_level <= current_level {
254                // We've reached a sibling or parent level, stop looking for children
255                break;
256            }
257
258            if child_level == current_level + 1 {
259                // This is a direct child
260                let child_node = Self::build_node_hierarchy(nodes, processed, i)?;
261                node.children.push(child_node);
262            }
263        }
264
265        Ok(node)
266    }
267
268    /// Validate parsed TypeTree
269    pub fn validate(tree: &TypeTree) -> Result<()> {
270        if tree.nodes.is_empty() {
271            return Err(BinaryError::invalid_data("TypeTree has no nodes"));
272        }
273
274        for (i, node) in tree.nodes.iter().enumerate() {
275            Self::validate_node(node, 0).map_err(|e| {
276                BinaryError::generic(format!("Node {} validation failed: {}", i, e))
277            })?;
278        }
279
280        Ok(())
281    }
282
283    /// Validate a single node and its children
284    fn validate_node(node: &TypeTreeNode, expected_level: i32) -> Result<()> {
285        if node.type_name.is_empty() {
286            return Err(BinaryError::invalid_data("Node has empty type name"));
287        }
288
289        if node.level != expected_level {
290            return Err(BinaryError::invalid_data(format!(
291                "Node level mismatch: expected {}, got {}",
292                expected_level, node.level
293            )));
294        }
295
296        if node.byte_size < -1 {
297            return Err(BinaryError::invalid_data("Invalid byte size"));
298        }
299
300        // Validate children
301        for child in &node.children {
302            Self::validate_node(child, expected_level + 1)?;
303        }
304
305        Ok(())
306    }
307
308    /// Get parsing statistics
309    pub fn get_parsing_stats(tree: &TypeTree) -> ParsingStats {
310        let mut stats = (0usize, 0i32, 0usize, 0usize); // (total_nodes, max_depth, primitive_count, array_count)
311
312        fn count_nodes(node: &TypeTreeNode, depth: i32, stats: &mut (usize, i32, usize, usize)) {
313            stats.0 += 1; // total_nodes
314            stats.1 = stats.1.max(depth); // max_depth
315
316            if node.is_primitive() {
317                stats.2 += 1; // primitive_count
318            }
319            if node.is_array() {
320                stats.3 += 1; // array_count
321            }
322
323            for child in &node.children {
324                count_nodes(child, depth + 1, stats);
325            }
326        }
327
328        for node in &tree.nodes {
329            count_nodes(node, 0, &mut stats);
330        }
331
332        ParsingStats {
333            total_nodes: stats.0,
334            root_nodes: tree.nodes.len(),
335            max_depth: stats.1,
336            primitive_count: stats.2,
337            array_count: stats.3,
338            string_buffer_size: tree.string_buffer.len(),
339            version: tree.version,
340        }
341    }
342}
343
/// Summary figures for a parsed TypeTree, as produced by
/// `TypeTreeParser::get_parsing_stats`.
#[derive(Debug, Clone)]
pub struct ParsingStats {
    /// Number of nodes in the tree, counting roots and all nested children.
    pub total_nodes: usize,
    /// Number of top-level nodes.
    pub root_nodes: usize,
    /// Deepest nesting reached, with root nodes at depth 0.
    pub max_depth: i32,
    /// Number of nodes for which `is_primitive()` is true.
    pub primitive_count: usize,
    /// Number of nodes for which `is_array()` is true.
    pub array_count: usize,
    /// Length in bytes of the tree's string buffer.
    pub string_buffer_size: usize,
    /// Version value stored on the tree.
    pub version: u32,
}
355
#[cfg(test)]
mod tests {
    use super::*;
    use crate::reader::{BinaryReader, ByteOrder};

    /// High bit of a string offset: marks a reference into the common string table.
    const COMMON_STRING_FLAG: u32 = 0x8000_0000;

    /// Serialize a little-endian blob TypeTree (version >= 19 node layout) with
    /// one node per entry in `levels` and an empty local string buffer. Every
    /// node uses common-string offset 0 for both its type and its name.
    fn blob_tree_bytes(levels: &[u8]) -> Vec<u8> {
        let mut data = Vec::new();
        data.extend_from_slice(&(levels.len() as i32).to_le_bytes()); // node_count
        data.extend_from_slice(&(0i32).to_le_bytes()); // string_buffer_size

        for &level in levels {
            data.extend_from_slice(&(1u16).to_le_bytes()); // version
            data.push(level); // level
            data.push(0u8); // type_flags
            data.extend_from_slice(&COMMON_STRING_FLAG.to_le_bytes()); // type_str_offset
            data.extend_from_slice(&COMMON_STRING_FLAG.to_le_bytes()); // name_str_offset
            data.extend_from_slice(&(0i32).to_le_bytes()); // byte_size
            data.extend_from_slice(&(0i32).to_le_bytes()); // index
            data.extend_from_slice(&(0i32).to_le_bytes()); // meta_flags
            data.extend_from_slice(&(0u64).to_le_bytes()); // ref_type_hash (version >= 19)
        }

        data
    }

    #[test]
    fn test_empty_blob_typetree_parses_but_fails_validation() {
        let data = blob_tree_bytes(&[]);
        let mut reader = BinaryReader::new(&data, ByteOrder::Little);
        let tree = TypeTreeParser::from_reader_blob(&mut reader, 19).unwrap();

        assert!(tree.nodes.is_empty());
        assert_eq!(tree.version, 19);
        // A tree without nodes is parseable but not considered valid.
        assert!(TypeTreeParser::validate(&tree).is_err());
    }

    #[test]
    fn test_string_buffer_parsing() {
        let buffer = b"hello\0world\0test\0";
        let result = TypeTreeParser::get_string_from_buffer(buffer, 0).unwrap();
        assert_eq!(result, "hello");

        let result = TypeTreeParser::get_string_from_buffer(buffer, 6).unwrap();
        assert_eq!(result, "world");

        let result = TypeTreeParser::get_string_from_buffer(buffer, 12).unwrap();
        assert_eq!(result, "test");
    }

    #[test]
    fn test_string_buffer_edge_cases() {
        // Offsets at or beyond the end of the buffer resolve to an empty string.
        let buffer = b"abc\0";
        assert_eq!(TypeTreeParser::get_string_from_buffer(buffer, 4).unwrap(), "");
        assert_eq!(TypeTreeParser::get_string_from_buffer(buffer, 100).unwrap(), "");

        // A string without a terminator runs to the end of the buffer.
        let unterminated = b"abc";
        assert_eq!(
            TypeTreeParser::get_string_from_buffer(unterminated, 1).unwrap(),
            "bc"
        );

        // Invalid UTF-8 is reported as an error instead of being lossily converted.
        let invalid = [0xffu8, 0xfe, 0x00];
        assert!(TypeTreeParser::get_string_from_buffer(&invalid, 0).is_err());
    }

    #[test]
    fn test_common_string_flag_resolves_known_offsets() {
        let local = b"ignored\0";

        // offset 0 in the common string buffer maps to "AABB"
        let result = TypeTreeParser::resolve_string(local, COMMON_STRING_FLAG).unwrap();
        assert_eq!(result, "AABB");

        // An unknown common-string offset should not error, but should resolve to empty.
        let result = TypeTreeParser::resolve_string(local, COMMON_STRING_FLAG | 123_456).unwrap();
        assert_eq!(result, "");
    }

    #[test]
    fn test_blob_typetree_parsing_resolves_common_strings() {
        let data = blob_tree_bytes(&[0]);
        let mut reader = BinaryReader::new(&data, ByteOrder::Little);
        let tree = TypeTreeParser::from_reader_blob(&mut reader, 19).unwrap();

        assert_eq!(tree.nodes.len(), 1);
        assert_eq!(tree.nodes[0].type_name, "AABB");
        assert_eq!(tree.nodes[0].name, "AABB");
    }

    #[test]
    fn test_blob_typetree_parsing_builds_hierarchy() {
        // Depth-first layout: root, child, grandchild, second child.
        let data = blob_tree_bytes(&[0, 1, 2, 1]);
        let mut reader = BinaryReader::new(&data, ByteOrder::Little);
        let tree = TypeTreeParser::from_reader_blob(&mut reader, 19).unwrap();

        assert_eq!(tree.nodes.len(), 1);
        let root = &tree.nodes[0];
        assert_eq!(root.children.len(), 2);
        assert_eq!(root.children[0].children.len(), 1);
        assert!(root.children[1].children.is_empty());

        assert!(TypeTreeParser::validate(&tree).is_ok());

        let stats = TypeTreeParser::get_parsing_stats(&tree);
        assert_eq!(stats.total_nodes, 4);
        assert_eq!(stats.root_nodes, 1);
        assert_eq!(stats.max_depth, 2);
        assert_eq!(stats.string_buffer_size, 0);
        assert_eq!(stats.version, 19);
    }
}