unity_asset_binary/
webfile.rs

1//! Unity WebFile parsing
2//!
3//! WebFiles are Unity's web-optimized format that can contain other files
4//! and may be compressed with gzip or brotli.
5
6use crate::bundle::{AssetBundle, BundleFileInfo};
7use crate::compression::{decompress_brotli, decompress_gzip};
8use crate::data_view::DataView;
9use crate::error::{BinaryError, Result};
10use crate::reader::{BinaryReader, ByteOrder};
11use crate::shared_bytes::SharedBytes;
12use std::ops::Range;
13use std::sync::Arc;
14
/// Two-byte magic expected at the very start of gzip-compressed data.
const GZIP_MAGIC: &[u8] = b"\x1f\x8b";
/// Six-byte marker compared against the bytes at offset 0x20 to recognise Brotli-compressed data.
const BROTLI_MAGIC: &[u8] = b"\xce\xb2\xcf\x81\x13\x00";
18
/// How the payload of a WebFile is stored on disk.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum WebFileCompression {
    /// The data is stored as-is and parsed without a decompression pass.
    None,
    /// The data is a gzip stream.
    Gzip,
    /// The data is Brotli-compressed.
    Brotli,
}
26
27/// A Unity WebFile that can contain other files
28#[derive(Debug)]
29pub struct WebFile {
30    /// Signature (e.g., "UnityWebData1.0")
31    pub signature: String,
32    /// Compression type used
33    pub compression: WebFileCompression,
34    /// Files contained in this WebFile
35    pub files: Vec<BundleFileInfo>,
36    /// Raw decompressed data
37    data: DataView,
38}
39
40impl WebFile {
41    /// Parse a WebFile from binary data
42    pub fn from_bytes(data: Vec<u8>) -> Result<Self> {
43        let shared = SharedBytes::from_vec(data);
44        let len = shared.len();
45        Self::from_shared_range(shared, 0..len)
46    }
47
48    pub fn from_shared_range(data: SharedBytes, range: Range<usize>) -> Result<Self> {
49        let view = DataView::from_shared_range(data, range)?;
50        Self::from_view(view)
51    }
52
53    fn from_view(view: DataView) -> Result<Self> {
54        let mut reader = BinaryReader::new(view.as_bytes(), ByteOrder::Little);
55
56        // Detect compression type
57        let compression = Self::detect_compression(&mut reader)?;
58
59        // Decompress if necessary
60        let decompressed_data: DataView = match compression {
61            WebFileCompression::None => view,
62            WebFileCompression::Gzip => {
63                DataView::from_shared(SharedBytes::from_vec(decompress_gzip(view.as_bytes())?))
64            }
65            WebFileCompression::Brotli => {
66                DataView::from_shared(SharedBytes::from_vec(decompress_brotli(view.as_bytes())?))
67            }
68        };
69
70        // Create reader for decompressed data
71        let mut reader = BinaryReader::new(decompressed_data.as_bytes(), ByteOrder::Little);
72
73        // Read signature
74        let signature = reader.read_cstring()?;
75        if !signature.starts_with("UnityWebData") && !signature.starts_with("TuanjieWebData") {
76            return Err(BinaryError::invalid_signature(
77                "UnityWebData or TuanjieWebData",
78                &signature,
79            ));
80        }
81
82        // Read header length
83        let head_length_i32 = reader.read_i32()?;
84        if head_length_i32 < 0 {
85            return Err(BinaryError::invalid_data(format!(
86                "Negative WebFile head_length: {}",
87                head_length_i32
88            )));
89        }
90        let head_length = head_length_i32 as usize;
91        let total_len = decompressed_data.len();
92        if head_length > total_len {
93            return Err(BinaryError::invalid_data(format!(
94                "WebFile head_length {} exceeds data len {}",
95                head_length, total_len
96            )));
97        }
98        if head_length < reader.position() as usize {
99            return Err(BinaryError::invalid_data(format!(
100                "WebFile head_length {} precedes current position {}",
101                head_length,
102                reader.position()
103            )));
104        }
105
106        // Read file entries
107        let mut files = Vec::new();
108        while reader.position() < head_length as u64 {
109            let offset_i32 = reader.read_i32()?;
110            let length_i32 = reader.read_i32()?;
111            let path_len_i32 = reader.read_i32()?;
112
113            if offset_i32 < 0 || length_i32 < 0 || path_len_i32 < 0 {
114                return Err(BinaryError::invalid_data(format!(
115                    "Negative WebFile entry values: offset={} length={} path_len={}",
116                    offset_i32, length_i32, path_len_i32
117                )));
118            }
119
120            let offset = offset_i32 as u64;
121            let length = length_i32 as u64;
122            let path_length = path_len_i32 as usize;
123            if path_length > 16 * 1024 {
124                return Err(BinaryError::ResourceLimitExceeded(format!(
125                    "WebFile entry name too large: {}",
126                    path_length
127                )));
128            }
129            let name_bytes = reader.read_bytes(path_length)?;
130            let name = String::from_utf8(name_bytes).map_err(|e| {
131                BinaryError::invalid_data(format!("Invalid UTF-8 in file name: {}", e))
132            })?;
133
134            files.push(BundleFileInfo {
135                name,
136                offset,
137                size: length,
138            });
139        }
140
141        Ok(WebFile {
142            signature,
143            compression,
144            files,
145            data: decompressed_data,
146        })
147    }
148
149    /// Detect compression type from file header
150    fn detect_compression(reader: &mut BinaryReader) -> Result<WebFileCompression> {
151        // Check for GZIP magic
152        let magic = reader.read_bytes(2)?;
153        reader.set_position(0)?; // Reset position
154
155        if magic == GZIP_MAGIC {
156            return Ok(WebFileCompression::Gzip);
157        }
158
159        // Check for Brotli magic at offset 0x20
160        reader.set_position(0x20)?;
161        let magic = reader.read_bytes(6)?;
162        reader.set_position(0)?; // Reset position
163
164        if magic == BROTLI_MAGIC {
165            return Ok(WebFileCompression::Brotli);
166        }
167
168        Ok(WebFileCompression::None)
169    }
170
171    /// Get the files contained in this WebFile
172    pub fn files(&self) -> &[BundleFileInfo] {
173        &self.files
174    }
175
176    pub fn data_shared(&self) -> SharedBytes {
177        self.data.backing_shared()
178    }
179
180    pub fn data_arc(&self) -> Arc<[u8]> {
181        match self.data.backing_shared() {
182            SharedBytes::Arc(v) => v,
183            #[cfg(feature = "mmap")]
184            SharedBytes::Mmap(v) => Arc::<[u8]>::from(v.as_ref().as_ref()),
185        }
186    }
187
188    /// Extract a specific file by name
189    pub fn extract_file(&self, name: &str) -> Result<Vec<u8>> {
190        Ok(self.extract_file_slice(name)?.to_vec())
191    }
192
193    pub fn extract_file_slice(&self, name: &str) -> Result<&[u8]> {
194        let file_info = self
195            .files
196            .iter()
197            .find(|f| f.name == name)
198            .ok_or_else(|| BinaryError::invalid_data(format!("File not found: {}", name)))?;
199
200        let start = file_info.offset as usize;
201        let end = start + file_info.size as usize;
202
203        let bytes = self.data.as_bytes();
204        if end > bytes.len() {
205            return Err(BinaryError::invalid_data(format!(
206                "File {} extends beyond data bounds: {} > {}",
207                name,
208                end,
209                bytes.len()
210            )));
211        }
212
213        Ok(&bytes[start..end])
214    }
215
216    pub fn extract_file_view(&self, name: &str) -> Result<DataView> {
217        let file_info = self
218            .files
219            .iter()
220            .find(|f| f.name == name)
221            .ok_or_else(|| BinaryError::invalid_data(format!("File not found: {}", name)))?;
222
223        let start = file_info.offset as usize;
224        let end = start + file_info.size as usize;
225        let base = self.data.base_offset();
226        DataView::from_shared_range(self.data.backing_shared(), (base + start)..(base + end))
227    }
228
229    /// Try to parse contained files as AssetBundles
230    pub fn parse_bundles(&self) -> Result<Vec<AssetBundle>> {
231        let mut bundles = Vec::new();
232
233        for file_info in &self.files {
234            if let Ok(view) = self.extract_file_view(&file_info.name)
235                && let Ok(bundle) = crate::bundle::BundleParser::from_shared_range(
236                    view.backing_shared(),
237                    view.absolute_range(),
238                )
239            {
240                bundles.push(bundle);
241            }
242        }
243
244        Ok(bundles)
245    }
246}
247
#[cfg(test)]
mod tests {
    use super::*;

    /// Input that begins with the gzip magic must be classified as gzip.
    #[test]
    fn test_compression_detection() {
        let gzip_header: [u8; 4] = [0x1f, 0x8b, 0x08, 0x00];
        let mut probe = BinaryReader::new(&gzip_header, ByteOrder::Little);

        let detected = WebFile::detect_compression(&mut probe).unwrap();

        assert_eq!(detected, WebFileCompression::Gzip);
    }

    /// A hand-built WebFile exposes exactly the values it was built with.
    #[test]
    fn test_webfile_creation() {
        let empty_view = DataView::from_shared(SharedBytes::from_vec(Vec::<u8>::new()));
        let subject = WebFile {
            signature: String::from("UnityWebData1.0"),
            compression: WebFileCompression::None,
            files: Vec::new(),
            data: empty_view,
        };

        assert_eq!(subject.signature, "UnityWebData1.0");
        assert_eq!(subject.compression, WebFileCompression::None);
        assert!(subject.files().is_empty());
    }
}