1use super::header::SerializedFileHeader;
6use super::types::{
7 FileIdentifier, LocalSerializedObjectIdentifier, ObjectInfo, SerializedType, TypeRegistry,
8};
9use crate::data_view::DataView;
10use crate::error::{BinaryError, Result};
11use crate::object::ObjectHandle;
12use crate::reader::{BinaryReader, ByteOrder};
13use crate::shared_bytes::SharedBytes;
14use crate::typetree::TypeTreeRegistry;
15use std::collections::HashMap;
16use std::ops::Range;
17use std::sync::Arc;
18use std::sync::OnceLock;
19
/// Stateless namespace for parsing Unity `SerializedFile` containers.
///
/// All entry points (`from_bytes*`, `from_shared_range*`) return a fully
/// parsed [`SerializedFile`]; the struct itself carries no state.
pub struct SerializedFileParser;
25
impl SerializedFileParser {
    /// Parses a `SerializedFile` from an owned byte buffer, without eagerly
    /// copying per-object payloads.
    pub fn from_bytes(data: Vec<u8>) -> Result<SerializedFile> {
        Self::from_bytes_with_options(data, false)
    }

    /// Parses a `SerializedFile` from an owned byte buffer.
    ///
    /// When `preload_object_data` is true, every object's byte range is copied
    /// into its `ObjectInfo` after the metadata pass (see `load_object_data`).
    pub fn from_bytes_with_options(
        data: Vec<u8>,
        preload_object_data: bool,
    ) -> Result<SerializedFile> {
        let shared = SharedBytes::from_vec(data);
        let len = shared.len();
        Self::from_shared_range_with_options(shared, 0..len, preload_object_data)
    }

    /// Parses a `SerializedFile` from a sub-range of a shared buffer
    /// (e.g. a file embedded in a larger container), without preloading
    /// object data.
    pub fn from_shared_range(data: SharedBytes, range: Range<usize>) -> Result<SerializedFile> {
        Self::from_shared_range_with_options(data, range, false)
    }

    /// Parses a `SerializedFile` from a sub-range of a shared buffer.
    ///
    /// # Errors
    /// Fails if `range` does not fit inside `data` or the bytes are not a
    /// valid serialized file.
    pub fn from_shared_range_with_options(
        data: SharedBytes,
        range: Range<usize>,
        preload_object_data: bool,
    ) -> Result<SerializedFile> {
        let view = DataView::from_shared_range(data, range)?;
        Self::from_view_with_options(view, preload_object_data)
    }

    /// Core parse routine: reads the header big-endian, validates it, switches
    /// the reader to the header-declared byte order, then fills in the
    /// metadata tables in place.
    fn from_view_with_options(view: DataView, preload_object_data: bool) -> Result<SerializedFile> {
        // Start from an empty shell so `parse_metadata` can populate fields
        // incrementally as it walks the metadata section.
        let mut file = SerializedFile {
            header: SerializedFileHeader::default(),
            unity_version: String::new(),
            target_platform: 0,
            enable_type_tree: false,
            type_tree_registry: None,
            types: Vec::new(),
            big_id_enabled: false,
            objects: Vec::new(),
            script_types: Vec::new(),
            externals: Vec::new(),
            ref_types: Vec::new(),
            user_information: String::new(),
            data: view,
            object_index_by_path_id: OnceLock::new(),
        };

        // Scope the borrow of `file.data` so `file` can be mutated afterwards
        // (e.g. by `load_object_data`).
        {
            let backing = file.data.backing_shared();
            let start = file.data.base_offset();
            let len = file.data.len();
            let bytes = &backing.as_bytes()[start..start + len];
            // The header is always read big-endian, regardless of the byte
            // order the rest of the file uses.
            let mut reader = BinaryReader::new(bytes, ByteOrder::Big);

            file.header = SerializedFileHeader::from_reader(&mut reader)?;

            if !file.header.is_valid() {
                return Err(BinaryError::invalid_data("Invalid SerializedFile header"));
            }

            // Everything after the header uses the endianness the header declares.
            reader.set_byte_order(file.header.byte_order());

            Self::parse_metadata(&mut file, &mut reader)?;
        }

        if preload_object_data {
            file.load_object_data()?;
        }

        Ok(file)
    }

    /// Async convenience wrapper around [`Self::from_bytes`]; parsing runs on
    /// a blocking thread.
    #[cfg(feature = "async")]
    pub async fn from_bytes_async(data: Vec<u8>) -> Result<SerializedFile> {
        Self::from_bytes_async_with_options(data, false).await
    }

    /// Async convenience wrapper around [`Self::from_bytes_with_options`].
    ///
    /// The CPU-bound parse is moved onto `spawn_blocking` so it does not stall
    /// the async executor.
    #[cfg(feature = "async")]
    pub async fn from_bytes_async_with_options(
        data: Vec<u8>,
        preload_object_data: bool,
    ) -> Result<SerializedFile> {
        let result = tokio::task::spawn_blocking(move || {
            Self::from_bytes_with_options(data, preload_object_data)
        })
        .await
        // First `?` surfaces a task join failure, second the parse result.
        .map_err(|e| BinaryError::generic(format!("Task join error: {}", e)))??;

        Ok(result)
    }

    /// Parses the metadata section that follows the header: unity version,
    /// target platform, type table, object table, script types, externals,
    /// reference types, and the trailing user-information string.
    ///
    /// Field presence is gated on `header.version` (the serialized-file
    /// format generation). All i32 counts are range-checked before use so a
    /// corrupt file cannot trigger a huge allocation loop.
    fn parse_metadata(file: &mut SerializedFile, reader: &mut BinaryReader) -> Result<()> {
        if file.header.version >= 7 {
            file.unity_version = reader.read_cstring()?;
        }

        if file.header.version >= 8 {
            file.target_platform = reader.read_i32()?;
        }

        if file.header.version >= 13 {
            file.enable_type_tree = reader.read_bool()?;
        }

        // --- Type table ---
        let type_count = reader.read_i32()?;
        if type_count < 0 {
            return Err(BinaryError::invalid_data(format!(
                "Negative type count: {}",
                type_count
            )));
        }
        let type_count = type_count as usize;
        for _ in 0..type_count {
            let serialized_type = SerializedType::from_reader(
                reader,
                file.header.version,
                file.enable_type_tree,
                false,
            )?;
            file.types.push(serialized_type);
        }

        // 64-bit path-id flag only exists in this version window; >= 14 it is
        // implied by the format itself (see `parse_object_info`).
        if file.header.version >= 7 && file.header.version < 14 {
            file.big_id_enabled = reader.read_i32()? != 0;
        }

        // --- Object table ---
        let object_count = reader.read_i32()?;
        if object_count < 0 {
            return Err(BinaryError::invalid_data(format!(
                "Negative object count: {}",
                object_count
            )));
        }
        let object_count = object_count as usize;
        for _ in 0..object_count {
            let object_info = Self::parse_object_info(file, reader)?;
            file.objects.push(object_info);
        }

        // --- Script type references (format >= 11) ---
        if file.header.version >= 11 {
            let script_count = reader.read_i32()?;
            if script_count < 0 {
                return Err(BinaryError::invalid_data(format!(
                    "Negative script count: {}",
                    script_count
                )));
            }
            let script_count = script_count as usize;
            for _ in 0..script_count {
                let script_type =
                    LocalSerializedObjectIdentifier::from_reader(reader, file.header.version)?;
                file.script_types.push(script_type);
            }
        }

        // --- External file references ---
        let external_count = reader.read_i32()?;
        if external_count < 0 {
            return Err(BinaryError::invalid_data(format!(
                "Negative external count: {}",
                external_count
            )));
        }
        let external_count = external_count as usize;
        for _ in 0..external_count {
            let external = FileIdentifier::from_reader(reader, file.header.version)?;
            file.externals.push(external);
        }

        // --- Reference types (format >= 20); parsed with the ref-type flag set ---
        if file.header.version >= 20 {
            let ref_type_count = reader.read_i32()?;
            if ref_type_count < 0 {
                return Err(BinaryError::invalid_data(format!(
                    "Negative ref type count: {}",
                    ref_type_count
                )));
            }
            let ref_type_count = ref_type_count as usize;
            for _ in 0..ref_type_count {
                let ref_type = SerializedType::from_reader(
                    reader,
                    file.header.version,
                    file.enable_type_tree,
                    true,
                )?;
                file.ref_types.push(ref_type);
            }
        }

        if file.header.version >= 5 {
            file.user_information = reader.read_cstring()?;
        }

        Ok(())
    }

    /// Parses one object-table entry.
    ///
    /// Layout varies by format version:
    /// * path id: i64 when `big_id_enabled`, i32 before format 14, otherwise
    ///   an aligned i64;
    /// * byte start: i64 (checked non-negative) from format 22, else u32 —
    ///   always offset by `header.data_offset` with overflow checked;
    /// * before format 16 the class id is a separate u16 and there is no type
    ///   index; from format 16 on, the i32 read earlier is an index into the
    ///   type table and the class id is looked up there.
    ///
    /// May also back-fill `script_type_index` into the matching entry of
    /// `file.types` (formats 11..17).
    fn parse_object_info(
        file: &mut SerializedFile,
        reader: &mut BinaryReader,
    ) -> Result<ObjectInfo> {
        let version = file.header.version;

        let path_id = if file.big_id_enabled {
            reader.read_i64()?
        } else if version < 14 {
            reader.read_i32()? as i64
        } else {
            reader.align()?;
            reader.read_i64()?
        };

        let byte_start = if version >= 22 {
            i64_to_u64_checked(reader.read_i64()?, "object.byte_start")?
        } else {
            reader.read_u32()? as u64
        };
        // Stored value is relative to the data section; make it absolute.
        let byte_start = byte_start
            .checked_add(file.header.data_offset)
            .ok_or_else(|| BinaryError::invalid_data("Object byte_start overflow"))?;

        let byte_size = reader.read_u32()?;

        // Before format 16 this is the raw type id; afterwards it is an index
        // into the type table.
        let raw_type_id = reader.read_i32()?;

        let (class_id, type_index) = if version < 16 {
            let class_id = reader.read_u16()? as i32;
            (class_id, -1)
        } else {
            let idx = raw_type_id;
            let class_id = file
                .types
                .get(idx as usize)
                .ok_or_else(|| {
                    BinaryError::invalid_data(format!(
                        "Invalid type index in object table: {}",
                        idx
                    ))
                })?
                .class_id;
            (class_id, idx)
        };

        if version < 11 {
            // Legacy "is destroyed" flag; value is not used here.
            let _is_destroyed = reader.read_u16()?;
        }

        if (11..17).contains(&version) {
            let script_type_index = reader.read_i16()?;
            if version < 16 {
                // raw_type_id is a class id here: patch the first matching type.
                if let Some(typ) = file.types.iter_mut().find(|t| t.class_id == raw_type_id) {
                    typ.script_type_index = script_type_index;
                }
            } else if raw_type_id >= 0
                && let Some(typ) = file.types.get_mut(raw_type_id as usize)
            {
                // raw_type_id is a table index here.
                typ.script_type_index = script_type_index;
            }
        }

        if version == 15 || version == 16 {
            // "stripped" marker byte; value is not used here.
            let _stripped = reader.read_u8()?;
        }

        Ok(ObjectInfo::new(
            path_id, byte_start, byte_size, class_id, type_index,
        ))
    }

    /// Validates the header plus every object and type entry, wrapping any
    /// failure with the offending index.
    pub fn validate(file: &SerializedFile) -> Result<()> {
        file.header.validate()?;

        for (i, obj) in file.objects.iter().enumerate() {
            obj.validate().map_err(|e| {
                BinaryError::generic(format!("Object {} validation failed: {}", i, e))
            })?;
        }

        for (i, stype) in file.types.iter().enumerate() {
            stype.validate().map_err(|e| {
                BinaryError::generic(format!("Type {} validation failed: {}", i, e))
            })?;
        }

        Ok(())
    }

    /// Snapshot of parse-level statistics (counts and header facts) for
    /// diagnostics.
    pub fn get_parsing_stats(file: &SerializedFile) -> ParsingStats {
        ParsingStats {
            version: file.header.version,
            unity_version: file.unity_version.clone(),
            target_platform: file.target_platform,
            file_size: file.header.file_size,
            object_count: file.objects.len(),
            type_count: file.types.len(),
            script_type_count: file.script_types.len(),
            external_count: file.externals.len(),
            has_type_tree: file.enable_type_tree,
            big_id_enabled: file.big_id_enabled,
        }
    }
}
361
/// A parsed Unity serialized file: the header, the metadata tables, and a
/// view over the raw bytes from which object payloads are sliced on demand.
#[derive(Debug)]
pub struct SerializedFile {
    /// File header (format version, sizes, data offset, byte order).
    pub header: SerializedFileHeader,
    /// Unity editor version string (empty for format versions < 7).
    pub unity_version: String,
    /// Target platform id (0 when absent, format versions < 8).
    pub target_platform: i32,
    /// Whether embedded type trees are enabled (read for format versions >= 13).
    pub enable_type_tree: bool,
    /// Optional external registry used to resolve type trees.
    pub type_tree_registry: Option<Arc<dyn TypeTreeRegistry>>,
    /// Serialized type table.
    pub types: Vec<SerializedType>,
    /// 64-bit path-id flag (read for format versions 7..14).
    pub big_id_enabled: bool,
    /// Object table entries.
    pub objects: Vec<ObjectInfo>,
    /// Script type references (format versions >= 11).
    pub script_types: Vec<LocalSerializedObjectIdentifier>,
    /// External file references.
    pub externals: Vec<FileIdentifier>,
    /// Reference types (format versions >= 20).
    pub ref_types: Vec<SerializedType>,
    /// Trailing user-information string (format versions >= 5).
    pub user_information: String,
    // View over this file's raw bytes within the shared backing buffer.
    data: DataView,
    // Lazily built path_id -> object-table-index map; see `find_object`.
    object_index_by_path_id: OnceLock<HashMap<i64, usize>>,
}
396
impl SerializedFile {
    /// Installs (or clears, with `None`) the registry used to resolve type
    /// trees for this file.
    pub fn set_type_tree_registry(&mut self, registry: Option<Arc<dyn TypeTreeRegistry>>) {
        self.type_tree_registry = registry;
    }

    /// Raw bytes of this file's view.
    pub fn data(&self) -> &[u8] {
        self.data.as_bytes()
    }

    /// The shared backing buffer (may be larger than this file's view; see
    /// [`Self::data_base_offset`]).
    pub fn data_shared(&self) -> SharedBytes {
        self.data.backing_shared()
    }

    /// The backing buffer as an `Arc<[u8]>`.
    ///
    /// NOTE(review): for the mmap variant this builds a fresh `Arc` from the
    /// mapped slice on every call, i.e. it copies the whole mapping.
    pub fn data_arc(&self) -> Arc<[u8]> {
        match self.data.backing_shared() {
            SharedBytes::Arc(v) => v,
            #[cfg(feature = "mmap")]
            SharedBytes::Mmap(v) => Arc::<[u8]>::from(v.as_ref().as_ref()),
        }
    }

    /// Offset of this file's view inside the shared backing buffer.
    pub fn data_base_offset(&self) -> usize {
        self.data.base_offset()
    }

    /// Identity key of the underlying view (delegates to `DataView`), usable
    /// for caching/deduplication.
    pub fn data_identity_key(&self) -> (usize, usize, usize) {
        self.data.identity_key()
    }

    /// Returns the byte slice holding `info`'s serialized payload.
    ///
    /// # Errors
    /// Fails when `byte_start` does not fit in `usize` or the object's range
    /// extends past the end of the file view.
    pub fn object_bytes<'a>(&'a self, info: &ObjectInfo) -> Result<&'a [u8]> {
        let start: usize = info.byte_start.try_into().map_err(|_| {
            BinaryError::invalid_data(format!("Object byte_start overflow: {}", info.byte_start))
        })?;
        // saturating_add: a bogus size saturates and is rejected by the bounds
        // check below rather than wrapping.
        let end = start.saturating_add(info.byte_size as usize);
        let data = self.data();
        if end > data.len() {
            return Err(BinaryError::invalid_data(format!(
                "Object data out of bounds (path_id={}, start={}, size={}, file_len={})",
                info.path_id,
                start,
                info.byte_size,
                data.len()
            )));
        }
        Ok(&data[start..end])
    }

    /// Best-effort extraction of an AssetBundle's container table as
    /// `(asset_path, file_id, path_id)` triples, without using a type tree.
    ///
    /// Because the exact field layout varies across versions, this scans
    /// candidate start offsets (0..=256 in steps of 4) and a few AssetInfo
    /// layout variants, scores every successful parse by how many entries
    /// look plausible, and returns the best-scoring non-empty result.
    ///
    /// # Errors
    /// Returns the last parse error (or a generic one) when no candidate
    /// yields entries with at least one non-empty path.
    pub fn assetbundle_container_raw(&self, info: &ObjectInfo) -> Result<Vec<(String, i32, i64)>> {
        let data = self.object_bytes(info)?;
        let byte_order = self.header.byte_order();

        // PPtr = (file_id: i32, path_id: i64).
        fn parse_pptr(reader: &mut BinaryReader) -> Result<(i32, i64)> {
            let file_id = reader.read_i32()?;
            let path_id = reader.read_i64()?;
            Ok((file_id, path_id))
        }

        // Length-prefixed string followed by 4-byte alignment.
        fn parse_aligned_string(reader: &mut BinaryReader) -> Result<String> {
            let s = reader.read_string()?;
            reader.align()?;
            Ok(s)
        }

        // Attempts one full parse of: name, preload table, container map, and
        // a trailing main-asset record. `assetinfo_layout` selects whether
        // container entries carry preloadIndex/preloadSize fields;
        // `assetinfo_asset_last` selects whether the PPtr comes after them.
        fn try_parse(
            reader: &mut BinaryReader,
            assetinfo_layout: bool,
            assetinfo_asset_last: bool,
        ) -> Result<Vec<(String, i32, i64)>> {
            let _name = parse_aligned_string(reader)?;

            // Preload table: skipped, but its size bounds-checks the candidate.
            let preload_size = reader.read_i32()?;
            if !(0..=1_000_000).contains(&preload_size) {
                return Err(BinaryError::invalid_data(format!(
                    "Invalid AssetBundle preload table size: {}",
                    preload_size
                )));
            }
            for _ in 0..preload_size {
                let _ = parse_pptr(reader)?;
            }
            reader.align()?;

            let container_size = reader.read_i32()?;
            if !(0..=1_000_000).contains(&container_size) {
                return Err(BinaryError::invalid_data(format!(
                    "Invalid AssetBundle container size: {}",
                    container_size
                )));
            }

            let mut out = Vec::with_capacity(container_size as usize);
            for _ in 0..container_size {
                let asset_path = parse_aligned_string(reader)?;

                let (file_id, path_id) = if assetinfo_layout {
                    if assetinfo_asset_last {
                        let _preload_index = reader.read_i32()?;
                        let _preload_size = reader.read_i32()?;
                        parse_pptr(reader)?
                    } else {
                        let pptr = parse_pptr(reader)?;
                        let _preload_index = reader.read_i32()?;
                        let _preload_size = reader.read_i32()?;
                        pptr
                    }
                } else {
                    parse_pptr(reader)?
                };

                out.push((asset_path, file_id, path_id));
            }
            reader.align()?;

            // Trailing main-asset record, mirroring the per-entry layout;
            // values are discarded but must parse for the candidate to count.
            if assetinfo_layout {
                if assetinfo_asset_last {
                    let _preload_index = reader.read_i32()?;
                    let _preload_size = reader.read_i32()?;
                    let _ = parse_pptr(reader)?;
                } else {
                    let _ = parse_pptr(reader)?;
                    let _preload_index = reader.read_i32()?;
                    let _preload_size = reader.read_i32()?;
                }
            } else {
                let _ = parse_pptr(reader)?;
            }
            reader.align()?;

            Ok(out)
        }

        let mut last_err: Option<BinaryError> = None;
        let externals_len: i32 = self.externals.len().try_into().unwrap_or(i32::MAX);
        type ExternalRefCandidate = (String, i32, i64);
        type BestCandidate = (usize, Vec<ExternalRefCandidate>);
        let mut best: Option<BestCandidate> = None;
        // Score = number of entries that look resolvable: non-empty path,
        // non-zero path id, and either a local object we can find (file_id 0)
        // or an external reference within range (file_id >= 1).
        let score = |entries: &[ExternalRefCandidate]| -> usize {
            entries
                .iter()
                .filter(|(path, file_id, path_id)| {
                    if path.is_empty() || *path_id == 0 || *file_id < 0 {
                        return false;
                    }

                    if *file_id == 0 {
                        self.find_object(*path_id).is_some()
                    } else {
                        // file_id N refers to externals[N - 1].
                        (*file_id - 1) < externals_len
                    }
                })
                .count()
        };

        for offset in (0..=256usize).step_by(4) {
            if offset >= data.len() {
                break;
            }

            for assetinfo_layout in [true, false] {
                let variants: &[(bool, bool)] = if assetinfo_layout {
                    &[(true, false), (true, true)]
                } else {
                    &[(false, false)]
                };

                for &(_layout, asset_last) in variants {
                    let mut reader = BinaryReader::new(&data[offset..], byte_order);
                    match try_parse(&mut reader, assetinfo_layout, asset_last) {
                        Ok(entries) => {
                            let s = score(&entries);
                            // Prefer higher score; break ties by entry count.
                            let better = match &best {
                                None => true,
                                Some((best_score, best_entries)) => {
                                    s > *best_score
                                        || (s == *best_score && entries.len() > best_entries.len())
                                }
                            };
                            if better {
                                best = Some((s, entries));
                            }
                        }
                        Err(e) => last_err = Some(e),
                    }
                }
            }
        }

        // Only accept a candidate with at least one non-empty path; otherwise
        // fall through to the error path.
        if let Some((_score, entries)) = best {
            if entries.iter().any(|(p, _, _)| !p.is_empty()) {
                return Ok(entries);
            }
        }

        Err(last_err.unwrap_or_else(|| {
            BinaryError::invalid_data(
                "Failed to parse AssetBundle container (no candidates matched)",
            )
        }))
    }

    /// Number of entries in the object table.
    pub fn object_count(&self) -> usize {
        self.objects.len()
    }

    /// Number of entries in the type table.
    pub fn type_count(&self) -> usize {
        self.types.len()
    }

    /// Looks up an object by its path id.
    ///
    /// The path_id -> index map is built lazily on first call (via
    /// `OnceLock`); later calls are O(1) hash lookups. Duplicate path ids
    /// resolve to the last occurrence.
    pub fn find_object(&self, path_id: i64) -> Option<&ObjectInfo> {
        let index = self.object_index_by_path_id.get_or_init(|| {
            let mut map = HashMap::with_capacity(self.objects.len());
            for (idx, obj) in self.objects.iter().enumerate() {
                map.insert(obj.path_id, idx);
            }
            map
        });
        index.get(&path_id).and_then(|idx| self.objects.get(*idx))
    }

    /// Iterates over all objects as [`ObjectHandle`]s bound to this file.
    pub fn object_handles(&self) -> impl Iterator<Item = ObjectHandle<'_>> {
        self.objects
            .iter()
            .map(|info| ObjectHandle::new(self, info))
    }

    /// Like [`Self::find_object`], but wraps the result in an [`ObjectHandle`].
    pub fn find_object_handle(&self, path_id: i64) -> Option<ObjectHandle<'_>> {
        self.find_object(path_id)
            .map(|info| ObjectHandle::new(self, info))
    }

    /// Linear search for the first type entry with the given class id.
    pub fn find_type(&self, class_id: i32) -> Option<&SerializedType> {
        self.types.iter().find(|t| t.class_id == class_id)
    }

    /// Collects all objects whose `type_id` matches.
    pub fn objects_of_type(&self, type_id: i32) -> Vec<&ObjectInfo> {
        self.objects
            .iter()
            .filter(|obj| obj.type_id == type_id)
            .collect()
    }

    /// Builds a fresh [`TypeRegistry`] containing clones of this file's types.
    pub fn create_type_registry(&self) -> TypeRegistry {
        let mut registry = TypeRegistry::new();

        for stype in &self.types {
            registry.add_type(stype.clone());
        }

        registry
    }

    /// Summary statistics for this file (counts and header facts).
    pub fn statistics(&self) -> FileStatistics {
        FileStatistics {
            version: self.header.version,
            unity_version: self.unity_version.clone(),
            file_size: self.header.file_size,
            object_count: self.objects.len(),
            type_count: self.types.len(),
            script_type_count: self.script_types.len(),
            external_count: self.externals.len(),
            has_type_tree: self.enable_type_tree,
            target_platform: self.target_platform,
        }
    }

    /// Validates the file; delegates to [`SerializedFileParser::validate`].
    pub fn validate(&self) -> Result<()> {
        SerializedFileParser::validate(self)
    }

    /// Eagerly copies every object's payload bytes into `ObjectInfo::data`.
    ///
    /// # Errors
    /// Fails on the first object whose range does not fit inside the file view
    /// (same bounds rules as [`Self::object_bytes`]).
    fn load_object_data(&mut self) -> Result<()> {
        let backing = self.data.backing_shared();
        let start = self.data.base_offset();
        let len = self.data.len();
        let bytes = &backing.as_bytes()[start..start + len];
        let file_len = bytes.len();
        for obj in &mut self.objects {
            let start: usize = obj.byte_start.try_into().map_err(|_| {
                BinaryError::invalid_data(format!("Object byte_start overflow: {}", obj.byte_start))
            })?;
            let end = start.saturating_add(obj.byte_size as usize);
            if end > file_len {
                return Err(BinaryError::invalid_data(format!(
                    "Object data out of bounds (path_id={}, start={}, size={}, file_len={})",
                    obj.path_id, start, obj.byte_size, file_len
                )));
            }
            obj.data = bytes[start..end].to_vec();
        }
        Ok(())
    }
}
734
735fn i64_to_u64_checked(value: i64, name: &'static str) -> Result<u64> {
736 if value < 0 {
737 return Err(BinaryError::invalid_data(format!(
738 "Invalid {}: negative value {}",
739 name, value
740 )));
741 }
742 Ok(value as u64)
743}
744
/// Statistics captured by [`SerializedFileParser::get_parsing_stats`]:
/// counts and header facts describing one parsed file.
#[derive(Debug, Clone)]
pub struct ParsingStats {
    /// Serialized-file format version from the header.
    pub version: u32,
    /// Unity editor version string (may be empty on old formats).
    pub unity_version: String,
    /// Target platform id.
    pub target_platform: i32,
    /// Total file size as recorded in the header.
    pub file_size: u64,
    /// Number of object-table entries.
    pub object_count: usize,
    /// Number of type-table entries.
    pub type_count: usize,
    /// Number of script type references.
    pub script_type_count: usize,
    /// Number of external file references.
    pub external_count: usize,
    /// Whether embedded type trees are enabled.
    pub has_type_tree: bool,
    /// Whether 64-bit path ids are enabled.
    pub big_id_enabled: bool,
}
759
/// Summary returned by [`SerializedFile::statistics`]; mirrors
/// [`ParsingStats`] minus the big-id flag.
#[derive(Debug, Clone)]
pub struct FileStatistics {
    /// Serialized-file format version from the header.
    pub version: u32,
    /// Unity editor version string (may be empty on old formats).
    pub unity_version: String,
    /// Total file size as recorded in the header.
    pub file_size: u64,
    /// Number of object-table entries.
    pub object_count: usize,
    /// Number of type-table entries.
    pub type_count: usize,
    /// Number of script type references.
    pub script_type_count: usize,
    /// Number of external file references.
    pub external_count: usize,
    /// Whether embedded type trees are enabled.
    pub has_type_tree: bool,
    /// Target platform id.
    pub target_platform: i32,
}
773
#[cfg(test)]
mod tests {
    /// Placeholder smoke test: keeps the module buildable under `cargo test`
    /// until tests with real fixture files exist.
    #[test]
    fn test_parser_creation() {
        let sum = 1 + 1;
        assert_eq!(sum, 2);
    }
}