Skip to main content

unity_assetdb/
asset.rs

//! Parser for Unity asset YAML files (`.asset`, `.prefab`, `.controller`, …).
//!
//! Produces:
//! - `top_class_id` — class ID from the first `--- !u!<classID> &<fileID>` doc header.
//! - `script_guid` — `m_Script.guid` of the top doc (present when that doc is
//!   a MonoBehaviour).
//! - `sub_assets` — every `--- !u!<id> &<fileID>` doc after the first, paired
//!   with that doc's `m_Name`.
//!
//! Stays line-oriented: faster and lighter than full YAML parsing for this
//! narrow shape. See [[json-schema.md]] for what each field means in
//! Unity's emitted YAML.
12
13use anyhow::Result;
14
15#[derive(Debug, Clone, Default)]
16pub struct AssetInfo {
17    /// First doc's class ID. None on a malformed/empty asset.
18    pub top_class_id: Option<u32>,
19    /// First doc's fileID (the `&NNN` after the class ID).
20    pub top_file_id: Option<i64>,
21    /// `m_Script.guid` for the top doc when it's MonoBehaviour-class (114).
22    pub script_guid: Option<u128>,
23    /// Sub-asset docs after the first. `(class_id, file_id, m_Name)`.
24    /// `m_Name` is empty when the sub-doc has none — caller decides how to handle.
25    pub sub_assets: Vec<SubAssetEntry>,
26}
27
/// A document that follows the first one in an asset file.
#[derive(Debug, Clone)]
pub struct SubAssetEntry {
    /// Class ID from the document header (`!u!<classID>`).
    pub class_id: u32,
    /// fileID from the document header (the number after `&`); may be negative.
    pub file_id: i64,
    /// The document's `m_Name` value as written in the file, or an empty
    /// string when the document had none.
    pub name: String,
}
34
/// Selects how much of the asset file `parse` examines.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ParseMode {
    /// Inspect the first document only: its class ID, fileID and (for a
    /// MonoBehaviour) its `m_Script` GUID. Parsing stops at the second
    /// `---` document header. Intended for file types that do not expose
    /// addressable sub-assets (`.prefab`, `.controller`, `.mat`, `.anim`,
    /// `.mask`, `.unity`).
    TopOnly,
    /// Scan every document: the top document plus `(class_id, fileID, m_Name)`
    /// for each one after it. Intended for file types that legitimately host
    /// sub-assets (`.asset`, `.spriteatlas`).
    WithSubAssets,
}
47
48/// Parse the YAML text. We only walk doc headers and a couple of well-known
49/// keys per doc; full parsing is overkill for this shape.
50pub fn parse(text: &str, mode: ParseMode) -> Result<AssetInfo> {
51    let mut info = AssetInfo::default();
52
53    // Doc structure: `--- !u!<id> &<fileID> [stripped]` opens a doc; lines
54    // after the next non-doc-header line and before the following `---` are
55    // its body. We collect (class, file_id, name, script_guid) per doc.
56    struct DocAccum {
57        class_id: u32,
58        file_id: i64,
59        name: Option<String>,
60        script_guid: Option<u128>,
61    }
62
63    let mut doc_idx: usize = 0;
64    let mut cur: Option<DocAccum> = None;
65
66    let flush = |info: &mut AssetInfo, doc_idx: usize, d: DocAccum| {
67        if doc_idx == 0 {
68            info.top_class_id = Some(d.class_id);
69            info.top_file_id = Some(d.file_id);
70            info.script_guid = d.script_guid;
71        } else {
72            // class_id is propagated through to `SubAsset` in store.rs —
73            // critical for prefab-embedded sub-docs whose hashed negative
74            // fileID can't be reverse-derived to a class via the
75            // `file_id = class * 100_000` heuristic.
76            info.sub_assets.push(SubAssetEntry {
77                class_id: d.class_id,
78                file_id: d.file_id,
79                name: d.name.unwrap_or_default(),
80            });
81        }
82    };
83
84    for line in text.lines() {
85        if let Some((cls, fid)) = parse_doc_header(line) {
86            if let Some(d) = cur.take() {
87                flush(&mut info, doc_idx, d);
88                doc_idx += 1;
89                // TopOnly: stop the moment we've finished the first doc.
90                if mode == ParseMode::TopOnly {
91                    return Ok(info);
92                }
93            }
94            cur = Some(DocAccum {
95                class_id: cls,
96                file_id: fid,
97                name: None,
98                script_guid: None,
99            });
100            continue;
101        }
102
103        let Some(d) = cur.as_mut() else { continue };
104
105        let trimmed = line.trim_start();
106        if let Some(rest) = trimmed.strip_prefix("m_Name:") {
107            if d.name.is_none() {
108                let s = rest.trim();
109                if !s.is_empty() {
110                    d.name = Some(s.to_string());
111                }
112            }
113        } else if d.script_guid.is_none()
114            && let Some(rest) = trimmed.strip_prefix("m_Script:")
115        {
116            // `m_Script: {fileID: …, guid: <hex32>, type: 3}` on one line
117            d.script_guid = parse_inline_guid(rest);
118        }
119    }
120    if let Some(d) = cur.take() {
121        flush(&mut info, doc_idx, d);
122    }
123    Ok(info)
124}
125
/// Recognise a Unity document header of the form `--- !u!<id> &<fileID>`.
///
/// The line must begin with `--- !u!` (no leading whitespace). Anything
/// after the fileID, such as a trailing ` stripped` marker, is ignored.
///
/// Returns `Some((class_id, file_id))` on a match, or `None` when the prefix
/// is missing, the ` &` separator is absent, or either number fails to parse.
fn parse_doc_header(line: &str) -> Option<(u32, i64)> {
    let (class_part, id_part) = line.strip_prefix("--- !u!")?.split_once(" &")?;
    let class_id = class_part.trim().parse::<u32>().ok()?;
    // Only the first whitespace-delimited token after `&` is the fileID.
    let file_id = id_part.split_whitespace().next()?.parse::<i64>().ok()?;
    Some((class_id, file_id))
}
136
/// Extract the `guid` value from an inline reference such as
/// `{fileID: …, guid: <hex32>, type: …}`.
///
/// `rest` is the text that follows the key (for example everything after
/// `m_Script:`); surrounding whitespace and the enclosing braces are
/// stripped before the comma-separated fields are examined.
///
/// Returns the GUID as a `u128` when a `guid:` field holds exactly 32
/// hexadecimal digits. Returns `None` when there is no such field or when
/// the first 32-character `guid:` value contains a non-hex character.
/// `guid:` fields of any other length are skipped.
fn parse_inline_guid(rest: &str) -> Option<u128> {
    let inner = rest.trim().trim_start_matches('{').trim_end_matches('}');
    for field in inner.split(',') {
        let Some(value) = field.trim().strip_prefix("guid:") else {
            continue;
        };
        let hex = value.trim();
        if hex.len() != 32 {
            continue;
        }
        // `from_str_radix` tolerates a leading `+`, so a length check alone
        // would let `+` followed by 31 digits through as a "GUID". Demand
        // that every one of the 32 characters is a hex digit.
        if !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
            return None;
        }
        return u128::from_str_radix(hex, 16).ok();
    }
    None
}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the parser on `yaml` and unwrap the result.
    fn scan(yaml: &str, mode: ParseMode) -> AssetInfo {
        parse(yaml, mode).unwrap()
    }

    /// A single-document file (preceded by YAML directives) yields only the
    /// top-document fields and no sub-assets.
    #[test]
    fn parses_top_only() {
        let yaml = "%YAML 1.1
%TAG !u! tag:unity3d.com,2011:
--- !u!1001 &100100000
PrefabInstance:
  m_ObjectHideFlags: 0
";
        let parsed = scan(yaml, ParseMode::WithSubAssets);
        assert_eq!(parsed.top_class_id, Some(1001));
        assert_eq!(parsed.top_file_id, Some(100100000));
        assert!(parsed.sub_assets.is_empty());
    }

    /// The inline `m_Script` reference of a MonoBehaviour top document is
    /// decoded into `script_guid`.
    #[test]
    fn parses_monobehaviour_with_script_guid() {
        let yaml = "--- !u!114 &11400000
MonoBehaviour:
  m_ObjectHideFlags: 0
  m_Script: {fileID: 11500000, guid: 7d602c2080b53413fa393df6b2c0af43, type: 3}
  m_Name: TweenSeqDef
";
        let parsed = scan(yaml, ParseMode::WithSubAssets);
        assert_eq!(parsed.top_class_id, Some(114));
        assert_eq!(
            parsed.script_guid,
            Some(0x7d602c2080b53413fa393df6b2c0af43_u128)
        );
    }

    /// Multi-document input in `TopOnly` mode: the sub-asset list must stay
    /// empty because parsing stops at the second header.
    #[test]
    fn top_only_skips_sub_docs() {
        let yaml = "--- !u!28 &2800000
Texture2D:
  m_Name: Sheet
--- !u!213 &21300000
Sprite:
  m_Name: spr_a
";
        let parsed = scan(yaml, ParseMode::TopOnly);
        assert_eq!(parsed.top_class_id, Some(28));
        assert!(parsed.sub_assets.is_empty());
    }

    /// Every document after the first is reported with its fileID and name.
    #[test]
    fn parses_sub_assets() {
        let yaml = "--- !u!28 &2800000
Texture2D:
  m_Name: Sheet
--- !u!213 &21300000
Sprite:
  m_Name: spr_a
--- !u!213 &21300002
Sprite:
  m_Name: spr_b
";
        let parsed = scan(yaml, ParseMode::WithSubAssets);
        assert_eq!(parsed.top_class_id, Some(28));
        assert_eq!(parsed.sub_assets.len(), 2);

        let first = &parsed.sub_assets[0];
        assert_eq!(first.file_id, 21300000);
        assert_eq!(first.name, "spr_a");
        assert_eq!(parsed.sub_assets[1].name, "spr_b");
    }

    /// `asset::parse` is class-blind: every named sub-doc surfaces whatever
    /// its class. Per the original note, the extension-aware filter that
    /// drops GO-tree structural docs from prefabs lives in
    /// `bake::process_one`; pinning the behaviour here keeps the parser
    /// layer's contract clear.
    #[test]
    fn parses_keeps_all_named_subdocs_regardless_of_class() {
        let yaml = "--- !u!114 &11400000
MonoBehaviour:
  m_Name: TimelineAsset
--- !u!114 &-7938135556022269506
MonoBehaviour:
  m_Name: 'Animation Track (1)'
--- !u!1 &111111
GameObject:
  m_Name: '@SomeGo'
--- !u!74 &-444444
AnimationClip:
  m_Name: EmbeddedClip
";
        let parsed = scan(yaml, ParseMode::WithSubAssets);
        // Three named sub-docs: the class-114 top doc is the parent, not a
        // sub-asset. The line-oriented parser keeps YAML quote characters
        // verbatim (Unity typically single-quotes names containing special
        // characters); stripping them is left to a downstream pass.
        let names: Vec<&str> = parsed
            .sub_assets
            .iter()
            .map(|entry| entry.name.as_str())
            .collect();
        assert_eq!(names, ["'Animation Track (1)'", "'@SomeGo'", "EmbeddedClip"]);
    }
}