Skip to main content

unity_assetdb/
store.rs

1//! On-disk schemas for the bake pipeline.
2//!
3//! Two files, written side-by-side under the consumer-chosen out-dir
4//! (commonly `<project>/Library/<consumer>/`):
5//!
6//! - `asset-db.bin` — convert artifact. Lean: per-entry guid, asset type,
7//!   name, sub-assets. Sorted by guid for O(log n) binary-search lookup;
8//!   no path/mtime baggage.
9//! - `asset-db.cache.bin` — bake-only cache, gitignored alongside.
10//!   Maps `hint → (mtimes, resolved bake state)` so unchanged assets skip
11//!   re-parsing on subsequent bakes. Downstream consumers never read this.
12//!
13//! Script (MonoBehaviour / ScriptableObject) types are interned in
14//! `script_types` and referenced by index — keeps per-entry payload small
15//! (8 bytes for `AssetType`).
16
17use std::path::{Path, PathBuf};
18
19use anyhow::{Context, Result};
20use bincode::{Decode, Encode};
21
22use crate::class_id::ClassId;
23
24/// Bumped whenever the on-disk schema changes incompatibly.
25/// A version mismatch is a hard fail — the user re-bakes.
26///
27/// History:
28/// - v4: every name in `entries[].name` and `entries[].sub_assets[].name`
29///   resolves to a unique guid (name namespace unified across top-level
30///   and sub-asset rows). Pre-v4 bakes could carry colliding sub-asset
31///   names; readers no longer accept them.
32/// - v5: two changes shipped together.
33///   1. File magic renamed `PSPECADB` → `UADBIN__` and `PSPECABC` →
34///      `UADCACHE` to drop the historical "pspec" prefix.
35///   2. `SubAsset` carries `class_id` so non-canonical sub-asset fileIDs
36///      (prefab-embedded `AnimationClip` with hashed negative fids) keep
37///      their real Unity class instead of a `file_id / 100_000` heuristic
38///      collapsing them to `ScriptableObject`. Top-level entries also
39///      share their alias bucket with same-named entries of a different
40///      `asset_type` — type-aware reverse lookup discriminates at query
41///      time. See [Name collisions](docs/asset-database.md#name-collisions).
42///   Pre-v5 bakes are unreadable; re-bake required after upgrading.
43/// - v6: `.prefab`/`.controller`/`.anim`/`.mixer`/`.playable` sub-asset
44///   rows now exclude the GameObject-tree structural classes (GameObject,
45///   Transform, RectTransform, MonoBehaviour-as-component scoped to
46///   `.prefab` only). Pre-v6 caches carry leaked `'@<name>'` sub-asset
47///   rows for child GOs that would re-emerge on warm bakes; the bump
48///   invalidates them.
49pub const SCHEMA_VERSION: u16 = 6;
50
51/// File magic — first 8 bytes. `b"UADBIN__"`.
52pub const MAGIC: [u8; 8] = *b"UADBIN__";
53
54/// File magic for the bake-only cache file.
55pub const CACHE_MAGIC: [u8; 8] = *b"UADCACHE";
56
57/// Type of a Unity asset.
58///
59/// `Native(classId)` for built-in types (Sprite, Prefab, Texture2D, …).
60/// `Script(idx)` for MonoBehaviour-backed assets — `idx` indexes into
61/// [`AssetDb::script_types`], whose entries are u128 script GUIDs that
62/// match the `guid` field on entries in `types.json`.
63#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Encode, Decode)]
64pub enum AssetType {
65    Native(u32),
66    Script(u32),
67}
68
69impl AssetType {
70    pub fn native(class_id: ClassId) -> Self {
71        Self::Native(class_id as u32)
72    }
73}
74
/// One sub-object inside an asset that has its own fileID.
///
/// Sprite-atlas entries, multi-clip animations, sprite-sheet sub-sprites,
/// prefab-embedded `AnimationClip` docs. Per-entry list is sorted by
/// `file_id` for binary-search lookups.
///
/// `class_id` is the Unity native classID of the sub-doc (`74` for
/// `AnimationClip`, `213` for `Sprite`, etc.). Stored explicitly because
/// prefab-embedded sub-asset fileIDs are hashed (negative or non-multiple-
/// of-100000) and a `file_id / 100_000` heuristic collapses them to
/// `ScriptableObject` — the asset DB needs the real class for the
/// strict-typed-field elision rule consumers apply downstream.
///
/// `name` is `Box<str>` rather than `String` — strings here are immutable
/// once decoded; dropping the capacity field saves 8 bytes per entry.
#[derive(Debug, Clone, Encode, Decode)]
pub struct SubAsset {
    /// fileID of the sub-object. Signed because hashed fileIDs can be
    /// negative; this is the key the per-entry list is sorted by.
    pub file_id: i64,
    /// Unity native classID of the sub-object.
    pub class_id: u32,
    /// Name of the sub-object.
    pub name: Box<str>,
}
96
/// One top-level Unity asset, as stored in the convert artifact.
///
/// `name` is the asset's filename stem (with optional collision suffix).
/// At convert time it's prefixed with `$` to form a JSON ref (`$Foo`),
/// but the prefix is purely a JSON encoding convention — never stored.
/// String fields use `Box<str>` (immutable; saves the 8-byte capacity
/// field a `String` carries for growability).
#[derive(Debug, Clone, Encode, Decode)]
pub struct AssetEntry {
    /// Asset GUID as a u128. `AssetDb::entries` is sorted and searched by
    /// this field.
    pub guid: u128,
    /// Native class or interned script type of the asset.
    pub asset_type: AssetType,
    /// Filename stem, stored without the `$` JSON-ref prefix.
    pub name: Box<str>,
    /// Sub-objects with their own fileID; `AssetDb::sort` orders this list
    /// by `file_id`.
    pub sub_assets: Vec<SubAsset>,
    /// Project-root-relative path (`Assets/Foo.prefab`,
    /// `Packages/com.boxcat.libs/Bar.mixer`). Convert-side uses this so
    /// `SourcePrefabResolver` can locate base prefabs by guid without
    /// re-walking the project tree.
    pub hint: Box<str>,
}
116
117/// Whole-database envelope. `entries` is sorted by `guid` so convert-time
118/// lookups are `binary_search_by_key`.
119#[derive(Debug, Clone, Default, Encode, Decode)]
120pub struct AssetDb {
121    pub schema_version: u16,
122    /// Interned script GUIDs (u128). `AssetType::Script(idx)` indexes here.
123    /// Sorted ascending; deduplicated.
124    pub script_types: Vec<u128>,
125    /// Sorted by `guid` ascending.
126    pub entries: Vec<AssetEntry>,
127}
128
129impl AssetDb {
130    pub fn new() -> Self {
131        Self {
132            schema_version: SCHEMA_VERSION,
133            ..Default::default()
134        }
135    }
136
137    /// O(log n) lookup by GUID. None if absent.
138    pub fn find_by_guid(&self, guid: u128) -> Option<&AssetEntry> {
139        let idx = self.entries.binary_search_by_key(&guid, |e| e.guid).ok()?;
140        Some(&self.entries[idx])
141    }
142
143    /// Resolve `AssetType::Script(idx)` to its underlying script GUID.
144    /// Panics on out-of-range idx — that's a corrupt-file error, fail loud.
145    pub fn script_guid(&self, idx: u32) -> u128 {
146        self.script_types[idx as usize]
147    }
148
149    /// Bake-side intern: returns the index of `guid`, inserting if new.
150    pub fn intern_script(&mut self, guid: u128) -> u32 {
151        match self.script_types.binary_search(&guid) {
152            Ok(idx) => idx as u32,
153            Err(idx) => {
154                self.script_types.insert(idx, guid);
155                idx as u32
156            }
157        }
158    }
159
160    /// Sort `entries` by guid and each `sub_assets` by `file_id`.
161    /// Call after bulk-loading.
162    pub fn sort(&mut self) {
163        self.entries.sort_by_key(|e| e.guid);
164        for e in &mut self.entries {
165            e.sub_assets.sort_by_key(|s| s.file_id);
166        }
167    }
168}
169
170// ─── Bake-only cache ─────────────────────────────────────────────────────
171
/// `AssetType` variant for the cache. Stores the script GUID directly so
/// the cache doesn't depend on the in-memory `script_types` table — each
/// bake interns scripts fresh.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
pub enum CachedAssetType {
    /// Built-in Unity type, carried as a `u32` like `AssetType::Native`.
    Native(u32),
    /// Script-backed asset, carried as the full u128 script GUID rather
    /// than an index into a table.
    Script(u128),
}
180
/// One cached parse result, keyed by `hint`. Lets a re-bake skip the
/// .meta + asset reads when both mtimes match.
#[derive(Debug, Clone, Encode, Decode)]
pub struct CachedEntry {
    /// Cache key for this entry.
    pub hint: Box<str>,
    /// Recorded mtime of the `.meta` file (nanoseconds, per the `_ns`
    /// suffix).
    pub meta_mtime_ns: u64,
    /// Recorded mtime of the asset file (nanoseconds, per the `_ns`
    /// suffix).
    pub asset_mtime_ns: u64,
    /// Asset GUID resolved by the earlier parse.
    pub guid: u128,
    /// Asset type resolved by the earlier parse, with script GUIDs stored
    /// inline.
    pub asset_type: CachedAssetType,
    /// Sub-assets resolved by the earlier parse.
    pub sub_assets: Vec<SubAsset>,
}
192
193/// Bake-only cache file envelope. `entries` order is hint-sorted so re-writes
194/// are deterministic, but lookups go through a HashMap built at load.
195#[derive(Debug, Clone, Default, Encode, Decode)]
196pub struct BakeCache {
197    pub schema_version: u16,
198    pub entries: Vec<CachedEntry>,
199}
200
201impl BakeCache {
202    pub fn new() -> Self {
203        Self {
204            schema_version: SCHEMA_VERSION,
205            ..Default::default()
206        }
207    }
208}
209
210// ─── Path helpers ────────────────────────────────────────────────────────
211
212/// Convert artifact filename.
213pub const DB_FILENAME: &str = "asset-db.bin";
214
215/// Bake-only mtime cache filename. Sibling to [`DB_FILENAME`].
216pub const CACHE_FILENAME: &str = "asset-db.cache.bin";
217
218/// `<dir>/asset-db.bin`. Caller composes the directory convention
219/// (e.g. `<project>/Library/unity-assetdb/`).
220pub fn db_path(dir: &Path) -> PathBuf {
221    dir.join(DB_FILENAME)
222}
223
224/// `<dir>/asset-db.cache.bin`. Sibling to [`db_path`].
225pub fn cache_path(dir: &Path) -> PathBuf {
226    dir.join(CACHE_FILENAME)
227}
228
229// ─── IO ──────────────────────────────────────────────────────────────────
230
231/// Read the convert artifact.
232pub fn read(path: &Path) -> Result<AssetDb> {
233    let bytes =
234        std::fs::read(path).with_context(|| format!("read asset-db: {}", path.display()))?;
235    decode(&bytes)
236}
237
238pub fn decode(bytes: &[u8]) -> Result<AssetDb> {
239    let body = check_magic(bytes, MAGIC, "asset-db")?;
240    let cfg = bincode::config::standard();
241    let (db, _): (AssetDb, _) = bincode::decode_from_slice(body, cfg).context("bincode decode")?;
242    if db.schema_version != SCHEMA_VERSION {
243        anyhow::bail!(
244            "asset-db schema {} expected {}, re-bake required",
245            db.schema_version,
246            SCHEMA_VERSION
247        );
248    }
249    Ok(db)
250}
251
252/// Write the convert artifact, creating parent dirs as needed.
253pub fn write(path: &Path, db: &AssetDb) -> Result<()> {
254    write_bytes(path, &encode(db)?)
255}
256
257pub fn encode(db: &AssetDb) -> Result<Vec<u8>> {
258    encode_with_magic(db, MAGIC)
259}
260
261/// Read the bake-only cache. Returns `BakeCache::new()` (empty, current
262/// schema) if the file is missing or unreadable — first bake or stale
263/// cache, parse everything from scratch.
264pub fn read_cache(path: &Path) -> Result<BakeCache> {
265    let bytes = std::fs::read(path).with_context(|| format!("read cache: {}", path.display()))?;
266    decode_cache(&bytes)
267}
268
269pub fn decode_cache(bytes: &[u8]) -> Result<BakeCache> {
270    let body = check_magic(bytes, CACHE_MAGIC, "asset-db.cache")?;
271    let cfg = bincode::config::standard();
272    let (cache, _): (BakeCache, _) =
273        bincode::decode_from_slice(body, cfg).context("bincode decode cache")?;
274    if cache.schema_version != SCHEMA_VERSION {
275        anyhow::bail!(
276            "asset-db cache schema {} expected {}",
277            cache.schema_version,
278            SCHEMA_VERSION
279        );
280    }
281    Ok(cache)
282}
283
284pub fn write_cache(path: &Path, cache: &BakeCache) -> Result<()> {
285    write_bytes(path, &encode_cache(cache)?)
286}
287
288pub fn encode_cache(cache: &BakeCache) -> Result<Vec<u8>> {
289    encode_with_magic(cache, CACHE_MAGIC)
290}
291
292fn encode_with_magic<T: Encode>(value: &T, magic: [u8; 8]) -> Result<Vec<u8>> {
293    let cfg = bincode::config::standard();
294    let body = bincode::encode_to_vec(value, cfg).context("bincode encode")?;
295    let mut out = Vec::with_capacity(magic.len() + body.len());
296    out.extend_from_slice(&magic);
297    out.extend_from_slice(&body);
298    Ok(out)
299}
300
301fn check_magic<'a>(bytes: &'a [u8], magic: [u8; 8], label: &str) -> Result<&'a [u8]> {
302    if bytes.len() < magic.len() {
303        anyhow::bail!("{label} too short ({} bytes)", bytes.len());
304    }
305    let (head, body) = bytes.split_at(magic.len());
306    if head != magic {
307        anyhow::bail!("{label} magic mismatch");
308    }
309    Ok(body)
310}
311
312fn write_bytes(path: &Path, bytes: &[u8]) -> Result<()> {
313    if let Some(parent) = path.parent() {
314        std::fs::create_dir_all(parent)
315            .with_context(|| format!("create dir: {}", parent.display()))?;
316    }
317    std::fs::write(path, bytes).with_context(|| format!("write: {}", path.display()))?;
318    Ok(())
319}
320
#[cfg(test)]
mod tests {
    //! Round-trip and rejection tests for the two on-disk envelopes.

    use super::*;
    use crate::class_id::ClassId;

    #[test]
    fn roundtrip_empty() {
        let db = AssetDb::new();
        let bytes = encode(&db).unwrap();
        let back = decode(&bytes).unwrap();
        assert_eq!(back.schema_version, SCHEMA_VERSION);
        assert!(back.entries.is_empty());
        assert!(back.script_types.is_empty());
    }

    #[test]
    fn roundtrip_with_entries() {
        let mut db = AssetDb::new();
        let script_guid = 0x1234_5678_9abc_def0_1122_3344_5566_7788_u128;
        let idx = db.intern_script(script_guid);
        db.entries.push(AssetEntry {
            guid: 0xaabb_ccdd_u128,
            asset_type: AssetType::native(ClassId::Prefab),
            name: "Foo".into(),
            sub_assets: vec![],
            hint: "Assets/UI/Foo.prefab".into(),
        });
        db.entries.push(AssetEntry {
            guid: 0x1111_2222_u128,
            asset_type: AssetType::Script(idx),
            name: "Bar".into(),
            sub_assets: vec![SubAsset {
                file_id: 21300000,
                class_id: ClassId::Sprite as u32,
                name: "Bar_sub".into(),
            }],
            hint: "Assets/Tween/Bar.asset".into(),
        });
        db.sort();

        let bytes = encode(&db).unwrap();
        let back = decode(&bytes).unwrap();
        assert_eq!(back.script_types, vec![script_guid]);
        assert_eq!(back.entries.len(), 2);
        assert_eq!(back.entries[0].guid, 0x1111_2222_u128);
        assert_eq!(&*back.find_by_guid(0xaabb_ccdd_u128).unwrap().name, "Foo");
        assert!(back.find_by_guid(0xdead_beef_u128).is_none());

        // The nested payload must survive the trip too, not just the keys.
        let bar = back.find_by_guid(0x1111_2222_u128).unwrap();
        assert_eq!(bar.asset_type, AssetType::Script(idx));
        assert_eq!(&*bar.hint, "Assets/Tween/Bar.asset");
        assert_eq!(bar.sub_assets.len(), 1);
        assert_eq!(bar.sub_assets[0].file_id, 21300000);
        assert_eq!(bar.sub_assets[0].class_id, ClassId::Sprite as u32);
        assert_eq!(&*bar.sub_assets[0].name, "Bar_sub");
    }

    #[test]
    fn intern_dedups() {
        let mut db = AssetDb::new();
        let g = 42u128;
        let a = db.intern_script(g);
        let b = db.intern_script(g);
        assert_eq!(a, b);
        assert_eq!(db.script_types.len(), 1);
    }

    #[test]
    fn intern_keeps_table_sorted() {
        // Interning a smaller GUID after a larger one inserts in front, so
        // the larger GUID moves from slot 0 to slot 1.
        let mut db = AssetDb::new();
        assert_eq!(db.intern_script(20), 0);
        assert_eq!(db.intern_script(10), 0);
        assert_eq!(db.script_types, vec![10u128, 20u128]);
        assert_eq!(db.script_guid(0), 10);
        assert_eq!(db.script_guid(1), 20);
    }

    #[test]
    fn sort_orders_entries_and_sub_assets() {
        let sub = |file_id: i64| SubAsset {
            file_id,
            class_id: ClassId::Sprite as u32,
            name: "s".into(),
        };
        let mut db = AssetDb::new();
        for guid in [3u128, 1, 2] {
            db.entries.push(AssetEntry {
                guid,
                asset_type: AssetType::native(ClassId::Prefab),
                name: "E".into(),
                sub_assets: vec![sub(30), sub(-5), sub(10)],
                hint: "Assets/E.prefab".into(),
            });
        }
        db.sort();

        let guids: Vec<u128> = db.entries.iter().map(|e| e.guid).collect();
        assert_eq!(guids, vec![1u128, 2, 3]);
        for e in &db.entries {
            let fids: Vec<i64> = e.sub_assets.iter().map(|s| s.file_id).collect();
            assert_eq!(fids, vec![-5i64, 10, 30]);
        }
    }

    #[test]
    fn magic_mismatch_errors() {
        let bad = b"NOTAPDB!extra".to_vec();
        assert!(decode(&bad).is_err());
    }

    #[test]
    fn too_short_input_errors() {
        // Fewer bytes than the 8-byte magic, including none at all.
        let err = decode(b"UADB").unwrap_err();
        assert!(err.to_string().contains("too short"));
        assert!(decode(b"").is_err());
        assert!(decode_cache(b"UADC").is_err());
    }

    #[test]
    fn schema_mismatch_errors() {
        let mut db = AssetDb::new();
        db.schema_version = SCHEMA_VERSION + 1;
        let err = decode(&encode(&db).unwrap()).unwrap_err();
        assert!(err.to_string().contains("re-bake required"));

        let mut c = BakeCache::new();
        c.schema_version = SCHEMA_VERSION + 1;
        assert!(decode_cache(&encode_cache(&c).unwrap()).is_err());
    }

    #[test]
    fn cache_roundtrip() {
        let mut c = BakeCache::new();
        c.entries.push(CachedEntry {
            hint: "UI/Foo.prefab".into(),
            meta_mtime_ns: 1,
            asset_mtime_ns: 2,
            guid: 0xaa_u128,
            asset_type: CachedAssetType::Native(1001),
            sub_assets: vec![],
        });
        c.entries.push(CachedEntry {
            hint: "Tween/Bar.asset".into(),
            meta_mtime_ns: 3,
            asset_mtime_ns: 4,
            guid: 0xbb_u128,
            asset_type: CachedAssetType::Script(0xcc_u128),
            sub_assets: vec![],
        });

        let bytes = encode_cache(&c).unwrap();
        let back = decode_cache(&bytes).unwrap();
        assert_eq!(back.entries.len(), 2);
        assert_eq!(
            back.entries[1].asset_type,
            CachedAssetType::Script(0xcc_u128)
        );
    }

    #[test]
    fn cache_magic_distinct_from_db() {
        // Cache file must not be mistaken for db (and vice versa).
        let c = BakeCache::new();
        let cache_bytes = encode_cache(&c).unwrap();
        assert!(decode(&cache_bytes).is_err());

        let db = AssetDb::new();
        let db_bytes = encode(&db).unwrap();
        assert!(decode_cache(&db_bytes).is_err());
    }
}
426}