Skip to main content

unity_assetdb/
store.rs

1//! On-disk schemas for the bake pipeline.
2//!
3//! Two files, written side-by-side under the consumer-chosen out-dir
4//! (commonly `<project>/Library/<consumer>/`):
5//!
6//! - `asset-db.bin` — convert artifact. Lean: per-entry guid, asset type,
7//!   name, sub-assets. Sorted by guid for O(log n) binary-search lookup;
8//!   no path/mtime baggage.
9//! - `asset-db.cache.bin` — bake-only cache, gitignored alongside.
10//!   Maps `hint → (mtimes, resolved bake state)` so unchanged assets skip
11//!   re-parsing on subsequent bakes. Downstream consumers never read this.
12//!
13//! Script (MonoBehaviour / ScriptableObject) types are interned in
14//! `script_types` and referenced by index — keeps per-entry payload small
15//! (8 bytes for `AssetType`).
16
17use std::path::{Path, PathBuf};
18
19use bincode::{Decode, Encode};
20
21use crate::class_id::ClassId;
22
/// Schema version stamped into both on-disk envelopes.
///
/// Bumped whenever the on-disk schema changes incompatibly.
/// A version mismatch is a hard fail — the user re-bakes.
/// [`AssetDb::new`] and [`BakeCache::new`] write this value; [`decode`]
/// and [`decode_cache`] reject any envelope whose stored version differs
/// (older *or* newer) with [`StoreError::SchemaMismatch`].
///
/// History:
/// - v4: every name in `entries[].name` and `entries[].sub_assets[].name`
///   resolves to a unique guid (name namespace unified across top-level
///   and sub-asset rows). Pre-v4 bakes could carry colliding sub-asset
///   names; readers no longer accept them.
/// - v5: two changes shipped together.
///   1. File magic renamed `PSPECADB` → `UADBIN__` and `PSPECABC` →
///      `UADCACHE` to drop the historical "pspec" prefix.
///   2. `SubAsset` carries `class_id` so non-canonical sub-asset fileIDs
///      (prefab-embedded `AnimationClip` with hashed negative fids) keep
///      their real Unity class instead of a `file_id / 100_000` heuristic
///      collapsing them to `ScriptableObject`. Top-level entries also
///      share their alias bucket with same-named entries of a different
///      `asset_type` — type-aware reverse lookup discriminates at query
///      time. See [Name collisions](docs/asset-database.md#name-collisions).
///   Pre-v5 bakes are unreadable; re-bake required after upgrading.
/// - v6: `.prefab`/`.controller`/`.anim`/`.mixer`/`.playable` sub-asset
///   rows now exclude the GameObject-tree structural classes (GameObject,
///   Transform, RectTransform, MonoBehaviour-as-component scoped to
///   `.prefab` only). Pre-v6 caches carry leaked `'@<name>'` sub-asset
///   rows for child GOs that would re-emerge on warm bakes; the bump
///   invalidates them.
pub const SCHEMA_VERSION: u16 = 6;
49
/// File magic for the convert artifact — the first 8 bytes of the file,
/// `b"UADBIN__"`. [`decode`] rejects input that does not start with it.
pub const MAGIC: [u8; 8] = *b"UADBIN__";

/// File magic for the bake-only cache file — the first 8 bytes,
/// `b"UADCACHE"`. Deliberately distinct from [`MAGIC`] so a cache file is
/// never decoded as the convert artifact, or vice versa.
pub const CACHE_MAGIC: [u8; 8] = *b"UADCACHE";
55
/// Type of a Unity asset.
///
/// `Native(classId)` for built-in types (Sprite, Prefab, Texture2D, …).
/// `Script(idx)` for MonoBehaviour-backed assets — `idx` indexes into
/// [`AssetDb::script_types`], whose entries are u128 script GUIDs that
/// match the `guid` field on entries in `types.json`.
///
/// NOTE(review): this enum is part of the on-disk format via the derived
/// `Encode`/`Decode`; reordering or inserting variants presumably changes
/// the encoding and should be treated as a `SCHEMA_VERSION` bump — confirm
/// against the bincode derive rules before touching it.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Encode, Decode)]
pub enum AssetType {
    /// Built-in Unity class, identified by its numeric class ID.
    Native(u32),
    /// Script-backed asset; the payload is an index into
    /// [`AssetDb::script_types`], not the script GUID itself.
    Script(u32),
}
67
68impl AssetType {
69    pub fn native(class_id: ClassId) -> Self {
70        Self::Native(class_id as u32)
71    }
72}
73
/// One sub-object inside an asset that has its own fileID.
///
/// Sprite-atlas entries, multi-clip animations, sprite-sheet sub-sprites,
/// prefab-embedded `AnimationClip` docs. Per-entry list is sorted by
/// `file_id` for binary-search lookups.
///
/// `class_id` is the Unity native classID of the sub-doc (`74` for
/// `AnimationClip`, `213` for `Sprite`, etc.). Stored explicitly because
/// prefab-embedded sub-asset fileIDs are hashed (negative or non-multiple-
/// of-100000) and a `file_id / 100_000` heuristic collapses them to
/// `ScriptableObject` — the asset DB needs the real class for the
/// strict-typed-field elision rule consumers apply downstream.
///
/// `name` is `Box<str>` rather than `String` — strings here are immutable
/// once decoded; dropping the capacity field saves 8 bytes per entry.
#[derive(Debug, Clone, Encode, Decode)]
pub struct SubAsset {
    /// Unity fileID of the sub-object. Signed: hashed prefab-embedded IDs
    /// can be negative. This is the sort key within an entry's list.
    pub file_id: i64,
    /// Unity native classID of the sub-object (see the type-level docs).
    pub class_id: u32,
    /// Display name of the sub-object.
    pub name: Box<str>,
}
95
/// One top-level Unity asset, as stored in the convert artifact.
///
/// `name` is the asset's filename stem (with optional collision suffix).
/// At convert time it's prefixed with `$` to form a JSON ref (`$Foo`),
/// but the prefix is purely a JSON encoding convention — never stored.
/// String fields use `Box<str>` (immutable; saves the 8-byte capacity
/// field a `String` carries for growability).
#[derive(Debug, Clone, Encode, Decode)]
pub struct AssetEntry {
    /// Asset GUID. Sort and lookup key for [`AssetDb::entries`].
    pub guid: u128,
    /// Native class or interned script type of the asset's main object.
    pub asset_type: AssetType,
    /// Filename stem, without the `$` JSON-ref prefix (see above).
    pub name: Box<str>,
    /// Sub-objects with their own fileID; kept sorted by `file_id`
    /// (see [`AssetDb::sort`]).
    pub sub_assets: Vec<SubAsset>,
    /// Project-root-relative path (`Assets/Foo.prefab`,
    /// `Packages/com.boxcat.libs/Bar.mixer`). Convert-side uses this so
    /// `SourcePrefabResolver` can locate base prefabs by guid without
    /// re-walking the project tree.
    pub hint: Box<str>,
}
115
/// Whole-database envelope. `entries` is sorted by `guid` so convert-time
/// lookups are `binary_search_by_key`.
///
/// The derived `Default` leaves `schema_version` at `0`; use
/// [`AssetDb::new`] to get an envelope stamped with [`SCHEMA_VERSION`].
#[derive(Debug, Clone, Default, Encode, Decode)]
pub struct AssetDb {
    /// Schema version the envelope was written with. Declared first, and
    /// compared against [`SCHEMA_VERSION`] by [`decode`].
    pub schema_version: u16,
    /// Interned script GUIDs (u128). `AssetType::Script(idx)` indexes here.
    /// Sorted ascending; deduplicated.
    pub script_types: Vec<u128>,
    /// Sorted by `guid` ascending.
    pub entries: Vec<AssetEntry>,
}
127
128impl AssetDb {
129    pub fn new() -> Self {
130        Self {
131            schema_version: SCHEMA_VERSION,
132            ..Default::default()
133        }
134    }
135
136    /// O(log n) lookup by GUID. None if absent.
137    pub fn find_by_guid(&self, guid: u128) -> Option<&AssetEntry> {
138        let idx = self.entries.binary_search_by_key(&guid, |e| e.guid).ok()?;
139        Some(&self.entries[idx])
140    }
141
142    /// Resolve `AssetType::Script(idx)` to its underlying script GUID.
143    /// Panics on out-of-range idx — that's a corrupt-file error, fail loud.
144    pub fn script_guid(&self, idx: u32) -> u128 {
145        self.script_types[idx as usize]
146    }
147
148    /// Bake-side intern: returns the index of `guid`, inserting if new.
149    pub fn intern_script(&mut self, guid: u128) -> u32 {
150        match self.script_types.binary_search(&guid) {
151            Ok(idx) => idx as u32,
152            Err(idx) => {
153                self.script_types.insert(idx, guid);
154                idx as u32
155            }
156        }
157    }
158
159    /// Sort `entries` by guid and each `sub_assets` by `file_id`.
160    /// Call after bulk-loading.
161    pub fn sort(&mut self) {
162        self.entries.sort_by_key(|e| e.guid);
163        for e in &mut self.entries {
164            e.sub_assets.sort_by_key(|s| s.file_id);
165        }
166    }
167}
168
169// ─── Bake-only cache ─────────────────────────────────────────────────────
170
/// `AssetType` variant for the cache. Stores the script GUID directly so
/// the cache doesn't depend on the in-memory `script_types` table — each
/// bake interns scripts fresh.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Encode, Decode)]
pub enum CachedAssetType {
    /// Built-in Unity class, identified by its numeric class ID.
    Native(u32),
    /// Script-backed asset, identified by the script's full GUID rather
    /// than an index into an interning table.
    Script(u128),
}
179
/// One cached parse result, keyed by `hint`. Lets a re-bake skip the
/// .meta + asset reads when the meta mtime matches.
///
/// `asset_mtime_ns` is recorded but no longer participates in the warm
/// fast-path invalidation check — `process_one` keys solely on
/// `meta_mtime_ns`. The field is retained for forensic value (a future
/// re-bake or external tooling can compare against the live asset
/// mtime) and as a schema slot for richer invalidation logic should it
/// land. The implication: under hand-edits that touch the asset
/// without touching the .meta, this field can become stale on the
/// next re-bake (still serves the cached row). See
/// `tests/bake.rs::cache_does_not_detect_asset_only_touch`.
#[derive(Debug, Clone, Encode, Decode)]
pub struct CachedEntry {
    /// Cache key: the asset's path hint.
    pub hint: Box<str>,
    /// Modification time of the `.meta` file, in nanoseconds. The value
    /// the warm fast-path compares against (see above).
    pub meta_mtime_ns: u64,
    /// Modification time of the asset file, in nanoseconds. Recorded only;
    /// not used for invalidation (see above).
    pub asset_mtime_ns: u64,
    /// GUID resolved for the asset on the bake that produced this row.
    pub guid: u128,
    /// Resolved asset type, with script GUIDs stored inline.
    pub asset_type: CachedAssetType,
    /// Resolved sub-asset rows for the asset.
    pub sub_assets: Vec<SubAsset>,
}
201
/// Bake-only cache file envelope. `entries` order is hint-sorted so re-writes
/// are deterministic, but lookups go through a HashMap built at load.
///
/// The derived `Default` leaves `schema_version` at `0`; use
/// [`BakeCache::new`] to get an envelope stamped with [`SCHEMA_VERSION`].
#[derive(Debug, Clone, Default, Encode, Decode)]
pub struct BakeCache {
    /// Schema version the cache was written with. Declared first, and
    /// compared against [`SCHEMA_VERSION`] by [`decode_cache`].
    pub schema_version: u16,
    /// Cached parse results, one per asset.
    pub entries: Vec<CachedEntry>,
}
209
210impl BakeCache {
211    pub fn new() -> Self {
212        Self {
213            schema_version: SCHEMA_VERSION,
214            ..Default::default()
215        }
216    }
217}
218
219// ─── Path helpers ────────────────────────────────────────────────────────
220
/// Convert artifact filename. Joined onto the out-dir by [`db_path`].
pub const DB_FILENAME: &str = "asset-db.bin";

/// Bake-only mtime cache filename. Sibling to [`DB_FILENAME`]; joined onto
/// the out-dir by [`cache_path`].
pub const CACHE_FILENAME: &str = "asset-db.cache.bin";
226
227/// `<dir>/asset-db.bin`. Caller composes the directory convention
228/// (e.g. `<project>/Library/unity-assetdb/`).
229pub fn db_path(dir: &Path) -> PathBuf {
230    dir.join(DB_FILENAME)
231}
232
233/// `<dir>/asset-db.cache.bin`. Sibling to [`db_path`].
234pub fn cache_path(dir: &Path) -> PathBuf {
235    dir.join(CACHE_FILENAME)
236}
237
238// ─── Errors ──────────────────────────────────────────────────────────────
239
/// Errors from reading/writing the on-disk asset-db / cache binaries.
///
/// Distinguishes filesystem errors (path-tagged), envelope-level
/// validation (magic + schema), and the underlying `bincode` codec
/// errors. Consumers can match on `SchemaMismatch` to detect "needs
/// re-bake" without string-parsing.
#[derive(Debug, thiserror::Error)]
pub enum StoreError {
    /// A filesystem operation failed. `op` is a short label for the
    /// operation (`"read asset-db"`, `"read cache"`, `"create dir"`,
    /// `"write"`) and `path` is the path it was applied to.
    #[error("{op} {}: {source}", path.display())]
    Io {
        op: &'static str,
        path: PathBuf,
        #[source]
        source: std::io::Error,
    },
    /// The input is shorter than the 8-byte magic. `label` names the file
    /// kind (`"asset-db"` or `"asset-db.cache"`); `len` is the input length.
    #[error("{label} too short ({len} bytes)")]
    MagicTooShort { label: &'static str, len: usize },
    /// The first 8 bytes are not the magic expected for `label`.
    #[error("{label} magic mismatch")]
    MagicMismatch { label: &'static str },
    /// The envelope's stored schema version (`found`) differs from this
    /// build's [`SCHEMA_VERSION`] (`expected`). The fix is a re-bake.
    #[error("{label} schema {found} expected {expected}, re-bake required")]
    SchemaMismatch {
        label: &'static str,
        found: u16,
        expected: u16,
    },
    /// The body after the magic could not be decoded by `bincode`.
    #[error("bincode decode: {0}")]
    BincodeDecode(#[from] bincode::error::DecodeError),
    /// The in-memory value could not be encoded by `bincode`.
    #[error("bincode encode: {0}")]
    BincodeEncode(#[from] bincode::error::EncodeError),
}
270
271// ─── IO ──────────────────────────────────────────────────────────────────
272
273/// Read the convert artifact.
274pub fn read(path: &Path) -> Result<AssetDb, StoreError> {
275    let bytes = std::fs::read(path).map_err(|source| StoreError::Io {
276        op: "read asset-db",
277        path: path.to_path_buf(),
278        source,
279    })?;
280    decode(&bytes)
281}
282
283pub fn decode(bytes: &[u8]) -> Result<AssetDb, StoreError> {
284    let body = check_magic(bytes, MAGIC, "asset-db")?;
285    let cfg = bincode::config::standard();
286    let (db, _): (AssetDb, _) = bincode::decode_from_slice(body, cfg)?;
287    if db.schema_version != SCHEMA_VERSION {
288        return Err(StoreError::SchemaMismatch {
289            label: "asset-db",
290            found: db.schema_version,
291            expected: SCHEMA_VERSION,
292        });
293    }
294    Ok(db)
295}
296
297/// Write the convert artifact, creating parent dirs as needed.
298pub fn write(path: &Path, db: &AssetDb) -> Result<(), StoreError> {
299    write_bytes(path, &encode(db)?)
300}
301
/// Encode `db` into the convert-artifact byte layout: the 8-byte [`MAGIC`]
/// followed by the bincode-encoded envelope. Counterpart of [`decode`].
pub fn encode(db: &AssetDb) -> Result<Vec<u8>, StoreError> {
    encode_with_magic(db, MAGIC)
}
305
306/// Read the bake-only cache. Caller decides what to do on error —
307/// the bake treats any error here as "no cache, parse from scratch".
308pub fn read_cache(path: &Path) -> Result<BakeCache, StoreError> {
309    let bytes = std::fs::read(path).map_err(|source| StoreError::Io {
310        op: "read cache",
311        path: path.to_path_buf(),
312        source,
313    })?;
314    decode_cache(&bytes)
315}
316
317pub fn decode_cache(bytes: &[u8]) -> Result<BakeCache, StoreError> {
318    let body = check_magic(bytes, CACHE_MAGIC, "asset-db.cache")?;
319    let cfg = bincode::config::standard();
320    let (cache, _): (BakeCache, _) = bincode::decode_from_slice(body, cfg)?;
321    if cache.schema_version != SCHEMA_VERSION {
322        return Err(StoreError::SchemaMismatch {
323            label: "asset-db.cache",
324            found: cache.schema_version,
325            expected: SCHEMA_VERSION,
326        });
327    }
328    Ok(cache)
329}
330
331pub fn write_cache(path: &Path, cache: &BakeCache) -> Result<(), StoreError> {
332    write_bytes(path, &encode_cache(cache)?)
333}
334
/// Encode `cache` into the cache-file byte layout: the 8-byte
/// [`CACHE_MAGIC`] followed by the bincode-encoded envelope. Counterpart of
/// [`decode_cache`].
pub fn encode_cache(cache: &BakeCache) -> Result<Vec<u8>, StoreError> {
    encode_with_magic(cache, CACHE_MAGIC)
}
338
339fn encode_with_magic<T: Encode>(value: &T, magic: [u8; 8]) -> Result<Vec<u8>, StoreError> {
340    let cfg = bincode::config::standard();
341    let body = bincode::encode_to_vec(value, cfg)?;
342    let mut out = Vec::with_capacity(magic.len() + body.len());
343    out.extend_from_slice(&magic);
344    out.extend_from_slice(&body);
345    Ok(out)
346}
347
348fn check_magic<'a>(
349    bytes: &'a [u8],
350    magic: [u8; 8],
351    label: &'static str,
352) -> Result<&'a [u8], StoreError> {
353    if bytes.len() < magic.len() {
354        return Err(StoreError::MagicTooShort {
355            label,
356            len: bytes.len(),
357        });
358    }
359    let (head, body) = bytes.split_at(magic.len());
360    if head != magic {
361        return Err(StoreError::MagicMismatch { label });
362    }
363    Ok(body)
364}
365
366fn write_bytes(path: &Path, bytes: &[u8]) -> Result<(), StoreError> {
367    if let Some(parent) = path.parent() {
368        std::fs::create_dir_all(parent).map_err(|source| StoreError::Io {
369            op: "create dir",
370            path: parent.to_path_buf(),
371            source,
372        })?;
373    }
374    std::fs::write(path, bytes).map_err(|source| StoreError::Io {
375        op: "write",
376        path: path.to_path_buf(),
377        source,
378    })?;
379    Ok(())
380}
381
#[cfg(test)]
mod tests {
    use super::*;
    use crate::class_id::ClassId;

    /// An empty database survives an encode/decode round-trip.
    #[test]
    fn roundtrip_empty() {
        let db = AssetDb::new();
        let bytes = encode(&db).unwrap();
        let back = decode(&bytes).unwrap();
        assert_eq!(back.schema_version, SCHEMA_VERSION);
        assert!(back.entries.is_empty());
        assert!(back.script_types.is_empty());
    }

    /// Entries, interned script types and guid lookup survive a round-trip;
    /// `sort` puts the lower guid first.
    #[test]
    fn roundtrip_with_entries() {
        let mut db = AssetDb::new();
        let script_guid = 0x1234_5678_9abc_def0_1122_3344_5566_7788_u128;
        let idx = db.intern_script(script_guid);
        db.entries.push(AssetEntry {
            guid: 0xaabb_ccdd_u128,
            asset_type: AssetType::native(ClassId::Prefab),
            name: "Foo".into(),
            sub_assets: vec![],
            hint: "Assets/UI/Foo.prefab".into(),
        });
        db.entries.push(AssetEntry {
            guid: 0x1111_2222_u128,
            asset_type: AssetType::Script(idx),
            name: "Bar".into(),
            sub_assets: vec![SubAsset {
                file_id: 21300000,
                class_id: ClassId::Sprite as u32,
                name: "Bar_sub".into(),
            }],
            hint: "Assets/Tween/Bar.asset".into(),
        });
        db.sort();

        let bytes = encode(&db).unwrap();
        let back = decode(&bytes).unwrap();
        assert_eq!(back.script_types, vec![script_guid]);
        assert_eq!(back.entries.len(), 2);
        assert_eq!(back.entries[0].guid, 0x1111_2222_u128);
        assert_eq!(&*back.find_by_guid(0xaabb_ccdd_u128).unwrap().name, "Foo");
        assert!(back.find_by_guid(0xdead_beef_u128).is_none());
    }

    /// Interning the same GUID twice yields one table slot.
    #[test]
    fn intern_dedups() {
        let mut db = AssetDb::new();
        let g = 42u128;
        let a = db.intern_script(g);
        let b = db.intern_script(g);
        assert_eq!(a, b);
        assert_eq!(db.script_types.len(), 1);
    }

    /// Wrong leading bytes are reported as `MagicMismatch` specifically —
    /// not merely as "some error".
    #[test]
    fn magic_mismatch_errors() {
        let bad = b"NOTAPDB!extra".to_vec();
        assert!(matches!(
            decode(&bad),
            Err(StoreError::MagicMismatch { label: "asset-db" })
        ));
    }

    /// Input shorter than the 8-byte magic is reported as `MagicTooShort`
    /// carrying the actual input length.
    #[test]
    fn magic_too_short_errors() {
        assert!(matches!(
            decode(b"UADB"),
            Err(StoreError::MagicTooShort {
                label: "asset-db",
                len: 4
            })
        ));
        assert!(matches!(
            decode_cache(b""),
            Err(StoreError::MagicTooShort {
                label: "asset-db.cache",
                len: 0
            })
        ));
    }

    /// Cache entries (both `CachedAssetType` variants) survive a round-trip.
    #[test]
    fn cache_roundtrip() {
        let mut c = BakeCache::new();
        c.entries.push(CachedEntry {
            hint: "UI/Foo.prefab".into(),
            meta_mtime_ns: 1,
            asset_mtime_ns: 2,
            guid: 0xaa_u128,
            asset_type: CachedAssetType::Native(1001),
            sub_assets: vec![],
        });
        c.entries.push(CachedEntry {
            hint: "Tween/Bar.asset".into(),
            meta_mtime_ns: 3,
            asset_mtime_ns: 4,
            guid: 0xbb_u128,
            asset_type: CachedAssetType::Script(0xcc_u128),
            sub_assets: vec![],
        });

        let bytes = encode_cache(&c).unwrap();
        let back = decode_cache(&bytes).unwrap();
        assert_eq!(back.entries.len(), 2);
        assert_eq!(
            back.entries[1].asset_type,
            CachedAssetType::Script(0xcc_u128)
        );
    }

    #[test]
    fn cache_magic_distinct_from_db() {
        // Cache file must not be mistaken for db (and vice versa), and the
        // rejection must come from the magic check.
        let c = BakeCache::new();
        let cache_bytes = encode_cache(&c).unwrap();
        assert!(matches!(
            decode(&cache_bytes),
            Err(StoreError::MagicMismatch { label: "asset-db" })
        ));

        let db = AssetDb::new();
        let db_bytes = encode(&db).unwrap();
        assert!(matches!(
            decode_cache(&db_bytes),
            Err(StoreError::MagicMismatch {
                label: "asset-db.cache"
            })
        ));
    }

    /// Schema version is bumped on every breaking layout change. A
    /// downgrade-shaped payload (correct magic but lower version)
    /// must hard-fail decoding so the bake re-parses from scratch
    /// rather than serving structurally wrong sub-asset rows.
    #[test]
    fn schema_version_downgrade_hard_fails() {
        let mut db = AssetDb::new();
        db.schema_version = SCHEMA_VERSION.saturating_sub(1);
        let bytes = encode(&db).unwrap();
        let err = decode(&bytes).unwrap_err();
        // The user-facing message must tell the user what to do.
        let msg = err.to_string();
        assert!(
            msg.contains("schema") && msg.contains("re-bake"),
            "expected schema-version error, got: {msg}",
        );
        // Consumers match on the variant, so pin it and its payload.
        match err {
            StoreError::SchemaMismatch {
                label,
                found,
                expected,
            } => {
                assert_eq!(label, "asset-db");
                assert_eq!(found, SCHEMA_VERSION.saturating_sub(1));
                assert_eq!(expected, SCHEMA_VERSION);
            }
            other => panic!("expected SchemaMismatch, got: {other:?}"),
        }
    }

    /// Schema version upgrades (an envelope from a *newer* unity-assetdb
    /// build) also hard-fail — consumer can't safely interpret
    /// fields it doesn't know about. Same error path as downgrades.
    #[test]
    fn schema_version_upgrade_hard_fails() {
        let mut db = AssetDb::new();
        db.schema_version = SCHEMA_VERSION + 1;
        let bytes = encode(&db).unwrap();
        match decode(&bytes) {
            Err(StoreError::SchemaMismatch {
                found, expected, ..
            }) => {
                assert_eq!(found, SCHEMA_VERSION + 1);
                assert_eq!(expected, SCHEMA_VERSION);
            }
            other => panic!("expected SchemaMismatch, got: {other:?}"),
        }
    }

    /// Cache schema mismatch is detected the same way — a v5 cache
    /// against a v6 reader must trip the error path so the bake
    /// rebuilds the cache from scratch instead of serving stale rows.
    #[test]
    fn cache_schema_version_mismatch_hard_fails() {
        let mut c = BakeCache::new();
        c.schema_version = SCHEMA_VERSION.saturating_sub(1);
        let bytes = encode_cache(&c).unwrap();
        match decode_cache(&bytes) {
            Err(StoreError::SchemaMismatch {
                label,
                found,
                expected,
            }) => {
                assert_eq!(label, "asset-db.cache");
                assert_eq!(found, SCHEMA_VERSION.saturating_sub(1));
                assert_eq!(expected, SCHEMA_VERSION);
            }
            other => panic!("expected SchemaMismatch, got: {other:?}"),
        }
    }

    /// `SubAsset.class_id` is a v5+ field. Pin that a round-trip
    /// through the encoder preserves it — the read path uses this
    /// value as the discriminator for sub-asset namespacing rules,
    /// so silent truncation would be a hard-to-spot correctness bug.
    #[test]
    fn subasset_class_id_round_trips() {
        let mut db = AssetDb::new();
        db.entries.push(AssetEntry {
            guid: 0xa0_u128,
            asset_type: AssetType::native(ClassId::AnimatorController),
            name: "Foo".into(),
            sub_assets: vec![
                SubAsset {
                    file_id: 9100000,
                    class_id: ClassId::AnimatorController as u32,
                    name: "Foo_self".into(),
                },
                SubAsset {
                    file_id: -123456789,
                    class_id: 1102, // AnimatorState
                    name: "Idle".into(),
                },
            ],
            hint: "Assets/Foo.controller".into(),
        });
        db.sort();

        let bytes = encode(&db).unwrap();
        let back = decode(&bytes).unwrap();
        let entry = back.find_by_guid(0xa0_u128).unwrap();
        assert_eq!(entry.sub_assets.len(), 2);
        let self_sub = entry
            .sub_assets
            .iter()
            .find(|s| &*s.name == "Foo_self")
            .unwrap();
        assert_eq!(self_sub.class_id, ClassId::AnimatorController as u32);
        let idle = entry.sub_assets.iter().find(|s| &*s.name == "Idle").unwrap();
        assert_eq!(idle.class_id, 1102);
    }
}
568}