Skip to main content

unity_assetdb/
bake.rs

1//! Bake orchestrator: walk → parse → cache → write.
2//!
3//! Per-file flow:
4//! 1. Stat `.meta` and the companion asset file. If both mtimes match the
5//!    cached values → reuse cached entry, skip parse.
6//! 2. Else read `.meta` → guid + sprite-sheet sub-assets.
7//! 3. Read the asset file → top-level class ID + sub-asset rows.
8//! 4. Resolve `AssetType`: native `class_id` or `Script(script_guid)`.
9//! 5. Derive alias from the filename stem.
10//!
11//! Post-walk: alias-collision sweep (filename stems can clash; we suffix
12//! with parent dir on conflict and warn).
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::sync::atomic::{AtomicUsize, Ordering};
17use std::sync::mpsc;
18use std::time::{Instant, SystemTime};
19
20use ahash::{AHashMap, AHashSet};
21
22use anyhow::{Context, Result};
23
24use crate::asset;
25use crate::class_id::{ClassId, class_from_ext};
26use crate::meta::{self, SPRITE_MODE_SINGLE, TEXTURE_TYPE_SPRITE};
27use crate::store::{
28    self, AssetDb, AssetEntry, AssetType, BakeCache, CachedAssetType, CachedEntry, SubAsset,
29    CACHE_FILENAME, DB_FILENAME,
30};
31use crate::walk::walk_meta_files;
32
/// Caller-supplied name sanitizer. Returns `Some(rewritten)` when the
/// input contains characters the consumer wants to scrub from asset
/// names; `None` to keep the input as-is. Bake calls this once per
/// top-level filename stem and once per sub-asset name — that covers
/// YAML `m_Name` rows as well as `.meta` sprite-sheet entries, since
/// both end up in the same sub-asset list before sanitizing runs.
///
/// Bound is `Send + Sync + 'static` because [`BakeOptions`] flows into
/// `ignore::WalkParallel` worker closures.
///
/// Default behavior (no sanitizer) leaves all names verbatim.
pub type NameSanitizer = Box<dyn Fn(&str) -> Option<String> + Send + Sync + 'static>;
43
/// Caller-supplied warning sink. Bake invokes this for non-fatal events
/// (worker errors during the parallel walk, name-collision rewrites,
/// sanitizer rewrites). Every message handed to the sink already starts
/// with `warning: `. The library never writes to stderr itself.
pub type WarnSink = Box<dyn Fn(&str) + Send + Sync + 'static>;
48
/// Caller-supplied progress sink. Bake invokes this with the post-bake
/// summary line and (when `BakeOptions::verbose_timing` is true) with
/// one additional per-phase timing line. Separate from [`WarnSink`] so
/// consumers can route "info" output and warnings to different places.
pub type ProgressSink = Box<dyn Fn(&str) + Send + Sync + 'static>;
54
/// Borrowed view of a [`NameSanitizer`] for internal helpers (what
/// `Option<NameSanitizer>::as_deref()` yields). Kept as a named type so
/// per-call signatures don't trip clippy's `type_complexity`.
type NameSanitizerRef<'a> = &'a (dyn Fn(&str) -> Option<String> + Send + Sync);
58
/// Borrowed view of a [`WarnSink`] for internal helpers (what
/// `Option<WarnSink>::as_deref()` yields). Named for the same reason as
/// [`NameSanitizerRef`]: keeps helper signatures short.
type WarnSinkRef<'a> = &'a (dyn Fn(&str) + Send + Sync);
61
/// Extensions of container assets whose embedded sub-asset docs stay
/// OUT of the global dedup pool. Those sub-assets live in the parent's
/// namespace and consumers resolve them via parent-scoped addressing
/// (`$Sub@Parent`).
///
/// Keyed by extension rather than by `AssetType`: the top doc of a
/// `.playable` file is whichever sub-doc Unity sorts first by hashed
/// fileID (often an `AnimationTrack`, not the `TimelineAsset` itself),
/// so the resulting `AssetTypeRaw::Script(...)` carries an unstable
/// script guid. The extension is the only stable container discriminator.
const EMBEDDED_CONTAINER_EXTS: &[&str] = &["prefab", "controller", "anim", "mixer", "playable"];

/// True when `hint`'s file extension is one of
/// [`EMBEDDED_CONTAINER_EXTS`]. The comparison is exact (case-sensitive);
/// a path with no extension, or a non-UTF-8 one, is never a container.
fn is_embedded_container(hint: &str) -> bool {
    match Path::new(hint).extension().and_then(|s| s.to_str()) {
        Some(ext) => EMBEDDED_CONTAINER_EXTS.iter().any(|known| *known == ext),
        None => false,
    }
}
79
80/// True when `class_id` is a structural sub-doc that should be filtered
81/// out at parse time for the given container extension.
82///
83/// `.prefab`: GO / Transform / RectTransform / MonoBehaviour are all
84/// part of the GameObject tree — never addressable as sub-assets.
85/// `.controller` / `.anim` / `.mixer` / `.playable`: MonoBehaviour-114
86/// docs ARE addressable sub-assets (Timeline tracks, AudioMixerGroup,
87/// etc.) — only filter the GO-tree triplet, which doesn't appear in
88/// these files anyway (the predicate is a no-op there but stays valid
89/// for future-proofing).
90fn is_filterable_subdoc_for_ext(class_id: u32, ext: &str) -> bool {
91    let cls = ClassId::from_raw(class_id);
92    let is_go_tree = matches!(
93        cls,
94        Some(ClassId::GameObject | ClassId::Transform | ClassId::RectTransform)
95    );
96    let is_component = matches!(cls, Some(ClassId::MonoBehaviour));
97    is_go_tree || (is_component && ext == "prefab")
98}
99
/// Convert `SystemTime` → whole nanoseconds since the UNIX epoch.
///
/// Saturates at both ends instead of producing a wrapped value:
/// - pre-epoch times (which would only happen if the user's clock is
///   bogus) map to `0`;
/// - times whose nanosecond count does not fit in `u64` (roughly year
///   2554 onward) map to `u64::MAX`. A plain `as u64` cast would wrap
///   such a value into a small, plausible-looking timestamp that could
///   compare equal to a cached mtime by accident.
fn mtime_ns(t: SystemTime) -> u64 {
    t.duration_since(SystemTime::UNIX_EPOCH)
        .map_or(0, |d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
}
106
/// One raw bake result, before name dedup. For MonoBehaviour-backed
/// assets the type is carried as `AssetTypeRaw::Script(guid)` with the
/// unmapped script GUID — interning happens after the walk so we only
/// need one final sort.
#[derive(Clone)]
struct RawEntry {
    /// Asset GUID as parsed from the `.meta` file.
    guid: u128,
    /// Un-interned type discriminator; see [`AssetTypeRaw`].
    asset_type_raw: AssetTypeRaw,
    /// Path of the asset relative to the project; also the cache key.
    hint: String,
    /// Alias candidate. Filename stem for freshly parsed entries, empty
    /// for entries loaded from the cache; `build_db` re-derives it from
    /// `hint` in both cases before dedup.
    name: String,
    /// `.meta` mtime in ns since the UNIX epoch (cache validation key).
    meta_mtime_ns: u64,
    /// Companion asset mtime in ns since the UNIX epoch (cache validation key).
    asset_mtime_ns: u64,
    /// Sub-asset rows collected from the asset YAML and the `.meta`
    /// sprite sheet.
    sub_assets: Vec<SubAsset>,
}
120
/// Hashable type discriminator: `Native(classID)` for built-in classes
/// and `Script(scriptGuid)` for MonoBehaviour-backed assets. Hashable so
/// the dedup pass can bucket by `(name, asset_type)` without depending
/// on the post-walk script-intern table.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum AssetTypeRaw {
    /// Raw Unity class ID. May be a value `ClassId::from_raw` does not
    /// recognize — unknown IDs are stored as-is.
    Native(u32),
    /// GUID of the script backing the asset, not yet interned.
    Script(u128),
}
130
131/// Per-worker-thread accumulator. Sends its collected `entries` + `errors`
132/// to the main thread via Drop — `ignore::WalkBuilder::run` drops each
133/// thread's visitor closure (and thus its captured `ThreadLocal`) on
134/// thread exit, so the main thread sees all batches once `walker.run`
135/// returns.
136struct ThreadLocal {
137    entries: Vec<RawEntry>,
138    errors: Vec<String>,
139    raw_tx: mpsc::Sender<Vec<RawEntry>>,
140    err_tx: mpsc::Sender<Vec<String>>,
141}
142
143impl Drop for ThreadLocal {
144    fn drop(&mut self) {
145        let entries = std::mem::take(&mut self.entries);
146        let errors = std::mem::take(&mut self.errors);
147        // Channel-closed errors are unreachable here — main thread holds
148        // the receivers until after `walker.run` returns.
149        let _ = self.raw_tx.send(entries);
150        let _ = self.err_tx.send(errors);
151    }
152}
153
/// In-memory bake cache: hint (Assets-relative, forward-slashed) →
/// the [`RawEntry`] recorded for it by the previous bake. ahash beats
/// siphash by ~2x for our small-string keys.
type CacheMap = AHashMap<String, RawEntry>;
157
158/// Run a `Result<Option<T>>`-producing closure under `catch_unwind` and
159/// flatten the four-way outcome (success-with-value / success-skip /
160/// inner-err / panic) into `Result<Option<T>, String>`. The closure
161/// is wrapped in `AssertUnwindSafe` because parallel-walk visitors
162/// capture Arc state by ref, and the bake worker treats process_one
163/// as panic-safe on its inputs.
164///
165/// `label` prefixes both inner errors and panic reports with the
166/// asset path; `task_name` names the operation in the panic line
167/// (e.g. `"process_one"`) so the message reads
168/// `"<path>: panic in <task_name>: <payload>"`.
169///
170/// Pulled out of the inline closure inside `bake_action`'s parallel
171/// walk so panic-payload extraction (string / String / non-string)
172/// can be unit-tested without spinning up a project tree.
173fn run_with_panic_safety<T, F>(label: &str, task_name: &str, f: F) -> Result<Option<T>, String>
174where
175    F: FnOnce() -> Result<Option<T>>,
176{
177    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
178        Ok(Ok(opt)) => Ok(opt),
179        Ok(Err(e)) => Err(format!("{label}: {e}")),
180        Err(panic) => {
181            let msg = panic
182                .downcast_ref::<&str>()
183                .map(|s| (*s).to_string())
184                .or_else(|| panic.downcast_ref::<String>().cloned())
185                .unwrap_or_else(|| "<non-string panic payload>".to_string());
186            Err(format!("{label}: panic in {task_name}: {msg}"))
187        }
188    }
189}
190
191/// Build the in-memory cache from a previously-saved `BakeCache`. Each
192/// `CachedEntry` becomes a `RawEntry` keyed by its hint. Cache hits during
193/// the walk drop straight into the post-walk pipeline.
194///
195/// `String::from(Box<str>)` is O(1) — Rust hands the heap allocation
196/// directly from the box to the new String, no copy. The map key is then
197/// cloned once for the parallel field on `RawEntry` (one alloc per entry).
198fn build_cache(cache: BakeCache) -> CacheMap {
199    let mut out = AHashMap::with_capacity(cache.entries.len());
200    for e in cache.entries {
201        let asset_type_raw = match e.asset_type {
202            CachedAssetType::Native(n) => AssetTypeRaw::Native(n),
203            CachedAssetType::Script(g) => AssetTypeRaw::Script(g),
204        };
205        let hint = String::from(e.hint);
206        let raw = RawEntry {
207            guid: e.guid,
208            asset_type_raw,
209            hint: hint.clone(),
210            name: String::new(), // re-derived in build_db
211            meta_mtime_ns: e.meta_mtime_ns,
212            asset_mtime_ns: e.asset_mtime_ns,
213            sub_assets: e.sub_assets,
214        };
215        out.insert(hint, raw);
216    }
217    out
218}
219
220/// Build the on-disk cache from the post-walk raw entries. Sorted by hint
221/// so the file is byte-stable across re-bakes when nothing changed.
222fn build_bake_cache(raw: &[RawEntry]) -> BakeCache {
223    let mut entries: Vec<CachedEntry> = raw
224        .iter()
225        .map(|r| CachedEntry {
226            hint: r.hint.clone().into_boxed_str(),
227            meta_mtime_ns: r.meta_mtime_ns,
228            asset_mtime_ns: r.asset_mtime_ns,
229            guid: r.guid,
230            asset_type: match r.asset_type_raw {
231                AssetTypeRaw::Native(n) => CachedAssetType::Native(n),
232                AssetTypeRaw::Script(g) => CachedAssetType::Script(g),
233            },
234            sub_assets: r.sub_assets.clone(),
235        })
236        .collect();
237    entries.sort_by(|a, b| a.hint.cmp(&b.hint));
238    BakeCache {
239        schema_version: store::SCHEMA_VERSION,
240        entries,
241    }
242}
243
/// Caller-supplied bake configuration.
///
/// Built by the consumer's CLI / library entry point and handed to
/// [`bake`]. The library never reads env vars, never resolves the
/// project root for you, and never writes to stderr — every side
/// channel routes through one of the optional sinks below.
pub struct BakeOptions {
    /// Project root containing `Assets/` + `ProjectSettings/`. Caller
    /// resolves this (typically via [`crate::walk::resolve_project_root`])
    /// before constructing options.
    pub project_root: PathBuf,
    /// Directory where `asset-db.bin` and `asset-db.cache.bin` are written.
    /// Caller composes the convention (e.g. `<project>/Library/unity-assetdb/`
    /// or a fixture-staging path). Created (with parents) by [`bake`] if
    /// it does not exist yet.
    pub out_dir: PathBuf,
    /// Optional name sanitizer; see [`NameSanitizer`].
    pub name_sanitizer: Option<NameSanitizer>,
    /// Optional warning sink; see [`WarnSink`]. `None` discards warnings.
    pub on_warn: Option<WarnSink>,
    /// Optional progress sink; see [`ProgressSink`]. `None` discards the
    /// summary line.
    pub on_progress: Option<ProgressSink>,
    /// When true, [`on_progress`](BakeOptions::on_progress) also receives
    /// a per-phase timing line (cache / walk / build / write). Has no
    /// effect when no progress sink is set. Env-var-driven behavior is
    /// the consumer's call.
    pub verbose_timing: bool,
    /// When true, [`on_warn`](BakeOptions::on_warn) receives a line for
    /// each name-collision rewrite during dedup. Off by default to keep
    /// steady-state warm bakes quiet.
    pub verbose_collisions: bool,
}
275
/// Bake entry-point. Walks `Assets/`, parses `.meta` + asset YAML,
/// writes `<out_dir>/asset-db.bin` and `<out_dir>/asset-db.cache.bin`.
///
/// Phases, in order:
/// 1. **cache** — load the previous bake cache (missing or unreadable
///    cache is treated as empty).
/// 2. **walk** — visit every `.meta` in parallel via `process_one`,
///    reusing cached entries whose mtimes still match.
/// 3. **build** — snapshot the new cache, then dedup names and assemble
///    the `AssetDb` via `build_db`.
/// 4. **write** — persist db, then cache; skipped entirely when the walk
///    was 100% cache hits and both output files already exist.
///
/// Per-file failures (parse errors, panics inside `process_one`) do not
/// abort the bake: they are reported through `opts.on_warn` and the
/// affected file is simply absent from the result.
///
/// # Errors
/// Fails when the output directory cannot be created, when the walk
/// itself reports an error, when `build_db` rejects the entries, or
/// when either output file cannot be written.
pub fn bake(opts: &BakeOptions) -> Result<()> {
    let project_root = &opts.project_root;
    std::fs::create_dir_all(&opts.out_dir)
        .with_context(|| format!("create out-dir: {}", opts.out_dir.display()))?;
    let db_file = opts.out_dir.join(DB_FILENAME);
    let cache_file = opts.out_dir.join(CACHE_FILENAME);
    // All `t_*` values below are cumulative offsets from this instant;
    // per-phase durations are derived by subtraction at report time.
    let t_start = Instant::now();

    // Load bake-only cache. Missing/corrupt → empty (first bake or stale).
    let cache: CacheMap = match store::read_cache(&cache_file) {
        Ok(c) => build_cache(c),
        Err(_) => AHashMap::new(),
    };
    let cache_size = cache.len();
    let t_cache = t_start.elapsed();

    // Per-thread accumulators: each worker drops its `Vec<RawEntry>` and
    // `Vec<String>` (errors) into channels at thread exit via `Drop`. Avoids
    // the Mutex<Vec> contention 16k pushes on 8 cores produced — measured
    // ~3-4 ms warm savings on meow-tower.
    //
    // `ignore::WalkParallel::run` requires `'static + Send` visitors, so
    // shared state goes through `Arc`. Each worker clones the Arc once at
    // factory time — the clone cost is negligible vs the per-entry work.
    let (raw_tx, raw_rx) = mpsc::channel::<Vec<RawEntry>>();
    let (err_tx, err_rx) = mpsc::channel::<Vec<String>>();
    let cache_arc = Arc::new(cache);
    let cache_hits = Arc::new(AtomicUsize::new(0));
    let walked = Arc::new(AtomicUsize::new(0));
    let project_root_arc: Arc<PathBuf> = Arc::new(project_root.clone());

    // The outer closure is the per-worker factory; the inner `move`
    // closure is the visitor invoked once per `.meta` path.
    walk_meta_files(project_root, || {
        let raw_tx = raw_tx.clone();
        let err_tx = err_tx.clone();
        let cache = Arc::clone(&cache_arc);
        let cache_hits = Arc::clone(&cache_hits);
        let walked = Arc::clone(&walked);
        let project_root = Arc::clone(&project_root_arc);
        let mut local = ThreadLocal {
            entries: Vec::with_capacity(2048),
            errors: Vec::new(),
            raw_tx,
            err_tx,
        };
        move |meta_path: &Path| {
            walked.fetch_add(1, Ordering::Relaxed);
            // Catch panics so a single malformed .meta or unforeseen
            // bug doesn't silently terminate the worker thread (which
            // would lose its ThreadLocal accumulator). `ignore::WalkParallel`
            // doesn't propagate visitor panics; without this, a panic in
            // `process_one` produces a partial DB with no surfaced error.
            // Helper does the catch_unwind + payload-downcast — see
            // `run_with_panic_safety`.
            let label = meta_path.display().to_string();
            match run_with_panic_safety(&label, "process_one", || {
                process_one(meta_path, &project_root, &cache, &cache_hits)
            }) {
                Ok(Some(r)) => local.entries.push(r),
                Ok(None) => {}
                Err(msg) => local.errors.push(msg),
            }
        }
    })?;
    // Drop the main thread's own senders so the `rx.iter()` loops below
    // terminate once every worker batch has been received.
    drop(raw_tx);
    drop(err_tx);
    let t_walk = t_start.elapsed();

    let mut errors: Vec<String> = Vec::new();
    for v in err_rx.iter() {
        errors.extend(v);
    }
    if let Some(sink) = opts.on_warn.as_ref() {
        for e in &errors {
            sink(&format!("warning: {e}"));
        }
    }

    let mut raw: Vec<RawEntry> = Vec::with_capacity(cache_size + 256);
    for v in raw_rx.iter() {
        raw.extend(v);
    }
    // Build cache from `raw` (consumes nothing) before `build_db` consumes
    // it. Sequence the writes so the cache is only persisted after the
    // asset-db artifact lands — a half-baked cache without a matching db
    // would let a later run skip parsing for entries that aren't in the
    // db yet.
    let bake_cache = build_bake_cache(&raw);
    let db = build_db(
        raw,
        opts.name_sanitizer.as_deref(),
        opts.on_warn.as_deref(),
        opts.verbose_collisions,
    )?;
    let t_build = t_start.elapsed();

    // No-op skip: every entry came from cache AND nothing was dropped from
    // cache (count stable). Skips ~2-3 ms of bincode encode + file write
    // on the steady-state warm path. Still skips only when both files are
    // present — first run or after a manual delete writes anyway.
    let hit_n = cache_hits.load(Ordering::Relaxed);
    let no_op =
        hit_n == cache_size && hit_n == db.entries.len() && db_file.exists() && cache_file.exists();

    if !no_op {
        store::write(&db_file, &db)
            .with_context(|| format!("write asset-db: {}", db_file.display()))?;
        store::write_cache(&cache_file, &bake_cache)
            .with_context(|| format!("write cache: {}", cache_file.display()))?;
    }
    let t_write = t_start.elapsed();

    if let Some(sink) = opts.on_progress.as_ref() {
        sink(&format!(
            "baked {} entries → {}",
            db.entries.len(),
            db_file.display()
        ));
        if opts.verbose_timing {
            let walked_n = walked.load(Ordering::Relaxed);
            // Entries that were not cache hits, i.e. freshly parsed.
            let parsed_n = db.entries.len() - hit_n;
            let write_phase = if no_op { "skipped" } else { "wrote" };
            sink(&format!(
                "  walked={walked_n} hit={hit_n} parsed={parsed_n} | cache={:?} walk={:?} build={:?} write={:?} ({write_phase}) total={:?}",
                t_cache,
                t_walk - t_cache,
                t_build - t_walk,
                t_write - t_build,
                t_write,
            ));
        }
    }
    Ok(())
}
411
/// Per-`.meta` work. Returns `Ok(None)` when the meta has no companion file
/// to describe (e.g. orphaned `.meta`, directory `.meta`), or when no
/// asset type can be determined for the companion.
///
/// Flow:
/// 1. Stat the companion and the `.meta`; derive the hint.
/// 2. If `cache` holds an entry for that hint with identical mtimes,
///    bump `cache_hits` and return a clone of it without reading files.
/// 3. Otherwise parse the `.meta`, optionally peek into the asset YAML
///    (depending on extension), resolve the asset type and collect
///    sub-assets.
///
/// # Errors
/// Fails when `meta_path` is not a `.meta` path, when the `.meta` cannot
/// be stat'ed or read, when the hint cannot be derived, or when parsing
/// the `.meta` / asset file fails.
fn process_one(
    meta_path: &Path,
    project_root: &Path,
    cache: &CacheMap,
    cache_hits: &AtomicUsize,
) -> Result<Option<RawEntry>> {
    let companion =
        strip_meta_suffix(meta_path).ok_or_else(|| anyhow::anyhow!("not a .meta path"))?;

    // Skip directory `.meta` files — directories don't get asset-db rows.
    // A companion that cannot be stat'ed at all (orphaned `.meta`) is
    // skipped the same way rather than reported as an error.
    let Ok(companion_md) = std::fs::metadata(&companion) else {
        return Ok(None);
    };
    if companion_md.is_dir() {
        return Ok(None);
    }

    let meta_md =
        std::fs::metadata(meta_path).with_context(|| format!("stat: {}", meta_path.display()))?;

    // An unavailable mtime is recorded as the epoch (0 ns).
    let meta_mtime_ns = mtime_ns(meta_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));
    let asset_mtime_ns = mtime_ns(companion_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));

    let hint = rel_hint(project_root, &companion)?;

    // Cache hit?
    if let Some(cached) = cache.get(&hint)
        && cached.meta_mtime_ns == meta_mtime_ns
        && cached.asset_mtime_ns == asset_mtime_ns
    {
        cache_hits.fetch_add(1, Ordering::Relaxed);
        return Ok(Some(cached.clone()));
    }

    // Cache miss → parse.
    let meta_text = std::fs::read_to_string(meta_path)
        .with_context(|| format!("read .meta: {}", meta_path.display()))?;
    let meta_info = meta::parse(&meta_text)?;

    let ext = companion.extension().and_then(|s| s.to_str()).unwrap_or("");
    let from_ext = class_from_ext(ext);

    let mut sub_assets: Vec<SubAsset> = Vec::new();
    let mut top_class_id: Option<u32> = None;
    let mut script_guid: Option<u128> = None;

    // YAML peek strategy:
    //  - WithSubAssets: types where extra docs ARE addressable from outside.
    //    `.asset`/`.spriteatlas`/`.spriteatlasv2` host explicit sub-assets;
    //    `.prefab`/`.controller`/`.anim`/`.mixer`/`.playable` can host
    //    embedded sub-asset docs (legacy `AnimationClip` inline in a
    //    prefab; AnimatorState in a controller; AudioMixerGroup in a
    //    mixer; Timeline tracks in a playable) that other prefabs
    //    address as `{fileID, guid: <parent.guid>, type: 3}`. Without
    //    capturing them the embedded ref encodes as `&#f<fid>` and
    //    cross-prefab refs degrade to the parent alias + `#f<fid>` suffix.
    //    Embeds are excluded from the global dedup pool — see the
    //    `is_embedded_container` checks in `build_db`.
    //  - TopOnly: types whose extra docs are internal scene-graph that
    //    isn't addressable from outside (`.unity`, `.mat`, `.mask`).
    //  - None: extension already says everything (`.png`, `.fbx`, scripts).
    let parse_mode: Option<asset::ParseMode> = match ext {
        "asset" | "spriteatlas" | "spriteatlasv2" | "prefab" | "controller" | "anim"
        | "mixer" | "playable" => Some(asset::ParseMode::WithSubAssets),
        "mat" | "mask" | "unity" => Some(asset::ParseMode::TopOnly),
        _ => None,
    };

    if let Some(mode) = parse_mode {
        let asset_text = read_asset_for_mode(&companion, mode)?;
        let info = asset::parse(&asset_text, mode)?;
        top_class_id = info.top_class_id;
        script_guid = info.script_guid;
        for s in info.sub_assets {
            // Unnamed sub-docs cannot be referenced by name — drop them.
            if s.name.is_empty() {
                continue;
            }
            if is_filterable_subdoc_for_ext(s.class_id, ext) {
                continue;
            }
            sub_assets.push(SubAsset {
                file_id: s.file_id,
                class_id: s.class_id,
                name: s.name.into_boxed_str(),
            });
        }
    }

    // Precedence: script_guid (MonoBehaviour-backed) > from_ext > top_class_id.
    // `.prefab` and `.unity` deliberately let from_ext win — their YAML's first
    // doc is a *contained* object (GameObject = classID 1), not the asset's
    // class (Prefab = 1001). Falling back to top_class_id only for extensions
    // without a stable class mapping (e.g. `.asset`, where the YAML peek is
    // the only signal).
    let asset_type_raw = if let Some(g) = script_guid {
        AssetTypeRaw::Script(g)
    } else if let Some(cls) = from_ext {
        AssetTypeRaw::Native(cls as u32)
    } else if let Some(cls) = top_class_id.and_then(ClassId::from_raw) {
        AssetTypeRaw::Native(cls as u32)
    } else if let Some(cls) = top_class_id {
        // Unknown raw class ID — store anyway; lookup will treat as Native.
        AssetTypeRaw::Native(cls)
    } else {
        // No signal at all about the type: the file gets no db row.
        return Ok(None);
    };

    let name = filename_stem(&companion);

    // Implicit Sprite sub-asset for Single-mode textures. Compute first
    // (borrows `meta_info` whole); the for-loop below moves
    // `meta_info.sprite_sheet`, so the predicate must run before that.
    let implicit_sprite = synthesize_implicit_sprite(&meta_info, &name);

    // Texture sprite-sheet sub-assets (from .meta). Always class Sprite —
    // .meta `sprites:` entries are by definition Sprite sub-assets of the
    // texture (Unity's Sprite-mode importer creates them at fileID-as-hash).
    for (fid, name) in meta_info.sprite_sheet {
        sub_assets.push(SubAsset {
            file_id: fid,
            class_id: ClassId::Sprite as u32,
            name: name.into_boxed_str(),
        });
    }

    if let Some(sub) = implicit_sprite {
        sub_assets.push(sub);
    }

    Ok(Some(RawEntry {
        guid: meta_info.guid,
        asset_type_raw,
        hint,
        name,
        meta_mtime_ns,
        asset_mtime_ns,
        sub_assets,
    }))
}
553
554/// Synthesize the implicit Sprite sub-asset Unity auto-generates for
555/// Single-mode Sprite textures. Unity creates one Sprite (fileID
556/// `21300000` = `ClassId::Sprite × 100_000`) named after the texture
557/// file but never writes it to the `.meta` — the `sprites:` list stays
558/// empty. Without synthesizing it here, `AssetMap::elidable_subasset_fid`
559/// (`mapping/asset_map.rs`) can't fire and `_sprite: $TexName` fields
560/// keep the redundant `#f21300000` suffix on pull.
561///
562/// Returns `None` when:
563///   - the `.meta`'s `spriteSheet.sprites:` list is non-empty (explicit
564///     entries own the sub-asset list — atlases, multi-sprite sheets);
565///   - `textureType` isn't 8 (Sprite); or
566///   - `spriteMode` isn't 1 (Single).
567///
568/// Branches pinned by `bake_asset_db::bake::tests::synthesize_implicit_sprite_*`.
569fn synthesize_implicit_sprite(meta: &meta::MetaInfo, stem: &str) -> Option<SubAsset> {
570    if meta.sprite_sheet.is_empty()
571        && meta.texture_type == Some(TEXTURE_TYPE_SPRITE)
572        && meta.sprite_mode == Some(SPRITE_MODE_SINGLE)
573    {
574        Some(SubAsset {
575            file_id: ClassId::Sprite.canonical_subobject_fid(),
576            class_id: ClassId::Sprite as u32,
577            name: stem.to_string().into_boxed_str(),
578        })
579    } else {
580        None
581    }
582}
583
584fn warn_sanitized(on_warn: Option<WarnSinkRef<'_>>, kind: &str, hint: &str, old: &str, new: &str) {
585    if let Some(sink) = on_warn {
586        sink(&format!(
587            "warning: {kind} {hint} name `{old}` contains ref-reserved char; renamed to `{new}`",
588        ));
589    }
590}
591
592fn build_db(
593    mut raw: Vec<RawEntry>,
594    sanitizer: Option<NameSanitizerRef<'_>>,
595    on_warn: Option<WarnSinkRef<'_>>,
596    verbose_collisions: bool,
597) -> Result<AssetDb> {
598    // Stable order: sort by hint so dedup picks the same "winner" each bake.
599    raw.sort_by(|a, b| a.hint.cmp(&b.hint));
600
601    // Reset every entry's name to its raw filename stem before dedup
602    // (cached entries arrive with their previously-suffixed name; if we
603    // dedup against that, collisions compound across bakes), then sanitize
604    // ref-reserved chars in both top-level and sub-asset names — covers the
605    // three name sources (filename stem, YAML m_Name sub-assets, `.meta`
606    // sprite-sheet entries) in one pass before dedup uses `r.name` as key.
607    for r in raw.iter_mut() {
608        r.name = filename_stem_from_hint(&r.hint);
609        if let Some(san) = sanitizer
610            && let Some(clean) = san(&r.name)
611        {
612            warn_sanitized(on_warn, "asset", &r.hint, &r.name, &clean);
613            r.name = clean;
614        }
615        if let Some(san) = sanitizer {
616            for sub in r.sub_assets.iter_mut() {
617                if let Some(clean) = san(&sub.name) {
618                    warn_sanitized(on_warn, "sub-asset of", &r.hint, &sub.name, &clean);
619                    sub.name = clean.into_boxed_str();
620                }
621            }
622        }
623    }
624
625    // Type-aware dedup: collisions are scoped by `(name, asset_type)`.
626    // Same-name entries of distinct `asset_type` (`Foo.png` Texture2D +
627    // `Foo.prefab` Prefab) get distinct alias buckets — the consuming
628    // field's C# type discriminates at decode. Embedded sub-asset docs
629    // of container types are excluded from the global pool entirely
630    // (see [Name collisions](docs/asset-database.md#name-collisions)).
631
632    // Pass 1: tally distinct-guid owners per `(name, asset_type)` bucket.
633    let mut owners: AHashMap<(String, AssetTypeRaw), AHashSet<u128>> =
634        AHashMap::with_capacity(raw.len());
635    for r in &raw {
636        let key = (r.name.clone(), r.asset_type_raw);
637        owners.entry(key).or_default().insert(r.guid);
638        if is_embedded_container(&r.hint) {
639            continue;
640        }
641        for sub in &r.sub_assets {
642            let key = (
643                sub.name.to_string(),
644                AssetTypeRaw::Native(sub.class_id),
645            );
646            owners.entry(key).or_default().insert(r.guid);
647        }
648    }
649    let contested = |name: &str, t: AssetTypeRaw| {
650        owners
651            .get(&(name.to_string(), t))
652            .is_some_and(|s| s.len() > 1)
653    };
654
655    // Pass 2: walk entries in hint-sorted order, renaming every contested
656    // claim. `taken` tracks `(name, asset_type) → guid` pairs already
657    // claimed in this pass so the disambiguator never picks a candidate
658    // that collides with an earlier (different-guid) entry of the same
659    // type; same-guid sharing remains allowed.
660    let mut taken: AHashMap<(String, AssetTypeRaw), u128> = AHashMap::with_capacity(raw.len());
661    for r in raw.iter_mut() {
662        let top_type = r.asset_type_raw;
663        if contested(&r.name, top_type) {
664            let new_name = disambiguate(&r.name, &r.hint, r.guid, top_type, &taken)?;
665            if verbose_collisions && let Some(sink) = on_warn {
666                sink(&format!(
667                    "warning: name collision on `{}` (guid {:032x}); renamed to `{}`",
668                    r.name, r.guid, new_name,
669                ));
670            }
671            r.name = new_name;
672        }
673        match taken.get(&(r.name.clone(), top_type)) {
674            Some(&prev) if prev != r.guid => anyhow::bail!(
675                "asset-db: name `{}` claimed by both guid {:032x} and {prev:032x} \
676                 after dedup — `disambiguate` produced a non-unique alias",
677                r.name,
678                r.guid,
679            ),
680            _ => {
681                taken.insert((r.name.clone(), top_type), r.guid);
682            }
683        }
684
685        if is_embedded_container(&r.hint) {
686            // Prefab-embedded sub-assets bypass the global dedup pool;
687            // sanitization already happened above. Names stay as authored
688            // and resolve via `$Sub@Parent` at the codec layer.
689            continue;
690        }
691        for sub in r.sub_assets.iter_mut() {
692            let sub_type = AssetTypeRaw::Native(sub.class_id);
693            if contested(&sub.name, sub_type) {
694                let original = sub.name.to_string();
695                let new_name = disambiguate(&original, &r.hint, r.guid, sub_type, &taken)?;
696                if verbose_collisions && let Some(sink) = on_warn {
697                    sink(&format!(
698                        "warning: sub-asset name collision on `{}` (parent guid {:032x}); renamed to `{}`",
699                        original, r.guid, new_name,
700                    ));
701                }
702                sub.name = new_name.into_boxed_str();
703            }
704            // Same-guid sharing is allowed — a sub-asset's deduped name
705            // will often equal the parent's deduped alias (same hint
706            // feeds disambiguate), and that's the desired outcome.
707            let key = (sub.name.to_string(), sub_type);
708            if !taken.contains_key(&key) {
709                taken.insert(key, r.guid);
710            }
711        }
712    }
713
714    // Intern script types and finalize entries.
715    let mut db = AssetDb::new();
716    let entries: Vec<AssetEntry> = raw
717        .into_iter()
718        .map(|r| {
719            let asset_type = match r.asset_type_raw {
720                AssetTypeRaw::Native(n) => AssetType::Native(n),
721                AssetTypeRaw::Script(g) => AssetType::Script(db.intern_script(g)),
722            };
723            AssetEntry {
724                guid: r.guid,
725                asset_type,
726                name: r.name.into_boxed_str(),
727                sub_assets: r.sub_assets,
728                hint: r.hint.into_boxed_str(),
729            }
730        })
731        .collect();
732    db.entries = entries;
733    db.sort();
734    check_no_full_duplicates(&db)?;
735    Ok(db)
736}
737
738/// Hard-fail on two corruption cases:
739///
740/// 1. **Two top-level entries share a GUID.** Hand-edited or copy-pasted
741///    `.meta` whose GUID wasn't rewritten. The name-dedup loop only
742///    renames when guids *differ*, so same-guid pairs flow through with
743///    distinct names and `db.sort()` doesn't merge them. Catches the
744///    duplicate-`.meta` case the Unity-hidden walker filter also guards
745///    against — belt and braces.
746///
747/// 2. **Within-entry sub-asset rows share `(name, fileID)`.** Two YAML
748///    sub-docs in the same asset declared identical names + fileIDs —
749///    asset-side corruption, parser bug, or atlas content collision.
750fn check_no_full_duplicates(db: &AssetDb) -> Result<()> {
751    // Top-level: guid uniqueness. `db.entries` is already guid-sorted, so
752    // a single pass over consecutive pairs catches every dup.
753    for w in db.entries.windows(2) {
754        if w[0].guid == w[1].guid {
755            anyhow::bail!(
756                "duplicate top-level GUID: {:032x} between names `{}` and `{}` — likely two .meta files share a GUID",
757                w[0].guid,
758                w[0].name,
759                w[1].name,
760            );
761        }
762    }
763
764    // Sub-assets: (guid, fileID, name) uniqueness within each entry.
765    let mut seen: AHashSet<(i64, &str)> = AHashSet::new();
766    for e in &db.entries {
767        seen.clear();
768        for s in &e.sub_assets {
769            if !seen.insert((s.file_id, &*s.name)) {
770                anyhow::bail!(
771                    "duplicate sub-asset record: name={} guid={:032x} fileID={} type={:?}",
772                    s.name,
773                    e.guid,
774                    s.file_id,
775                    e.asset_type,
776                );
777            }
778        }
779    }
780    Ok(())
781}
782
783/// Read just enough of the asset to satisfy `mode`.
784///
785/// `TopOnly` reads the first 4 KiB and truncates at the last newline — that
786/// covers a YAML preamble (`%YAML 1.1\n%TAG …\n`), the first
787/// `--- !u!<id> &<fid>` header, and a `m_Script` line for .asset
788/// MonoBehaviours (≤ ~200 bytes). `WithSubAssets` reads the full file.
789///
790/// Trimming at the last newline guards against UTF-8 boundary cuts inside a
791/// multi-byte character — every YAML line is complete UTF-8.
792fn read_asset_for_mode(path: &Path, mode: asset::ParseMode) -> Result<String> {
793    use std::io::Read;
794    match mode {
795        asset::ParseMode::WithSubAssets => {
796            std::fs::read_to_string(path).with_context(|| format!("read asset: {}", path.display()))
797        }
798        asset::ParseMode::TopOnly => {
799            const HEAD_BYTES: u64 = 4096;
800            let f = std::fs::File::open(path)
801                .with_context(|| format!("open asset: {}", path.display()))?;
802            let mut buf = Vec::with_capacity(HEAD_BYTES as usize);
803            f.take(HEAD_BYTES)
804                .read_to_end(&mut buf)
805                .with_context(|| format!("read asset: {}", path.display()))?;
806            // Drop trailing partial line so .lines() yields only complete
807            // (and thus complete-UTF-8) lines. If the head has no newline at
808            // all (pathological — single-line YAML > 4 KiB), keep the buffer
809            // and let `from_utf8` decide.
810            if let Some(last_nl) = buf.iter().rposition(|&b| b == b'\n') {
811                buf.truncate(last_nl + 1);
812            }
813            String::from_utf8(buf)
814                .with_context(|| format!("non-utf8 asset head: {}", path.display()))
815        }
816    }
817}
818
/// Map a `.meta` path to the path of its companion asset by removing the
/// trailing `.meta`.
///
/// Returns `None` when the path is not valid UTF-8 or does not end in
/// `.meta`.
fn strip_meta_suffix(p: &Path) -> Option<PathBuf> {
    p.to_str()
        .and_then(|text| text.strip_suffix(".meta"))
        .map(PathBuf::from)
}
823
824fn rel_hint(project_root: &Path, companion: &Path) -> Result<String> {
825    // Strip the project root, not just `Assets/`. The walker now visits both
826    // `<project>/Assets/` and `<project>/Packages/`, so hints look like
827    // `Assets/Foo.prefab` or `Packages/com.boxcat.libs/Bar.mixer`.
828    let rel = companion
829        .strip_prefix(project_root)
830        .with_context(|| format!("strip prefix: {}", companion.display()))?;
831    let s = rel.to_string_lossy().replace('\\', "/");
832    Ok(s)
833}
834
/// File name of `p` without its final extension, as an owned `String`.
///
/// Yields an empty string when the path has no file name or the stem is
/// not valid UTF-8.
fn filename_stem(p: &Path) -> String {
    match p.file_stem().and_then(|stem| stem.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::new(),
    }
}
841
/// Same as taking the file stem of a path, but starting from a hint
/// string such as `Assets/UI/Foo.png`.
///
/// Yields an empty string when the hint has no file name component or the
/// stem is not valid UTF-8.
fn filename_stem_from_hint(hint: &str) -> String {
    let stem = Path::new(hint).file_stem().and_then(|stem| stem.to_str());
    match stem {
        Some(stem) => stem.to_owned(),
        None => String::new(),
    }
}
849
850/// Pick a unique alias for `stem` given `hint` and an existing `taken` map.
851/// Strategy: try `stem^dir` for successively-deeper parent dirs. A candidate
852/// is considered "free" iff it's absent from `taken` *or* already mapped to
853/// `owner_guid` (the latter covers the same-guid sub-asset case where the
854/// parent's deduped top-level alias is a valid name to share).
855///
856/// `asset_type` scopes the dedup bucket — a candidate is "taken" only when
857/// another guid has claimed the exact `(name, asset_type)` pair. Two assets
858/// of different `asset_type` (e.g. Texture2D `Foo.png` vs Prefab `Foo.prefab`)
859/// share the bare alias `Foo` without contesting because the codec layer
860/// uses the field's declared C# type to pick the right one at lookup time.
861///
862/// Hard-fails when no parent segment yields a free candidate — ambiguity
863/// surfaces at bake time rather than getting papered over with a guid suffix.
864/// See [Name collisions](docs/asset-database.md#name-collisions) for the
865/// `^` separator rationale.
866fn disambiguate(
867    stem: &str,
868    hint: &str,
869    owner_guid: u128,
870    asset_type: AssetTypeRaw,
871    taken: &AHashMap<(String, AssetTypeRaw), u128>,
872) -> Result<String> {
873    let parts: Vec<&str> = Path::new(hint)
874        .parent()
875        .map(|p| p.iter().filter_map(|c| c.to_str()).collect::<Vec<_>>())
876        .unwrap_or_default();
877
878    // Walk parent segments from nearest to root, picking the shortest
879    // suffix that doesn't collide with a different-guid owner.
880    let mut suffix = String::new();
881    for seg in parts.iter().rev() {
882        if !suffix.is_empty() {
883            suffix.insert(0, '/');
884        }
885        suffix.insert_str(0, seg);
886        let candidate = format!("{stem}^{suffix}");
887        match taken.get(&(candidate.clone(), asset_type)) {
888            None => return Ok(candidate),
889            Some(&prev) if prev == owner_guid => return Ok(candidate),
890            Some(_) => continue,
891        }
892    }
893    anyhow::bail!(
894        "asset-db: cannot disambiguate name `{stem}` for guid {owner_guid:032x} \
895         (hint `{hint}`) — every parent-segment suffix is already taken by \
896         another asset. Rename one of the colliding assets in source.",
897    )
898}
899
/// Unit tests for the bake helpers: panic-safe task wrapper, implicit
/// sprite synthesis, sub-doc filtering, stem extraction, alias
/// disambiguation, and the `build_db` dedup policy.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn run_with_panic_safety_passes_through_ok_some() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(Some(42)));
        assert_eq!(r, Ok(Some(42)));
    }

    #[test]
    fn run_with_panic_safety_passes_through_ok_none() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(None));
        assert_eq!(r, Ok(None));
    }

    #[test]
    fn run_with_panic_safety_formats_inner_error_with_label() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("foo.meta", "task", || {
            Err(anyhow::anyhow!("malformed yaml"))
        });
        assert_eq!(r, Err("foo.meta: malformed yaml".to_string()));
    }

    #[test]
    fn run_with_panic_safety_catches_str_panic() {
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || {
                std::panic::panic_any("boom (&str payload)")
            });
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: boom (&str payload)".to_string())
        );
    }

    #[test]
    fn run_with_panic_safety_catches_string_panic() {
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || {
                // String payloads come from `panic!("{x}")` via the format!
                // path — the runtime hands a String, not a &str.
                panic!("formatted {}", "msg")
            });
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: formatted msg".to_string())
        );
    }

    #[test]
    fn run_with_panic_safety_handles_non_string_panic_payload() {
        // `panic_any(42_i32)` produces a panic whose payload isn't &str
        // or String. The helper falls back to a sentinel message rather
        // than dropping the error silently.
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || std::panic::panic_any(42_i32));
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: <non-string panic payload>".to_string())
        );
    }

    /// Build a `MetaInfo` with guid 0 and the given texture predicates.
    fn meta_for(
        texture_type: Option<u32>,
        sprite_mode: Option<u32>,
        sprites: Vec<(i64, String)>,
    ) -> meta::MetaInfo {
        meta::MetaInfo {
            guid: 0,
            sprite_sheet: sprites,
            texture_type,
            sprite_mode,
        }
    }

    #[test]
    fn synthesize_implicit_sprite_fires_on_single_mode_sprite_with_empty_sheet() {
        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(SPRITE_MODE_SINGLE), vec![]);
        let sub = synthesize_implicit_sprite(&m, "Icon").expect("synthesis should fire");
        assert_eq!(sub.file_id, ClassId::Sprite.canonical_subobject_fid());
        assert_eq!(&*sub.name, "Icon");
    }

    #[test]
    fn synthesize_implicit_sprite_skips_when_sheet_non_empty() {
        // Explicit sprites own the sub-asset list — atlas-shaped meta
        // doesn't get a phantom main-Sprite layered on top.
        let m = meta_for(
            Some(TEXTURE_TYPE_SPRITE),
            Some(SPRITE_MODE_SINGLE),
            vec![(12345, "explicit_a".into())],
        );
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    #[test]
    fn synthesize_implicit_sprite_skips_on_multiple_mode() {
        // spriteMode: 2 (Multiple = atlas) means "the sprites: list is
        // canonical, even if currently empty". No synthesis.
        const SPRITE_MODE_MULTIPLE: u32 = 2;
        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(SPRITE_MODE_MULTIPLE), vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    #[test]
    fn synthesize_implicit_sprite_skips_on_non_sprite_texture() {
        // textureType: 0 (Default) — texture isn't a Sprite at all.
        let m = meta_for(Some(0), Some(SPRITE_MODE_SINGLE), vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    #[test]
    fn synthesize_implicit_sprite_skips_when_predicates_absent() {
        // Both texture_type and sprite_mode None — `.meta` from a
        // non-texture asset (or a stale .meta missing the fields).
        let m = meta_for(None, None, vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    /// `is_filterable_subdoc_for_ext` is the single point where parse-
    /// time sub-asset filtering decides what's a structural prefab tree
    /// doc vs. a real sub-asset. Pin the contract per extension.
    #[test]
    fn is_filterable_subdoc_for_ext_branches_correctly() {
        // .prefab: GO + Transform + RectTransform + MonoBehaviour-as-component.
        for cls in [1, 4, 224, 114] {
            assert!(
                is_filterable_subdoc_for_ext(cls, "prefab"),
                "class {cls} should be filtered for .prefab",
            );
        }
        // .playable: Timeline tracks live as MB-114 — must NOT filter.
        // GO/Transform never appear in .playable but the predicate stays
        // valid (no-op).
        assert!(!is_filterable_subdoc_for_ext(114, "playable"));
        assert!(is_filterable_subdoc_for_ext(1, "playable"));
        // .controller: AnimatorState (1102), BlendTree (206) — never
        // filtered.
        assert!(!is_filterable_subdoc_for_ext(1102, "controller"));
        assert!(!is_filterable_subdoc_for_ext(114, "controller"));
        // .mixer: AudioMixerGroup (273) — never filtered.
        assert!(!is_filterable_subdoc_for_ext(273, "mixer"));
        assert!(!is_filterable_subdoc_for_ext(114, "mixer"));
        // .asset / .spriteatlas: MB-114 are real ScriptableObject sub-
        // assets. Real classes (Sprite=213) are never filtered either.
        assert!(!is_filterable_subdoc_for_ext(114, "asset"));
        assert!(!is_filterable_subdoc_for_ext(213, "spriteatlas"));
    }

    #[test]
    fn stem_basic() {
        assert_eq!(filename_stem(Path::new("foo/Bar.prefab")), "Bar");
        assert_eq!(filename_stem_from_hint("foo/Bar.prefab"), "Bar");
    }

    #[test]
    fn disambiguate_walks_parents() {
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), t), 1u128);
        // Nearest parent suffix wins on first try.
        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
        assert_eq!(alias, "Foo^Editor");

        // First-level parent already taken (by a different guid, same type)
        // → falls back to deeper path.
        taken.insert(("Foo^Editor".to_string(), t), 3);
        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
        assert_eq!(alias, "Foo^pkg/Editor");
    }

    #[test]
    fn disambiguate_ignores_collisions_in_other_types() {
        // A different `AssetTypeRaw` claiming the same alias does NOT
        // contest — type-aware dedup gives each `(name, type)` its own
        // bucket. PNG (Texture2D) and prefab (Prefab) named `Foo` both
        // keep bare `Foo`.
        let png = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let prefab = AssetTypeRaw::Native(ClassId::Prefab as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), png), 1u128);
        // disambiguate against the prefab bucket — `Foo` is free here.
        let alias = disambiguate("Foo", "Assets/Bar/Foo.prefab", 2, prefab, &taken).unwrap();
        // Walk produces `Foo^Bar` because we always step at least one
        // parent (disambiguate's contract is "produce a suffixed form");
        // the contention check upstream is what decides whether to call.
        assert_eq!(alias, "Foo^Bar");
    }

    #[test]
    fn disambiguate_returns_existing_when_same_owner() {
        // When the candidate suffix is already mapped to `owner_guid`, the
        // sub-asset can safely share that alias — its lookup path resolves
        // back to the same guid, so no real ambiguity exists.
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Cloud1".to_string(), t), 0xa0_u128);
        taken.insert(("Cloud1^Tower".to_string(), t), 0xb0_u128);
        let alias =
            disambiguate("Cloud1", "Assets/Tower/Cloud1.png", 0xb0_u128, t, &taken).unwrap();
        assert_eq!(alias, "Cloud1^Tower");
    }

    #[test]
    fn disambiguate_hard_fails_when_no_parent_segments() {
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), t), 1u128);
        // Hint has no directories — nothing to suffix with. Must error
        // rather than silently fall back to a guid suffix.
        let err =
            disambiguate("Foo", "Foo.cs", 2u128, t, &taken).expect_err("must hard-fail");
        let msg = format!("{err:#}");
        assert!(msg.contains("disambiguate"), "msg: {msg}");
        assert!(msg.contains("Foo"), "msg: {msg}");
    }

    /// Build a `RawEntry` fixture for the `build_db` tests.
    ///
    /// `name` is left empty and both mtimes are zero: `build_db`'s first
    /// pass overwrites `name` from `hint`, so any value there is fine, and
    /// empty keeps the fixtures minimal.
    fn raw_entry(
        hint: &str,
        guid: u128,
        asset_type_raw: AssetTypeRaw,
        sub_assets: Vec<SubAsset>,
    ) -> RawEntry {
        RawEntry {
            guid,
            asset_type_raw,
            hint: hint.to_string(),
            name: String::new(),
            meta_mtime_ns: 0,
            asset_mtime_ns: 0,
            sub_assets,
        }
    }

    /// Shorthand for a `raw_entry` whose top-level type is native Texture2D.
    fn raw_native(hint: &str, guid: u128, sub_assets: Vec<SubAsset>) -> RawEntry {
        raw_entry(
            hint,
            guid,
            AssetTypeRaw::Native(ClassId::Texture2D as u32),
            sub_assets,
        )
    }

    /// Pin: when a name is claimed by ≥2 distinct guids of the same
    /// `asset_type`, every claimant must rename — no "first wins" carve-out.
    /// The deduped form is consistent across claimants: each entry resolves
    /// through `disambiguate` against its own hint.
    ///
    /// Two same-type Texture2D `Cloud1.png` files in different folders
    /// share the bare alias `Cloud1` until type-aware dedup forces both to
    /// suffix.
    #[test]
    fn build_db_renames_every_claimant_when_name_is_contested() {
        let png_a_guid = 0xa0_u128;
        let png_b_guid = 0xb0_u128;
        let sprite_fid: i64 = 21300000;

        let raw = vec![
            raw_native("Assets/Other/Cloud1.png", png_a_guid, vec![]),
            raw_native(
                "Assets/Tower/Cloud1.png",
                png_b_guid,
                vec![SubAsset {
                    file_id: sprite_fid,
                    class_id: ClassId::Sprite as u32,
                    name: "Cloud1".into(),
                }],
            ),
        ];

        let db = build_db(raw, None, None, false).expect("build_db should succeed");

        let a_entry = db.find_by_guid(png_a_guid).unwrap();
        let b_entry = db.find_by_guid(png_b_guid).unwrap();

        // Neither entry keeps the bare alias — both renamed.
        assert_ne!(&*a_entry.name, "Cloud1");
        assert_ne!(&*b_entry.name, "Cloud1");
        assert!(
            a_entry.name.starts_with("Cloud1^"),
            "first png top-level not deduped: {}",
            a_entry.name,
        );
        assert!(
            b_entry.name.starts_with("Cloud1^"),
            "second png top-level not deduped: {}",
            b_entry.name,
        );
        // Distinct hints → distinct deduped suffixes.
        assert_ne!(&*a_entry.name, &*b_entry.name);

        // Sub-asset dedup: the Sprite sub-asset's `Cloud1` lives in its own
        // type-bucket (Sprite, not Texture2D), so it isn't contested by the
        // Texture2D collision above. It stays bare. The png_b entry is the
        // only Sprite-bucket owner.
        let png_b_sub = &b_entry.sub_assets[0];
        assert_eq!(png_b_sub.file_id, sprite_fid);
        assert_eq!(
            &*png_b_sub.name, "Cloud1",
            "Sprite sub-asset should stay bare under type-aware dedup",
        );
    }

    /// Pin type-aware dedup: a Texture2D and a Prefab sharing the stem
    /// `Foo` both keep the bare alias. Reverse lookup discriminates by
    /// the field's declared C# type at the consumer layer.
    #[test]
    fn build_db_keeps_bare_alias_for_type_distinct_collisions() {
        let png_guid = 0xa0_u128;
        let prefab_guid = 0xb0_u128;
        let raw = vec![
            raw_entry(
                "Assets/UI/Foo.png",
                png_guid,
                AssetTypeRaw::Native(ClassId::Texture2D as u32),
                vec![],
            ),
            raw_entry(
                "Assets/UI/Foo.prefab",
                prefab_guid,
                AssetTypeRaw::Native(ClassId::Prefab as u32),
                vec![],
            ),
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Both keep bare `Foo` because they live in distinct type buckets.
        assert_eq!(&*db.find_by_guid(png_guid).unwrap().name, "Foo");
        assert_eq!(&*db.find_by_guid(prefab_guid).unwrap().name, "Foo");
    }

    /// Pin: AnimatorController-embedded sub-assets are excluded from the
    /// global dedup pool, mirroring the prefab-embedded rule. Without the
    /// exclusion, an embedded AnimatorState named `Idle` would contest a
    /// hypothetical standalone `.asset` of the same name AND same Unity
    /// classID (AnimatorState exists as both an embedded sub of
    /// `.controller` and a top-level `.asset` in Unity), forcing both to
    /// rename via parent-dir suffix. The exclusion keeps the embedded
    /// state in its parent's namespace where it's addressed via
    /// `$Idle@Player` at the consumer layer.
    #[test]
    fn build_db_skips_controller_embedded_subassets_in_global_pool() {
        const ANIMATOR_STATE_CLASS_ID: u32 = 1102;
        let controller_guid = 0xc0_u128;
        let other_state_guid = 0xd0_u128;
        let raw = vec![
            raw_entry(
                "Assets/Anim/Player.controller",
                controller_guid,
                AssetTypeRaw::Native(ClassId::AnimatorController as u32),
                vec![SubAsset {
                    file_id: -123_456_789_012,
                    class_id: ANIMATOR_STATE_CLASS_ID,
                    name: "Idle".into(),
                }],
            ),
            // Standalone .asset whose top class IS AnimatorState — same
            // (name, class_id) bucket as the embedded one. With
            // exclusion, only this one claims the global `Idle` alias.
            raw_entry(
                "Assets/Other/Idle.asset",
                other_state_guid,
                AssetTypeRaw::Native(ANIMATOR_STATE_CLASS_ID),
                vec![],
            ),
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Standalone keeps bare `Idle`.
        assert_eq!(&*db.find_by_guid(other_state_guid).unwrap().name, "Idle");
        // Embedded state stays as authored in the parent's namespace.
        let ctrl_entry = db.find_by_guid(controller_guid).unwrap();
        assert_eq!(&*ctrl_entry.sub_assets[0].name, "Idle");
    }

    /// Same shape as the controller test, for AudioMixerController:
    /// AudioMixerGroup sub-asset class collides with itself between an
    /// embedded `Main.mixer` group and a hypothetical standalone
    /// `.asset` of the same class. Exclusion keeps the embed in the
    /// parent's namespace.
    #[test]
    fn build_db_skips_mixer_embedded_subassets_in_global_pool() {
        const AUDIO_MIXER_GROUP_CLASS_ID: u32 = 273;
        let mixer_guid = 0xe0_u128;
        let other_group_guid = 0xf0_u128;
        let raw = vec![
            raw_entry(
                "Assets/Audio/Main.mixer",
                mixer_guid,
                AssetTypeRaw::Native(ClassId::AudioMixerController as u32),
                vec![SubAsset {
                    file_id: 9_001,
                    class_id: AUDIO_MIXER_GROUP_CLASS_ID,
                    name: "Master".into(),
                }],
            ),
            raw_entry(
                "Assets/Other/Master.asset",
                other_group_guid,
                AssetTypeRaw::Native(AUDIO_MIXER_GROUP_CLASS_ID),
                vec![],
            ),
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        assert_eq!(&*db.find_by_guid(other_group_guid).unwrap().name, "Master");
        let mixer_entry = db.find_by_guid(mixer_guid).unwrap();
        assert_eq!(&*mixer_entry.sub_assets[0].name, "Master");
    }

    /// Pin: `.playable` files are treated as embedded containers — their
    /// Timeline track sub-assets bypass the global dedup pool. Many
    /// `.playable` files in a project share Unity-default track names like
    /// `Animation Track (2)`; without the exclusion they contest in the
    /// global pool and `disambiguate` hard-fails when the shared
    /// parent-dir suffixes are exhausted. Exclusion is keyed on the
    /// `.playable` extension (`is_embedded_container`) because the
    /// top-doc script guid of a playable is whichever sub-doc Unity
    /// sorts first by hashed fileID — unstable as a discriminator.
    #[test]
    fn build_db_skips_playable_embedded_tracks_in_global_pool() {
        // Track class id + script guid placeholders — bake stores both
        // but doesn't validate them against any registry. Extension is
        // the discriminator that triggers the exclusion.
        const ANIMATION_TRACK_CLASS_ID: u32 = 5004;
        let some_script_guid = 0xd21dcc2386d650c4597f3633c75a1f98_u128;
        let pa_guid = 0xa0_u128;
        let pb_guid = 0xb0_u128;
        let raw = vec![
            raw_entry(
                "Assets/Anim/PlayableA.playable",
                pa_guid,
                AssetTypeRaw::Script(some_script_guid),
                vec![SubAsset {
                    file_id: -123_456_789,
                    class_id: ANIMATION_TRACK_CLASS_ID,
                    name: "Animation Track (2)".into(),
                }],
            ),
            raw_entry(
                "Assets/Anim/PlayableB.playable",
                pb_guid,
                AssetTypeRaw::Script(some_script_guid),
                vec![SubAsset {
                    file_id: -987_654_321,
                    class_id: ANIMATION_TRACK_CLASS_ID,
                    name: "Animation Track (2)".into(),
                }],
            ),
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Both playables keep their embedded track names as authored —
        // sub-assets live in the parent's namespace, not the global pool.
        assert_eq!(
            &*db.find_by_guid(pa_guid).unwrap().sub_assets[0].name,
            "Animation Track (2)"
        );
        assert_eq!(
            &*db.find_by_guid(pb_guid).unwrap().sub_assets[0].name,
            "Animation Track (2)"
        );
    }

    /// Pin: prefab-embedded sub-assets are excluded from the global dedup
    /// pool. Their names stay as authored even when another asset in the
    /// project shares the name. They resolve via `$Sub@Parent` at the
    /// consumer layer, not the global alias bucket.
    #[test]
    fn build_db_skips_prefab_embedded_subassets_in_global_pool() {
        let prefab_guid = 0xa0_u128;
        let other_clip_guid = 0xb0_u128;
        let raw = vec![
            raw_entry(
                "Assets/UI/PatternBG.prefab",
                prefab_guid,
                AssetTypeRaw::Native(ClassId::Prefab as u32),
                vec![SubAsset {
                    file_id: -4_468_419_427_481_386_445,
                    class_id: ClassId::AnimationClip as u32,
                    name: "Animation".into(),
                }],
            ),
            raw_entry(
                "Assets/Other/Animation.anim",
                other_clip_guid,
                AssetTypeRaw::Native(ClassId::AnimationClip as u32),
                vec![],
            ),
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Standalone .anim keeps bare `Animation` — the prefab-embedded
        // `Animation` doesn't claim the global alias.
        assert_eq!(
            &*db.find_by_guid(other_clip_guid).unwrap().name,
            "Animation"
        );
        // Prefab-embedded sub-asset keeps its raw name (lives in parent's
        // namespace; `$Animation@PatternBG` at the consumer layer).
        let prefab_entry = db.find_by_guid(prefab_guid).unwrap();
        assert_eq!(&*prefab_entry.sub_assets[0].name, "Animation");
    }

    /// Pin: a single-owner name (one guid only, even if it appears as both
    /// a top-level alias and one of its own sub-assets) is *not*
    /// contested — it stays bare. Guards against over-renaming the common
    /// case of a Texture2D and its lone same-named Sprite sub-asset.
    #[test]
    fn build_db_keeps_bare_alias_when_name_is_uncontested() {
        let png_guid = 0xb0_u128;
        let raw = vec![raw_native(
            "Assets/Tower/Lone.png",
            png_guid,
            vec![SubAsset {
                file_id: 21300000,
                class_id: ClassId::Sprite as u32,
                name: "Lone".into(),
            }],
        )];

        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        let entry = db.find_by_guid(png_guid).unwrap();
        assert_eq!(&*entry.name, "Lone");
        assert_eq!(&*entry.sub_assets[0].name, "Lone");
    }

    /// Pin: when a top-level alias is genuinely unresolvable (no parent
    /// segments left to walk and the bare stem is already taken), the
    /// bake hard-fails rather than silently falling back to a `^<guid8>`
    /// suffix. Per the project policy: ambiguity surfaces at bake time,
    /// not encode time.
    #[test]
    fn build_db_fails_when_dedup_cannot_resolve() {
        let raw = vec![
            // Two top-level entries with the same bare stem and no parent
            // segments to walk — `disambiguate` has nothing to suffix with.
            // `raw_native` gives both the same asset type, so they contest.
            raw_native("Foo.asset", 0x01_u128, vec![]),
            raw_native("Foo.prefab", 0x02_u128, vec![]),
        ];

        let err = build_db(raw, None, None, false)
            .expect_err("collision with no parent dirs must hard-fail");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("Foo") && msg.contains("disambiguate"),
            "error message should name the collision and the dedup pass: {msg}",
        );
    }
}