Skip to main content

unity_assetdb/
bake.rs

1//! Bake orchestrator: walk → parse → cache → write.
2//!
3//! Per-file flow:
//! 1. Stat `.meta`. If its mtime matches the cached value → reuse the
//!    cached entry, skip parse. The companion asset file is only
//!    stat'ed on a cache miss.
6//! 2. Else read `.meta` → guid + sprite-sheet sub-assets.
7//! 3. Read the asset file → top-level class ID + sub-asset rows.
8//! 4. Resolve `AssetType`: native `class_id` or `Script(script_guid)`.
9//! 5. Derive alias from the filename stem.
10//!
11//! Post-walk: alias-collision sweep (filename stems can clash; we suffix
12//! with parent dir on conflict and warn).
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::sync::atomic::{AtomicUsize, Ordering};
17use std::sync::mpsc;
18use std::time::{Instant, SystemTime};
19
20use ahash::{AHashMap, AHashSet};
21
22use anyhow::{Context, Result};
23
24use crate::asset;
25use crate::class_id::{ClassId, class_from_ext};
26use crate::meta::{self, SPRITE_MODE_SINGLE, TEXTURE_TYPE_SPRITE};
27use crate::store::{
28    self, AssetDb, AssetEntry, AssetType, BakeCache, CachedAssetType, CachedEntry, StoreError,
29    SubAsset, CACHE_FILENAME, DB_FILENAME,
30};
31use crate::walk::{walk_meta_files, WalkError};
32
33/// Errors from a bake run.
34///
35/// `Store(StoreError)` and `Walk(WalkError)` surface the typed source
36/// errors from those modules — match on them when you need to
37/// distinguish (e.g. "is this a schema-mismatch that needs re-bake?").
38/// `Other` carries the remaining anyhow-chained errors (cache I/O,
39/// dedup hard-fails, duplicate-guid checks) — most consumers propagate
40/// these untouched.
41#[derive(Debug, thiserror::Error)]
42pub enum BakeError {
43    #[error("{0}")]
44    Store(#[from] StoreError),
45    #[error("{0}")]
46    Walk(#[from] WalkError),
47    #[error("{0}")]
48    Other(#[from] anyhow::Error),
49}
50
/// Caller-supplied name sanitizer. Returns `Some(rewritten)` when the
/// input contains characters the consumer wants to scrub from asset
/// names; `None` to keep the input as-is. Bake calls this once per
/// top-level filename stem and once per sub-asset name (YAML `m_Name`
/// entries as well as `.meta` sprite-sheet entries).
///
/// Bound is `Send + Sync + 'static` because [`BakeOptions`] flows into
/// `ignore::WalkParallel` worker closures.
///
/// Default behavior (no sanitizer) leaves all names verbatim.
pub type NameSanitizer = Box<dyn Fn(&str) -> Option<String> + Send + Sync + 'static>;

/// Caller-supplied warning sink. Bake invokes this for non-fatal events
/// (worker errors during the parallel walk, sanitizer rewrites, and —
/// when `BakeOptions::verbose_collisions` is set — name-collision
/// rewrites). The library never writes to stderr itself.
pub type WarnSink = Box<dyn Fn(&str) + Send + Sync + 'static>;

/// Caller-supplied progress sink. Bake invokes this with the post-bake
/// summary line and (when `BakeOptions::verbose_timing` is true) with
/// per-phase timing. Separate from [`WarnSink`] so consumers can route
/// "info" output and warnings to different places.
pub type ProgressSink = Box<dyn Fn(&str) + Send + Sync + 'static>;

/// Borrowed view of a [`NameSanitizer`] for internal helpers — the type
/// `Option<NameSanitizer>::as_deref()` hands out. Kept as a named type
/// so per-call signatures don't trip clippy's `type_complexity`.
type NameSanitizerRef<'a> = &'a (dyn Fn(&str) -> Option<String> + Send + Sync);

/// Borrowed view of a [`WarnSink`]. See [`NameSanitizerRef`].
type WarnSinkRef<'a> = &'a (dyn Fn(&str) + Send + Sync);
79
/// Extensions of container assets whose embedded sub-asset docs stay
/// OUT of the global dedup pool. Those sub-assets live in the parent's
/// namespace and consumers resolve them through parent-scoped
/// addressing (`$Sub@Parent`).
///
/// The list is keyed on extension, not on `AssetType`: the top doc of a
/// `.playable` file is whichever sub-doc Unity sorts first by hashed
/// fileID (often an `AnimationTrack`, not the `TimelineAsset` itself),
/// so the resulting `AssetTypeRaw::Script(...)` carries an unstable
/// script guid. Only the extension discriminates containers reliably.
const EMBEDDED_CONTAINER_EXTS: &[&str] = &["prefab", "controller", "anim", "mixer", "playable"];

/// Reports whether `hint` names a file whose extension is listed in
/// [`EMBEDDED_CONTAINER_EXTS`]. The comparison is exact (case-sensitive);
/// paths without a UTF-8 extension are never containers.
fn is_embedded_container(hint: &str) -> bool {
    match Path::new(hint).extension().and_then(|os| os.to_str()) {
        Some(ext) => EMBEDDED_CONTAINER_EXTS.iter().any(|known| *known == ext),
        None => false,
    }
}
97
98/// True when `class_id` is a structural sub-doc that should be filtered
99/// out at parse time for the given container extension.
100///
101/// `.prefab`: GO / Transform / RectTransform / MonoBehaviour are all
102/// part of the GameObject tree — never addressable as sub-assets.
103/// `.controller` / `.anim` / `.mixer` / `.playable`: MonoBehaviour-114
104/// docs ARE addressable sub-assets (Timeline tracks, AudioMixerGroup,
105/// etc.) — only filter the GO-tree triplet, which doesn't appear in
106/// these files anyway (the predicate is a no-op there but stays valid
107/// for future-proofing).
108fn is_filterable_subdoc_for_ext(class_id: u32, ext: &str) -> bool {
109    let cls = ClassId::from_raw(class_id);
110    let is_go_tree = matches!(
111        cls,
112        Some(ClassId::GameObject | ClassId::Transform | ClassId::RectTransform)
113    );
114    let is_component = matches!(cls, Some(ClassId::MonoBehaviour));
115    is_go_tree || (is_component && ext == "prefab")
116}
117
/// Convert `SystemTime` → ns-since-UNIX, saturating at both ends.
///
/// - Pre-epoch timestamps (which would only happen if the user's clock
///   is bogus) map to `0`.
/// - Timestamps whose nanosecond count no longer fits in a `u64` map to
///   `u64::MAX` instead of silently wrapping: `Duration::as_nanos`
///   returns a `u128`, and a plain `as u64` cast would truncate it.
fn mtime_ns(t: SystemTime) -> u64 {
    t.duration_since(SystemTime::UNIX_EPOCH)
        .map_or(0, |d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
}
124
/// One raw bake result, before name dedup. For MonoBehaviour-backed
/// assets the unmapped script GUID travels inside `asset_type_raw`
/// (as `AssetTypeRaw::Script`) — interning happens after the walk so we
/// only need one final sort.
#[derive(Clone)]
struct RawEntry {
    /// Asset GUID as parsed from the `.meta` file.
    guid: u128,
    /// Type discriminator: native class ID or script GUID.
    asset_type_raw: AssetTypeRaw,
    /// Assets-relative, forward-slashed path of the companion asset;
    /// doubles as the cache key.
    hint: String,
    /// Alias. Empty on rows rebuilt from the on-disk cache; `build_db`
    /// re-derives it from `hint` for every entry before dedup.
    name: String,
    /// `.meta` mtime in ns since the UNIX epoch (see `mtime_ns`).
    meta_mtime_ns: u64,
    /// Companion asset mtime in ns since the UNIX epoch, recorded when
    /// the entry was last parsed.
    asset_mtime_ns: u64,
    /// Sub-assets gathered from the asset YAML and the `.meta` sprite
    /// sheet (plus the implicit single-mode Sprite, when applicable).
    sub_assets: Vec<SubAsset>,
}
138
/// Hashable type discriminator: `Native(classID)` for built-in classes
/// and `Script(scriptGuid)` for MonoBehaviour-backed assets. Hashable so
/// the dedup pass can bucket by `(name, asset_type)` without depending
/// on the post-walk script-intern table.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum AssetTypeRaw {
    /// Raw Unity class ID. May be a value `ClassId::from_raw` does not
    /// recognise — unknown IDs are stored as-is.
    Native(u32),
    /// GUID of the script backing a MonoBehaviour asset.
    Script(u128),
}
148
/// Per-worker-thread accumulator. Sends its collected `entries` + `errors`
/// to the main thread via Drop — `ignore::WalkBuilder::run` drops each
/// thread's visitor closure (and thus its captured `ThreadLocal`) on
/// thread exit, so the main thread sees all batches once `walker.run`
/// returns.
struct ThreadLocal {
    /// Successfully processed entries gathered by this worker.
    entries: Vec<RawEntry>,
    /// Pre-formatted error / panic messages gathered by this worker.
    errors: Vec<String>,
    /// Channel on which `entries` is shipped as one batch in `drop`.
    raw_tx: mpsc::Sender<Vec<RawEntry>>,
    /// Channel on which `errors` is shipped as one batch in `drop`.
    err_tx: mpsc::Sender<Vec<String>>,
}
160
161impl Drop for ThreadLocal {
162    fn drop(&mut self) {
163        let entries = std::mem::take(&mut self.entries);
164        let errors = std::mem::take(&mut self.errors);
165        // Channel-closed errors are unreachable here — main thread holds
166        // the receivers until after `walker.run` returns.
167        let _ = self.raw_tx.send(entries);
168        let _ = self.err_tx.send(errors);
169    }
170}
171
/// In-memory bake cache. Cache key: hint (Assets-relative,
/// forward-slashed) — the same string each row also stores in
/// `RawEntry::hint`. ahash beats siphash by ~2x for our small-string keys.
type CacheMap = AHashMap<String, RawEntry>;
175
176/// Run a `Result<Option<T>>`-producing closure under `catch_unwind` and
177/// flatten the four-way outcome (success-with-value / success-skip /
178/// inner-err / panic) into `Result<Option<T>, String>`. The closure
179/// is wrapped in `AssertUnwindSafe` because parallel-walk visitors
180/// capture Arc state by ref, and the bake worker treats process_one
181/// as panic-safe on its inputs.
182///
183/// `label` prefixes both inner errors and panic reports with the
184/// asset path; `task_name` names the operation in the panic line
185/// (e.g. `"process_one"`) so the message reads
186/// `"<path>: panic in <task_name>: <payload>"`.
187///
188/// Pulled out of the inline closure inside `bake_action`'s parallel
189/// walk so panic-payload extraction (string / String / non-string)
190/// can be unit-tested without spinning up a project tree.
191fn run_with_panic_safety<T, F>(label: &str, task_name: &str, f: F) -> Result<Option<T>, String>
192where
193    F: FnOnce() -> Result<Option<T>>,
194{
195    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
196        Ok(Ok(opt)) => Ok(opt),
197        Ok(Err(e)) => Err(format!("{label}: {e}")),
198        Err(panic) => {
199            let msg = panic
200                .downcast_ref::<&str>()
201                .map(|s| (*s).to_string())
202                .or_else(|| panic.downcast_ref::<String>().cloned())
203                .unwrap_or_else(|| "<non-string panic payload>".to_string());
204            Err(format!("{label}: panic in {task_name}: {msg}"))
205        }
206    }
207}
208
209/// Build the in-memory cache from a previously-saved `BakeCache`. Each
210/// `CachedEntry` becomes a `RawEntry` keyed by its hint. Cache hits during
211/// the walk drop straight into the post-walk pipeline.
212///
213/// `String::from(Box<str>)` is O(1) — Rust hands the heap allocation
214/// directly from the box to the new String, no copy. The map key is then
215/// cloned once for the parallel field on `RawEntry` (one alloc per entry).
216fn build_cache(cache: BakeCache) -> CacheMap {
217    let mut out = AHashMap::with_capacity(cache.entries.len());
218    for e in cache.entries {
219        let asset_type_raw = match e.asset_type {
220            CachedAssetType::Native(n) => AssetTypeRaw::Native(n),
221            CachedAssetType::Script(g) => AssetTypeRaw::Script(g),
222        };
223        let hint = String::from(e.hint);
224        let raw = RawEntry {
225            guid: e.guid,
226            asset_type_raw,
227            hint: hint.clone(),
228            name: String::new(), // re-derived in build_db
229            meta_mtime_ns: e.meta_mtime_ns,
230            asset_mtime_ns: e.asset_mtime_ns,
231            sub_assets: e.sub_assets,
232        };
233        out.insert(hint, raw);
234    }
235    out
236}
237
238/// Build the on-disk cache from the post-walk raw entries. Sorted by hint
239/// so the file is byte-stable across re-bakes when nothing changed.
240fn build_bake_cache(raw: &[RawEntry]) -> BakeCache {
241    let mut entries: Vec<CachedEntry> = raw
242        .iter()
243        .map(|r| CachedEntry {
244            hint: r.hint.clone().into_boxed_str(),
245            meta_mtime_ns: r.meta_mtime_ns,
246            asset_mtime_ns: r.asset_mtime_ns,
247            guid: r.guid,
248            asset_type: match r.asset_type_raw {
249                AssetTypeRaw::Native(n) => CachedAssetType::Native(n),
250                AssetTypeRaw::Script(g) => CachedAssetType::Script(g),
251            },
252            sub_assets: r.sub_assets.clone(),
253        })
254        .collect();
255    entries.sort_by(|a, b| a.hint.cmp(&b.hint));
256    BakeCache {
257        schema_version: store::SCHEMA_VERSION,
258        entries,
259    }
260}
261
/// Caller-supplied bake configuration.
///
/// Built by the consumer's CLI / library entry point and handed to
/// [`bake`]. The library never reads env vars, never resolves the
/// project root for you, and never writes to stderr — every side
/// channel routes through one of the optional sinks below.
pub struct BakeOptions {
    /// Project root containing `Assets/` + `ProjectSettings/`. Caller
    /// resolves this (typically via [`crate::walk::resolve_project_root`])
    /// before constructing options.
    pub project_root: PathBuf,
    /// Directory where `asset-db.bin` and `asset-db.cache.bin` are written.
    /// Created (including missing parents) at the start of a bake. Caller
    /// composes the convention (e.g. `<project>/Library/unity-assetdb/`
    /// or a fixture-staging path).
    pub out_dir: PathBuf,
    /// Optional name sanitizer; see [`NameSanitizer`].
    pub name_sanitizer: Option<NameSanitizer>,
    /// Optional warning sink; see [`WarnSink`]. `None` discards warnings.
    pub on_warn: Option<WarnSink>,
    /// Optional progress sink; see [`ProgressSink`]. `None` discards the
    /// summary line.
    pub on_progress: Option<ProgressSink>,
    /// When true, [`Self::on_progress`] also receives a per-phase timing
    /// line (cache / walk / build / write). Env-var-driven behavior is the
    /// consumer's call.
    pub verbose_timing: bool,
    /// When true, [`Self::on_warn`] receives a line for each
    /// name-collision rewrite during dedup. Leave it off to keep
    /// steady-state warm bakes quiet.
    pub verbose_collisions: bool,
}
293
294/// Bake entry-point. Walks `Assets/`, parses `.meta` + asset YAML,
295/// writes `<out_dir>/asset-db.bin` and `<out_dir>/asset-db.cache.bin`.
296pub fn bake(opts: &BakeOptions) -> Result<(), BakeError> {
297    bake_inner(opts).map_err(|e| {
298        // Surface typed source errors when they bubbled up via `?`
299        // without context wrapping — consumers can match on
300        // `BakeError::Store` etc. Otherwise fall through to `Other`.
301        match e.downcast::<StoreError>() {
302            Ok(s) => return BakeError::Store(s),
303            Err(e) => match e.downcast::<WalkError>() {
304                Ok(w) => return BakeError::Walk(w),
305                Err(e) => BakeError::Other(e),
306            },
307        }
308    })
309}
310
/// Bake pipeline behind [`bake`], returning untyped `anyhow` errors.
///
/// Phases, in order:
/// 1. create `out_dir` and load the previous bake cache (an unreadable
///    cache is treated as empty);
/// 2. parallel walk — every worker runs `process_one` per `.meta` under
///    panic protection and ships its batch through a channel on drop;
/// 3. forward collected worker errors to `on_warn` (non-fatal);
/// 4. build the new cache image and the db from the gathered entries;
/// 5. write db then cache, unless the run was a pure cache-hit no-op
///    and both files already exist;
/// 6. report the summary (and optional timing) through `on_progress`.
fn bake_inner(opts: &BakeOptions) -> Result<()> {
    let project_root = &opts.project_root;
    std::fs::create_dir_all(&opts.out_dir)
        .with_context(|| format!("create out-dir: {}", opts.out_dir.display()))?;
    let db_file = opts.out_dir.join(DB_FILENAME);
    let cache_file = opts.out_dir.join(CACHE_FILENAME);
    let t_start = Instant::now();

    // Load bake-only cache. Missing/corrupt → empty (first bake or stale).
    let cache: CacheMap = match store::read_cache(&cache_file) {
        Ok(c) => build_cache(c),
        Err(_) => AHashMap::new(),
    };
    let cache_size = cache.len();
    let t_cache = t_start.elapsed();

    // Per-thread accumulators: each worker drops its `Vec<RawEntry>` and
    // `Vec<String>` (errors) into channels at thread exit via `Drop`. Avoids
    // the Mutex<Vec> contention 16k pushes on 8 cores produced — measured
    // ~3-4 ms warm savings on meow-tower.
    //
    // `ignore::WalkParallel::run` requires `'static + Send` visitors, so
    // shared state goes through `Arc`. Each worker clones the Arc once at
    // factory time — the clone cost is negligible vs the per-entry work.
    let (raw_tx, raw_rx) = mpsc::channel::<Vec<RawEntry>>();
    let (err_tx, err_rx) = mpsc::channel::<Vec<String>>();
    let cache_arc = Arc::new(cache);
    let cache_hits = Arc::new(AtomicUsize::new(0));
    let walked = Arc::new(AtomicUsize::new(0));
    let project_root_arc: Arc<PathBuf> = Arc::new(project_root.clone());

    walk_meta_files(project_root, || {
        let raw_tx = raw_tx.clone();
        let err_tx = err_tx.clone();
        let cache = Arc::clone(&cache_arc);
        let cache_hits = Arc::clone(&cache_hits);
        let walked = Arc::clone(&walked);
        let project_root = Arc::clone(&project_root_arc);
        let mut local = ThreadLocal {
            entries: Vec::with_capacity(2048),
            errors: Vec::new(),
            raw_tx,
            err_tx,
        };
        move |meta_path: &Path| {
            walked.fetch_add(1, Ordering::Relaxed);
            // Catch panics so a single malformed .meta or unforeseen
            // bug doesn't silently terminate the worker thread (which
            // would lose its ThreadLocal accumulator). `ignore::WalkParallel`
            // doesn't propagate visitor panics; without this, a panic in
            // `process_one` produces a partial DB with no surfaced error.
            // Helper does the catch_unwind + payload-downcast — see
            // `run_with_panic_safety`.
            let label = meta_path.display().to_string();
            match run_with_panic_safety(&label, "process_one", || {
                process_one(meta_path, &project_root, &cache, &cache_hits)
            }) {
                Ok(Some(r)) => local.entries.push(r),
                Ok(None) => {}
                Err(msg) => local.errors.push(msg),
            }
        }
    })?;
    // Drop the original senders so the receiver loops below terminate
    // once every worker-side clone has been dropped as well.
    drop(raw_tx);
    drop(err_tx);
    let t_walk = t_start.elapsed();

    // Worker errors are non-fatal: collect them all, then surface each
    // one as a warning (or discard them when no sink is installed).
    let mut errors: Vec<String> = Vec::new();
    for v in err_rx.iter() {
        errors.extend(v);
    }
    if let Some(sink) = opts.on_warn.as_ref() {
        for e in &errors {
            sink(&format!("warning: {e}"));
        }
    }

    let mut raw: Vec<RawEntry> = Vec::with_capacity(cache_size + 256);
    for v in raw_rx.iter() {
        raw.extend(v);
    }
    // Build cache from `raw` (consumes nothing) before `build_db` consumes
    // it. Sequence the writes so the cache is only persisted after the
    // db artifact lands — a half-baked cache without a matching db
    // would let a later run skip parsing for entries that aren't in the
    // db yet.
    let bake_cache = build_bake_cache(&raw);
    let db = build_db(
        raw,
        opts.name_sanitizer.as_deref(),
        opts.on_warn.as_deref(),
        opts.verbose_collisions,
    )?;
    let t_build = t_start.elapsed();

    // No-op skip: every entry came from cache AND nothing was dropped from
    // cache (count stable). Skips ~2-3 ms of bincode encode + file write
    // on the steady-state warm path. Still skips only when both files are
    // present — first run or after a manual delete writes anyway.
    let hit_n = cache_hits.load(Ordering::Relaxed);
    let no_op =
        hit_n == cache_size && hit_n == db.entries.len() && db_file.exists() && cache_file.exists();

    if !no_op {
        store::write(&db_file, &db)
            .with_context(|| format!("write asset-db: {}", db_file.display()))?;
        store::write_cache(&cache_file, &bake_cache)
            .with_context(|| format!("write cache: {}", cache_file.display()))?;
    }
    let t_write = t_start.elapsed();

    if let Some(sink) = opts.on_progress.as_ref() {
        sink(&format!(
            "baked {} entries → {}",
            db.entries.len(),
            db_file.display()
        ));
        if opts.verbose_timing {
            let walked_n = walked.load(Ordering::Relaxed);
            // NOTE(review): assumes `build_db` emits at least one db entry
            // per cache hit; otherwise this subtraction underflows — confirm.
            let parsed_n = db.entries.len() - hit_n;
            let write_phase = if no_op { "skipped" } else { "wrote" };
            // The `t_*` marks are cumulative since `t_start`; differences
            // give the per-phase durations.
            sink(&format!(
                "  walked={walked_n} hit={hit_n} parsed={parsed_n} | cache={:?} walk={:?} build={:?} write={:?} ({write_phase}) total={:?}",
                t_cache,
                t_walk - t_cache,
                t_build - t_walk,
                t_write - t_build,
                t_write,
            ));
        }
    }
    Ok(())
}
444
445/// Per-`.meta` work. Returns `Ok(None)` when the meta has no companion file
446/// to describe (e.g. orphaned `.meta`, directory `.meta`).
447fn process_one(
448    meta_path: &Path,
449    project_root: &Path,
450    cache: &CacheMap,
451    cache_hits: &AtomicUsize,
452) -> Result<Option<RawEntry>> {
453    let companion =
454        strip_meta_suffix(meta_path).ok_or_else(|| anyhow::anyhow!("not a .meta path"))?;
455
456    let hint = rel_hint(project_root, &companion)?;
457
458    // Cache-hit fast path: stat `.meta` only. If the mtime matches the
459    // cache, trust the cached row outright — no companion stat. Saves
460    // ~1 stat × N entries on the warm bake, the bake's dominant cost
461    // (warm walk against meow-tower dropped from 47 ms → ~26 ms).
462    //
463    // **Cache assumption**: Unity's importer touches the `.meta` mtime
464    // whenever it re-imports the asset, so a `.meta` mtime drift is the
465    // canonical "this asset changed" signal. Hand-editing the asset YAML
466    // *without* touching the .meta will serve a stale cached row until
467    // the next .meta touch (or a manual `rm asset-db.cache.bin`).
468    // Documented + pinned by `tests/bake.rs::cache_does_not_detect_asset_only_touch`.
469    let meta_md =
470        std::fs::metadata(meta_path).with_context(|| format!("stat: {}", meta_path.display()))?;
471    let meta_mtime_ns = mtime_ns(meta_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));
472
473    if let Some(cached) = cache.get(&hint)
474        && cached.meta_mtime_ns == meta_mtime_ns
475    {
476        cache_hits.fetch_add(1, Ordering::Relaxed);
477        return Ok(Some(cached.clone()));
478    }
479
480    // Cache miss. Now stat the companion — handles directory-`.meta`
481    // exclusion too. Slow path beyond here re-parses both files.
482    let Ok(companion_md) = std::fs::metadata(&companion) else {
483        return Ok(None);
484    };
485    if companion_md.is_dir() {
486        return Ok(None);
487    }
488    let asset_mtime_ns = mtime_ns(companion_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));
489
490    process_one_uncached(meta_path, &companion, &hint, meta_mtime_ns, asset_mtime_ns)
491}
492
/// Slow path: parse `.meta` + asset YAML, build a `RawEntry`. Shared
/// between the "no cache row at all" and "cache row but `.meta` mtime
/// drifted" cases — both end up doing the same parse work.
///
/// Returns `Ok(None)` when no asset type can be determined (no script
/// guid, no extension mapping, no top-level class ID). Read and parse
/// failures are returned as errors.
fn process_one_uncached(
    meta_path: &Path,
    companion: &Path,
    hint: &str,
    meta_mtime_ns: u64,
    asset_mtime_ns: u64,
) -> Result<Option<RawEntry>> {
    // Cache miss → parse.
    let meta_text = std::fs::read_to_string(meta_path)
        .with_context(|| format!("read .meta: {}", meta_path.display()))?;
    let meta_info = meta::parse(&meta_text)?;

    // A missing or non-UTF-8 extension is treated as "" and falls into
    // the no-YAML-peek branch below.
    let ext = companion.extension().and_then(|s| s.to_str()).unwrap_or("");
    let from_ext = class_from_ext(ext);

    let mut sub_assets: Vec<SubAsset> = Vec::new();
    let mut top_class_id: Option<u32> = None;
    let mut script_guid: Option<u128> = None;

    // YAML peek strategy:
    //  - WithSubAssets: types where extra docs ARE addressable from outside.
    //    `.asset`/`.spriteatlas`/`.spriteatlasv2` host explicit sub-assets;
    //    `.prefab`/`.controller`/`.anim`/`.mixer`/`.playable` can host
    //    embedded sub-asset docs (legacy `AnimationClip` inline in a
    //    prefab; AnimatorState in a controller; AudioMixerGroup in a
    //    mixer; Timeline tracks in a playable) that other prefabs
    //    address as `{fileID, guid: <parent.guid>, type: 3}`. Without
    //    capturing them the embedded ref encodes as `&#f<fid>` and
    //    cross-prefab refs degrade to the parent alias + `#f<fid>` suffix.
    //    Embeds are excluded from the global dedup pool — see
    //    `is_embedded_container` in `build_db`.
    //  - TopOnly: types whose extra docs are internal scene-graph that
    //    isn't addressable from outside (`.unity`, `.mat`, `.mask`).
    //  - None: extension already says everything (`.png`, `.fbx`, scripts).
    let parse_mode: Option<asset::ParseMode> = match ext {
        "asset" | "spriteatlas" | "spriteatlasv2" | "prefab" | "controller" | "anim"
        | "mixer" | "playable" => Some(asset::ParseMode::WithSubAssets),
        "mat" | "mask" | "unity" => Some(asset::ParseMode::TopOnly),
        _ => None,
    };

    if let Some(mode) = parse_mode {
        let asset_text = read_asset_for_mode(companion, mode)?;
        let info = asset::parse(&asset_text, mode)?;
        top_class_id = info.top_class_id;
        script_guid = info.script_guid;
        for s in info.sub_assets {
            // Unnamed docs cannot be addressed by alias — skip them.
            if s.name.is_empty() {
                continue;
            }
            // Structural docs (GameObject tree, prefab components) are
            // not sub-assets.
            if is_filterable_subdoc_for_ext(s.class_id, ext) {
                continue;
            }
            sub_assets.push(SubAsset {
                file_id: s.file_id,
                class_id: s.class_id,
                name: s.name.into_boxed_str(),
            });
        }
    }

    // Precedence: script_guid (MonoBehaviour-backed) > from_ext > top_class_id.
    // `.prefab` and `.unity` deliberately let from_ext win — their YAML's first
    // doc is a *contained* object (GameObject = classID 1), not the asset's
    // class (Prefab = 1001). Falling back to top_class_id only for extensions
    // without a stable class mapping (e.g. `.asset`, where the YAML peek is
    // the only signal).
    let asset_type_raw = if let Some(g) = script_guid {
        AssetTypeRaw::Script(g)
    } else if let Some(cls) = from_ext {
        AssetTypeRaw::Native(cls as u32)
    } else if let Some(cls) = top_class_id.and_then(ClassId::from_raw) {
        AssetTypeRaw::Native(cls as u32)
    } else if let Some(cls) = top_class_id {
        // Unknown raw class ID — store anyway; lookup will treat as Native.
        AssetTypeRaw::Native(cls)
    } else {
        // No type signal at all — nothing to record for this file.
        return Ok(None);
    };

    let name = filename_stem(companion);

    // Implicit Sprite sub-asset for Single-mode textures. Compute first
    // (borrows `meta_info` whole); the for-loop below moves
    // `meta_info.sprite_sheet`, so the predicate must run before that.
    let implicit_sprite = synthesize_implicit_sprite(&meta_info, &name);

    // Texture sprite-sheet sub-assets (from .meta). Always class Sprite —
    // .meta `sprites:` entries are by definition Sprite sub-assets of the
    // texture (Unity's Sprite-mode importer creates them at fileID-as-hash).
    for (fid, name) in meta_info.sprite_sheet {
        sub_assets.push(SubAsset {
            file_id: fid,
            class_id: ClassId::Sprite as u32,
            name: name.into_boxed_str(),
        });
    }

    if let Some(sub) = implicit_sprite {
        sub_assets.push(sub);
    }

    Ok(Some(RawEntry {
        guid: meta_info.guid,
        asset_type_raw,
        hint: hint.to_string(),
        name,
        meta_mtime_ns,
        asset_mtime_ns,
        sub_assets,
    }))
}
608
609/// Synthesize the implicit Sprite sub-asset Unity auto-generates for
610/// Single-mode Sprite textures. Unity creates one Sprite (fileID
611/// `21300000` = `ClassId::Sprite × 100_000`) named after the texture
612/// file but never writes it to the `.meta` — the `sprites:` list stays
613/// empty. Without synthesizing it here, `AssetMap::elidable_subasset_fid`
614/// (`mapping/asset_map.rs`) can't fire and `_sprite: $TexName` fields
615/// keep the redundant `#f21300000` suffix on pull.
616///
617/// Returns `None` when:
618///   - the `.meta`'s `spriteSheet.sprites:` list is non-empty (explicit
619///     entries own the sub-asset list — atlases, multi-sprite sheets);
620///   - `textureType` isn't 8 (Sprite); or
621///   - `spriteMode` isn't 1 (Single).
622///
623/// Branches pinned by `bake_asset_db::bake::tests::synthesize_implicit_sprite_*`.
624fn synthesize_implicit_sprite(meta: &meta::MetaInfo, stem: &str) -> Option<SubAsset> {
625    if meta.sprite_sheet.is_empty()
626        && meta.texture_type == Some(TEXTURE_TYPE_SPRITE)
627        && meta.sprite_mode == Some(SPRITE_MODE_SINGLE)
628    {
629        Some(SubAsset {
630            file_id: ClassId::Sprite.canonical_subobject_fid(),
631            class_id: ClassId::Sprite as u32,
632            name: stem.to_string().into_boxed_str(),
633        })
634    } else {
635        None
636    }
637}
638
639fn warn_sanitized(on_warn: Option<WarnSinkRef<'_>>, kind: &str, hint: &str, old: &str, new: &str) {
640    if let Some(sink) = on_warn {
641        sink(&format!(
642            "warning: {kind} {hint} name `{old}` contains ref-reserved char; renamed to `{new}`",
643        ));
644    }
645}
646
647fn build_db(
648    mut raw: Vec<RawEntry>,
649    sanitizer: Option<NameSanitizerRef<'_>>,
650    on_warn: Option<WarnSinkRef<'_>>,
651    verbose_collisions: bool,
652) -> Result<AssetDb> {
653    // Stable order: sort by hint so dedup picks the same "winner" each bake.
654    raw.sort_by(|a, b| a.hint.cmp(&b.hint));
655
656    // Reset every entry's name to its raw filename stem before dedup
657    // (cached entries arrive with their previously-suffixed name; if we
658    // dedup against that, collisions compound across bakes), then sanitize
659    // ref-reserved chars in both top-level and sub-asset names — covers the
660    // three name sources (filename stem, YAML m_Name sub-assets, `.meta`
661    // sprite-sheet entries) in one pass before dedup uses `r.name` as key.
662    for r in raw.iter_mut() {
663        r.name = filename_stem_from_hint(&r.hint);
664        if let Some(san) = sanitizer
665            && let Some(clean) = san(&r.name)
666        {
667            warn_sanitized(on_warn, "asset", &r.hint, &r.name, &clean);
668            r.name = clean;
669        }
670        if let Some(san) = sanitizer {
671            for sub in r.sub_assets.iter_mut() {
672                if let Some(clean) = san(&sub.name) {
673                    warn_sanitized(on_warn, "sub-asset of", &r.hint, &sub.name, &clean);
674                    sub.name = clean.into_boxed_str();
675                }
676            }
677        }
678    }
679
680    // Type-aware dedup: collisions are scoped by `(name, asset_type)`.
681    // Same-name entries of distinct `asset_type` (`Foo.png` Texture2D +
682    // `Foo.prefab` Prefab) get distinct alias buckets — the consuming
683    // field's C# type discriminates at decode. Embedded sub-asset docs
684    // of container types are excluded from the global pool entirely
685    // (see [Name collisions](docs/asset-database.md#name-collisions)).
686
687    // Pass 1: tally distinct-guid owners per `(name, asset_type)` bucket.
688    let mut owners: AHashMap<(String, AssetTypeRaw), AHashSet<u128>> =
689        AHashMap::with_capacity(raw.len());
690    for r in &raw {
691        let key = (r.name.clone(), r.asset_type_raw);
692        owners.entry(key).or_default().insert(r.guid);
693        if is_embedded_container(&r.hint) {
694            continue;
695        }
696        for sub in &r.sub_assets {
697            let key = (
698                sub.name.to_string(),
699                AssetTypeRaw::Native(sub.class_id),
700            );
701            owners.entry(key).or_default().insert(r.guid);
702        }
703    }
704    let contested = |name: &str, t: AssetTypeRaw| {
705        owners
706            .get(&(name.to_string(), t))
707            .is_some_and(|s| s.len() > 1)
708    };
709
710    // Pass 2: walk entries in hint-sorted order, renaming every contested
711    // claim. `taken` tracks `(name, asset_type) → guid` pairs already
712    // claimed in this pass so the disambiguator never picks a candidate
713    // that collides with an earlier (different-guid) entry of the same
714    // type; same-guid sharing remains allowed.
715    let mut taken: AHashMap<(String, AssetTypeRaw), u128> = AHashMap::with_capacity(raw.len());
716    for r in raw.iter_mut() {
717        let top_type = r.asset_type_raw;
718        if contested(&r.name, top_type) {
719            let new_name = disambiguate(&r.name, &r.hint, r.guid, top_type, &taken)?;
720            if verbose_collisions && let Some(sink) = on_warn {
721                sink(&format!(
722                    "warning: name collision on `{}` (guid {:032x}); renamed to `{}`",
723                    r.name, r.guid, new_name,
724                ));
725            }
726            r.name = new_name;
727        }
728        match taken.get(&(r.name.clone(), top_type)) {
729            Some(&prev) if prev != r.guid => anyhow::bail!(
730                "asset-db: name `{}` claimed by both guid {:032x} and {prev:032x} \
731                 after dedup — `disambiguate` produced a non-unique alias",
732                r.name,
733                r.guid,
734            ),
735            _ => {
736                taken.insert((r.name.clone(), top_type), r.guid);
737            }
738        }
739
740        if is_embedded_container(&r.hint) {
741            // Prefab-embedded sub-assets bypass the global dedup pool;
742            // sanitization already happened above. Names stay as authored
743            // and resolve via `$Sub@Parent` at the codec layer.
744            continue;
745        }
746        for sub in r.sub_assets.iter_mut() {
747            let sub_type = AssetTypeRaw::Native(sub.class_id);
748            if contested(&sub.name, sub_type) {
749                let original = sub.name.to_string();
750                let new_name = disambiguate(&original, &r.hint, r.guid, sub_type, &taken)?;
751                if verbose_collisions && let Some(sink) = on_warn {
752                    sink(&format!(
753                        "warning: sub-asset name collision on `{}` (parent guid {:032x}); renamed to `{}`",
754                        original, r.guid, new_name,
755                    ));
756                }
757                sub.name = new_name.into_boxed_str();
758            }
759            // Same-guid sharing is allowed — a sub-asset's deduped name
760            // will often equal the parent's deduped alias (same hint
761            // feeds disambiguate), and that's the desired outcome.
762            let key = (sub.name.to_string(), sub_type);
763            if !taken.contains_key(&key) {
764                taken.insert(key, r.guid);
765            }
766        }
767    }
768
769    // Intern script types and finalize entries.
770    let mut db = AssetDb::new();
771    let entries: Vec<AssetEntry> = raw
772        .into_iter()
773        .map(|r| {
774            let asset_type = match r.asset_type_raw {
775                AssetTypeRaw::Native(n) => AssetType::Native(n),
776                AssetTypeRaw::Script(g) => AssetType::Script(db.intern_script(g)),
777            };
778            AssetEntry {
779                guid: r.guid,
780                asset_type,
781                name: r.name.into_boxed_str(),
782                sub_assets: r.sub_assets,
783                hint: r.hint.into_boxed_str(),
784            }
785        })
786        .collect();
787    db.entries = entries;
788    db.sort();
789    check_no_full_duplicates(&db)?;
790    Ok(db)
791}
792
793/// Hard-fail on two corruption cases:
794///
795/// 1. **Two top-level entries share a GUID.** Hand-edited or copy-pasted
796///    `.meta` whose GUID wasn't rewritten. The name-dedup loop only
797///    renames when guids *differ*, so same-guid pairs flow through with
798///    distinct names and `db.sort()` doesn't merge them. Catches the
799///    duplicate-`.meta` case the Unity-hidden walker filter also guards
800///    against — belt and braces.
801///
802/// 2. **Within-entry sub-asset rows share `(name, fileID)`.** Two YAML
803///    sub-docs in the same asset declared identical names + fileIDs —
804///    asset-side corruption, parser bug, or atlas content collision.
805fn check_no_full_duplicates(db: &AssetDb) -> Result<()> {
806    // Top-level: guid uniqueness. `db.entries` is already guid-sorted, so
807    // a single pass over consecutive pairs catches every dup.
808    for w in db.entries.windows(2) {
809        if w[0].guid == w[1].guid {
810            anyhow::bail!(
811                "duplicate top-level GUID: {:032x} between names `{}` and `{}` — likely two .meta files share a GUID",
812                w[0].guid,
813                w[0].name,
814                w[1].name,
815            );
816        }
817    }
818
819    // Sub-assets: (guid, fileID, name) uniqueness within each entry.
820    let mut seen: AHashSet<(i64, &str)> = AHashSet::new();
821    for e in &db.entries {
822        seen.clear();
823        for s in &e.sub_assets {
824            if !seen.insert((s.file_id, &*s.name)) {
825                anyhow::bail!(
826                    "duplicate sub-asset record: name={} guid={:032x} fileID={} type={:?}",
827                    s.name,
828                    e.guid,
829                    s.file_id,
830                    e.asset_type,
831                );
832            }
833        }
834    }
835    Ok(())
836}
837
838/// Read just enough of the asset to satisfy `mode`.
839///
840/// `TopOnly` reads the first 4 KiB and truncates at the last newline — that
841/// covers a YAML preamble (`%YAML 1.1\n%TAG …\n`), the first
842/// `--- !u!<id> &<fid>` header, and a `m_Script` line for .asset
843/// MonoBehaviours (≤ ~200 bytes). `WithSubAssets` reads the full file.
844///
845/// Trimming at the last newline guards against UTF-8 boundary cuts inside a
846/// multi-byte character — every YAML line is complete UTF-8.
847fn read_asset_for_mode(path: &Path, mode: asset::ParseMode) -> Result<String> {
848    use std::io::Read;
849    match mode {
850        asset::ParseMode::WithSubAssets => {
851            std::fs::read_to_string(path).with_context(|| format!("read asset: {}", path.display()))
852        }
853        asset::ParseMode::TopOnly => {
854            const HEAD_BYTES: u64 = 4096;
855            let f = std::fs::File::open(path)
856                .with_context(|| format!("open asset: {}", path.display()))?;
857            let mut buf = Vec::with_capacity(HEAD_BYTES as usize);
858            f.take(HEAD_BYTES)
859                .read_to_end(&mut buf)
860                .with_context(|| format!("read asset: {}", path.display()))?;
861            // Drop trailing partial line so .lines() yields only complete
862            // (and thus complete-UTF-8) lines. If the head has no newline at
863            // all (pathological — single-line YAML > 4 KiB), keep the buffer
864            // and let `from_utf8` decide.
865            if let Some(last_nl) = buf.iter().rposition(|&b| b == b'\n') {
866                buf.truncate(last_nl + 1);
867            }
868            String::from_utf8(buf)
869                .with_context(|| format!("non-utf8 asset head: {}", path.display()))
870        }
871    }
872}
873
/// Map a `.meta` path to its companion asset path by removing the
/// trailing `.meta`.
///
/// Returns `None` when the path is not valid UTF-8 or does not end in
/// `.meta`.
fn strip_meta_suffix(p: &Path) -> Option<PathBuf> {
    let text = p.to_str()?;
    let companion = text.strip_suffix(".meta")?;
    Some(PathBuf::from(companion))
}
878
879fn rel_hint(project_root: &Path, companion: &Path) -> Result<String> {
880    // Strip the project root, not just `Assets/`. The walker now visits both
881    // `<project>/Assets/` and `<project>/Packages/`, so hints look like
882    // `Assets/Foo.prefab` or `Packages/com.boxcat.libs/Bar.mixer`.
883    let rel = companion
884        .strip_prefix(project_root)
885        .with_context(|| format!("strip prefix: {}", companion.display()))?;
886    let s = rel.to_string_lossy().replace('\\', "/");
887    Ok(s)
888}
889
/// File stem of `p` (final component without its last extension) as an
/// owned string.
///
/// Yields an empty string when the path has no file name or the stem is
/// not valid UTF-8.
fn filename_stem(p: &Path) -> String {
    let stem = p.file_stem().and_then(|os| os.to_str());
    stem.map(str::to_owned).unwrap_or_default()
}
896
/// File stem of the path spelled by `hint`, as an owned string.
///
/// Yields an empty string when the hint has no file-name component.
fn filename_stem_from_hint(hint: &str) -> String {
    match Path::new(hint).file_stem() {
        Some(stem) => stem.to_str().unwrap_or("").to_owned(),
        None => String::new(),
    }
}
904
905/// Pick a unique alias for `stem` given `hint` and an existing `taken` map.
906/// Strategy: try `stem^dir` for successively-deeper parent dirs. A candidate
907/// is considered "free" iff it's absent from `taken` *or* already mapped to
908/// `owner_guid` (the latter covers the same-guid sub-asset case where the
909/// parent's deduped top-level alias is a valid name to share).
910///
911/// `asset_type` scopes the dedup bucket — a candidate is "taken" only when
912/// another guid has claimed the exact `(name, asset_type)` pair. Two assets
913/// of different `asset_type` (e.g. Texture2D `Foo.png` vs Prefab `Foo.prefab`)
914/// share the bare alias `Foo` without contesting because the codec layer
915/// uses the field's declared C# type to pick the right one at lookup time.
916///
917/// Hard-fails when no parent segment yields a free candidate — ambiguity
918/// surfaces at bake time rather than getting papered over with a guid suffix.
919/// See [Name collisions](docs/asset-database.md#name-collisions) for the
920/// `^` separator rationale.
921fn disambiguate(
922    stem: &str,
923    hint: &str,
924    owner_guid: u128,
925    asset_type: AssetTypeRaw,
926    taken: &AHashMap<(String, AssetTypeRaw), u128>,
927) -> Result<String> {
928    let parts: Vec<&str> = Path::new(hint)
929        .parent()
930        .map(|p| p.iter().filter_map(|c| c.to_str()).collect::<Vec<_>>())
931        .unwrap_or_default();
932
933    // Walk parent segments from nearest to root, picking the shortest
934    // suffix that doesn't collide with a different-guid owner.
935    let mut suffix = String::new();
936    for seg in parts.iter().rev() {
937        if !suffix.is_empty() {
938            suffix.insert(0, '/');
939        }
940        suffix.insert_str(0, seg);
941        let candidate = format!("{stem}^{suffix}");
942        match taken.get(&(candidate.clone(), asset_type)) {
943            None => return Ok(candidate),
944            Some(&prev) if prev == owner_guid => return Ok(candidate),
945            Some(_) => continue,
946        }
947    }
948    anyhow::bail!(
949        "asset-db: cannot disambiguate name `{stem}` for guid {owner_guid:032x} \
950         (hint `{hint}`) — every parent-segment suffix is already taken by \
951         another asset. Rename one of the colliding assets in source.",
952    )
953}
954
955#[cfg(test)]
956mod tests {
957    use super::*;
958
959    #[test]
960    fn run_with_panic_safety_passes_through_ok_some() {
961        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(Some(42)));
962        assert_eq!(r, Ok(Some(42)));
963    }
964
965    #[test]
966    fn run_with_panic_safety_passes_through_ok_none() {
967        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(None));
968        assert_eq!(r, Ok(None));
969    }
970
971    #[test]
972    fn run_with_panic_safety_formats_inner_error_with_label() {
973        let r: Result<Option<i32>, String> = run_with_panic_safety("foo.meta", "task", || {
974            Err(anyhow::anyhow!("malformed yaml"))
975        });
976        assert_eq!(r, Err("foo.meta: malformed yaml".to_string()));
977    }
978
979    #[test]
980    fn run_with_panic_safety_catches_str_panic() {
981        let r: Result<Option<i32>, String> =
982            run_with_panic_safety("foo.meta", "process_one", || {
983                std::panic::panic_any("boom (&str payload)")
984            });
985        assert_eq!(
986            r,
987            Err("foo.meta: panic in process_one: boom (&str payload)".to_string())
988        );
989    }
990
991    #[test]
992    fn run_with_panic_safety_catches_string_panic() {
993        let r: Result<Option<i32>, String> =
994            run_with_panic_safety("foo.meta", "process_one", || {
995                // String payloads come from `panic!("{x}")` via the format!
996                // path — the runtime hands a String, not a &str.
997                panic!("formatted {}", "msg")
998            });
999        assert_eq!(
1000            r,
1001            Err("foo.meta: panic in process_one: formatted msg".to_string())
1002        );
1003    }
1004
1005    #[test]
1006    fn run_with_panic_safety_handles_non_string_panic_payload() {
1007        // `panic_any(42_i32)` produces a panic whose payload isn't &str
1008        // or String. The helper falls back to a sentinel message rather
1009        // than dropping the error silently.
1010        let r: Result<Option<i32>, String> =
1011            run_with_panic_safety("foo.meta", "process_one", || std::panic::panic_any(42_i32));
1012        assert_eq!(
1013            r,
1014            Err("foo.meta: panic in process_one: <non-string panic payload>".to_string())
1015        );
1016    }
1017
1018    fn meta_for(
1019        texture_type: Option<u32>,
1020        sprite_mode: Option<u32>,
1021        sprites: Vec<(i64, String)>,
1022    ) -> meta::MetaInfo {
1023        meta::MetaInfo {
1024            guid: 0,
1025            sprite_sheet: sprites,
1026            texture_type,
1027            sprite_mode,
1028        }
1029    }
1030
1031    #[test]
1032    fn synthesize_implicit_sprite_fires_on_single_mode_sprite_with_empty_sheet() {
1033        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(SPRITE_MODE_SINGLE), vec![]);
1034        let sub = synthesize_implicit_sprite(&m, "Icon").expect("synthesis should fire");
1035        assert_eq!(sub.file_id, ClassId::Sprite.canonical_subobject_fid());
1036        assert_eq!(&*sub.name, "Icon");
1037    }
1038
1039    #[test]
1040    fn synthesize_implicit_sprite_skips_when_sheet_non_empty() {
1041        // Explicit sprites own the sub-asset list — atlas-shaped meta
1042        // doesn't get a phantom main-Sprite layered on top.
1043        let m = meta_for(
1044            Some(TEXTURE_TYPE_SPRITE),
1045            Some(SPRITE_MODE_SINGLE),
1046            vec![(12345, "explicit_a".into())],
1047        );
1048        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
1049    }
1050
1051    #[test]
1052    fn synthesize_implicit_sprite_skips_on_multiple_mode() {
1053        // spriteMode: 2 (Multiple = atlas) means "the sprites: list is
1054        // canonical, even if currently empty". No synthesis.
1055        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(2), vec![]);
1056        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
1057    }
1058
1059    #[test]
1060    fn synthesize_implicit_sprite_skips_on_non_sprite_texture() {
1061        // textureType: 0 (Default) — texture isn't a Sprite at all.
1062        let m = meta_for(Some(0), Some(SPRITE_MODE_SINGLE), vec![]);
1063        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
1064    }
1065
1066    #[test]
1067    fn synthesize_implicit_sprite_skips_when_predicates_absent() {
1068        // Both texture_type and sprite_mode None — `.meta` from a
1069        // non-texture asset (or a stale .meta missing the fields).
1070        let m = meta_for(None, None, vec![]);
1071        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
1072    }
1073
1074    /// `is_filterable_subdoc_for_ext` is the single point where parse-
1075    /// time sub-asset filtering decides what's a structural prefab tree
1076    /// doc vs. a real sub-asset. Pin the contract per extension.
1077    #[test]
1078    fn is_filterable_subdoc_for_ext_branches_correctly() {
1079        // .prefab: GO + Transform + RectTransform + MonoBehaviour-as-component.
1080        for cls in [1, 4, 224, 114] {
1081            assert!(
1082                is_filterable_subdoc_for_ext(cls, "prefab"),
1083                "class {cls} should be filtered for .prefab",
1084            );
1085        }
1086        // .playable: Timeline tracks live as MB-114 — must NOT filter.
1087        // GO/Transform never appear in .playable but the predicate stays
1088        // valid (no-op).
1089        assert!(!is_filterable_subdoc_for_ext(114, "playable"));
1090        assert!(is_filterable_subdoc_for_ext(1, "playable"));
1091        // .controller: AnimatorState (1102), BlendTree (206) — never
1092        // filtered.
1093        assert!(!is_filterable_subdoc_for_ext(1102, "controller"));
1094        assert!(!is_filterable_subdoc_for_ext(114, "controller"));
1095        // .mixer: AudioMixerGroup (273) — never filtered.
1096        assert!(!is_filterable_subdoc_for_ext(273, "mixer"));
1097        assert!(!is_filterable_subdoc_for_ext(114, "mixer"));
1098        // .asset / .spriteatlas: MB-114 are real ScriptableObject sub-
1099        // assets. Real classes (Sprite=213) are never filtered either.
1100        assert!(!is_filterable_subdoc_for_ext(114, "asset"));
1101        assert!(!is_filterable_subdoc_for_ext(213, "spriteatlas"));
1102    }
1103
1104    #[test]
1105    fn stem_basic() {
1106        assert_eq!(filename_stem(Path::new("foo/Bar.prefab")), "Bar");
1107        assert_eq!(filename_stem_from_hint("foo/Bar.prefab"), "Bar");
1108    }
1109
1110    #[test]
1111    fn disambiguate_walks_parents() {
1112        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
1113        let mut taken = AHashMap::new();
1114        taken.insert(("Foo".to_string(), t), 1u128);
1115        // Nearest parent suffix wins on first try.
1116        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
1117        assert_eq!(alias, "Foo^Editor");
1118
1119        // First-level parent already taken (by a different guid, same type)
1120        // → falls back to deeper path.
1121        taken.insert(("Foo^Editor".to_string(), t), 3);
1122        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
1123        assert_eq!(alias, "Foo^pkg/Editor");
1124    }
1125
1126    #[test]
1127    fn disambiguate_ignores_collisions_in_other_types() {
1128        // A different `AssetTypeRaw` claiming the same alias does NOT
1129        // contest — type-aware dedup gives each `(name, type)` its own
1130        // bucket. PNG (Texture2D) and prefab (Prefab) named `Foo` both
1131        // keep bare `Foo`.
1132        let png = AssetTypeRaw::Native(ClassId::Texture2D as u32);
1133        let prefab = AssetTypeRaw::Native(ClassId::Prefab as u32);
1134        let mut taken = AHashMap::new();
1135        taken.insert(("Foo".to_string(), png), 1u128);
1136        // disambiguate against the prefab bucket — `Foo` is free here.
1137        let alias = disambiguate("Foo", "Assets/Bar/Foo.prefab", 2, prefab, &taken).unwrap();
1138        // Walk produces `Foo^Bar` because we always step at least one
1139        // parent (disambiguate's contract is "produce a suffixed form");
1140        // the contention check upstream is what decides whether to call.
1141        assert_eq!(alias, "Foo^Bar");
1142    }
1143
1144    #[test]
1145    fn disambiguate_returns_existing_when_same_owner() {
1146        // When the candidate suffix is already mapped to `owner_guid`, the
1147        // sub-asset can safely share that alias — its lookup path resolves
1148        // back to the same guid, so no real ambiguity exists.
1149        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
1150        let mut taken = AHashMap::new();
1151        taken.insert(("Cloud1".to_string(), t), 0xa0_u128);
1152        taken.insert(("Cloud1^Tower".to_string(), t), 0xb0_u128);
1153        let alias =
1154            disambiguate("Cloud1", "Assets/Tower/Cloud1.png", 0xb0_u128, t, &taken).unwrap();
1155        assert_eq!(alias, "Cloud1^Tower");
1156    }
1157
1158    #[test]
1159    fn disambiguate_hard_fails_when_no_parent_segments() {
1160        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
1161        let mut taken = AHashMap::new();
1162        taken.insert(("Foo".to_string(), t), 1u128);
1163        // Hint has no directories — nothing to suffix with. Must error
1164        // rather than silently fall back to a guid suffix.
1165        let err =
1166            disambiguate("Foo", "Foo.cs", 2u128, t, &taken).expect_err("must hard-fail");
1167        let msg = format!("{err:#}");
1168        assert!(msg.contains("disambiguate"), "msg: {msg}");
1169        assert!(msg.contains("Foo"), "msg: {msg}");
1170    }
1171
1172    fn raw_native(hint: &str, guid: u128, sub_assets: Vec<SubAsset>) -> RawEntry {
1173        RawEntry {
1174            guid,
1175            asset_type_raw: AssetTypeRaw::Native(ClassId::Texture2D as u32),
1176            hint: hint.to_string(),
1177            // `build_db`'s first pass overwrites `name` from `hint`, so any
1178            // value here is fine. Empty kept the test minimal.
1179            name: String::new(),
1180            meta_mtime_ns: 0,
1181            asset_mtime_ns: 0,
1182            sub_assets,
1183        }
1184    }
1185
1186    /// Pin: when a name is claimed by ≥2 distinct guids of the same
1187    /// `asset_type`, every claimant must rename — no "first wins" carve-out.
1188    /// The deduped form is consistent across claimants: each entry resolves
1189    /// through `disambiguate` against its own hint.
1190    ///
1191    /// Two same-type Texture2D `Cloud1.png` files in different folders
1192    /// share the bare alias `Cloud1` until type-aware dedup forces both to
1193    /// suffix.
1194    #[test]
1195    fn build_db_renames_every_claimant_when_name_is_contested() {
1196        let png_a_guid = 0xa0_u128;
1197        let png_b_guid = 0xb0_u128;
1198        let sprite_fid: i64 = 21300000;
1199
1200        let raw = vec![
1201            raw_native("Assets/Other/Cloud1.png", png_a_guid, vec![]),
1202            raw_native(
1203                "Assets/Tower/Cloud1.png",
1204                png_b_guid,
1205                vec![SubAsset {
1206                    file_id: sprite_fid,
1207                    class_id: ClassId::Sprite as u32,
1208                    name: "Cloud1".into(),
1209                }],
1210            ),
1211        ];
1212
1213        let db = build_db(raw, None, None, false).expect("build_db should succeed");
1214
1215        let a_entry = db.find_by_guid(png_a_guid).unwrap();
1216        let b_entry = db.find_by_guid(png_b_guid).unwrap();
1217
1218        // Neither entry keeps the bare alias — both renamed.
1219        assert_ne!(&*a_entry.name, "Cloud1");
1220        assert_ne!(&*b_entry.name, "Cloud1");
1221        assert!(
1222            a_entry.name.starts_with("Cloud1^"),
1223            "first png top-level not deduped: {}",
1224            a_entry.name,
1225        );
1226        assert!(
1227            b_entry.name.starts_with("Cloud1^"),
1228            "second png top-level not deduped: {}",
1229            b_entry.name,
1230        );
1231        // Distinct hints → distinct deduped suffixes.
1232        assert_ne!(&*a_entry.name, &*b_entry.name);
1233
1234        // Sub-asset dedup: the Sprite sub-asset's `Cloud1` lives in its own
1235        // type-bucket (Sprite, not Texture2D), so it isn't contested by the
1236        // Texture2D collision above. It stays bare. The png_b entry is the
1237        // only Sprite-bucket owner.
1238        let png_b_sub = &b_entry.sub_assets[0];
1239        assert_eq!(png_b_sub.file_id, sprite_fid);
1240        assert_eq!(
1241            &*png_b_sub.name, "Cloud1",
1242            "Sprite sub-asset should stay bare under type-aware dedup",
1243        );
1244    }
1245
1246    /// Pin type-aware dedup: a Texture2D and a Prefab sharing the stem
1247    /// `Foo` both keep the bare alias. Reverse lookup discriminates by
1248    /// the field's declared C# type at the consumer layer.
1249    #[test]
1250    fn build_db_keeps_bare_alias_for_type_distinct_collisions() {
1251        let png_guid = 0xa0_u128;
1252        let prefab_guid = 0xb0_u128;
1253        let raw = vec![
1254            RawEntry {
1255                guid: png_guid,
1256                asset_type_raw: AssetTypeRaw::Native(ClassId::Texture2D as u32),
1257                hint: "Assets/UI/Foo.png".to_string(),
1258                name: String::new(),
1259                meta_mtime_ns: 0,
1260                asset_mtime_ns: 0,
1261                sub_assets: vec![],
1262            },
1263            RawEntry {
1264                guid: prefab_guid,
1265                asset_type_raw: AssetTypeRaw::Native(ClassId::Prefab as u32),
1266                hint: "Assets/UI/Foo.prefab".to_string(),
1267                name: String::new(),
1268                meta_mtime_ns: 0,
1269                asset_mtime_ns: 0,
1270                sub_assets: vec![],
1271            },
1272        ];
1273        let db = build_db(raw, None, None, false).expect("build_db should succeed");
1274        // Both keep bare `Foo` because they live in distinct type buckets.
1275        assert_eq!(&*db.find_by_guid(png_guid).unwrap().name, "Foo");
1276        assert_eq!(&*db.find_by_guid(prefab_guid).unwrap().name, "Foo");
1277    }
1278
1279    /// Pin: AnimatorController-embedded sub-assets are excluded from the
1280    /// global dedup pool, mirroring the prefab-embedded rule. Without the
1281    /// exclusion, an embedded AnimatorState named `Idle` would contest a
1282    /// hypothetical standalone `.asset` of the same name AND same Unity
1283    /// classID (AnimatorState exists as both an embedded sub of
1284    /// `.controller` and a top-level `.asset` in Unity), forcing both to
1285    /// rename via parent-dir suffix. The exclusion keeps the embedded
1286    /// state in its parent's namespace where it's addressed via
1287    /// `$Idle@Player` at the consumer layer.
1288    #[test]
1289    fn build_db_skips_controller_embedded_subassets_in_global_pool() {
1290        const ANIMATOR_STATE_CLASS_ID: u32 = 1102;
1291        let controller_guid = 0xc0_u128;
1292        let other_state_guid = 0xd0_u128;
1293        let raw = vec![
1294            RawEntry {
1295                guid: controller_guid,
1296                asset_type_raw: AssetTypeRaw::Native(ClassId::AnimatorController as u32),
1297                hint: "Assets/Anim/Player.controller".to_string(),
1298                name: String::new(),
1299                meta_mtime_ns: 0,
1300                asset_mtime_ns: 0,
1301                sub_assets: vec![SubAsset {
1302                    file_id: -123_456_789_012,
1303                    class_id: ANIMATOR_STATE_CLASS_ID,
1304                    name: "Idle".into(),
1305                }],
1306            },
1307            // Standalone .asset whose top class IS AnimatorState — same
1308            // (name, class_id) bucket as the embedded one. With
1309            // exclusion, only this one claims the global `Idle` alias.
1310            RawEntry {
1311                guid: other_state_guid,
1312                asset_type_raw: AssetTypeRaw::Native(ANIMATOR_STATE_CLASS_ID),
1313                hint: "Assets/Other/Idle.asset".to_string(),
1314                name: String::new(),
1315                meta_mtime_ns: 0,
1316                asset_mtime_ns: 0,
1317                sub_assets: vec![],
1318            },
1319        ];
1320        let db = build_db(raw, None, None, false).expect("build_db should succeed");
1321        // Standalone keeps bare `Idle`.
1322        assert_eq!(&*db.find_by_guid(other_state_guid).unwrap().name, "Idle");
1323        // Embedded state stays as authored in the parent's namespace.
1324        let ctrl_entry = db.find_by_guid(controller_guid).unwrap();
1325        assert_eq!(&*ctrl_entry.sub_assets[0].name, "Idle");
1326    }
1327
1328    /// Same shape as the controller test, for AudioMixerController:
1329    /// AudioMixerGroup sub-asset class collides with itself between an
1330    /// embedded `Main.mixer` group and a hypothetical standalone
1331    /// `.asset` of the same class. Exclusion keeps the embed in the
1332    /// parent's namespace.
1333    #[test]
1334    fn build_db_skips_mixer_embedded_subassets_in_global_pool() {
1335        const AUDIO_MIXER_GROUP_CLASS_ID: u32 = 273;
1336        let mixer_guid = 0xe0_u128;
1337        let other_group_guid = 0xf0_u128;
1338        let raw = vec![
1339            RawEntry {
1340                guid: mixer_guid,
1341                asset_type_raw: AssetTypeRaw::Native(ClassId::AudioMixerController as u32),
1342                hint: "Assets/Audio/Main.mixer".to_string(),
1343                name: String::new(),
1344                meta_mtime_ns: 0,
1345                asset_mtime_ns: 0,
1346                sub_assets: vec![SubAsset {
1347                    file_id: 9_001,
1348                    class_id: AUDIO_MIXER_GROUP_CLASS_ID,
1349                    name: "Master".into(),
1350                }],
1351            },
1352            RawEntry {
1353                guid: other_group_guid,
1354                asset_type_raw: AssetTypeRaw::Native(AUDIO_MIXER_GROUP_CLASS_ID),
1355                hint: "Assets/Other/Master.asset".to_string(),
1356                name: String::new(),
1357                meta_mtime_ns: 0,
1358                asset_mtime_ns: 0,
1359                sub_assets: vec![],
1360            },
1361        ];
1362        let db = build_db(raw, None, None, false).expect("build_db should succeed");
1363        assert_eq!(&*db.find_by_guid(other_group_guid).unwrap().name, "Master");
1364        let mixer_entry = db.find_by_guid(mixer_guid).unwrap();
1365        assert_eq!(&*mixer_entry.sub_assets[0].name, "Master");
1366    }
1367
1368    /// Pin: `.playable` files are treated as embedded containers — their
1369    /// Timeline track sub-assets bypass the global dedup pool. Many
1370    /// `.playable` files in a project share Unity-default track names like
1371    /// `Animation Track (2)`; without the exclusion they contest in the
1372    /// global pool and `disambiguate` hard-fails when the shared
1373    /// parent-dir suffixes are exhausted. Exclusion is keyed on the
1374    /// `.playable` extension (`is_embedded_container`) because the
1375    /// top-doc script guid of a playable is whichever sub-doc Unity
1376    /// sorts first by hashed fileID — unstable as a discriminator.
1377    #[test]
1378    fn build_db_skips_playable_embedded_tracks_in_global_pool() {
1379        // Track class id + script guid placeholders — bake stores both
1380        // but doesn't validate them against any registry. Extension is
1381        // the discriminator that triggers the exclusion.
1382        const ANIMATION_TRACK_CLASS_ID: u32 = 5004;
1383        let some_script_guid = 0xd21dcc2386d650c4597f3633c75a1f98_u128;
1384        let pa_guid = 0xa0_u128;
1385        let pb_guid = 0xb0_u128;
1386        let raw = vec![
1387            RawEntry {
1388                guid: pa_guid,
1389                asset_type_raw: AssetTypeRaw::Script(some_script_guid),
1390                hint: "Assets/Anim/PlayableA.playable".to_string(),
1391                name: String::new(),
1392                meta_mtime_ns: 0,
1393                asset_mtime_ns: 0,
1394                sub_assets: vec![SubAsset {
1395                    file_id: -123_456_789,
1396                    class_id: ANIMATION_TRACK_CLASS_ID,
1397                    name: "Animation Track (2)".into(),
1398                }],
1399            },
1400            RawEntry {
1401                guid: pb_guid,
1402                asset_type_raw: AssetTypeRaw::Script(some_script_guid),
1403                hint: "Assets/Anim/PlayableB.playable".to_string(),
1404                name: String::new(),
1405                meta_mtime_ns: 0,
1406                asset_mtime_ns: 0,
1407                sub_assets: vec![SubAsset {
1408                    file_id: -987_654_321,
1409                    class_id: ANIMATION_TRACK_CLASS_ID,
1410                    name: "Animation Track (2)".into(),
1411                }],
1412            },
1413        ];
1414        let db = build_db(raw, None, None, false).expect("build_db should succeed");
1415        // Both playables keep their embedded track names as authored —
1416        // sub-assets live in the parent's namespace, not the global pool.
1417        assert_eq!(
1418            &*db.find_by_guid(pa_guid).unwrap().sub_assets[0].name,
1419            "Animation Track (2)"
1420        );
1421        assert_eq!(
1422            &*db.find_by_guid(pb_guid).unwrap().sub_assets[0].name,
1423            "Animation Track (2)"
1424        );
1425    }
1426
/// Pin: sub-assets embedded in a prefab never enter the global dedup
/// pool. A standalone asset elsewhere in the project may share the
/// embedded name without either side being renamed; consumers reach the
/// embedded one via `$Sub@Parent`, not through the global alias bucket.
#[test]
fn build_db_skips_prefab_embedded_subassets_in_global_pool() {
    let prefab_id = 0xa0_u128;
    let standalone_id = 0xb0_u128;

    // Prefab that carries an embedded AnimationClip named `Animation`.
    let embedded_clip = SubAsset {
        file_id: -4_468_419_427_481_386_445,
        class_id: ClassId::AnimationClip as u32,
        name: "Animation".into(),
    };
    let prefab = RawEntry {
        guid: prefab_id,
        asset_type_raw: AssetTypeRaw::Native(ClassId::Prefab as u32),
        hint: String::from("Assets/UI/PatternBG.prefab"),
        name: String::new(),
        meta_mtime_ns: 0,
        asset_mtime_ns: 0,
        sub_assets: vec![embedded_clip],
    };

    // Unrelated standalone clip whose filename stem is also `Animation`.
    let standalone_clip = RawEntry {
        guid: standalone_id,
        asset_type_raw: AssetTypeRaw::Native(ClassId::AnimationClip as u32),
        hint: String::from("Assets/Other/Animation.anim"),
        name: String::new(),
        meta_mtime_ns: 0,
        asset_mtime_ns: 0,
        sub_assets: Vec::new(),
    };

    let db = build_db(vec![prefab, standalone_clip], None, None, false)
        .expect("build_db should succeed");

    // The standalone .anim owns the bare `Animation` alias; the
    // prefab-embedded clip does not compete for it.
    let standalone = db.find_by_guid(standalone_id).unwrap();
    assert_eq!(&*standalone.name, "Animation");

    // The embedded clip keeps its authored name: it lives in the
    // prefab's namespace (`$Animation@PatternBG` at the consumer layer).
    let owner = db.find_by_guid(prefab_id).unwrap();
    assert_eq!(&*owner.sub_assets[0].name, "Animation");
}
1471
/// Pin: a name owned by exactly one guid is uncontested and stays bare,
/// even when that guid uses it for both its top-level alias and one of
/// its own sub-assets. Protects the common pairing of a Texture2D with
/// its lone same-named Sprite sub-asset from being renamed needlessly.
#[test]
fn build_db_keeps_bare_alias_when_name_is_uncontested() {
    let texture_guid = 0xb0_u128;

    // The only sub-asset: a Sprite sharing the texture's stem.
    let lone_sprite = SubAsset {
        file_id: 21_300_000,
        class_id: ClassId::Sprite as u32,
        name: "Lone".into(),
    };
    let inputs = vec![raw_native("Assets/Tower/Lone.png", texture_guid, vec![lone_sprite])];

    let db = build_db(inputs, None, None, false).expect("build_db should succeed");

    // Both the top-level alias and the sub-asset name come back bare.
    let texture = db.find_by_guid(texture_guid).unwrap();
    assert_eq!(&*texture.name, "Lone");
    assert_eq!(&*texture.sub_assets[0].name, "Lone");
}
1494
/// Pin: a top-level alias collision that cannot be resolved — the bare
/// stem is already taken and the path has no parent segments left to
/// walk — aborts the bake instead of silently falling back to a
/// `^<guid8>` suffix. Project policy: ambiguity surfaces at bake time,
/// not encode time.
#[test]
fn build_db_fails_when_dedup_cannot_resolve() {
    // Same bare stem `Foo`, both at the root: `disambiguate` has no
    // parent directory to suffix with.
    let first = raw_native("Foo.asset", 0x01_u128, vec![]);
    let second = raw_native("Foo.prefab", 0x02_u128, vec![]);

    let err = build_db(vec![first, second], None, None, false)
        .expect_err("collision with no parent dirs must hard-fail");
    let msg = format!("{err:#}");

    // The rendered error chain must mention both the clashing stem and
    // the dedup pass that gave up.
    let mentions_everything = ["Foo", "disambiguate"]
        .iter()
        .all(|needle| msg.contains(*needle));
    assert!(
        mentions_everything,
        "error message should name the collision and the dedup pass: {msg}",
    );
}
1516}