unity_assetdb/bake.rs
1//! Bake orchestrator: walk → parse → cache → write.
2//!
3//! Per-file flow:
//! 1. Stat `.meta` only. If its mtime matches the cached value → reuse the
//!    cached entry as-is: no companion stat, no parse.
6//! 2. Else read `.meta` → guid + sprite-sheet sub-assets.
7//! 3. Read the asset file → top-level class ID + sub-asset rows.
8//! 4. Resolve `AssetType`: native `class_id` or `Script(script_guid)`.
9//! 5. Derive alias from the filename stem.
10//!
11//! Post-walk: alias-collision sweep (filename stems can clash; we suffix
12//! with parent dir on conflict and warn).
13
14use std::path::{Path, PathBuf};
15use std::sync::Arc;
16use std::sync::atomic::{AtomicUsize, Ordering};
17use std::sync::mpsc;
18use std::time::{Instant, SystemTime};
19
20use ahash::{AHashMap, AHashSet};
21
22use anyhow::{Context, Result};
23
24use crate::asset;
25use crate::class_id::{ClassId, class_from_ext};
26use crate::meta::{self, SPRITE_MODE_SINGLE, TEXTURE_TYPE_SPRITE};
27use crate::store::{
28 self, AssetDb, AssetEntry, AssetType, BakeCache, CachedAssetType, CachedEntry, StoreError,
29 SubAsset, CACHE_FILENAME, DB_FILENAME,
30};
31use crate::walk::{walk_meta_files, WalkError};
32
/// Errors from a bake run.
///
/// `Store(StoreError)` and `Walk(WalkError)` surface the typed source
/// errors from those modules — match on them when you need to
/// distinguish (e.g. "is this a schema-mismatch that needs re-bake?").
/// `Other` carries the remaining anyhow-chained errors (cache I/O,
/// dedup hard-fails, duplicate-guid checks) — most consumers propagate
/// these untouched.
///
/// Every variant's `Display` output is just the wrapped error's own
/// message (`#[error("{0}")]`); this layer adds no prefix of its own.
/// [`bake`] recovers the typed variants by downcasting the internal
/// `anyhow::Error`, so a `StoreError` / `WalkError` that was wrapped in
/// extra context before reaching it lands in `Other` instead.
#[derive(Debug, thiserror::Error)]
pub enum BakeError {
    /// A typed error from the store module.
    #[error("{0}")]
    Store(#[from] StoreError),
    /// A typed error from the walk module.
    #[error("{0}")]
    Walk(#[from] WalkError),
    /// Any other failure, carried as an `anyhow::Error`.
    #[error("{0}")]
    Other(#[from] anyhow::Error),
}
50
/// Caller-supplied name sanitizer. Returns `Some(rewritten)` when the
/// input contains characters the consumer wants to scrub from asset
/// names; `None` to keep the input as-is. During the post-walk build,
/// bake calls this once per top-level filename stem and once per
/// sub-asset name (whatever its source), on every bake — cached rows
/// included, since names are re-derived each run.
///
/// Bound is `Send + Sync + 'static`.
/// NOTE(review): the original rationale was that [`BakeOptions`] flows
/// into `ignore::WalkParallel` worker closures; in this file the
/// sanitizer is only invoked from the post-walk build step — confirm
/// whether the bound is still required.
///
/// Default behavior (no sanitizer) leaves all names verbatim.
pub type NameSanitizer = Box<dyn Fn(&str) -> Option<String> + Send + Sync + 'static>;

/// Caller-supplied warning sink. Bake invokes this for non-fatal events
/// (worker errors during the parallel walk, name-collision rewrites,
/// sanitizer rewrites). Each message is a single pre-formatted line that
/// already starts with `warning: `. The library never writes to stderr
/// itself.
pub type WarnSink = Box<dyn Fn(&str) + Send + Sync + 'static>;

/// Caller-supplied progress sink. Bake invokes this with the post-bake
/// summary line and (when `BakeOptions::verbose_timing` is true) with
/// per-phase timing. Separate from [`WarnSink`] so consumers can route
/// "info" output and warnings to different places.
pub type ProgressSink = Box<dyn Fn(&str) + Send + Sync + 'static>;

/// Borrowed view of a [`NameSanitizer`] for internal helpers. Kept as a
/// named type so per-call signatures don't trip clippy's `type_complexity`.
type NameSanitizerRef<'a> = &'a (dyn Fn(&str) -> Option<String> + Send + Sync);

/// Borrowed view of a [`WarnSink`]. See [`NameSanitizerRef`].
type WarnSinkRef<'a> = &'a (dyn Fn(&str) + Send + Sync);
79
/// Extensions of "container" assets whose embedded sub-asset docs stay
/// OUT of the global dedup pool: they live in the parent's namespace and
/// consumers resolve them via parent-scoped addressing (`$Sub@Parent`).
///
/// Keyed by extension rather than by `AssetType` because the top doc of a
/// `.playable` file is whichever sub-doc Unity sorts first by hashed fileID
/// (often an `AnimationTrack`, not the `TimelineAsset` itself), so the
/// resulting `AssetTypeRaw::Script(...)` carries an unstable script guid.
/// The extension is the only stable container discriminator.
const EMBEDDED_CONTAINER_EXTS: &[&str] = &["prefab", "controller", "anim", "mixer", "playable"];

/// True when `hint`'s file extension is one of [`EMBEDDED_CONTAINER_EXTS`].
///
/// The comparison is exact (case-sensitive). A path without an extension,
/// or whose extension is not valid UTF-8, is never a container.
fn is_embedded_container(hint: &str) -> bool {
    match Path::new(hint).extension().and_then(|os| os.to_str()) {
        Some(ext) => EMBEDDED_CONTAINER_EXTS.iter().any(|known| *known == ext),
        None => false,
    }
}
97
98/// True when `class_id` is a structural sub-doc that should be filtered
99/// out at parse time for the given container extension.
100///
101/// `.prefab`: GO / Transform / RectTransform / MonoBehaviour are all
102/// part of the GameObject tree — never addressable as sub-assets.
103/// `.controller` / `.anim` / `.mixer` / `.playable`: MonoBehaviour-114
104/// docs ARE addressable sub-assets (Timeline tracks, AudioMixerGroup,
105/// etc.) — only filter the GO-tree triplet, which doesn't appear in
106/// these files anyway (the predicate is a no-op there but stays valid
107/// for future-proofing).
108fn is_filterable_subdoc_for_ext(class_id: u32, ext: &str) -> bool {
109 let cls = ClassId::from_raw(class_id);
110 let is_go_tree = matches!(
111 cls,
112 Some(ClassId::GameObject | ClassId::Transform | ClassId::RectTransform)
113 );
114 let is_component = matches!(cls, Some(ClassId::MonoBehaviour));
115 is_go_tree || (is_component && ext == "prefab")
116}
117
/// Convert `SystemTime` → ns-since-UNIX as a `u64`.
///
/// Saturates at both ends instead of wrapping:
/// - pre-epoch timestamps (bogus clock / bogus file time) become `0`;
/// - timestamps whose nanosecond count exceeds `u64::MAX` (roughly year
///   2554 and later) become `u64::MAX`. A plain `as u64` cast would wrap
///   such a value to an unrelated small number.
fn mtime_ns(t: SystemTime) -> u64 {
    t.duration_since(SystemTime::UNIX_EPOCH)
        .map_or(0, |d| u64::try_from(d.as_nanos()).unwrap_or(u64::MAX))
}
124
/// One raw bake result, before name dedup. The type discriminator keeps
/// the unmapped script GUID for MonoBehaviour assets — interning happens
/// after the walk so we only need one final sort.
#[derive(Clone)]
struct RawEntry {
    /// Asset GUID as parsed from the `.meta` file.
    guid: u128,
    /// Pre-intern type discriminator (native class ID or script GUID).
    asset_type_raw: AssetTypeRaw,
    /// Path hint for the companion asset; also the bake-cache key.
    hint: String,
    /// Alias. Rows rebuilt from the cache arrive with an empty name; the
    /// post-walk build re-derives every name from `hint` before dedup.
    name: String,
    /// `.meta` mtime (ns since UNIX epoch); compared against the cache on
    /// the warm path.
    meta_mtime_ns: u64,
    /// Companion asset mtime (ns since UNIX epoch), recorded when the
    /// entry is parsed. The cache-hit fast path does not compare it.
    asset_mtime_ns: u64,
    /// Addressable sub-assets collected from the asset YAML and the
    /// `.meta` sprite sheet.
    sub_assets: Vec<SubAsset>,
}
138
/// Hashable type discriminator: `Native(classID)` for built-in classes
/// and `Script(scriptGuid)` for MonoBehaviour-backed assets. Hashable so
/// the dedup pass can bucket by `(name, asset_type)` without depending
/// on the post-walk script-intern table.
#[derive(Clone, Copy, PartialEq, Eq, Hash)]
enum AssetTypeRaw {
    /// Raw Unity class ID. May be a value `ClassId::from_raw` does not
    /// recognise — unknown IDs are stored as-is.
    Native(u32),
    /// GUID of the backing script, not yet interned.
    Script(u128),
}
148
149/// Per-worker-thread accumulator. Sends its collected `entries` + `errors`
150/// to the main thread via Drop — `ignore::WalkBuilder::run` drops each
151/// thread's visitor closure (and thus its captured `ThreadLocal`) on
152/// thread exit, so the main thread sees all batches once `walker.run`
153/// returns.
154struct ThreadLocal {
155 entries: Vec<RawEntry>,
156 errors: Vec<String>,
157 raw_tx: mpsc::Sender<Vec<RawEntry>>,
158 err_tx: mpsc::Sender<Vec<String>>,
159}
160
161impl Drop for ThreadLocal {
162 fn drop(&mut self) {
163 let entries = std::mem::take(&mut self.entries);
164 let errors = std::mem::take(&mut self.errors);
165 // Channel-closed errors are unreachable here — main thread holds
166 // the receivers until after `walker.run` returns.
167 let _ = self.raw_tx.send(entries);
168 let _ = self.err_tx.send(errors);
169 }
170}
171
/// In-memory bake cache: one previously-baked [`RawEntry`] per hint.
///
/// Cache key: hint (Assets-relative, forward-slashed). ahash beats siphash
/// by ~2x for our small-string keys.
type CacheMap = AHashMap<String, RawEntry>;
175
/// Run a `Result<Option<T>>`-producing closure under `catch_unwind` and
/// flatten the four-way outcome (success-with-value / success-skip /
/// inner-err / panic) into `Result<Option<T>, String>`.
///
/// * `label` — prefixes both inner errors and panic reports (the caller
///   passes the asset path).
/// * `task_name` — names the operation in the panic line (e.g.
///   `"process_one"`), so the message reads
///   `"<label>: panic in <task_name>: <payload>"`.
/// * `f` — the work to run. It is wrapped in `AssertUnwindSafe`: the
///   caller vouches that a panic inside `f` leaves no broken state behind
///   that is observed afterwards.
///
/// Returns `Ok(Some(v))` / `Ok(None)` untouched. An inner error becomes
/// `Err("<label>: <error>")`, rendered with the alternate `{:#}` form so
/// the whole context chain down to the root cause ends up in the message,
/// not just the outermost context. A panic becomes the panic line above;
/// payloads that are neither `&str` nor `String` are reported as
/// `<non-string panic payload>`.
fn run_with_panic_safety<T, F>(label: &str, task_name: &str, f: F) -> Result<Option<T>, String>
where
    F: FnOnce() -> Result<Option<T>>,
{
    match std::panic::catch_unwind(std::panic::AssertUnwindSafe(f)) {
        Ok(Ok(opt)) => Ok(opt),
        // `{:#}` keeps the cause chain; plain `{}` would print only the
        // outermost context and drop the underlying OS / parser error.
        Ok(Err(e)) => Err(format!("{label}: {e:#}")),
        Err(payload) => {
            // `panic!("literal")` carries a `&'static str`; a formatted
            // `panic!("{x}")` carries a `String`; `panic_any` can carry
            // anything else.
            let msg = payload
                .downcast_ref::<&str>()
                .map(|s| (*s).to_string())
                .or_else(|| payload.downcast_ref::<String>().cloned())
                .unwrap_or_else(|| "<non-string panic payload>".to_string());
            Err(format!("{label}: panic in {task_name}: {msg}"))
        }
    }
}
208
209/// Build the in-memory cache from a previously-saved `BakeCache`. Each
210/// `CachedEntry` becomes a `RawEntry` keyed by its hint. Cache hits during
211/// the walk drop straight into the post-walk pipeline.
212///
213/// `String::from(Box<str>)` is O(1) — Rust hands the heap allocation
214/// directly from the box to the new String, no copy. The map key is then
215/// cloned once for the parallel field on `RawEntry` (one alloc per entry).
216fn build_cache(cache: BakeCache) -> CacheMap {
217 let mut out = AHashMap::with_capacity(cache.entries.len());
218 for e in cache.entries {
219 let asset_type_raw = match e.asset_type {
220 CachedAssetType::Native(n) => AssetTypeRaw::Native(n),
221 CachedAssetType::Script(g) => AssetTypeRaw::Script(g),
222 };
223 let hint = String::from(e.hint);
224 let raw = RawEntry {
225 guid: e.guid,
226 asset_type_raw,
227 hint: hint.clone(),
228 name: String::new(), // re-derived in build_db
229 meta_mtime_ns: e.meta_mtime_ns,
230 asset_mtime_ns: e.asset_mtime_ns,
231 sub_assets: e.sub_assets,
232 };
233 out.insert(hint, raw);
234 }
235 out
236}
237
238/// Build the on-disk cache from the post-walk raw entries. Sorted by hint
239/// so the file is byte-stable across re-bakes when nothing changed.
240fn build_bake_cache(raw: &[RawEntry]) -> BakeCache {
241 let mut entries: Vec<CachedEntry> = raw
242 .iter()
243 .map(|r| CachedEntry {
244 hint: r.hint.clone().into_boxed_str(),
245 meta_mtime_ns: r.meta_mtime_ns,
246 asset_mtime_ns: r.asset_mtime_ns,
247 guid: r.guid,
248 asset_type: match r.asset_type_raw {
249 AssetTypeRaw::Native(n) => CachedAssetType::Native(n),
250 AssetTypeRaw::Script(g) => CachedAssetType::Script(g),
251 },
252 sub_assets: r.sub_assets.clone(),
253 })
254 .collect();
255 entries.sort_by(|a, b| a.hint.cmp(&b.hint));
256 BakeCache {
257 schema_version: store::SCHEMA_VERSION,
258 entries,
259 }
260}
261
/// Caller-supplied bake configuration.
///
/// Built by the consumer's CLI / library entry point and handed to
/// [`bake`]. The library never reads env vars, never resolves the
/// project root for you, and never writes to stderr — every side
/// channel routes through one of the optional sinks below.
pub struct BakeOptions {
    /// Project root containing `Assets/` + `ProjectSettings/`. Caller
    /// resolves this (typically via [`crate::walk::resolve_project_root`])
    /// before constructing options.
    pub project_root: PathBuf,
    /// Directory where `asset-db.bin` and `asset-db.cache.bin` are written.
    /// Caller composes the convention (e.g. `<project>/Library/unity-assetdb/`
    /// or a fixture-staging path). Created (with parents) at the start of
    /// a bake if it does not exist.
    pub out_dir: PathBuf,
    /// Optional name sanitizer; see [`NameSanitizer`].
    pub name_sanitizer: Option<NameSanitizer>,
    /// Optional warning sink; see [`WarnSink`]. `None` discards warnings,
    /// including per-file worker errors from the walk.
    pub on_warn: Option<WarnSink>,
    /// Optional progress sink; see [`ProgressSink`]. `None` discards the
    /// summary line.
    pub on_progress: Option<ProgressSink>,
    /// When true, [`Self::on_progress`] also receives a per-phase timing
    /// line (cache / walk / build / write). Env-var-driven behavior is the
    /// consumer's call.
    pub verbose_timing: bool,
    /// When true, [`Self::on_warn`] receives a line for each name-collision
    /// rewrite during dedup. Off by default to keep steady-state warm
    /// bakes quiet.
    pub verbose_collisions: bool,
}
293
294/// Bake entry-point. Walks `Assets/`, parses `.meta` + asset YAML,
295/// writes `<out_dir>/asset-db.bin` and `<out_dir>/asset-db.cache.bin`.
296pub fn bake(opts: &BakeOptions) -> Result<(), BakeError> {
297 bake_inner(opts).map_err(|e| {
298 // Surface typed source errors when they bubbled up via `?`
299 // without context wrapping — consumers can match on
300 // `BakeError::Store` etc. Otherwise fall through to `Other`.
301 match e.downcast::<StoreError>() {
302 Ok(s) => return BakeError::Store(s),
303 Err(e) => match e.downcast::<WalkError>() {
304 Ok(w) => return BakeError::Walk(w),
305 Err(e) => BakeError::Other(e),
306 },
307 }
308 })
309}
310
311fn bake_inner(opts: &BakeOptions) -> Result<()> {
312 let project_root = &opts.project_root;
313 std::fs::create_dir_all(&opts.out_dir)
314 .with_context(|| format!("create out-dir: {}", opts.out_dir.display()))?;
315 let db_file = opts.out_dir.join(DB_FILENAME);
316 let cache_file = opts.out_dir.join(CACHE_FILENAME);
317 let t_start = Instant::now();
318
319 // Load bake-only cache. Missing/corrupt → empty (first bake or stale).
320 let cache: CacheMap = match store::read_cache(&cache_file) {
321 Ok(c) => build_cache(c),
322 Err(_) => AHashMap::new(),
323 };
324 let cache_size = cache.len();
325 let t_cache = t_start.elapsed();
326
327 // Per-thread accumulators: each worker drops its `Vec<RawEntry>` and
328 // `Vec<String>` (errors) into channels at thread exit via `Drop`. Avoids
329 // the Mutex<Vec> contention 16k pushes on 8 cores produced — measured
330 // ~3-4 ms warm savings on meow-tower.
331 //
332 // `ignore::WalkParallel::run` requires `'static + Send` visitors, so
333 // shared state goes through `Arc`. Each worker clones the Arc once at
334 // factory time — the clone cost is negligible vs the per-entry work.
335 let (raw_tx, raw_rx) = mpsc::channel::<Vec<RawEntry>>();
336 let (err_tx, err_rx) = mpsc::channel::<Vec<String>>();
337 let cache_arc = Arc::new(cache);
338 let cache_hits = Arc::new(AtomicUsize::new(0));
339 let walked = Arc::new(AtomicUsize::new(0));
340 let project_root_arc: Arc<PathBuf> = Arc::new(project_root.clone());
341
342 walk_meta_files(project_root, || {
343 let raw_tx = raw_tx.clone();
344 let err_tx = err_tx.clone();
345 let cache = Arc::clone(&cache_arc);
346 let cache_hits = Arc::clone(&cache_hits);
347 let walked = Arc::clone(&walked);
348 let project_root = Arc::clone(&project_root_arc);
349 let mut local = ThreadLocal {
350 entries: Vec::with_capacity(2048),
351 errors: Vec::new(),
352 raw_tx,
353 err_tx,
354 };
355 move |meta_path: &Path| {
356 walked.fetch_add(1, Ordering::Relaxed);
357 // Catch panics so a single malformed .meta or unforeseen
358 // bug doesn't silently terminate the worker thread (which
359 // would lose its ThreadLocal accumulator). `ignore::WalkParallel`
360 // doesn't propagate visitor panics; without this, a panic in
361 // `process_one` produces a partial DB with no surfaced error.
362 // Helper does the catch_unwind + payload-downcast — see
363 // `run_with_panic_safety`.
364 let label = meta_path.display().to_string();
365 match run_with_panic_safety(&label, "process_one", || {
366 process_one(meta_path, &project_root, &cache, &cache_hits)
367 }) {
368 Ok(Some(r)) => local.entries.push(r),
369 Ok(None) => {}
370 Err(msg) => local.errors.push(msg),
371 }
372 }
373 })?;
374 drop(raw_tx);
375 drop(err_tx);
376 let t_walk = t_start.elapsed();
377
378 let mut errors: Vec<String> = Vec::new();
379 for v in err_rx.iter() {
380 errors.extend(v);
381 }
382 if let Some(sink) = opts.on_warn.as_ref() {
383 for e in &errors {
384 sink(&format!("warning: {e}"));
385 }
386 }
387
388 let mut raw: Vec<RawEntry> = Vec::with_capacity(cache_size + 256);
389 for v in raw_rx.iter() {
390 raw.extend(v);
391 }
392 // Build cache from `raw` (consumes nothing) before `build_db` consumes
393 // it. Sequence the writes so the cache is only persisted after the
394 // convert artifact lands — a half-baked cache without a matching db
395 // would let a later run skip parsing for entries that aren't in the
396 // db yet.
397 let bake_cache = build_bake_cache(&raw);
398 let db = build_db(
399 raw,
400 opts.name_sanitizer.as_deref(),
401 opts.on_warn.as_deref(),
402 opts.verbose_collisions,
403 )?;
404 let t_build = t_start.elapsed();
405
406 // No-op skip: every entry came from cache AND nothing was dropped from
407 // cache (count stable). Skips ~2-3 ms of bincode encode + file write
408 // on the steady-state warm path. Still skips only when both files are
409 // present — first run or after a manual delete writes anyway.
410 let hit_n = cache_hits.load(Ordering::Relaxed);
411 let no_op =
412 hit_n == cache_size && hit_n == db.entries.len() && db_file.exists() && cache_file.exists();
413
414 if !no_op {
415 store::write(&db_file, &db)
416 .with_context(|| format!("write asset-db: {}", db_file.display()))?;
417 store::write_cache(&cache_file, &bake_cache)
418 .with_context(|| format!("write cache: {}", cache_file.display()))?;
419 }
420 let t_write = t_start.elapsed();
421
422 if let Some(sink) = opts.on_progress.as_ref() {
423 sink(&format!(
424 "baked {} entries → {}",
425 db.entries.len(),
426 db_file.display()
427 ));
428 if opts.verbose_timing {
429 let walked_n = walked.load(Ordering::Relaxed);
430 let parsed_n = db.entries.len() - hit_n;
431 let write_phase = if no_op { "skipped" } else { "wrote" };
432 sink(&format!(
433 " walked={walked_n} hit={hit_n} parsed={parsed_n} | cache={:?} walk={:?} build={:?} write={:?} ({write_phase}) total={:?}",
434 t_cache,
435 t_walk - t_cache,
436 t_build - t_walk,
437 t_write - t_build,
438 t_write,
439 ));
440 }
441 }
442 Ok(())
443}
444
445/// Per-`.meta` work. Returns `Ok(None)` when the meta has no companion file
446/// to describe (e.g. orphaned `.meta`, directory `.meta`).
447fn process_one(
448 meta_path: &Path,
449 project_root: &Path,
450 cache: &CacheMap,
451 cache_hits: &AtomicUsize,
452) -> Result<Option<RawEntry>> {
453 let companion =
454 strip_meta_suffix(meta_path).ok_or_else(|| anyhow::anyhow!("not a .meta path"))?;
455
456 let hint = rel_hint(project_root, &companion)?;
457
458 // Cache-hit fast path: stat `.meta` only. If the mtime matches the
459 // cache, trust the cached row outright — no companion stat. Saves
460 // ~1 stat × N entries on the warm bake, the bake's dominant cost
461 // (warm walk against meow-tower dropped from 47 ms → ~26 ms).
462 //
463 // **Cache assumption**: Unity's importer touches the `.meta` mtime
464 // whenever it re-imports the asset, so a `.meta` mtime drift is the
465 // canonical "this asset changed" signal. Hand-editing the asset YAML
466 // *without* touching the .meta will serve a stale cached row until
467 // the next .meta touch (or a manual `rm asset-db.cache.bin`).
468 // Documented + pinned by `tests/bake.rs::cache_does_not_detect_asset_only_touch`.
469 let meta_md =
470 std::fs::metadata(meta_path).with_context(|| format!("stat: {}", meta_path.display()))?;
471 let meta_mtime_ns = mtime_ns(meta_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));
472
473 if let Some(cached) = cache.get(&hint)
474 && cached.meta_mtime_ns == meta_mtime_ns
475 {
476 cache_hits.fetch_add(1, Ordering::Relaxed);
477 return Ok(Some(cached.clone()));
478 }
479
480 // Cache miss. Now stat the companion — handles directory-`.meta`
481 // exclusion too. Slow path beyond here re-parses both files.
482 let Ok(companion_md) = std::fs::metadata(&companion) else {
483 return Ok(None);
484 };
485 if companion_md.is_dir() {
486 return Ok(None);
487 }
488 let asset_mtime_ns = mtime_ns(companion_md.modified().unwrap_or(SystemTime::UNIX_EPOCH));
489
490 process_one_uncached(meta_path, &companion, &hint, meta_mtime_ns, asset_mtime_ns)
491}
492
493/// Slow path: parse `.meta` + asset YAML, build a `RawEntry`. Shared
494/// between the "no cache row at all" and "cache row but companion mtime
495/// drifted" cases — both end up doing the same parse work.
496fn process_one_uncached(
497 meta_path: &Path,
498 companion: &Path,
499 hint: &str,
500 meta_mtime_ns: u64,
501 asset_mtime_ns: u64,
502) -> Result<Option<RawEntry>> {
503 // Cache miss → parse.
504 let meta_text = std::fs::read_to_string(meta_path)
505 .with_context(|| format!("read .meta: {}", meta_path.display()))?;
506 let meta_info = meta::parse(&meta_text)?;
507
508 let ext = companion.extension().and_then(|s| s.to_str()).unwrap_or("");
509 let from_ext = class_from_ext(ext);
510
511 let mut sub_assets: Vec<SubAsset> = Vec::new();
512 let mut top_class_id: Option<u32> = None;
513 let mut script_guid: Option<u128> = None;
514
515 // YAML peek strategy:
516 // - WithSubAssets: types where extra docs ARE addressable from outside.
517 // `.asset`/`.spriteatlas`/`.spriteatlasv2` host explicit sub-assets;
518 // `.prefab`/`.controller`/`.anim`/`.mixer`/`.playable` can host
519 // embedded sub-asset docs (legacy `AnimationClip` inline in a
520 // prefab; AnimatorState in a controller; AudioMixerGroup in a
521 // mixer; Timeline tracks in a playable) that other prefabs
522 // address as `{fileID, guid: <parent.guid>, type: 3}`. Without
523 // capturing them the embedded ref encodes as `&#f<fid>` and
524 // cross-prefab refs degrade to the parent alias + `#f<fid>` suffix.
525 // Embeds are excluded from the global dedup pool — see
526 // `is_embedded` in `build_db`.
527 // - TopOnly: types whose extra docs are internal scene-graph that
528 // isn't addressable from outside (`.unity`, `.mat`, `.mask`).
529 // - None: extension already says everything (`.png`, `.fbx`, scripts).
530 let parse_mode: Option<asset::ParseMode> = match ext {
531 "asset" | "spriteatlas" | "spriteatlasv2" | "prefab" | "controller" | "anim"
532 | "mixer" | "playable" => Some(asset::ParseMode::WithSubAssets),
533 "mat" | "mask" | "unity" => Some(asset::ParseMode::TopOnly),
534 _ => None,
535 };
536
537 if let Some(mode) = parse_mode {
538 let asset_text = read_asset_for_mode(companion, mode)?;
539 let info = asset::parse(&asset_text, mode)?;
540 top_class_id = info.top_class_id;
541 script_guid = info.script_guid;
542 for s in info.sub_assets {
543 if s.name.is_empty() {
544 continue;
545 }
546 if is_filterable_subdoc_for_ext(s.class_id, ext) {
547 continue;
548 }
549 sub_assets.push(SubAsset {
550 file_id: s.file_id,
551 class_id: s.class_id,
552 name: s.name.into_boxed_str(),
553 });
554 }
555 }
556
557 // Precedence: script_guid (MonoBehaviour-backed) > from_ext > top_class_id.
558 // `.prefab` and `.unity` deliberately let from_ext win — their YAML's first
559 // doc is a *contained* object (GameObject = classID 1), not the asset's
560 // class (Prefab = 1001). Falling back to top_class_id only for extensions
561 // without a stable class mapping (e.g. `.asset`, where the YAML peek is
562 // the only signal).
563 let asset_type_raw = if let Some(g) = script_guid {
564 AssetTypeRaw::Script(g)
565 } else if let Some(cls) = from_ext {
566 AssetTypeRaw::Native(cls as u32)
567 } else if let Some(cls) = top_class_id.and_then(ClassId::from_raw) {
568 AssetTypeRaw::Native(cls as u32)
569 } else if let Some(cls) = top_class_id {
570 // Unknown raw class ID — store anyway; lookup will treat as Native.
571 AssetTypeRaw::Native(cls)
572 } else {
573 return Ok(None);
574 };
575
576 let name = filename_stem(companion);
577
578 // Implicit Sprite sub-asset for Single-mode textures. Compute first
579 // (borrows `meta_info` whole); the for-loop below moves
580 // `meta_info.sprite_sheet`, so the predicate must run before that.
581 let implicit_sprite = synthesize_implicit_sprite(&meta_info, &name);
582
583 // Texture sprite-sheet sub-assets (from .meta). Always class Sprite —
584 // .meta `sprites:` entries are by definition Sprite sub-assets of the
585 // texture (Unity's Sprite-mode importer creates them at fileID-as-hash).
586 for (fid, name) in meta_info.sprite_sheet {
587 sub_assets.push(SubAsset {
588 file_id: fid,
589 class_id: ClassId::Sprite as u32,
590 name: name.into_boxed_str(),
591 });
592 }
593
594 if let Some(sub) = implicit_sprite {
595 sub_assets.push(sub);
596 }
597
598 Ok(Some(RawEntry {
599 guid: meta_info.guid,
600 asset_type_raw,
601 hint: hint.to_string(),
602 name,
603 meta_mtime_ns,
604 asset_mtime_ns,
605 sub_assets,
606 }))
607}
608
609/// Synthesize the implicit Sprite sub-asset Unity auto-generates for
610/// Single-mode Sprite textures. Unity creates one Sprite (fileID
611/// `21300000` = `ClassId::Sprite × 100_000`) named after the texture
612/// file but never writes it to the `.meta` — the `sprites:` list stays
613/// empty. Without synthesizing it here, `AssetMap::elidable_subasset_fid`
614/// (`mapping/asset_map.rs`) can't fire and `_sprite: $TexName` fields
615/// keep the redundant `#f21300000` suffix on pull.
616///
617/// Returns `None` when:
618/// - the `.meta`'s `spriteSheet.sprites:` list is non-empty (explicit
619/// entries own the sub-asset list — atlases, multi-sprite sheets);
620/// - `textureType` isn't 8 (Sprite); or
621/// - `spriteMode` isn't 1 (Single).
622///
623/// Branches pinned by `bake_asset_db::bake::tests::synthesize_implicit_sprite_*`.
624fn synthesize_implicit_sprite(meta: &meta::MetaInfo, stem: &str) -> Option<SubAsset> {
625 if meta.sprite_sheet.is_empty()
626 && meta.texture_type == Some(TEXTURE_TYPE_SPRITE)
627 && meta.sprite_mode == Some(SPRITE_MODE_SINGLE)
628 {
629 Some(SubAsset {
630 file_id: ClassId::Sprite.canonical_subobject_fid(),
631 class_id: ClassId::Sprite as u32,
632 name: stem.to_string().into_boxed_str(),
633 })
634 } else {
635 None
636 }
637}
638
639fn warn_sanitized(on_warn: Option<WarnSinkRef<'_>>, kind: &str, hint: &str, old: &str, new: &str) {
640 if let Some(sink) = on_warn {
641 sink(&format!(
642 "warning: {kind} {hint} name `{old}` contains ref-reserved char; renamed to `{new}`",
643 ));
644 }
645}
646
647fn build_db(
648 mut raw: Vec<RawEntry>,
649 sanitizer: Option<NameSanitizerRef<'_>>,
650 on_warn: Option<WarnSinkRef<'_>>,
651 verbose_collisions: bool,
652) -> Result<AssetDb> {
653 // Stable order: sort by hint so dedup picks the same "winner" each bake.
654 raw.sort_by(|a, b| a.hint.cmp(&b.hint));
655
656 // Reset every entry's name to its raw filename stem before dedup
657 // (cached entries arrive with their previously-suffixed name; if we
658 // dedup against that, collisions compound across bakes), then sanitize
659 // ref-reserved chars in both top-level and sub-asset names — covers the
660 // three name sources (filename stem, YAML m_Name sub-assets, `.meta`
661 // sprite-sheet entries) in one pass before dedup uses `r.name` as key.
662 for r in raw.iter_mut() {
663 r.name = filename_stem_from_hint(&r.hint);
664 if let Some(san) = sanitizer
665 && let Some(clean) = san(&r.name)
666 {
667 warn_sanitized(on_warn, "asset", &r.hint, &r.name, &clean);
668 r.name = clean;
669 }
670 if let Some(san) = sanitizer {
671 for sub in r.sub_assets.iter_mut() {
672 if let Some(clean) = san(&sub.name) {
673 warn_sanitized(on_warn, "sub-asset of", &r.hint, &sub.name, &clean);
674 sub.name = clean.into_boxed_str();
675 }
676 }
677 }
678 }
679
680 // Type-aware dedup: collisions are scoped by `(name, asset_type)`.
681 // Same-name entries of distinct `asset_type` (`Foo.png` Texture2D +
682 // `Foo.prefab` Prefab) get distinct alias buckets — the consuming
683 // field's C# type discriminates at decode. Embedded sub-asset docs
684 // of container types are excluded from the global pool entirely
685 // (see [Name collisions](docs/asset-database.md#name-collisions)).
686
687 // Pass 1: tally distinct-guid owners per `(name, asset_type)` bucket.
688 let mut owners: AHashMap<(String, AssetTypeRaw), AHashSet<u128>> =
689 AHashMap::with_capacity(raw.len());
690 for r in &raw {
691 let key = (r.name.clone(), r.asset_type_raw);
692 owners.entry(key).or_default().insert(r.guid);
693 if is_embedded_container(&r.hint) {
694 continue;
695 }
696 for sub in &r.sub_assets {
697 let key = (
698 sub.name.to_string(),
699 AssetTypeRaw::Native(sub.class_id),
700 );
701 owners.entry(key).or_default().insert(r.guid);
702 }
703 }
704 let contested = |name: &str, t: AssetTypeRaw| {
705 owners
706 .get(&(name.to_string(), t))
707 .is_some_and(|s| s.len() > 1)
708 };
709
710 // Pass 2: walk entries in hint-sorted order, renaming every contested
711 // claim. `taken` tracks `(name, asset_type) → guid` pairs already
712 // claimed in this pass so the disambiguator never picks a candidate
713 // that collides with an earlier (different-guid) entry of the same
714 // type; same-guid sharing remains allowed.
715 let mut taken: AHashMap<(String, AssetTypeRaw), u128> = AHashMap::with_capacity(raw.len());
716 for r in raw.iter_mut() {
717 let top_type = r.asset_type_raw;
718 if contested(&r.name, top_type) {
719 let new_name = disambiguate(&r.name, &r.hint, r.guid, top_type, &taken)?;
720 if verbose_collisions && let Some(sink) = on_warn {
721 sink(&format!(
722 "warning: name collision on `{}` (guid {:032x}); renamed to `{}`",
723 r.name, r.guid, new_name,
724 ));
725 }
726 r.name = new_name;
727 }
728 match taken.get(&(r.name.clone(), top_type)) {
729 Some(&prev) if prev != r.guid => anyhow::bail!(
730 "asset-db: name `{}` claimed by both guid {:032x} and {prev:032x} \
731 after dedup — `disambiguate` produced a non-unique alias",
732 r.name,
733 r.guid,
734 ),
735 _ => {
736 taken.insert((r.name.clone(), top_type), r.guid);
737 }
738 }
739
740 if is_embedded_container(&r.hint) {
741 // Prefab-embedded sub-assets bypass the global dedup pool;
742 // sanitization already happened above. Names stay as authored
743 // and resolve via `$Sub@Parent` at the codec layer.
744 continue;
745 }
746 for sub in r.sub_assets.iter_mut() {
747 let sub_type = AssetTypeRaw::Native(sub.class_id);
748 if contested(&sub.name, sub_type) {
749 let original = sub.name.to_string();
750 let new_name = disambiguate(&original, &r.hint, r.guid, sub_type, &taken)?;
751 if verbose_collisions && let Some(sink) = on_warn {
752 sink(&format!(
753 "warning: sub-asset name collision on `{}` (parent guid {:032x}); renamed to `{}`",
754 original, r.guid, new_name,
755 ));
756 }
757 sub.name = new_name.into_boxed_str();
758 }
759 // Same-guid sharing is allowed — a sub-asset's deduped name
760 // will often equal the parent's deduped alias (same hint
761 // feeds disambiguate), and that's the desired outcome.
762 let key = (sub.name.to_string(), sub_type);
763 if !taken.contains_key(&key) {
764 taken.insert(key, r.guid);
765 }
766 }
767 }
768
769 // Intern script types and finalize entries.
770 let mut db = AssetDb::new();
771 let entries: Vec<AssetEntry> = raw
772 .into_iter()
773 .map(|r| {
774 let asset_type = match r.asset_type_raw {
775 AssetTypeRaw::Native(n) => AssetType::Native(n),
776 AssetTypeRaw::Script(g) => AssetType::Script(db.intern_script(g)),
777 };
778 AssetEntry {
779 guid: r.guid,
780 asset_type,
781 name: r.name.into_boxed_str(),
782 sub_assets: r.sub_assets,
783 hint: r.hint.into_boxed_str(),
784 }
785 })
786 .collect();
787 db.entries = entries;
788 db.sort();
789 check_no_full_duplicates(&db)?;
790 Ok(db)
791}
792
793/// Hard-fail on two corruption cases:
794///
795/// 1. **Two top-level entries share a GUID.** Hand-edited or copy-pasted
796/// `.meta` whose GUID wasn't rewritten. The name-dedup loop only
797/// renames when guids *differ*, so same-guid pairs flow through with
798/// distinct names and `db.sort()` doesn't merge them. Catches the
799/// duplicate-`.meta` case the Unity-hidden walker filter also guards
800/// against — belt and braces.
801///
802/// 2. **Within-entry sub-asset rows share `(name, fileID)`.** Two YAML
803/// sub-docs in the same asset declared identical names + fileIDs —
804/// asset-side corruption, parser bug, or atlas content collision.
805fn check_no_full_duplicates(db: &AssetDb) -> Result<()> {
806 // Top-level: guid uniqueness. `db.entries` is already guid-sorted, so
807 // a single pass over consecutive pairs catches every dup.
808 for w in db.entries.windows(2) {
809 if w[0].guid == w[1].guid {
810 anyhow::bail!(
811 "duplicate top-level GUID: {:032x} between names `{}` and `{}` — likely two .meta files share a GUID",
812 w[0].guid,
813 w[0].name,
814 w[1].name,
815 );
816 }
817 }
818
819 // Sub-assets: (guid, fileID, name) uniqueness within each entry.
820 let mut seen: AHashSet<(i64, &str)> = AHashSet::new();
821 for e in &db.entries {
822 seen.clear();
823 for s in &e.sub_assets {
824 if !seen.insert((s.file_id, &*s.name)) {
825 anyhow::bail!(
826 "duplicate sub-asset record: name={} guid={:032x} fileID={} type={:?}",
827 s.name,
828 e.guid,
829 s.file_id,
830 e.asset_type,
831 );
832 }
833 }
834 }
835 Ok(())
836}
837
838/// Read just enough of the asset to satisfy `mode`.
839///
840/// `TopOnly` reads the first 4 KiB and truncates at the last newline — that
841/// covers a YAML preamble (`%YAML 1.1\n%TAG …\n`), the first
842/// `--- !u!<id> &<fid>` header, and a `m_Script` line for .asset
843/// MonoBehaviours (≤ ~200 bytes). `WithSubAssets` reads the full file.
844///
845/// Trimming at the last newline guards against UTF-8 boundary cuts inside a
846/// multi-byte character — every YAML line is complete UTF-8.
847fn read_asset_for_mode(path: &Path, mode: asset::ParseMode) -> Result<String> {
848 use std::io::Read;
849 match mode {
850 asset::ParseMode::WithSubAssets => {
851 std::fs::read_to_string(path).with_context(|| format!("read asset: {}", path.display()))
852 }
853 asset::ParseMode::TopOnly => {
854 const HEAD_BYTES: u64 = 4096;
855 let f = std::fs::File::open(path)
856 .with_context(|| format!("open asset: {}", path.display()))?;
857 let mut buf = Vec::with_capacity(HEAD_BYTES as usize);
858 f.take(HEAD_BYTES)
859 .read_to_end(&mut buf)
860 .with_context(|| format!("read asset: {}", path.display()))?;
861 // Drop trailing partial line so .lines() yields only complete
862 // (and thus complete-UTF-8) lines. If the head has no newline at
863 // all (pathological — single-line YAML > 4 KiB), keep the buffer
864 // and let `from_utf8` decide.
865 if let Some(last_nl) = buf.iter().rposition(|&b| b == b'\n') {
866 buf.truncate(last_nl + 1);
867 }
868 String::from_utf8(buf)
869 .with_context(|| format!("non-utf8 asset head: {}", path.display()))
870 }
871 }
872}
873
/// Turn a `.meta` path into the path of its companion file by removing the
/// trailing `.meta`.
///
/// Returns `None` when the path is not valid UTF-8 or does not end in
/// `.meta`.
fn strip_meta_suffix(p: &Path) -> Option<PathBuf> {
    p.to_str()
        .and_then(|text| text.strip_suffix(".meta"))
        .map(PathBuf::from)
}
878
879fn rel_hint(project_root: &Path, companion: &Path) -> Result<String> {
880 // Strip the project root, not just `Assets/`. The walker now visits both
881 // `<project>/Assets/` and `<project>/Packages/`, so hints look like
882 // `Assets/Foo.prefab` or `Packages/com.boxcat.libs/Bar.mixer`.
883 let rel = companion
884 .strip_prefix(project_root)
885 .with_context(|| format!("strip prefix: {}", companion.display()))?;
886 let s = rel.to_string_lossy().replace('\\', "/");
887 Ok(s)
888}
889
/// File name of `p` without its last extension, as an owned `String`.
///
/// Yields an empty string when the path has no file name or the stem is
/// not valid UTF-8.
fn filename_stem(p: &Path) -> String {
    match p.file_stem().and_then(|os| os.to_str()) {
        Some(stem) => stem.to_owned(),
        None => String::new(),
    }
}
896
/// Same as taking the file stem of a path, but starting from a hint string
/// such as `Assets/UI/Foo.png`.
///
/// Yields an empty string when the hint has no file name component.
fn filename_stem_from_hint(hint: &str) -> String {
    let stem = Path::new(hint).file_stem().and_then(|os| os.to_str());
    stem.map(str::to_owned).unwrap_or_default()
}
904
905/// Pick a unique alias for `stem` given `hint` and an existing `taken` map.
906/// Strategy: try `stem^dir` for successively-deeper parent dirs. A candidate
907/// is considered "free" iff it's absent from `taken` *or* already mapped to
908/// `owner_guid` (the latter covers the same-guid sub-asset case where the
909/// parent's deduped top-level alias is a valid name to share).
910///
911/// `asset_type` scopes the dedup bucket — a candidate is "taken" only when
912/// another guid has claimed the exact `(name, asset_type)` pair. Two assets
913/// of different `asset_type` (e.g. Texture2D `Foo.png` vs Prefab `Foo.prefab`)
914/// share the bare alias `Foo` without contesting because the codec layer
915/// uses the field's declared C# type to pick the right one at lookup time.
916///
917/// Hard-fails when no parent segment yields a free candidate — ambiguity
918/// surfaces at bake time rather than getting papered over with a guid suffix.
919/// See [Name collisions](docs/asset-database.md#name-collisions) for the
920/// `^` separator rationale.
921fn disambiguate(
922 stem: &str,
923 hint: &str,
924 owner_guid: u128,
925 asset_type: AssetTypeRaw,
926 taken: &AHashMap<(String, AssetTypeRaw), u128>,
927) -> Result<String> {
928 let parts: Vec<&str> = Path::new(hint)
929 .parent()
930 .map(|p| p.iter().filter_map(|c| c.to_str()).collect::<Vec<_>>())
931 .unwrap_or_default();
932
933 // Walk parent segments from nearest to root, picking the shortest
934 // suffix that doesn't collide with a different-guid owner.
935 let mut suffix = String::new();
936 for seg in parts.iter().rev() {
937 if !suffix.is_empty() {
938 suffix.insert(0, '/');
939 }
940 suffix.insert_str(0, seg);
941 let candidate = format!("{stem}^{suffix}");
942 match taken.get(&(candidate.clone(), asset_type)) {
943 None => return Ok(candidate),
944 Some(&prev) if prev == owner_guid => return Ok(candidate),
945 Some(_) => continue,
946 }
947 }
948 anyhow::bail!(
949 "asset-db: cannot disambiguate name `{stem}` for guid {owner_guid:032x} \
950 (hint `{hint}`) — every parent-segment suffix is already taken by \
951 another asset. Rename one of the colliding assets in source.",
952 )
953}
954
// Unit tests for the bake helpers: panic-safe task wrapper, implicit
// sprite synthesis, sub-doc filtering, stem helpers, `disambiguate`, and
// the `build_db` dedup rules.
#[cfg(test)]
mod tests {
    use super::*;

    /// An `Ok(Some(_))` from the closure is returned unchanged.
    #[test]
    fn run_with_panic_safety_passes_through_ok_some() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(Some(42)));
        assert_eq!(r, Ok(Some(42)));
    }

    /// An `Ok(None)` from the closure is returned unchanged.
    #[test]
    fn run_with_panic_safety_passes_through_ok_none() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("path", "task", || Ok(None));
        assert_eq!(r, Ok(None));
    }

    /// An error from the closure is rendered as `<label>: <error>`; the
    /// task name does not appear in that case.
    #[test]
    fn run_with_panic_safety_formats_inner_error_with_label() {
        let r: Result<Option<i32>, String> = run_with_panic_safety("foo.meta", "task", || {
            Err(anyhow::anyhow!("malformed yaml"))
        });
        assert_eq!(r, Err("foo.meta: malformed yaml".to_string()));
    }

    /// A panic carrying a `&str` payload becomes
    /// `<label>: panic in <task>: <payload>`.
    #[test]
    fn run_with_panic_safety_catches_str_panic() {
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || {
                std::panic::panic_any("boom (&str payload)")
            });
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: boom (&str payload)".to_string())
        );
    }

    /// A panic carrying a `String` payload is reported the same way as a
    /// `&str` one.
    #[test]
    fn run_with_panic_safety_catches_string_panic() {
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || {
                // String payloads come from `panic!("{x}")` via the format!
                // path — the runtime hands a String, not a &str.
                panic!("formatted {}", "msg")
            });
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: formatted msg".to_string())
        );
    }

    /// A panic whose payload is neither `&str` nor `String` is reported
    /// with a fixed placeholder message.
    #[test]
    fn run_with_panic_safety_handles_non_string_panic_payload() {
        // `panic_any(42_i32)` produces a panic whose payload isn't &str
        // or String. The helper falls back to a sentinel message rather
        // than dropping the error silently.
        let r: Result<Option<i32>, String> =
            run_with_panic_safety("foo.meta", "process_one", || std::panic::panic_any(42_i32));
        assert_eq!(
            r,
            Err("foo.meta: panic in process_one: <non-string panic payload>".to_string())
        );
    }

    /// Build a `MetaInfo` with guid 0 and the given texture type, sprite
    /// mode and sprite-sheet rows.
    fn meta_for(
        texture_type: Option<u32>,
        sprite_mode: Option<u32>,
        sprites: Vec<(i64, String)>,
    ) -> meta::MetaInfo {
        meta::MetaInfo {
            guid: 0,
            sprite_sheet: sprites,
            texture_type,
            sprite_mode,
        }
    }

    /// Sprite texture + single mode + empty sheet → one synthesized
    /// sub-asset named after the stem, using the canonical Sprite fileID.
    #[test]
    fn synthesize_implicit_sprite_fires_on_single_mode_sprite_with_empty_sheet() {
        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(SPRITE_MODE_SINGLE), vec![]);
        let sub = synthesize_implicit_sprite(&m, "Icon").expect("synthesis should fire");
        assert_eq!(sub.file_id, ClassId::Sprite.canonical_subobject_fid());
        assert_eq!(&*sub.name, "Icon");
    }

    /// A non-empty sprite sheet suppresses synthesis.
    #[test]
    fn synthesize_implicit_sprite_skips_when_sheet_non_empty() {
        // Explicit sprites own the sub-asset list — atlas-shaped meta
        // doesn't get a phantom main-Sprite layered on top.
        let m = meta_for(
            Some(TEXTURE_TYPE_SPRITE),
            Some(SPRITE_MODE_SINGLE),
            vec![(12345, "explicit_a".into())],
        );
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    /// Sprite mode 2 suppresses synthesis even with an empty sheet.
    #[test]
    fn synthesize_implicit_sprite_skips_on_multiple_mode() {
        // spriteMode: 2 (Multiple = atlas) means "the sprites: list is
        // canonical, even if currently empty". No synthesis.
        let m = meta_for(Some(TEXTURE_TYPE_SPRITE), Some(2), vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    /// Texture type 0 suppresses synthesis.
    #[test]
    fn synthesize_implicit_sprite_skips_on_non_sprite_texture() {
        // textureType: 0 (Default) — texture isn't a Sprite at all.
        let m = meta_for(Some(0), Some(SPRITE_MODE_SINGLE), vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    /// Missing texture type and sprite mode suppress synthesis.
    #[test]
    fn synthesize_implicit_sprite_skips_when_predicates_absent() {
        // Both texture_type and sprite_mode None — `.meta` from a
        // non-texture asset (or a stale .meta missing the fields).
        let m = meta_for(None, None, vec![]);
        assert!(synthesize_implicit_sprite(&m, "Icon").is_none());
    }

    /// `is_filterable_subdoc_for_ext` is the single point where parse-
    /// time sub-asset filtering decides what's a structural prefab tree
    /// doc vs. a real sub-asset. Pin the contract per extension.
    #[test]
    fn is_filterable_subdoc_for_ext_branches_correctly() {
        // .prefab: GO + Transform + RectTransform + MonoBehaviour-as-component.
        for cls in [1, 4, 224, 114] {
            assert!(
                is_filterable_subdoc_for_ext(cls, "prefab"),
                "class {cls} should be filtered for .prefab",
            );
        }
        // .playable: Timeline tracks live as MB-114 — must NOT filter.
        // GO/Transform never appear in .playable but the predicate stays
        // valid (no-op).
        assert!(!is_filterable_subdoc_for_ext(114, "playable"));
        assert!(is_filterable_subdoc_for_ext(1, "playable"));
        // .controller: AnimatorState (1102), BlendTree (206) — never
        // filtered.
        assert!(!is_filterable_subdoc_for_ext(1102, "controller"));
        assert!(!is_filterable_subdoc_for_ext(114, "controller"));
        // .mixer: AudioMixerGroup (273) — never filtered.
        assert!(!is_filterable_subdoc_for_ext(273, "mixer"));
        assert!(!is_filterable_subdoc_for_ext(114, "mixer"));
        // .asset / .spriteatlas: MB-114 are real ScriptableObject sub-
        // assets. Real classes (Sprite=213) are never filtered either.
        assert!(!is_filterable_subdoc_for_ext(114, "asset"));
        assert!(!is_filterable_subdoc_for_ext(213, "spriteatlas"));
    }

    /// Both stem helpers drop the directory and the extension.
    #[test]
    fn stem_basic() {
        assert_eq!(filename_stem(Path::new("foo/Bar.prefab")), "Bar");
        assert_eq!(filename_stem_from_hint("foo/Bar.prefab"), "Bar");
    }

    /// The nearest parent dir is tried first; a longer suffix is used only
    /// when the shorter one is owned by a different guid of the same type.
    #[test]
    fn disambiguate_walks_parents() {
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), t), 1u128);
        // Nearest parent suffix wins on first try.
        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
        assert_eq!(alias, "Foo^Editor");

        // First-level parent already taken (by a different guid, same type)
        // → falls back to deeper path.
        taken.insert(("Foo^Editor".to_string(), t), 3);
        let alias = disambiguate("Foo", "pkg/Editor/Foo.cs", 2, t, &taken).unwrap();
        assert_eq!(alias, "Foo^pkg/Editor");
    }

    /// Entries in `taken` under a different asset type do not influence
    /// the result; the first suffixed candidate is returned.
    #[test]
    fn disambiguate_ignores_collisions_in_other_types() {
        // A different `AssetTypeRaw` claiming the same alias does NOT
        // contest — type-aware dedup gives each `(name, type)` its own
        // bucket. PNG (Texture2D) and prefab (Prefab) named `Foo` both
        // keep bare `Foo`.
        let png = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let prefab = AssetTypeRaw::Native(ClassId::Prefab as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), png), 1u128);
        // disambiguate against the prefab bucket — `Foo` is free here.
        let alias = disambiguate("Foo", "Assets/Bar/Foo.prefab", 2, prefab, &taken).unwrap();
        // Walk produces `Foo^Bar` because we always step at least one
        // parent (disambiguate's contract is "produce a suffixed form");
        // the contention check upstream is what decides whether to call.
        assert_eq!(alias, "Foo^Bar");
    }

    /// A candidate already mapped to the caller's own guid is accepted.
    #[test]
    fn disambiguate_returns_existing_when_same_owner() {
        // When the candidate suffix is already mapped to `owner_guid`, the
        // sub-asset can safely share that alias — its lookup path resolves
        // back to the same guid, so no real ambiguity exists.
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Cloud1".to_string(), t), 0xa0_u128);
        taken.insert(("Cloud1^Tower".to_string(), t), 0xb0_u128);
        let alias =
            disambiguate("Cloud1", "Assets/Tower/Cloud1.png", 0xb0_u128, t, &taken).unwrap();
        assert_eq!(alias, "Cloud1^Tower");
    }

    /// A hint without any directory part yields an error that mentions
    /// both the dedup pass and the stem.
    #[test]
    fn disambiguate_hard_fails_when_no_parent_segments() {
        let t = AssetTypeRaw::Native(ClassId::Texture2D as u32);
        let mut taken = AHashMap::new();
        taken.insert(("Foo".to_string(), t), 1u128);
        // Hint has no directories — nothing to suffix with. Must error
        // rather than silently fall back to a guid suffix.
        let err =
            disambiguate("Foo", "Foo.cs", 2u128, t, &taken).expect_err("must hard-fail");
        let msg = format!("{err:#}");
        assert!(msg.contains("disambiguate"), "msg: {msg}");
        assert!(msg.contains("Foo"), "msg: {msg}");
    }

    /// Build a Texture2D-typed `RawEntry` with the given hint, guid and
    /// sub-assets; name is empty and both mtimes are zero.
    fn raw_native(hint: &str, guid: u128, sub_assets: Vec<SubAsset>) -> RawEntry {
        RawEntry {
            guid,
            asset_type_raw: AssetTypeRaw::Native(ClassId::Texture2D as u32),
            hint: hint.to_string(),
            // `build_db`'s first pass overwrites `name` from `hint`, so any
            // value here is fine. An empty string keeps the test minimal.
            name: String::new(),
            meta_mtime_ns: 0,
            asset_mtime_ns: 0,
            sub_assets,
        }
    }

    /// Pin: when a name is claimed by ≥2 distinct guids of the same
    /// `asset_type`, every claimant must rename — no "first wins" carve-out.
    /// The deduped form is consistent across claimants: each entry resolves
    /// through `disambiguate` against its own hint.
    ///
    /// Two same-type Texture2D `Cloud1.png` files in different folders
    /// share the bare alias `Cloud1` until type-aware dedup forces both to
    /// suffix.
    #[test]
    fn build_db_renames_every_claimant_when_name_is_contested() {
        let png_a_guid = 0xa0_u128;
        let png_b_guid = 0xb0_u128;
        let sprite_fid: i64 = 21300000;

        let raw = vec![
            raw_native("Assets/Other/Cloud1.png", png_a_guid, vec![]),
            raw_native(
                "Assets/Tower/Cloud1.png",
                png_b_guid,
                vec![SubAsset {
                    file_id: sprite_fid,
                    class_id: ClassId::Sprite as u32,
                    name: "Cloud1".into(),
                }],
            ),
        ];

        let db = build_db(raw, None, None, false).expect("build_db should succeed");

        let a_entry = db.find_by_guid(png_a_guid).unwrap();
        let b_entry = db.find_by_guid(png_b_guid).unwrap();

        // Neither entry keeps the bare alias — both renamed.
        assert_ne!(&*a_entry.name, "Cloud1");
        assert_ne!(&*b_entry.name, "Cloud1");
        assert!(
            a_entry.name.starts_with("Cloud1^"),
            "first png top-level not deduped: {}",
            a_entry.name,
        );
        assert!(
            b_entry.name.starts_with("Cloud1^"),
            "second png top-level not deduped: {}",
            b_entry.name,
        );
        // Distinct hints → distinct deduped suffixes.
        assert_ne!(&*a_entry.name, &*b_entry.name);

        // Sub-asset dedup: the Sprite sub-asset's `Cloud1` lives in its own
        // type-bucket (Sprite, not Texture2D), so it isn't contested by the
        // Texture2D collision above. It stays bare. The png_b entry is the
        // only Sprite-bucket owner.
        let png_b_sub = &b_entry.sub_assets[0];
        assert_eq!(png_b_sub.file_id, sprite_fid);
        assert_eq!(
            &*png_b_sub.name, "Cloud1",
            "Sprite sub-asset should stay bare under type-aware dedup",
        );
    }

    /// Pin type-aware dedup: a Texture2D and a Prefab sharing the stem
    /// `Foo` both keep the bare alias. Reverse lookup discriminates by
    /// the field's declared C# type at the consumer layer.
    #[test]
    fn build_db_keeps_bare_alias_for_type_distinct_collisions() {
        let png_guid = 0xa0_u128;
        let prefab_guid = 0xb0_u128;
        let raw = vec![
            RawEntry {
                guid: png_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::Texture2D as u32),
                hint: "Assets/UI/Foo.png".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![],
            },
            RawEntry {
                guid: prefab_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::Prefab as u32),
                hint: "Assets/UI/Foo.prefab".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![],
            },
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Both keep bare `Foo` because they live in distinct type buckets.
        assert_eq!(&*db.find_by_guid(png_guid).unwrap().name, "Foo");
        assert_eq!(&*db.find_by_guid(prefab_guid).unwrap().name, "Foo");
    }

    /// Pin: AnimatorController-embedded sub-assets are excluded from the
    /// global dedup pool, mirroring the prefab-embedded rule. Without the
    /// exclusion, an embedded AnimatorState named `Idle` would contest a
    /// hypothetical standalone `.asset` of the same name AND same Unity
    /// classID (AnimatorState exists as both an embedded sub of
    /// `.controller` and a top-level `.asset` in Unity), forcing both to
    /// rename via parent-dir suffix. The exclusion keeps the embedded
    /// state in its parent's namespace where it's addressed via
    /// `$Idle@Player` at the consumer layer.
    #[test]
    fn build_db_skips_controller_embedded_subassets_in_global_pool() {
        const ANIMATOR_STATE_CLASS_ID: u32 = 1102;
        let controller_guid = 0xc0_u128;
        let other_state_guid = 0xd0_u128;
        let raw = vec![
            RawEntry {
                guid: controller_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::AnimatorController as u32),
                hint: "Assets/Anim/Player.controller".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![SubAsset {
                    file_id: -123_456_789_012,
                    class_id: ANIMATOR_STATE_CLASS_ID,
                    name: "Idle".into(),
                }],
            },
            // Standalone .asset whose top class IS AnimatorState — same
            // (name, class_id) bucket as the embedded one. With
            // exclusion, only this one claims the global `Idle` alias.
            RawEntry {
                guid: other_state_guid,
                asset_type_raw: AssetTypeRaw::Native(ANIMATOR_STATE_CLASS_ID),
                hint: "Assets/Other/Idle.asset".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![],
            },
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Standalone keeps bare `Idle`.
        assert_eq!(&*db.find_by_guid(other_state_guid).unwrap().name, "Idle");
        // Embedded state stays as authored in the parent's namespace.
        let ctrl_entry = db.find_by_guid(controller_guid).unwrap();
        assert_eq!(&*ctrl_entry.sub_assets[0].name, "Idle");
    }

    /// Same shape as the controller test, for AudioMixerController:
    /// AudioMixerGroup sub-asset class collides with itself between an
    /// embedded `Main.mixer` group and a hypothetical standalone
    /// `.asset` of the same class. Exclusion keeps the embed in the
    /// parent's namespace.
    #[test]
    fn build_db_skips_mixer_embedded_subassets_in_global_pool() {
        const AUDIO_MIXER_GROUP_CLASS_ID: u32 = 273;
        let mixer_guid = 0xe0_u128;
        let other_group_guid = 0xf0_u128;
        let raw = vec![
            RawEntry {
                guid: mixer_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::AudioMixerController as u32),
                hint: "Assets/Audio/Main.mixer".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![SubAsset {
                    file_id: 9_001,
                    class_id: AUDIO_MIXER_GROUP_CLASS_ID,
                    name: "Master".into(),
                }],
            },
            RawEntry {
                guid: other_group_guid,
                asset_type_raw: AssetTypeRaw::Native(AUDIO_MIXER_GROUP_CLASS_ID),
                hint: "Assets/Other/Master.asset".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![],
            },
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        assert_eq!(&*db.find_by_guid(other_group_guid).unwrap().name, "Master");
        let mixer_entry = db.find_by_guid(mixer_guid).unwrap();
        assert_eq!(&*mixer_entry.sub_assets[0].name, "Master");
    }

    /// Pin: `.playable` files are treated as embedded containers — their
    /// Timeline track sub-assets bypass the global dedup pool. Many
    /// `.playable` files in a project share Unity-default track names like
    /// `Animation Track (2)`; without the exclusion they contest in the
    /// global pool and `disambiguate` hard-fails when the shared
    /// parent-dir suffixes are exhausted. Exclusion is keyed on the
    /// `.playable` extension (`is_embedded_container`) because the
    /// top-doc script guid of a playable is whichever sub-doc Unity
    /// sorts first by hashed fileID — unstable as a discriminator.
    #[test]
    fn build_db_skips_playable_embedded_tracks_in_global_pool() {
        // Track class id + script guid placeholders — bake stores both
        // but doesn't validate them against any registry. Extension is
        // the discriminator that triggers the exclusion.
        const ANIMATION_TRACK_CLASS_ID: u32 = 5004;
        let some_script_guid = 0xd21dcc2386d650c4597f3633c75a1f98_u128;
        let pa_guid = 0xa0_u128;
        let pb_guid = 0xb0_u128;
        let raw = vec![
            RawEntry {
                guid: pa_guid,
                asset_type_raw: AssetTypeRaw::Script(some_script_guid),
                hint: "Assets/Anim/PlayableA.playable".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![SubAsset {
                    file_id: -123_456_789,
                    class_id: ANIMATION_TRACK_CLASS_ID,
                    name: "Animation Track (2)".into(),
                }],
            },
            RawEntry {
                guid: pb_guid,
                asset_type_raw: AssetTypeRaw::Script(some_script_guid),
                hint: "Assets/Anim/PlayableB.playable".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![SubAsset {
                    file_id: -987_654_321,
                    class_id: ANIMATION_TRACK_CLASS_ID,
                    name: "Animation Track (2)".into(),
                }],
            },
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Both playables keep their embedded track names as authored —
        // sub-assets live in the parent's namespace, not the global pool.
        assert_eq!(
            &*db.find_by_guid(pa_guid).unwrap().sub_assets[0].name,
            "Animation Track (2)"
        );
        assert_eq!(
            &*db.find_by_guid(pb_guid).unwrap().sub_assets[0].name,
            "Animation Track (2)"
        );
    }

    /// Pin: prefab-embedded sub-assets are excluded from the global dedup
    /// pool. Their names stay as authored even when another asset in the
    /// project shares the name. They resolve via `$Sub@Parent` at the
    /// consumer layer, not the global alias bucket.
    #[test]
    fn build_db_skips_prefab_embedded_subassets_in_global_pool() {
        let prefab_guid = 0xa0_u128;
        let other_clip_guid = 0xb0_u128;
        let raw = vec![
            RawEntry {
                guid: prefab_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::Prefab as u32),
                hint: "Assets/UI/PatternBG.prefab".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![SubAsset {
                    file_id: -4_468_419_427_481_386_445,
                    class_id: ClassId::AnimationClip as u32,
                    name: "Animation".into(),
                }],
            },
            RawEntry {
                guid: other_clip_guid,
                asset_type_raw: AssetTypeRaw::Native(ClassId::AnimationClip as u32),
                hint: "Assets/Other/Animation.anim".to_string(),
                name: String::new(),
                meta_mtime_ns: 0,
                asset_mtime_ns: 0,
                sub_assets: vec![],
            },
        ];
        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        // Standalone .anim keeps bare `Animation` — the prefab-embedded
        // `Animation` doesn't claim the global alias.
        assert_eq!(
            &*db.find_by_guid(other_clip_guid).unwrap().name,
            "Animation"
        );
        // Prefab-embedded sub-asset keeps its raw name (lives in parent's
        // namespace; `$Animation@PatternBG` at the consumer layer).
        let prefab_entry = db.find_by_guid(prefab_guid).unwrap();
        assert_eq!(&*prefab_entry.sub_assets[0].name, "Animation");
    }

    /// Pin: a single-owner name (one guid only, even if it appears as both
    /// a top-level alias and one of its own sub-assets) is *not*
    /// contested — it stays bare. Guards against over-renaming the common
    /// case of a Texture2D and its lone same-named Sprite sub-asset.
    #[test]
    fn build_db_keeps_bare_alias_when_name_is_uncontested() {
        let png_guid = 0xb0_u128;
        let raw = vec![raw_native(
            "Assets/Tower/Lone.png",
            png_guid,
            vec![SubAsset {
                file_id: 21300000,
                class_id: ClassId::Sprite as u32,
                name: "Lone".into(),
            }],
        )];

        let db = build_db(raw, None, None, false).expect("build_db should succeed");
        let entry = db.find_by_guid(png_guid).unwrap();
        assert_eq!(&*entry.name, "Lone");
        assert_eq!(&*entry.sub_assets[0].name, "Lone");
    }

    /// Pin: when a top-level alias is genuinely unresolvable (no parent
    /// segments left to walk and the bare stem is already taken), the
    /// bake hard-fails rather than silently falling back to a `^<guid8>`
    /// suffix. Per the project policy: ambiguity surfaces at bake time,
    /// not encode time.
    #[test]
    fn build_db_fails_when_dedup_cannot_resolve() {
        let raw = vec![
            // Two top-level entries with the same bare stem and no parent
            // segments to walk — `disambiguate` has nothing to suffix with.
            // `raw_native` types both as Texture2D, so they share one
            // `(name, type)` bucket despite the different extensions.
            raw_native("Foo.asset", 0x01_u128, vec![]),
            raw_native("Foo.prefab", 0x02_u128, vec![]),
        ];

        let err = build_db(raw, None, None, false).expect_err("collision with no parent dirs must hard-fail");
        let msg = format!("{err:#}");
        assert!(
            msg.contains("Foo") && msg.contains("disambiguate"),
            "error message should name the collision and the dedup pass: {msg}",
        );
    }
}