// unity_solution_generator/project_scanner.rs

//! Project-side filesystem scanner.
//!
//! Walks `Assets/` and `Packages/` to enumerate every directory that contains
//! `.cs` files and every `.asmdef`/`.asmref` marker, then resolves directory
//! ownership to assemblies.
//!
//! Hot path: uses `ignore::WalkBuilder::build_parallel()` (with all gitignore
//! filters disabled) so traversal fans out across worker threads and shares
//! work via crossbeam_deque. See [[CLAUDE.md]] §"Performance" for budget.
9
10use std::collections::{BTreeSet, HashMap};
11use std::path::Path;
12
13use ignore::{WalkBuilder, WalkState};
14use rayon::prelude::*;
15
16use crate::error::{GeneratorError, Result};
17use crate::io::{create_dir_all, has_matching_version, read_file, write_file_if_changed};
18use crate::paths::{join_path, parent_directory};
19
/// Coarse classification of an assembly, as produced by `infer_category`.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ProjectCategory {
    /// Fallback when neither the test rule nor the editor rules match.
    Runtime,
    /// Editor-only assembly.
    Editor,
    /// Test assembly.
    Test,
}
26
/// One usable entry of an asmdef's `versionDefines` array.
#[derive(Debug, Clone)]
pub struct VersionDefine {
    /// Value of the entry's `name` key.
    pub package_name: String,
    /// Value of the entry's `define` key.
    pub define: String,
}
32
33#[derive(Debug, Clone)]
34pub struct AsmDefRecord {
35    pub name: String,
36    pub directory: String,
37    pub references: Vec<String>,
38    pub category: ProjectCategory,
39    pub include_platforms: Vec<String>,
40    pub allow_unsafe_code: bool,
41    pub version_defines: Vec<VersionDefine>,
42}
43
44impl AsmDefRecord {
45    pub fn load(root_path: &str, relative_path: &str) -> Result<Option<AsmDefRecord>> {
46        let full = join_path(root_path, relative_path);
47        let json = read_file(&full)?;
48        let Ok(v) = serde_json::from_str::<serde_json::Value>(&json) else {
49            // asmdef present but malformed — treat same as missing-name (skip).
50            return Ok(None);
51        };
52        let Some(name) = v.get("name").and_then(|x| x.as_str()).map(String::from) else {
53            return Ok(None);
54        };
55        let include_platforms = json_string_array(&v, "includePlatforms");
56        let define_constraints = json_string_array(&v, "defineConstraints");
57        Ok(Some(AsmDefRecord {
58            name,
59            directory: parent_directory(relative_path).to_string(),
60            references: json_string_array(&v, "references"),
61            category: infer_category(&include_platforms, &define_constraints),
62            include_platforms,
63            allow_unsafe_code: v
64                .get("allowUnsafeCode")
65                .and_then(|x| x.as_bool())
66                .unwrap_or(false),
67            version_defines: parse_version_defines(&v),
68        }))
69    }
70}
71
72fn json_string_array(v: &serde_json::Value, key: &str) -> Vec<String> {
73    v.get(key)
74        .and_then(|x| x.as_array())
75        .map(|arr| {
76            arr.iter()
77                .filter_map(|x| x.as_str().map(String::from))
78                .collect()
79        })
80        .unwrap_or_default()
81}
82
83pub fn parse_version_defines(v: &serde_json::Value) -> Vec<VersionDefine> {
84    let Some(arr) = v.get("versionDefines").and_then(|x| x.as_array()) else {
85        return Vec::new();
86    };
87    arr.iter()
88        .filter_map(|obj| {
89            let p = obj.get("name").and_then(|x| x.as_str())?;
90            let d = obj.get("define").and_then(|x| x.as_str())?;
91            if p.is_empty() || d.is_empty() {
92                return None;
93            }
94            Some(VersionDefine {
95                package_name: p.to_string(),
96                define: d.to_string(),
97            })
98        })
99        .collect()
100}
101
102fn infer_category(include_platforms: &[String], define_constraints: &[String]) -> ProjectCategory {
103    if define_constraints.iter().any(|s| s == "UNITY_INCLUDE_TESTS") {
104        return ProjectCategory::Test;
105    }
106    if include_platforms.len() == 1 && include_platforms[0] == "Editor" {
107        return ProjectCategory::Editor;
108    }
109    if define_constraints.iter().any(|s| s == "UNITY_EDITOR") {
110        return ProjectCategory::Editor;
111    }
112    ProjectCategory::Runtime
113}
114
115#[derive(Debug, Clone)]
116pub struct ScanResult {
117    pub asm_def_by_name: HashMap<String, AsmDefRecord>,
118    pub dirs_by_project: HashMap<String, Vec<String>>,
119    pub unresolved_dirs: Vec<String>,
120}
121
122pub struct ProjectScanner;
123
124impl ProjectScanner {
125    /// `generator_root` controls where the `scan-cache` file lives — pass
126    /// [`crate::paths::DEFAULT_GENERATOR_ROOT`] for the standard layout, or
127    /// a custom directory to keep multiple variant trees isolated (tests).
128    pub fn scan(project_root: &str, generator_root: &str) -> Result<ScanResult> {
129        let _span = tracing::info_span!("project_scanner.scan").entered();
130        let generator_dir = join_path(project_root, generator_root);
131        let cache_path = join_path(&generator_dir, "scan-cache");
132        let file_scan = {
133            let _s = tracing::info_span!("project_scanner.file_scan").entered();
134            load_cached_scan(&cache_path, project_root)
135                .unwrap_or_else(|| scan_and_cache(project_root, &cache_path))
136        };
137
138        let mut asm_def_by_name: HashMap<String, AsmDefRecord> = HashMap::new();
139        for record in file_scan.asmdef_records {
140            if asm_def_by_name.contains_key(&record.name) {
141                return Err(GeneratorError::DuplicateAsmDefName(record.name));
142            }
143            asm_def_by_name.insert(record.name.clone(), record);
144        }
145
146        let mut assembly_roots: HashMap<String, String> = HashMap::new();
147        for (name, record) in &asm_def_by_name {
148            assembly_roots.insert(record.directory.clone(), name.clone());
149        }
150        for (dir, reference) in file_scan.asmref_records {
151            if asm_def_by_name.contains_key(&reference) {
152                assembly_roots.insert(dir, reference);
153            }
154        }
155
156        let mut dirs_by_project: HashMap<String, Vec<String>> = HashMap::new();
157        let mut unresolved_dirs: Vec<String> = Vec::new();
158        for dir in &file_scan.cs_dirs {
159            if let Some(owner) = find_assembly_owner(dir, &assembly_roots) {
160                dirs_by_project.entry(owner).or_default().push(dir.clone());
161            } else if let Some(legacy) = resolve_legacy_project(dir) {
162                dirs_by_project
163                    .entry(legacy.to_string())
164                    .or_default()
165                    .push(dir.clone());
166            } else {
167                unresolved_dirs.push(dir.clone());
168            }
169        }
170
171        Ok(ScanResult {
172            asm_def_by_name,
173            dirs_by_project,
174            unresolved_dirs,
175        })
176    }
177}
178
/// Accumulator filled while walking the project tree.
#[derive(Default)]
struct ScanBucket {
    /// Directories that contain at least one `.cs` file (sorted, deduplicated).
    cs_dirs: BTreeSet<String>,
    /// Project-relative paths of every `.asmdef` seen.
    asmdef_paths: Vec<String>,
    /// Project-relative paths of every `.asmref` seen.
    asmref_paths: Vec<String>,
}
185
186impl crate::walk::Bucket for ScanBucket {
187    fn merge_from(&mut self, other: Self) {
188        self.cs_dirs.extend(other.cs_dirs);
189        self.asmdef_paths.extend(other.asmdef_paths);
190        self.asmref_paths.extend(other.asmref_paths);
191    }
192}
193
194#[derive(Debug, Clone)]
195struct FileScan {
196    cs_dirs: Vec<String>,
197    /// Asmdef paths relative to project root. Kept on cache for invalidation
198    /// (mtime tracking) and by-path debugging; the parsed record set below is
199    /// what the rest of the pipeline consumes.
200    asmdef_paths: Vec<String>,
201    asmref_paths: Vec<String>,
202    /// Pre-parsed `AsmDefRecord`s, populated either during a cold scan (parsed
203    /// in parallel via rayon) or loaded straight from `[asmdef-records]` on a
204    /// warm cache hit. Skipping the per-asmdef file read + JSON parse on the
205    /// warm path is the whole point of this cache extension.
206    asmdef_records: Vec<AsmDefRecord>,
207    /// Pre-resolved (directory, reference) pairs from each `.asmref`. Same
208    /// reasoning as `asmdef_records`.
209    asmref_records: Vec<(String, String)>,
210}
211
212/// Scan project files using `ignore`'s parallel walker with all gitignore
213/// behaviour disabled. We emulate the Swift `processDirent` filter (skip
214/// `.foo` and `bar~` entries) via the visit closure.
215fn scan_project_files(project_root: &str, roots: &[&str]) -> FileScan {
216    use crate::walk::{Bucket, parallel_walk};
217
218    // Use `Path::strip_prefix` (component-aware) rather than byte-slicing on
219    // `path_str.len() + 1`. Byte-slicing would panic if the kernel ever handed
220    // back a path whose prefix differed from `project_root` by even one byte
221    // (trailing slash, canonicalised symlink component) AND that byte landed
222    // inside a multibyte UTF-8 char. With `panic = "abort"` such a panic would
223    // SIGKILL the process, including via FFI from Unity's Mono. Defense-in-depth.
224    let project_root_path = std::path::Path::new(project_root);
225    let mut bucket = ScanBucket::default();
226
227    for root in roots {
228        let root_dir = format!("{}/{}", project_root, root);
229        if !Path::new(&root_dir).exists() {
230            continue;
231        }
232
233        let mut builder = WalkBuilder::new(&root_dir);
234        builder
235            .standard_filters(false)
236            .hidden(false)
237            .ignore(false)
238            .git_ignore(false)
239            .git_global(false)
240            .git_exclude(false)
241            .parents(false)
242            .follow_links(false);
243
244        let from_root = parallel_walk(builder, |local: &mut ScanBucket, entry| {
245            let name = entry.file_name().to_string_lossy();
246            if name.starts_with('.') || name.ends_with('~') {
247                return WalkState::Skip;
248            }
249            let Some(ft) = entry.file_type() else {
250                return WalkState::Continue;
251            };
252            if !ft.is_file() {
253                return WalkState::Continue;
254            }
255            let Ok(rel) = entry.path().strip_prefix(project_root_path) else {
256                return WalkState::Continue;
257            };
258            // Non-UTF-8 path component — Unity asset paths are always UTF-8 on
259            // macOS APFS, so this only fires on a corrupt filesystem entry.
260            // Skip rather than panic.
261            let Some(rel_path) = rel.to_str() else {
262                return WalkState::Continue;
263            };
264            let n: &str = name.as_ref();
265            if n.ends_with(".cs") {
266                local.cs_dirs.insert(parent_directory(rel_path).to_string());
267            } else if n.ends_with(".asmdef") {
268                local.asmdef_paths.push(rel_path.to_string());
269            } else if n.ends_with(".asmref") {
270                local.asmref_paths.push(rel_path.to_string());
271            }
272            WalkState::Continue
273        });
274        bucket.merge_from(from_root);
275    }
276
277    FileScan {
278        cs_dirs: bucket.cs_dirs.into_iter().collect(),
279        asmdef_paths: bucket.asmdef_paths,
280        asmref_paths: bucket.asmref_paths,
281        // Records are populated by `scan_and_cache` after parsing the asmdef
282        // / asmref files in parallel; the walker itself only enumerates paths.
283        asmdef_records: Vec::new(),
284        asmref_records: Vec::new(),
285    }
286}
287
288fn find_assembly_owner(directory: &str, assembly_roots: &HashMap<String, String>) -> Option<String> {
289    let mut current = directory.to_string();
290    loop {
291        if let Some(name) = assembly_roots.get(&current) {
292            return Some(name.clone());
293        }
294        if current.is_empty() {
295            return None;
296        }
297        current = parent_directory(&current).to_string();
298    }
299}
300
301fn load_asm_ref(root_path: &str, relative_path: &str) -> Result<Option<(String, String)>> {
302    let json = read_file(&join_path(root_path, relative_path))?;
303    let Ok(v) = serde_json::from_str::<serde_json::Value>(&json) else {
304        return Ok(None);
305    };
306    let Some(reference) = v.get("reference").and_then(|x| x.as_str()) else {
307        return Ok(None);
308    };
309    Ok(Some((parent_directory(relative_path).to_string(), reference.to_string())))
310}
311
/// Maps a directory that no asmdef claims onto one of the predefined
/// `Assembly-CSharp*` projects.
///
/// Only paths whose first component is `Assets` qualify; anything else
/// returns `None`. Below `Assets`:
/// - any component named `Editor` selects an editor project;
/// - a first component of `Plugins`, `Standard Assets` or
///   `Pro Standard Assets` selects a `-firstpass` project.
fn resolve_legacy_project(directory: &str) -> Option<&'static str> {
    let mut components = directory.split('/');
    if components.next() != Some("Assets") {
        return None;
    }

    let mut below_assets = components.peekable();
    let is_first_pass = matches!(
        below_assets.peek().copied(),
        Some("Plugins" | "Standard Assets" | "Pro Standard Assets")
    );
    let has_editor = below_assets.any(|component| component == "Editor");

    let project = if has_editor {
        if is_first_pass {
            "Assembly-CSharp-Editor-firstpass"
        } else {
            "Assembly-CSharp-Editor"
        }
    } else if is_first_pass {
        "Assembly-CSharp-firstpass"
    } else {
        "Assembly-CSharp"
    };
    Some(project)
}
342
343// ── scan cache ────────────────────────────────────────────────────────────
344
345/// Routed through the workspace-level `CACHE_VERSION` so the scan-cache,
346/// lock-fingerprint, and generate-fingerprint all bump together. See
347/// [[architecture.md]] (Versioning section).
348const SCAN_CACHE_VERSION: u32 = crate::CACHE_VERSION;
349
350fn load_cached_scan(cache_path: &str, root_path: &str) -> Option<FileScan> {
351    let _s = tracing::info_span!("scan_cache.validate").entered();
352    let content = read_file(cache_path).ok()?;
353    if !has_matching_version(&content, SCAN_CACHE_VERSION) {
354        return None;
355    }
356
357    enum Sec {
358        Cs,
359        Asmdef,
360        Asmref,
361        Mtimes,
362        AsmdefRecords,
363        AsmrefRecords,
364    }
365    let mut section: Option<Sec> = None;
366    let mut cs_dirs = Vec::new();
367    let mut asmdef_paths = Vec::new();
368    let mut asmref_paths = Vec::new();
369    let mut mtimes: Vec<(String, u128)> = Vec::new();
370    let mut asmdef_records: Vec<AsmDefRecord> = Vec::new();
371    let mut asmref_records: Vec<(String, String)> = Vec::new();
372
373    for line in content.split('\n') {
374        if line.is_empty() || line.starts_with('#') {
375            continue;
376        }
377        match line {
378            "[cs]" => {
379                section = Some(Sec::Cs);
380                continue;
381            }
382            "[asmdef]" => {
383                section = Some(Sec::Asmdef);
384                continue;
385            }
386            "[asmref]" => {
387                section = Some(Sec::Asmref);
388                continue;
389            }
390            "[mtimes]" => {
391                section = Some(Sec::Mtimes);
392                continue;
393            }
394            "[asmdef-records]" => {
395                section = Some(Sec::AsmdefRecords);
396                continue;
397            }
398            "[asmref-records]" => {
399                section = Some(Sec::AsmrefRecords);
400                continue;
401            }
402            _ => {}
403        }
404        match section {
405            Some(Sec::Cs) => cs_dirs.push(line.to_string()),
406            Some(Sec::Asmdef) => asmdef_paths.push(line.to_string()),
407            Some(Sec::Asmref) => asmref_paths.push(line.to_string()),
408            Some(Sec::Mtimes) => {
409                if let Some(pipe) = line.find('|') {
410                    if let Ok(m) = line[pipe + 1..].parse::<u128>() {
411                        mtimes.push((line[..pipe].to_string(), m));
412                    }
413                }
414            }
415            Some(Sec::AsmdefRecords) => {
416                if let Some(rec) = decode_asmdef_record(line) {
417                    asmdef_records.push(rec);
418                }
419            }
420            Some(Sec::AsmrefRecords) => {
421                if let Some(pair) = decode_asmref_record(line) {
422                    asmref_records.push(pair);
423                }
424            }
425            None => {}
426        }
427    }
428
429    if mtimes.is_empty() {
430        return None;
431    }
432
433    // Older caches (or caches written by an earlier version of usg) won't have
434    // [asmdef-records]; refusing them forces a fresh scan that re-populates.
435    if !asmdef_paths.is_empty() && asmdef_records.is_empty() {
436        return None;
437    }
438
439    for (rel, cached) in &mtimes {
440        let full = if rel.is_empty() {
441            root_path.to_string()
442        } else {
443            join_path(root_path, rel)
444        };
445        let m = std::fs::metadata(&full).ok()?;
446        let mtime_ns = mtime_nanos(&m)?;
447        if mtime_ns != *cached {
448            return None;
449        }
450    }
451
452    Some(FileScan {
453        cs_dirs,
454        asmdef_paths,
455        asmref_paths,
456        asmdef_records,
457        asmref_records,
458    })
459}
460
461fn scan_and_cache(root_path: &str, cache_path: &str) -> FileScan {
462    let _s = tracing::info_span!("scan_cache.full_walk").entered();
463    let walk = scan_project_files(root_path, &["Assets", "Packages"]);
464
465    // Parse asmdefs + asmrefs in parallel. Most projects have <100 of each;
466    // the parse itself is microseconds, but parallelising amortises the
467    // per-file `read` syscall across cores.
468    let asmdef_records: Vec<AsmDefRecord> = walk
469        .asmdef_paths
470        .par_iter()
471        .filter_map(|p| AsmDefRecord::load(root_path, p).ok().flatten())
472        .collect();
473    let asmref_records: Vec<(String, String)> = walk
474        .asmref_paths
475        .par_iter()
476        .filter_map(|p| load_asm_ref(root_path, p).ok().flatten())
477        .collect();
478
479    // Track every directory that contributed work plus the asmdef/asmref files
480    // themselves. Tracking the files (not just their parent dirs) catches
481    // in-place edits — a parent dir's mtime doesn't bump when a file inside it
482    // is rewritten in place, so without these we'd miss e.g. an asmdef name
483    // change. The price is `len(asmdefs + asmrefs) × stat()` on warm validation.
484    let mut all_paths: BTreeSet<String> = BTreeSet::new();
485    all_paths.insert("Assets".to_string());
486    all_paths.insert("Packages".to_string());
487    let mut add_with_ancestors = |dir: &str| {
488        let mut cur = dir.to_string();
489        while !cur.is_empty() && all_paths.insert(cur.clone()) {
490            cur = parent_directory(&cur).to_string();
491        }
492    };
493    for d in &walk.cs_dirs {
494        add_with_ancestors(d);
495    }
496    for p in &walk.asmdef_paths {
497        add_with_ancestors(parent_directory(p));
498    }
499    for p in &walk.asmref_paths {
500        add_with_ancestors(parent_directory(p));
501    }
502    for p in &walk.asmdef_paths {
503        all_paths.insert(p.clone());
504    }
505    for p in &walk.asmref_paths {
506        all_paths.insert(p.clone());
507    }
508
509    let mut s = String::from("# scan-cache — auto-generated, do not edit\n");
510    s.push_str(&format!("# version: {}\n", SCAN_CACHE_VERSION));
511    s.push_str("[cs]\n");
512    for d in &walk.cs_dirs {
513        s.push_str(d);
514        s.push('\n');
515    }
516    s.push_str("[asmdef]\n");
517    for p in &walk.asmdef_paths {
518        s.push_str(p);
519        s.push('\n');
520    }
521    s.push_str("[asmref]\n");
522    for p in &walk.asmref_paths {
523        s.push_str(p);
524        s.push('\n');
525    }
526    s.push_str("[asmdef-records]\n");
527    for r in &asmdef_records {
528        encode_asmdef_record(&mut s, r);
529        s.push('\n');
530    }
531    s.push_str("[asmref-records]\n");
532    for (dir, reference) in &asmref_records {
533        s.push_str(dir);
534        s.push('\t');
535        s.push_str(reference);
536        s.push('\n');
537    }
538    s.push_str("[mtimes]\n");
539    for p in &all_paths {
540        let full = if p.is_empty() {
541            root_path.to_string()
542        } else {
543            join_path(root_path, p)
544        };
545        if let Ok(m) = std::fs::metadata(&full) {
546            if let Some(ns) = mtime_nanos(&m) {
547                s.push_str(p);
548                s.push('|');
549                s.push_str(&ns.to_string());
550                s.push('\n');
551            }
552        }
553    }
554
555    create_dir_all(parent_directory(cache_path));
556    let _ = write_file_if_changed(cache_path, &s);
557
558    FileScan {
559        cs_dirs: walk.cs_dirs,
560        asmdef_paths: walk.asmdef_paths,
561        asmref_paths: walk.asmref_paths,
562        asmdef_records,
563        asmref_records,
564    }
565}
566
// ── asmdef record (de)serialization ──────────────────────────────────────
//
// One record per line, tab-delimited. Columns:
//   0: name              (Unity asmdef names — alphanumeric + dots)
//   1: directory         (forward-slash relative path)
//   2: category          ("R" = Runtime, "E" = Editor, "T" = Test)
//   3: allow_unsafe      ("0" or "1")
//   4: references        (semicolon-separated assembly names; may be empty)
//   5: include_platforms (semicolon-separated; may be empty)
//   6: version_defines   (`pkg|def` pairs, comma-separated; may be empty)
//
// No escaping is applied. The format relies on tab and newline never
// appearing in any of these fields (Unity rejects them in asmdef names; paths
// are forward-slash; defines are uppercase identifiers).
581
582fn encode_asmdef_record(out: &mut String, r: &AsmDefRecord) {
583    out.push_str(&r.name);
584    out.push('\t');
585    out.push_str(&r.directory);
586    out.push('\t');
587    out.push(match r.category {
588        ProjectCategory::Runtime => 'R',
589        ProjectCategory::Editor => 'E',
590        ProjectCategory::Test => 'T',
591    });
592    out.push('\t');
593    out.push(if r.allow_unsafe_code { '1' } else { '0' });
594    out.push('\t');
595    out.push_str(&r.references.join(";"));
596    out.push('\t');
597    out.push_str(&r.include_platforms.join(";"));
598    out.push('\t');
599    for (i, vd) in r.version_defines.iter().enumerate() {
600        if i > 0 {
601            out.push(',');
602        }
603        out.push_str(&vd.package_name);
604        out.push('|');
605        out.push_str(&vd.define);
606    }
607}
608
609fn decode_asmdef_record(line: &str) -> Option<AsmDefRecord> {
610    let mut parts = line.split('\t');
611    let name = parts.next()?.to_string();
612    let directory = parts.next()?.to_string();
613    let category = match parts.next()? {
614        "R" => ProjectCategory::Runtime,
615        "E" => ProjectCategory::Editor,
616        "T" => ProjectCategory::Test,
617        _ => return None,
618    };
619    let allow_unsafe_code = matches!(parts.next()?, "1");
620    let references = split_semi(parts.next()?);
621    let include_platforms = split_semi(parts.next()?);
622    let version_defines = parts
623        .next()
624        .map(|s| {
625            if s.is_empty() {
626                Vec::new()
627            } else {
628                s.split(',')
629                    .filter_map(|pair| {
630                        let (pkg, def) = pair.split_once('|')?;
631                        Some(VersionDefine {
632                            package_name: pkg.to_string(),
633                            define: def.to_string(),
634                        })
635                    })
636                    .collect()
637            }
638        })
639        .unwrap_or_default();
640    Some(AsmDefRecord {
641        name,
642        directory,
643        references,
644        category,
645        include_platforms,
646        allow_unsafe_code,
647        version_defines,
648    })
649}
650
/// Splits a cached asmref line into `(directory, reference)` at its first
/// tab. Returns `None` when the line contains no tab.
fn decode_asmref_record(line: &str) -> Option<(String, String)> {
    line.split_once('\t')
        .map(|(dir, reference)| (dir.to_owned(), reference.to_owned()))
}
655
/// Splits a `;`-separated column into owned strings.
///
/// An empty input yields an empty vector rather than a single empty string;
/// empty items inside a non-empty input are preserved.
fn split_semi(s: &str) -> Vec<String> {
    match s {
        "" => Vec::new(),
        joined => joined.split(';').map(String::from).collect(),
    }
}
663
/// Modification time of `m` in nanoseconds since the Unix epoch.
///
/// Returns `None` for timestamps before the epoch.
#[cfg(unix)]
fn mtime_nanos(m: &std::fs::Metadata) -> Option<u128> {
    use std::os::unix::fs::MetadataExt;
    let (secs, nanos): (i64, i64) = (m.mtime(), m.mtime_nsec());
    // The sign check comes first, so the cast of `secs` never sees a negative
    // value.
    (secs >= 0).then(|| (secs as u128) * 1_000_000_000 + (nanos as u128))
}

/// Modification time of `m` in nanoseconds since the Unix epoch.
///
/// Returns `None` when the modification time is unavailable or lies before
/// the epoch.
#[cfg(not(unix))]
fn mtime_nanos(m: &std::fs::Metadata) -> Option<u128> {
    m.modified()
        .ok()?
        .duration_since(std::time::SystemTime::UNIX_EPOCH)
        .ok()
        .map(|since_epoch| since_epoch.as_nanos())
}
681