Skip to main content

srcwalk/
overview.rs

1//! Project fingerprint for MCP initialization.
2//! Gives agents instant orientation without a tool call.
3
4use std::collections::HashMap;
5use std::fmt::Write;
6use std::fs;
7use std::path::Path;
8use std::process::Command;
9use std::time::{Duration, Instant};
10
11use crate::lang::detect_file_type;
12use crate::read::imports::is_import_line;
13use crate::search::io::SKIP_DIRS;
14use crate::types::{FileType, Lang};
15
16/// Compute a project fingerprint for MCP initialization.
17/// Must be fast (<250ms) — runs synchronously in the initialize handler.
18/// Returns empty string on any failure (no error propagation).
19#[must_use]
20pub fn fingerprint(root: &Path) -> String {
21    let start = Instant::now();
22    let result = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| fingerprint_inner(root)));
23    let elapsed = start.elapsed();
24    if elapsed.as_millis() > 250 {
25        eprintln!(
26            "[srcwalk] fingerprint took {}ms (>250ms budget)",
27            elapsed.as_millis()
28        );
29    }
30    result.unwrap_or_default()
31}
32
33fn fingerprint_inner(root: &Path) -> String {
34    let mut lines: Vec<String> = Vec::new();
35
36    // Walk files (depth 2) — collect language counts, modules, entry points
37    let walk = walk_files(root);
38
39    // Determine primary language
40    let primary_lang = walk
41        .lang_counts
42        .iter()
43        .max_by_key(|(_, count)| *count)
44        .map(|(lang, _)| *lang);
45
46    let lang_name = primary_lang.map_or("Unknown", lang_display_name);
47    let total_files = primary_lang
48        .and_then(|l| walk.lang_counts.get(&l))
49        .copied()
50        .unwrap_or_else(|| walk.lang_counts.values().sum::<usize>());
51
52    // Modules: dirs with >=2 files of the primary language, with common prefix stripped.
53    // Keys in module_lang_counts may be "dir" or "dir/subdir" (for deeply nested projects).
54    let modules: Vec<String> = {
55        // Collect dirs with >=2 primary-language files, sorted by file count descending
56        let mut mods: Vec<(String, usize)> = walk
57            .module_lang_counts
58            .iter()
59            .filter_map(|(name, lang_map)| {
60                let count = primary_lang
61                    .and_then(|l| lang_map.get(&l))
62                    .copied()
63                    .unwrap_or(0);
64                if count >= 2 {
65                    Some((name.clone(), count))
66                } else {
67                    None
68                }
69            })
70            .collect();
71        mods.sort_by_key(|b| std::cmp::Reverse(b.1)); // most files first
72
73        // If all modules (or at least most) share a common top-level prefix
74        // (e.g., all are "src/..."), strip it so we display short names
75        // ("diff/" not "src/diff/"). Also exclude the bare prefix entry itself.
76        if mods.len() >= 2 {
77            let prefix = common_dir_prefix(&mods);
78            if !prefix.is_empty() {
79                // The prefix without trailing slash (e.g., "src")
80                let prefix_bare = prefix.trim_end_matches('/');
81                mods = mods
82                    .into_iter()
83                    .filter_map(|(name, count)| {
84                        if name == prefix_bare {
85                            // Drop the bare prefix itself (it's the container, not a module)
86                            None
87                        } else if let Some(stripped) = name.strip_prefix(&prefix) {
88                            let s = stripped.trim_start_matches('/');
89                            if s.is_empty() {
90                                None
91                            } else {
92                                Some((s.to_string(), count))
93                            }
94                        } else {
95                            Some((name, count))
96                        }
97                    })
98                    .collect();
99            }
100        }
101        // Filter out well-known non-source directories
102        let non_source = [
103            "test",
104            "tests",
105            "__tests__",
106            "spec",
107            "specs",
108            "doc",
109            "docs",
110            "docs_src",
111            "documentation",
112            "example",
113            "examples",
114            "sample",
115            "samples",
116            "script",
117            "scripts",
118            "tools",
119            "fixtures",
120            "benchmark",
121            "benchmarks",
122            "bench",
123            ".github",
124            ".vscode",
125            ".idea",
126            "vendor",
127            "node_modules",
128            "target",
129            "dist",
130            "build",
131        ];
132        mods.retain(|(name, _)| {
133            let lower = name.to_lowercase();
134            // Check if ANY path component is a non-source dir
135            !lower.split('/').any(|part| non_source.contains(&part))
136        });
137        // Sort by file count descending, truncate to 10, extract names
138        mods.sort_by_key(|b| std::cmp::Reverse(b.1));
139        mods.truncate(10);
140        mods.into_iter().map(|(name, _)| name).collect()
141    };
142
143    // Header line
144    let dir_count = modules.len();
145    lines.push(format!(
146        "[srcwalk] {lang_name} project — {total_files} source files, {dir_count} directories"
147    ));
148
149    // Directories (cap at 10, sorted by file count descending)
150    if !modules.is_empty() {
151        let mut dirs = modules;
152        dirs.truncate(10);
153        let display: Vec<String> = dirs.iter().map(|m| format!("{m}/")).collect();
154        lines.push(format!("  dirs: {}", display.join(" ")));
155    }
156
157    // Manifest — name, version, deps
158    if let Some(manifest) = find_manifest(root) {
159        if let Some(info) = parse_manifest(root, &manifest) {
160            // Deps line
161            if !info.deps.is_empty() {
162                let dep_str = info.deps.join(", ");
163                lines.push(format!("  deps: {dep_str}"));
164            }
165
166            // Hot files (only for projects with local imports)
167            if let Some(hot) = hot_files(root, &walk, primary_lang) {
168                lines.push(format!("  hot: {hot}"));
169            }
170
171            // Git context
172            if let Some(git) = git_context(root) {
173                lines.push(format!("  git: {git}"));
174            }
175
176            // Test style
177            if let Some(tests) = test_style(root, &walk, primary_lang) {
178                lines.push(format!("  tests: {tests}"));
179            }
180
181            // Manifest line
182            let mut manifest_line = format!("  manifest: {manifest}");
183            if let Some(name) = &info.name {
184                write!(manifest_line, " ({name}").unwrap();
185                if let Some(version) = &info.version {
186                    write!(manifest_line, " v{version}").unwrap();
187                }
188                manifest_line.push(')');
189            }
190            lines.push(manifest_line);
191        }
192    } else {
193        // No manifest — still show hot, git, tests
194        if let Some(hot) = hot_files(root, &walk, primary_lang) {
195            lines.push(format!("  hot: {hot}"));
196        }
197        if let Some(git) = git_context(root) {
198            lines.push(format!("  git: {git}"));
199        }
200        if let Some(tests) = test_style(root, &walk, primary_lang) {
201            lines.push(format!("  tests: {tests}"));
202        }
203    }
204
205    lines.join("\n")
206}
207
208// ---------------------------------------------------------------------------
209// Common dir prefix helper
210// ---------------------------------------------------------------------------
211
/// Return `"<first>/"` when every module name starts with the same first
/// path component AND at least one name has a sub-path (contains `/`).
/// In every other case — including an empty slice — return `""`.
fn common_dir_prefix(mods: &[(String, usize)]) -> String {
    // First path component of a "a/b"-style name ("a" for both "a" and "a/b").
    fn head(name: &str) -> &str {
        name.split('/').next().unwrap_or(name)
    }

    let Some((leading_name, _)) = mods.first() else {
        return String::new();
    };
    let first = head(leading_name.as_str());

    let all_share_head = mods.iter().all(|(name, _)| head(name.as_str()) == first);
    let any_nested = mods.iter().any(|(name, _)| name.contains('/'));

    if all_share_head && any_nested {
        format!("{first}/")
    } else {
        String::new()
    }
}
231
232// ---------------------------------------------------------------------------
233// Language display
234// ---------------------------------------------------------------------------
235
236fn lang_display_name(lang: Lang) -> &'static str {
237    match lang {
238        Lang::Rust => "Rust",
239        Lang::TypeScript => "TypeScript",
240        Lang::Tsx => "TSX",
241        Lang::JavaScript => "JavaScript",
242        Lang::Python => "Python",
243        Lang::Go => "Go",
244        Lang::Java => "Java",
245        Lang::Scala => "Scala",
246        Lang::C => "C",
247        Lang::Cpp => "C++",
248        Lang::Ruby => "Ruby",
249        Lang::Php => "PHP",
250        Lang::Swift => "Swift",
251        Lang::Kotlin => "Kotlin",
252        Lang::CSharp => "C#",
253        Lang::Elixir => "Elixir",
254        Lang::Dockerfile => "Docker",
255        Lang::Make => "Make",
256    }
257}
258
259// ---------------------------------------------------------------------------
260// File walk (depth 2)
261// ---------------------------------------------------------------------------
262
/// Everything collected by the bounded directory walk in `walk_files`.
struct WalkResult {
    /// Number of code files seen per detected language.
    lang_counts: HashMap<Lang, usize>,
    /// Module key → per-language file counts. The key is the first path
    /// component ("dir") for files one directory deep, or the first two
    /// components ("dir/subdir") for files nested deeper; files directly in
    /// the root are not counted here.
    module_lang_counts: HashMap<String, HashMap<Lang, usize>>,
    /// Code files found: (path relative to root, size in bytes)
    code_files: Vec<(String, u64)>,
    /// Whether specific test dirs exist: each flag is set when a directory
    /// with that exact name (`tests`, `test`, `__tests__`, `spec`) was seen
    /// during the walk.
    has_tests_dir: bool,
    has_test_dir: bool,
    has_dunder_tests: bool,
    has_spec_dir: bool,
}
275
276fn walk_files(root: &Path) -> WalkResult {
277    let mut lang_counts: HashMap<Lang, usize> = HashMap::new();
278    let mut module_lang_counts: HashMap<String, HashMap<Lang, usize>> = HashMap::new();
279    let mut code_files: Vec<(String, u64)> = Vec::new();
280    let mut has_tests_dir = false;
281    let mut has_test_dir = false;
282    let mut has_dunder_tests = false;
283    let mut has_spec_dir = false;
284
285    // Walk depth 0 (root itself)
286    walk_dir(
287        root,
288        root,
289        0,
290        2,
291        &mut lang_counts,
292        &mut module_lang_counts,
293        &mut code_files,
294        &mut has_tests_dir,
295        &mut has_test_dir,
296        &mut has_dunder_tests,
297        &mut has_spec_dir,
298    );
299
300    WalkResult {
301        lang_counts,
302        module_lang_counts,
303        code_files,
304        has_tests_dir,
305        has_test_dir,
306        has_dunder_tests,
307        has_spec_dir,
308    }
309}
310
311#[allow(clippy::too_many_arguments)]
312fn walk_dir(
313    dir: &Path,
314    root: &Path,
315    depth: usize,
316    max_depth: usize,
317    lang_counts: &mut HashMap<Lang, usize>,
318    module_lang_counts: &mut HashMap<String, HashMap<Lang, usize>>,
319    code_files: &mut Vec<(String, u64)>,
320    has_tests_dir: &mut bool,
321    has_test_dir: &mut bool,
322    has_dunder_tests: &mut bool,
323    has_spec_dir: &mut bool,
324) {
325    let Ok(entries) = fs::read_dir(dir) else {
326        return;
327    };
328
329    for entry in entries.flatten() {
330        let path = entry.path();
331        let Some(name) = path.file_name().and_then(|n| n.to_str()) else {
332            continue;
333        };
334
335        let Ok(ft) = entry.file_type() else {
336            continue;
337        };
338
339        if ft.is_dir() {
340            if SKIP_DIRS.contains(&name) {
341                continue;
342            }
343
344            // Track test directories at any depth
345            match name {
346                "tests" => *has_tests_dir = true,
347                "test" => *has_test_dir = true,
348                "__tests__" => *has_dunder_tests = true,
349                "spec" => *has_spec_dir = true,
350                _ => {}
351            }
352
353            if depth < max_depth {
354                walk_dir(
355                    &path,
356                    root,
357                    depth + 1,
358                    max_depth,
359                    lang_counts,
360                    module_lang_counts,
361                    code_files,
362                    has_tests_dir,
363                    has_test_dir,
364                    has_dunder_tests,
365                    has_spec_dir,
366                );
367            }
368        } else if ft.is_file() {
369            if let FileType::Code(lang) = detect_file_type(&path) {
370                *lang_counts.entry(lang).or_insert(0) += 1;
371
372                // Track size for hot files
373                let size = entry.metadata().map_or(0, |m| m.len());
374                if let Ok(rel) = path.strip_prefix(root) {
375                    let rel_str = rel.to_string_lossy().to_string();
376
377                    code_files.push((rel_str, size));
378
379                    // Track module — use up to 2 path components as the key,
380                    // but only for files nested at least one level deep.
381                    // e.g. src/diff/mod.rs → key "src/diff", lib.rs → skipped
382                    {
383                        let mut comps = rel.components();
384                        if let Some(c1) = comps.next() {
385                            let remaining: Vec<_> = comps.collect();
386                            if !remaining.is_empty() {
387                                let key = if remaining.len() >= 2 {
388                                    // File is at depth 3+: use first two components
389                                    format!(
390                                        "{}/{}",
391                                        c1.as_os_str().to_string_lossy(),
392                                        remaining[0].as_os_str().to_string_lossy()
393                                    )
394                                } else {
395                                    // File is at depth 2: use first component only
396                                    c1.as_os_str().to_string_lossy().to_string()
397                                };
398                                *module_lang_counts
399                                    .entry(key)
400                                    .or_default()
401                                    .entry(lang)
402                                    .or_insert(0) += 1;
403                            }
404                        }
405                    }
406                }
407            }
408
409            // Check test file patterns
410            if name.contains(".test.") || name.contains(".spec.") {
411                // These contribute to test style but we detect in test_style()
412            }
413        }
414    }
415}
416
417// ---------------------------------------------------------------------------
418// Manifest parsing
419// ---------------------------------------------------------------------------
420
/// Return the file name of the first known manifest present directly in
/// `root`, checked in priority order: `Cargo.toml`, `package.json`,
/// `go.mod`, `pyproject.toml`. Returns `None` when none of them exists.
fn find_manifest(root: &Path) -> Option<String> {
    const MANIFESTS: [&str; 4] = ["Cargo.toml", "package.json", "go.mod", "pyproject.toml"];

    MANIFESTS
        .iter()
        .copied()
        .find(|candidate| root.join(candidate).exists())
        .map(String::from)
}
430
/// Summary of a project manifest, as produced by the `parse_*` functions.
struct ManifestInfo {
    /// Project/package/module name, when the manifest declares one.
    name: Option<String>,
    /// Project version, when the manifest declares one (the go.mod parser
    /// always leaves this as `None`).
    version: Option<String>,
    /// Dependency names, sorted alphabetically and capped at 10 entries.
    deps: Vec<String>,
}
436
437fn parse_manifest(root: &Path, manifest: &str) -> Option<ManifestInfo> {
438    match manifest {
439        "Cargo.toml" => parse_cargo_toml(root),
440        "package.json" => parse_package_json(root),
441        "go.mod" => parse_go_mod(root),
442        "pyproject.toml" => parse_pyproject_toml(root),
443        _ => None,
444    }
445}
446
447fn parse_cargo_toml(root: &Path) -> Option<ManifestInfo> {
448    let content = fs::read_to_string(root.join("Cargo.toml")).ok()?;
449    let mut name = None;
450    let mut version = None;
451    let mut deps: Vec<String> = Vec::new();
452    let mut in_package = false;
453    let mut in_deps = false;
454
455    for line in content.lines() {
456        let trimmed = line.trim();
457
458        if trimmed.starts_with('[') {
459            in_package = trimmed == "[package]";
460            in_deps = trimmed == "[dependencies]";
461            continue;
462        }
463
464        if in_package {
465            if let Some(val) = extract_toml_string_value(trimmed, "name") {
466                name = Some(val);
467            } else if let Some(val) = extract_toml_string_value(trimmed, "version") {
468                version = Some(val);
469            }
470        }
471
472        if in_deps {
473            // dep_name = "version" or dep_name = { version = "..." }
474            if let Some(dep_name) = trimmed.split('=').next() {
475                let dep = dep_name.trim();
476                if !dep.is_empty() && !dep.starts_with('#') {
477                    deps.push(dep.to_string());
478                }
479            }
480        }
481    }
482
483    deps.sort();
484    deps.truncate(10);
485
486    Some(ManifestInfo {
487        name,
488        version,
489        deps,
490    })
491}
492
493fn parse_package_json(root: &Path) -> Option<ManifestInfo> {
494    let content = fs::read_to_string(root.join("package.json")).ok()?;
495    let json: serde_json::Value = serde_json::from_str(&content).ok()?;
496
497    let name = json.get("name").and_then(|v| v.as_str()).map(String::from);
498    let version = json
499        .get("version")
500        .and_then(|v| v.as_str())
501        .map(String::from);
502
503    let mut deps: Vec<String> = Vec::new();
504    if let Some(obj) = json.get("dependencies").and_then(|v| v.as_object()) {
505        for key in obj.keys() {
506            deps.push(key.clone());
507        }
508    }
509    deps.sort();
510    deps.truncate(10);
511
512    Some(ManifestInfo {
513        name,
514        version,
515        deps,
516    })
517}
518
519fn parse_go_mod(root: &Path) -> Option<ManifestInfo> {
520    let content = fs::read_to_string(root.join("go.mod")).ok()?;
521    let mut name = None;
522    let mut deps: Vec<String> = Vec::new();
523    let mut in_require = false;
524
525    for line in content.lines() {
526        let trimmed = line.trim();
527        if let Some(rest) = trimmed.strip_prefix("module ") {
528            name = Some(rest.trim().to_string());
529        }
530        if trimmed == "require (" {
531            in_require = true;
532            continue;
533        }
534        if trimmed == ")" {
535            in_require = false;
536            continue;
537        }
538        if in_require {
539            // e.g. "github.com/gin-gonic/gin v1.9.0"
540            if let Some(dep) = trimmed.split_whitespace().next() {
541                if !dep.starts_with("//") {
542                    // Use short name (last segment of module path)
543                    let short = dep.rsplit('/').next().unwrap_or(dep);
544                    deps.push(short.to_string());
545                }
546            }
547        }
548    }
549
550    deps.sort();
551    deps.truncate(10);
552
553    Some(ManifestInfo {
554        name,
555        version: None,
556        deps,
557    })
558}
559
560fn parse_pyproject_toml(root: &Path) -> Option<ManifestInfo> {
561    let content = fs::read_to_string(root.join("pyproject.toml")).ok()?;
562    let mut name = None;
563    let mut version = None;
564    let mut deps: Vec<String> = Vec::new();
565    let mut in_project = false;
566    let mut in_deps = false;
567
568    for line in content.lines() {
569        let trimmed = line.trim();
570
571        if trimmed.starts_with('[') {
572            in_project = trimmed == "[project]";
573            in_deps = trimmed == "[project.dependencies]"
574                || (in_project && trimmed == "dependencies = [");
575            continue;
576        }
577
578        if in_project {
579            if let Some(val) = extract_toml_string_value(trimmed, "name") {
580                name = Some(val);
581            } else if let Some(val) = extract_toml_string_value(trimmed, "version") {
582                version = Some(val);
583            }
584
585            // Inline dependencies array
586            if trimmed.starts_with("dependencies") && trimmed.contains('[') {
587                // Parse inline: dependencies = ["dep1", "dep2>=1.0"]
588                if let Some(arr_start) = trimmed.find('[') {
589                    let arr_content = &trimmed[arr_start..];
590                    for item in arr_content.split('"') {
591                        let item = item.trim();
592                        if item.is_empty()
593                            || item.starts_with('[')
594                            || item.starts_with(']')
595                            || item.starts_with(',')
596                        {
597                            continue;
598                        }
599                        // Extract package name (before any version specifier)
600                        let dep_name = item
601                            .split(&['>', '<', '=', '~', '!', ';', '['][..])
602                            .next()
603                            .unwrap_or(item)
604                            .trim();
605                        if !dep_name.is_empty() {
606                            deps.push(dep_name.to_string());
607                        }
608                    }
609                }
610            }
611        }
612
613        if in_deps && !trimmed.starts_with('[') {
614            // Multi-line deps array items: "dep_name>=1.0",
615            let clean = trimmed.trim_matches(&['"', '\'', ',', ' '][..]);
616            if !clean.is_empty() && clean != "]" {
617                let dep_name = clean
618                    .split(&['>', '<', '=', '~', '!', ';', '['][..])
619                    .next()
620                    .unwrap_or(clean)
621                    .trim();
622                if !dep_name.is_empty() {
623                    deps.push(dep_name.to_string());
624                }
625            }
626        }
627    }
628
629    deps.sort();
630    deps.truncate(10);
631
632    Some(ManifestInfo {
633        name,
634        version,
635        deps,
636    })
637}
638
/// Extract the value from a single TOML `key = value` line.
///
/// * `line` — one line of TOML; surrounding whitespace is ignored.
/// * `key` — the exact key expected at the start of the line. A longer key
///   that merely starts with `key` (`names`, `version.workspace`) does not
///   match, because `=` must follow the key directly (whitespace allowed).
///
/// Quoted values (`"…"` or `'…'`) are returned without their quotes. A bare
/// value (e.g. `2021`) is returned up to the first whitespace or `#`.
/// Inline tables and arrays (`{ … }`, `[ … ]`) are not scalar values and
/// yield `None`, as do empty values and non-matching lines.
fn extract_toml_string_value(line: &str, key: &str) -> Option<String> {
    let rest = line.trim().strip_prefix(key)?.trim_start();
    let after_eq = rest.strip_prefix('=')?.trim();

    let val = if let Some(body) = after_eq.strip_prefix('"') {
        body.split('"').next().unwrap_or("")
    } else if let Some(body) = after_eq.strip_prefix('\'') {
        body.split('\'').next().unwrap_or("")
    } else if after_eq.starts_with(|c: char| c == '{' || c == '[') {
        // `version = { workspace = true }` and friends carry no string value.
        return None;
    } else {
        // Bare value — take up to whitespace or comment
        after_eq
            .split(|c: char| c.is_whitespace() || c == '#')
            .next()
            .unwrap_or("")
    };

    if val.is_empty() {
        None
    } else {
        Some(val.to_string())
    }
}
664
665// ---------------------------------------------------------------------------
666// Git context
667// ---------------------------------------------------------------------------
668
/// Run `git <args>` in `root` with a 200ms timeout and return its trimmed
/// stdout.
///
/// Returns `None` when git cannot be spawned, exits unsuccessfully, does not
/// finish within the deadline (the child is then killed), or prints nothing.
///
/// stdout is drained on a helper thread while the child is polled. Reading
/// only after exit would stall any command whose output exceeds the OS pipe
/// buffer: the child blocks on write, never exits, and is reported as a
/// timeout. stdin is detached so git never reads from the stream this
/// process inherited.
fn git_output(root: &Path, args: &[&str]) -> Option<String> {
    let mut child = Command::new("git")
        .args(args)
        .current_dir(root)
        .stdin(std::process::Stdio::null())
        .stdout(std::process::Stdio::piped())
        .stderr(std::process::Stdio::null())
        .spawn()
        .ok()?;

    let Some(stdout) = child.stdout.take() else {
        let _ = child.kill();
        let _ = child.wait();
        return None;
    };
    let reader = std::thread::spawn(move || std::io::read_to_string(stdout).ok());

    let deadline = Instant::now() + Duration::from_millis(200);
    let status = loop {
        match child.try_wait() {
            Ok(Some(status)) => break status,
            Ok(None) if Instant::now() < deadline => {
                std::thread::sleep(Duration::from_millis(5));
            }
            // Deadline passed or the wait itself failed: stop the child and
            // reap it. The reader thread ends on its own once the pipe closes.
            _ => {
                let _ = child.kill();
                let _ = child.wait();
                return None;
            }
        }
    };

    if !status.success() {
        return None;
    }

    let text = reader.join().ok()??;
    let trimmed = text.trim();
    if trimmed.is_empty() {
        None
    } else {
        Some(trimmed.to_string())
    }
}
707
708fn git_context(root: &Path) -> Option<String> {
709    let branch = git_output(root, &["branch", "--show-current"])
710        .or_else(|| git_output(root, &["rev-parse", "--short", "HEAD"]))?;
711
712    let dirty_count = git_output(root, &["status", "--porcelain"]).map_or(0, |s| s.lines().count());
713
714    let dirty_str = if dirty_count == 0 {
715        "clean".to_string()
716    } else {
717        format!("{dirty_count} uncommitted files")
718    };
719
720    Some(format!("branch {branch}, {dirty_str}"))
721}
722
723// ---------------------------------------------------------------------------
724// Test style detection
725// ---------------------------------------------------------------------------
726
727fn test_style(root: &Path, walk: &WalkResult, primary_lang: Option<Lang>) -> Option<String> {
728    let mut styles: Vec<String> = Vec::new();
729
730    // Directory-based test detection
731    if walk.has_tests_dir {
732        styles.push("tests/".to_string());
733    }
734    if walk.has_test_dir {
735        styles.push("test/".to_string());
736    }
737    if walk.has_dunder_tests {
738        styles.push("__tests__/".to_string());
739    }
740    if walk.has_spec_dir {
741        styles.push("spec/".to_string());
742    }
743
744    // File pattern detection
745    let has_test_files = walk
746        .code_files
747        .iter()
748        .any(|(path, _)| path.contains(".test.") || path.contains(".spec."));
749    let has_go_tests = walk
750        .code_files
751        .iter()
752        .any(|(path, _)| path.ends_with("_test.go"));
753    let has_py_tests = walk
754        .code_files
755        .iter()
756        .any(|(path, _)| path.starts_with("test_") || path.contains("/test_"));
757
758    if has_test_files && !walk.has_dunder_tests {
759        styles.push("*.test/spec files".to_string());
760    }
761    if has_go_tests {
762        styles.push("_test.go".to_string());
763    }
764    if has_py_tests {
765        styles.push("test_*.py".to_string());
766    }
767
768    // Rust in-source test detection
769    if primary_lang == Some(Lang::Rust) {
770        let has_cfg_test = walk
771            .code_files
772            .iter()
773            .filter(|(path, _)| {
774                Path::new(path)
775                    .extension()
776                    .is_some_and(|ext| ext.eq_ignore_ascii_case("rs"))
777            })
778            .take(5)
779            .any(|(path, _)| {
780                let full = root.join(path);
781                fs::read_to_string(&full)
782                    .ok()
783                    .is_some_and(|content| content.contains("#[cfg(test)]"))
784            });
785        if has_cfg_test {
786            styles.push("in-source #[cfg(test)]".to_string());
787        }
788    }
789
790    if styles.is_empty() {
791        None
792    } else {
793        Some(styles.join(", "))
794    }
795}
796
797// ---------------------------------------------------------------------------
798// Hot files — most imported local files
799// ---------------------------------------------------------------------------
800
801fn hot_files(root: &Path, walk: &WalkResult, primary_lang: Option<Lang>) -> Option<String> {
802    let lang = primary_lang?; // require a detected language
803    let start = Instant::now();
804
805    // Sort by size (smallest first) and take first 100
806    let mut files: Vec<&(String, u64)> = walk.code_files.iter().collect();
807    files.sort_by_key(|(_, size)| *size);
808    files.truncate(100);
809
810    // Use resolve_related_files to get real file paths for imports.
811    // Count how many files import each target path.
812    let mut path_counts: HashMap<std::path::PathBuf, usize> = HashMap::new();
813    // Also collect all import source lines for symbol extraction later
814    let mut all_import_sources: Vec<String> = Vec::new();
815
816    for (rel_path, _) in &files {
817        if start.elapsed().as_millis() > 100 {
818            break;
819        }
820        let full = root.join(rel_path);
821        let Ok(content) = fs::read_to_string(&full) else {
822            continue;
823        };
824
825        // Resolve imports to actual file paths using the proven import resolver
826        let resolved = crate::read::imports::resolve_related_files_with_content(&full, &content);
827        for target_path in resolved {
828            *path_counts.entry(target_path).or_insert(0) += 1;
829        }
830
831        // Collect import source strings for symbol extraction
832        for line in content.lines() {
833            if is_import_line(line, lang) {
834                let source = crate::lang::outline::extract_import_source(line, Some(lang));
835                if !source.is_empty() && !crate::read::imports::is_external(&source, lang) {
836                    all_import_sources.push(source);
837                }
838            }
839        }
840    }
841
842    if path_counts.is_empty() {
843        return None;
844    }
845
846    // Sort by import count descending, take top 5
847    let mut sorted: Vec<(std::path::PathBuf, usize)> = path_counts.into_iter().collect();
848    sorted.sort_by(|a, b| b.1.cmp(&a.1).then(a.0.cmp(&b.0)));
849    sorted.truncate(5);
850
851    if sorted[0].1 < 2 {
852        return None;
853    }
854
855    // For each hot file, find the most commonly imported symbol by scanning
856    // import sources that reference this file's module name.
857    let parts: Vec<String> = sorted
858        .iter()
859        .filter(|(_, count)| *count >= 2)
860        .map(|(path, count)| {
861            let rel = path.strip_prefix(root).unwrap_or(path);
862            let rel_str = rel.display().to_string();
863
864            // Derive the module name from the file path
865            // src/types.rs → "types", src/lang/mod.rs → "lang", src/error.rs → "error"
866            let stem = path.file_stem().and_then(|s| s.to_str()).unwrap_or("");
867            let module_name = if stem == "mod" || stem == "index" || stem == "__init__" {
868                path.parent()
869                    .and_then(|p| p.file_name())
870                    .and_then(|n| n.to_str())
871                    .unwrap_or(stem)
872            } else {
873                stem
874            };
875
876            // Count symbols imported from this module across all import sources
877            let mut symbol_counts: HashMap<String, usize> = HashMap::new();
878            for source in &all_import_sources {
879                // Match imports that reference this module
880                // e.g., for module "types": "crate::types::OutlineEntry" matches
881                let segments: Vec<&str> = source.split("::").collect();
882                if let Some(mod_pos) = segments.iter().position(|s| *s == module_name) {
883                    // Everything after the module name is a symbol path
884                    for &sym in segments.iter().skip(mod_pos + 1) {
885                        if !sym.is_empty()
886                            && !sym.contains('*')
887                            && !sym.contains('{')
888                            && sym != "self"
889                        {
890                            *symbol_counts.entry(sym.to_string()).or_insert(0) += 1;
891                        }
892                    }
893                }
894            }
895
896            // Pick the most frequently imported symbol (break ties alphabetically for determinism)
897            let top_sym = symbol_counts
898                .into_iter()
899                .max_by(|(a_sym, a_c), (b_sym, b_c)| a_c.cmp(b_c).then(b_sym.cmp(a_sym)))
900                .map(|(sym, _)| sym);
901
902            if let Some(sym) = top_sym {
903                format!("{rel_str}({sym}) ×{count}")
904            } else {
905                format!("{rel_str} ×{count}")
906            }
907        })
908        .collect();
909
910    if parts.is_empty() {
911        None
912    } else {
913        Some(parts.join(", "))
914    }
915}
916
917// ---------------------------------------------------------------------------
918// Tests
919// ---------------------------------------------------------------------------
920
// Integration-style tests: two of them run against this crate's own
// checkout (located through `CARGO_MANIFEST_DIR`), one against an empty
// temporary directory.
#[cfg(test)]
mod tests {
    use super::*;

    /// Fingerprinting this repository yields a non-empty, compact summary
    /// that names the language, the manifest file and the project.
    #[test]
    fn test_fingerprint_on_srcwalk() {
        let root = Path::new(env!("CARGO_MANIFEST_DIR"));
        let output = fingerprint(root);

        assert!(!output.is_empty(), "fingerprint should not be empty");
        assert!(
            output.contains("Rust"),
            "should detect Rust as primary language"
        );
        assert!(output.contains("Cargo.toml"), "should detect manifest");
        assert!(output.contains("srcwalk"), "should find project name");

        // Token budget: output should be compact
        // (rough estimate of 4 bytes per token)
        let estimated_tokens = output.len() / 4;
        assert!(
            estimated_tokens < 300,
            "fingerprint should be <300 tokens, got {estimated_tokens}"
        );
    }

    /// An empty directory must not fail; either no output or a header
    /// reporting zero source files is acceptable.
    #[test]
    fn test_fingerprint_empty_dir() {
        let tmp = tempfile::tempdir().unwrap();
        let output = fingerprint(tmp.path());

        // Empty dir: should produce minimal output or empty
        // With 0 files and 0 modules, the header will say "0 source files"
        // but that's fine — it's still useful context
        assert!(
            output.is_empty() || output.contains("0 source files"),
            "empty dir should produce empty or minimal output, got: {output}"
        );
    }

    /// The Cargo.toml scanner finds this crate's name, a version and two of
    /// its dependencies. Both dependency names must fall within the first
    /// ten alphabetically, because the parser truncates the sorted list.
    #[test]
    fn test_manifest_parsing() {
        let root = Path::new(env!("CARGO_MANIFEST_DIR"));
        let info = parse_cargo_toml(root).expect("should parse Cargo.toml");

        assert_eq!(info.name.as_deref(), Some("srcwalk"));
        assert!(info.version.is_some(), "should have a version");
        assert!(
            info.deps.iter().any(|d| d == "clap"),
            "deps should include clap: {:?}",
            info.deps
        );
        assert!(
            info.deps.iter().any(|d| d == "dashmap"),
            "deps should include dashmap: {:?}",
            info.deps
        );
    }
}