1use std::collections::{HashMap, HashSet};
30use std::num::NonZeroUsize;
31use std::path::{Path, PathBuf};
32
33use anyhow::{Context, Result};
34use lru::LruCache;
35use petgraph::graph::DiGraph;
36use rayon::prelude::*;
37use tracing::{debug, info};
38
39use crate::db::Database;
40use crate::git_scanner;
41use crate::graph_builder;
42use crate::models::{CommitInfo, DependencyEdge, GraphSnapshot};
43use crate::parser;
44use crate::scoring;
45
/// Returns `true` when `path` looks like test/fixture/example material that
/// should be excluded from the dependency graph.
///
/// Matching is case-insensitive and treats `\` as `/` so Windows-style paths
/// behave the same as POSIX ones. Three checks are applied in order:
/// a test-ish directory anywhere in the path, a test-ish directory as the
/// first component, and finally test-suffixed / `test_`-prefixed file names.
fn is_test_path(path: &Path) -> bool {
    // Normalized full path: lowercase, forward slashes only.
    let normalized = path.to_string_lossy().to_ascii_lowercase().replace('\\', "/");

    // Test-ish directories appearing anywhere (as an interior component).
    const TEST_DIRS: &[&str] = &[
        "/test/",
        "/tests/",
        "/testdata/",
        "/test_data/",
        "/__tests__/",
        "/spec/",
        "/fixtures/",
        "/fixture/",
        "/examples/",
        "/example/",
        "/benchmarks/",
        "/bench/",
        "/testutil/",
        "/testing/",
        "/mock/",
        "/mocks/",
        "/snapshots/",
        "/e2e/",
    ];
    if TEST_DIRS.iter().any(|dir| normalized.contains(dir)) {
        return true;
    }

    // The same directories when they are the leading path component
    // (the interior check above requires a slash on both sides).
    const TEST_DIR_PREFIXES: &[&str] = &[
        "test/",
        "tests/",
        "testdata/",
        "test_data/",
        "__tests__/",
        "spec/",
        "fixtures/",
        "fixture/",
        "examples/",
        "example/",
        "benchmarks/",
        "bench/",
        ".github/",
    ];
    if TEST_DIR_PREFIXES.iter().any(|pre| normalized.starts_with(pre)) {
        return true;
    }

    // File-name conventions: `foo_test.go`, `foo.test.ts`, `foo.spec.tsx`,
    // `test_foo.py`, and friends.
    const TEST_FILE_SUFFIXES: &[&str] = &[
        "_test.ts",
        "_test.tsx",
        "_test.rs",
        "_test.go",
        "_test.py",
        ".test.ts",
        ".test.tsx",
        ".test.js",
        ".spec.ts",
        ".spec.tsx",
    ];
    let file_name = path
        .file_name()
        .unwrap_or_default()
        .to_string_lossy()
        .to_ascii_lowercase();
    TEST_FILE_SUFFIXES.iter().any(|suf| file_name.ends_with(suf))
        || file_name.starts_with("test_")
}
132
/// Returns `true` for import specifiers that should not become graph edges:
/// single characters, URLs, `npm:`/`node:` schemes, asset files, and
/// version-like strings such as `"1.2.3"`.
fn is_noise_import(name: &str) -> bool {
    let trimmed = name.trim();

    // Empty or one-character names carry no useful module identity.
    if trimmed.len() <= 1 {
        return true;
    }

    // URL imports and scheme-prefixed specifiers.
    const NOISE_PREFIXES: &[&str] = &["http://", "https://", "npm:", "node:"];
    if NOISE_PREFIXES.iter().any(|pre| trimmed.starts_with(pre)) {
        return true;
    }

    // Non-code asset imports (styles, data, images, docs).
    const ASSET_SUFFIXES: &[&str] = &[
        ".css", ".scss", ".json", ".svg", ".png", ".jpg", ".wasm", ".html", ".md", ".txt",
    ];
    let lower = trimmed.to_ascii_lowercase();
    if ASSET_SUFFIXES.iter().any(|suf| lower.ends_with(suf)) {
        return true;
    }

    // Version-ish strings: a leading digit plus a dot (e.g. "0.1.0").
    let leads_with_digit = trimmed
        .chars()
        .next()
        .is_some_and(|c| c.is_ascii_digit());
    if leads_with_digit && trimmed.contains('.') {
        return true;
    }

    // Strings made up entirely of digits and dots.
    trimmed.chars().all(|c| c.is_ascii_digit() || c == '.')
}
188
/// Normalizes an import specifier to a bare package/module name:
/// `npm:chalk@5` → `chalk`, `npm:@types/node` → `@types/node`,
/// `node:fs` → `fs`. Anything else is returned trimmed but unchanged.
fn normalize_import(name: &str) -> String {
    let trimmed = name.trim();

    // npm specifiers: drop the "npm:" scheme and any trailing "@version".
    if let Some(spec) = trimmed.strip_prefix("npm:") {
        let bare = if spec.starts_with('@') {
            // Scoped package: the leading '@' belongs to the name, so only an
            // '@' appearing after it separates the version.
            match spec[1..].find('@') {
                Some(pos) => &spec[..pos + 1],
                None => spec,
            }
        } else {
            spec.split('@').next().unwrap_or(spec)
        };
        return bare.to_string();
    }

    // Node builtins: drop the "node:" scheme.
    if let Some(builtin) = trimmed.strip_prefix("node:") {
        return builtin.to_string();
    }

    trimmed.to_string()
}
220
221fn collect_edges(
233 source_pkg: &str,
234 imports: &[String],
235 file_path_str: &str,
236 all_nodes: &mut HashSet<String>,
237 all_edges: &mut Vec<DependencyEdge>,
238) {
239 let source_dir = Path::new(file_path_str)
240 .parent()
241 .unwrap_or_else(|| Path::new(""));
242
243 for imp in imports {
244 if is_noise_import(imp) {
245 continue;
246 }
247 let imp = normalize_import(imp);
248 if imp.is_empty() {
249 continue;
250 }
251
252 let target = if imp.starts_with("./") || imp.starts_with("../") {
254 let resolved = source_dir.join(&imp);
256 let resolved_str = resolved.to_string_lossy().replace('\\', "/");
258 let mut parts: Vec<&str> = Vec::new();
259 for part in resolved_str.split('/') {
260 match part {
261 ".." if !parts.is_empty() => {
262 parts.pop();
263 }
264 "." | "" => {}
265 _ => parts.push(part),
266 }
267 }
268 if parts.is_empty() {
269 continue;
270 }
271 let joined = parts.join("/");
273 parser::extract_package_name(Path::new(&joined))
274 } else if imp.contains('/') {
275 parser::extract_package_name(Path::new(&imp))
277 } else {
278 imp
280 };
281
282 if target.is_empty() {
283 continue;
284 }
285
286 if target == source_pkg {
288 continue;
289 }
290
291 all_nodes.insert(target.clone());
292 all_edges.push(DependencyEdge {
293 from_module: source_pkg.to_string(),
294 to_module: target,
295 file_path: file_path_str.to_string(),
296 line: 0,
297 weight: 1,
298 });
299 }
300}
301
/// Summary of one `run_scan` invocation.
pub struct ScanResult {
    /// Number of commits walked in this scan (0 when nothing new was found).
    pub commits_scanned: usize,
    /// Number of graph snapshots persisted (commits with duplicate trees or
    /// empty graphs are skipped, so this can be less than `commits_scanned`).
    pub graphs_created: usize,
    /// Number of drift scores computed; incremented once per stored snapshot.
    pub drifts_calculated: usize,
}
309
/// Walks repository history, builds a dependency-graph snapshot per unique
/// commit tree, scores drift against the previous snapshot, and persists
/// everything to `db` inside one transaction.
///
/// The scan is incremental: when the database already holds a latest scanned
/// commit, only commits after it are processed and the previous snapshot is
/// reloaded so drift continuity is preserved; otherwise all snapshots are
/// cleared and history is rebuilt from scratch (up to `max_commits`).
///
/// # Errors
/// Fails if the repository cannot be opened, or on database/commit-listing
/// errors. Per-commit decode/tree/parse failures are logged and skipped.
pub fn run_scan(path: &Path, db: &Database, max_commits: usize) -> Result<ScanResult> {
    let repo = gix::discover(path)
        .with_context(|| format!("Failed to open repo for graph building: {}", path.display()))?;

    let last_commit = db.get_latest_scanned_commit()?;
    let existing_count = db.graph_snapshot_count()?;

    // Choose incremental vs. full scan based on whether we have scanned before.
    let mut commits = if let Some(ref last_hash) = last_commit {
        let new_commits = git_scanner::get_commits_since(&repo, last_hash, max_commits)?;
        if new_commits.is_empty() {
            // `[..7]` shows a short hash; assumes a full-length hex hash.
            info!("No new commits since last scan ({})", &last_hash[..7]);
            return Ok(ScanResult {
                commits_scanned: 0,
                graphs_created: 0,
                drifts_calculated: 0,
            });
        }
        info!(
            "Incremental scan: {} existing snapshots, {} new commits since {}",
            existing_count,
            new_commits.len(),
            &last_hash[..7]
        );
        new_commits
    } else {
        // Fresh scan: drop any stale snapshots before rebuilding.
        db.clear_all_graph_snapshots()?;
        info!("Building dependency graphs...");
        git_scanner::get_commits_in_order(&repo, max_commits)?
    };

    // Reverse so iteration runs oldest-first; drift needs each commit's
    // predecessor graph (`prev_graph`) to already be built.
    commits.reverse();

    // Seed `prev_graph` from the stored snapshot of the last scanned commit so
    // the first new drift score is computed against real history.
    let mut prev_graph: Option<DiGraph<String, ()>> = None;
    if let Some(ref last_hash) = last_commit {
        if let Some(snapshot) = db.get_graph_snapshot(last_hash)? {
            let nodes: HashSet<String> = snapshot.nodes.into_iter().collect();
            prev_graph = Some(graph_builder::build_graph(&nodes, &snapshot.edges));
            debug!(
                "Loaded previous graph for drift continuity ({} nodes)",
                nodes.len()
            );
        }
    }

    // All inserts below happen in a single transaction, committed at the end.
    db.begin_transaction()?;

    let mut graphs_created: usize = 0;
    let mut drifts_calculated: usize = 0;

    // Commits sharing an identical tree produce identical graphs; dedupe them.
    let mut seen_trees: HashSet<String> = HashSet::new();

    // Caches shared across commits: subtree walks, and parsed import lists
    // keyed by blob OID (20 raw bytes) so unchanged files are never re-parsed.
    let mut subtree_cache = git_scanner::SubtreeCache::new();
    let mut blob_import_cache: LruCache<[u8; 20], Vec<String>> =
        LruCache::new(NonZeroUsize::new(50_000).unwrap());
    let total_commits = commits.len();
    let scan_start = std::time::Instant::now();

    for (ci, commit) in commits.iter().enumerate() {
        let commit_hash = commit.id().to_string();

        let decoded = match commit.decode() {
            Ok(d) => d,
            Err(e) => {
                debug!(hash = %commit_hash, error = %e, "Failed to decode commit, skipping");
                continue;
            }
        };

        // Author metadata is best-effort; fall back to "unknown" placeholders.
        let (author_name, author_email, timestamp) = match decoded.author() {
            Ok(sig) => (sig.name.to_string(), sig.email.to_string(), sig.seconds()),
            Err(_) => ("unknown".to_string(), "unknown".to_string(), 0),
        };
        let commit_info = CommitInfo {
            hash: commit_hash.clone(),
            author_name,
            author_email,
            message: decoded.message.to_string(),
            timestamp,
            tree_id: decoded.tree().to_string(),
        };

        // Commit metadata is stored even when graph building is later skipped.
        db.insert_commit(&commit_info)?;

        let tree_oid = match git_scanner::get_tree_for_commit(&repo, &commit_hash) {
            Ok(oid) => oid,
            Err(e) => {
                debug!(hash = %commit_hash, error = %e, "Failed to get commit tree, skipping");
                continue;
            }
        };

        // Same tree => same graph; skip the duplicate work (and snapshot).
        let tree_hex = tree_oid.to_string();
        if !seen_trees.insert(tree_hex) {
            debug!(hash = %commit_hash, "Same tree already processed, skipping");
            continue;
        }

        let tree = match repo.find_tree(tree_oid) {
            Ok(t) => t,
            Err(e) => {
                debug!(hash = %commit_hash, error = %e, "Tree not found");
                continue;
            }
        };

        let entries = match git_scanner::walk_tree_entries_cached(&repo, &tree, &mut subtree_cache)
        {
            Ok(e) => e,
            Err(e) => {
                debug!(hash = %commit_hash, error = %e, "Tree walk failed");
                continue;
            }
        };

        if entries.is_empty() {
            continue;
        }

        let mut all_nodes: HashSet<String> = HashSet::with_capacity(entries.len() / 4);
        let mut all_edges: Vec<DependencyEdge> = Vec::with_capacity(entries.len());
        let mut cache_hits: usize = 0;

        // Work item for the parallel parse phase: a blob that was not in the
        // import cache and needs its imports extracted.
        struct ParseJob {
            source_pkg: String,
            oid_key: [u8; 20],
            content: String,
            file_path: PathBuf,
        }

        // Split entries into cache hits (imports already known) and parse jobs.
        let mut cached_imports: Vec<(String, Vec<String>, String)> = Vec::new();
        let mut parse_jobs: Vec<ParseJob> = Vec::new();

        for (file_path, blob_oid) in &entries {
            if is_test_path(file_path.as_path()) {
                continue;
            }

            let source_pkg = parser::extract_package_name(file_path.as_path());
            all_nodes.insert(source_pkg.clone());

            // Raw 20-byte OID as the cache key; a length mismatch (unexpected)
            // degrades to the all-zero key rather than panicking.
            let oid_key: [u8; 20] = blob_oid.as_bytes().try_into().unwrap_or([0u8; 20]);

            if let Some(cached) = blob_import_cache.get(&oid_key) {
                cache_hits += 1;
                if !cached.is_empty() {
                    let file_path_str = file_path.to_string_lossy().replace('\\', "/");
                    cached_imports.push((source_pkg, cached.clone(), file_path_str));
                }
            } else {
                // Unreadable, non-UTF-8, or unsupported-language blobs are
                // negatively cached (empty import list) so they are only
                // inspected once.
                let blob = match repo.find_object(*blob_oid) {
                    Ok(b) => b,
                    Err(_) => {
                        blob_import_cache.put(oid_key, Vec::new());
                        continue;
                    }
                };
                let content = match std::str::from_utf8(&blob.data) {
                    Ok(s) => s.to_string(),
                    Err(_) => {
                        blob_import_cache.put(oid_key, Vec::new());
                        continue;
                    }
                };
                let file_path_str = file_path.to_string_lossy();
                if parser::detect_language(file_path_str.as_ref()).is_none() {
                    blob_import_cache.put(oid_key, Vec::new());
                    continue;
                }
                parse_jobs.push(ParseJob {
                    source_pkg,
                    oid_key,
                    content,
                    file_path: file_path.clone(),
                });
            }
        }

        // Packages defined by files in this tree ("internal" nodes); captured
        // before import targets get added to `all_nodes`.
        let source_pkgs: HashSet<String> = all_nodes.clone();

        // Parse imports in parallel (rayon); language detection is repeated
        // here because the closure only sees the job data.
        let parsed_results: Vec<(String, [u8; 20], Vec<String>, String)> = parse_jobs
            .into_par_iter()
            .filter_map(|job| {
                let path_str = job.file_path.to_string_lossy();
                let lang = parser::detect_language(path_str.as_ref())?;
                let imports = parser::parse_imports(&job.content, lang, job.file_path.as_path());
                let file_path_str = path_str.replace('\\', "/");
                Some((job.source_pkg, job.oid_key, imports, file_path_str))
            })
            .collect();

        // Fold freshly parsed imports into the cache and the edge set.
        for (source_pkg, oid_key, imports, file_path_str) in parsed_results {
            blob_import_cache.put(oid_key, imports.clone());
            if !imports.is_empty() {
                collect_edges(
                    &source_pkg,
                    &imports,
                    &file_path_str,
                    &mut all_nodes,
                    &mut all_edges,
                );
            }
        }

        // Fold cached imports into the edge set.
        for (source_pkg, imports, file_path_str) in cached_imports {
            collect_edges(
                &source_pkg,
                &imports,
                &file_path_str,
                &mut all_nodes,
                &mut all_edges,
            );
        }

        if all_nodes.is_empty() {
            continue;
        }

        // Collapse duplicate (from, to) edges into one edge with a summed
        // weight.
        let mut edge_weight_map: HashMap<(String, String), DependencyEdge> =
            HashMap::with_capacity(all_edges.len() / 2);
        for edge in all_edges {
            let key = (edge.from_module.clone(), edge.to_module.clone());
            edge_weight_map
                .entry(key)
                .and_modify(|existing| existing.weight += 1)
                .or_insert(edge);
        }
        let all_edges: Vec<DependencyEdge> = edge_weight_map.into_values().collect();

        // External (non-repo) modules are kept only when imported by at least
        // this many distinct internal packages — prunes one-off dependencies.
        const MIN_EXT_IMPORTERS: usize = 3;

        // Count distinct importers per external module.
        let mut ext_importer_count: HashMap<String, HashSet<String>> = HashMap::new();
        for edge in &all_edges {
            if !source_pkgs.contains(&edge.to_module) {
                ext_importer_count
                    .entry(edge.to_module.clone())
                    .or_default()
                    .insert(edge.from_module.clone());
            }
        }

        // Keep all internal packages plus sufficiently-popular externals.
        let kept_nodes: HashSet<String> = all_nodes
            .iter()
            .filter(|n| {
                source_pkgs.contains(*n)
                    || ext_importer_count
                        .get(*n)
                        .is_some_and(|importers| importers.len() >= MIN_EXT_IMPORTERS)
            })
            .cloned()
            .collect();

        // Drop edges touching pruned nodes.
        let filtered_edges: Vec<DependencyEdge> = all_edges
            .into_iter()
            .filter(|e| kept_nodes.contains(&e.from_module) && kept_nodes.contains(&e.to_module))
            .collect();

        let graph = graph_builder::build_graph(&kept_nodes, &filtered_edges);

        // Score drift relative to the previous commit's graph (None on the
        // very first snapshot of a fresh scan).
        let nodes_vec: Vec<String> = kept_nodes.iter().cloned().collect();
        let edges_pairs = scoring::edges_to_pairs(&filtered_edges);
        let drift = scoring::calculate_drift(
            &graph,
            prev_graph.as_ref(),
            &nodes_vec,
            &edges_pairs,
            commit_info.timestamp,
        );
        drifts_calculated += 1;

        let snapshot = GraphSnapshot {
            commit_hash: commit_hash.clone(),
            nodes: nodes_vec,
            edges: filtered_edges,
            node_count: graph.node_count(),
            edge_count: graph.edge_count(),
            timestamp: commit_info.timestamp,
            drift: Some(drift),
        };

        db.insert_graph_snapshot(&snapshot)?;
        graphs_created += 1;

        // This commit's graph becomes the baseline for the next drift score.
        prev_graph = Some(graph);

        // Progress log every 25 commits and on the final one. Note
        // `cache_hits` is per-commit (reset each iteration), not cumulative.
        if (ci + 1) % 25 == 0 || ci + 1 == total_commits {
            let elapsed = scan_start.elapsed().as_secs_f64();
            let pct = ((ci + 1) as f64 / total_commits as f64 * 100.0) as u32;
            info!(
                "[{}/{}] {}% — {} graphs, {} cached blobs ({} hits), {:.1}s",
                ci + 1,
                total_commits,
                pct,
                graphs_created,
                blob_import_cache.len(),
                cache_hits,
                elapsed,
            );
        }
    }

    db.commit_transaction()?;

    info!(
        total = graphs_created,
        drifts = drifts_calculated,
        "Dependency graph + drift creation complete"
    );

    Ok(ScanResult {
        commits_scanned: commits.len(),
        graphs_created,
        drifts_calculated,
    })
}
692
#[cfg(test)]
/// Test helper: converts a file path to a `::`-separated module path,
/// dropping the file extension (`"src/main.rs"` → `"src::main"`).
///
/// The extension is stripped only when the final dot lies inside the last
/// path segment; the previous `rsplit_once('.')` cut at the last dot anywhere,
/// so a dot in a directory name (e.g. `"pkg/v1.2"` with an extensionless
/// file) truncated the whole module path.
fn path_to_module(path: &str) -> String {
    let path = path.replace('\\', "/");
    let without_ext = match path.rfind('.') {
        // Dot belongs to the file name only if no '/' follows it.
        Some(pos) if !path[pos..].contains('/') => &path[..pos],
        _ => path.as_str(),
    };
    without_ext.replace('/', "::")
}
713
714#[cfg(test)]
718mod tests {
719 use super::*;
720
721 #[test]
722 fn test_path_to_module() {
723 assert_eq!(path_to_module("src/main.rs"), "src::main");
724 assert_eq!(
725 path_to_module("packages/ui/index.ts"),
726 "packages::ui::index"
727 );
728 assert_eq!(path_to_module("cmd/server/main.go"), "cmd::server::main");
729 assert_eq!(path_to_module("lib.rs"), "lib");
730 assert_eq!(path_to_module("src\\win\\main.rs"), "src::win::main");
731 }
732
733 #[test]
734 fn test_is_test_path() {
735 assert!(is_test_path(Path::new("cli/tests/testdata/001_hello.ts")));
737 assert!(is_test_path(Path::new("src/__tests__/app.test.tsx")));
738 assert!(is_test_path(Path::new("tests/integration/run.rs")));
739 assert!(is_test_path(Path::new("examples/hello/main.rs")));
740 assert!(is_test_path(Path::new("benchmarks/perf.go")));
741 assert!(is_test_path(Path::new("src/utils_test.go")));
742 assert!(is_test_path(Path::new("lib/parser.test.ts")));
743 assert!(is_test_path(Path::new("test_helper.py")));
744 assert!(is_test_path(Path::new("fixtures/data.ts")));
745
746 assert!(!is_test_path(Path::new("src/main.rs")));
748 assert!(!is_test_path(Path::new("cli/tools/run.ts")));
749 assert!(!is_test_path(Path::new("packages/core/index.ts")));
750 assert!(!is_test_path(Path::new("runtime/ops/fs.rs")));
751 }
752
753 #[test]
754 fn test_is_noise_import() {
755 assert!(is_noise_import("https://deno.land/std/testing/asserts.ts"));
757 assert!(is_noise_import("http://example.com/mod.ts"));
758 assert!(is_noise_import("npm:chalk@5"));
759 assert!(is_noise_import("node:fs"));
760 assert!(is_noise_import("./styles.css"));
761 assert!(is_noise_import("../data.json"));
762 assert!(is_noise_import("logo.svg"));
763 assert!(is_noise_import("0.1.0"));
764 assert!(is_noise_import("1.2.3"));
765 assert!(is_noise_import("x")); assert!(!is_noise_import("react"));
769 assert!(!is_noise_import("serde"));
770 assert!(!is_noise_import("std"));
771 assert!(!is_noise_import("@scope/package"));
772 assert!(!is_noise_import("tokio"));
773 }
774
775 #[test]
776 fn test_normalize_import() {
777 assert_eq!(normalize_import("npm:chalk@5"), "chalk");
778 assert_eq!(normalize_import("npm:@types/node"), "@types/node");
779 assert_eq!(normalize_import("node:fs"), "fs");
780 assert_eq!(normalize_import("node:path"), "path");
781 assert_eq!(normalize_import("react"), "react");
782 }
783}