Skip to main content

morph_cli/core/detection/
scanner.rs

1#![allow(clippy::all, unused)]
2use std::collections::HashMap;
3use std::path::{Path, PathBuf};
4use std::time::Instant;
5
6use crate::core::detection::frameworks::Framework;
7use crate::core::detection::package_json::PackageJson;
8use crate::core::detection::workspace::{WorkspaceSummary, detect_workspaces};
9use crate::core::detection::{
10    DetectedFramework, DetectionResult, MigrationOpportunity, ModuleSystem, RiskyArea,
11};
12
13pub struct Scanner {
14    root: PathBuf,
15    cache: HashMap<PathBuf, PackageJson>,
16    pub max_files: usize,
17}
18
19#[derive(Debug, Clone, serde::Serialize)]
20pub struct ScannedFile {
21    pub path: PathBuf,
22    pub tags: Vec<String>,
23}
24
25#[derive(Debug, Clone, serde::Serialize)]
26pub struct SkippedFileDiagnostic {
27    pub path: PathBuf,
28    pub reason: String,
29}
30
31impl Scanner {
32    pub fn new(root: PathBuf) -> Self {
33        Self {
34            root,
35            cache: HashMap::new(),
36            max_files: 10000,
37        }
38    }
39
40    pub fn with_max_files(mut self, max_files: usize) -> Self {
41        self.max_files = max_files;
42        self
43    }
44
45    pub fn scan(&mut self) -> ScanResult {
46        let start = Instant::now();
47
48        let pkg = self.load_package_json();
49        let mut frameworks = Vec::new();
50        let mut module_system = ModuleSystem::Mixed;
51
52        if let Some(ref p) = pkg {
53            for fw in Framework::all() {
54                if let Some(detected) = fw.detect(p) {
55                    frameworks.push(detected);
56                }
57            }
58
59            module_system = if p.typ.as_deref() == Some("module") {
60                ModuleSystem::ESM
61            } else if p.dependencies.contains_key("ts-node") || p.dependencies.contains_key("tsx") {
62                ModuleSystem::ESM
63            } else {
64                ModuleSystem::CommonJS
65            };
66        }
67
68        let opportunities = self.suggest_recipes(&frameworks, &module_system);
69        let risky = self.find_risky_areas();
70        let workspace = detect_workspaces(&self.root);
71
72        let schema = crate::core::config::loader::load_config_for_path(&self.root)
73            .unwrap_or_default();
74            
75        let ignore_handler = crate::core::config::ignore::IgnoreHandler::from_schema(&schema);
76
77        let mut scanned_files = Vec::new();
78        let mut skipped_files = Vec::new();
79        let mut total_files = 0;
80
81        for entry in walkdir::WalkDir::new(&self.root)
82            .into_iter()
83            .filter_entry(|e| {
84                let name = e.file_name().to_string_lossy();
85                name != "node_modules" && name != ".git" && name != "target" && name != "dist" && name != "build"
86            })
87            .filter_map(|e| e.ok())
88        {
89            if entry.file_type().is_file() {
90                total_files += 1;
91
92                if scanned_files.len() + skipped_files.len() >= self.max_files {
93                    continue;
94                }
95
96                let path = entry.path();
97                let relative_path = path.strip_prefix(&self.root).unwrap_or(path).to_path_buf();
98                let path_str = relative_path.to_string_lossy();
99                
100                let mut skip_reason = None;
101                
102                // 1. Check gitignore or config exclusion
103                if ignore_handler.should_ignore(path) {
104                    if crate::core::config::ignore::IgnoreHandler::has_gitignore(path) {
105                        skip_reason = Some(".gitignore".to_string());
106                    } else {
107                        let mut is_config_excluded = false;
108                        for pattern in &schema.excluded_paths {
109                            if path_str.contains(pattern) {
110                                is_config_excluded = true;
111                                break;
112                            }
113                        }
114                        if is_config_excluded {
115                            skip_reason = Some("morph-cli config exclusion".to_string());
116                        } else {
117                            skip_reason = Some("default exclusion".to_string());
118                        }
119                    }
120                } else if let Ok(metadata) = std::fs::metadata(path) {
121                    if metadata.len() == 0 {
122                        skip_reason = Some("empty file".to_string());
123                    } else {
124                        let size_kb = metadata.len() / 1024;
125                        if size_kb > schema.max_file_size_kb as u64 {
126                            skip_reason = Some(format!("size limit ({} KB)", schema.max_file_size_kb));
127                        } else if let Ok(content) = std::fs::read_to_string(path) {
128                            if let Some(reason) = ignore_handler.check_file(path, &content, schema.max_file_size_kb) {
129                                if reason.contains("minified") {
130                                    skip_reason = Some("minified detection".to_string());
131                                } else if reason.contains("generated") {
132                                    skip_reason = Some("generated detection".to_string());
133                                } else if reason.contains("binary") {
134                                    skip_reason = Some("binary content".to_string());
135                                } else {
136                                    skip_reason = Some(reason);
137                                }
138                            }
139                        } else {
140                            if let Some(reason) = ignore_handler.check_file(path, "\0", schema.max_file_size_kb) {
141                                if reason.contains("binary") {
142                                    skip_reason = Some("binary content".to_string());
143                                } else {
144                                    skip_reason = Some(reason);
145                                }
146                            }
147                        }
148                    }
149                }
150                
151                if let Some(reason) = skip_reason {
152                    skipped_files.push(SkippedFileDiagnostic {
153                        path: relative_path,
154                        reason,
155                    });
156                } else {
157                    let tags = crate::core::recipe::compute_tags_for_file(path, None, &[], false, false);
158                    scanned_files.push(ScannedFile {
159                        path: relative_path,
160                        tags,
161                    });
162                }
163            }
164        }
165
166        let elapsed = start.elapsed();
167
168        ScanResult {
169            root: self.root.clone(),
170            detection: DetectionResult {
171                frameworks,
172                module_system,
173                migration_opportunities: opportunities,
174                risky_areas: risky,
175            },
176            scan_time_ms: elapsed.as_millis() as u64,
177            cached: self.cache.len(),
178            total_files,
179            workspace,
180            scanned_files,
181            skipped_files,
182        }
183    }
184
185    fn load_package_json(&mut self) -> Option<PackageJson> {
186        let path = self.root.join("package.json");
187        if let Some(pkg) = PackageJson::load(&path) {
188            self.cache.insert(path, pkg.clone());
189            Some(pkg)
190        } else {
191            None
192        }
193    }
194
195    fn suggest_recipes(
196        &self,
197        frameworks: &[DetectedFramework],
198        _module_system: &ModuleSystem,
199    ) -> Vec<MigrationOpportunity> {
200        let mut opportunities = Vec::new();
201
202        let has_cjs = frameworks.iter().any(|f| f.name == "CommonJS");
203        let has_express = frameworks.iter().any(|f| f.name == "Express");
204        let has_react = frameworks.iter().any(|f| f.name == "React");
205        let has_ts = frameworks.iter().any(|f| f.name == "TypeScript");
206        let has_no_ts = frameworks
207            .iter()
208            .all(|f| f.name != "TypeScript" && f.name != "CommonJS");
209
210        if has_cjs && has_express {
211            opportunities.push(MigrationOpportunity {
212                name: "CommonJS to ESM".into(),
213                description: "Migrate from require() to import statements".into(),
214                recipes: vec!["commonjs-to-esm".into()],
215                priority: 80,
216            });
217        }
218
219        if has_no_ts && has_react {
220            opportunities.push(MigrationOpportunity {
221                name: "JavaScript to TypeScript".into(),
222                description: "Add type safety to JavaScript files".into(),
223                recipes: vec!["js-to-ts".into()],
224                priority: 70,
225            });
226        }
227
228        if has_ts {
229            opportunities.push(MigrationOpportunity {
230                name: "TypeScript strict mode".into(),
231                description: "Enable strict type checking".into(),
232                recipes: vec![],
233                priority: 50,
234            });
235        }
236
237        opportunities
238    }
239
240    fn find_risky_areas(&self) -> Vec<RiskyArea> {
241        let mut risky = Vec::new();
242
243        for entry in walkdir::WalkDir::new(&self.root)
244            .max_depth(3)
245            .into_iter()
246            .filter_entry(|e| {
247                let name = e.file_name().to_string_lossy();
248                name != "node_modules" && name != ".git" && name != "target" && name != "dist" && name != "build"
249            })
250            .filter_map(|e| e.ok())
251            .take(self.max_files)
252        {
253            let path = entry.path();
254            let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
255
256            if name == "node_modules" || name.starts_with('.') || name == "dist" || name == "build"
257            {
258                continue;
259            }
260
261            if let Some(ext) = path.extension().and_then(|e| e.to_str()) {
262                match ext {
263                    "min.js" | "min.jsx" => {
264                        risky.push(RiskyArea {
265                            path: path.display().to_string(),
266                            reason: "Minified file".into(),
267                            severity: 60,
268                        });
269                    }
270                    "bundle.js" | "chunk.js" => {
271                        risky.push(RiskyArea {
272                            path: path.display().to_string(),
273                            reason: "Bundled output".into(),
274                            severity: 70,
275                        });
276                    }
277                    _ => {}
278                }
279            }
280        }
281
282        risky
283    }
284}
285
286#[derive(Debug)]
287#[allow(unused)]
288pub struct ScanResult {
289    pub root: PathBuf,
290    pub detection: DetectionResult,
291    pub scan_time_ms: u64,
292    pub cached: usize,
293    pub total_files: usize,
294    pub workspace: WorkspaceSummary,
295    pub scanned_files: Vec<ScannedFile>,
296    pub skipped_files: Vec<SkippedFileDiagnostic>,
297}
298
299impl ScanResult {
300    pub fn print_summary(&self, tag_filter: Option<&str>, verbose: bool) {
301        if self.scanned_files.is_empty() {
302            use colored::Colorize;
303            println!();
304            println!("{}", "✨ Welcome to morph-cli! ✨".bold().cyan());
305            println!("{}", "═".repeat(60).cyan());
306            println!("{}", "⚠️  No scanned files detected in this directory!".yellow().bold());
307            println!("  Make sure your project contains Javascript or TypeScript source files.");
308            println!("  Supported extensions: .js, .ts, .jsx, .tsx, .cjs, .mjs");
309            println!();
310            println!("{}", "💡 Quick Onboarding Guide:".bold().yellow());
311            println!("  1. Place some JavaScript or TypeScript files in this directory.");
312            println!("  2. Run `morph init` to generate a `morph-cli.toml` config file.");
313            println!("  3. Run `morph list` to explore all built-in modernization recipes.");
314            println!();
315            println!("{}", "🚀 Beginner-Safe Recommendations:".bold().green());
316            println!("  - To migrate CommonJS require statements to modern ESM imports:");
317            println!("    {}", "morph run commonjs-to-esm . --dry-run".bold().cyan());
318            println!("  - To upgrade JavaScript files to TypeScript safely:");
319            println!("    {}", "morph run js-to-ts . --dry-run".bold().cyan());
320            println!("  - To preview a preset workflow impact:");
321            println!("    {}", "morph preset run modern-js .".bold().cyan());
322            println!();
323            println!("{}", "👉 Next-Step Hints:".bold().magenta());
324            println!("  - Run `morph magic` to start our guided, step-by-step interactive assistant!");
325            println!("  - Run `morph ignored` to check why any files are being skipped.");
326            println!("{}", "═".repeat(60).cyan());
327            println!();
328            return;
329        }
330
331        println!();
332        println!("  Scan time: {}ms", self.scan_time_ms);
333        println!();
334        println!("  Frameworks: {}", self.detection.frameworks.len());
335        for fw in &self.detection.frameworks {
336            println!("    - {} ({}%)", fw.name, fw.confidence);
337            if let Some(v) = &fw.version {
338                println!("      version: {}", v);
339            }
340        }
341        println!();
342        println!("  Module system: {:?}", self.detection.module_system);
343
344        // Print Scanned Files with Tags
345        println!();
346        if let Some(tag) = tag_filter {
347            println!("  Files matching tag '{}':", tag);
348            let filtered: Vec<_> = self.scanned_files.iter()
349                .filter(|f| f.tags.iter().any(|t| t == tag))
350                .collect();
351            if filtered.is_empty() {
352                println!("    No files found.");
353            } else {
354                for f in filtered {
355                    println!("    - {} [{}]", f.path.display(), f.tags.join(", "));
356                }
357            }
358        } else {
359            println!("  Analyzed Files & Tags:");
360            for f in self.scanned_files.iter().take(50) {
361                println!("    - {} [{}]", f.path.display(), f.tags.join(", "));
362            }
363            if self.scanned_files.len() > 50 {
364                println!("    ... and {} more files", self.scanned_files.len() - 50);
365            }
366        }
367        if self.workspace.is_workspace() {
368            println!();
369            println!("  Workspaces:");
370            println!(
371                "    managers: {}",
372                self.workspace
373                    .managers
374                    .iter()
375                    .map(|manager| format!("{:?}", manager).to_lowercase())
376                    .collect::<Vec<_>>()
377                    .join(", ")
378            );
379            println!("    packages: {}", self.workspace.packages.len());
380            for package in &self.workspace.packages {
381                println!("      - {} ({})", package.name, package.path.display());
382            }
383        }
384        println!();
385        println!(
386            "  Migration opportunities: {}",
387            self.detection.migration_opportunities.len()
388        );
389        for opp in &self.detection.migration_opportunities {
390            println!("    - {} (priority: {})", opp.name, opp.priority);
391        }
392        if !self.detection.risky_areas.is_empty() {
393            println!();
394            println!("  Risky areas: {}", self.detection.risky_areas.len());
395        }
396
397        if !self.skipped_files.is_empty() {
398            println!();
399            println!("  Ignored/Skipped Files Diagnostics:");
400            let mut counts = std::collections::BTreeMap::new();
401            for file in &self.skipped_files {
402                *counts.entry(&file.reason).or_insert(0) += 1;
403            }
404            for (reason, count) in &counts {
405                println!("    - {} file(s) skipped due to {}", count, reason);
406            }
407            if verbose {
408                println!();
409                println!("    Detailed Ignored/Skipped Files:");
410                for file in &self.skipped_files {
411                    println!("      - {} ({})", file.path.display(), file.reason);
412                }
413            }
414        }
415        println!();
416        println!("  Project Fingerprint:");
417        println!("    status:   updated");
418        println!("    path:     .morph-cli/project.json");
419    }
420}
421
422#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
423pub struct ScanFileCounts {
424    pub total: usize,
425    pub scanned: usize,
426    pub skipped: usize,
427}
428
429#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
430pub struct ScanRiskCounts {
431    pub total: usize,
432}
433
434#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
435pub struct ScanSnapshot {
436    pub id: String,
437    pub timestamp: u64,
438    pub target_path: PathBuf,
439    pub detected_frameworks: Vec<String>,
440    pub recipe_suggestions: Vec<String>,
441    pub file_counts: ScanFileCounts,
442    pub risk_counts: ScanRiskCounts,
443}
444
/// Directory, relative to the project root, where scan snapshots are stored.
const SCAN_DIR: &str = ".morph-cli/scans";
446
/// File-backed store that keeps one JSON file per scan snapshot.
pub struct ScanSnapshotStore {
    /// Directory the snapshot files live in.
    root: PathBuf,
}
450
451impl ScanSnapshotStore {
452    pub fn new(project_root: &Path) -> Self {
453        Self {
454            root: project_root.join(SCAN_DIR),
455        }
456    }
457
458    pub fn save(&self, snapshot: &ScanSnapshot) -> anyhow::Result<()> {
459        use anyhow::Context;
460        std::fs::create_dir_all(&self.root).with_context(|| {
461            format!(
462                "Failed to create scan snapshots directory: {}",
463                self.root.display()
464            )
465        })?;
466
467        let path = self.snapshot_path(&snapshot.id);
468        let json = serde_json::to_string_pretty(snapshot)
469            .context("Failed to serialize scan snapshot")?;
470        std::fs::write(&path, json)
471            .with_context(|| format!("Failed to write scan snapshot: {}", path.display()))?;
472        Ok(())
473    }
474
475    pub fn load(&self, id: &str) -> anyhow::Result<Option<ScanSnapshot>> {
476        use anyhow::Context;
477        let path = self.snapshot_path(id);
478        if !path.exists() {
479            return Ok(None);
480        }
481
482        let content = std::fs::read_to_string(&path)
483            .with_context(|| format!("Failed to read scan snapshot: {}", path.display()))?;
484        let snapshot = serde_json::from_str(&content)
485            .with_context(|| format!("Failed to parse scan snapshot: {}", path.display()))?;
486        Ok(Some(snapshot))
487    }
488
489    pub fn list(&self) -> anyhow::Result<Vec<ScanSnapshot>> {
490        let mut snapshots = Vec::new();
491
492        if !self.root.exists() {
493            return Ok(snapshots);
494        }
495
496        for entry in std::fs::read_dir(&self.root)? {
497            let entry = entry?;
498            let path = entry.path();
499
500            if path.extension().and_then(|extension| extension.to_str()) != Some("json") {
501                continue;
502            }
503
504            if let Ok(content) = std::fs::read_to_string(&path) {
505                if let Ok(snapshot) = serde_json::from_str::<ScanSnapshot>(&content) {
506                    snapshots.push(snapshot);
507                }
508            }
509        }
510
511        snapshots.sort_by(|left, right| right.timestamp.cmp(&left.timestamp));
512        Ok(snapshots)
513    }
514
515    fn snapshot_path(&self, id: &str) -> PathBuf {
516        self.root.join(format!("{id}.json"))
517    }
518}
519
/// Seconds elapsed since the Unix epoch, or `0` if the system clock reports a
/// time before the epoch.
fn current_timestamp() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
526
/// Milliseconds elapsed since the Unix epoch, or `0` if the system clock
/// reports a time before the epoch.
fn current_timestamp_millis() -> u128 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        Err(_) => 0,
    }
}
533
534impl ScanResult {
535    pub fn to_snapshot(&self, target_path: &Path) -> ScanSnapshot {
536        let id = format!("scan-{}", current_timestamp_millis());
537        let timestamp = current_timestamp();
538        let detected_frameworks = self.detection.frameworks.iter()
539            .map(|f| {
540                if let Some(v) = &f.version {
541                    format!("{} ({})", f.name, v)
542                } else {
543                    f.name.clone()
544                }
545            })
546            .collect();
547        let recipe_suggestions = self.detection.migration_opportunities.iter()
548            .map(|opp| opp.name.clone())
549            .collect();
550        ScanSnapshot {
551            id,
552            timestamp,
553            target_path: target_path.to_path_buf(),
554            detected_frameworks,
555            recipe_suggestions,
556            file_counts: ScanFileCounts {
557                total: self.total_files,
558                scanned: self.scanned_files.len(),
559                skipped: self.skipped_files.len(),
560            },
561            risk_counts: ScanRiskCounts {
562                total: self.detection.risky_areas.len(),
563            },
564        }
565    }
566}
567
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed scanner keeps the root it was given.
    #[test]
    fn test_scanner_new() {
        let root = PathBuf::from("/tmp");
        let scanner = Scanner::new(root.clone());
        assert_eq!(scanner.root, root);
    }
}
577}