#![allow(clippy::all, unused)]
use std::collections::HashMap;
use std::path::{Path, PathBuf};
use std::time::Instant;
use crate::core::detection::frameworks::Framework;
use crate::core::detection::package_json::PackageJson;
use crate::core::detection::workspace::{WorkspaceSummary, detect_workspaces};
use crate::core::detection::{
DetectedFramework, DetectionResult, MigrationOpportunity, ModuleSystem, RiskyArea,
};
/// Walks a project directory and collects framework, module-system and
/// per-file information into a [`ScanResult`].
pub struct Scanner {
/// Directory the scan starts from; also where `package.json` is looked up.
root: PathBuf,
/// Parsed `package.json` files keyed by their path, filled by `load_package_json`.
cache: HashMap<PathBuf, PackageJson>,
/// Upper bound on the number of files recorded as scanned or skipped, and on
/// the number of directory entries examined when looking for risky areas.
pub max_files: usize,
}
/// A file that passed every skip check during a scan.
#[derive(Debug, Clone, serde::Serialize)]
pub struct ScannedFile {
/// Path of the file, relative to the scan root when the root prefix could be stripped.
pub path: PathBuf,
/// Tags computed for the file by `crate::core::recipe::compute_tags_for_file`.
pub tags: Vec<String>,
}
/// A file that was encountered during a scan but deliberately not analyzed.
#[derive(Debug, Clone, serde::Serialize)]
pub struct SkippedFileDiagnostic {
/// Path of the file, relative to the scan root when the root prefix could be stripped.
pub path: PathBuf,
/// Human-readable explanation of why the file was skipped
/// (for example `"empty file"` or `"binary content"`).
pub reason: String,
}
impl Scanner {
/// Creates a scanner rooted at `root` with an empty `package.json` cache
/// and a file limit of 10000.
pub fn new(root: PathBuf) -> Self {
    let cache = HashMap::new();
    let max_files = 10000;
    Self {
        root,
        cache,
        max_files,
    }
}
pub fn with_max_files(mut self, max_files: usize) -> Self {
self.max_files = max_files;
self
}
/// Runs a full scan of the project rooted at `self.root`.
///
/// The scan:
/// 1. loads `package.json` from the root (if present) and derives the detected
///    frameworks and module system from it,
/// 2. computes migration opportunities, risky areas and workspace information,
/// 3. walks the directory tree and classifies every regular file as either
///    scanned (with tags) or skipped (with a reason).
///
/// `total_files` counts every regular file the walk encounters, while at most
/// `max_files` of them are recorded in `scanned_files` + `skipped_files`.
pub fn scan(&mut self) -> ScanResult {
    let start = Instant::now();
    let pkg = self.load_package_json();
    let mut frameworks = Vec::new();
    // Stays `Mixed` when no package.json could be loaded.
    let mut module_system = ModuleSystem::Mixed;
    if let Some(ref p) = pkg {
        for fw in Framework::all() {
            if let Some(detected) = fw.detect(p) {
                frameworks.push(detected);
            }
        }
        module_system = if p.typ.as_deref() == Some("module") {
            ModuleSystem::ESM
        } else if p.dependencies.contains_key("ts-node") || p.dependencies.contains_key("tsx") {
            ModuleSystem::ESM
        } else {
            ModuleSystem::CommonJS
        };
    }
    let opportunities = self.suggest_recipes(&frameworks, &module_system);
    let risky = self.find_risky_areas();
    let workspace = detect_workspaces(&self.root);
    // Falls back to the default configuration when none can be loaded.
    let schema = crate::core::config::loader::load_config_for_path(&self.root)
        .unwrap_or_default();
    let ignore_handler = crate::core::config::ignore::IgnoreHandler::from_schema(&schema);
    let mut scanned_files = Vec::new();
    let mut skipped_files = Vec::new();
    let mut total_files = 0;
    for entry in walkdir::WalkDir::new(&self.root)
        .into_iter()
        .filter_entry(|e| {
            // The predicate is also invoked for the walk root (depth 0). Never
            // prune the root, otherwise a project that itself lives in a
            // directory called e.g. `build` or `dist` would scan as empty.
            if e.depth() == 0 {
                return true;
            }
            let name = e.file_name().to_string_lossy();
            name != "node_modules"
                && name != ".git"
                && name != "target"
                && name != "dist"
                && name != "build"
        })
        // Entries that cannot be read are silently dropped.
        .filter_map(|e| e.ok())
    {
        if !entry.file_type().is_file() {
            continue;
        }
        total_files += 1;
        // Keep walking after the limit is reached so `total_files` stays accurate,
        // but stop recording (and reading) individual files.
        if scanned_files.len() + skipped_files.len() >= self.max_files {
            continue;
        }
        let path = entry.path();
        let relative_path = path.strip_prefix(&self.root).unwrap_or(path).to_path_buf();
        let path_str = relative_path.to_string_lossy();
        let mut skip_reason = None;
        if ignore_handler.should_ignore(path) {
            // Attribute the ignore decision to its most specific source.
            if crate::core::config::ignore::IgnoreHandler::has_gitignore(path) {
                skip_reason = Some(".gitignore".to_string());
            } else {
                let is_config_excluded = schema
                    .excluded_paths
                    .iter()
                    .any(|pattern| path_str.contains(pattern));
                if is_config_excluded {
                    skip_reason = Some("morph-cli config exclusion".to_string());
                } else {
                    skip_reason = Some("default exclusion".to_string());
                }
            }
        } else if let Ok(metadata) = std::fs::metadata(path) {
            if metadata.len() == 0 {
                skip_reason = Some("empty file".to_string());
            } else {
                let size_kb = metadata.len() / 1024;
                if size_kb > schema.max_file_size_kb as u64 {
                    skip_reason = Some(format!("size limit ({} KB)", schema.max_file_size_kb));
                } else if let Ok(content) = std::fs::read_to_string(path) {
                    if let Some(reason) =
                        ignore_handler.check_file(path, &content, schema.max_file_size_kb)
                    {
                        // Collapse the handler's message into a short, stable label
                        // so the summary can group files by reason.
                        if reason.contains("minified") {
                            skip_reason = Some("minified detection".to_string());
                        } else if reason.contains("generated") {
                            skip_reason = Some("generated detection".to_string());
                        } else if reason.contains("binary") {
                            skip_reason = Some("binary content".to_string());
                        } else {
                            skip_reason = Some(reason);
                        }
                    }
                } else {
                    // The file could not be read as UTF-8 text; hand the checker a
                    // NUL byte as stand-in content and use whatever reason it gives.
                    if let Some(reason) =
                        ignore_handler.check_file(path, "\0", schema.max_file_size_kb)
                    {
                        if reason.contains("binary") {
                            skip_reason = Some("binary content".to_string());
                        } else {
                            skip_reason = Some(reason);
                        }
                    }
                }
            }
        }
        if let Some(reason) = skip_reason {
            skipped_files.push(SkippedFileDiagnostic {
                path: relative_path,
                reason,
            });
        } else {
            let tags = crate::core::recipe::compute_tags_for_file(path, None, &[], false, false);
            scanned_files.push(ScannedFile {
                path: relative_path,
                tags,
            });
        }
    }
    let elapsed = start.elapsed();
    ScanResult {
        root: self.root.clone(),
        detection: DetectionResult {
            frameworks,
            module_system,
            migration_opportunities: opportunities,
            risky_areas: risky,
        },
        scan_time_ms: elapsed.as_millis() as u64,
        cached: self.cache.len(),
        total_files,
        workspace,
        scanned_files,
        skipped_files,
    }
}
/// Loads `package.json` from the scan root.
///
/// On success a copy of the parsed manifest is stored in `self.cache` under
/// the manifest path and the manifest is returned; `None` is returned when
/// `PackageJson::load` yields nothing.
fn load_package_json(&mut self) -> Option<PackageJson> {
    let manifest_path = self.root.join("package.json");
    let manifest = PackageJson::load(&manifest_path)?;
    self.cache.insert(manifest_path, manifest.clone());
    Some(manifest)
}
/// Derives migration opportunities from the names of the detected frameworks.
///
/// * "CommonJS to ESM" — when both `CommonJS` and `Express` were detected.
/// * "JavaScript to TypeScript" — when `React` was detected and neither
///   `TypeScript` nor `CommonJS` was.
/// * "TypeScript strict mode" — when `TypeScript` was detected.
///
/// `_module_system` is currently not consulted.
fn suggest_recipes(
    &self,
    frameworks: &[DetectedFramework],
    _module_system: &ModuleSystem,
) -> Vec<MigrationOpportunity> {
    let detected = |wanted: &str| frameworks.iter().any(|f| f.name == wanted);
    let has_cjs = detected("CommonJS");
    let has_express = detected("Express");
    let has_react = detected("React");
    let has_ts = detected("TypeScript");
    // "No framework is TypeScript or CommonJS" — also true for an empty list.
    let has_no_ts = !has_ts && !has_cjs;

    // Each slot is `Some` only when its condition holds; flattening keeps the
    // fixed order ESM -> TypeScript -> strict mode.
    vec![
        (has_cjs && has_express).then(|| MigrationOpportunity {
            name: "CommonJS to ESM".into(),
            description: "Migrate from require() to import statements".into(),
            recipes: vec!["commonjs-to-esm".into()],
            priority: 80,
        }),
        (has_no_ts && has_react).then(|| MigrationOpportunity {
            name: "JavaScript to TypeScript".into(),
            description: "Add type safety to JavaScript files".into(),
            recipes: vec!["js-to-ts".into()],
            priority: 70,
        }),
        has_ts.then(|| MigrationOpportunity {
            name: "TypeScript strict mode".into(),
            description: "Enable strict type checking".into(),
            recipes: vec![],
            priority: 50,
        }),
    ]
    .into_iter()
    .flatten()
    .collect()
}
/// Looks for files that are risky to transform automatically.
///
/// Walks at most three levels below `self.root`, pruning `node_modules`,
/// `.git`, `target`, `dist` and `build`, and examines at most `max_files`
/// directory entries. Regular files whose name does not start with `.` are
/// reported as:
///
/// * "Minified file" (severity 60) for names ending in `.min.js` / `.min.jsx`,
/// * "Bundled output" (severity 70) for names ending in `.bundle.js` / `.chunk.js`.
fn find_risky_areas(&self) -> Vec<RiskyArea> {
    let mut risky = Vec::new();
    let walker = walkdir::WalkDir::new(&self.root)
        .max_depth(3)
        .into_iter()
        .filter_entry(|e| {
            // The predicate also sees the walk root (depth 0); never prune it,
            // otherwise a project living in e.g. `build/` yields nothing.
            if e.depth() == 0 {
                return true;
            }
            let name = e.file_name().to_string_lossy();
            name != "node_modules"
                && name != ".git"
                && name != "target"
                && name != "dist"
                && name != "build"
        })
        .filter_map(|e| e.ok())
        .take(self.max_files);

    for entry in walker {
        if !entry.file_type().is_file() {
            continue;
        }
        let path = entry.path();
        let name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
        // Hidden files are not reported.
        if name.starts_with('.') {
            continue;
        }
        // `Path::extension` only yields the text after the *last* dot ("js"),
        // so compound suffixes have to be matched against the whole file name.
        let finding = if name.ends_with(".min.js") || name.ends_with(".min.jsx") {
            Some(("Minified file", 60))
        } else if name.ends_with(".bundle.js") || name.ends_with(".chunk.js") {
            Some(("Bundled output", 70))
        } else {
            None
        };
        if let Some((reason, severity)) = finding {
            risky.push(RiskyArea {
                path: path.display().to_string(),
                reason: reason.into(),
                severity,
            });
        }
    }
    risky
}
}
/// Everything produced by one run of `Scanner::scan`.
#[derive(Debug)]
#[allow(unused)]
pub struct ScanResult {
/// Root directory that was scanned.
pub root: PathBuf,
/// Detected frameworks, module system, migration opportunities and risky areas.
pub detection: DetectionResult,
/// Wall-clock duration of the scan in milliseconds.
pub scan_time_ms: u64,
/// Number of entries in the scanner's `package.json` cache when the scan finished.
pub cached: usize,
/// Number of regular files the directory walk encountered, including files
/// beyond the scanner's `max_files` limit.
pub total_files: usize,
/// Workspace information returned by `detect_workspaces` for the root.
pub workspace: WorkspaceSummary,
/// Files that were analyzed, with their tags.
pub scanned_files: Vec<ScannedFile>,
/// Files that were skipped, with the reason for each.
pub skipped_files: Vec<SkippedFileDiagnostic>,
}
impl ScanResult {
/// Prints a human-readable report of the scan to stdout.
///
/// When no file was scanned, only an onboarding guide is printed. Otherwise
/// the report lists scan time, frameworks, module system, files (all, capped
/// at 50, or only those carrying `tag_filter`), workspaces, migration
/// opportunities, the risky-area count and a per-reason breakdown of skipped
/// files. With `verbose` every skipped file is listed individually as well.
pub fn print_summary(&self, tag_filter: Option<&str>, verbose: bool) {
    /// Shown instead of the report when the scan produced no analyzable files.
    fn print_onboarding_guide() {
        use colored::Colorize;
        let rule = "═".repeat(60).cyan();
        println!();
        println!("{}", "✨ Welcome to morph-cli! ✨".bold().cyan());
        println!("{}", rule);
        println!("{}", "⚠️ No scanned files detected in this directory!".yellow().bold());
        println!(" Make sure your project contains Javascript or TypeScript source files.");
        println!(" Supported extensions: .js, .ts, .jsx, .tsx, .cjs, .mjs");
        println!();
        println!("{}", "💡 Quick Onboarding Guide:".bold().yellow());
        println!(" 1. Place some JavaScript or TypeScript files in this directory.");
        println!(" 2. Run `morph init` to generate a `morph-cli.toml` config file.");
        println!(" 3. Run `morph list` to explore all built-in modernization recipes.");
        println!();
        println!("{}", "🚀 Beginner-Safe Recommendations:".bold().green());
        println!(" - To migrate CommonJS require statements to modern ESM imports:");
        println!(" {}", "morph run commonjs-to-esm . --dry-run".bold().cyan());
        println!(" - To upgrade JavaScript files to TypeScript safely:");
        println!(" {}", "morph run js-to-ts . --dry-run".bold().cyan());
        println!(" - To preview a preset workflow impact:");
        println!(" {}", "morph preset run modern-js .".bold().cyan());
        println!();
        println!("{}", "👉 Next-Step Hints:".bold().magenta());
        println!(" - Run `morph magic` to start our guided, step-by-step interactive assistant!");
        println!(" - Run `morph ignored` to check why any files are being skipped.");
        println!("{}", rule);
        println!();
    }

    if self.scanned_files.is_empty() {
        print_onboarding_guide();
        return;
    }

    let detection = &self.detection;

    println!();
    println!(" Scan time: {}ms", self.scan_time_ms);
    println!();
    println!(" Frameworks: {}", detection.frameworks.len());
    for framework in &detection.frameworks {
        println!(" - {} ({}%)", framework.name, framework.confidence);
        if let Some(version) = framework.version.as_ref() {
            println!(" version: {}", version);
        }
    }
    println!();
    println!(" Module system: {:?}", detection.module_system);
    println!();

    match tag_filter {
        Some(tag) => {
            println!(" Files matching tag '{}':", tag);
            let mut matching = self
                .scanned_files
                .iter()
                .filter(|f| f.tags.iter().any(|t| t == tag))
                .peekable();
            if matching.peek().is_none() {
                println!(" No files found.");
            } else {
                for file in matching {
                    println!(" - {} [{}]", file.path.display(), file.tags.join(", "));
                }
            }
        }
        None => {
            println!(" Analyzed Files & Tags:");
            for file in self.scanned_files.iter().take(50) {
                println!(" - {} [{}]", file.path.display(), file.tags.join(", "));
            }
            // Only the first 50 files are listed; summarize the remainder.
            let hidden = self.scanned_files.len().saturating_sub(50);
            if hidden > 0 {
                println!(" ... and {} more files", hidden);
            }
        }
    }

    if self.workspace.is_workspace() {
        let managers: Vec<String> = self
            .workspace
            .managers
            .iter()
            .map(|manager| format!("{:?}", manager).to_lowercase())
            .collect();
        println!();
        println!(" Workspaces:");
        println!(" managers: {}", managers.join(", "));
        println!(" packages: {}", self.workspace.packages.len());
        for package in &self.workspace.packages {
            println!(" - {} ({})", package.name, package.path.display());
        }
    }

    println!();
    println!(
        " Migration opportunities: {}",
        detection.migration_opportunities.len()
    );
    for opportunity in &detection.migration_opportunities {
        println!(" - {} (priority: {})", opportunity.name, opportunity.priority);
    }

    if !detection.risky_areas.is_empty() {
        println!();
        println!(" Risky areas: {}", detection.risky_areas.len());
    }

    if !self.skipped_files.is_empty() {
        println!();
        println!(" Ignored/Skipped Files Diagnostics:");
        // BTreeMap keeps the reasons in sorted order for stable output.
        let mut per_reason: std::collections::BTreeMap<&String, usize> =
            std::collections::BTreeMap::new();
        for skipped in &self.skipped_files {
            *per_reason.entry(&skipped.reason).or_insert(0) += 1;
        }
        for (reason, count) in &per_reason {
            println!(" - {} file(s) skipped due to {}", count, reason);
        }
        if verbose {
            println!();
            println!(" Detailed Ignored/Skipped Files:");
            for skipped in &self.skipped_files {
                println!(" - {} ({})", skipped.path.display(), skipped.reason);
            }
        }
    }

    println!();
    println!(" Project Fingerprint:");
    println!(" status: updated");
    println!(" path: .morph-cli/project.json");
}
}
/// File counters stored in a [`ScanSnapshot`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScanFileCounts {
/// Number of regular files encountered by the scan.
pub total: usize,
/// Number of files that were analyzed.
pub scanned: usize,
/// Number of files that were skipped.
pub skipped: usize,
}
/// Risk counters stored in a [`ScanSnapshot`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScanRiskCounts {
/// Number of risky areas reported by the scan.
pub total: usize,
}
/// Serializable summary of a scan, persisted as JSON by [`ScanSnapshotStore`].
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct ScanSnapshot {
/// Snapshot identifier; also used as the file stem of the stored JSON file.
/// `ScanResult::to_snapshot` generates it as `scan-<unix millis>`.
pub id: String,
/// Creation time in seconds since the Unix epoch.
pub timestamp: u64,
/// Path the scan was run against.
pub target_path: PathBuf,
/// Detected framework names, with the version appended in parentheses when known.
pub detected_frameworks: Vec<String>,
/// Names of the migration opportunities suggested by the scan.
pub recipe_suggestions: Vec<String>,
/// Total / scanned / skipped file counters.
pub file_counts: ScanFileCounts,
/// Risky-area counters.
pub risk_counts: ScanRiskCounts,
}
/// Directory, relative to the project root, in which scan snapshots are stored.
const SCAN_DIR: &str = ".morph-cli/scans";
/// File-system backed store for [`ScanSnapshot`]s, one JSON file per snapshot.
pub struct ScanSnapshotStore {
/// Directory holding the snapshot files (`<project root>/.morph-cli/scans`).
root: PathBuf,
}
impl ScanSnapshotStore {
/// Creates a store whose snapshot directory is `SCAN_DIR` below `project_root`.
/// Nothing is created on disk until a snapshot is saved.
pub fn new(project_root: &Path) -> Self {
    let root = project_root.join(SCAN_DIR);
    Self { root }
}
pub fn save(&self, snapshot: &ScanSnapshot) -> anyhow::Result<()> {
use anyhow::Context;
std::fs::create_dir_all(&self.root).with_context(|| {
format!(
"Failed to create scan snapshots directory: {}",
self.root.display()
)
})?;
let path = self.snapshot_path(&snapshot.id);
let json = serde_json::to_string_pretty(snapshot)
.context("Failed to serialize scan snapshot")?;
std::fs::write(&path, json)
.with_context(|| format!("Failed to write scan snapshot: {}", path.display()))?;
Ok(())
}
pub fn load(&self, id: &str) -> anyhow::Result<Option<ScanSnapshot>> {
use anyhow::Context;
let path = self.snapshot_path(id);
if !path.exists() {
return Ok(None);
}
let content = std::fs::read_to_string(&path)
.with_context(|| format!("Failed to read scan snapshot: {}", path.display()))?;
let snapshot = serde_json::from_str(&content)
.with_context(|| format!("Failed to parse scan snapshot: {}", path.display()))?;
Ok(Some(snapshot))
}
/// Returns every readable snapshot in the store, newest first.
///
/// Only `*.json` files are considered; files that cannot be read or parsed
/// are skipped. A missing store directory yields an empty list.
///
/// # Errors
/// Fails when the store directory or one of its entries cannot be listed.
pub fn list(&self) -> anyhow::Result<Vec<ScanSnapshot>> {
    let mut found = Vec::new();
    if !self.root.exists() {
        return Ok(found);
    }

    for dir_entry in std::fs::read_dir(&self.root)? {
        let candidate = dir_entry?.path();
        let is_json = candidate.extension().and_then(|ext| ext.to_str()) == Some("json");
        if !is_json {
            continue;
        }
        // Best effort: unreadable or malformed files are ignored.
        let parsed = std::fs::read_to_string(&candidate)
            .ok()
            .and_then(|content| serde_json::from_str::<ScanSnapshot>(&content).ok());
        if let Some(snapshot) = parsed {
            found.push(snapshot);
        }
    }

    // Stable sort, descending by timestamp.
    found.sort_by_key(|snapshot| std::cmp::Reverse(snapshot.timestamp));
    Ok(found)
}
/// Path of the JSON file that holds the snapshot with the given `id`.
fn snapshot_path(&self, id: &str) -> PathBuf {
    let file_name = format!("{}.json", id);
    self.root.join(file_name)
}
}
/// Current wall-clock time in whole seconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn current_timestamp() -> u64 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_secs(),
        Err(_) => 0,
    }
}
/// Current wall-clock time in milliseconds since the Unix epoch.
///
/// Returns 0 if the system clock reports a time before the epoch.
fn current_timestamp_millis() -> u128 {
    use std::time::{SystemTime, UNIX_EPOCH};

    match SystemTime::now().duration_since(UNIX_EPOCH) {
        Ok(since_epoch) => since_epoch.as_millis(),
        Err(_) => 0,
    }
}
impl ScanResult {
/// Condenses this result into a [`ScanSnapshot`] for `target_path`.
///
/// The snapshot id is `scan-<current unix millis>` and the timestamp is the
/// current time in seconds. Frameworks are rendered as `name (version)` when
/// a version is known and as plain `name` otherwise.
pub fn to_snapshot(&self, target_path: &Path) -> ScanSnapshot {
    let detection = &self.detection;

    let id = format!("scan-{}", current_timestamp_millis());
    let timestamp = current_timestamp();

    let mut detected_frameworks = Vec::with_capacity(detection.frameworks.len());
    for framework in &detection.frameworks {
        let label = match &framework.version {
            Some(version) => format!("{} ({})", framework.name, version),
            None => framework.name.clone(),
        };
        detected_frameworks.push(label);
    }

    let mut recipe_suggestions = Vec::with_capacity(detection.migration_opportunities.len());
    for opportunity in &detection.migration_opportunities {
        recipe_suggestions.push(opportunity.name.clone());
    }

    let file_counts = ScanFileCounts {
        total: self.total_files,
        scanned: self.scanned_files.len(),
        skipped: self.skipped_files.len(),
    };
    let risk_counts = ScanRiskCounts {
        total: detection.risky_areas.len(),
    };

    ScanSnapshot {
        id,
        timestamp,
        target_path: target_path.to_path_buf(),
        detected_frameworks,
        recipe_suggestions,
        file_counts,
        risk_counts,
    }
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// A freshly constructed scanner keeps the root it was given.
    #[test]
    fn test_scanner_new() {
        let root = PathBuf::from("/tmp");
        let scanner = Scanner::new(root.clone());
        assert_eq!(scanner.root, root);
    }
}