// morph-cli 0.1.0
//
// AST-based codebase migration and codemod tool for JavaScript and TypeScript projects.
// Documentation
use std::fs;
use std::path::{Path, PathBuf};

use anyhow::{Context, Result};

use super::schema::MorphCliSchema;

/// Decides which paths and file contents should be skipped during processing.
///
/// Holds the list of exclude patterns plus three switches that enable the
/// size, minification and generated-code checks performed by `check_file`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct IgnoreHandler {
    /// Path patterns that cause a path to be ignored by `should_ignore`.
    exclude_patterns: Vec<String>,
    /// When `true`, `check_file` rejects files larger than the caller's size limit.
    skip_large_files: bool,
    /// When `true`, `check_file` rejects content that looks minified.
    skip_minified: bool,
    /// When `true`, `check_file` rejects content carrying a "generated" marker.
    skip_generated: bool,
}

impl IgnoreHandler {
    #[allow(dead_code)]
    pub fn new() -> Self {
        Self {
            exclude_patterns: Self::default_patterns(),
            skip_large_files: true,
            skip_minified: true,
            skip_generated: true,
        }
    }

    #[allow(dead_code)]
    pub fn from_schema(schema: &MorphCliSchema) -> Self {
        let mut patterns = schema.excluded_paths.clone();
        patterns.extend(Self::default_patterns());

        Self {
            exclude_patterns: patterns,
            skip_large_files: true,
            skip_minified: true,
            skip_generated: true,
        }
    }

    #[allow(dead_code)]
    fn default_patterns() -> Vec<String> {
        vec![
            "node_modules".to_string(),
            ".git".to_string(),
            "dist".to_string(),
            "build".to_string(),
            "target".to_string(),
            ".next".to_string(),
            ".nuxt".to_string(),
            "__pycache__".to_string(),
            ".venv".to_string(),
            "venv".to_string(),
            ".DS_Store".to_string(),
            "Thumbs.db".to_string(),
        ]
    }

    #[allow(dead_code)]
    pub fn should_ignore(&self, path: &Path) -> bool {
        let path_str = path.to_string_lossy();

        for pattern in &self.exclude_patterns {
            if path_str.contains(pattern.as_str()) {
                return true;
            }
        }

        if Self::has_gitignore(path) {
            return true;
        }

        false
    }

    #[allow(dead_code)]
    pub fn has_gitignore(path: &Path) -> bool {
        let mut current = path.parent().map(|p| p.to_path_buf());

        while let Some(mut dir) = current {
            let gitignore_path = dir.join(".gitignore");
            if gitignore_path.exists()
                && let Ok(content) = fs::read_to_string(&gitignore_path)
            {
                let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");

                for line in content.lines() {
                    let line = line.trim();
                    if line.is_empty() || line.starts_with('#') {
                        continue;
                    }

                    if line == file_name || line == format!("*/{}", file_name) {
                        return true;
                    }
                }
            }

            if !dir.pop() {
                break;
            }
            current = Some(dir);
        }

        false
    }

    #[allow(dead_code)]
    pub fn check_file(&self, path: &Path, content: &str, max_size_kb: usize) -> Option<String> {
        if self.should_ignore(path) {
            return Some("matches exclude pattern".to_string());
        }

        if let Ok(metadata) = fs::metadata(path) {
            if metadata.len() == 0 {
                return Some("empty file".to_string());
            }

            let size_kb = metadata.len() / 1024;
            if self.skip_large_files && size_kb > max_size_kb as u64 {
                return Some(format!(
                    "file size ({} KB) exceeds limit ({} KB)",
                    size_kb, max_size_kb
                ));
            }
        }

        if self.skip_minified && self.looks_minified(content) {
            return Some("minified file detected".to_string());
        }

        if self.skip_generated && self.looks_generated(content) {
            return Some("generated file detected".to_string());
        }

        if self.contains_binary(path, content) {
            return Some("binary content detected".to_string());
        }

        None
    }

    #[allow(dead_code)]
    fn looks_minified(&self, content: &str) -> bool {
        if content.len() < 1000 {
            return false;
        }

        let mut long_lines = 0;
        let mut total_lines = 0;

        for line in content.lines() {
            total_lines += 1;
            if line.len() > 500 {
                long_lines += 1;
            }
        }

        if total_lines == 0 {
            return false;
        }

        let ratio = long_lines as f64 / total_lines as f64;
        ratio > 0.3
    }

    #[allow(dead_code)]
    fn looks_generated(&self, content: &str) -> bool {
        let markers = [
            "// DO NOT EDIT",
            "// This file was generated",
            "@generated",
            "/* Generated by ",
            "Generated by ",
            "Auto-generated by ",
        ];

        for marker in &markers {
            if content.contains(marker) {
                return true;
            }
        }

        false
    }

    #[allow(dead_code)]
    fn contains_binary(&self, path: &Path, content: &str) -> bool {
        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");

        let binary_extensions = [
            "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "pdf", "doc", "docx", "xls", "xlsx",
            "ppt", "pptx", "zip", "tar", "gz", "rar", "7z", "exe", "dll", "so", "dylib", "woff",
            "woff2", "ttf", "eot",
        ];

        if binary_extensions.contains(&ext) {
            return true;
        }

        for byte in content.bytes() {
            if byte == 0 {
                return true;
            }
        }

        false
    }
}

impl Default for IgnoreHandler {
    fn default() -> Self {
        Self::new()
    }
}

/// Recursively collects the paths of all files beneath `root` that the
/// handler does not ignore.
///
/// Ignored directories are not descended into. Entries are returned in the
/// order the directory listing yields them, with a directory's contents
/// placed where the directory itself was encountered.
///
/// # Errors
///
/// Fails when `root` does not exist, when a directory cannot be listed, when
/// a directory entry cannot be read, or when walking any nested directory
/// fails.
#[allow(dead_code)]
pub fn fast_walk(root: &Path, handler: &IgnoreHandler) -> Result<Vec<PathBuf>> {
    if !root.exists() {
        anyhow::bail!("Path does not exist: {}", root.display());
    }

    let listing = fs::read_dir(root)
        .with_context(|| format!("Failed to read directory: {}", root.display()))?;

    let mut collected = Vec::new();

    for item in listing {
        let candidate = item.context("Failed to read directory entry")?.path();

        // Ignored entries are dropped whether they are files or directories.
        if handler.should_ignore(&candidate) {
            continue;
        }

        if candidate.is_dir() {
            collected.extend(fast_walk(&candidate, handler)?);
        } else {
            collected.push(candidate);
        }
    }

    Ok(collected)
}