devist 0.1.1

Project bootstrap CLI for AI-assisted development. Spin up new projects from templates, manage backends, and keep your codebase comprehensible.
use anyhow::Result;
use serde::Serialize;
use std::collections::BTreeMap;
use std::fs;
use std::path::{Path, PathBuf};
use walkdir::WalkDir;

/// File-level summary: language guess, line count, byte size.
///
/// One value is produced per file that `scan` accepts; the ten largest end up
/// in `ScanReport::top_files_by_size`.
#[derive(Debug, Serialize, Clone)]
pub struct FileSummary {
    /// File path, relative to the scan root when the root prefix can be
    /// stripped (otherwise the path as walked), converted lossily to UTF-8.
    pub path: String,
    /// Language label derived from the file name / extension (e.g. `"Rust"`, `"Other"`).
    pub language: String,
    /// Number of lines; `0` when the file is not treated as text or could not be read.
    pub lines: usize,
    /// File size in bytes as reported by the file system metadata.
    pub bytes: u64,
}

/// Aggregate by language.
///
/// Used as the value type of `ScanReport::languages`; `Default` yields all-zero
/// counters so a new language can be inserted and accumulated in one step.
#[derive(Debug, Serialize, Clone, Default)]
pub struct LanguageStats {
    /// Number of files attributed to this language.
    pub files: usize,
    /// Sum of the line counts of those files.
    pub lines: usize,
    /// Sum of the byte sizes of those files.
    pub bytes: u64,
}

/// Top-level scan result.
///
/// Built by `scan`; serializable so it can be emitted as structured output.
#[derive(Debug, Serialize)]
pub struct ScanReport {
    /// The scanned root directory, converted lossily to UTF-8.
    pub root: String,
    /// Number of files included in the report.
    pub total_files: usize,
    /// Sum of line counts over all included files.
    pub total_lines: usize,
    /// Sum of byte sizes over all included files.
    pub total_bytes: u64,
    /// Per-language totals keyed by language label (a `BTreeMap`, so keys iterate in sorted order).
    pub languages: BTreeMap<String, LanguageStats>,
    /// At most ten files, largest first.
    pub top_files_by_size: Vec<FileSummary>,
    /// Sorted relative paths of files whose name is listed in `ENTRY_POINT_NAMES`.
    pub entry_points: Vec<String>,
    /// Sorted relative paths of files recognised as configuration files.
    pub config_files: Vec<String>,
}

/// Directory names that are pruned from directory walks.
///
/// A directory is skipped (together with everything below it) when its own
/// name equals one of these entries exactly; the comparison is case-sensitive
/// and looks only at the final path component, not at the full path.
/// The list covers dependency, build-output, cache, VCS and editor directories.
const SKIP_DIRS: &[&str] = &[
    "node_modules",
    "target",
    "dist",
    "build",
    ".next",
    ".nuxt",
    ".turbo",
    ".cache",
    ".venv",
    "venv",
    "__pycache__",
    ".pytest_cache",
    ".ruff_cache",
    ".git",
    ".idea",
    ".vscode",
    ".expo",
    ".dart_tool",
    "vendor",
    "Pods",
    "DerivedData",
];

/// File names that `scan` leaves out of the report entirely.
///
/// Matched exactly against the file name (not the path). The entries are OS
/// metadata and dependency lock files, which would otherwise inflate the
/// line and byte totals.
const SKIP_FILES: &[&str] = &[
    ".DS_Store",
    "Cargo.lock",
    "package-lock.json",
    "pnpm-lock.yaml",
    "yarn.lock",
    "uv.lock",
];

/// Files that signal entry points / important code.
///
/// Matched exactly (case-sensitively) against the bare file name, so a file
/// with one of these names is reported as an entry point at any depth.
const ENTRY_POINT_NAMES: &[&str] = &[
    "main.rs",
    "main.py",
    "main.ts",
    "main.tsx",
    "main.dart",
    "main.go",
    "index.ts",
    "index.tsx",
    "index.js",
    "index.html",
    "app.module.ts",
    "App.tsx",
    "App.tsx.tmpl",
];

/// Names (or trailing path fragments) of files reported as configuration files.
///
/// An entry matches a file when the file's relative path is equal to the entry
/// or ends with the entry preceded by a path separator. Entries may therefore
/// span more than one component, e.g. `supabase/config.toml`.
const CONFIG_FILE_NAMES: &[&str] = &[
    "Cargo.toml",
    "package.json",
    "pyproject.toml",
    "pubspec.yaml",
    "go.mod",
    "tsconfig.json",
    "vite.config.ts",
    "vite.config.js",
    "next.config.js",
    "next.config.ts",
    "nest-cli.json",
    "docker-compose.yml",
    "Dockerfile",
    "supabase/config.toml",
    ".env.example",
    "devist.toml",
];

/// Walks `root` recursively and summarises the files found.
///
/// Directories named in `SKIP_DIRS` are pruned, files named in `SKIP_FILES`
/// and files larger than 5 MiB are ignored. Every remaining regular file is
/// assigned a language, has its lines counted when the language is treated as
/// text, and is added to the per-language and overall totals.
///
/// The root directory itself is never pruned, even when its own name appears
/// in `SKIP_DIRS` (for example a project checked out into a folder called
/// `build`); only directories *below* the root are subject to the skip list.
///
/// # Errors
///
/// The scan is best-effort: entries that cannot be walked, have a non-UTF-8
/// file name, or whose metadata cannot be read are skipped silently, so this
/// function currently always returns `Ok`. The `Result` is kept for interface
/// stability.
pub fn scan(root: &Path) -> Result<ScanReport> {
    // Files above this size are assumed to be binary blobs and are left out.
    const MAX_FILE_BYTES: u64 = 5 * 1024 * 1024;

    let mut languages: BTreeMap<String, LanguageStats> = BTreeMap::new();
    let mut all_files: Vec<FileSummary> = Vec::new();
    let mut entry_points: Vec<String> = Vec::new();
    let mut config_files: Vec<String> = Vec::new();

    let mut total_files = 0;
    let mut total_lines = 0;
    let mut total_bytes: u64 = 0;

    let walker = WalkDir::new(root).into_iter().filter_entry(|e| {
        // Depth 0 is the root the caller asked for: it must always be entered,
        // otherwise a root named like a skipped directory yields an empty report.
        e.depth() == 0
            || !e.file_type().is_dir()
            || e
                .file_name()
                .to_str()
                // A non-UTF-8 name cannot equal any (UTF-8) skip entry.
                .map_or(true, |name| !SKIP_DIRS.contains(&name))
    });

    // `flatten` drops walk errors (permission denied, vanished entries, ...).
    for entry in walker.flatten() {
        if !entry.file_type().is_file() {
            continue;
        }

        let path = entry.path();
        let file_name = match path.file_name().and_then(|n| n.to_str()) {
            Some(n) => n,
            None => continue,
        };
        if SKIP_FILES.contains(&file_name) {
            continue;
        }

        let bytes = match fs::metadata(path) {
            Ok(m) => m.len(),
            Err(_) => continue,
        };
        if bytes > MAX_FILE_BYTES {
            continue;
        }

        // Report paths relative to the root; fall back to the walked path if
        // the prefix cannot be stripped.
        let rel = path.strip_prefix(root).unwrap_or(path);
        let rel_str = rel.to_string_lossy().to_string();

        let language = detect_language(path);
        // An unreadable file still counts as a file, just with zero lines.
        let lines = if is_text_language(&language) {
            count_lines(path).unwrap_or(0)
        } else {
            0
        };

        let stats = languages.entry(language.clone()).or_default();
        stats.files += 1;
        stats.lines += lines;
        stats.bytes += bytes;

        total_files += 1;
        total_lines += lines;
        total_bytes += bytes;

        if ENTRY_POINT_NAMES.contains(&file_name) {
            entry_points.push(rel_str.clone());
        }
        if is_config_file(rel) {
            config_files.push(rel_str.clone());
        }

        all_files.push(FileSummary {
            path: rel_str,
            language,
            lines,
            bytes,
        });
    }

    // Keep only the ten largest files, largest first (stable for equal sizes).
    all_files.sort_by_key(|f| std::cmp::Reverse(f.bytes));
    all_files.truncate(10);

    entry_points.sort();
    config_files.sort();

    Ok(ScanReport {
        root: root.to_string_lossy().to_string(),
        total_files,
        total_lines,
        total_bytes,
        languages,
        top_files_by_size: all_files,
        entry_points,
        config_files,
    })
}

/// Counts the lines in the file at `path`.
///
/// The file is read as raw bytes, so files that are not valid UTF-8 (for
/// example Latin-1 encoded sources) are counted as well instead of failing.
/// A line is terminated by `\n`; a final line without a trailing newline still
/// counts, and an empty file has zero lines. For valid UTF-8 input the result
/// equals `str::lines().count()`.
///
/// # Errors
///
/// Returns the underlying I/O error when the file cannot be read.
fn count_lines(path: &Path) -> Result<usize> {
    let bytes = fs::read(path)?;
    let newlines = bytes.iter().filter(|&&b| b == b'\n').count();
    // Content after the last newline forms one more (unterminated) line.
    let unterminated_tail = matches!(bytes.last(), Some(&b) if b != b'\n');
    Ok(newlines + usize::from(unterminated_tail))
}

/// Maps an already lower-cased file extension to a language label.
///
/// Returns `None` for extensions without a dedicated label, which includes
/// the empty string and `tmpl` (templates are resolved by the callers).
fn language_for_extension(ext: &str) -> Option<&'static str> {
    Some(match ext {
        "rs" => "Rust",
        "py" => "Python",
        "ts" | "tsx" => "TypeScript",
        "js" | "jsx" | "mjs" | "cjs" => "JavaScript",
        "dart" => "Dart",
        "go" => "Go",
        "java" => "Java",
        "kt" | "kts" => "Kotlin",
        "swift" => "Swift",
        "rb" => "Ruby",
        "php" => "PHP",
        "html" | "htm" => "HTML",
        "css" | "scss" | "sass" | "less" => "CSS",
        "md" => "Markdown",
        "json" => "JSON",
        "yaml" | "yml" => "YAML",
        "toml" => "TOML",
        "sh" | "bash" | "zsh" => "Shell",
        "sql" => "SQL",
        _ => return None,
    })
}

/// Guesses the language label for `path` from its file name and extension.
///
/// Well-known file names (`Dockerfile`, `.env`, `.env.local`, `.env.example`,
/// `.gitignore`) are recognised first. They have to be checked before the
/// extension because `Path::extension` reports `local` / `example` for the
/// dotted `.env.*` names, which would otherwise hide them behind `"Other"`.
/// After that the extension is matched case-insensitively; `.tmpl` files are
/// classified by their inner extension. Anything unrecognised is `"Other"`.
fn detect_language(path: &Path) -> String {
    if let Some(name) = path.file_name().and_then(|n| n.to_str()) {
        let special = match name {
            "Dockerfile" => Some("Dockerfile"),
            ".env" | ".env.local" | ".env.example" => Some("Env"),
            ".gitignore" => Some("Gitignore"),
            _ => None,
        };
        if let Some(label) = special {
            return label.to_string();
        }
    }

    let ext = path
        .extension()
        .and_then(|e| e.to_str())
        .unwrap_or("")
        .to_lowercase();

    let label = if ext == "tmpl" {
        detect_tmpl_language(path)
    } else {
        language_for_extension(&ext).unwrap_or("Other")
    };
    label.to_string()
}

/// For .tmpl files, detect language by looking at the inner extension.
/// e.g. "App.tsx.tmpl" → TypeScript
///
/// The inner extension is resolved through the same table as regular files,
/// so every language known for `foo.<ext>` is also known for `foo.<ext>.tmpl`.
/// A template without a recognised inner extension is labelled `"Template"`.
fn detect_tmpl_language(path: &Path) -> &'static str {
    path.file_stem()
        .and_then(|stem| stem.to_str())
        // The stem of "App.tsx.tmpl" is "App.tsx"; its extension is the inner one.
        .and_then(|stem| Path::new(stem).extension())
        .and_then(|ext| ext.to_str())
        .and_then(|ext| language_for_extension(&ext.to_lowercase()))
        .unwrap_or("Template")
}

/// Tells whether files with the language label `lang` should have their lines
/// counted. Every label qualifies except the catch-all `"Other"`.
fn is_text_language(lang: &str) -> bool {
    lang != "Other"
}

/// Tells whether the relative path `rel` names a known configuration file.
///
/// `rel` matches when its trailing path components equal one of the entries in
/// `CONFIG_FILE_NAMES`. The comparison is done per component via
/// `Path::ends_with`, so it works with the platform's path separator, handles
/// multi-component entries such as `supabase/config.toml`, and never matches a
/// partial file name (`myCargo.toml` is not `Cargo.toml`).
fn is_config_file(rel: &Path) -> bool {
    CONFIG_FILE_NAMES.iter().any(|name| rel.ends_with(name))
}

/// Build a tree-style listing of the directory, skipping the same dirs as scan.
/// Caps depth and total entries to keep output sane.
///
/// Each returned line is the entry name indented by two spaces per depth
/// level, with a trailing `/` for directories. The root itself is not listed
/// and is never pruned, even when its own name appears in `SKIP_DIRS`.
///
/// * `max_depth` – deepest level (relative to `root`) that is visited.
/// * `max_entries` – maximum number of entries listed; when more entries
///   exist, a final `… (truncated)` line is appended. The root does not count
///   towards this limit and never triggers the truncation marker on its own.
///
/// Entries that cannot be read during the walk are skipped silently.
pub fn build_tree(root: &Path, max_depth: usize, max_entries: usize) -> Vec<String> {
    let mut lines = Vec::new();
    let mut count = 0;

    let walker = WalkDir::new(root)
        .max_depth(max_depth)
        .into_iter()
        .filter_entry(|e| {
            // Depth 0 is the requested root: always descend into it.
            e.depth() == 0
                || !e.file_type().is_dir()
                || e
                    .file_name()
                    .to_str()
                    // A non-UTF-8 name cannot equal any (UTF-8) skip entry.
                    .map_or(true, |name| !SKIP_DIRS.contains(&name))
        });

    for entry in walker.flatten() {
        let depth = entry.depth();
        // Discard the root before checking the cap so it cannot by itself
        // cause a "truncated" marker.
        if depth == 0 {
            continue;
        }
        if count >= max_entries {
            lines.push("  … (truncated)".to_string());
            break;
        }
        let name = entry.file_name().to_string_lossy();
        let indent = "  ".repeat(depth);
        let suffix = if entry.file_type().is_dir() { "/" } else { "" };
        lines.push(format!("{}{}{}", indent, name, suffix));
        count += 1;
    }

    lines
}

/// Public helper: render a byte count in a human-readable unit.
///
/// Values below 1024 are printed exactly as `"<n> B"`. Larger values are
/// divided by 1024 until they drop below 1024 or the largest unit (`GB`) is
/// reached, and are printed with one decimal place, e.g. `"1.5 KB"`.
pub fn human_bytes(bytes: u64) -> String {
    const UNITS: [&str; 4] = ["B", "KB", "MB", "GB"];

    // Plain byte counts keep their exact integer form.
    if bytes < 1024 {
        return format!("{} {}", bytes, UNITS[0]);
    }

    let mut scaled = bytes as f64;
    let mut unit = UNITS[0];
    for next_unit in &UNITS[1..] {
        if scaled < 1024.0 {
            break;
        }
        scaled /= 1024.0;
        unit = next_unit;
    }
    format!("{:.1} {}", scaled, unit)
}

/// Find files matching a relative path or pattern (used by `explain`).
pub fn find_path(root: &Path, query: &str) -> Option<PathBuf> {
    // Try direct path
    let direct = root.join(query);
    if direct.exists() {
        return Some(direct);
    }

    // Search by filename match
    for entry in WalkDir::new(root).into_iter().flatten() {
        if !entry.file_type().is_file() {
            continue;
        }
        let name = entry.file_name().to_string_lossy().to_string();
        if name == query {
            return Some(entry.path().to_path_buf());
        }
    }
    None
}