ai-refactor-cli 0.2.0

Rule-based CLI for refactoring legacy code (TypeScript `any` / Python typing / Django FBV→CBV). A complement to general-purpose AI coding agents.
Documentation
//! Filesystem walker that applies rules to files.
//!
//! v0.2.0: Python files use tree-sitter AST detection (no more regex false
//! positives from comments and string literals).  TypeScript files continue to
//! use the v0.1.0 regex path until tree-sitter-typescript ships in v0.3.0.

use anyhow::{Context, Result};
use serde::Serialize;
use std::fs;
use std::path::Path;
use walkdir::WalkDir;

use crate::ast::python as py_ast;
use crate::rules::{self, Rule};

/// A single rule match reported by [`scan_path`].
///
/// Derives `Serialize` so a list of findings can be written out by any
/// serde-based serializer.
#[derive(Debug, Serialize)]
pub struct Finding {
    /// Identifier of the rule that produced this finding.
    pub rule_id: String,
    /// Path of the file the finding was reported for (as rendered by
    /// `Path::display` in the regex-backed path of `scan_path`).
    pub file: String,
    /// Line number of the match. The regex-backed path of `scan_path` reports
    /// it 1-based; AST-backed findings are presumably 1-based too — confirm
    /// against `crate::ast::python`.
    pub line: usize,
    /// Source text associated with the match. The regex-backed path stores the
    /// matching line with surrounding whitespace trimmed.
    pub snippet: String,
}

/// Recursively scan `path`, optionally restricted to a single rule id.
pub fn scan_path(path: &str, rule_id: Option<&str>) -> Result<Vec<Finding>> {
    let active_rules: Vec<&'static Rule> = match rule_id {
        Some(id) => {
            vec![rules::find_rule(id).with_context(|| format!("unknown rule id `{}`", id))?]
        }
        None => rules::RULES.iter().collect(),
    };

    let mut findings = Vec::new();
    let root = Path::new(path);

    let walker: Box<dyn Iterator<Item = walkdir::DirEntry>> = if root.is_file() {
        Box::new(WalkDir::new(root).into_iter().filter_map(|e| e.ok()))
    } else {
        Box::new(
            WalkDir::new(root)
                .into_iter()
                .filter_entry(|e| e.depth() == 0 || !is_hidden_or_vendor(e))
                .filter_map(|e| e.ok()),
        )
    };

    // Build a parser once per scan; it can be reused across files.
    let mut py_parser = py_ast::make_parser()?;

    for entry in walker {
        if !entry.file_type().is_file() {
            continue;
        }
        let ext = match entry.path().extension().and_then(|s| s.to_str()) {
            Some(e) => e,
            None => continue,
        };
        let rules_for_file: Vec<&'static Rule> = active_rules
            .iter()
            .copied()
            .filter(|r| r.extensions.contains(&ext))
            .collect();
        if rules_for_file.is_empty() {
            continue;
        }

        let file_path = entry.path().display().to_string();

        if ext == "py" {
            // ── AST-backed Python detection ──────────────────────────────────
            let content = match fs::read(entry.path()) {
                Ok(c) => c,
                Err(_) => continue,
            };
            for rule in &rules_for_file {
                let mut rule_findings = match rule.id {
                    "python-missing-typing" => {
                        py_ast::detect_missing_typing(&file_path, &content, &mut py_parser)
                            .unwrap_or_default()
                    }
                    "django-fbv" => py_ast::detect_django_fbv(&file_path, &content, &mut py_parser)
                        .unwrap_or_default(),
                    _ => Vec::new(),
                };
                findings.append(&mut rule_findings);
            }
        } else {
            // ── Regex-backed detection (TypeScript etc.) ─────────────────────
            let content = match fs::read_to_string(entry.path()) {
                Ok(c) => c,
                Err(_) => continue,
            };
            for rule in &rules_for_file {
                let re = rules::compile(rule);
                for (lineno, line) in content.lines().enumerate() {
                    if re.is_match(line) {
                        findings.push(Finding {
                            rule_id: rule.id.to_string(),
                            file: file_path.clone(),
                            line: lineno + 1,
                            snippet: line.trim().to_string(),
                        });
                    }
                }
            }
        }
    }
    Ok(findings)
}

/// Reports whether a walked entry should be excluded from scanning.
///
/// An entry is excluded when its file name (lossily converted to UTF-8)
/// starts with a dot, or when it equals one of a fixed set of dependency /
/// build-output names.
fn is_hidden_or_vendor(entry: &walkdir::DirEntry) -> bool {
    // `.venv` is already caught by the dot-prefix test below; it stays in the
    // table so the list of skipped names reads completely on its own.
    const SKIPPED_NAMES: [&str; 7] = [
        "node_modules",
        "target",
        "dist",
        "build",
        "__pycache__",
        "venv",
        ".venv",
    ];

    let lossy = entry.file_name().to_string_lossy();
    let file_name: &str = &lossy;

    if file_name.starts_with('.') {
        return true;
    }
    SKIPPED_NAMES.contains(&file_name)
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a fresh temporary directory containing one file `name` filled
    /// with `contents`. The returned guard owns the directory, so it must stay
    /// alive for as long as the test needs the file.
    fn tmp_file(name: &str, contents: &str) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join(name), contents).unwrap();
        dir
    }

    /// Runs `scan_path` on the temporary directory and unwraps the result.
    fn scan_dir(dir: &tempfile::TempDir, rule_id: Option<&str>) -> Vec<Finding> {
        let root = dir.path().to_str().unwrap();
        scan_path(root, rule_id).unwrap()
    }

    /// True when `findings` contains a hit for `rule_id` on `line`.
    fn has_finding(findings: &[Finding], rule_id: &str, line: usize) -> bool {
        findings
            .iter()
            .any(|f| f.rule_id == rule_id && f.line == line)
    }

    #[test]
    fn scan_typescript_any() {
        let dir = tmp_file("a.ts", "const x: any = 1;\nconst y: number = 2;\n");
        let found = scan_dir(&dir, Some("typescript-no-any"));

        // Only the first line uses `any`.
        assert_eq!(found.len(), 1);
        let hit = &found[0];
        assert_eq!(hit.line, 1);
        assert_eq!(hit.rule_id, "typescript-no-any");
    }

    #[test]
    fn scan_python_untyped_def() {
        let source = "def home(request):\n    return None\n\ndef get(self) -> int:\n    return 1\n";
        let dir = tmp_file("v.py", source);
        let found = scan_dir(&dir, None);

        // home() hits django-fbv
        assert!(has_finding(&found, "django-fbv", 1));
        // home(request) is also untyped (request has no annotation)
        assert!(has_finding(&found, "python-missing-typing", 1));
    }

    #[test]
    fn scan_python_comment_no_false_positive() {
        // v0.1.0 regex would flag comments; v0.2.0 AST must not.
        let source = "# def home(request):\ndef real(x: int) -> int:\n    return x\n";
        let dir = tmp_file("v.py", source);
        let found = scan_dir(&dir, None);

        assert!(
            found.is_empty(),
            "no findings expected for comment-only FBV lookalike, got: {:?}",
            found
        );
    }

    #[test]
    fn unknown_rule_errors() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = scan_path(dir.path().to_str().unwrap(), Some("nope"));
        assert!(outcome.is_err());
    }
}