Skip to main content

ai_refactor_cli/
scanner.rs

1//! Filesystem walker that applies rules to files.
2//!
3//! v0.2.0: Python files use tree-sitter AST detection (no more regex false
4//! positives from comments and string literals).  TypeScript files continue to
5//! use the v0.1.0 regex path until tree-sitter-typescript ships in v0.3.0.
6
7use anyhow::{Context, Result};
8use serde::Serialize;
9use std::fs;
10use std::path::Path;
11use walkdir::WalkDir;
12
13use crate::ast::python as py_ast;
14use crate::rules::{self, Rule};
15
16#[derive(Debug, Serialize)]
17pub struct Finding {
18    pub rule_id: String,
19    pub file: String,
20    pub line: usize,
21    pub snippet: String,
22}
23
24/// Recursively scan `path`, optionally restricted to a single rule id.
25pub fn scan_path(path: &str, rule_id: Option<&str>) -> Result<Vec<Finding>> {
26    let active_rules: Vec<&'static Rule> = match rule_id {
27        Some(id) => {
28            vec![rules::find_rule(id).with_context(|| format!("unknown rule id `{}`", id))?]
29        }
30        None => rules::RULES.iter().collect(),
31    };
32
33    let mut findings = Vec::new();
34    let root = Path::new(path);
35
36    let walker: Box<dyn Iterator<Item = walkdir::DirEntry>> = if root.is_file() {
37        Box::new(WalkDir::new(root).into_iter().filter_map(|e| e.ok()))
38    } else {
39        Box::new(
40            WalkDir::new(root)
41                .into_iter()
42                .filter_entry(|e| e.depth() == 0 || !is_hidden_or_vendor(e))
43                .filter_map(|e| e.ok()),
44        )
45    };
46
47    // Build a parser once per scan; it can be reused across files.
48    let mut py_parser = py_ast::make_parser()?;
49
50    for entry in walker {
51        if !entry.file_type().is_file() {
52            continue;
53        }
54        let ext = match entry.path().extension().and_then(|s| s.to_str()) {
55            Some(e) => e,
56            None => continue,
57        };
58        let rules_for_file: Vec<&'static Rule> = active_rules
59            .iter()
60            .copied()
61            .filter(|r| r.extensions.contains(&ext))
62            .collect();
63        if rules_for_file.is_empty() {
64            continue;
65        }
66
67        let file_path = entry.path().display().to_string();
68
69        if ext == "py" {
70            // ── AST-backed Python detection ──────────────────────────────────
71            let content = match fs::read(entry.path()) {
72                Ok(c) => c,
73                Err(_) => continue,
74            };
75            for rule in &rules_for_file {
76                let mut rule_findings = match rule.id {
77                    "python-missing-typing" => {
78                        py_ast::detect_missing_typing(&file_path, &content, &mut py_parser)
79                            .unwrap_or_default()
80                    }
81                    "django-fbv" => py_ast::detect_django_fbv(&file_path, &content, &mut py_parser)
82                        .unwrap_or_default(),
83                    _ => Vec::new(),
84                };
85                findings.append(&mut rule_findings);
86            }
87        } else {
88            // ── Regex-backed detection (TypeScript etc.) ─────────────────────
89            let content = match fs::read_to_string(entry.path()) {
90                Ok(c) => c,
91                Err(_) => continue,
92            };
93            for rule in &rules_for_file {
94                let re = rules::compile(rule);
95                for (lineno, line) in content.lines().enumerate() {
96                    if re.is_match(line) {
97                        findings.push(Finding {
98                            rule_id: rule.id.to_string(),
99                            file: file_path.clone(),
100                            line: lineno + 1,
101                            snippet: line.trim().to_string(),
102                        });
103                    }
104                }
105            }
106        }
107    }
108    Ok(findings)
109}
110
111fn is_hidden_or_vendor(entry: &walkdir::DirEntry) -> bool {
112    let name = entry.file_name().to_string_lossy();
113    name.starts_with('.')
114        || matches!(
115            name.as_ref(),
116            "node_modules" | "target" | "dist" | "build" | "__pycache__" | "venv" | ".venv"
117        )
118}
119
#[cfg(test)]
mod tests {
    use super::*;

    /// Creates a fresh temporary directory containing a single file `name`
    /// with the given `contents`. The directory lives as long as the
    /// returned guard.
    fn tmp_file(name: &str, contents: &str) -> tempfile::TempDir {
        let dir = tempfile::tempdir().unwrap();
        fs::write(dir.path().join(name), contents).unwrap();
        dir
    }

    /// Runs `scan_path` over the whole temporary directory.
    fn scan_dir(dir: &tempfile::TempDir, rule_id: Option<&str>) -> Result<Vec<Finding>> {
        scan_path(dir.path().to_str().unwrap(), rule_id)
    }

    #[test]
    fn scan_typescript_any() {
        let dir = tmp_file("a.ts", "const x: any = 1;\nconst y: number = 2;\n");
        let findings = scan_dir(&dir, Some("typescript-no-any")).unwrap();

        assert_eq!(findings.len(), 1);
        let only = &findings[0];
        assert_eq!(only.line, 1);
        assert_eq!(only.rule_id, "typescript-no-any");
    }

    #[test]
    fn scan_python_untyped_def() {
        let dir = tmp_file(
            "v.py",
            "def home(request):\n    return None\n\ndef get(self) -> int:\n    return 1\n",
        );
        let findings = scan_dir(&dir, None).unwrap();
        let reported = |rule: &str, line: usize| {
            findings
                .iter()
                .any(|f| f.rule_id == rule && f.line == line)
        };

        // home() is a function-based view.
        assert!(reported("django-fbv", 1));
        // home(request) also lacks annotations (request is untyped).
        assert!(reported("python-missing-typing", 1));
    }

    #[test]
    fn scan_python_comment_no_false_positive() {
        // The v0.1.0 regex path flagged commented-out code; the v0.2.0 AST
        // path must stay silent.
        let dir = tmp_file(
            "v.py",
            "# def home(request):\ndef real(x: int) -> int:\n    return x\n",
        );
        let findings = scan_dir(&dir, None).unwrap();
        assert!(
            findings.is_empty(),
            "no findings expected for comment-only FBV lookalike, got: {:?}",
            findings
        );
    }

    #[test]
    fn unknown_rule_errors() {
        let dir = tempfile::tempdir().unwrap();
        let outcome = scan_dir(&dir, Some("nope"));
        assert!(outcome.is_err());
    }
}