Skip to main content

dissolve_python/
scanner.rs

1// Copyright (C) 2024 Jelmer Vernooij <jelmer@samba.org>
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//    http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15//! Fast scanner for @replace_me decorators.
16//!
17//! This module provides a fast pre-filter to skip files that definitely
18//! don't contain @replace_me decorators, avoiding expensive LibCST parsing.
19
20use anyhow::{Context, Result};
21use regex::Regex;
22use std::fs;
23use std::path::Path;
24
25/// Quick check if content might contain @replace_me decorators.
26///
27/// This is a fast pre-filter that uses regex to avoid parsing files
28/// that definitely don't contain @replace_me. It errs on the side of
29/// false positives to avoid missing any actual decorators.
30pub fn might_contain_replace_me(content: &str) -> bool {
31    // Use a static regex for performance
32    static RE: std::sync::OnceLock<Regex> = std::sync::OnceLock::new();
33    let re = RE.get_or_init(|| {
34        // Regex pattern to quickly check if a file might contain @replace_me
35        // This is intentionally broad to avoid false negatives
36        Regex::new(r"(?i)@?\breplace_me\b").unwrap()
37    });
38
39    re.is_match(content)
40}
41
42/// Read a file and return content if it might contain @replace_me.
43///
44/// # Arguments
45/// * `file_path` - Path to Python file
46///
47/// # Returns
48/// * `Ok(Some(content))` - File content if it might contain @replace_me
49/// * `Ok(None)` - File doesn't contain @replace_me
50/// * `Err(_)` - File cannot be read or is not valid UTF-8
51pub fn scan_file(file_path: &str) -> Result<Option<String>> {
52    let content = fs::read_to_string(file_path)
53        .with_context(|| format!("Failed to read file: {}", file_path))?;
54
55    if might_contain_replace_me(&content) {
56        Ok(Some(content))
57    } else {
58        Ok(None)
59    }
60}
61
62/// Iterator that yields files that might contain @replace_me decorators.
63///
64/// This iterator reads files and pre-filters them to avoid expensive parsing
65/// of files that definitely don't contain @replace_me decorators.
66pub fn find_files_with_replace_me<I>(file_paths: I) -> FindFilesIterator<I::IntoIter>
67where
68    I: IntoIterator,
69    I::Item: AsRef<Path>,
70{
71    FindFilesIterator {
72        paths: file_paths.into_iter(),
73    }
74}
75
/// Lazy iterator returned by [`find_files_with_replace_me`].
///
/// Wraps an iterator of paths and yields only the files that pass the
/// `replace_me` pre-filter. It does no work until it is advanced, hence the
/// `#[must_use]`.
#[derive(Debug)]
#[must_use = "iterators are lazy and do nothing unless consumed"]
pub struct FindFilesIterator<I> {
    /// Candidate paths still to be examined; consumed as the iterator advances.
    paths: I,
}
80
81impl<I> Iterator for FindFilesIterator<I>
82where
83    I: Iterator,
84    I::Item: AsRef<Path>,
85{
86    type Item = Result<(String, String)>; // (file_path, content)
87
88    fn next(&mut self) -> Option<Self::Item> {
89        for path in &mut self.paths {
90            let path_str = path.as_ref().to_string_lossy().to_string();
91
92            match scan_file(&path_str) {
93                Ok(Some(content)) => return Some(Ok((path_str, content))),
94                Ok(None) => continue, // File doesn't contain @replace_me, skip
95                Err(e) => return Some(Err(e)),
96            }
97        }
98        None
99    }
100}
101
102/// Recursively find all Python files in a directory that might contain @replace_me
103pub fn find_python_files_with_replace_me(dir_path: &str) -> Result<Vec<(String, String)>> {
104    let mut results = Vec::new();
105    visit_directory(Path::new(dir_path), &mut results)?;
106    Ok(results)
107}
108
109fn visit_directory(dir: &Path, results: &mut Vec<(String, String)>) -> Result<()> {
110    if !dir.is_dir() {
111        return Ok(());
112    }
113
114    for entry in fs::read_dir(dir)? {
115        let entry = entry?;
116        let path = entry.path();
117
118        if path.is_dir() {
119            // Skip hidden directories and __pycache__
120            if let Some(name) = path.file_name() {
121                let name = name.to_string_lossy();
122                if !name.starts_with('.') && name != "__pycache__" {
123                    visit_directory(&path, results)?;
124                }
125            }
126        } else if path.extension().is_some_and(|ext| ext == "py") {
127            // Check if Python file contains @replace_me
128            let path_str = path.to_string_lossy().to_string();
129            if let Some(content) = scan_file(&path_str)? {
130                results.push((path_str, content));
131            }
132        }
133    }
134
135    Ok(())
136}
137
#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Create a temporary file containing `body` followed by a newline.
    fn temp_file_with(body: &str) -> Result<NamedTempFile> {
        let mut file = NamedTempFile::new()?;
        writeln!(file, "{}", body)?;
        Ok(file)
    }

    #[test]
    fn test_might_contain_replace_me() {
        assert!(might_contain_replace_me("@replace_me\ndef foo(): pass"));
        assert!(might_contain_replace_me("from dissolve import replace_me"));
        assert!(might_contain_replace_me("@dissolve.replace_me()"));
        assert!(might_contain_replace_me("some text replace_me somewhere"));
        assert!(!might_contain_replace_me("def regular_function(): pass"));
        assert!(!might_contain_replace_me("# This file has no decorators"));
        assert!(!might_contain_replace_me(""));
    }

    #[test]
    fn test_scan_file_with_decorator() -> Result<()> {
        let temp_file = temp_file_with("@replace_me\ndef old_func(): pass")?;

        let result = scan_file(temp_file.path().to_str().unwrap())?;
        assert!(result.is_some_and(|content| content.contains("@replace_me")));

        Ok(())
    }

    #[test]
    fn test_scan_file_without_decorator() -> Result<()> {
        let temp_file = temp_file_with("def regular_func(): pass")?;

        let result = scan_file(temp_file.path().to_str().unwrap())?;
        assert!(result.is_none());

        Ok(())
    }

    #[test]
    fn test_find_files_iterator() -> Result<()> {
        let temp1 = temp_file_with("@replace_me\ndef old_func(): pass")?;
        let temp2 = temp_file_with("def regular_func(): pass")?;
        let temp3 = temp_file_with("from dissolve import replace_me")?;

        let path1 = temp1.path().to_str().unwrap();
        let path2 = temp2.path().to_str().unwrap();
        let path3 = temp3.path().to_str().unwrap();

        let results = find_files_with_replace_me(vec![path1, path2, path3])
            .collect::<Result<Vec<_>>>()?;

        // temp1 and temp3 are yielded, in input order; temp2 is filtered out.
        let found: Vec<&str> = results.iter().map(|(path, _)| path.as_str()).collect();
        assert_eq!(found, vec![path1, path3]);

        Ok(())
    }

    #[test]
    fn test_find_python_files_with_replace_me() -> Result<()> {
        let root = tempfile::tempdir()?;
        let base = root.path();

        fs::create_dir(base.join("pkg"))?;
        fs::create_dir(base.join(".hidden"))?;
        fs::create_dir(base.join("__pycache__"))?;

        fs::write(base.join("top.py"), "@replace_me\ndef old(): pass\n")?;
        fs::write(base.join("plain.py"), "def regular(): pass\n")?;
        fs::write(base.join("notes.txt"), "replace_me\n")?;
        fs::write(
            base.join("pkg").join("nested.py"),
            "from dissolve import replace_me\n",
        )?;
        fs::write(base.join(".hidden").join("skipped.py"), "@replace_me\n")?;
        fs::write(base.join("__pycache__").join("cached.py"), "@replace_me\n")?;

        // Directory listing order is unspecified, so compare sorted paths.
        let mut found: Vec<String> = find_python_files_with_replace_me(base.to_str().unwrap())?
            .into_iter()
            .map(|(path, _)| path)
            .collect();
        found.sort();

        let mut expected = vec![
            base.join("top.py").to_string_lossy().into_owned(),
            base.join("pkg")
                .join("nested.py")
                .to_string_lossy()
                .into_owned(),
        ];
        expected.sort();

        assert_eq!(found, expected);

        Ok(())
    }

    #[test]
    fn test_case_insensitive_matching() {
        // The regex should be case insensitive
        assert!(might_contain_replace_me("@Replace_Me"));
        assert!(might_contain_replace_me("@REPLACE_ME"));
        assert!(might_contain_replace_me("Replace_Me somewhere"));
    }

    #[test]
    fn test_word_boundary_matching() {
        // Should match whole words only
        assert!(might_contain_replace_me("replace_me"));
        assert!(might_contain_replace_me("@replace_me()"));
        assert!(might_contain_replace_me("import replace_me"));

        // `_`, letters and digits are word characters, so `\b` rejects
        // identifiers that merely contain `replace_me`.
        assert!(!might_contain_replace_me("do_replace_me"));
        assert!(!might_contain_replace_me("replace_me_later"));
        assert!(!might_contain_replace_me("xreplace_me"));
        assert!(!might_contain_replace_me("replace_me2"));
    }
}