syncable_cli/common/
file_utils.rs

1use crate::analyzer::AnalysisConfig;
2use crate::error::{SecurityError, IaCGeneratorError};
3use std::fs;
4use std::path::{Path, PathBuf};
5use walkdir::{WalkDir, DirEntry};
6
7/// Validates a project path and ensures security
8pub fn validate_project_path(path: &Path) -> Result<PathBuf, IaCGeneratorError> {
9    // Try to canonicalize, but be more forgiving on Windows
10    let canonical = match path.canonicalize() {
11        Ok(p) => p,
12        Err(e) => {
13            // On Windows, canonicalize can fail for valid paths due to permissions
14            // Fall back to absolute path if the path exists
15            if path.exists() {
16                path.to_path_buf()
17            } else {
18                return Err(SecurityError::InvalidPath(
19                    format!("Invalid path '{}': {}", path.display(), e)
20                ).into());
21            }
22        }
23    };
24    
25    // Basic validation - path should exist and be a directory
26    if !canonical.is_dir() {
27        return Err(SecurityError::InvalidPath(
28            "Path is not a directory".to_string()
29        ).into());
30    }
31    
32    Ok(canonical)
33}
34
35/// Collects project files based on configuration
36pub fn collect_project_files(
37    root: &Path,
38    config: &AnalysisConfig,
39) -> Result<Vec<PathBuf>, IaCGeneratorError> {
40    let mut files = Vec::new();
41    
42    for entry in WalkDir::new(root)
43        .follow_links(false)
44        .into_iter()
45        .filter_entry(|e| !is_ignored(e, &config.ignore_patterns, root))
46    {
47        let entry = entry?;
48        
49        if entry.file_type().is_file() {
50            let path = entry.path();
51            
52            // Check file size limit
53            if let Ok(metadata) = fs::metadata(path) {
54                if metadata.len() > config.max_file_size as u64 {
55                    log::debug!("Skipping large file: {}", path.display());
56                    continue;
57                }
58            }
59            
60            // Only include relevant file types
61            if is_relevant_file(path) {
62                files.push(path.to_path_buf());
63            }
64        }
65    }
66    
67    log::debug!("Collected {} relevant files", files.len());
68    Ok(files)
69}
70
71/// Checks if a directory entry should be ignored
72fn is_ignored(entry: &DirEntry, ignore_patterns: &[String], root: &Path) -> bool {
73    let path = entry.path();
74    
75    // Get the relative path from the root
76    let relative_path = match path.strip_prefix(root) {
77        Ok(rel) => rel,
78        Err(_) => return false, // If we can't get relative path, don't ignore
79    };
80    
81    // Check each component of the relative path
82    for component in relative_path.components() {
83        if let std::path::Component::Normal(name) = component {
84            if let Some(name_str) = name.to_str() {
85                // Check if this component matches any ignore pattern
86                for pattern in ignore_patterns {
87                    if name_str == pattern {
88                        return true;
89                    }
90                }
91                
92                // Ignore hidden files and directories (starting with .)
93                if name_str.starts_with('.') && name_str != ".env" {
94                    return true;
95                }
96            }
97        }
98    }
99    
100    false
101}
102
/// Decides whether `path` names a file worth analysing.
///
/// A path qualifies when its extension is a known source-code extension, when
/// its file name is a known manifest/configuration name, or when its file name
/// ends with one of the generic configuration suffixes. All comparisons are
/// case-sensitive; a missing or non-UTF-8 extension or file name is treated as
/// the empty string.
fn is_relevant_file(path: &Path) -> bool {
    // Programming language files
    const SOURCE_EXTENSIONS: &[&str] = &[
        "rs", "go", "js", "ts", "jsx", "tsx", "py", "java", "kt", "scala",
        "rb", "php", "cs", "fs", "cpp", "cc", "c", "h", "hpp", "swift",
        "dart", "elm", "clj", "cljs", "hs", "ml", "ocaml", "r", "sh", "bash",
        "zsh", "fish", "ps1", "bat", "cmd",
    ];

    // Configuration and manifest files, matched on the exact file name
    const KNOWN_FILENAMES: &[&str] = &[
        "package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
        "Cargo.toml", "Cargo.lock", "go.mod", "go.sum", "requirements.txt",
        "Pipfile", "Pipfile.lock", "pyproject.toml", "setup.py", "setup.cfg",
        "pom.xml", "build.gradle", "build.gradle.kts", "sbt", "build.sbt",
        "Gemfile", "Gemfile.lock", "composer.json", "composer.lock",
        "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
        ".dockerignore", "Makefile", "makefile", "CMakeLists.txt",
        ".env", ".env.example", ".env.local", ".env.production",
        "config.yml", "config.yaml", "config.json", "config.toml",
        "app.yml", "app.yaml", "application.yml", "application.yaml",
        "tsconfig.json", "jsconfig.json", ".eslintrc", ".eslintrc.json",
        ".prettierrc", "webpack.config.js", "vite.config.js", "next.config.js",
        "nuxt.config.js", "vue.config.js", "angular.json", ".angular-cli.json",
    ];

    // Common configuration file patterns, matched as file-name suffixes
    const CONFIG_SUFFIXES: &[&str] = &[
        ".config.js", ".config.ts", ".config.json", ".yml", ".yaml", ".toml",
    ];

    let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
    if SOURCE_EXTENSIONS.iter().any(|known| *known == ext) {
        return true;
    }

    let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
    if KNOWN_FILENAMES.iter().any(|known| *known == file_name) {
        return true;
    }

    CONFIG_SUFFIXES.iter().any(|suffix| file_name.ends_with(suffix))
}
160
161/// Reads file content safely with size limits
162pub fn read_file_safe(path: &Path, max_size: usize) -> Result<String, IaCGeneratorError> {
163    let metadata = fs::metadata(path)?;
164    
165    if metadata.len() > max_size as u64 {
166        return Err(SecurityError::InvalidPath(
167            format!("File too large: {}", path.display())
168        ).into());
169    }
170    
171    Ok(fs::read_to_string(path)?)
172}
173
/// Checks whether `path` is a regular file that can be opened for reading.
///
/// Returns `false` when the path does not exist, is not a regular file
/// (for example a directory), or cannot be opened for reading by the current
/// process. The file is opened read-only and closed again immediately; its
/// content is not read.
pub fn is_readable_file(path: &Path) -> bool {
    // `is_file` alone only proves existence; opening the file is what
    // actually verifies that it can be read.
    path.is_file() && fs::File::open(path).is_ok()
}
178
/// Returns `target` expressed relative to `root`.
///
/// When `target` does not start with `root`, `target` is returned unchanged.
/// When both are equal, the result is an empty path.
pub fn get_relative_path(root: &Path, target: &Path) -> PathBuf {
    match target.strip_prefix(root) {
        Ok(below_root) => below_root.to_path_buf(),
        Err(_) => target.to_path_buf(),
    }
}
185
186/// Find files matching specific patterns using glob
187pub fn find_files_by_patterns(root: &Path, patterns: &[&str]) -> Result<Vec<PathBuf>, std::io::Error> {
188    use glob::glob;
189    let mut files = Vec::new();
190    
191    for pattern in patterns {
192        // Use cross-platform path joining
193        let full_pattern = root.join(pattern);
194        let pattern_str = full_pattern.to_string_lossy();
195        
196        // Use glob to find matching files
197        if let Ok(entries) = glob(&pattern_str) {
198            for entry in entries {
199                if let Ok(path) = entry {
200                    if path.is_file() {
201                        files.push(path);
202                    }
203                }
204            }
205        }
206    }
207    
208    // Also try recursive patterns - use cross-platform glob patterns
209    for pattern in patterns {
210        // Use proper cross-platform recursive pattern
211        let recursive_pattern = if cfg!(windows) {
212            // Windows uses backslashes but glob understands forward slashes
213            root.join("**").join(pattern)
214        } else {
215            root.join("**").join(pattern)
216        };
217        let pattern_str = recursive_pattern.to_string_lossy().replace('\\', "/");
218        
219        if let Ok(entries) = glob(&pattern_str) {
220            for entry in entries {
221                if let Ok(path) = entry {
222                    if path.is_file() && !files.contains(&path) {
223                        files.push(path);
224                    }
225                }
226            }
227        }
228    }
229    
230    files.sort();
231    files.dedup();
232    Ok(files)
233}
234
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;
    use std::fs;

    /// Source files and known manifests are relevant; docs and images are not.
    #[test]
    fn test_is_relevant_file() {
        let accepted = ["src/main.rs", "package.json", "Dockerfile"];
        for candidate in accepted.iter() {
            assert!(is_relevant_file(Path::new(candidate)));
        }

        let rejected = ["README.md", "image.png"];
        for candidate in rejected.iter() {
            assert!(!is_relevant_file(Path::new(candidate)));
        }
    }

    /// A freshly created temporary directory passes validation.
    #[test]
    fn test_validate_project_path() {
        let workspace = TempDir::new().unwrap();

        let outcome = validate_project_path(workspace.path());

        assert!(outcome.is_ok());
    }

    /// Only the relevant files from a small project tree are collected.
    #[test]
    fn test_collect_project_files() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Create test files
        let fixtures = [
            ("main.rs", "fn main() {}"),
            ("package.json", "{}"),
            ("README.md", "# Test"),
        ];
        for (name, body) in fixtures.iter() {
            fs::write(root.join(name), body).unwrap();
        }

        let config = AnalysisConfig::default();
        let files = collect_project_files(root, &config).unwrap();

        let collected = |wanted: &str| files.iter().any(|f| f.file_name().unwrap() == wanted);

        assert_eq!(files.len(), 2); // main.rs and package.json
        assert!(collected("main.rs"));
        assert!(collected("package.json"));
    }
}