syncable_cli/common/
file_utils.rs

1use crate::analyzer::AnalysisConfig;
2use crate::error::{SecurityError, IaCGeneratorError};
3use std::fs;
4use std::path::{Path, PathBuf};
5use walkdir::{WalkDir, DirEntry};
6
7/// Validates a project path and ensures security
8pub fn validate_project_path(path: &Path) -> Result<PathBuf, IaCGeneratorError> {
9    let canonical = path.canonicalize()
10        .map_err(|_| SecurityError::InvalidPath(path.display().to_string()))?;
11    
12    // Basic validation - path should exist and be a directory
13    if !canonical.is_dir() {
14        return Err(SecurityError::InvalidPath(
15            "Path is not a directory".to_string()
16        ).into());
17    }
18    
19    Ok(canonical)
20}
21
22/// Collects project files based on configuration
23pub fn collect_project_files(
24    root: &Path,
25    config: &AnalysisConfig,
26) -> Result<Vec<PathBuf>, IaCGeneratorError> {
27    let mut files = Vec::new();
28    
29    for entry in WalkDir::new(root)
30        .follow_links(false)
31        .into_iter()
32        .filter_entry(|e| !is_ignored(e, &config.ignore_patterns, root))
33    {
34        let entry = entry?;
35        
36        if entry.file_type().is_file() {
37            let path = entry.path();
38            
39            // Check file size limit
40            if let Ok(metadata) = fs::metadata(path) {
41                if metadata.len() > config.max_file_size as u64 {
42                    log::debug!("Skipping large file: {}", path.display());
43                    continue;
44                }
45            }
46            
47            // Only include relevant file types
48            if is_relevant_file(path) {
49                files.push(path.to_path_buf());
50            }
51        }
52    }
53    
54    log::debug!("Collected {} relevant files", files.len());
55    Ok(files)
56}
57
58/// Checks if a directory entry should be ignored
59fn is_ignored(entry: &DirEntry, ignore_patterns: &[String], root: &Path) -> bool {
60    let path = entry.path();
61    
62    // Get the relative path from the root
63    let relative_path = match path.strip_prefix(root) {
64        Ok(rel) => rel,
65        Err(_) => return false, // If we can't get relative path, don't ignore
66    };
67    
68    // Check each component of the relative path
69    for component in relative_path.components() {
70        if let std::path::Component::Normal(name) = component {
71            if let Some(name_str) = name.to_str() {
72                // Check if this component matches any ignore pattern
73                for pattern in ignore_patterns {
74                    if name_str == pattern {
75                        return true;
76                    }
77                }
78                
79                // Ignore hidden files and directories (starting with .)
80                if name_str.starts_with('.') && name_str != ".env" {
81                    return true;
82                }
83            }
84        }
85    }
86    
87    false
88}
89
/// Reports whether `path` names a file worth analysing.
///
/// A path is relevant when its extension is a known source-code extension, its
/// file name is a known manifest/configuration file, or its file name ends with
/// one of the generic configuration suffixes. All comparisons are
/// case-sensitive; a missing or non-UTF-8 extension or file name is treated as
/// the empty string.
fn is_relevant_file(path: &Path) -> bool {
    // Programming language files, matched on the extension.
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "go", "js", "ts", "jsx", "tsx", "py", "java", "kt", "scala",
        "rb", "php", "cs", "fs", "cpp", "cc", "c", "h", "hpp", "swift",
        "dart", "elm", "clj", "cljs", "hs", "ml", "ocaml", "r", "sh", "bash",
        "zsh", "fish", "ps1", "bat", "cmd",
    ];

    // Configuration and manifest files, matched on the whole file name.
    const CONFIG_FILES: &[&str] = &[
        "package.json", "package-lock.json", "yarn.lock", "pnpm-lock.yaml",
        "Cargo.toml", "Cargo.lock", "go.mod", "go.sum", "requirements.txt",
        "Pipfile", "Pipfile.lock", "pyproject.toml", "setup.py", "setup.cfg",
        "pom.xml", "build.gradle", "build.gradle.kts", "sbt", "build.sbt",
        "Gemfile", "Gemfile.lock", "composer.json", "composer.lock",
        "Dockerfile", "docker-compose.yml", "docker-compose.yaml",
        ".dockerignore", "Makefile", "makefile", "CMakeLists.txt",
        ".env", ".env.example", ".env.local", ".env.production",
        "config.yml", "config.yaml", "config.json", "config.toml",
        "app.yml", "app.yaml", "application.yml", "application.yaml",
        "tsconfig.json", "jsconfig.json", ".eslintrc", ".eslintrc.json",
        ".prettierrc", "webpack.config.js", "vite.config.js", "next.config.js",
        "nuxt.config.js", "vue.config.js", "angular.json", ".angular-cli.json",
    ];

    // Common configuration patterns, matched as a suffix of the file name
    // (so a bare `.toml` file qualifies even though it has no extension).
    const CONFIG_SUFFIXES: &[&str] = &[
        ".config.js", ".config.ts", ".config.json", ".yml", ".yaml", ".toml",
    ];

    let extension = path.extension().and_then(|ext| ext.to_str()).unwrap_or("");
    let filename = path.file_name().and_then(|name| name.to_str()).unwrap_or("");

    CODE_EXTENSIONS.contains(&extension)
        || CONFIG_FILES.contains(&filename)
        || CONFIG_SUFFIXES.iter().any(|suffix| filename.ends_with(*suffix))
}
147
148/// Reads file content safely with size limits
149pub fn read_file_safe(path: &Path, max_size: usize) -> Result<String, IaCGeneratorError> {
150    let metadata = fs::metadata(path)?;
151    
152    if metadata.len() > max_size as u64 {
153        return Err(SecurityError::InvalidPath(
154            format!("File too large: {}", path.display())
155        ).into());
156    }
157    
158    Ok(fs::read_to_string(path)?)
159}
160
/// Checks that `path` is an existing regular file that can be opened for reading.
///
/// Returns `false` for missing paths, directories, and files the current
/// process is not permitted to open. The result is a snapshot: the file may
/// change or disappear between this check and a later read.
pub fn is_readable_file(path: &Path) -> bool {
    // `is_file` already stats the path, so a second metadata lookup adds
    // nothing; opening the file is what actually exercises read permission.
    path.is_file() && fs::File::open(path).is_ok()
}
165
/// Expresses `target` relative to `root`.
///
/// When `target` does not start with `root` (compared component by component),
/// `target` is returned unchanged. When both are equal the result is an empty
/// path.
pub fn get_relative_path(root: &Path, target: &Path) -> PathBuf {
    match target.strip_prefix(root) {
        Ok(relative) => relative.to_path_buf(),
        Err(_) => target.to_path_buf(),
    }
}
172
173/// Find files matching specific patterns using glob
174pub fn find_files_by_patterns(root: &Path, patterns: &[&str]) -> Result<Vec<PathBuf>, std::io::Error> {
175    use glob::glob;
176    let mut files = Vec::new();
177    
178    for pattern in patterns {
179        let full_pattern = root.join(pattern);
180        let pattern_str = full_pattern.to_string_lossy();
181        
182        // Use glob to find matching files
183        if let Ok(entries) = glob(&pattern_str) {
184            for entry in entries {
185                if let Ok(path) = entry {
186                    if path.is_file() {
187                        files.push(path);
188                    }
189                }
190            }
191        }
192    }
193    
194    // Also try recursive patterns
195    for pattern in patterns {
196        let recursive_pattern = root.join("**").join(pattern);
197        let pattern_str = recursive_pattern.to_string_lossy();
198        
199        if let Ok(entries) = glob(&pattern_str) {
200            for entry in entries {
201                if let Ok(path) = entry {
202                    if path.is_file() && !files.contains(&path) {
203                        files.push(path);
204                    }
205                }
206            }
207        }
208    }
209    
210    files.sort();
211    files.dedup();
212    Ok(files)
213}
214
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Source files and well-known manifests are relevant; docs and images are not.
    #[test]
    fn test_is_relevant_file() {
        let relevant = |name: &str| is_relevant_file(Path::new(name));

        assert!(relevant("src/main.rs"));
        assert!(relevant("package.json"));
        assert!(relevant("Dockerfile"));
        assert!(!relevant("README.md"));
        assert!(!relevant("image.png"));
    }

    /// A freshly created temporary directory is accepted as a project path.
    #[test]
    fn test_validate_project_path() {
        let workspace = TempDir::new().unwrap();

        let outcome = validate_project_path(workspace.path());

        assert!(outcome.is_ok());
    }

    /// Only the relevant files of a small project tree are collected.
    #[test]
    fn test_collect_project_files() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Two relevant files plus one that must be filtered out.
        let fixtures = [
            ("main.rs", "fn main() {}"),
            ("package.json", "{}"),
            ("README.md", "# Test"),
        ];
        for (name, contents) in fixtures.iter() {
            fs::write(root.join(name), contents).unwrap();
        }

        let collected = collect_project_files(root, &AnalysisConfig::default()).unwrap();
        let has_file = |wanted: &str| collected.iter().any(|f| f.file_name().unwrap() == wanted);

        assert_eq!(collected.len(), 2); // main.rs and package.json
        assert!(has_file("main.rs"));
        assert!(has_file("package.json"));
    }
}