syncable_cli/common/
file_utils.rs

1use crate::analyzer::AnalysisConfig;
2use crate::error::{IaCGeneratorError, SecurityError};
3use std::fs;
4use std::path::{Path, PathBuf};
5use walkdir::{DirEntry, WalkDir};
6
7/// Validates a project path and ensures security
8pub fn validate_project_path(path: &Path) -> Result<PathBuf, IaCGeneratorError> {
9    // Try to canonicalize, but be more forgiving on Windows
10    let canonical = match path.canonicalize() {
11        Ok(p) => p,
12        Err(e) => {
13            // On Windows, canonicalize can fail for valid paths due to permissions
14            // Fall back to absolute path if the path exists
15            if path.exists() {
16                path.to_path_buf()
17            } else {
18                return Err(SecurityError::InvalidPath(format!(
19                    "Invalid path '{}': {}",
20                    path.display(),
21                    e
22                ))
23                .into());
24            }
25        }
26    };
27
28    // Basic validation - path should exist and be a directory
29    if !canonical.is_dir() {
30        return Err(SecurityError::InvalidPath("Path is not a directory".to_string()).into());
31    }
32
33    Ok(canonical)
34}
35
36/// Collects project files based on configuration
37pub fn collect_project_files(
38    root: &Path,
39    config: &AnalysisConfig,
40) -> Result<Vec<PathBuf>, IaCGeneratorError> {
41    let mut files = Vec::new();
42
43    for entry in WalkDir::new(root)
44        .follow_links(false)
45        .into_iter()
46        .filter_entry(|e| !is_ignored(e, &config.ignore_patterns, root))
47    {
48        let entry = entry?;
49
50        if entry.file_type().is_file() {
51            let path = entry.path();
52
53            // Check file size limit
54            if let Ok(metadata) = fs::metadata(path)
55                && metadata.len() > config.max_file_size as u64
56            {
57                log::debug!("Skipping large file: {}", path.display());
58                continue;
59            }
60
61            // Only include relevant file types
62            if is_relevant_file(path) {
63                files.push(path.to_path_buf());
64            }
65        }
66    }
67
68    log::debug!("Collected {} relevant files", files.len());
69    Ok(files)
70}
71
72/// Checks if a directory entry should be ignored
73fn is_ignored(entry: &DirEntry, ignore_patterns: &[String], root: &Path) -> bool {
74    let path = entry.path();
75
76    // Get the relative path from the root
77    let relative_path = match path.strip_prefix(root) {
78        Ok(rel) => rel,
79        Err(_) => return false, // If we can't get relative path, don't ignore
80    };
81
82    // Check each component of the relative path
83    for component in relative_path.components() {
84        if let std::path::Component::Normal(name) = component
85            && let Some(name_str) = name.to_str()
86        {
87            // Check if this component matches any ignore pattern
88            for pattern in ignore_patterns {
89                if name_str == pattern {
90                    return true;
91                }
92            }
93
94            // Ignore hidden files and directories (starting with .)
95            if name_str.starts_with('.') && name_str != ".env" {
96                return true;
97            }
98        }
99    }
100
101    false
102}
103
/// Decides whether a file should be included in the analysis.
///
/// A file is relevant when any of the following holds (all comparisons are
/// case-sensitive):
///
/// * its extension is a known source-code or script extension;
/// * its file name is a known manifest, lock file or configuration file;
/// * its file name ends with a common configuration suffix
///   (`.config.js`, `.config.ts`, `.config.json`, `.yml`, `.yaml`, `.toml`).
///
/// Extensions or file names that are not valid UTF-8 are treated as empty.
fn is_relevant_file(path: &Path) -> bool {
    /// Extensions of programming-language and shell-script files.
    const CODE_EXTENSIONS: &[&str] = &[
        "rs", "go", "js", "ts", "jsx", "tsx", "py", "java", "kt", "scala", "rb", "php", "cs", "fs",
        "cpp", "cc", "c", "h", "hpp", "swift", "dart", "elm", "clj", "cljs", "hs", "ml", "ocaml",
        "r", "sh", "bash", "zsh", "fish", "ps1", "bat", "cmd",
    ];

    /// Exact names of configuration and manifest files.
    const CONFIG_FILES: &[&str] = &[
        "package.json",
        "package-lock.json",
        "yarn.lock",
        "pnpm-lock.yaml",
        "Cargo.toml",
        "Cargo.lock",
        "go.mod",
        "go.sum",
        "requirements.txt",
        "Pipfile",
        "Pipfile.lock",
        "pyproject.toml",
        "setup.py",
        "setup.cfg",
        "pom.xml",
        "build.gradle",
        "build.gradle.kts",
        "sbt",
        "build.sbt",
        "Gemfile",
        "Gemfile.lock",
        "composer.json",
        "composer.lock",
        "Dockerfile",
        "docker-compose.yml",
        "docker-compose.yaml",
        ".dockerignore",
        "Makefile",
        "makefile",
        "CMakeLists.txt",
        ".env",
        ".env.example",
        ".env.local",
        ".env.production",
        "config.yml",
        "config.yaml",
        "config.json",
        "config.toml",
        "app.yml",
        "app.yaml",
        "application.yml",
        "application.yaml",
        "tsconfig.json",
        "jsconfig.json",
        ".eslintrc",
        ".eslintrc.json",
        ".prettierrc",
        "webpack.config.js",
        "vite.config.js",
        "next.config.js",
        "nuxt.config.js",
        "vue.config.js",
        "angular.json",
        ".angular-cli.json",
    ];

    /// File-name suffixes that mark common configuration files.
    const CONFIG_SUFFIXES: &[&str] = &[
        ".config.js",
        ".config.ts",
        ".config.json",
        ".yml",
        ".yaml",
        ".toml",
    ];

    let ext = path
        .extension()
        .and_then(|raw| raw.to_str())
        .unwrap_or_default();
    let name = path
        .file_name()
        .and_then(|raw| raw.to_str())
        .unwrap_or_default();

    CODE_EXTENSIONS.contains(&ext)
        || CONFIG_FILES.contains(&name)
        || CONFIG_SUFFIXES
            .iter()
            .any(|suffix| name.ends_with(*suffix))
}
201
202/// Reads file content safely with size limits
203pub fn read_file_safe(path: &Path, max_size: usize) -> Result<String, IaCGeneratorError> {
204    let metadata = fs::metadata(path)?;
205
206    if metadata.len() > max_size as u64 {
207        return Err(
208            SecurityError::InvalidPath(format!("File too large: {}", path.display())).into(),
209        );
210    }
211
212    Ok(fs::read_to_string(path)?)
213}
214
/// Checks whether `path` is a regular file that can be opened for reading.
///
/// Returns `false` for paths that do not exist, for directories, and for
/// files that cannot be opened (for example because of missing permissions).
pub fn is_readable_file(path: &Path) -> bool {
    // `is_file` only proves that the metadata is accessible; opening the file
    // is what actually verifies that it can be read.
    path.is_file() && fs::File::open(path).is_ok()
}
219
/// Expresses `target` relative to `root`.
///
/// When `target` does not start with `root`, `target` is returned unchanged.
pub fn get_relative_path(root: &Path, target: &Path) -> PathBuf {
    match target.strip_prefix(root) {
        Ok(suffix) => suffix.to_path_buf(),
        Err(_) => target.to_path_buf(),
    }
}
224
225/// Find files matching specific patterns using glob
226pub fn find_files_by_patterns(
227    root: &Path,
228    patterns: &[&str],
229) -> Result<Vec<PathBuf>, std::io::Error> {
230    use glob::glob;
231    let mut files = Vec::new();
232
233    for pattern in patterns {
234        // Use cross-platform path joining
235        let full_pattern = root.join(pattern);
236        let pattern_str = full_pattern.to_string_lossy();
237
238        // Use glob to find matching files
239        if let Ok(entries) = glob(&pattern_str) {
240            for entry in entries {
241                if let Ok(path) = entry
242                    && path.is_file()
243                {
244                    files.push(path);
245                }
246            }
247        }
248    }
249
250    // Also try recursive patterns - use cross-platform glob patterns
251    for pattern in patterns {
252        // Use proper cross-platform recursive pattern
253        let recursive_pattern = if cfg!(windows) {
254            // Windows uses backslashes but glob understands forward slashes
255            root.join("**").join(pattern)
256        } else {
257            root.join("**").join(pattern)
258        };
259        let pattern_str = recursive_pattern.to_string_lossy().replace('\\', "/");
260
261        if let Ok(entries) = glob(&pattern_str) {
262            for entry in entries {
263                if let Ok(path) = entry
264                    && path.is_file()
265                    && !files.contains(&path)
266                {
267                    files.push(path);
268                }
269            }
270        }
271    }
272
273    files.sort();
274    files.dedup();
275    Ok(files)
276}
277
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// Source, manifest and Docker files are relevant; docs and images are not.
    #[test]
    fn test_is_relevant_file() {
        for relevant in ["src/main.rs", "package.json", "Dockerfile"] {
            assert!(is_relevant_file(Path::new(relevant)));
        }
        for irrelevant in ["README.md", "image.png"] {
            assert!(!is_relevant_file(Path::new(irrelevant)));
        }
    }

    /// A freshly created temporary directory is a valid project path.
    #[test]
    fn test_validate_project_path() {
        let workspace = TempDir::new().unwrap();

        assert!(validate_project_path(workspace.path()).is_ok());
    }

    /// Only the relevant files of a small project are collected.
    #[test]
    fn test_collect_project_files() {
        let workspace = TempDir::new().unwrap();
        let root = workspace.path();

        // Create test files
        let fixtures = [
            ("main.rs", "fn main() {}"),
            ("package.json", "{}"),
            ("README.md", "# Test"),
        ];
        for (name, contents) in fixtures {
            fs::write(root.join(name), contents).unwrap();
        }

        let files = collect_project_files(root, &AnalysisConfig::default()).unwrap();

        assert_eq!(files.len(), 2); // main.rs and package.json

        let names: Vec<_> = files.iter().map(|f| f.file_name().unwrap()).collect();
        assert!(names.iter().any(|name| *name == "main.rs"));
        assert!(names.iter().any(|name| *name == "package.json"));
    }
}
323}