Skip to main content

morph_cli/core/config/
ignore.rs

1use std::fs;
2use std::path::{Path, PathBuf};
3
4use anyhow::{Context, Result};
5
6use super::schema::MorphCliSchema;
7
/// Decides which paths and file contents should be skipped.
///
/// Path-level decisions come from `exclude_patterns`; the three `skip_*`
/// flags gate the size / minified / generated checks. Both constructors in
/// this file set all three flags to `true`.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct IgnoreHandler {
    /// Path patterns that cause a path to be ignored.
    exclude_patterns: Vec<String>,
    /// When `true`, files larger than the caller-supplied size limit are skipped.
    skip_large_files: bool,
    /// When `true`, content that looks minified is skipped.
    skip_minified: bool,
    /// When `true`, content carrying a "generated file" marker is skipped.
    skip_generated: bool,
}
15
16impl IgnoreHandler {
17    #[allow(dead_code)]
18    pub fn new() -> Self {
19        Self {
20            exclude_patterns: Self::default_patterns(),
21            skip_large_files: true,
22            skip_minified: true,
23            skip_generated: true,
24        }
25    }
26
27    #[allow(dead_code)]
28    pub fn from_schema(schema: &MorphCliSchema) -> Self {
29        let mut patterns = schema.excluded_paths.clone();
30        patterns.extend(Self::default_patterns());
31
32        Self {
33            exclude_patterns: patterns,
34            skip_large_files: true,
35            skip_minified: true,
36            skip_generated: true,
37        }
38    }
39
40    #[allow(dead_code)]
41    fn default_patterns() -> Vec<String> {
42        vec![
43            "node_modules".to_string(),
44            ".git".to_string(),
45            "dist".to_string(),
46            "build".to_string(),
47            "target".to_string(),
48            ".next".to_string(),
49            ".nuxt".to_string(),
50            "__pycache__".to_string(),
51            ".venv".to_string(),
52            "venv".to_string(),
53            ".DS_Store".to_string(),
54            "Thumbs.db".to_string(),
55        ]
56    }
57
58    #[allow(dead_code)]
59    pub fn should_ignore(&self, path: &Path) -> bool {
60        let path_str = path.to_string_lossy();
61
62        for pattern in &self.exclude_patterns {
63            if path_str.contains(pattern.as_str()) {
64                return true;
65            }
66        }
67
68        if Self::has_gitignore(path) {
69            return true;
70        }
71
72        false
73    }
74
75    #[allow(dead_code)]
76    pub fn has_gitignore(path: &Path) -> bool {
77        let mut current = path.parent().map(|p| p.to_path_buf());
78
79        while let Some(mut dir) = current {
80            let gitignore_path = dir.join(".gitignore");
81            if gitignore_path.exists()
82                && let Ok(content) = fs::read_to_string(&gitignore_path)
83            {
84                let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
85
86                for line in content.lines() {
87                    let line = line.trim();
88                    if line.is_empty() || line.starts_with('#') {
89                        continue;
90                    }
91
92                    if line == file_name || line == format!("*/{}", file_name) {
93                        return true;
94                    }
95                }
96            }
97
98            if !dir.pop() {
99                break;
100            }
101            current = Some(dir);
102        }
103
104        false
105    }
106
107    #[allow(dead_code)]
108    pub fn check_file(&self, path: &Path, content: &str, max_size_kb: usize) -> Option<String> {
109        if self.should_ignore(path) {
110            return Some("matches exclude pattern".to_string());
111        }
112
113        if let Ok(metadata) = fs::metadata(path) {
114            if metadata.len() == 0 {
115                return Some("empty file".to_string());
116            }
117
118            let size_kb = metadata.len() / 1024;
119            if self.skip_large_files && size_kb > max_size_kb as u64 {
120                return Some(format!(
121                    "file size ({} KB) exceeds limit ({} KB)",
122                    size_kb, max_size_kb
123                ));
124            }
125        }
126
127        if self.skip_minified && self.looks_minified(content) {
128            return Some("minified file detected".to_string());
129        }
130
131        if self.skip_generated && self.looks_generated(content) {
132            return Some("generated file detected".to_string());
133        }
134
135        if self.contains_binary(path, content) {
136            return Some("binary content detected".to_string());
137        }
138
139        None
140    }
141
142    #[allow(dead_code)]
143    fn looks_minified(&self, content: &str) -> bool {
144        if content.len() < 1000 {
145            return false;
146        }
147
148        let mut long_lines = 0;
149        let mut total_lines = 0;
150
151        for line in content.lines() {
152            total_lines += 1;
153            if line.len() > 500 {
154                long_lines += 1;
155            }
156        }
157
158        if total_lines == 0 {
159            return false;
160        }
161
162        let ratio = long_lines as f64 / total_lines as f64;
163        ratio > 0.3
164    }
165
166    #[allow(dead_code)]
167    fn looks_generated(&self, content: &str) -> bool {
168        let markers = [
169            "// DO NOT EDIT",
170            "// This file was generated",
171            "@generated",
172            "/* Generated by ",
173            "Generated by ",
174            "Auto-generated by ",
175        ];
176
177        for marker in &markers {
178            if content.contains(marker) {
179                return true;
180            }
181        }
182
183        false
184    }
185
186    #[allow(dead_code)]
187    fn contains_binary(&self, path: &Path, content: &str) -> bool {
188        let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
189
190        let binary_extensions = [
191            "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "pdf", "doc", "docx", "xls", "xlsx",
192            "ppt", "pptx", "zip", "tar", "gz", "rar", "7z", "exe", "dll", "so", "dylib", "woff",
193            "woff2", "ttf", "eot",
194        ];
195
196        if binary_extensions.contains(&ext) {
197            return true;
198        }
199
200        for byte in content.bytes() {
201            if byte == 0 {
202                return true;
203            }
204        }
205
206        false
207    }
208}
209
210impl Default for IgnoreHandler {
211    fn default() -> Self {
212        Self::new()
213    }
214}
215
216#[allow(dead_code)]
217pub fn fast_walk(root: &Path, handler: &IgnoreHandler) -> Result<Vec<PathBuf>> {
218    let mut files = Vec::new();
219
220    if !root.exists() {
221        anyhow::bail!("Path does not exist: {}", root.display());
222    }
223
224    let entries = fs::read_dir(root)
225        .with_context(|| format!("Failed to read directory: {}", root.display()))?;
226
227    for entry in entries {
228        let entry = entry.with_context(|| "Failed to read directory entry")?;
229        let path = entry.path();
230
231        if path.is_dir() {
232            if handler.should_ignore(&path) {
233                continue;
234            }
235
236            let subdirs = fast_walk(&path, handler)?;
237            files.extend(subdirs);
238        } else {
239            if !handler.should_ignore(&path) {
240                files.push(path);
241            }
242        }
243    }
244
245    Ok(files)
246}