morph_cli/core/config/
ignore.rs1use std::fs;
2use std::path::{Path, PathBuf};
3
4use anyhow::{Context, Result};
5
6use super::schema::MorphCliSchema;
7
/// Decides which paths and file contents should be skipped.
///
/// Combines a list of exclude patterns with three switches that gate the
/// size, minification and generated-code heuristics applied by `check_file`.
// NOTE(review): the lint exemption suggests nothing reads these fields outside
// this module yet; drop it once the handler is wired in.
#[allow(dead_code)]
#[derive(Debug, Clone)]
pub struct IgnoreHandler {
    /// Patterns tested against a path by `should_ignore`.
    exclude_patterns: Vec<String>,
    /// When `true`, `check_file` rejects files larger than the caller's limit.
    skip_large_files: bool,
    /// When `true`, `check_file` rejects content that looks minified.
    skip_minified: bool,
    /// When `true`, `check_file` rejects content carrying a generated-code marker.
    skip_generated: bool,
}
15
16impl IgnoreHandler {
17 #[allow(dead_code)]
18 pub fn new() -> Self {
19 Self {
20 exclude_patterns: Self::default_patterns(),
21 skip_large_files: true,
22 skip_minified: true,
23 skip_generated: true,
24 }
25 }
26
27 #[allow(dead_code)]
28 pub fn from_schema(schema: &MorphCliSchema) -> Self {
29 let mut patterns = schema.excluded_paths.clone();
30 patterns.extend(Self::default_patterns());
31
32 Self {
33 exclude_patterns: patterns,
34 skip_large_files: true,
35 skip_minified: true,
36 skip_generated: true,
37 }
38 }
39
40 #[allow(dead_code)]
41 fn default_patterns() -> Vec<String> {
42 vec![
43 "node_modules".to_string(),
44 ".git".to_string(),
45 "dist".to_string(),
46 "build".to_string(),
47 "target".to_string(),
48 ".next".to_string(),
49 ".nuxt".to_string(),
50 "__pycache__".to_string(),
51 ".venv".to_string(),
52 "venv".to_string(),
53 ".DS_Store".to_string(),
54 "Thumbs.db".to_string(),
55 ]
56 }
57
58 #[allow(dead_code)]
59 pub fn should_ignore(&self, path: &Path) -> bool {
60 let path_str = path.to_string_lossy();
61
62 for pattern in &self.exclude_patterns {
63 if path_str.contains(pattern.as_str()) {
64 return true;
65 }
66 }
67
68 if Self::has_gitignore(path) {
69 return true;
70 }
71
72 false
73 }
74
75 #[allow(dead_code)]
76 pub fn has_gitignore(path: &Path) -> bool {
77 let mut current = path.parent().map(|p| p.to_path_buf());
78
79 while let Some(mut dir) = current {
80 let gitignore_path = dir.join(".gitignore");
81 if gitignore_path.exists()
82 && let Ok(content) = fs::read_to_string(&gitignore_path)
83 {
84 let file_name = path.file_name().and_then(|n| n.to_str()).unwrap_or("");
85
86 for line in content.lines() {
87 let line = line.trim();
88 if line.is_empty() || line.starts_with('#') {
89 continue;
90 }
91
92 if line == file_name || line == format!("*/{}", file_name) {
93 return true;
94 }
95 }
96 }
97
98 if !dir.pop() {
99 break;
100 }
101 current = Some(dir);
102 }
103
104 false
105 }
106
107 #[allow(dead_code)]
108 pub fn check_file(&self, path: &Path, content: &str, max_size_kb: usize) -> Option<String> {
109 if self.should_ignore(path) {
110 return Some("matches exclude pattern".to_string());
111 }
112
113 if let Ok(metadata) = fs::metadata(path) {
114 if metadata.len() == 0 {
115 return Some("empty file".to_string());
116 }
117
118 let size_kb = metadata.len() / 1024;
119 if self.skip_large_files && size_kb > max_size_kb as u64 {
120 return Some(format!(
121 "file size ({} KB) exceeds limit ({} KB)",
122 size_kb, max_size_kb
123 ));
124 }
125 }
126
127 if self.skip_minified && self.looks_minified(content) {
128 return Some("minified file detected".to_string());
129 }
130
131 if self.skip_generated && self.looks_generated(content) {
132 return Some("generated file detected".to_string());
133 }
134
135 if self.contains_binary(path, content) {
136 return Some("binary content detected".to_string());
137 }
138
139 None
140 }
141
142 #[allow(dead_code)]
143 fn looks_minified(&self, content: &str) -> bool {
144 if content.len() < 1000 {
145 return false;
146 }
147
148 let mut long_lines = 0;
149 let mut total_lines = 0;
150
151 for line in content.lines() {
152 total_lines += 1;
153 if line.len() > 500 {
154 long_lines += 1;
155 }
156 }
157
158 if total_lines == 0 {
159 return false;
160 }
161
162 let ratio = long_lines as f64 / total_lines as f64;
163 ratio > 0.3
164 }
165
166 #[allow(dead_code)]
167 fn looks_generated(&self, content: &str) -> bool {
168 let markers = [
169 "// DO NOT EDIT",
170 "// This file was generated",
171 "@generated",
172 "/* Generated by ",
173 "Generated by ",
174 "Auto-generated by ",
175 ];
176
177 for marker in &markers {
178 if content.contains(marker) {
179 return true;
180 }
181 }
182
183 false
184 }
185
186 #[allow(dead_code)]
187 fn contains_binary(&self, path: &Path, content: &str) -> bool {
188 let ext = path.extension().and_then(|e| e.to_str()).unwrap_or("");
189
190 let binary_extensions = [
191 "png", "jpg", "jpeg", "gif", "bmp", "ico", "webp", "pdf", "doc", "docx", "xls", "xlsx",
192 "ppt", "pptx", "zip", "tar", "gz", "rar", "7z", "exe", "dll", "so", "dylib", "woff",
193 "woff2", "ttf", "eot",
194 ];
195
196 if binary_extensions.contains(&ext) {
197 return true;
198 }
199
200 for byte in content.bytes() {
201 if byte == 0 {
202 return true;
203 }
204 }
205
206 false
207 }
208}
209
210impl Default for IgnoreHandler {
211 fn default() -> Self {
212 Self::new()
213 }
214}
215
216#[allow(dead_code)]
217pub fn fast_walk(root: &Path, handler: &IgnoreHandler) -> Result<Vec<PathBuf>> {
218 let mut files = Vec::new();
219
220 if !root.exists() {
221 anyhow::bail!("Path does not exist: {}", root.display());
222 }
223
224 let entries = fs::read_dir(root)
225 .with_context(|| format!("Failed to read directory: {}", root.display()))?;
226
227 for entry in entries {
228 let entry = entry.with_context(|| "Failed to read directory entry")?;
229 let path = entry.path();
230
231 if path.is_dir() {
232 if handler.should_ignore(&path) {
233 continue;
234 }
235
236 let subdirs = fast_walk(&path, handler)?;
237 files.extend(subdirs);
238 } else {
239 if !handler.should_ignore(&path) {
240 files.push(path);
241 }
242 }
243 }
244
245 Ok(files)
246}