1use crate::i18n::{get_message, Language, MessageKey};
6use crate::smart::error::{SmartError, SmartResult};
7use crate::smart::types::PredictionConfidence;
8use std::path::{Path, PathBuf};
9use walkdir::WalkDir;
10
/// A single suggestion to exclude a pattern, together with the estimated
/// size it would save and the reason it was proposed.
#[derive(Debug, Clone)]
pub struct ExcludeRecommendation {
    /// Pattern text copied from the rule that produced this recommendation.
    pattern: String,
    /// Confidence attached to the rule that produced this recommendation.
    confidence: PredictionConfidence,
    /// Estimated size of the matched content, in units of 2^30 bytes
    /// (the engine divides a byte count by 1_073_741_824).
    size_reduction_gb: f64,
    /// Human-readable explanation copied from the rule.
    reason: String,
}
35
36impl ExcludeRecommendation {
37 #[must_use]
39 pub const fn new(
40 pattern: String,
41 confidence: PredictionConfidence,
42 size_reduction_gb: f64,
43 reason: String,
44 ) -> Self {
45 Self {
46 pattern,
47 confidence,
48 size_reduction_gb,
49 reason,
50 }
51 }
52
53 #[must_use]
55 pub fn pattern(&self) -> &str {
56 &self.pattern
57 }
58
59 #[must_use]
61 pub const fn confidence(&self) -> PredictionConfidence {
62 self.confidence
63 }
64
65 #[must_use]
67 pub const fn size_reduction_gb(&self) -> f64 {
68 self.size_reduction_gb
69 }
70
71 #[must_use]
73 pub fn reason(&self) -> &str {
74 &self.reason
75 }
76}
77
/// One built-in rule used by the engine to look for excludable content.
#[derive(Debug, Clone)]
struct ExcludePattern {
    /// Text to look for. Directory rules are compared against directory
    /// names; file rules are either of the form `.*<suffix>$` (treated as a
    /// file-name suffix) or a plain name.
    pattern: String,
    /// Explanation copied into the resulting recommendation.
    reason: String,
    /// Raw confidence value; it is validated through
    /// `PredictionConfidence::new` when a recommendation is built.
    confidence: f64,
    /// `true` when the rule is matched against directories, `false` when it
    /// is matched against regular files.
    is_directory: bool,
}
86
/// Scans a directory tree against a fixed table of rules and proposes
/// exclude patterns for whatever it finds.
#[derive(Debug)]
pub struct ExcludeRecommendationEngine {
    /// Rule table, checked in order for every scan.
    patterns: Vec<ExcludePattern>,
    // NOTE(review): `lang` is stored by the constructor but is not read by
    // any code visible in this file, hence the lint allowance.
    #[allow(dead_code)]
    lang: Language,
}
117
118impl ExcludeRecommendationEngine {
119 #[must_use]
121 pub fn new() -> Self {
122 Self::with_language(Language::detect())
123 }
124
125 #[must_use]
127 pub fn with_language(lang: Language) -> Self {
128 let patterns = vec![
129 ExcludePattern {
131 pattern: "node_modules".to_string(),
132 reason: get_message(MessageKey::ExcludeReasonNpmDeps, lang).to_string(),
133 confidence: 0.99,
134 is_directory: true,
135 },
136 ExcludePattern {
137 pattern: "target".to_string(),
138 reason: get_message(MessageKey::ExcludeReasonRustBuild, lang).to_string(),
139 confidence: 0.99,
140 is_directory: true,
141 },
142 ExcludePattern {
143 pattern: "vendor".to_string(),
144 reason: get_message(MessageKey::ExcludeReasonVendor, lang).to_string(),
145 confidence: 0.95,
146 is_directory: true,
147 },
148 ExcludePattern {
149 pattern: "__pycache__".to_string(),
150 reason: get_message(MessageKey::ExcludeReasonPythonCache, lang).to_string(),
151 confidence: 0.99,
152 is_directory: true,
153 },
154 ExcludePattern {
155 pattern: ".pytest_cache".to_string(),
156 reason: get_message(MessageKey::ExcludeReasonPytestCache, lang).to_string(),
157 confidence: 0.99,
158 is_directory: true,
159 },
160 ExcludePattern {
161 pattern: "dist".to_string(),
162 reason: get_message(MessageKey::ExcludeReasonBuildArtifacts, lang).to_string(),
163 confidence: 0.90,
164 is_directory: true,
165 },
166 ExcludePattern {
167 pattern: "build".to_string(),
168 reason: get_message(MessageKey::ExcludeReasonBuildArtifacts, lang).to_string(),
169 confidence: 0.90,
170 is_directory: true,
171 },
172 ExcludePattern {
174 pattern: ".cache".to_string(),
175 reason: get_message(MessageKey::ExcludeReasonCacheDir, lang).to_string(),
176 confidence: 0.95,
177 is_directory: true,
178 },
179 ExcludePattern {
180 pattern: "cache".to_string(),
181 reason: get_message(MessageKey::ExcludeReasonCacheDir, lang).to_string(),
182 confidence: 0.85,
183 is_directory: true,
184 },
185 ExcludePattern {
187 pattern: ".git".to_string(),
188 reason: get_message(MessageKey::ExcludeReasonGitMetadata, lang).to_string(),
189 confidence: 0.70,
190 is_directory: true,
191 },
192 ExcludePattern {
193 pattern: ".svn".to_string(),
194 reason: get_message(MessageKey::ExcludeReasonSvnMetadata, lang).to_string(),
195 confidence: 0.70,
196 is_directory: true,
197 },
198 ExcludePattern {
200 pattern: r".*\.tmp$".to_string(),
201 reason: get_message(MessageKey::ExcludeReasonTempFile, lang).to_string(),
202 confidence: 0.99,
203 is_directory: false,
204 },
205 ExcludePattern {
206 pattern: r".*\.temp$".to_string(),
207 reason: get_message(MessageKey::ExcludeReasonTempFile, lang).to_string(),
208 confidence: 0.99,
209 is_directory: false,
210 },
211 ExcludePattern {
212 pattern: r".*\.bak$".to_string(),
213 reason: get_message(MessageKey::ExcludeReasonBackupFile, lang).to_string(),
214 confidence: 0.85,
215 is_directory: false,
216 },
217 ExcludePattern {
218 pattern: r".*~$".to_string(),
219 reason: get_message(MessageKey::ExcludeReasonEditorTemp, lang).to_string(),
220 confidence: 0.95,
221 is_directory: false,
222 },
223 ExcludePattern {
225 pattern: r".*\.log$".to_string(),
226 reason: get_message(MessageKey::ExcludeReasonLogFile, lang).to_string(),
227 confidence: 0.70,
228 is_directory: false,
229 },
230 ExcludePattern {
232 pattern: ".DS_Store".to_string(),
233 reason: get_message(MessageKey::ExcludeReasonMacOsMetadata, lang).to_string(),
234 confidence: 0.99,
235 is_directory: false,
236 },
237 ExcludePattern {
238 pattern: "Thumbs.db".to_string(),
239 reason: get_message(MessageKey::ExcludeReasonWindowsThumb, lang).to_string(),
240 confidence: 0.99,
241 is_directory: false,
242 },
243 ExcludePattern {
244 pattern: "desktop.ini".to_string(),
245 reason: get_message(MessageKey::ExcludeReasonWindowsDesktop, lang).to_string(),
246 confidence: 0.95,
247 is_directory: false,
248 },
249 ];
250
251 Self { patterns, lang }
252 }
253
254 pub fn suggest_exclude_patterns(
260 &self,
261 base_path: &Path,
262 ) -> SmartResult<Vec<ExcludeRecommendation>> {
263 if !base_path.exists() {
264 return Err(SmartError::InvalidParameter(format!(
265 "パスが存在しません: {:?}",
266 base_path
267 )));
268 }
269
270 if !base_path.is_dir() {
271 return Err(SmartError::InvalidParameter(format!(
272 "ディレクトリではありません: {:?}",
273 base_path
274 )));
275 }
276
277 let mut recommendations = Vec::new();
278
279 for pattern_def in &self.patterns {
281 let matches = self.find_matching_paths(base_path, pattern_def)?;
282
283 if !matches.is_empty() {
284 let total_size = self.calculate_total_size(&matches)?;
286 let size_gb = total_size as f64 / 1_073_741_824.0;
287
288 if size_gb >= 0.000001 || total_size > 0 {
290 let confidence = PredictionConfidence::new(pattern_def.confidence)
291 .map_err(SmartError::InvalidParameter)?;
292
293 recommendations.push(ExcludeRecommendation::new(
294 pattern_def.pattern.clone(),
295 confidence,
296 size_gb,
297 pattern_def.reason.clone(),
298 ));
299 }
300 }
301 }
302
303 recommendations.sort_by(|a, b| {
305 b.size_reduction_gb
306 .partial_cmp(&a.size_reduction_gb)
307 .unwrap_or(std::cmp::Ordering::Equal)
308 });
309
310 Ok(recommendations)
311 }
312
313 fn find_matching_paths(
315 &self,
316 base_path: &Path,
317 pattern: &ExcludePattern,
318 ) -> SmartResult<Vec<PathBuf>> {
319 let mut matches = Vec::new();
320
321 for entry in WalkDir::new(base_path)
322 .follow_links(false)
323 .into_iter()
324 .filter_map(Result::ok)
325 {
326 let path = entry.path();
327
328 if pattern.is_directory && path.is_dir() {
330 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
331 if file_name == pattern.pattern || file_name.contains(&pattern.pattern) {
332 matches.push(path.to_path_buf());
333 }
334 }
335 }
336 else if !pattern.is_directory && path.is_file() {
338 if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
339 if self.matches_pattern(file_name, &pattern.pattern) {
340 matches.push(path.to_path_buf());
341 }
342 }
343 }
344 }
345
346 Ok(matches)
347 }
348
349 fn matches_pattern(&self, file_name: &str, pattern: &str) -> bool {
351 if pattern.starts_with(".*") && pattern.ends_with('$') {
353 let extension = pattern
355 .trim_start_matches(".*")
356 .trim_end_matches('$')
357 .replace(r"\.", "."); return file_name.ends_with(&extension);
359 }
360
361 if file_name == pattern {
363 return true;
364 }
365
366 file_name.contains(pattern)
368 }
369
370 fn calculate_total_size(&self, paths: &[PathBuf]) -> SmartResult<u64> {
372 let mut total_size = 0u64;
373
374 for path in paths {
375 if path.is_file() {
376 if let Ok(metadata) = std::fs::metadata(path) {
377 total_size = total_size.saturating_add(metadata.len());
378 }
379 } else if path.is_dir() {
380 for entry in WalkDir::new(path)
382 .follow_links(false)
383 .into_iter()
384 .filter_map(Result::ok)
385 {
386 if entry.file_type().is_file() {
387 if let Ok(metadata) = std::fs::metadata(entry.path()) {
388 total_size = total_size.saturating_add(metadata.len());
389 }
390 }
391 }
392 }
393 }
394
395 Ok(total_size)
396 }
397}
398
399impl Default for ExcludeRecommendationEngine {
400 fn default() -> Self {
401 Self::new()
402 }
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// The constructor stores its arguments and the getters return them.
    #[test]
    fn test_exclude_recommendation_creation() {
        let rec = ExcludeRecommendation::new(
            String::from("node_modules"),
            PredictionConfidence::new(0.95).unwrap(),
            5.2,
            String::from("npm依存関係"),
        );

        assert_eq!(rec.pattern(), "node_modules");
        assert_eq!(rec.confidence().get(), 0.95);
        assert_eq!(rec.size_reduction_gb(), 5.2);
    }

    /// A freshly built engine comes with a non-empty rule table.
    #[test]
    fn test_exclude_engine_creation() {
        let engine = ExcludeRecommendationEngine::new();
        assert!(!engine.patterns.is_empty());
    }

    /// Suffix rules accept matching extensions and reject others.
    #[test]
    fn test_matches_pattern() {
        let engine = ExcludeRecommendationEngine::new();

        let cases = [
            ("test.tmp", r".*\.tmp$", true),
            ("file.log", r".*\.log$", true),
            ("test.txt", r".*\.tmp$", false),
        ];
        for &(file_name, pattern, expected) in &cases {
            assert_eq!(engine.matches_pattern(file_name, pattern), expected);
        }
    }

    /// A tree containing `node_modules`, `.cache` and a `.tmp` file yields
    /// recommendations for at least the two directory rules.
    #[test]
    fn test_suggest_exclude_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        // Payload written into every fixture file.
        let large_content = vec![b'x'; 11_000_000];

        let node_modules = base_path.join("node_modules");
        fs::create_dir(&node_modules).unwrap();
        fs::write(node_modules.join("package.json"), &large_content).unwrap();

        let cache_dir = base_path.join(".cache");
        fs::create_dir(&cache_dir).unwrap();
        fs::write(cache_dir.join("data.cache"), &large_content).unwrap();

        fs::write(base_path.join("temp.tmp"), &large_content).unwrap();

        let engine = ExcludeRecommendationEngine::new();
        let recommendations = engine.suggest_exclude_patterns(base_path).unwrap();

        let detected: Vec<&str> = recommendations.iter().map(|r| r.pattern()).collect();

        assert!(
            detected.contains(&"node_modules"),
            "node_modules should be detected. Recommendations: {:?}",
            detected
        );
        assert!(
            detected.contains(&".cache"),
            ".cache should be detected. Recommendations: {:?}",
            detected
        );
    }

    /// Two 10-byte files add up to 20 bytes.
    #[test]
    fn test_calculate_total_size() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        let file1 = base_path.join("file1.txt");
        fs::write(&file1, b"1234567890").unwrap();

        let file2 = base_path.join("file2.txt");
        fs::write(&file2, b"abcdefghij").unwrap();

        let engine = ExcludeRecommendationEngine::new();
        let total_size = engine.calculate_total_size(&[file1, file2]).unwrap();

        assert_eq!(total_size, 20);
    }
}