backup_suite/smart/recommendation/
exclude.rs

//! 除外ファイル自動検出エンジン
//!
//! 一時ファイル、キャッシュ、再生成可能なファイルを検出して除外を提案します。
4
5use crate::i18n::{get_message, Language, MessageKey};
6use crate::smart::error::{SmartError, SmartResult};
7use crate::smart::types::PredictionConfidence;
8use std::path::{Path, PathBuf};
9use walkdir::WalkDir;
10
11/// 除外推奨
12///
13/// # 使用例
14///
15/// ```rust
16/// use backup_suite::smart::recommendation::ExcludeRecommendation;
17/// use backup_suite::smart::PredictionConfidence;
18///
19/// let recommendation = ExcludeRecommendation::new(
20///     "node_modules/".to_string(),
21///     PredictionConfidence::new(0.95).unwrap(),
22///     5.2,
23///     "再生成可能な依存関係ファイル".to_string()
24/// );
25/// assert_eq!(recommendation.pattern(), "node_modules/");
26/// assert_eq!(recommendation.size_reduction_gb(), 5.2);
27/// ```
28#[derive(Debug, Clone)]
29pub struct ExcludeRecommendation {
30    pattern: String,
31    confidence: PredictionConfidence,
32    size_reduction_gb: f64,
33    reason: String,
34}
35
36impl ExcludeRecommendation {
37    /// 新しい除外推奨を作成
38    #[must_use]
39    pub const fn new(
40        pattern: String,
41        confidence: PredictionConfidence,
42        size_reduction_gb: f64,
43        reason: String,
44    ) -> Self {
45        Self {
46            pattern,
47            confidence,
48            size_reduction_gb,
49            reason,
50        }
51    }
52
53    /// 除外パターンを取得
54    #[must_use]
55    pub fn pattern(&self) -> &str {
56        &self.pattern
57    }
58
59    /// 信頼度を取得
60    #[must_use]
61    pub const fn confidence(&self) -> PredictionConfidence {
62        self.confidence
63    }
64
65    /// サイズ削減量(GB)を取得
66    #[must_use]
67    pub const fn size_reduction_gb(&self) -> f64 {
68        self.size_reduction_gb
69    }
70
71    /// 理由を取得
72    #[must_use]
73    pub fn reason(&self) -> &str {
74        &self.reason
75    }
76}
77
/// One row of the engine's built-in exclusion table.
#[derive(Clone, Debug)]
struct ExcludePattern {
    /// Name to look for, or a simplified `.*<suffix>$` expression for files.
    pattern: String,
    /// Explanation text copied into the resulting recommendation.
    reason: String,
    /// Raw confidence value, later wrapped in a `PredictionConfidence`.
    confidence: f64,
    /// `true` when the row targets directories, `false` when it targets files.
    is_directory: bool,
}
86
87/// 除外推奨エンジン
88///
89/// ディレクトリを走査して除外すべきファイル/ディレクトリを検出します。
90///
91/// # 使用例
92///
93/// ```rust,no_run
94/// use backup_suite::smart::recommendation::ExcludeRecommendationEngine;
95/// use std::path::Path;
96///
97/// let engine = ExcludeRecommendationEngine::new();
98/// let project_dir = Path::new("/home/user/projects");
99///
100/// match engine.suggest_exclude_patterns(project_dir) {
101///     Ok(recommendations) => {
102///         for rec in recommendations {
103///             println!("除外推奨: {} (削減: {:.2}GB)", rec.pattern(), rec.size_reduction_gb());
104///             println!("理由: {}", rec.reason());
105///             println!("信頼度: {:.0}%", rec.confidence().as_percentage());
106///         }
107///     }
108///     Err(e) => eprintln!("エラー: {}", e),
109/// }
110/// ```
111#[derive(Debug)]
112pub struct ExcludeRecommendationEngine {
113    patterns: Vec<ExcludePattern>,
114    #[allow(dead_code)]
115    lang: Language,
116}
117
118impl ExcludeRecommendationEngine {
119    /// 新しい除外推奨エンジンを作成(言語自動検出)
120    #[must_use]
121    pub fn new() -> Self {
122        Self::with_language(Language::detect())
123    }
124
125    /// 言語指定で除外推奨エンジンを作成
126    #[must_use]
127    pub fn with_language(lang: Language) -> Self {
128        let patterns = vec![
129            // 開発環境の依存関係(高信頼度)
130            ExcludePattern {
131                pattern: "node_modules".to_string(),
132                reason: get_message(MessageKey::ExcludeReasonNpmDeps, lang).to_string(),
133                confidence: 0.99,
134                is_directory: true,
135            },
136            ExcludePattern {
137                pattern: "target".to_string(),
138                reason: get_message(MessageKey::ExcludeReasonRustBuild, lang).to_string(),
139                confidence: 0.99,
140                is_directory: true,
141            },
142            ExcludePattern {
143                pattern: "vendor".to_string(),
144                reason: get_message(MessageKey::ExcludeReasonVendor, lang).to_string(),
145                confidence: 0.95,
146                is_directory: true,
147            },
148            ExcludePattern {
149                pattern: "__pycache__".to_string(),
150                reason: get_message(MessageKey::ExcludeReasonPythonCache, lang).to_string(),
151                confidence: 0.99,
152                is_directory: true,
153            },
154            ExcludePattern {
155                pattern: ".pytest_cache".to_string(),
156                reason: get_message(MessageKey::ExcludeReasonPytestCache, lang).to_string(),
157                confidence: 0.99,
158                is_directory: true,
159            },
160            ExcludePattern {
161                pattern: "dist".to_string(),
162                reason: get_message(MessageKey::ExcludeReasonBuildArtifacts, lang).to_string(),
163                confidence: 0.90,
164                is_directory: true,
165            },
166            ExcludePattern {
167                pattern: "build".to_string(),
168                reason: get_message(MessageKey::ExcludeReasonBuildArtifacts, lang).to_string(),
169                confidence: 0.90,
170                is_directory: true,
171            },
172            // キャッシュディレクトリ(高信頼度)
173            ExcludePattern {
174                pattern: ".cache".to_string(),
175                reason: get_message(MessageKey::ExcludeReasonCacheDir, lang).to_string(),
176                confidence: 0.95,
177                is_directory: true,
178            },
179            ExcludePattern {
180                pattern: "cache".to_string(),
181                reason: get_message(MessageKey::ExcludeReasonCacheDir, lang).to_string(),
182                confidence: 0.85,
183                is_directory: true,
184            },
185            // バージョン管理システム(中信頼度)
186            ExcludePattern {
187                pattern: ".git".to_string(),
188                reason: get_message(MessageKey::ExcludeReasonGitMetadata, lang).to_string(),
189                confidence: 0.70,
190                is_directory: true,
191            },
192            ExcludePattern {
193                pattern: ".svn".to_string(),
194                reason: get_message(MessageKey::ExcludeReasonSvnMetadata, lang).to_string(),
195                confidence: 0.70,
196                is_directory: true,
197            },
198            // 一時ファイル(高信頼度)
199            ExcludePattern {
200                pattern: r".*\.tmp$".to_string(),
201                reason: get_message(MessageKey::ExcludeReasonTempFile, lang).to_string(),
202                confidence: 0.99,
203                is_directory: false,
204            },
205            ExcludePattern {
206                pattern: r".*\.temp$".to_string(),
207                reason: get_message(MessageKey::ExcludeReasonTempFile, lang).to_string(),
208                confidence: 0.99,
209                is_directory: false,
210            },
211            ExcludePattern {
212                pattern: r".*\.bak$".to_string(),
213                reason: get_message(MessageKey::ExcludeReasonBackupFile, lang).to_string(),
214                confidence: 0.85,
215                is_directory: false,
216            },
217            ExcludePattern {
218                pattern: r".*~$".to_string(),
219                reason: get_message(MessageKey::ExcludeReasonEditorTemp, lang).to_string(),
220                confidence: 0.95,
221                is_directory: false,
222            },
223            // ログファイル(中信頼度)
224            ExcludePattern {
225                pattern: r".*\.log$".to_string(),
226                reason: get_message(MessageKey::ExcludeReasonLogFile, lang).to_string(),
227                confidence: 0.70,
228                is_directory: false,
229            },
230            // OS固有ファイル(高信頼度)
231            ExcludePattern {
232                pattern: ".DS_Store".to_string(),
233                reason: get_message(MessageKey::ExcludeReasonMacOsMetadata, lang).to_string(),
234                confidence: 0.99,
235                is_directory: false,
236            },
237            ExcludePattern {
238                pattern: "Thumbs.db".to_string(),
239                reason: get_message(MessageKey::ExcludeReasonWindowsThumb, lang).to_string(),
240                confidence: 0.99,
241                is_directory: false,
242            },
243            ExcludePattern {
244                pattern: "desktop.ini".to_string(),
245                reason: get_message(MessageKey::ExcludeReasonWindowsDesktop, lang).to_string(),
246                confidence: 0.95,
247                is_directory: false,
248            },
249        ];
250
251        Self { patterns, lang }
252    }
253
254    /// 除外パターンを提案
255    ///
256    /// # Errors
257    ///
258    /// ファイルシステムアクセスに失敗した場合はエラーを返します。
259    pub fn suggest_exclude_patterns(
260        &self,
261        base_path: &Path,
262    ) -> SmartResult<Vec<ExcludeRecommendation>> {
263        if !base_path.exists() {
264            return Err(SmartError::InvalidParameter(format!(
265                "パスが存在しません: {:?}",
266                base_path
267            )));
268        }
269
270        if !base_path.is_dir() {
271            return Err(SmartError::InvalidParameter(format!(
272                "ディレクトリではありません: {:?}",
273                base_path
274            )));
275        }
276
277        let mut recommendations = Vec::new();
278
279        // パターンごとにマッチするディレクトリ/ファイルを検索
280        for pattern_def in &self.patterns {
281            let matches = self.find_matching_paths(base_path, pattern_def)?;
282
283            if !matches.is_empty() {
284                // 合計サイズを計算
285                let total_size = self.calculate_total_size(&matches)?;
286                let size_gb = total_size as f64 / 1_073_741_824.0;
287
288                // 推奨が意味のあるサイズの場合のみ追加(1KB以上)
289                if size_gb >= 0.000001 || total_size > 0 {
290                    let confidence = PredictionConfidence::new(pattern_def.confidence)
291                        .map_err(SmartError::InvalidParameter)?;
292
293                    recommendations.push(ExcludeRecommendation::new(
294                        pattern_def.pattern.clone(),
295                        confidence,
296                        size_gb,
297                        pattern_def.reason.clone(),
298                    ));
299                }
300            }
301        }
302
303        // サイズ削減量の大きい順にソート
304        recommendations.sort_by(|a, b| {
305            b.size_reduction_gb
306                .partial_cmp(&a.size_reduction_gb)
307                .unwrap_or(std::cmp::Ordering::Equal)
308        });
309
310        Ok(recommendations)
311    }
312
313    /// パターンにマッチするパスを検索
314    fn find_matching_paths(
315        &self,
316        base_path: &Path,
317        pattern: &ExcludePattern,
318    ) -> SmartResult<Vec<PathBuf>> {
319        let mut matches = Vec::new();
320
321        for entry in WalkDir::new(base_path)
322            .follow_links(false)
323            .into_iter()
324            .filter_map(Result::ok)
325        {
326            let path = entry.path();
327
328            // ディレクトリパターンのマッチング
329            if pattern.is_directory && path.is_dir() {
330                if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
331                    if file_name == pattern.pattern || file_name.contains(&pattern.pattern) {
332                        matches.push(path.to_path_buf());
333                    }
334                }
335            }
336            // ファイルパターンのマッチング(正規表現)
337            else if !pattern.is_directory && path.is_file() {
338                if let Some(file_name) = path.file_name().and_then(|n| n.to_str()) {
339                    if self.matches_pattern(file_name, &pattern.pattern) {
340                        matches.push(path.to_path_buf());
341                    }
342                }
343            }
344        }
345
346        Ok(matches)
347    }
348
349    /// 簡易的なパターンマッチング
350    fn matches_pattern(&self, file_name: &str, pattern: &str) -> bool {
351        // 正規表現パターン(簡易版)
352        if pattern.starts_with(".*") && pattern.ends_with('$') {
353            // ".*\.tmp$" のようなパターン → ".tmp" に変換
354            let extension = pattern
355                .trim_start_matches(".*")
356                .trim_end_matches('$')
357                .replace(r"\.", "."); // エスケープされたドットを実際のドットに変換
358            return file_name.ends_with(&extension);
359        }
360
361        // 完全一致を優先
362        if file_name == pattern {
363            return true;
364        }
365
366        // 部分一致
367        file_name.contains(pattern)
368    }
369
370    /// 合計サイズを計算
371    fn calculate_total_size(&self, paths: &[PathBuf]) -> SmartResult<u64> {
372        let mut total_size = 0u64;
373
374        for path in paths {
375            if path.is_file() {
376                if let Ok(metadata) = std::fs::metadata(path) {
377                    total_size = total_size.saturating_add(metadata.len());
378                }
379            } else if path.is_dir() {
380                // ディレクトリの場合、再帰的にサイズを計算
381                for entry in WalkDir::new(path)
382                    .follow_links(false)
383                    .into_iter()
384                    .filter_map(Result::ok)
385                {
386                    if entry.file_type().is_file() {
387                        if let Ok(metadata) = std::fs::metadata(entry.path()) {
388                            total_size = total_size.saturating_add(metadata.len());
389                        }
390                    }
391                }
392            }
393        }
394
395        Ok(total_size)
396    }
397}
398
399impl Default for ExcludeRecommendationEngine {
400    fn default() -> Self {
401        Self::new()
402    }
403}
404
#[cfg(test)]
mod tests {
    use super::*;
    use std::fs;
    use tempfile::TempDir;

    /// The constructor stores its arguments and the accessors return them.
    #[test]
    fn test_exclude_recommendation_creation() {
        let confidence = PredictionConfidence::new(0.95).unwrap();
        let rec = ExcludeRecommendation::new(
            "node_modules".to_string(),
            confidence,
            5.2,
            "npm依存関係".to_string(),
        );

        assert_eq!(rec.pattern(), "node_modules");
        assert_eq!(rec.confidence().get(), 0.95);
        assert_eq!(rec.size_reduction_gb(), 5.2);
        assert_eq!(rec.reason(), "npm依存関係");
    }

    /// A freshly built engine comes with a non-empty pattern table.
    #[test]
    fn test_exclude_engine_creation() {
        let engine = ExcludeRecommendationEngine::new();
        assert!(!engine.patterns.is_empty());
    }

    /// Suffix expressions match on the file-name ending; literal names
    /// match themselves.
    #[test]
    fn test_matches_pattern() {
        let engine = ExcludeRecommendationEngine::new();

        assert!(engine.matches_pattern("test.tmp", r".*\.tmp$"));
        assert!(engine.matches_pattern("file.log", r".*\.log$"));
        assert!(!engine.matches_pattern("test.txt", r".*\.tmp$"));
        assert!(engine.matches_pattern(".DS_Store", ".DS_Store"));
    }

    /// Directories and files from the built-in table are reported.
    #[test]
    fn test_suggest_exclude_patterns() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        // Any non-zero size is enough for a recommendation, so a few KiB per
        // fixture keeps the test fast without changing what it verifies.
        let payload = vec![b'x'; 4096];

        let node_modules = base_path.join("node_modules");
        fs::create_dir(&node_modules).unwrap();
        fs::write(node_modules.join("package.json"), &payload).unwrap();

        let cache_dir = base_path.join(".cache");
        fs::create_dir(&cache_dir).unwrap();
        fs::write(cache_dir.join("data.cache"), &payload).unwrap();

        fs::write(base_path.join("temp.tmp"), &payload).unwrap();

        let engine = ExcludeRecommendationEngine::new();
        let recommendations = engine.suggest_exclude_patterns(base_path).unwrap();

        // Listed in the failure message to make a missing pattern easy to spot.
        let reported: Vec<&str> = recommendations.iter().map(|r| r.pattern()).collect();

        assert!(
            reported.contains(&"node_modules"),
            "node_modules should be detected. Recommendations: {:?}",
            reported
        );
        assert!(
            reported.contains(&".cache"),
            ".cache should be detected. Recommendations: {:?}",
            reported
        );
        assert!(
            reported.contains(&r".*\.tmp$"),
            "the .tmp file pattern should be detected. Recommendations: {:?}",
            reported
        );
    }

    /// File sizes are summed byte for byte.
    #[test]
    fn test_calculate_total_size() {
        let temp_dir = TempDir::new().unwrap();
        let base_path = temp_dir.path();

        let file1 = base_path.join("file1.txt");
        fs::write(&file1, b"1234567890").unwrap(); // 10 bytes

        let file2 = base_path.join("file2.txt");
        fs::write(&file2, b"abcdefghij").unwrap(); // 10 bytes

        let engine = ExcludeRecommendationEngine::new();
        let paths = vec![file1, file2];
        let total_size = engine.calculate_total_size(&paths).unwrap();

        assert_eq!(total_size, 20);
    }
}