subx_cli/core/matcher/
engine.rs

1use crate::services::ai::{AIProvider, AnalysisRequest, ContentSample};
2use std::path::Path;
3
4use crate::Result;
5use crate::core::language::LanguageDetector;
6use crate::core::matcher::cache::{CacheData, OpItem, SnapshotItem};
7use crate::core::matcher::{FileDiscovery, MediaFile, MediaFileType};
8
9use crate::config::load_config;
10use crate::error::SubXError;
11use dirs;
12use md5;
13use serde_json;
14use toml;
15
/// Configuration for the file matching engine.
#[derive(Debug, Clone)]
pub struct MatchConfig {
    /// Minimum AI confidence (compared with `>=`) a match needs in order to be
    /// returned by `MatchEngine::match_files`.
    pub confidence_threshold: f32,
    /// Maximum length, in bytes, of a subtitle content preview before it is
    /// cut and suffixed with `...`.
    pub max_sample_length: usize,
    /// When `true`, subtitle files are read and their content previews are
    /// attached to the AI analysis request; otherwise no samples are sent.
    pub enable_content_analysis: bool,
    /// When `true`, the subtitle is copied to `<name>.<ext>.backup` before it
    /// is renamed.
    pub backup_enabled: bool,
}
24
#[cfg(test)]
mod language_name_tests {
    use super::*;
    use crate::core::matcher::discovery::{MediaFile, MediaFileType};
    use crate::services::ai::{
        AIProvider, AnalysisRequest, ConfidenceScore, MatchResult, VerificationRequest,
    };
    use async_trait::async_trait;
    use std::path::PathBuf;

    /// AI provider stub: the naming tests never call into the AI layer.
    struct DummyAI;
    #[async_trait]
    impl AIProvider for DummyAI {
        async fn analyze_content(&self, _req: AnalysisRequest) -> crate::Result<MatchResult> {
            unimplemented!()
        }
        async fn verify_match(&self, _req: VerificationRequest) -> crate::Result<ConfidenceScore> {
            unimplemented!()
        }
    }

    /// Engine with every feature switched off; only name generation is exercised.
    fn stub_engine() -> MatchEngine {
        let config = MatchConfig {
            confidence_threshold: 0.0,
            max_sample_length: 0,
            enable_content_analysis: false,
            backup_enabled: false,
        };
        MatchEngine::new(Box::new(DummyAI), config)
    }

    /// Zero-sized `.mp4` video fixture named `<stem>.mp4`.
    fn video_fixture(stem: &str) -> MediaFile {
        MediaFile {
            path: PathBuf::from(format!("{}.mp4", stem)),
            file_type: MediaFileType::Video,
            size: 0,
            name: stem.to_string(),
            extension: "mp4".to_string(),
        }
    }

    /// Zero-sized `.ass` subtitle fixture located at `path`.
    fn subtitle_fixture(path: &str, stem: &str) -> MediaFile {
        MediaFile {
            path: PathBuf::from(path),
            file_type: MediaFileType::Subtitle,
            size: 0,
            name: stem.to_string(),
            extension: "ass".to_string(),
        }
    }

    #[test]
    fn test_generate_subtitle_name_with_directory_language() {
        let video = video_fixture("movie01");
        let subtitle = subtitle_fixture("tc/subtitle01.ass", "subtitle01");
        let new_name = stub_engine().generate_subtitle_name(&video, &subtitle);
        assert_eq!(new_name, "movie01.tc.ass");
    }

    #[test]
    fn test_generate_subtitle_name_with_filename_language() {
        let video = video_fixture("movie02");
        let subtitle = subtitle_fixture("subtitle02.sc.ass", "subtitle02");
        let new_name = stub_engine().generate_subtitle_name(&video, &subtitle);
        assert_eq!(new_name, "movie02.sc.ass");
    }

    #[test]
    fn test_generate_subtitle_name_without_language() {
        let video = video_fixture("movie03");
        let subtitle = subtitle_fixture("subtitle03.ass", "subtitle03");
        let new_name = stub_engine().generate_subtitle_name(&video, &subtitle);
        assert_eq!(new_name, "movie03.ass");
    }
}
133
/// Result of a single match: one subtitle paired with one video.
#[derive(Debug)]
pub struct MatchOperation {
    /// The video file the subtitle was matched to.
    pub video_file: MediaFile,
    /// The subtitle file that will be renamed.
    pub subtitle_file: MediaFile,
    /// Target file name for the subtitle (file name only; the rename keeps the
    /// subtitle in its current directory).
    pub new_subtitle_name: String,
    /// Confidence reported by the AI provider for this pairing.
    pub confidence: f32,
    /// Match factors reported by the AI provider for this pairing.
    pub reasoning: Vec<String>,
}
143
/// File matching engine: pairs subtitle files with video files via an AI provider.
pub struct MatchEngine {
    /// AI provider that receives the analysis request built by `match_files`.
    ai_client: Box<dyn AIProvider>,
    /// Directory scanner used to discover video and subtitle files.
    discovery: FileDiscovery,
    /// Thresholds and feature switches for matching and renaming.
    config: MatchConfig,
}
150
151impl MatchEngine {
152    /// 建立匹配引擎,注入 AI 提供者與設定
153    // analyzer 已移除
154    pub fn new(ai_client: Box<dyn AIProvider>, config: MatchConfig) -> Self {
155        Self {
156            ai_client,
157            discovery: FileDiscovery::new(),
158            config,
159        }
160    }
161
162    /// 匹配指定路徑下的影片與字幕檔案,回傳符合閾值的匹配操作
163    pub async fn match_files(&self, path: &Path, recursive: bool) -> Result<Vec<MatchOperation>> {
164        // 1. 探索檔案
165        let files = self.discovery.scan_directory(path, recursive)?;
166
167        let videos: Vec<_> = files
168            .iter()
169            .filter(|f| matches!(f.file_type, MediaFileType::Video))
170            .collect();
171        let subtitles: Vec<_> = files
172            .iter()
173            .filter(|f| matches!(f.file_type, MediaFileType::Subtitle))
174            .collect();
175
176        if videos.is_empty() || subtitles.is_empty() {
177            return Ok(Vec::new());
178        }
179
180        // 2. 嘗試從 Dry-run 快取重用結果
181        if let Some(ops) = self.check_cache(path, recursive).await? {
182            return Ok(ops);
183        }
184        // 3. 內容採樣
185        let content_samples = if self.config.enable_content_analysis {
186            self.extract_content_samples(&subtitles).await?
187        } else {
188            Vec::new()
189        };
190
191        // 4. AI 分析請求
192        // 生成 AI 分析請求:在檔名中包含相對路徑與目錄資訊,提升遞迴匹配的準確度
193        let video_files: Vec<String> = videos
194            .iter()
195            .map(|v| {
196                let rel = v
197                    .path
198                    .strip_prefix(path)
199                    .unwrap_or(&v.path)
200                    .to_string_lossy();
201                let dir = v
202                    .path
203                    .parent()
204                    .and_then(|p| p.file_name())
205                    .and_then(|n| n.to_str())
206                    .unwrap_or_default();
207                format!("{} (路徑: {}, 目錄: {})", v.name, rel, dir)
208            })
209            .collect();
210        let subtitle_files: Vec<String> = subtitles
211            .iter()
212            .map(|s| {
213                let rel = s
214                    .path
215                    .strip_prefix(path)
216                    .unwrap_or(&s.path)
217                    .to_string_lossy();
218                let dir = s
219                    .path
220                    .parent()
221                    .and_then(|p| p.file_name())
222                    .and_then(|n| n.to_str())
223                    .unwrap_or_default();
224                format!("{} (路徑: {}, 目錄: {})", s.name, rel, dir)
225            })
226            .collect();
227        let analysis_request = AnalysisRequest {
228            video_files,
229            subtitle_files,
230            content_samples,
231        };
232
233        let match_result = self.ai_client.analyze_content(analysis_request).await?;
234
235        // 4. 組裝匹配操作列表
236        let mut operations = Vec::new();
237
238        for ai_match in match_result.matches {
239            if ai_match.confidence >= self.config.confidence_threshold {
240                if let (Some(video), Some(subtitle)) = (
241                    videos.iter().find(|v| v.name == ai_match.video_file),
242                    subtitles.iter().find(|s| s.name == ai_match.subtitle_file),
243                ) {
244                    let new_name = self.generate_subtitle_name(video, subtitle);
245
246                    operations.push(MatchOperation {
247                        video_file: (*video).clone(),
248                        subtitle_file: (*subtitle).clone(),
249                        new_subtitle_name: new_name,
250                        confidence: ai_match.confidence,
251                        reasoning: ai_match.match_factors,
252                    });
253                }
254            }
255        }
256
257        Ok(operations)
258    }
259
260    async fn extract_content_samples(
261        &self,
262        subtitles: &[&MediaFile],
263    ) -> Result<Vec<ContentSample>> {
264        let mut samples = Vec::new();
265
266        for subtitle in subtitles {
267            let content = std::fs::read_to_string(&subtitle.path)?;
268            let preview = self.create_content_preview(&content);
269
270            samples.push(ContentSample {
271                filename: subtitle.name.clone(),
272                content_preview: preview,
273                file_size: subtitle.size,
274            });
275        }
276
277        Ok(samples)
278    }
279
280    fn create_content_preview(&self, content: &str) -> String {
281        let lines: Vec<&str> = content.lines().take(20).collect();
282        let preview = lines.join("\n");
283
284        if preview.len() > self.config.max_sample_length {
285            format!("{}...", &preview[..self.config.max_sample_length])
286        } else {
287            preview
288        }
289    }
290
291    fn generate_subtitle_name(&self, video: &MediaFile, subtitle: &MediaFile) -> String {
292        let detector = LanguageDetector::new();
293        if let Some(code) = detector.get_primary_language(&subtitle.path) {
294            format!("{}.{}.{}", video.name, code, subtitle.extension)
295        } else {
296            format!("{}.{}", video.name, subtitle.extension)
297        }
298    }
299
300    /// 執行匹配操作,支援 Dry-run 模式
301    pub async fn execute_operations(
302        &self,
303        operations: &[MatchOperation],
304        dry_run: bool,
305    ) -> Result<()> {
306        for op in operations {
307            if dry_run {
308                println!(
309                    "預覽: {} -> {}",
310                    op.subtitle_file.name, op.new_subtitle_name
311                );
312            } else {
313                self.rename_file(op).await?;
314            }
315        }
316        Ok(())
317    }
318
319    async fn rename_file(&self, op: &MatchOperation) -> Result<()> {
320        let old_path = &op.subtitle_file.path;
321        let new_path = old_path.with_file_name(&op.new_subtitle_name);
322
323        // 備份檔案
324        if self.config.backup_enabled {
325            let backup_path =
326                old_path.with_extension(format!("{}.backup", op.subtitle_file.extension));
327            std::fs::copy(old_path, backup_path)?;
328        }
329
330        std::fs::rename(old_path, new_path)?;
331        Ok(())
332    }
333    /// 計算指定目錄的檔案快照,用於快取比對
334    fn calculate_file_snapshot(
335        &self,
336        directory: &Path,
337        recursive: bool,
338    ) -> Result<Vec<SnapshotItem>> {
339        let files = self.discovery.scan_directory(directory, recursive)?;
340        let mut snapshot = Vec::new();
341        for f in files {
342            let metadata = std::fs::metadata(&f.path)?;
343            let mtime = metadata
344                .modified()
345                .ok()
346                .and_then(|t| t.duration_since(std::time::UNIX_EPOCH).ok())
347                .map(|d| d.as_secs())
348                .unwrap_or(0);
349            snapshot.push(SnapshotItem {
350                name: f.name.clone(),
351                size: f.size,
352                mtime,
353                file_type: match f.file_type {
354                    MediaFileType::Video => "video".to_string(),
355                    MediaFileType::Subtitle => "subtitle".to_string(),
356                },
357            });
358        }
359        Ok(snapshot)
360    }
361
362    /// 檢查 Dry-run 快取,命中則回傳先前計算的匹配操作
363    pub async fn check_cache(
364        &self,
365        directory: &Path,
366        recursive: bool,
367    ) -> Result<Option<Vec<MatchOperation>>> {
368        let current_snapshot = self.calculate_file_snapshot(directory, recursive)?;
369        let cache_data = CacheData::load(&self.get_cache_file_path()?).ok();
370        if let Some(cache_data) = cache_data {
371            if cache_data.directory == directory.to_string_lossy()
372                && cache_data.file_snapshot == current_snapshot
373                && cache_data.ai_model_used == self.calculate_config_hash()?
374                && cache_data.config_hash == self.calculate_config_hash()?
375            {
376                // 重建匹配操作列表
377                let files = self.discovery.scan_directory(directory, recursive)?;
378                let mut ops = Vec::new();
379                for item in cache_data.match_operations {
380                    if let (Some(video), Some(subtitle)) = (
381                        files.iter().find(|f| {
382                            f.name == item.video_file && matches!(f.file_type, MediaFileType::Video)
383                        }),
384                        files.iter().find(|f| {
385                            f.name == item.subtitle_file
386                                && matches!(f.file_type, MediaFileType::Subtitle)
387                        }),
388                    ) {
389                        ops.push(MatchOperation {
390                            video_file: (*video).clone(),
391                            subtitle_file: (*subtitle).clone(),
392                            new_subtitle_name: item.new_subtitle_name.clone(),
393                            confidence: item.confidence,
394                            reasoning: item.reasoning.clone(),
395                        });
396                    }
397                }
398                return Ok(Some(ops));
399            }
400        }
401        Ok(None)
402    }
403
404    /// 儲存 Dry-run 快取結果
405    pub async fn save_cache(
406        &self,
407        directory: &Path,
408        recursive: bool,
409        operations: &[MatchOperation],
410    ) -> Result<()> {
411        let cache_data = CacheData {
412            cache_version: "1.0".to_string(),
413            directory: directory.to_string_lossy().to_string(),
414            file_snapshot: self.calculate_file_snapshot(directory, recursive)?,
415            match_operations: operations
416                .iter()
417                .map(|op| OpItem {
418                    video_file: op.video_file.name.clone(),
419                    subtitle_file: op.subtitle_file.name.clone(),
420                    new_subtitle_name: op.new_subtitle_name.clone(),
421                    confidence: op.confidence,
422                    reasoning: op.reasoning.clone(),
423                })
424                .collect(),
425            created_at: std::time::SystemTime::now()
426                .duration_since(std::time::UNIX_EPOCH)
427                .map(|d| d.as_secs())
428                .unwrap_or(0),
429            ai_model_used: self.calculate_config_hash()?,
430            config_hash: self.calculate_config_hash()?,
431        };
432        let path = self.get_cache_file_path()?;
433        if let Some(parent) = path.parent() {
434            std::fs::create_dir_all(parent)?;
435        }
436        let content =
437            serde_json::to_string_pretty(&cache_data).map_err(|e| SubXError::Other(e.into()))?;
438        std::fs::write(path, content)?;
439        Ok(())
440    }
441
442    /// 取得快取檔案路徑
443    fn get_cache_file_path(&self) -> Result<std::path::PathBuf> {
444        let dir = dirs::config_dir().ok_or_else(|| SubXError::config("無法確定快取目錄"))?;
445        Ok(dir.join("subx").join("match_cache.json"))
446    }
447
448    /// 計算目前配置雜湊,用於快取驗證
449    fn calculate_config_hash(&self) -> Result<String> {
450        let config = load_config()?;
451        let toml = toml::to_string(&config)
452            .map_err(|e| SubXError::config(format!("TOML 序列化錯誤: {}", e)))?;
453        Ok(format!("{:x}", md5::compute(toml)))
454    }
455}