subx_cli/core/matcher/
engine.rs

1//! File matching engine that uses AI content analysis to align video and subtitle files.
2//!
3//! This module provides the `MatchEngine`, which orchestrates discovery,
4//! content sampling, AI analysis, and caching to generate subtitle matching operations.
5//!
6//! # Examples
7//!
8//! ```rust,ignore
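//! use subx_cli::core::matcher::engine::{
//!     ConflictResolution, FileRelocationMode, MatchConfig, MatchEngine,
//! };
//! // Create a match engine with an explicit configuration.
//! // `DummyAI` stands in for any type implementing `AIProvider`.
//! let config = MatchConfig {
//!     confidence_threshold: 0.8,
//!     max_sample_length: 1024,
//!     enable_content_analysis: true,
//!     backup_enabled: false,
//!     relocation_mode: FileRelocationMode::None,
//!     conflict_resolution: ConflictResolution::Skip,
//!     ai_model: "your-model-name".to_string(),
//! };
//! let engine = MatchEngine::new(Box::new(DummyAI), config);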
13//! ```
14
15use crate::services::ai::{AIProvider, AnalysisRequest, ContentSample, MatchResult};
16use std::path::PathBuf;
17
18use crate::Result;
19use crate::core::language::LanguageDetector;
20use crate::core::matcher::cache::{CacheData, OpItem};
21use crate::core::matcher::discovery::generate_file_id;
22use crate::core::matcher::{FileDiscovery, MediaFile, MediaFileType};
23use crate::core::parallel::{FileProcessingTask, ProcessingOperation, Task, TaskResult};
24use crate::error::SubXError;
25use dirs;
26use serde_json;
27
/// File relocation mode for matched subtitle files.
///
/// Selects whether a matched subtitle is left in place, copied, or moved
/// next to its video. Equality is total, so the type implements `Eq` in
/// addition to `PartialEq`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum FileRelocationMode {
    /// No file relocation
    None,
    /// Copy subtitle files to video folders
    Copy,
    /// Move subtitle files to video folders
    Move,
}
38
/// Strategy for handling filename conflicts during relocation.
///
/// Implements `PartialEq`/`Eq` so that a configured strategy can be compared
/// directly, the same way `FileRelocationMode` values are.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum ConflictResolution {
    /// Skip relocation if conflict exists
    Skip,
    /// Automatically rename with numeric suffix
    AutoRename,
    /// Prompt user for decision (interactive mode only)
    Prompt,
}
49
50/// Configuration settings for the file matching engine.
51///
52/// Controls various aspects of the subtitle-to-video matching process,
53/// including confidence thresholds and analysis options.
54#[derive(Debug, Clone)]
55pub struct MatchConfig {
56    /// Minimum confidence score required for a successful match (0.0 to 1.0)
57    pub confidence_threshold: f32,
58    /// Maximum number of characters to sample from subtitle content
59    pub max_sample_length: usize,
60    /// Whether to enable advanced content analysis for matching
61    pub enable_content_analysis: bool,
62    /// Whether to create backup files before operations
63    pub backup_enabled: bool,
64    /// File relocation mode
65    pub relocation_mode: FileRelocationMode,
66    /// Strategy for handling filename conflicts during relocation
67    pub conflict_resolution: ConflictResolution,
68    /// AI model name used for analysis
69    pub ai_model: String,
70}
71
#[cfg(test)]
mod language_name_tests {
    //! Unit tests for subtitle name generation, file renaming, and the
    //! wording of file-operation status messages.
    //!
    //! Shared fixtures live in the helper functions at the top of the module
    //! so each test only states what is specific to it.

    use super::*;
    use crate::core::matcher::discovery::{MediaFile, MediaFileType};
    use crate::services::ai::{
        AIProvider, AnalysisRequest, ConfidenceScore, MatchResult, VerificationRequest,
    };
    use async_trait::async_trait;
    use std::fs;
    use std::path::{Path, PathBuf};
    use tempfile::TempDir;

    /// Minimal SRT payload written to the on-disk subtitle fixture.
    const SAMPLE_SRT: &str = "1\n00:00:01,000 --> 00:00:02,000\nTest subtitle";

    /// AI provider stub; none of these tests reach the AI layer.
    struct DummyAI;
    #[async_trait]
    impl AIProvider for DummyAI {
        async fn analyze_content(&self, _req: AnalysisRequest) -> crate::Result<MatchResult> {
            unimplemented!()
        }
        async fn verify_match(&self, _req: VerificationRequest) -> crate::Result<ConfidenceScore> {
            unimplemented!()
        }
    }

    /// Builds an engine with content analysis, backups, and relocation disabled.
    fn test_engine() -> MatchEngine {
        MatchEngine::new(
            Box::new(DummyAI),
            MatchConfig {
                confidence_threshold: 0.0,
                max_sample_length: 0,
                enable_content_analysis: false,
                backup_enabled: false,
                relocation_mode: FileRelocationMode::None,
                conflict_resolution: ConflictResolution::Skip,
                ai_model: "test-model".to_string(),
            },
        )
    }

    /// Builds a `MediaFile` with empty id/relative path and zero size.
    fn media_file(path: &str, name: &str, extension: &str, file_type: MediaFileType) -> MediaFile {
        MediaFile {
            id: String::new(),
            relative_path: String::new(),
            path: PathBuf::from(path),
            file_type,
            size: 0,
            name: name.to_string(),
            extension: extension.to_string(),
        }
    }

    /// Shorthand for a video `MediaFile`.
    fn video_file(path: &str, name: &str, extension: &str) -> MediaFile {
        media_file(path, name, extension, MediaFileType::Video)
    }

    /// Shorthand for a subtitle `MediaFile`.
    fn subtitle_file(path: &str, name: &str, extension: &str) -> MediaFile {
        media_file(path, name, extension, MediaFileType::Subtitle)
    }

    /// Writes `original.srt` into `dir` and returns its path together with a
    /// `MatchOperation` that renames it to `renamed.srt` without relocation.
    fn rename_fixture(dir: &Path) -> (PathBuf, MatchOperation) {
        let original_file = dir.join("original.srt");
        fs::write(&original_file, SAMPLE_SRT).unwrap();

        let match_op = MatchOperation {
            video_file: MediaFile {
                id: "video_id".to_string(),
                relative_path: "test.mp4".to_string(),
                path: dir.join("test.mp4"),
                file_type: MediaFileType::Video,
                size: 1000,
                name: "test".to_string(),
                extension: "mp4".to_string(),
            },
            subtitle_file: MediaFile {
                id: "test_id".to_string(),
                relative_path: "original.srt".to_string(),
                path: original_file.clone(),
                file_type: MediaFileType::Subtitle,
                size: 40,
                name: "original".to_string(),
                extension: "srt".to_string(),
            },
            new_subtitle_name: "renamed.srt".to_string(),
            confidence: 95.0,
            reasoning: vec!["Test match".to_string()],
            requires_relocation: false,
            relocation_target_path: None,
            relocation_mode: FileRelocationMode::None,
        };

        (original_file, match_op)
    }

    #[test]
    fn test_generate_subtitle_name_with_directory_language() {
        let new_name = test_engine().generate_subtitle_name(
            &video_file("movie01.mp4", "movie01", "mp4"),
            &subtitle_file("tc/subtitle01.ass", "subtitle01", "ass"),
        );
        assert_eq!(new_name, "movie01.tc.ass");
    }

    #[test]
    fn test_generate_subtitle_name_with_filename_language() {
        let new_name = test_engine().generate_subtitle_name(
            &video_file("movie02.mp4", "movie02", "mp4"),
            &subtitle_file("subtitle02.sc.ass", "subtitle02", "ass"),
        );
        assert_eq!(new_name, "movie02.sc.ass");
    }

    #[test]
    fn test_generate_subtitle_name_without_language() {
        let new_name = test_engine().generate_subtitle_name(
            &video_file("movie03.mp4", "movie03", "mp4"),
            &subtitle_file("subtitle03.ass", "subtitle03", "ass"),
        );
        assert_eq!(new_name, "movie03.ass");
    }

    #[test]
    fn test_generate_subtitle_name_removes_video_extension() {
        let new_name = test_engine().generate_subtitle_name(
            &video_file("movie.mkv", "movie.mkv", "mkv"),
            &subtitle_file("subtitle.srt", "subtitle", "srt"),
        );
        assert_eq!(new_name, "movie.srt");
    }

    #[test]
    fn test_generate_subtitle_name_with_language_removes_video_extension() {
        let new_name = test_engine().generate_subtitle_name(
            &video_file("movie.mkv", "movie.mkv", "mkv"),
            &subtitle_file("tc/subtitle.srt", "subtitle", "srt"),
        );
        assert_eq!(new_name, "movie.tc.srt");
    }

    #[test]
    fn test_generate_subtitle_name_edge_cases() {
        // File name contains multiple dots and no extension case
        let new_name = test_engine().generate_subtitle_name(
            &video_file("a.b.c", "a.b.c", ""),
            &subtitle_file("sub.srt", "sub", "srt"),
        );
        assert_eq!(new_name, "a.b.c.srt");
    }

    #[tokio::test]
    async fn test_rename_file_displays_success_check_mark() {
        let temp_dir = TempDir::new().unwrap();
        let temp_path = temp_dir.path();
        let (original_file, match_op) = rename_fixture(temp_path);
        let engine = test_engine();

        // Execute the rename operation
        let result = engine.rename_file(&match_op).await;

        // Verify the operation was successful
        assert!(result.is_ok());

        // Verify the file has been renamed
        let renamed_file = temp_path.join("renamed.srt");
        assert!(renamed_file.exists(), "The renamed file should exist");
        assert!(
            !original_file.exists(),
            "The original file should have been renamed"
        );

        // Verify the file content is correct
        let content = fs::read_to_string(&renamed_file).unwrap();
        assert!(content.contains("Test subtitle"));
    }

    #[tokio::test]
    async fn test_rename_file_displays_error_cross_mark_when_file_not_exists() {
        let temp_dir = TempDir::new().unwrap();
        let temp_path = temp_dir.path();
        let (original_file, match_op) = rename_fixture(temp_path);
        let engine = test_engine();
        let renamed_file = temp_path.join("renamed.srt");

        // A rename that succeeds at the file-system level while the target
        // is missing cannot be simulated here, so this test only checks that
        // the rename can be repeated after the target is removed and the
        // source recreated. The error message itself needs integration tests.
        for _ in 0..2 {
            // (Re)create the source file before each attempt.
            fs::write(&original_file, SAMPLE_SRT).unwrap();

            let result = engine.rename_file(&match_op).await;
            assert!(result.is_ok());

            // Remove the renamed file so the next attempt starts clean.
            if renamed_file.exists() {
                fs::remove_file(&renamed_file).unwrap();
            }
        }
    }

    #[test]
    fn test_file_operation_message_format() {
        let source_name = "test.srt";
        let target_name = "renamed.srt";

        // Simulate success message format
        let success_msg = format!("  ✓ Renamed: {} -> {}", source_name, target_name);
        assert!(success_msg.contains("✓"));
        assert!(success_msg.contains("Renamed:"));
        assert!(success_msg.contains(source_name));
        assert!(success_msg.contains(target_name));

        // Simulate failure message format
        let error_msg = format!(
            "  ✗ Rename failed: {} -> {} (target file does not exist after operation)",
            source_name, target_name
        );
        assert!(error_msg.contains("✗"));
        assert!(error_msg.contains("Rename failed:"));
        assert!(error_msg.contains("target file does not exist"));
        assert!(error_msg.contains(source_name));
        assert!(error_msg.contains(target_name));
    }

    #[test]
    fn test_copy_operation_message_format() {
        let source_name = "subtitle.srt";
        let target_name = "video.srt";

        // Simulate success message format
        let success_msg = format!("  ✓ Copied: {} -> {}", source_name, target_name);
        assert!(success_msg.contains("✓"));
        assert!(success_msg.contains("Copied:"));

        // Simulate failure message format
        let error_msg = format!(
            "  ✗ Copy failed: {} -> {} (target file does not exist after operation)",
            source_name, target_name
        );
        assert!(error_msg.contains("✗"));
        assert!(error_msg.contains("Copy failed:"));
        assert!(error_msg.contains("target file does not exist"));
    }

    #[test]
    fn test_move_operation_message_format() {
        let source_name = "subtitle.srt";
        let target_name = "video.srt";

        // Simulate success message format
        let success_msg = format!("  ✓ Moved: {} -> {}", source_name, target_name);
        assert!(success_msg.contains("✓"));
        assert!(success_msg.contains("Moved:"));

        // Simulate failure message format
        let error_msg = format!(
            "  ✗ Move failed: {} -> {} (target file does not exist after operation)",
            source_name, target_name
        );
        assert!(error_msg.contains("✗"));
        assert!(error_msg.contains("Move failed:"));
        assert!(error_msg.contains("target file does not exist"));
    }
}
549
550/// Match operation result representing a single video-subtitle match.
551///
552/// Contains all information about a successful match between a video file
553/// and a subtitle file, including confidence metrics and reasoning.
554#[derive(Debug)]
555pub struct MatchOperation {
556    /// The matched video file
557    pub video_file: MediaFile,
558    /// The matched subtitle file
559    pub subtitle_file: MediaFile,
560    /// The new filename for the subtitle file
561    pub new_subtitle_name: String,
562    /// Confidence score of the match (0.0 to 1.0)
563    pub confidence: f32,
564    /// List of reasons supporting this match
565    pub reasoning: Vec<String>,
566    /// File relocation mode for this operation
567    pub relocation_mode: FileRelocationMode,
568    /// Target relocation path if operation is needed
569    pub relocation_target_path: Option<std::path::PathBuf>,
570    /// Whether relocation operation is needed (different folders)
571    pub requires_relocation: bool,
572}
573
574/// Engine for matching video and subtitle files using AI analysis.
575pub struct MatchEngine {
576    ai_client: Box<dyn AIProvider>,
577    discovery: FileDiscovery,
578    config: MatchConfig,
579}
580
581impl MatchEngine {
582    /// Creates a new `MatchEngine` with the given AI provider and configuration.
583    pub fn new(ai_client: Box<dyn AIProvider>, config: MatchConfig) -> Self {
584        Self {
585            ai_client,
586            discovery: FileDiscovery::new(),
587            config,
588        }
589    }
590
591    /// Matches video and subtitle files from a specified list of files.
592    ///
593    /// This method processes a user-provided list of files, filtering them into
594    /// video and subtitle files, then performing AI-powered matching analysis.
595    /// This is useful when users specify exact files via -i parameters.
596    ///
597    /// # Arguments
598    ///
599    /// * `file_paths` - A slice of file paths to process for matching
600    ///
601    /// # Returns
602    ///
603    /// A list of `MatchOperation` entries that meet the confidence threshold.
604    pub async fn match_file_list(&self, file_paths: &[PathBuf]) -> Result<Vec<MatchOperation>> {
605        // 1. Process the file list to create MediaFile objects
606        let files = self.discovery.scan_file_list(file_paths)?;
607
608        let videos: Vec<_> = files
609            .iter()
610            .filter(|f| matches!(f.file_type, MediaFileType::Video))
611            .collect();
612        let subtitles: Vec<_> = files
613            .iter()
614            .filter(|f| matches!(f.file_type, MediaFileType::Subtitle))
615            .collect();
616
617        if videos.is_empty() || subtitles.is_empty() {
618            return Ok(Vec::new());
619        }
620
621        // 2. Check if we can use cache for file list operations
622        // Create a stable cache key based on sorted file paths and their metadata
623        let cache_key = self.calculate_file_list_cache_key(file_paths)?;
624        if let Some(ops) = self.check_file_list_cache(&cache_key).await? {
625            return Ok(ops);
626        }
627
628        // 3. Content sampling
629        let content_samples = if self.config.enable_content_analysis {
630            self.extract_content_samples(&subtitles).await?
631        } else {
632            Vec::new()
633        };
634
635        // 4. AI analysis request
636        // Generate AI analysis request: include file IDs for precise matching
637        let video_files: Vec<String> = videos
638            .iter()
639            .map(|v| format!("ID:{} | Name:{} | Path:{}", v.id, v.name, v.relative_path))
640            .collect();
641        let subtitle_files: Vec<String> = subtitles
642            .iter()
643            .map(|s| format!("ID:{} | Name:{} | Path:{}", s.id, s.name, s.relative_path))
644            .collect();
645
646        let analysis_request = AnalysisRequest {
647            video_files,
648            subtitle_files,
649            content_samples,
650        };
651
652        // 5. Query AI service
653        let match_result = self.ai_client.analyze_content(analysis_request).await?;
654
655        // Debug: Log AI analysis results
656        eprintln!("šŸ” AI Analysis Results:");
657        eprintln!("   - Total matches: {}", match_result.matches.len());
658        eprintln!(
659            "   - Confidence threshold: {:.2}",
660            self.config.confidence_threshold
661        );
662        for ai_match in &match_result.matches {
663            eprintln!(
664                "   - {} -> {} (confidence: {:.2})",
665                ai_match.video_file_id, ai_match.subtitle_file_id, ai_match.confidence
666            );
667        }
668
669        // 6. Assemble match operation list
670        let mut operations = Vec::new();
671
672        for ai_match in match_result.matches {
673            if ai_match.confidence >= self.config.confidence_threshold {
674                let video_match =
675                    Self::find_media_file_by_id_or_path(&videos, &ai_match.video_file_id, None);
676                let subtitle_match = Self::find_media_file_by_id_or_path(
677                    &subtitles,
678                    &ai_match.subtitle_file_id,
679                    None,
680                );
681                match (video_match, subtitle_match) {
682                    (Some(video), Some(subtitle)) => {
683                        let new_name = self.generate_subtitle_name(video, subtitle);
684
685                        // Determine if relocation is needed
686                        let requires_relocation = self.config.relocation_mode
687                            != FileRelocationMode::None
688                            && subtitle.path.parent() != video.path.parent();
689
690                        let relocation_target_path = if requires_relocation {
691                            let video_dir = video.path.parent().unwrap();
692                            Some(video_dir.join(&new_name))
693                        } else {
694                            None
695                        };
696
697                        operations.push(MatchOperation {
698                            video_file: (*video).clone(),
699                            subtitle_file: (*subtitle).clone(),
700                            new_subtitle_name: new_name,
701                            confidence: ai_match.confidence,
702                            reasoning: ai_match.match_factors,
703                            relocation_mode: self.config.relocation_mode.clone(),
704                            relocation_target_path,
705                            requires_relocation,
706                        });
707                    }
708                    _ => {
709                        eprintln!(
710                            "āš ļø  Cannot find AI-suggested file pair:\n     Video ID: '{}'\n     Subtitle ID: '{}'",
711                            ai_match.video_file_id, ai_match.subtitle_file_id
712                        );
713                        eprintln!("āŒ No matching files found that meet the criteria");
714                        eprintln!("šŸ” Available file statistics:");
715                        eprintln!("   Video files ({} files):", videos.len());
716                        for video in &videos {
717                            eprintln!("     - ID: {} | {}", video.id, video.name);
718                        }
719                        eprintln!("   Subtitle files ({} files):", subtitles.len());
720                        for subtitle in &subtitles {
721                            eprintln!("     - ID: {} | {}", subtitle.id, subtitle.name);
722                        }
723                    }
724                }
725            }
726        }
727
728        // 7. Save to cache for future use
729        self.save_file_list_cache(&cache_key, &operations).await?;
730
731        Ok(operations)
732    }
733
734    async fn extract_content_samples(
735        &self,
736        subtitles: &[&MediaFile],
737    ) -> Result<Vec<ContentSample>> {
738        let mut samples = Vec::new();
739
740        for subtitle in subtitles {
741            let content = std::fs::read_to_string(&subtitle.path)?;
742            let preview = self.create_content_preview(&content);
743
744            samples.push(ContentSample {
745                filename: subtitle.name.clone(),
746                content_preview: preview,
747                file_size: subtitle.size,
748            });
749        }
750
751        Ok(samples)
752    }
753
754    fn create_content_preview(&self, content: &str) -> String {
755        let lines: Vec<&str> = content.lines().take(20).collect();
756        let preview = lines.join("\n");
757
758        if preview.len() > self.config.max_sample_length {
759            format!("{}...", &preview[..self.config.max_sample_length])
760        } else {
761            preview
762        }
763    }
764
765    fn generate_subtitle_name(&self, video: &MediaFile, subtitle: &MediaFile) -> String {
766        let detector = LanguageDetector::new();
767
768        // Remove the extension from the video file name (if any)
769        let video_base_name = if !video.extension.is_empty() {
770            video
771                .name
772                .strip_suffix(&format!(".{}", video.extension))
773                .unwrap_or(&video.name)
774        } else {
775            &video.name
776        };
777
778        if let Some(code) = detector.get_primary_language(&subtitle.path) {
779            format!("{}.{}.{}", video_base_name, code, subtitle.extension)
780        } else {
781            format!("{}.{}", video_base_name, subtitle.extension)
782        }
783    }
784
785    /// Execute match operations with dry-run mode support
786    pub async fn execute_operations(
787        &self,
788        operations: &[MatchOperation],
789        dry_run: bool,
790    ) -> Result<()> {
791        for op in operations {
792            if dry_run {
793                println!(
794                    "Preview: {} -> {}",
795                    op.subtitle_file.name, op.new_subtitle_name
796                );
797                if op.requires_relocation {
798                    if let Some(target_path) = &op.relocation_target_path {
799                        let operation_verb = match op.relocation_mode {
800                            FileRelocationMode::Copy => "Copy",
801                            FileRelocationMode::Move => "Move",
802                            _ => "",
803                        };
804                        println!(
805                            "Preview: {} {} to {}",
806                            operation_verb,
807                            op.subtitle_file.path.display(),
808                            target_path.display()
809                        );
810                    }
811                }
812            } else {
813                // Delegate file operations to FileProcessingTask
814                let mut tasks = Vec::new();
815                // Backup source if move and enabled
816                if op.relocation_mode == FileRelocationMode::Move && self.config.backup_enabled {
817                    tasks.push(
818                        self.create_backup_task(
819                            &op.subtitle_file.path,
820                            &op.subtitle_file.extension,
821                        ),
822                    );
823                }
824                // Copy or local copy with rename
825                if op.relocation_mode == FileRelocationMode::Copy {
826                    tasks.push(self.create_copy_task(op));
827                }
828                // Rename original file if any
829                if op.relocation_mode != FileRelocationMode::Copy {
830                    tasks.push(self.create_rename_task(op));
831                }
832                // Execute all tasks sequentially
833                for t in tasks {
834                    if let TaskResult::Failed(err) = t.execute().await {
835                        return Err(SubXError::FileOperationFailed(err));
836                    }
837                }
838            }
839        }
840        Ok(())
841    }
842
843    /// Rename subtitle file by delegating to FileProcessingTask
844    async fn rename_file(&self, op: &MatchOperation) -> Result<()> {
845        let task = self.create_rename_task(op);
846        match task.execute().await {
847            TaskResult::Success(_) => Ok(()),
848            TaskResult::Failed(err) => Err(SubXError::FileOperationFailed(err)),
849            other => Err(SubXError::FileOperationFailed(format!(
850                "Unexpected rename result: {:?}",
851                other
852            ))),
853        }
854    }
855
856    /// Resolve filename conflicts by adding numeric suffix
857    fn resolve_filename_conflict(&self, target: std::path::PathBuf) -> Result<std::path::PathBuf> {
858        if !target.exists() {
859            return Ok(target);
860        }
861        match self.config.conflict_resolution {
862            ConflictResolution::Skip => {
863                eprintln!(
864                    "Warning: Skipping relocation due to existing file: {}",
865                    target.display()
866                );
867                Ok(target)
868            }
869            ConflictResolution::AutoRename => {
870                let file_stem = target
871                    .file_stem()
872                    .and_then(|s| s.to_str())
873                    .unwrap_or("file");
874                let extension = target.extension().and_then(|s| s.to_str()).unwrap_or("");
875                let parent = target.parent().unwrap_or_else(|| std::path::Path::new("."));
876                for i in 1..1000 {
877                    let new_name = if extension.is_empty() {
878                        format!("{}.{}", file_stem, i)
879                    } else {
880                        format!("{}.{}.{}", file_stem, i, extension)
881                    };
882                    let new_path = parent.join(new_name);
883                    if !new_path.exists() {
884                        return Ok(new_path);
885                    }
886                }
887                Err(SubXError::FileOperationFailed(
888                    "Could not resolve filename conflict".to_string(),
889                ))
890            }
891            ConflictResolution::Prompt => {
892                eprintln!("Warning: Conflict resolution prompt not implemented, using auto-rename");
893                self.resolve_filename_conflict(target)
894            }
895        }
896    }
897
898    /// Create a task to copy (or rename) a file with new name
899    fn create_copy_task(&self, op: &MatchOperation) -> FileProcessingTask {
900        // In copy mode, always use the original subtitle file as source
901        let source = op.subtitle_file.path.clone();
902        let target_base = op.relocation_target_path.clone().unwrap();
903        let final_target = self.resolve_filename_conflict(target_base).unwrap();
904        FileProcessingTask::new(
905            source.clone(),
906            Some(final_target.clone()),
907            ProcessingOperation::CopyWithRename {
908                source,
909                target: final_target,
910            },
911        )
912    }
913
914    /// Create a task to backup a file
915    fn create_backup_task(&self, source: &std::path::Path, ext: &str) -> FileProcessingTask {
916        let backup_path = source.with_extension(format!("{}.backup", ext));
917        FileProcessingTask::new(
918            source.to_path_buf(),
919            Some(backup_path.clone()),
920            ProcessingOperation::CreateBackup {
921                source: source.to_path_buf(),
922                backup: backup_path,
923            },
924        )
925    }
926
927    /// Create a task to rename (move) a file
928    fn create_rename_task(&self, op: &MatchOperation) -> FileProcessingTask {
929        let old = op.subtitle_file.path.clone();
930        // If relocation is required, use the relocation target path
931        let new_path = if op.requires_relocation && op.relocation_target_path.is_some() {
932            let target_base = op.relocation_target_path.clone().unwrap();
933            self.resolve_filename_conflict(target_base).unwrap()
934        } else {
935            old.with_file_name(&op.new_subtitle_name)
936        };
937
938        FileProcessingTask::new(
939            old.clone(),
940            Some(new_path.clone()),
941            ProcessingOperation::RenameFile {
942                source: old,
943                target: new_path,
944            },
945        )
946    }
947
948    /// Calculate cache key for file list operations
949    fn calculate_file_list_cache_key(&self, file_paths: &[PathBuf]) -> Result<String> {
950        use std::collections::BTreeMap;
951        use std::collections::hash_map::DefaultHasher;
952        use std::hash::{Hash, Hasher};
953
954        // Sort paths to ensure consistent key generation
955        let mut path_metadata = BTreeMap::new();
956        for path in file_paths {
957            if let Ok(metadata) = path.metadata() {
958                let canonical = path.canonicalize().unwrap_or_else(|_| path.to_path_buf());
959                path_metadata.insert(
960                    canonical.to_string_lossy().to_string(),
961                    (metadata.len(), metadata.modified().ok()),
962                );
963            }
964        }
965
966        // Include config hash to invalidate cache when configuration changes
967        let config_hash = self.calculate_config_hash()?;
968
969        let mut hasher = DefaultHasher::new();
970        path_metadata.hash(&mut hasher);
971        config_hash.hash(&mut hasher);
972
973        Ok(format!("filelist_{:016x}", hasher.finish()))
974    }
975
976    /// Check cache for file list operations
977    async fn check_file_list_cache(&self, cache_key: &str) -> Result<Option<Vec<MatchOperation>>> {
978        let cache_file_path = self.get_cache_file_path()?;
979        let cache_data = CacheData::load(&cache_file_path).ok();
980
981        if let Some(cache_data) = cache_data {
982            if cache_data.directory == cache_key {
983                // Rebuild match operation list for file list cache
984                let mut ops = Vec::new();
985                for item in cache_data.match_operations {
986                    // For file list operations, we reconstruct operations from cached data
987                    let video_path = PathBuf::from(&item.video_file);
988                    let subtitle_path = PathBuf::from(&item.subtitle_file);
989
990                    if video_path.exists() && subtitle_path.exists() {
991                        // Create minimal MediaFile objects for the operation
992                        let video_meta = video_path.metadata()?;
993                        let subtitle_meta = subtitle_path.metadata()?;
994
995                        let video_file = MediaFile {
996                            id: generate_file_id(&video_path, video_meta.len()),
997                            path: video_path.clone(),
998                            file_type: MediaFileType::Video,
999                            size: video_meta.len(),
1000                            name: video_path
1001                                .file_name()
1002                                .unwrap()
1003                                .to_string_lossy()
1004                                .to_string(),
1005                            extension: video_path
1006                                .extension()
1007                                .unwrap_or_default()
1008                                .to_string_lossy()
1009                                .to_lowercase(),
1010                            relative_path: video_path
1011                                .file_name()
1012                                .unwrap()
1013                                .to_string_lossy()
1014                                .to_string(),
1015                        };
1016
1017                        let subtitle_file = MediaFile {
1018                            id: generate_file_id(&subtitle_path, subtitle_meta.len()),
1019                            path: subtitle_path.clone(),
1020                            file_type: MediaFileType::Subtitle,
1021                            size: subtitle_meta.len(),
1022                            name: subtitle_path
1023                                .file_name()
1024                                .unwrap()
1025                                .to_string_lossy()
1026                                .to_string(),
1027                            extension: subtitle_path
1028                                .extension()
1029                                .unwrap_or_default()
1030                                .to_string_lossy()
1031                                .to_lowercase(),
1032                            relative_path: subtitle_path
1033                                .file_name()
1034                                .unwrap()
1035                                .to_string_lossy()
1036                                .to_string(),
1037                        };
1038
1039                        // Recalculate relocation information based on current configuration
1040                        let requires_relocation = self.config.relocation_mode
1041                            != FileRelocationMode::None
1042                            && subtitle_file.path.parent() != video_file.path.parent();
1043
1044                        let relocation_target_path = if requires_relocation {
1045                            let video_dir = video_file.path.parent().unwrap();
1046                            Some(video_dir.join(&item.new_subtitle_name))
1047                        } else {
1048                            None
1049                        };
1050
1051                        ops.push(MatchOperation {
1052                            video_file,
1053                            subtitle_file,
1054                            new_subtitle_name: item.new_subtitle_name,
1055                            confidence: item.confidence,
1056                            reasoning: item.reasoning,
1057                            relocation_mode: self.config.relocation_mode.clone(),
1058                            relocation_target_path,
1059                            requires_relocation,
1060                        });
1061                    }
1062                }
1063                return Ok(Some(ops));
1064            }
1065        }
1066        Ok(None)
1067    }
1068
1069    /// Save cache for file list operations
1070    async fn save_file_list_cache(
1071        &self,
1072        cache_key: &str,
1073        operations: &[MatchOperation],
1074    ) -> Result<()> {
1075        let cache_file_path = self.get_cache_file_path()?;
1076        let config_hash = self.calculate_config_hash()?;
1077
1078        let mut cache_items = Vec::new();
1079        for op in operations {
1080            cache_items.push(OpItem {
1081                video_file: op.video_file.path.to_string_lossy().to_string(),
1082                subtitle_file: op.subtitle_file.path.to_string_lossy().to_string(),
1083                new_subtitle_name: op.new_subtitle_name.clone(),
1084                confidence: op.confidence,
1085                reasoning: op.reasoning.clone(),
1086            });
1087        }
1088
1089        let cache_data = CacheData {
1090            cache_version: "1.0".to_string(),
1091            directory: cache_key.to_string(),
1092            file_snapshot: vec![], // Not used for file list cache
1093            match_operations: cache_items,
1094            created_at: std::time::SystemTime::now()
1095                .duration_since(std::time::UNIX_EPOCH)
1096                .unwrap()
1097                .as_secs(),
1098            ai_model_used: self.config.ai_model.clone(),
1099            config_hash,
1100            original_relocation_mode: format!("{:?}", self.config.relocation_mode),
1101            original_backup_enabled: self.config.backup_enabled,
1102        };
1103
1104        // Save cache data to file
1105        let cache_dir = cache_file_path.parent().unwrap();
1106        std::fs::create_dir_all(cache_dir)?;
1107        let cache_json = serde_json::to_string_pretty(&cache_data)?;
1108        std::fs::write(&cache_file_path, cache_json)?;
1109
1110        Ok(())
1111    }
1112
1113    /// Get cache file path
1114    fn get_cache_file_path(&self) -> Result<std::path::PathBuf> {
1115        // First check XDG_CONFIG_HOME environment variable (used for testing)
1116        let dir = if let Some(xdg_config) = std::env::var_os("XDG_CONFIG_HOME") {
1117            std::path::PathBuf::from(xdg_config)
1118        } else {
1119            dirs::config_dir()
1120                .ok_or_else(|| SubXError::config("Unable to determine cache directory"))?
1121        };
1122        Ok(dir.join("subx").join("match_cache.json"))
1123    }
1124
1125    /// Calculate current configuration hash for cache validation
1126    fn calculate_config_hash(&self) -> Result<String> {
1127        use std::collections::hash_map::DefaultHasher;
1128        use std::hash::{Hash, Hasher};
1129
1130        let mut hasher = DefaultHasher::new();
1131        // Add configuration items that affect cache validity to the hash
1132        format!("{:?}", self.config.relocation_mode).hash(&mut hasher);
1133        self.config.backup_enabled.hash(&mut hasher);
1134        // Add other relevant configuration items
1135
1136        Ok(format!("{:016x}", hasher.finish()))
1137    }
1138
1139    /// Find a media file by ID, with an optional fallback to relative path or name.
1140    fn find_media_file_by_id_or_path<'a>(
1141        files: &'a [&MediaFile],
1142        file_id: &str,
1143        fallback_path: Option<&str>,
1144    ) -> Option<&'a MediaFile> {
1145        if let Some(file) = files.iter().find(|f| f.id == file_id) {
1146            return Some(*file);
1147        }
1148        if let Some(path) = fallback_path {
1149            if let Some(file) = files.iter().find(|f| f.relative_path == path) {
1150                return Some(*file);
1151            }
1152            files.iter().find(|f| f.name == path).copied()
1153        } else {
1154            None
1155        }
1156    }
1157
1158    /// Log available files to assist debugging when a match is not found.
1159    fn log_available_files(&self, files: &[&MediaFile], file_type: &str) {
1160        eprintln!("   Available {} files:", file_type);
1161        for f in files {
1162            eprintln!(
1163                "     - ID: {} | Name: {} | Path: {}",
1164                f.id, f.name, f.relative_path
1165            );
1166        }
1167    }
1168
1169    /// Provide detailed information when no matches are found.
1170    fn log_no_matches_found(
1171        &self,
1172        match_result: &MatchResult,
1173        videos: &[MediaFile],
1174        subtitles: &[MediaFile],
1175    ) {
1176        eprintln!("\nāŒ No matching files found that meet the criteria");
1177        eprintln!("šŸ” AI analysis results:");
1178        eprintln!("   - Total matches: {}", match_result.matches.len());
1179        eprintln!(
1180            "   - Confidence threshold: {:.2}",
1181            self.config.confidence_threshold
1182        );
1183        eprintln!(
1184            "   - Matches meeting threshold: {}",
1185            match_result
1186                .matches
1187                .iter()
1188                .filter(|m| m.confidence >= self.config.confidence_threshold)
1189                .count()
1190        );
1191        eprintln!("\nšŸ“‚ Scanned files:");
1192        eprintln!("   Video files ({} files):", videos.len());
1193        for v in videos {
1194            eprintln!("     - ID: {} | {}", v.id, v.relative_path);
1195        }
1196        eprintln!("   Subtitle files ({} files):", subtitles.len());
1197        for s in subtitles {
1198            eprintln!("     - ID: {} | {}", s.id, s.relative_path);
1199        }
1200    }
1201}