Skip to main content

subx_cli/core/matcher/
mod.rs

1//! AI-powered subtitle file matching and discovery engine.
2//!
3//! This module provides sophisticated algorithms for automatically matching subtitle
4//! files with their corresponding video files using AI analysis, language detection,
5//! and intelligent filename pattern recognition. It handles complex scenarios including
6//! multiple subtitle languages, season/episode structures, and various naming conventions.
7//!
8//! # Core Features
9//!
10//! ## Intelligent File Discovery
11//! - **Recursive Search**: Traverses directory structures to find media and subtitle files
12//! - **Format Detection**: Automatically identifies video and subtitle file formats
13//! - **Pattern Recognition**: Understands common naming patterns and conventions
14//! - **Language Detection**: Identifies subtitle languages from filenames and content
15//!
16//! ## AI-Powered Matching
17//! - **Semantic Analysis**: Uses AI to understand filename semantics beyond patterns
18//! - **Content Correlation**: Matches based on content similarity and timing patterns
19//! - **Multi-Language Support**: Handles subtitle files in different languages
20//! - **Confidence Scoring**: Provides match confidence levels for user validation
21//!
22//! ## Advanced Matching Algorithms
23//! - **Fuzzy Matching**: Tolerates variations in naming conventions
24//! - **Episode Detection**: Recognizes season/episode patterns in TV series
25//! - **Quality Assessment**: Evaluates subtitle quality and completeness
26//! - **Conflict Resolution**: Handles multiple subtitle candidates intelligently
27//!
28//! # Architecture Overview
29//!
30//! The matching system consists of several interconnected components:
31//!
32//! ```text
33//! ┌─────────────────┐    ┌──────────────────┐    ┌─────────────────┐
34//! │   Discovery     │────│   AI Analysis    │────│   Match Engine  │
35//! │   - Find files  │    │   - Semantic     │    │   - Score calc  │
36//! │   - Language    │    │   - Content      │    │   - Validation  │
37//! │   - Metadata    │    │   - Confidence   │    │   - Ranking     │
38//! └─────────────────┘    └──────────────────┘    └─────────────────┘
39//!         │                        │                        │
40//!         └────────────────────────┼────────────────────────┘
41//!                                  │
42//!                    ┌─────────────────────────┐
43//!                    │       Cache System      │
44//!                    │   - Analysis results    │
45//!                    │   - Match history       │
46//!                    │   - Performance data    │
47//!                    └─────────────────────────┘
48//! ```
49//!
50//! # Usage Examples
51//!
52//! ## Basic File Matching
53//!
54//! ```rust,ignore
55//! use subx_cli::core::matcher::{MatchEngine, MatchConfig, FileDiscovery};
56//! use std::path::Path;
57//!
58//! // Configure matching parameters
59//! let config = MatchConfig {
60//!     confidence_threshold: 0.8,
61//!     dry_run: false,
62//!     ai_provider: Some("openai".to_string()),
63//!     ..Default::default()
64//! };
65//!
66//! // Initialize the matching engine
67//! let engine = MatchEngine::new(config);
68//!
69//! // Discover files in directories
70//! let discovery = FileDiscovery::new();
71//! let video_files = discovery.find_media_files(Path::new("/videos"))?;
72//! let subtitle_files = discovery.find_subtitle_files(Path::new("/subtitles"))?;
73//!
74//! // Perform matching
75//! let matches = engine.match_files(&video_files, &subtitle_files).await?;
76//!
77//! for match_result in matches {
78//!     println!("Matched: {} -> {} (confidence: {:.2})",
79//!         match_result.video_file.name,
80//!         match_result.subtitle_file.name,
81//!         match_result.confidence
82//!     );
83//! }
84//! ```
85//!
86//! ## Advanced Matching with Language Filtering
87//!
88//! ```rust,ignore
89//! use subx_cli::core::matcher::MatchConfig;
90//!
91//! let config = MatchConfig {
92//!     target_languages: vec!["zh".to_string(), "en".to_string()],
93//!     exclude_languages: vec!["ja".to_string()],
94//!     confidence_threshold: 0.75,
95//!     max_matches_per_video: 2, // Allow multiple subtitle languages
96//!     ..Default::default()
97//! };
98//!
99//! let matches = engine.match_files_with_config(&video_files, &subtitle_files, config).await?;
100//! ```
101//!
102//! ## TV Series Episode Matching
103//!
104//! ```rust,ignore
105//! // For TV series with season/episode structure
106//! let tv_config = MatchConfig {
107//!     series_mode: true,
108//!     season_episode_patterns: vec![
109//!         r"S(\d+)E(\d+)".to_string(),
110//!         r"Season (\d+) Episode (\d+)".to_string(),
111//!     ],
112//!     ..Default::default()
113//! };
114//!
115//! let tv_matches = engine.match_tv_series(&video_files, &subtitle_files, tv_config).await?;
116//! ```
117//!
118//! # Matching Algorithms
119//!
120//! ## 1. Filename Analysis
121//! - **Pattern Extraction**: Identifies common patterns like episode numbers, years, quality markers
122//! - **Language Code Detection**: Recognizes language codes in various formats (en, eng, english, etc.)
123//! - **Normalization**: Standardizes filenames for comparison by removing common variations
124//!
125//! ## 2. AI Semantic Analysis
126//! - **Title Extraction**: Uses AI to identify actual titles from complex filenames
127//! - **Content Understanding**: Analyzes subtitle content to understand context and themes
128//! - **Cross-Reference**: Compares extracted information between video and subtitle files
129//!
130//! ## 3. Confidence Scoring
131//! - **Multiple Factors**: Combines filename similarity, language match, content correlation
132//! - **Weighted Scoring**: Applies different weights based on reliability of each factor
133//! - **Threshold Filtering**: Only presents matches above configurable confidence levels
134//!
135//! ## 4. Conflict Resolution
136//! - **Ranking**: Orders multiple candidates by confidence score
137//! - **Deduplication**: Removes duplicate or overlapping matches
138//! - **User Preferences**: Applies user-defined preferences for language, quality, etc.
139//!
140//! # Performance Characteristics
141//!
142//! - **Caching**: Results are cached to avoid re-analysis of unchanged files
143//! - **Parallel Processing**: File analysis is performed concurrently for speed
144//! - **Incremental Updates**: Only processes new or modified files in subsequent runs
145//! - **Memory Efficient**: Streams large directory structures without loading all data
146//!
147//! # Error Handling
148//!
149//! The matching system provides comprehensive error handling for:
150//! - File system access issues (permissions, missing directories)
151//! - AI service connectivity and quota problems
152//! - Invalid or corrupted subtitle files
153//! - Configuration validation errors
154//! - Network timeouts and service degradation
155//!
156//! # Thread Safety
157//!
158//! All matching operations are thread-safe and can be used concurrently.
159//! The cache system uses appropriate synchronization for multi-threaded access.
160
161#![allow(dead_code)]
162
163pub mod discovery;
164pub mod engine;
165// Filename analyzer removed to simplify matching logic.
166
167pub use discovery::{FileDiscovery, MediaFile, MediaFileType};
168pub use engine::{MatchConfig, MatchEngine, MatchOperation};
169// pub use filename_analyzer::{FilenameAnalyzer, ParsedFilename};
170pub mod cache;
171pub mod journal;
172use crate::Result;
173use crate::core::language::{LanguageDetector, LanguageInfo};
174use crate::error::SubXError;
175use std::path::{Path, PathBuf};
176
177/// Extended file information structure with metadata for intelligent matching.
178///
179/// This structure contains comprehensive information about discovered files,
180/// including path relationships, language detection results, and contextual
181/// metadata that enables sophisticated matching algorithms.
182///
183/// # Purpose
184///
185/// `FileInfo` serves as the primary data structure for file representation
186/// in the matching system. It normalizes file information from different
187/// sources and provides a consistent interface for matching algorithms.
188///
189/// # Path Relationships
190///
191/// The structure maintains three different path representations:
192/// - `name`: Just the filename for display and basic comparison
193/// - `relative_path`: Path relative to search root for organization
194/// - `full_path`: Absolute path for file system operations
195///
196/// # Language Detection
197///
198/// Language information is automatically detected from:
199/// - Filename patterns (e.g., "movie.en.srt", "film.zh-tw.ass")
200/// - Directory structure (e.g., "English/", "Chinese/")
201/// - File content analysis for subtitle files
202///
203/// # Examples
204///
205/// ```rust,ignore
206/// use subx_cli::core::matcher::FileInfo;
207/// use std::path::PathBuf;
208///
209/// let root = PathBuf::from("/media/movies");
210/// let file_path = PathBuf::from("/media/movies/Action/movie.en.srt");
211///
212/// let file_info = FileInfo::new(&file_path, &root)?;
213///
214/// assert_eq!(file_info.name, "movie.en.srt");
215/// assert_eq!(file_info.relative_path, "Action/movie.en.srt");
216/// assert_eq!(file_info.directory, "Action");
217/// assert_eq!(file_info.depth, 1);
218///
219/// if let Some(lang) = &file_info.language {
220///     println!("Detected language: {}", lang.code);
221/// }
222/// ```
223#[derive(Debug, Clone)]
224pub struct FileInfo {
225    /// File name without directory path for display and comparison.
226    ///
227    /// This is the base filename including extension, useful for
228    /// pattern matching and user-friendly display.
229    pub name: String,
230
231    /// Path relative to the search root directory for organization.
232    ///
233    /// Maintains the directory structure context while being
234    /// independent of the absolute filesystem location.
235    pub relative_path: String,
236
237    /// Absolute file system path for file operations.
238    ///
239    /// Used for actual file reading, writing, and metadata access.
240    pub full_path: PathBuf,
241
242    /// Name of the immediate parent directory containing the file.
243    ///
244    /// Useful for organization-based matching and language detection
245    /// from directory names.
246    pub directory: String,
247
248    /// Directory depth relative to the root search path.
249    ///
250    /// Indicates how many subdirectory levels deep the file is located.
251    /// Depth 0 means the file is directly in the root directory.
252    pub depth: usize,
253
254    /// Detected language information from filename or content analysis.
255    ///
256    /// Contains language code, confidence level, and detection method.
257    /// May be `None` if no language could be reliably detected.
258    pub language: Option<LanguageInfo>,
259}
260
261impl FileInfo {
262    /// Construct a new `FileInfo` from a file path and search root directory.
263    ///
264    /// This method performs comprehensive analysis of the file location,
265    /// extracting path relationships, directory structure, and attempting
266    /// automatic language detection from the filename and path.
267    ///
268    /// # Arguments
269    ///
270    /// * `full_path` - Absolute path to the media or subtitle file
271    /// * `root_path` - Root directory for file discovery (used to compute relative paths)
272    ///
273    /// # Returns
274    ///
275    /// Returns a `FileInfo` struct with all metadata populated, including
276    /// optional language detection results.
277    ///
278    /// # Errors
279    ///
280    /// Returns `SubXError::Other` if:
281    /// - The file path cannot be made relative to the root path
282    /// - Path contains invalid Unicode characters
283    /// - File system access issues occur during analysis
284    ///
285    /// # Examples
286    ///
287    /// ```rust,ignore
288    /// use subx_cli::core::matcher::FileInfo;
289    /// use std::path::PathBuf;
290    ///
291    /// // Simple file in root directory
292    /// let root = PathBuf::from("/media/videos");
293    /// let file_path = root.join("movie.mp4");
294    /// let info = FileInfo::new(file_path, &root)?;
295    ///
296    /// assert_eq!(info.name, "movie.mp4");
297    /// assert_eq!(info.relative_path, "movie.mp4");
298    /// assert_eq!(info.depth, 0);
299    ///
300    /// // File in subdirectory with language
301    /// let sub_file = root.join("English").join("movie.en.srt");
302    /// let sub_info = FileInfo::new(sub_file, &root)?;
303    ///
304    /// assert_eq!(sub_info.name, "movie.en.srt");
305    /// assert_eq!(sub_info.relative_path, "English/movie.en.srt");
306    /// assert_eq!(sub_info.directory, "English");
307    /// assert_eq!(sub_info.depth, 1);
308    /// assert!(sub_info.language.is_some());
309    /// ```
310    ///
311    /// # Implementation Details
312    ///
313    /// - Path separators are normalized to Unix style (/) for consistency
314    /// - Directory depth is calculated based on relative path components
315    /// - Language detection runs automatically using multiple detection methods
316    /// - All path operations are Unicode-safe with fallback to empty strings
317    pub fn new(full_path: PathBuf, root_path: &Path) -> Result<Self> {
318        // Calculate relative path by stripping the root prefix
319        let relative_path = full_path
320            .strip_prefix(root_path)
321            .map_err(|e| SubXError::Other(e.into()))?
322            .to_string_lossy()
323            .replace('\\', "/"); // Normalize to Unix-style separators
324
325        // Extract the base filename
326        let name = full_path
327            .file_name()
328            .and_then(|n| n.to_str())
329            .unwrap_or_default()
330            .to_string();
331
332        // Get the immediate parent directory name
333        let directory = full_path
334            .parent()
335            .and_then(|p| p.file_name())
336            .and_then(|n| n.to_str())
337            .unwrap_or_default()
338            .to_string();
339
340        // Calculate directory depth by counting path separators
341        let depth = relative_path.matches('/').count();
342
343        // Attempt automatic language detection from path and filename
344        let detector = LanguageDetector::new();
345        let language = detector.detect_from_path(&full_path);
346
347        Ok(Self {
348            name,
349            relative_path,
350            full_path,
351            directory,
352            depth,
353            language,
354        })
355    }
356
357    /// Get the file extension without the leading dot.
358    ///
359    /// Returns the file extension in lowercase, or an empty string if
360    /// no extension is present.
361    ///
362    /// # Examples
363    ///
364    /// ```rust,ignore
365    /// assert_eq!(file_info.extension(), "mp4");
366    /// assert_eq!(subtitle_info.extension(), "srt");
367    /// ```
368    pub fn extension(&self) -> String {
369        self.full_path
370            .extension()
371            .and_then(|ext| ext.to_str())
372            .unwrap_or_default()
373            .to_lowercase()
374    }
375
376    /// Get the filename without extension (stem).
377    ///
378    /// Returns the base filename with the extension removed, useful
379    /// for comparison and matching operations.
380    ///
381    /// # Examples
382    ///
383    /// ```rust,ignore
384    /// // For "movie.en.srt"
385    /// assert_eq!(file_info.stem(), "movie.en");
386    ///
387    /// // For "episode01.mp4"
388    /// assert_eq!(file_info.stem(), "episode01");
389    /// ```
390    pub fn stem(&self) -> String {
391        self.full_path
392            .file_stem()
393            .and_then(|stem| stem.to_str())
394            .unwrap_or_default()
395            .to_string()
396    }
397
398    /// Check if this file is in the root directory (depth 0).
399    ///
400    /// Returns `true` if the file is directly in the search root,
401    /// `false` if it's in a subdirectory.
402    pub fn is_in_root(&self) -> bool {
403        self.depth == 0
404    }
405
406    /// Check if this file has detected language information.
407    ///
408    /// Returns `true` if language detection was successful and
409    /// confidence is above the detection threshold.
410    pub fn has_language(&self) -> bool {
411        self.language.is_some()
412    }
413
414    /// Get the detected language code if available.
415    ///
416    /// Returns the language code string (e.g., "en", "zh", "ja")
417    /// or `None` if no language was detected.
418    ///
419    /// # Examples
420    ///
421    /// ```rust,ignore
422    /// if let Some(lang) = file_info.language_code() {
423    ///     println!("Detected language: {}", lang);
424    /// }
425    /// ```
426    pub fn language_code(&self) -> Option<&str> {
427        self.language.as_ref().map(|lang| lang.code.as_str())
428    }
429
430    /// Create a normalized version of the filename for comparison.
431    ///
432    /// Applies various normalization rules to make filenames more
433    /// comparable during matching operations:
434    /// - Converts to lowercase
435    /// - Removes common separators and special characters
436    /// - Standardizes whitespace
437    /// - Removes quality indicators and release group tags
438    ///
439    /// # Returns
440    ///
441    /// A normalized filename string suitable for fuzzy matching.
442    ///
443    /// # Examples
444    ///
445    /// ```rust,ignore
446    /// // "Movie.Name.2023.1080p.BluRay.x264-GROUP.mkv"
447    /// // becomes "movie name 2023"
448    /// let normalized = file_info.normalized_name();
449    /// ```
450    pub fn normalized_name(&self) -> String {
451        let mut name = self.stem().to_lowercase();
452
453        // Remove common separators
454        name = name.replace(['.', '_', '-'], " ");
455
456        // Remove quality indicators
457        let quality_patterns = [
458            "1080p", "720p", "480p", "4k", "2160p", "bluray", "webrip", "hdtv", "dvdrip", "x264",
459            "x265", "h264", "h265",
460        ];
461
462        for pattern in &quality_patterns {
463            name = name.replace(pattern, "");
464        }
465
466        // Remove release group tags (text within brackets/parentheses)
467        name = regex::Regex::new(r"\[.*?\]|\(.*?\)")
468            .unwrap()
469            .replace_all(&name, "")
470            .to_string();
471
472        // Normalize whitespace
473        name.split_whitespace().collect::<Vec<_>>().join(" ")
474    }
475}
476
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::TempDir;

    /// Temporary directory that plays the role of the search root in a test.
    ///
    /// Keep the value alive for the whole test so the files created through
    /// it stay on disk while assertions run.
    struct Sandbox {
        dir: TempDir,
    }

    impl Sandbox {
        /// Creates a fresh, empty temporary root directory.
        fn new() -> Self {
            Self {
                dir: TempDir::new().unwrap(),
            }
        }

        /// Path of the temporary root directory.
        fn root(&self) -> &Path {
            self.dir.path()
        }

        /// Creates an empty file at `rel` below the root, including any
        /// missing parent directories, and returns its full path.
        fn touch(&self, rel: &str) -> PathBuf {
            let target = self.root().join(rel);
            std::fs::create_dir_all(target.parent().unwrap()).unwrap();
            std::fs::write(&target, b"").unwrap();
            target
        }

        /// Creates the file at `rel` and builds its `FileInfo` against the root.
        fn info(&self, rel: &str) -> Result<FileInfo> {
            FileInfo::new(self.touch(rel), self.root())
        }
    }

    #[test]
    fn test_file_info_creation() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("season1/episode1.mp4")?;

        assert_eq!(info.name, "episode1.mp4");
        assert_eq!(info.relative_path, "season1/episode1.mp4");
        assert_eq!(info.directory, "season1");
        assert_eq!(info.depth, 1);
        Ok(())
    }

    #[test]
    fn test_file_info_deep_path() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("series/season1/episodes/ep01.mp4")?;

        assert_eq!(info.relative_path, "series/season1/episodes/ep01.mp4");
        assert_eq!(info.depth, 3);
        Ok(())
    }

    #[test]
    fn test_file_info_root_file() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.mp4")?;

        assert_eq!(info.name, "movie.mp4");
        assert_eq!(info.relative_path, "movie.mp4");
        assert_eq!(info.depth, 0);
        assert!(info.is_in_root());
        Ok(())
    }

    #[test]
    fn test_file_info_not_in_root_for_subdirectory() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("subdir/movie.mp4")?;

        assert!(!info.is_in_root());
        Ok(())
    }

    #[test]
    fn test_file_info_error_path_not_under_root() {
        let search_root = Sandbox::new();
        // The file lives in a second, unrelated temporary directory.
        let elsewhere = Sandbox::new();
        let outsider = elsewhere.touch("movie.mp4");

        assert!(FileInfo::new(outsider, search_root.root()).is_err());
    }

    #[test]
    fn test_extension_returns_lowercase() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("Subtitle.SRT")?;

        assert_eq!(info.extension(), "srt");
        Ok(())
    }

    #[test]
    fn test_extension_various_formats() -> Result<()> {
        let sandbox = Sandbox::new();
        let cases = [
            ("movie.mp4", "mp4"),
            ("subtitle.ass", "ass"),
            ("sub.vtt", "vtt"),
            ("clip.mkv", "mkv"),
        ];

        for (filename, expected_ext) in cases {
            let info = sandbox.info(filename)?;
            assert_eq!(info.extension(), expected_ext, "failed for {filename}");
        }
        Ok(())
    }

    #[test]
    fn test_extension_no_extension() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("noextension")?;

        assert_eq!(info.extension(), "");
        Ok(())
    }

    #[test]
    fn test_stem_basic() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("episode01.mp4")?;

        assert_eq!(info.stem(), "episode01");
        Ok(())
    }

    #[test]
    fn test_stem_multiple_dots() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.en.srt")?;

        assert_eq!(info.stem(), "movie.en");
        Ok(())
    }

    #[test]
    fn test_stem_no_extension() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("noextension")?;

        assert_eq!(info.stem(), "noextension");
        Ok(())
    }

    #[test]
    fn test_has_language_with_detected_language() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.en.srt")?;

        // Only checked when the detector reports a language for "en".
        if info.has_language() {
            let code = info.language_code();
            assert!(code.is_some());
            assert_eq!(code, Some("en"));
        }
        Ok(())
    }

    #[test]
    fn test_has_language_without_language_indicator() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("plainmovie.mp4")?;

        assert!(!info.has_language());
        assert!(info.language_code().is_none());
        Ok(())
    }

    #[test]
    fn test_language_code_returns_correct_code() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.zh.srt")?;

        if let Some(code) = info.language_code() {
            assert_eq!(code, "zh");
        }
        Ok(())
    }

    #[test]
    fn test_language_detection_from_directory_name() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("English/movie.srt")?;

        // Only checked when the "English" directory yields a detection.
        if let Some(code) = info.language_code() {
            assert_eq!(code, "en");
        }
        Ok(())
    }

    #[test]
    fn test_normalized_name_lowercase_and_separators() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("My.Movie.Name.mp4")?;

        assert_eq!(info.normalized_name(), "my movie name");
        Ok(())
    }

    #[test]
    fn test_normalized_name_removes_quality_indicators() -> Result<()> {
        let sandbox = Sandbox::new();
        let cases = [
            ("Movie.2023.1080p.BluRay.mp4", "movie 2023"),
            ("Show.S01E01.720p.HDTV.mkv", "show s01e01"),
            ("Film.4K.x265.mp4", "film"),
            ("Documentary.2160p.WEBRip.mkv", "documentary"),
        ];

        for (filename, expected) in cases {
            let info = sandbox.info(filename)?;
            assert_eq!(info.normalized_name(), expected, "failed for {filename}");
        }
        Ok(())
    }

    #[test]
    fn test_normalized_name_removes_brackets() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("Movie [1080p] (BluRay).mp4")?;
        let normalized = info.normalized_name();

        // Neither kind of bracket may survive normalization.
        for bracket in ['[', ']', '(', ')'] {
            assert!(!normalized.contains(bracket));
        }
        Ok(())
    }

    #[test]
    fn test_normalized_name_normalizes_whitespace() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie___name.mp4")?;
        let normalized = info.normalized_name();

        // Underscores turn into spaces, and repeated spaces collapse to one.
        assert!(!normalized.contains("  "));
        assert!(!normalized.contains('_'));
        Ok(())
    }

    #[test]
    fn test_file_info_clone() -> Result<()> {
        let sandbox = Sandbox::new();
        let original = sandbox.info("movie.mp4")?;
        let copy = original.clone();

        assert_eq!(original.name, copy.name);
        assert_eq!(original.relative_path, copy.relative_path);
        assert_eq!(original.depth, copy.depth);
        assert_eq!(original.directory, copy.directory);
        assert_eq!(original.full_path, copy.full_path);
        Ok(())
    }

    #[test]
    fn test_file_info_debug_format() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.mp4")?;
        let rendered = format!("{info:?}");

        assert!(rendered.contains("FileInfo"));
        assert!(rendered.contains("movie.mp4"));
        Ok(())
    }

    #[test]
    fn test_file_info_directory_at_root() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("movie.mp4")?;

        // A root-level file reports the root directory's own name.
        let expected = sandbox.root().file_name().unwrap().to_string_lossy();
        assert_eq!(info.directory, expected.as_ref());
        Ok(())
    }

    #[test]
    fn test_normalized_name_removes_dvdrip() -> Result<()> {
        let sandbox = Sandbox::new();
        let info = sandbox.info("Old.Movie.DVDRip.avi")?;
        let normalized = info.normalized_name();

        assert!(!normalized.contains("dvdrip"));
        assert!(normalized.contains("old"));
        assert!(normalized.contains("movie"));
        Ok(())
    }

    #[test]
    fn test_normalized_name_h264_h265_removed() -> Result<()> {
        let sandbox = Sandbox::new();

        for filename in ["Film.H264.mp4", "Film.H265.mp4"] {
            let normalized = sandbox.info(filename)?.normalized_name();
            assert!(
                !normalized.contains("h264"),
                "h264 not removed in {filename}"
            );
            assert!(
                !normalized.contains("h265"),
                "h265 not removed in {filename}"
            );
            assert!(normalized.contains("film"));
        }
        Ok(())
    }

    #[test]
    fn test_file_info_full_path_preserved() -> Result<()> {
        let sandbox = Sandbox::new();
        let path = sandbox.touch("subdir/movie.srt");

        let info = FileInfo::new(path.clone(), sandbox.root())?;
        assert_eq!(info.full_path, path);
        Ok(())
    }

    #[test]
    fn test_multiple_language_codes_in_filename() -> Result<()> {
        let sandbox = Sandbox::new();
        // Several common two-letter language markers.
        let cases = [
            ("movie.en.srt", "en"),
            ("movie.zh.srt", "zh"),
            ("movie.ja.srt", "ja"),
        ];

        for (filename, expected_code) in cases {
            let info = sandbox.info(filename)?;
            if let Some(code) = info.language_code() {
                assert_eq!(code, expected_code, "wrong code for {filename}");
            }
        }
        Ok(())
    }
}