audiobook_forge/utils/
merge_patterns.rs

1//! M4B merge pattern detection for identifying related audiobook parts
2
3use regex::Regex;
4use std::path::Path;
5
6/// Result of merge pattern analysis
7#[derive(Debug, Clone)]
8pub struct MergePatternResult {
9    /// Whether a merge pattern was detected
10    pub pattern_detected: bool,
11    /// The base name (without part/disc indicators)
12    pub base_name: Option<String>,
13    /// The detected pattern type
14    pub pattern_type: Option<MergePatternType>,
15}
16
/// The kinds of multi-file naming schemes the detector understands.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum MergePatternType {
    /// A `part` / `pt` / `pt.` keyword followed by a number,
    /// e.g. `Part 1`, `Part1`, `Pt 1`, `Pt. 1`.
    Part,
    /// A `disc` / `disk` / `cd` keyword followed by a number,
    /// e.g. `Disc 1`, `Disc1`, `Disk 1`, `CD 1`, `CD1`.
    Disc,
    /// A bare one- or two-digit number separated from the title by
    /// whitespace, e.g. `Title 01.m4b`, `Title 02.m4b`.
    NumericSuffix,
}
27
28/// Detect if a list of M4B files follow a merge pattern
29pub fn detect_merge_pattern(files: &[&Path]) -> MergePatternResult {
30    if files.len() < 2 {
31        return MergePatternResult {
32            pattern_detected: false,
33            base_name: None,
34            pattern_type: None,
35        };
36    }
37
38    // Try each pattern type
39    if let Some((base, pattern_type)) = try_detect_pattern(files) {
40        return MergePatternResult {
41            pattern_detected: true,
42            base_name: Some(base),
43            pattern_type: Some(pattern_type),
44        };
45    }
46
47    MergePatternResult {
48        pattern_detected: false,
49        base_name: None,
50        pattern_type: None,
51    }
52}
53
54/// Try to detect a specific pattern type
55fn try_detect_pattern(files: &[&Path]) -> Option<(String, MergePatternType)> {
56    // Define patterns in order of specificity
57    lazy_static::lazy_static! {
58        // Part patterns: Part 1, Part1, Pt 1, Pt. 1
59        static ref PART_REGEX: Regex = Regex::new(
60            r"(?i)^(.+?)\s*(?:part|pt\.?)\s*(\d+)\.m4b$"
61        ).unwrap();
62
63        // Disc patterns: Disc 1, Disc1, CD1, CD 1, Disk 1
64        static ref DISC_REGEX: Regex = Regex::new(
65            r"(?i)^(.+?)\s*(?:disc|disk|cd)\s*(\d+)\.m4b$"
66        ).unwrap();
67
68        // Numeric suffix: Title 01.m4b, Title 1.m4b (must be at least 2 files with sequential numbers)
69        static ref NUMERIC_REGEX: Regex = Regex::new(
70            r"(?i)^(.+?)\s+(\d{1,2})\.m4b$"
71        ).unwrap();
72    }
73
74    // Try Part pattern
75    if let Some(base) = check_pattern_match(files, &PART_REGEX) {
76        return Some((base, MergePatternType::Part));
77    }
78
79    // Try Disc pattern
80    if let Some(base) = check_pattern_match(files, &DISC_REGEX) {
81        return Some((base, MergePatternType::Disc));
82    }
83
84    // Try Numeric suffix pattern
85    if let Some(base) = check_pattern_match(files, &NUMERIC_REGEX) {
86        return Some((base, MergePatternType::NumericSuffix));
87    }
88
89    None
90}
91
92/// Check if all files match a pattern and have the same base name
93fn check_pattern_match(files: &[&Path], regex: &Regex) -> Option<String> {
94    let mut base_names: Vec<String> = Vec::new();
95    let mut numbers: Vec<u32> = Vec::new();
96
97    for file in files {
98        let filename = file.file_name()?.to_str()?;
99        let caps = regex.captures(filename)?;
100
101        let base = caps.get(1)?.as_str().trim().to_string();
102        let num: u32 = caps.get(2)?.as_str().parse().ok()?;
103
104        base_names.push(base);
105        numbers.push(num);
106    }
107
108    // All base names must match
109    if base_names.is_empty() {
110        return None;
111    }
112
113    let first_base = &base_names[0];
114    if !base_names.iter().all(|b| b == first_base) {
115        return None;
116    }
117
118    // Numbers should be sequential starting from 1 (or 01)
119    numbers.sort();
120    let expected: Vec<u32> = (1..=(numbers.len() as u32)).collect();
121    if numbers != expected {
122        // Also try 0-indexed
123        let expected_zero: Vec<u32> = (0..(numbers.len() as u32)).collect();
124        if numbers != expected_zero {
125            return None;
126        }
127    }
128
129    Some(first_base.clone())
130}
131
132/// Sort files by their numeric part indicator
133pub fn sort_by_part_number(files: &mut [std::path::PathBuf]) {
134    lazy_static::lazy_static! {
135        static ref NUMBER_REGEX: Regex = Regex::new(
136            r"(?i)(?:part|pt\.?|disc|disk|cd)?\s*(\d+)\.m4b$"
137        ).unwrap();
138    }
139
140    files.sort_by(|a, b| {
141        let get_num = |p: &std::path::PathBuf| -> u32 {
142            p.file_name()
143                .and_then(|n| n.to_str())
144                .and_then(|s| NUMBER_REGEX.captures(s))
145                .and_then(|c| c.get(1))
146                .and_then(|m| m.as_str().parse().ok())
147                .unwrap_or(0)
148        };
149        get_num(a).cmp(&get_num(b))
150    });
151}
152
#[cfg(test)]
mod tests {
    use super::*;

    /// Run the detector over a list of bare file names.
    fn detect(names: &[&str]) -> MergePatternResult {
        let paths: Vec<&Path> = names.iter().map(|name| Path::new(*name)).collect();
        detect_merge_pattern(&paths)
    }

    #[test]
    fn test_detect_part_pattern() {
        let outcome = detect(&["Book Name Part 1.m4b", "Book Name Part 2.m4b"]);
        assert!(outcome.pattern_detected);
        assert_eq!(outcome.pattern_type, Some(MergePatternType::Part));
        assert_eq!(outcome.base_name, Some("Book Name".to_string()));
    }

    #[test]
    fn test_detect_disc_pattern() {
        let outcome = detect(&["Audiobook CD1.m4b", "Audiobook CD2.m4b"]);
        assert!(outcome.pattern_detected);
        assert_eq!(outcome.pattern_type, Some(MergePatternType::Disc));
    }

    #[test]
    fn test_detect_numeric_suffix() {
        let outcome = detect(&["My Book 01.m4b", "My Book 02.m4b"]);
        assert!(outcome.pattern_detected);
        assert_eq!(outcome.pattern_type, Some(MergePatternType::NumericSuffix));
    }

    #[test]
    fn test_no_pattern_detected() {
        let outcome = detect(&["Different Book.m4b", "Another Book.m4b"]);
        assert!(!outcome.pattern_detected);
    }

    #[test]
    fn test_single_file_no_pattern() {
        let outcome = detect(&["Single Book.m4b"]);
        assert!(!outcome.pattern_detected);
    }

    #[test]
    fn test_sort_by_part_number() {
        let mut files: Vec<std::path::PathBuf> =
            ["Book Part 3.m4b", "Book Part 1.m4b", "Book Part 2.m4b"]
                .iter()
                .map(std::path::PathBuf::from)
                .collect();

        sort_by_part_number(&mut files);

        let sorted_names: Vec<&str> = files
            .iter()
            .map(|path| path.file_name().unwrap().to_str().unwrap())
            .collect();
        assert_eq!(
            sorted_names,
            vec!["Book Part 1.m4b", "Book Part 2.m4b", "Book Part 3.m4b"]
        );
    }
}