audiobook_forge/audio/
chapter_import.rs

1//! Chapter import and merge strategies
2
3use anyhow::{Context, Result};
4use std::path::Path;
5use crate::audio::Chapter;
6
/// Where a set of chapter definitions is obtained from.
#[derive(Debug, Clone)]
pub enum ChapterSource {
    /// Look the chapters up through the Audnex API, keyed by the book's ASIN.
    Audnex { asin: String },
    /// Read the chapters from a text file located at `path`.
    TextFile { path: std::path::PathBuf },
    /// Derive the chapters from the EPUB file located at `path`.
    Epub { path: std::path::PathBuf },
    /// Reuse the chapters that are already present in the M4B.
    Existing,
}
19
/// How freshly imported chapters are combined with the chapters a file already has.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum ChapterMergeStrategy {
    /// Retain the existing timestamps and take only the names from the import.
    KeepTimestamps,
    /// Discard the existing chapters; use the imported timestamps and names.
    ReplaceAll,
    /// Refuse to update when the two chapter lists differ in length.
    SkipOnMismatch,
    /// Let the user decide per file.
    Interactive,
}

impl std::fmt::Display for ChapterMergeStrategy {
    /// Writes the user-facing, one-line description of the strategy.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let description = match *self {
            Self::KeepTimestamps => "Keep existing timestamps, update names only",
            Self::ReplaceAll => "Replace all chapters (timestamps + names)",
            Self::SkipOnMismatch => "Skip if chapter counts don't match",
            Self::Interactive => "Ask for each file",
        };
        f.write_str(description)
    }
}
43
44/// Result of comparing existing vs new chapters
45#[derive(Debug)]
46pub struct ChapterComparison {
47    pub existing_count: usize,
48    pub new_count: usize,
49    pub matches: bool,
50}
51
52impl ChapterComparison {
53    pub fn new(existing: &[Chapter], new: &[Chapter]) -> Self {
54        Self {
55            existing_count: existing.len(),
56            new_count: new.len(),
57            matches: existing.len() == new.len(),
58        }
59    }
60}
61
/// Layouts a chapter text file may use.
#[derive(Debug, Clone, Copy)]
pub enum TextFormat {
    /// Every non-blank line is a chapter title; no timing information.
    Simple,
    /// Each line starts with a timestamp followed by the title (e.g., "00:00:00 Prologue").
    Timestamped,
    /// MP4Box chapter list (CHAPTER1=00:00:00\nCHAPTER1NAME=Title).
    Mp4Box,
}
72
73/// Parse chapters from text file
74pub fn parse_text_chapters(path: &Path) -> Result<Vec<Chapter>> {
75    let content = std::fs::read_to_string(path)
76        .context("Failed to read chapter file")?;
77
78    // Auto-detect format
79    let format = detect_text_format(&content);
80
81    match format {
82        TextFormat::Simple => parse_simple_format(&content),
83        TextFormat::Timestamped => parse_timestamped_format(&content),
84        TextFormat::Mp4Box => parse_mp4box_format(&content),
85    }
86}
87
88/// Detect text file format
89fn detect_text_format(content: &str) -> TextFormat {
90    use regex::Regex;
91
92    lazy_static::lazy_static! {
93        static ref MP4BOX_REGEX: Regex = Regex::new(r"CHAPTER\d+=\d{2}:\d{2}:\d{2}").unwrap();
94        static ref TIMESTAMP_REGEX: Regex = Regex::new(r"^\d{1,2}:\d{2}:\d{2}").unwrap();
95    }
96
97    // Check for MP4Box format
98    if MP4BOX_REGEX.is_match(content) {
99        return TextFormat::Mp4Box;
100    }
101
102    // Check for timestamped format (first line)
103    if let Some(first_line) = content.lines().next() {
104        if TIMESTAMP_REGEX.is_match(first_line.trim()) {
105            return TextFormat::Timestamped;
106        }
107    }
108
109    // Default to simple
110    TextFormat::Simple
111}
112
113/// Parse simple format (one title per line)
114fn parse_simple_format(content: &str) -> Result<Vec<Chapter>> {
115    let chapters: Vec<Chapter> = content
116        .lines()
117        .filter(|line| !line.trim().is_empty())
118        .enumerate()
119        .map(|(i, line)| {
120            Chapter::new(
121                (i + 1) as u32,
122                line.trim().to_string(),
123                0, // No timestamps in simple format
124                0,
125            )
126        })
127        .collect();
128
129    if chapters.is_empty() {
130        anyhow::bail!("No chapters found in file");
131    }
132
133    Ok(chapters)
134}
135
136/// Parse timestamped format (HH:MM:SS Title)
137fn parse_timestamped_format(content: &str) -> Result<Vec<Chapter>> {
138    use regex::Regex;
139
140    lazy_static::lazy_static! {
141        static ref TIMESTAMP_REGEX: Regex =
142            Regex::new(r"^(\d{1,2}):(\d{2}):(\d{2})\s*[-:]?\s*(.+)$").unwrap();
143    }
144
145    let mut chapters: Vec<Chapter> = Vec::new();
146
147    for (i, line) in content.lines().enumerate() {
148        let line = line.trim();
149        if line.is_empty() {
150            continue;
151        }
152
153        if let Some(caps) = TIMESTAMP_REGEX.captures(line) {
154            let hours: u64 = caps[1].parse().context("Invalid hour")?;
155            let minutes: u64 = caps[2].parse().context("Invalid minute")?;
156            let seconds: u64 = caps[3].parse().context("Invalid second")?;
157            let title = caps[4].trim().to_string();
158
159            let start_ms = (hours * 3600 + minutes * 60 + seconds) * 1000;
160
161            // Set end time for previous chapter
162            if !chapters.is_empty() {
163                let prev_idx = chapters.len() - 1;
164                chapters[prev_idx].end_time_ms = start_ms;
165            }
166
167            chapters.push(Chapter::new(
168                (i + 1) as u32,
169                title,
170                start_ms,
171                0, // Will be set by next chapter or total duration
172            ));
173        } else {
174            tracing::warn!("Skipping malformed line {}: {}", i + 1, line);
175        }
176    }
177
178    if chapters.is_empty() {
179        anyhow::bail!("No valid timestamped chapters found");
180    }
181
182    Ok(chapters)
183}
184
185/// Parse MP4Box format
186fn parse_mp4box_format(content: &str) -> Result<Vec<Chapter>> {
187    use regex::Regex;
188
189    lazy_static::lazy_static! {
190        static ref CHAPTER_REGEX: Regex =
191            Regex::new(r"CHAPTER(\d+)=(\d{2}):(\d{2}):(\d{2})\.(\d{3})").unwrap();
192        static ref NAME_REGEX: Regex =
193            Regex::new(r"CHAPTER(\d+)NAME=(.+)").unwrap();
194    }
195
196    let mut chapter_times: std::collections::HashMap<u32, u64> = std::collections::HashMap::new();
197    let mut chapter_names: std::collections::HashMap<u32, String> = std::collections::HashMap::new();
198
199    for line in content.lines() {
200        if let Some(caps) = CHAPTER_REGEX.captures(line) {
201            let num: u32 = caps[1].parse().context("Invalid chapter number")?;
202            let hours: u64 = caps[2].parse().context("Invalid hour")?;
203            let minutes: u64 = caps[3].parse().context("Invalid minute")?;
204            let seconds: u64 = caps[4].parse().context("Invalid second")?;
205            let millis: u64 = caps[5].parse().context("Invalid millisecond")?;
206
207            let start_ms = (hours * 3600 + minutes * 60 + seconds) * 1000 + millis;
208            chapter_times.insert(num, start_ms);
209        }
210
211        if let Some(caps) = NAME_REGEX.captures(line) {
212            let num: u32 = caps[1].parse().context("Invalid chapter number")?;
213            let name = caps[2].trim().to_string();
214            chapter_names.insert(num, name);
215        }
216    }
217
218    if chapter_times.is_empty() {
219        anyhow::bail!("No chapters found in MP4Box format");
220    }
221
222    // Build chapters
223    let mut chapters = Vec::new();
224    let mut numbers: Vec<u32> = chapter_times.keys().copied().collect();
225    numbers.sort();
226
227    for (i, &num) in numbers.iter().enumerate() {
228        let start_ms = *chapter_times.get(&num).unwrap();
229        let title = chapter_names
230            .get(&num)
231            .cloned()
232            .unwrap_or_else(|| format!("Chapter {}", num));
233
234        let end_ms = if i + 1 < numbers.len() {
235            *chapter_times.get(&numbers[i + 1]).unwrap()
236        } else {
237            0 // Will be set later
238        };
239
240        chapters.push(Chapter::new(num, title, start_ms, end_ms));
241    }
242
243    Ok(chapters)
244}
245
246/// Parse chapters from EPUB file (extracts from Table of Contents)
247pub fn parse_epub_chapters(path: &Path) -> Result<Vec<Chapter>> {
248    use epub::doc::EpubDoc;
249
250    let doc = EpubDoc::new(path)
251        .context("Failed to open EPUB file")?;
252
253    let toc = doc.toc
254        .iter()
255        .enumerate()
256        .map(|(i, nav_point)| {
257            Chapter::new(
258                (i + 1) as u32,
259                nav_point.label.clone(),
260                0, // No timestamps in EPUB
261                0,
262            )
263        })
264        .collect::<Vec<_>>();
265
266    if toc.is_empty() {
267        anyhow::bail!("No chapters found in EPUB table of contents");
268    }
269
270    Ok(toc)
271}
272
273/// Read existing chapters from M4B file using ffprobe
274pub async fn read_m4b_chapters(m4b_path: &Path) -> Result<Vec<Chapter>> {
275    use serde::Deserialize;
276    use tokio::process::Command;
277
278    #[derive(Debug, Deserialize)]
279    struct FfprobeChapter {
280        id: i64,
281        #[serde(default)]
282        start_time: String,
283        #[serde(default)]
284        end_time: String,
285        tags: Option<FfprobeTags>,
286    }
287
288    #[derive(Debug, Deserialize)]
289    struct FfprobeTags {
290        title: Option<String>,
291    }
292
293    #[derive(Debug, Deserialize)]
294    struct FfprobeOutput {
295        chapters: Vec<FfprobeChapter>,
296    }
297
298    let output = Command::new("ffprobe")
299        .args([
300            "-v", "quiet",
301            "-print_format", "json",
302            "-show_chapters",
303        ])
304        .arg(m4b_path)
305        .output()
306        .await
307        .context("Failed to execute ffprobe")?;
308
309    if !output.status.success() {
310        anyhow::bail!("ffprobe failed to read chapters from M4B file");
311    }
312
313    let json_str = String::from_utf8(output.stdout)
314        .context("ffprobe output is not valid UTF-8")?;
315
316    let ffprobe_output: FfprobeOutput = serde_json::from_str(&json_str)
317        .context("Failed to parse ffprobe JSON output")?;
318
319    let chapters: Vec<Chapter> = ffprobe_output
320        .chapters
321        .into_iter()
322        .enumerate()
323        .map(|(i, ch)| {
324            let title = ch
325                .tags
326                .and_then(|t| t.title)
327                .unwrap_or_else(|| format!("Chapter {}", i + 1));
328
329            let start_ms = parse_ffprobe_time(&ch.start_time).unwrap_or(0);
330            let end_ms = parse_ffprobe_time(&ch.end_time).unwrap_or(0);
331
332            Chapter::new((i + 1) as u32, title, start_ms, end_ms)
333        })
334        .collect();
335
336    if chapters.is_empty() {
337        tracing::warn!("No chapters found in M4B file");
338    }
339
340    Ok(chapters)
341}
342
/// Parse an ffprobe timestamp string (decimal seconds) into whole milliseconds.
///
/// The value is rounded to the nearest millisecond. Truncating instead would
/// lose a millisecond whenever the binary floating-point product lands just
/// below the exact value (for example `"1.001"` multiplies out to
/// `1000.9999999999999`).
///
/// Returns `None` when the string is not a number, or when the number is
/// negative, NaN or infinite and therefore cannot be a position in a file.
fn parse_ffprobe_time(time_str: &str) -> Option<u64> {
    let seconds: f64 = time_str.parse().ok()?;

    // `str::parse::<f64>` accepts "inf" and "NaN"; a float-to-int cast would
    // silently turn those into u64::MAX / 0.
    if !seconds.is_finite() || seconds < 0.0 {
        return None;
    }

    Some((seconds * 1000.0).round() as u64)
}
348
349/// Merge new chapters with existing chapters according to strategy
350pub fn merge_chapters(
351    existing: &[Chapter],
352    new: &[Chapter],
353    strategy: ChapterMergeStrategy,
354) -> Result<Vec<Chapter>> {
355    let comparison = ChapterComparison::new(existing, new);
356
357    match strategy {
358        ChapterMergeStrategy::SkipOnMismatch => {
359            if !comparison.matches {
360                anyhow::bail!(
361                    "Chapter count mismatch: existing has {}, new has {}. Skipping update.",
362                    comparison.existing_count,
363                    comparison.new_count
364                );
365            }
366            // If counts match, fall through to KeepTimestamps behavior
367            merge_keep_timestamps(existing, new)
368        }
369
370        ChapterMergeStrategy::KeepTimestamps => {
371            merge_keep_timestamps(existing, new)
372        }
373
374        ChapterMergeStrategy::ReplaceAll => {
375            // Simply return new chapters
376            Ok(new.to_vec())
377        }
378
379        ChapterMergeStrategy::Interactive => {
380            // This will be handled at a higher level (CLI handler)
381            // For now, default to KeepTimestamps
382            merge_keep_timestamps(existing, new)
383        }
384    }
385}
386
387/// Helper: Keep existing timestamps, update names only
388fn merge_keep_timestamps(existing: &[Chapter], new: &[Chapter]) -> Result<Vec<Chapter>> {
389    let min_len = existing.len().min(new.len());
390
391    let mut merged: Vec<Chapter> = existing[..min_len]
392        .iter()
393        .zip(&new[..min_len])
394        .map(|(old, new_ch)| {
395            Chapter::new(
396                old.number,
397                new_ch.title.clone(),
398                old.start_time_ms,
399                old.end_time_ms,
400            )
401        })
402        .collect();
403
404    // If there are extra existing chapters beyond new chapters, keep them
405    if existing.len() > min_len {
406        merged.extend_from_slice(&existing[min_len..]);
407    }
408
409    Ok(merged)
410}
411
/// Unit tests for format detection, the text parsers, ffprobe time parsing
/// and the merge strategies.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_chapter_comparison() {
        let existing = vec![
            Chapter::new(1, "Ch1".to_string(), 0, 1000),
            Chapter::new(2, "Ch2".to_string(), 1000, 2000),
        ];

        let new_matching = vec![
            Chapter::new(1, "Chapter One".to_string(), 0, 1000),
            Chapter::new(2, "Chapter Two".to_string(), 1000, 2000),
        ];

        let new_different = vec![
            Chapter::new(1, "Chapter One".to_string(), 0, 1000),
        ];

        let comp1 = ChapterComparison::new(&existing, &new_matching);
        assert!(comp1.matches);
        assert_eq!(comp1.existing_count, 2);

        let comp2 = ChapterComparison::new(&existing, &new_different);
        assert!(!comp2.matches);
    }

    #[test]
    fn test_merge_strategy_display() {
        assert_eq!(
            ChapterMergeStrategy::KeepTimestamps.to_string(),
            "Keep existing timestamps, update names only"
        );
    }

    #[test]
    fn test_detect_simple_format() {
        let content = "Prologue\nChapter 1\nChapter 2";
        assert!(matches!(detect_text_format(content), TextFormat::Simple));
    }

    #[test]
    fn test_detect_timestamped_format() {
        let content = "00:00:00 Prologue\n00:05:30 Chapter 1";
        assert!(matches!(detect_text_format(content), TextFormat::Timestamped));
    }

    #[test]
    fn test_detect_mp4box_format() {
        let content = "CHAPTER1=00:00:00.000\nCHAPTER1NAME=Prologue";
        assert!(matches!(detect_text_format(content), TextFormat::Mp4Box));
    }

    #[test]
    fn test_parse_simple_format() {
        let content = "Prologue\nChapter 1: The Beginning\nChapter 2: The Journey";
        let chapters = parse_simple_format(content).unwrap();

        assert_eq!(chapters.len(), 3);
        assert_eq!(chapters[0].title, "Prologue");
        assert_eq!(chapters[1].title, "Chapter 1: The Beginning");
        assert_eq!(chapters[2].title, "Chapter 2: The Journey");
    }

    #[test]
    fn test_parse_timestamped_format() {
        let content = "0:00:00 Prologue\n0:05:30 Chapter 1\n0:15:45 Chapter 2";
        let chapters = parse_timestamped_format(content).unwrap();

        assert_eq!(chapters.len(), 3);
        assert_eq!(chapters[0].start_time_ms, 0);
        assert_eq!(chapters[1].start_time_ms, 330_000); // 5:30
        assert_eq!(chapters[2].start_time_ms, 945_000); // 15:45
    }

    #[test]
    fn test_parse_mp4box_format() {
        let content = "CHAPTER1=00:00:00.000\nCHAPTER1NAME=Prologue\nCHAPTER2=00:05:30.500\nCHAPTER2NAME=Chapter 1";
        let chapters = parse_mp4box_format(content).unwrap();

        assert_eq!(chapters.len(), 2);
        assert_eq!(chapters[0].title, "Prologue");
        assert_eq!(chapters[0].start_time_ms, 0);
        assert_eq!(chapters[1].title, "Chapter 1");
        assert_eq!(chapters[1].start_time_ms, 330_500);
    }

    #[test]
    fn test_parse_epub_chapters() {
        use std::io::Write;
        use tempfile::NamedTempFile;

        // A plain-text file is not a valid EPUB, so parsing it must be rejected.
        let mut temp_file = NamedTempFile::new().unwrap();
        writeln!(temp_file, "Mock EPUB content").unwrap();

        // `parse_epub_chapters` never returns an empty list: it either fails
        // to open the file or bails on an empty table of contents.
        let result = parse_epub_chapters(temp_file.path());
        assert!(result.is_err());
    }

    #[test]
    fn test_merge_keep_timestamps() {
        let existing = vec![
            Chapter::new(1, "Chapter 1".to_string(), 0, 1000),
            Chapter::new(2, "Chapter 2".to_string(), 1000, 2000),
            Chapter::new(3, "Chapter 3".to_string(), 2000, 3000),
        ];

        let new = vec![
            Chapter::new(1, "Prologue".to_string(), 0, 0),
            Chapter::new(2, "The Beginning".to_string(), 0, 0),
            Chapter::new(3, "The Journey".to_string(), 0, 0),
        ];

        let merged = merge_chapters(&existing, &new, ChapterMergeStrategy::KeepTimestamps).unwrap();

        assert_eq!(merged.len(), 3);
        assert_eq!(merged[0].title, "Prologue");
        assert_eq!(merged[0].start_time_ms, 0);
        assert_eq!(merged[0].end_time_ms, 1000);
        assert_eq!(merged[1].title, "The Beginning");
        assert_eq!(merged[1].start_time_ms, 1000);
        assert_eq!(merged[2].title, "The Journey");
        assert_eq!(merged[2].start_time_ms, 2000);
    }

    #[test]
    fn test_merge_replace_all() {
        let existing = vec![
            Chapter::new(1, "Chapter 1".to_string(), 0, 1000),
            Chapter::new(2, "Chapter 2".to_string(), 1000, 2000),
        ];

        let new = vec![
            Chapter::new(1, "Prologue".to_string(), 0, 500),
            Chapter::new(2, "The Beginning".to_string(), 500, 1500),
            Chapter::new(3, "The Journey".to_string(), 1500, 2500),
        ];

        let merged = merge_chapters(&existing, &new, ChapterMergeStrategy::ReplaceAll).unwrap();

        assert_eq!(merged.len(), 3);
        assert_eq!(merged[0].title, "Prologue");
        assert_eq!(merged[0].start_time_ms, 0);
        assert_eq!(merged[0].end_time_ms, 500);
        assert_eq!(merged[2].title, "The Journey");
    }

    #[test]
    fn test_merge_skip_on_mismatch() {
        let existing = vec![
            Chapter::new(1, "Chapter 1".to_string(), 0, 1000),
            Chapter::new(2, "Chapter 2".to_string(), 1000, 2000),
        ];

        let new = vec![
            Chapter::new(1, "Prologue".to_string(), 0, 0),
        ];

        let result = merge_chapters(&existing, &new, ChapterMergeStrategy::SkipOnMismatch);

        assert!(result.is_err());
        let err = result.unwrap_err();
        assert!(err.to_string().contains("Chapter count mismatch"));
    }

    #[test]
    fn test_merge_skip_on_mismatch_with_matching_counts() {
        let existing = vec![
            Chapter::new(1, "Chapter 1".to_string(), 0, 1000),
            Chapter::new(2, "Chapter 2".to_string(), 1000, 2000),
        ];

        let new = vec![
            Chapter::new(1, "Prologue".to_string(), 0, 0),
            Chapter::new(2, "The Beginning".to_string(), 0, 0),
        ];

        // Equal counts: the strategy behaves like KeepTimestamps.
        let merged = merge_chapters(&existing, &new, ChapterMergeStrategy::SkipOnMismatch).unwrap();

        assert_eq!(merged.len(), 2);
        assert_eq!(merged[0].title, "Prologue");
        assert_eq!(merged[0].end_time_ms, 1000);
        assert_eq!(merged[1].title, "The Beginning");
        assert_eq!(merged[1].start_time_ms, 1000);
    }

    #[test]
    fn test_merge_keep_timestamps_with_extra_existing() {
        let existing = vec![
            Chapter::new(1, "Chapter 1".to_string(), 0, 1000),
            Chapter::new(2, "Chapter 2".to_string(), 1000, 2000),
            Chapter::new(3, "Chapter 3".to_string(), 2000, 3000),
        ];

        let new = vec![
            Chapter::new(1, "Prologue".to_string(), 0, 0),
        ];

        let merged = merge_chapters(&existing, &new, ChapterMergeStrategy::KeepTimestamps).unwrap();

        // Should merge first chapter and keep the remaining existing ones
        assert_eq!(merged.len(), 3);
        assert_eq!(merged[0].title, "Prologue");
        assert_eq!(merged[1].title, "Chapter 2");
        assert_eq!(merged[2].title, "Chapter 3");
    }

    #[test]
    fn test_parse_ffprobe_time() {
        assert_eq!(parse_ffprobe_time("0.000000"), Some(0));
        assert_eq!(parse_ffprobe_time("5.5"), Some(5500));
        assert_eq!(parse_ffprobe_time("330.500"), Some(330_500));
        assert_eq!(parse_ffprobe_time("3661.250"), Some(3_661_250)); // 1h 1m 1.25s
        assert_eq!(parse_ffprobe_time("invalid"), None);
        assert_eq!(parse_ffprobe_time(""), None);
    }
}