audiobook_forge/utils/
extraction.rs

1//! Metadata extraction from M4B files and filenames
2
3use crate::models::{CurrentMetadata, MetadataSource};
4use anyhow::{Result, Context};
5use std::path::Path;
6
7/// Extract metadata from M4B file (embedded tags first, filename fallback)
8pub fn extract_current_metadata(file_path: &Path) -> Result<CurrentMetadata> {
9    // Try embedded metadata first
10    let embedded = extract_from_embedded_tags(file_path)?;
11
12    // Always try filename parsing to fill in any gaps
13    let from_filename = extract_from_filename(file_path)?;
14
15    // Merge: prefer embedded values if present, use filename as fallback
16    // This ensures we get author from filename even if title is embedded
17    Ok(embedded.merge_with(from_filename))
18}
19
20/// Extract from embedded M4B tags
21fn extract_from_embedded_tags(file_path: &Path) -> Result<CurrentMetadata> {
22    let tag = mp4ameta::Tag::read_from_path(file_path)
23        .context("Failed to read M4B metadata")?;
24
25    Ok(CurrentMetadata {
26        title: tag.title().map(|s| s.to_string()),
27        author: tag.artist().map(|s| s.to_string())
28            .or_else(|| tag.album_artist().map(|s| s.to_string())),
29        year: tag.year().and_then(|s| s.parse::<u32>().ok()),
30        duration: None, // TODO: get from FFprobe if needed
31        source: MetadataSource::Embedded,
32    })
33}
34
35/// Extract from filename using pattern matching
36fn extract_from_filename(file_path: &Path) -> Result<CurrentMetadata> {
37    let filename = file_path
38        .file_stem()
39        .and_then(|s| s.to_str())
40        .unwrap_or("");
41
42    // Pattern: "Author - Title"
43    if let Some((author, title)) = parse_author_title_pattern(filename) {
44        return Ok(CurrentMetadata {
45            title: Some(title),
46            author: Some(author),
47            year: None,
48            duration: None,
49            source: MetadataSource::Filename,
50        });
51    }
52
53    // Fallback: use entire filename as title
54    Ok(CurrentMetadata {
55        title: Some(filename.to_string()),
56        author: None,
57        year: None,
58        duration: None,
59        source: MetadataSource::Filename,
60    })
61}
62
/// Split a file name of the form "Author - Title" into `(author, title)`.
///
/// Underscores are treated as spaces, so `Author_-_Title`, `Author_- Title`,
/// `Author -_Title` and similar variants are all recognised. The split happens at
/// the leftmost dash that has a space on both sides after that normalisation.
/// Everything to the right, including further dashes, belongs to the title:
/// `"Isaac Asimov - I, Robot - Complete Edition"` yields the title
/// `"I, Robot - Complete Edition"`.
///
/// Both parts are trimmed. Returns `None` when there is no separator or when either
/// part is empty after trimming. A dash without surrounding spaces (as in
/// `Jean-Paul`) is not a separator.
fn parse_author_title_pattern(filename: &str) -> Option<(String, String)> {
    // Normalising first lets one separator cover every underscore/space mix and makes
    // the split position depend on where the dash is, not on which style it uses.
    let normalized = filename.replace('_', " ");

    let (author, title) = normalized.split_once(" - ")?;
    let (author, title) = (author.trim(), title.trim());

    // Only accept the split if both author and title are non-empty.
    if author.is_empty() || title.is_empty() {
        return None;
    }

    Some((author.to_string(), title.to_string()))
}
85
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks the "Author - Title" splitter against representative file names.
    #[test]
    fn test_parse_author_title_pattern() {
        // (input, expected author, expected title)
        let cases = [
            // Standard space-dash-space pattern
            ("Andy Weir - Project Hail Mary", "Andy Weir", "Project Hail Mary"),
            // Multiple hyphens: only the first one separates author from title
            (
                "Isaac Asimov - I, Robot - Complete Edition",
                "Isaac Asimov",
                "I, Robot - Complete Edition",
            ),
            // Underscore patterns (common in downloaded audiobooks)
            ("Adam_Phillips_-_On_Giving_Up", "Adam Phillips", "On Giving Up"),
            (
                "Morgan_Housel_-_The_Art_of_Spending_Money",
                "Morgan Housel",
                "The Art of Spending Money",
            ),
            // Mixed underscores and spaces
            (
                "Neil_deGrasse_Tyson - Just Visiting This Planet",
                "Neil deGrasse Tyson",
                "Just Visiting This Planet",
            ),
        ];

        for (input, expected_author, expected_title) in cases {
            assert_eq!(
                parse_author_title_pattern(input),
                Some((expected_author.to_string(), expected_title.to_string())),
                "unexpected split for {:?}",
                input
            );
        }

        // No match
        assert_eq!(parse_author_title_pattern("JustATitle"), None);
    }
}
119}