termusiclib/podcast/mod.rs

// Thanks to the author of shellcaster (https://github.com/jeff-hughes/shellcaster). Most parts of the following code are taken from it.

pub mod db;
#[allow(clippy::module_name_repetitions)]
pub mod episode;
// repetitive name, but will do for now
#[allow(clippy::module_inception)]
mod podcast;

use std::fs::File;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use std::sync::LazyLock;
use std::time::Duration;

use anyhow::{Context, Result, bail};
use bytes::Buf;
use chrono::{DateTime, Utc};
use opml::{Body, Head, OPML, Outline};
use regex::Regex;
use reqwest::ClientBuilder;
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
use rss::{Channel, Item};
use sanitize_filename::{Options, sanitize_with_options};
use tokio::sync::mpsc::unbounded_channel;

use crate::config::v2::server::PodcastSettings;
use crate::taskpool::TaskPool;
use db::Database;
use episode::{Episode, EpisodeNoId};
pub use podcast::{Podcast, PodcastNoId};

// How many columns we need, minimum, before we display the
// (unplayed/total) after the podcast title
pub const PODCAST_UNPLAYED_TOTALS_LENGTH: usize = 25;

// How many columns we need, minimum, before we display the duration of
// the episode
pub const EPISODE_DURATION_LENGTH: usize = 45;

// How many columns we need, minimum, before we display the pubdate
// of the episode
pub const EPISODE_PUBDATE_LENGTH: usize = 60;

/// Regex for parsing an episode "duration", which could take the form
/// of HH:MM:SS, MM:SS, or SS.
static RE_DURATION: LazyLock<Regex> =
    LazyLock::new(|| Regex::new(r"(\d+)(?::(\d+))?(?::(\d+))?").unwrap());

/// Regex for removing "A", "An", and "The" from the beginning of
/// podcast titles
static RE_ARTICLES: LazyLock<Regex> = LazyLock::new(|| Regex::new(r"^(a|an|the) ").unwrap());

/// Defines the interface used for both podcasts and episodes, to be
/// used and displayed in menus.
// TODO: unused trait & functions?
pub trait Menuable {
    fn get_id(&self) -> i64;
    fn get_title(&self, length: usize) -> String;
    fn is_played(&self) -> bool;
}

#[derive(Debug, Clone, Eq, PartialEq)]
#[allow(clippy::module_name_repetitions)]
pub struct PodcastFeed {
    pub id: Option<i64>,
    pub url: String,
    pub title: Option<String>,
}

impl PodcastFeed {
    #[must_use]
    pub const fn new(id: Option<i64>, url: String, title: Option<String>) -> Self {
        Self { id, url, title }
    }
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PodcastSyncResult {
    FetchPodcastStart(String),

    SyncData((i64, PodcastNoId)),
    NewData(PodcastNoId),
    Error(PodcastFeed),
}

/// Spawns a new task to check a feed and retrieve podcast data.
///
/// If `tx_to_main` is closed, no errors will be thrown and the task will continue.
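///
/// A minimal usage sketch (hypothetical `feed`, `max_retries`, and `taskpool`
/// values; the channel wiring mirrors what `import_from_opml` in this module does):
///
/// ```ignore
/// let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel();
/// check_feed(feed, max_retries, &taskpool, move |msg| {
///     let _ = tx.send(msg);
/// });
/// while let Some(msg) = rx.recv().await {
///     // handle the PodcastSyncResult variants
/// }
/// ```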
pub fn check_feed(
    feed: PodcastFeed,
    max_retries: usize,
    tp: &TaskPool,
    tx_to_main: impl Fn(PodcastSyncResult) + Send + 'static,
) {
    tp.execute(async move {
        tx_to_main(PodcastSyncResult::FetchPodcastStart(feed.url.clone()));
        match get_feed_data(&feed.url, max_retries).await {
            Ok(pod) => match feed.id {
                Some(id) => {
                    tx_to_main(PodcastSyncResult::SyncData((id, pod)));
                }
                None => {
                    tx_to_main(PodcastSyncResult::NewData(pod));
                }
            },
            Err(err) => {
                error!("get_feed_data had an error: {err:#?}");
                tx_to_main(PodcastSyncResult::Error(feed));
            }
        }
    });
}

/// Given a URL, this attempts to pull the data about a podcast and its
/// episodes from an RSS feed.
async fn get_feed_data(url: &str, mut max_retries: usize) -> Result<PodcastNoId> {
    let agent = ClientBuilder::new()
        .connect_timeout(Duration::from_secs(5))
        .build()?;

    let resp: reqwest::Response = loop {
        let response = agent.get(url).send().await;
        if let Ok(resp) = response {
            break resp;
        }
        max_retries -= 1;
        if max_retries == 0 {
            bail!("No response from feed");
        }
    };

    let channel = Channel::read_from(resp.bytes().await?.reader())?;
    Ok(parse_feed_data(channel, url))
}

/// Given a Channel with the RSS feed data, this parses the data about a
/// podcast and its episodes and returns a `PodcastNoId`. There are existing
/// specifications for podcast RSS feeds that a feed should adhere to, but
/// this makes some attempt to account for the possibility that a feed
/// might not be valid according to the spec.
fn parse_feed_data(channel: Channel, url: &str) -> PodcastNoId {
    let title = channel.title().to_string();
    let url = url.to_string();
    let description = Some(channel.description().to_string());
    let last_checked = Utc::now();

    let mut author = None;
    let mut explicit = None;
    let mut image_url = None;
    if let Some(itunes) = channel.itunes_ext() {
        author = itunes.author().map(std::string::ToString::to_string);
        explicit = itunes.explicit().and_then(|s| {
            let ss = s.to_lowercase();
            match &ss[..] {
                "yes" | "explicit" | "true" => Some(true),
                "no" | "clean" | "false" => Some(false),
                _ => None,
            }
        });
        image_url = itunes.image().map(std::string::ToString::to_string);
    }

    let mut episodes = Vec::new();
    let items = channel.into_items();
    if !items.is_empty() {
        for item in &items {
            episodes.push(parse_episode_data(item));
        }
    }

    PodcastNoId {
        title,
        url,
        description,
        author,
        explicit,
        last_checked,
        episodes,
        image_url,
    }
}

/// For an item (episode) in an RSS feed, this pulls data about the item
/// and converts it to an `EpisodeNoId`. There are existing specifications
/// for podcast RSS feeds that a feed should adhere to, but this makes some
/// attempt to account for the possibility that a feed might not be valid
/// according to the spec.
fn parse_episode_data(item: &Item) -> EpisodeNoId {
    let title = item.title().unwrap_or("").to_string();
    let url = match item.enclosure() {
        Some(enc) => enc.url().to_string(),
        None => String::new(),
    };
    let guid = match item.guid() {
        Some(guid) => guid.value().to_string(),
        None => String::new(),
    };
    let description = item.description().unwrap_or("").to_string();
    let pubdate = item
        .pub_date()
        .and_then(|pd| parse_from_rfc2822_with_fallback(pd).ok())
        .map(std::convert::Into::into);

    let mut duration = None;
    let mut image_url = None;
    if let Some(itunes) = item.itunes_ext() {
        duration = duration_to_int(itunes.duration()).map(i64::from);
        image_url = itunes.image().map(std::string::ToString::to_string);
    }

    EpisodeNoId {
        title,
        url,
        guid,
        description,
        pubdate,
        duration,
        image_url,
    }
}
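
// A minimal test sketch for the feed parsing above; the feed XML and the
// module name are illustrative samples, not taken from the original code.
#[cfg(test)]
mod feed_parse_tests {
    use super::parse_feed_data;
    use rss::Channel;

    #[test]
    fn parses_a_minimal_feed() {
        let feed_xml = r#"<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
  <channel>
    <title>Example Podcast</title>
    <description>An example feed</description>
    <item>
      <title>Episode 1</title>
      <enclosure url="https://example.com/ep1.mp3" length="0" type="audio/mpeg"/>
    </item>
  </channel>
</rss>"#;
        // `Channel::read_from` accepts any `BufRead`, including a byte slice
        let channel = Channel::read_from(feed_xml.as_bytes()).expect("feed should parse");
        let pod = parse_feed_data(channel, "https://example.com/feed.xml");

        assert_eq!(pod.title, "Example Podcast");
        assert_eq!(pod.url, "https://example.com/feed.xml");
        assert_eq!(pod.episodes.len(), 1);
        assert_eq!(pod.episodes[0].title, "Episode 1");
        assert_eq!(pod.episodes[0].url, "https://example.com/ep1.mp3");
    }
}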

/// Given a string representing an episode duration, this attempts to
/// convert it to an integer representing the duration in seconds. Covers
/// the formats HH:MM:SS, MM:SS, and SS. If the duration cannot be converted
/// (for any of numerous reasons), it will return None.
fn duration_to_int(duration: Option<&str>) -> Option<i32> {
    let duration = duration?;
    let captures = RE_DURATION.captures(duration)?;

    /*
     * Provided that the regex succeeds, we should have
     * 4 capture groups (with 0th being the full match).
     * Depending on the string format, however, some of
     * these may return None. We first loop through the
     * capture groups and push Some results to an array.
     * This will fail on the first non-numeric value,
     * so the duration is parsed only if all components
     * of it were successfully converted to integers.
     * Finally, we convert hours, minutes, and seconds
     * into a total duration in seconds and return.
     */

    let mut times = [None; 3];
    let mut counter = 0;
    // cap[0] is always the full match
    for c in captures.iter().skip(1).flatten() {
        let intval = c.as_str().parse().ok()?;
        times[counter] = Some(intval);
        counter += 1;
    }

    match counter {
        // HH:MM:SS
        3 => Some(times[0].unwrap() * 60 * 60 + times[1].unwrap() * 60 + times[2].unwrap()),
        // MM:SS
        2 => Some(times[0].unwrap() * 60 + times[1].unwrap()),
        // SS
        1 => times[0],
        _ => None,
    }
}
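
// A minimal sketch of unit tests for `duration_to_int`; the module name and
// the sample values are illustrative, not taken from the original code.
#[cfg(test)]
mod duration_tests {
    use super::duration_to_int;

    #[test]
    fn parses_common_duration_formats() {
        // HH:MM:SS -> 1 * 3600 + 2 * 60 + 3
        assert_eq!(duration_to_int(Some("01:02:03")), Some(3723));
        // MM:SS -> 12 * 60 + 34
        assert_eq!(duration_to_int(Some("12:34")), Some(754));
        // SS
        assert_eq!(duration_to_int(Some("90")), Some(90));
        // no digits at all -> no regex match -> None
        assert_eq!(duration_to_int(Some("not a duration")), None);
        assert_eq!(duration_to_int(None), None);
    }
}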

/// Imports a list of podcasts from an OPML file. Feeds whose URL is
/// already present in the database are skipped.
pub async fn import_from_opml(db_path: &Path, config: &PodcastSettings, file: &Path) -> Result<()> {
    let xml = std::fs::read_to_string(file)
        .with_context(|| format!("Could not open OPML file: {}", file.display()))?;

    let mut podcast_list = import_opml_feeds(&xml).with_context(
        || "Could not properly parse OPML file -- file may be formatted improperly or corrupted.",
    )?;

    if podcast_list.is_empty() {
        println!("No podcasts to import.");
        return Ok(());
    }

    let db_inst = db::Database::new(db_path)?;

    // delete database if we are replacing the data
    // if args.is_present("replace") {
    //     db_inst
    //         .clear_db()
    //         .with_context(|| "Error clearing database")?;
    // } else {
    let old_podcasts = db_inst.get_podcasts()?;

    // if the URL is already in the database, remove it from the import
    podcast_list.retain(|pod| {
        for op in &old_podcasts {
            if pod.url == op.url {
                return false;
            }
        }
        true
    });
    // }

    // check again, now that we may have removed feeds after looking at
    // the database
    if podcast_list.is_empty() {
        println!("No podcasts to import.");
        return Ok(());
    }

    println!("Importing {} podcasts...", podcast_list.len());

    let taskpool = TaskPool::new(usize::from(config.concurrent_downloads_max.get()));
    let (tx_to_main, mut rx_to_main) = unbounded_channel();

    for pod in &podcast_list {
        let tx_to_main_c = tx_to_main.clone();

        check_feed(
            pod.clone(),
            usize::from(config.max_download_retries),
            &taskpool,
            move |msg| {
                let _ = tx_to_main_c.send(msg);
            },
        );
    }

    let mut msg_counter: usize = 0;
    let mut failure = false;
    while let Some(message) = rx_to_main.recv().await {
        match message {
            PodcastSyncResult::FetchPodcastStart(_) => (),
            PodcastSyncResult::NewData(pod) => {
                msg_counter += 1;
                let title = &pod.title;
                let db_result = db_inst.insert_podcast(&pod);
                match db_result {
                    Ok(_) => {
                        println!("Added {title}");
                    }
                    Err(err) => {
                        failure = true;
                        error!("Error adding {title}, err: {err}");
                    }
                }
            }

            PodcastSyncResult::Error(feed) => {
                msg_counter += 1;
                failure = true;
                error!("Error retrieving RSS feed: {}", feed.url);
            }

            PodcastSyncResult::SyncData((_id, _pod)) => {
                msg_counter += 1;
            }
        }

        if msg_counter >= podcast_list.len() {
            break;
        }
    }

    if failure {
        bail!("Process finished with errors.");
    }
    println!("Import successful.");

    Ok(())
}

/// Exports all podcasts to OPML format, writing the result to the given file.
pub fn export_to_opml(db_path: &Path, file: &Path) -> Result<()> {
    let db_inst = Database::new(db_path)?;
    let podcast_list = db_inst.get_podcasts()?;
    let opml = export_opml_feeds(&podcast_list);

    let xml = opml.to_string().context("Could not create OPML format")?;

    let mut dst = File::create(file)
        .with_context(|| format!("Could not create output file: {}", file.display()))?;
    dst.write_all(xml.as_bytes()).with_context(|| {
        format!(
            "Could not copy OPML data to output file: {}",
            file.display()
        )
    })?;
    Ok(())
}

/// Imports a list of podcast feeds from an OPML file. Supports
/// v1.0, v1.1, and v2.0 OPML files.
fn import_opml_feeds(xml: &str) -> Result<Vec<PodcastFeed>> {
    let opml = OPML::from_str(xml)?;
    let mut feeds = Vec::new();
    for pod in opml.body.outlines {
        if pod.xml_url.is_some() {
            // match against the title attribute first -- if this is
            // not set or empty, then match against the text
            // attribute; this must be set, but can be empty
            let title = pod.title.filter(|t| !t.is_empty()).or({
                if pod.text.is_empty() {
                    None
                } else {
                    Some(pod.text)
                }
            });
            feeds.push(PodcastFeed::new(None, pod.xml_url.unwrap(), title));
        }
    }
    Ok(feeds)
}
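
// A small test sketch for `import_opml_feeds`; the OPML snippet and the
// module name are illustrative samples, not taken from the original code.
#[cfg(test)]
mod opml_import_tests {
    use super::import_opml_feeds;

    #[test]
    fn imports_only_outlines_with_a_feed_url() {
        let xml = r#"<opml version="2.0">
  <head><title>Feeds</title></head>
  <body>
    <outline text="Example Podcast" type="rss" xmlUrl="https://example.com/feed.xml"/>
    <outline text="Not a feed"/>
  </body>
</opml>"#;

        let feeds = import_opml_feeds(xml).expect("OPML should parse");

        // only the outline with an xmlUrl attribute becomes a feed,
        // and its text attribute is used as the title fallback
        assert_eq!(feeds.len(), 1);
        assert_eq!(feeds[0].url, "https://example.com/feed.xml");
        assert_eq!(feeds[0].title.as_deref(), Some("Example Podcast"));
    }
}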

/// Converts the current set of podcast feeds to the OPML format.
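///
/// The generated document has roughly this shape (illustrative feed values;
/// the exact serialization is handled by the `opml` crate):
///
/// ```text
/// <opml version="2.0">
///   <head>
///     <title>Termusic Podcast Feeds</title>
///     <dateCreated>...</dateCreated>
///   </head>
///   <body>
///     <outline text="Example Podcast" type="rss"
///              xmlUrl="https://example.com/feed.xml" title="Example Podcast"/>
///   </body>
/// </opml>
/// ```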
fn export_opml_feeds(podcasts: &[Podcast]) -> OPML {
    let date = Utc::now();
    let mut opml = OPML {
        head: Some(Head {
            title: Some("Termusic Podcast Feeds".to_string()),
            date_created: Some(date.to_rfc2822()),
            ..Head::default()
        }),
        ..Default::default()
    };

    let mut outlines = Vec::new();

    for pod in podcasts {
        // opml.add_feed(&pod.title, &pod.url);
        outlines.push(Outline {
            text: pod.title.clone(),
            r#type: Some("rss".to_string()),
            xml_url: Some(pod.url.clone()),
            title: Some(pod.title.clone()),
            ..Outline::default()
        });
    }

    opml.body = Body { outlines };
    opml
}

/// Struct used to communicate relevant episode data to the taskpool.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct EpData {
    pub id: i64,
    pub pod_id: i64,
    pub title: String,
    pub url: String,
    pub pubdate: Option<DateTime<Utc>>,
    pub file_path: Option<PathBuf>,
}

#[derive(Clone, Debug, PartialEq, Eq)]
pub enum PodcastDLResult {
    DLStart(EpData),
    DLComplete(EpData),
    DLResponseError(EpData),
    DLFileCreateError(EpData),
    DLFileWriteError(EpData),
}

/// This is the function the main controller uses to indicate new files to download.
///
/// It uses the taskpool to start jobs for every episode to be downloaded.
/// New jobs can be requested by the user while there are still ongoing jobs.
///
/// If `tx_to_main` is closed, no errors will be thrown and the task will continue.
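///
/// A minimal usage sketch (hypothetical `episodes`, `download_dir`,
/// `max_retries`, and `taskpool` values; the channel wiring mirrors how
/// `check_feed` is driven in `import_from_opml`):
///
/// ```ignore
/// let (tx, mut rx) = tokio::sync::mpsc::unbounded_channel();
/// download_list(episodes, &download_dir, max_retries, &taskpool, move |msg| {
///     let _ = tx.send(msg);
/// });
/// while let Some(msg) = rx.recv().await {
///     // handle the PodcastDLResult variants (DLStart, DLComplete, errors)
/// }
/// ```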
pub fn download_list(
    episodes: Vec<EpData>,
    dest: &Path,
    max_retries: usize,
    tp: &TaskPool,
    tx_to_main: impl Fn(PodcastDLResult) + Send + 'static + Clone,
) {
    // parse episode details and push to queue
    for ep in episodes {
        let tx = tx_to_main.clone();
        let dest2 = dest.to_path_buf();
        tp.execute(async move {
            tx(PodcastDLResult::DLStart(ep.clone()));
            let result = download_file(ep, dest2, max_retries).await;
            tx(result);
        });
    }
}

/// Downloads a file to a local filepath, returning a `PodcastDLResult` variant
/// indicating success or failure.
async fn download_file(
    mut ep_data: EpData,
    destination_path: PathBuf,
    mut max_retries: usize,
) -> PodcastDLResult {
    let agent = ClientBuilder::new()
        .connect_timeout(Duration::from_secs(10))
        .build()
        .expect("reqwest client build failed");

    let response: reqwest::Response = loop {
        let response = agent.get(&ep_data.url).send().await;
        if let Ok(resp) = response {
            break resp;
        }
        max_retries -= 1;
        if max_retries == 0 {
            return PodcastDLResult::DLResponseError(ep_data);
        }
    };

    // figure out the file type
    let ext = if let Some(content_type) = response
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
    {
        match content_type {
            "audio/x-m4a" | "audio/mp4" => "m4a",
            "audio/x-matroska" => "mka",
            "audio/flac" => "flac",
            "video/quicktime" => "mov",
            "video/mp4" => "mp4",
            "video/x-m4v" => "m4v",
            "video/x-matroska" => "mkv",
            "video/webm" => "webm",
            // "audio/mpeg" => "mp3",
            // fallback
            _ => "mp3",
        }
    } else {
        error!("The response doesn't contain a content type, using \"mp3\" as fallback!");
        "mp3"
    };

    let mut file_name = sanitize_with_options(
        &ep_data.title,
        Options {
            truncate: true,
            windows: true, // for simplicity, we'll just use Windows-friendly paths for everyone
            replacement: "",
        },
    );

    if let Some(pubdate) = ep_data.pubdate {
        file_name = format!("{file_name}_{}", pubdate.format("%Y%m%d_%H%M%S"));
    }

    let mut file_path = destination_path;
    file_path.push(format!("{file_name}.{ext}"));

    let Ok(mut dst) = File::create(&file_path) else {
        return PodcastDLResult::DLFileCreateError(ep_data);
    };

    ep_data.file_path = Some(file_path);

    let Ok(bytes) = response.bytes().await else {
        return PodcastDLResult::DLFileCreateError(ep_data);
    };

    match std::io::copy(&mut bytes.reader(), &mut dst) {
        Ok(_) => PodcastDLResult::DLComplete(ep_data),
        Err(_) => PodcastDLResult::DLFileWriteError(ep_data),
    }
}