termusiclib/podcast/
mod.rs

1// Thanks to the author of shellcaster(https://github.com/jeff-hughes/shellcaster). Most parts of following code are taken from it.
2
3pub mod db;
4#[allow(clippy::module_name_repetitions)]
5pub mod episode;
// repetitive name, but will do for now
7#[allow(clippy::module_inception)]
8mod podcast;
9
10use crate::config::v2::server::PodcastSettings;
11use crate::taskpool::TaskPool;
12use crate::types::{Msg, PCMsg};
13use db::Database;
14use episode::{Episode, EpisodeNoId};
15#[allow(clippy::module_name_repetitions)]
16pub use podcast::{Podcast, PodcastNoId};
17
18use anyhow::{bail, Context, Result};
19use bytes::Buf;
20use chrono::{DateTime, Utc};
21use lazy_static::lazy_static;
22use opml::{Body, Head, Outline, OPML};
23use regex::Regex;
24use reqwest::ClientBuilder;
25use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
26use rss::{Channel, Item};
27use sanitize_filename::{sanitize_with_options, Options};
28use std::fs::File;
29use std::io::Write as _;
30use std::path::{Path, PathBuf};
31use std::sync::mpsc::{self, Sender};
32use std::time::Duration;
33
/// How many columns we need, minimum, before we display the
/// (unplayed/total) after the podcast title.
pub const PODCAST_UNPLAYED_TOTALS_LENGTH: usize = 25;

/// How many columns we need, minimum, before we display the duration of
/// the episode.
pub const EPISODE_DURATION_LENGTH: usize = 45;

/// How many columns we need, minimum, before we display the pubdate
/// of the episode.
pub const EPISODE_PUBDATE_LENGTH: usize = 60;
45
lazy_static! {
    /// Regex for parsing an episode "duration", which could take the form
    /// of HH:MM:SS, MM:SS, or SS.
    ///
    /// The pattern is not anchored, so it matches the first run of one to
    /// three colon-separated digit groups found anywhere in the input. Each
    /// group that is present is exposed as its own capture group.
    static ref RE_DURATION: Regex = Regex::new(r"(\d+)(?::(\d+))?(?::(\d+))?").expect("Regex error");

    /// Regex for removing "A", "An", and "The" from the beginning of
    /// podcast titles.
    ///
    /// As written the pattern only matches the lowercase forms followed by a
    /// single space, so it presumably expects an already-lowercased title --
    /// confirm against the call site.
    static ref RE_ARTICLES: Regex = Regex::new(r"^(a|an|the) ").expect("Regex error");
}
55
/// Defines interface used for both podcasts and episodes, to be
/// used and displayed in menus.
pub trait Menuable {
    /// Returns the numeric id of this item (presumably its database id --
    /// confirm against the implementors).
    fn get_id(&self) -> i64;
    /// Returns the title to show for this item. `length` is presumably the
    /// available display width used to fit the text -- confirm against the
    /// implementors.
    fn get_title(&self, length: usize) -> String;
    /// Returns whether this item counts as played.
    fn is_played(&self) -> bool;
}
63
/// Describes a feed that should be fetched: where it lives and, optionally,
/// which existing record it belongs to.
#[derive(Debug, Clone, Eq, PartialEq)]
#[allow(clippy::module_name_repetitions)]
pub struct PodcastFeed {
    /// Id of the feed, if it has one. `check_feed` reports fetched data as
    /// `PCMsg::SyncData` when this is `Some` and as `PCMsg::NewData` when it
    /// is `None`.
    pub id: Option<i64>,
    /// URL the feed is fetched from.
    pub url: String,
    /// Title of the feed, if known (e.g. taken from an OPML outline).
    pub title: Option<String>,
}

impl PodcastFeed {
    /// Creates a feed description from its parts without any validation.
    #[must_use]
    pub const fn new(id: Option<i64>, url: String, title: Option<String>) -> Self {
        Self { id, url, title }
    }
}
78
79/// Spawns a new task to check a feed and retrieve podcast data.
80///
81/// If `tx_to_main` is closed, no errors will be throws and the task will continue
82pub fn check_feed(feed: PodcastFeed, max_retries: usize, tp: &TaskPool, tx_to_main: Sender<Msg>) {
83    tp.execute(async move {
84        let _ = tx_to_main.send(Msg::Podcast(PCMsg::FetchPodcastStart(feed.url.clone())));
85        match get_feed_data(&feed.url, max_retries).await {
86            Ok(pod) => match feed.id {
87                Some(id) => {
88                    let _ = tx_to_main.send(Msg::Podcast(PCMsg::SyncData((id, pod))));
89                }
90                None => {
91                    let _ = tx_to_main.send(Msg::Podcast(PCMsg::NewData(pod)));
92                }
93            },
94            Err(err) => {
95                error!("get_feed_data had a Error: {:#?}", err);
96                let _ = tx_to_main.send(Msg::Podcast(PCMsg::Error(feed)));
97            }
98        }
99    });
100}
101
102/// Given a URL, this attempts to pull the data about a podcast and its
103/// episodes from an RSS feed.
104async fn get_feed_data(url: &str, mut max_retries: usize) -> Result<PodcastNoId> {
105    let agent = ClientBuilder::new()
106        .connect_timeout(Duration::from_secs(5))
107        .build()?;
108
109    let resp: reqwest::Response = loop {
110        let response = agent.get(url).send().await;
111        if let Ok(resp) = response {
112            break resp;
113        }
114        max_retries -= 1;
115        if max_retries == 0 {
116            bail!("No response from feed");
117        }
118    };
119
120    let channel = Channel::read_from(resp.bytes().await?.reader())?;
121    Ok(parse_feed_data(channel, url))
122}
123
124/// Given a Channel with the RSS feed data, this parses the data about a
125/// podcast and its episodes and returns a Podcast. There are existing
126/// specifications for podcast RSS feeds that a feed should adhere to, but
127/// this does try to make some attempt to account for the possibility that
128/// a feed might not be valid according to the spec.
129fn parse_feed_data(channel: Channel, url: &str) -> PodcastNoId {
130    let title = channel.title().to_string();
131    let url = url.to_string();
132    let description = Some(channel.description().to_string());
133    let last_checked = Utc::now();
134
135    let mut author = None;
136    let mut explicit = None;
137    let mut image_url = None;
138    if let Some(itunes) = channel.itunes_ext() {
139        author = itunes.author().map(std::string::ToString::to_string);
140        explicit = itunes.explicit().and_then(|s| {
141            let ss = s.to_lowercase();
142            match &ss[..] {
143                "yes" | "explicit" | "true" => Some(true),
144                "no" | "clean" | "false" => Some(false),
145                _ => None,
146            }
147        });
148        image_url = itunes.image().map(std::string::ToString::to_string);
149    }
150
151    let mut episodes = Vec::new();
152    let items = channel.into_items();
153    if !items.is_empty() {
154        for item in &items {
155            episodes.push(parse_episode_data(item));
156        }
157    }
158
159    PodcastNoId {
160        title,
161        url,
162        description,
163        author,
164        explicit,
165        last_checked,
166        episodes,
167        image_url,
168    }
169}
170
171/// For an item (episode) in an RSS feed, this pulls data about the item
172/// and converts it to an Episode. There are existing specifications for
173/// podcast RSS feeds that a feed should adhere to, but this does try to
174/// make some attempt to account for the possibility that a feed might
175/// not be valid according to the spec.
176fn parse_episode_data(item: &Item) -> EpisodeNoId {
177    let title = item.title().unwrap_or("").to_string();
178    let url = match item.enclosure() {
179        Some(enc) => enc.url().to_string(),
180        None => String::new(),
181    };
182    let guid = match item.guid() {
183        Some(guid) => guid.value().to_string(),
184        None => String::new(),
185    };
186    let description = item.description().unwrap_or("").to_string();
187    let pubdate = item
188        .pub_date()
189        .and_then(|pd| parse_from_rfc2822_with_fallback(pd).ok())
190        .map(std::convert::Into::into);
191
192    let mut duration = None;
193    let mut image_url = None;
194    if let Some(itunes) = item.itunes_ext() {
195        duration = duration_to_int(itunes.duration()).map(i64::from);
196        image_url = itunes.image().map(std::string::ToString::to_string);
197    }
198
199    EpisodeNoId {
200        title,
201        url,
202        guid,
203        description,
204        pubdate,
205        duration,
206        image_url,
207    }
208}
209
210/// Given a string representing an episode duration, this attempts to
211/// convert to an integer representing the duration in seconds. Covers
212/// formats HH:MM:SS, MM:SS, and SS. If the duration cannot be converted
213/// (covering numerous reasons), it will return None.
214fn duration_to_int(duration: Option<&str>) -> Option<i32> {
215    let duration = duration?;
216    let captures = RE_DURATION.captures(duration)?;
217
218    /*
219     * Provided that the regex succeeds, we should have
220     * 4 capture groups (with 0th being the full match).
221     * Depending on the string format, however, some of
222     * these may return None. We first loop through the
223     * capture groups and push Some results to an array.
224     * This will fail on the first non-numeric value,
225     * so the duration is parsed only if all components
226     * of it were successfully converted to integers.
227     * Finally, we convert hours, minutes, and seconds
228     * into a total duration in seconds and return.
229     */
230
231    let mut times = [None; 3];
232    let mut counter = 0;
233    // cap[0] is always full match
234    for c in captures.iter().skip(1).flatten() {
235        let intval = c.as_str().parse().ok()?;
236        times[counter] = Some(intval);
237        counter += 1;
238    }
239
240    match counter {
241        // HH:MM:SS
242        3 => Some(times[0].unwrap() * 60 * 60 + times[1].unwrap() * 60 + times[2].unwrap()),
243        // MM:SS
244        2 => Some(times[0].unwrap() * 60 + times[1].unwrap()),
245        // SS
246        1 => times[0],
247        _ => None,
248    }
249}
250
251/// Imports a list of podcasts from OPML format, reading from a file. If the `replace` flag is set, this replaces all
252/// existing data in the database.
253pub fn import_from_opml(db_path: &Path, config: &PodcastSettings, file: &Path) -> Result<()> {
254    let xml = std::fs::read_to_string(file)
255        .with_context(|| format!("Could not open OPML file: {}", file.display()))?;
256
257    let mut podcast_list = import_opml_feeds(&xml).with_context(|| {
258        "Could not properly parse OPML file -- file may be formatted improperly or corrupted."
259    })?;
260
261    if podcast_list.is_empty() {
262        println!("No podcasts to import.");
263        return Ok(());
264    }
265
266    let db_inst = db::Database::new(db_path)?;
267
268    // delete database if we are replacing the data
269    // if args.is_present("replace") {
270    //     db_inst
271    //         .clear_db()
272    //         .with_context(|| "Error clearing database")?;
273    // } else {
274    let old_podcasts = db_inst.get_podcasts()?;
275
276    // if URL is already in database, remove it from import
277    podcast_list.retain(|pod| {
278        for op in &old_podcasts {
279            if pod.url == op.url {
280                return false;
281            }
282        }
283        true
284    });
285    // }
286
287    // check again, now that we may have removed feeds after looking at
288    // the database
289    if podcast_list.is_empty() {
290        println!("No podcasts to import.");
291        return Ok(());
292    }
293
294    println!("Importing {} podcasts...", podcast_list.len());
295
296    let taskpool = TaskPool::new(usize::from(config.concurrent_downloads_max.get()));
297    let (tx_to_main, rx_to_main) = mpsc::channel();
298
299    for pod in &podcast_list {
300        check_feed(
301            pod.clone(),
302            usize::from(config.max_download_retries),
303            &taskpool,
304            tx_to_main.clone(),
305        );
306    }
307
308    let mut msg_counter: usize = 0;
309    let mut failure = false;
310    while let Some(message) = rx_to_main.iter().next() {
311        match message {
312            Msg::Podcast(PCMsg::NewData(pod)) => {
313                msg_counter += 1;
314                let title = &pod.title;
315                let db_result = db_inst.insert_podcast(&pod);
316                match db_result {
317                    Ok(_) => {
318                        println!("Added {title}");
319                    }
320                    Err(err) => {
321                        failure = true;
322                        error!("Error adding {title}, err: {err}");
323                    }
324                }
325            }
326
327            Msg::Podcast(PCMsg::Error(feed)) => {
328                msg_counter += 1;
329                failure = true;
330                error!("Error retrieving RSS feed: {}", feed.url);
331            }
332
333            Msg::Podcast(PCMsg::SyncData((_id, _pod))) => {
334                msg_counter += 1;
335            }
336            _ => {}
337        }
338
339        if msg_counter >= podcast_list.len() {
340            break;
341        }
342    }
343
344    if failure {
345        bail!("Process finished with errors.");
346    }
347    println!("Import successful.");
348
349    Ok(())
350}
351
352/// Exports all podcasts to OPML format, either printing to stdout or
353/// exporting to a file.
354pub fn export_to_opml(db_path: &Path, file: &Path) -> Result<()> {
355    let db_inst = Database::new(db_path)?;
356    let podcast_list = db_inst.get_podcasts()?;
357    let opml = export_opml_feeds(&podcast_list);
358
359    let xml = opml.to_string().context("Could not create OPML format")?;
360
361    let mut dst = File::create(file)
362        .with_context(|| format!("Could not create output file: {}", file.display()))?;
363    dst.write_all(xml.as_bytes()).with_context(|| {
364        format!(
365            "Could not copy OPML data to output file: {}",
366            file.display()
367        )
368    })?;
369    Ok(())
370}
371
372/// Import a list of podcast feeds from an OPML file. Supports
373/// v1.0, v1.1, and v2.0 OPML files.
374fn import_opml_feeds(xml: &str) -> Result<Vec<PodcastFeed>> {
375    let opml = OPML::from_str(xml)?;
376    let mut feeds = Vec::new();
377    for pod in opml.body.outlines {
378        if pod.xml_url.is_some() {
379            // match against title attribute first -- if this is
380            // not set or empty, then match against the text
381            // attribute; this must be set, but can be empty
382            let title = pod.title.filter(|t| !t.is_empty()).or({
383                if pod.text.is_empty() {
384                    None
385                } else {
386                    Some(pod.text)
387                }
388            });
389            feeds.push(PodcastFeed::new(None, pod.xml_url.unwrap(), title));
390        }
391    }
392    Ok(feeds)
393}
394
395/// Converts the current set of podcast feeds to the OPML format
396fn export_opml_feeds(podcasts: &[Podcast]) -> OPML {
397    let date = Utc::now();
398    let mut opml = OPML {
399        head: Some(Head {
400            title: Some("Termusic Podcast Feeds".to_string()),
401            date_created: Some(date.to_rfc2822()),
402            ..Head::default()
403        }),
404        ..Default::default()
405    };
406
407    let mut outlines = Vec::new();
408
409    for pod in podcasts {
410        // opml.add_feed(&pod.title, &pod.url);
411        outlines.push(Outline {
412            text: pod.title.clone(),
413            r#type: Some("rss".to_string()),
414            xml_url: Some(pod.url.clone()),
415            title: Some(pod.title.clone()),
416            ..Outline::default()
417        });
418    }
419
420    opml.body = Body { outlines };
421    opml
422}
423
/// Struct used to communicate relevant episode data to the taskpool when
/// requesting a download.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct EpData {
    /// Id of the episode (presumably its database id -- confirm against the caller).
    pub id: i64,
    /// Id of the podcast the episode belongs to (presumably its database id
    /// -- confirm against the caller).
    pub pod_id: i64,
    /// Episode title; `download_file` derives the local file name from it.
    pub title: String,
    /// URL the episode is downloaded from.
    pub url: String,
    /// Publication date, if known; when set, `download_file` appends it to
    /// the file name.
    pub pubdate: Option<DateTime<Utc>>,
    /// Local path of the downloaded file; `download_file` fills this in once
    /// the destination file has been created.
    pub file_path: Option<PathBuf>,
}
434
435/// This is the function the main controller uses to indicate new files to download.
436///
437/// It uses the taskpool to start jobs for every episode to be downloaded.
438/// New jobs can be requested by the user while there are still ongoing jobs.
439///
440/// If `tx_to_main` is closed, no errors will be throws and the task will continue
441pub fn download_list(
442    episodes: Vec<EpData>,
443    dest: &Path,
444    max_retries: usize,
445    tp: &TaskPool,
446    tx_to_main: &Sender<Msg>,
447) {
448    // parse episode details and push to queue
449    for ep in episodes {
450        let tx = tx_to_main.clone();
451        let dest2 = dest.to_path_buf();
452        tp.execute(async move {
453            let _ = tx.send(Msg::Podcast(PCMsg::DLStart(ep.clone())));
454            let result = download_file(ep, dest2, max_retries).await;
455            let _ = tx.send(Msg::Podcast(result));
456        });
457    }
458}
459
460/// Downloads a file to a local filepath, returning `DownloadMsg` variant
461/// indicating success or failure.
462async fn download_file(
463    mut ep_data: EpData,
464    destination_path: PathBuf,
465    mut max_retries: usize,
466) -> PCMsg {
467    let agent = ClientBuilder::new()
468        .connect_timeout(Duration::from_secs(10))
469        .build()
470        .expect("reqwest client build failed");
471
472    let response: reqwest::Response = loop {
473        let response = agent.get(&ep_data.url).send().await;
474        if let Ok(resp) = response {
475            break resp;
476        }
477        max_retries -= 1;
478        if max_retries == 0 {
479            return PCMsg::DLResponseError(ep_data);
480        }
481    };
482
483    // figure out the file type
484    let ext = if let Some(content_type) = response
485        .headers()
486        .get("content-type")
487        .and_then(|v| v.to_str().ok())
488    {
489        match content_type {
490            "audio/x-m4a" | "audio/mp4" => "m4a",
491            "audio/x-matroska" => "mka",
492            "audio/flac" => "flac",
493            "video/quicktime" => "mov",
494            "video/mp4" => "mp4",
495            "video/x-m4v" => "m4v",
496            "video/x-matroska" => "mkv",
497            "video/webm" => "webm",
498            // "audio/mpeg" => "mp3",
499            // fallback
500            _ => "mp3",
501        }
502    } else {
503        error!("The response doesn't contain a content type, using \"mp3\" as fallback!");
504        "mp3"
505    };
506
507    let mut file_name = sanitize_with_options(
508        &ep_data.title,
509        Options {
510            truncate: true,
511            windows: true, // for simplicity, we'll just use Windows-friendly paths for everyone
512            replacement: "",
513        },
514    );
515
516    if let Some(pubdate) = ep_data.pubdate {
517        file_name = format!("{file_name}_{}", pubdate.format("%Y%m%d_%H%M%S"));
518    }
519
520    let mut file_path = destination_path;
521    file_path.push(format!("{file_name}.{ext}"));
522
523    let Ok(mut dst) = File::create(&file_path) else {
524        return PCMsg::DLFileCreateError(ep_data);
525    };
526
527    ep_data.file_path = Some(file_path);
528
529    let Ok(bytes) = response.bytes().await else {
530        return PCMsg::DLFileCreateError(ep_data);
531    };
532
533    match std::io::copy(&mut bytes.reader(), &mut dst) {
534        Ok(_) => PCMsg::DLComplete(ep_data),
535        Err(_) => PCMsg::DLFileWriteError(ep_data),
536    }
537}