termusiclib/podcast/
mod.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
// Thanks to the author of shellcaster (https://github.com/jeff-hughes/shellcaster). Most of the following code is taken from it.

#[allow(unused)]
pub mod db;
#[allow(clippy::module_name_repetitions)]
pub mod episode;
// repetitive name, but will do for now
#[allow(clippy::module_inception)]
mod podcast;

use crate::config::v2::server::PodcastSettings;
use crate::taskpool::TaskPool;
use crate::types::{Msg, PCMsg};
use db::Database;
use episode::{Episode, EpisodeNoId};
#[allow(clippy::module_name_repetitions)]
pub use podcast::{Podcast, PodcastNoId};

use anyhow::{bail, Context, Result};
use bytes::Buf;
use chrono::{DateTime, Utc};
use lazy_static::lazy_static;
use opml::{Body, Head, Outline, OPML};
use regex::Regex;
use reqwest::ClientBuilder;
use rfc822_sanitizer::parse_from_rfc2822_with_fallback;
use rss::{Channel, Item};
use sanitize_filename::{sanitize_with_options, Options};
use std::fs::File;
use std::io::Write as _;
use std::path::{Path, PathBuf};
use std::sync::mpsc::{self, Sender};
use std::time::Duration;

/// Minimum number of columns needed before the (unplayed/total) counts
/// are displayed after the podcast title.
pub const PODCAST_UNPLAYED_TOTALS_LENGTH: usize = 25;

/// Minimum number of columns needed before the duration of the episode
/// is displayed.
pub const EPISODE_DURATION_LENGTH: usize = 45;

/// Minimum number of columns needed before the pubdate of the episode
/// is displayed.
pub const EPISODE_PUBDATE_LENGTH: usize = 60;

lazy_static! {
    /// Regex for parsing an episode "duration", which could take the form
    /// of HH:MM:SS, MM:SS, or SS.
    ///
    /// The pattern is not anchored: it matches the first run of digits found
    /// anywhere in the input, optionally followed by up to two `:digits`
    /// groups. Capture groups 1 to 3 hold the components from left to right.
    static ref RE_DURATION: Regex = Regex::new(r"(\d+)(?::(\d+))?(?::(\d+))?").expect("Regex error");

    /// Regex for removing "a ", "an ", and "the " from the beginning of
    /// podcast titles.
    ///
    /// The pattern is case-sensitive and only lists the lowercase forms.
    /// NOTE(review): presumably titles are lowercased before matching -- confirm
    /// against the callers.
    static ref RE_ARTICLES: Regex = Regex::new(r"^(a|an|the) ").expect("Regex error");
}

/// Defines interface used for both podcasts and episodes, to be
/// used and displayed in menus.
pub trait Menuable {
    /// Returns the numeric identifier of this item.
    fn get_id(&self) -> i64;
    /// Returns the title to display for this item.
    ///
    /// NOTE(review): `length` is presumably the available display width in
    /// columns -- confirm against the implementors.
    fn get_title(&self, length: usize) -> String;
    /// Returns whether this item counts as played.
    fn is_played(&self) -> bool;
}

/// Lightweight description of a podcast feed that should be fetched.
#[derive(Debug, Clone, Eq, PartialEq)]
#[allow(clippy::module_name_repetitions)]
pub struct PodcastFeed {
    /// Identifier of an already known podcast, or `None` for a feed that
    /// has not been stored yet.
    pub id: Option<i64>,
    /// Address of the RSS feed.
    pub url: String,
    /// Human readable title, if one is known.
    pub title: Option<String>,
}

impl PodcastFeed {
    /// Builds a feed description, taking an owned copy of `url`.
    pub fn new(id: Option<i64>, url: &str, title: Option<String>) -> Self {
        let url = url.to_owned();
        Self { id, url, title }
    }
}

/// Spawns a new task to check a feed and retrieve podcast data.
///
/// If `tx_to_main` is closed, no errors will be throws and the task will continue
pub fn check_feed(feed: PodcastFeed, max_retries: usize, tp: &TaskPool, tx_to_main: Sender<Msg>) {
    tp.execute(async move {
        let _ = tx_to_main.send(Msg::Podcast(PCMsg::FetchPodcastStart(feed.url.clone())));
        match get_feed_data(&feed.url, max_retries).await {
            Ok(pod) => match feed.id {
                Some(id) => {
                    let _ = tx_to_main.send(Msg::Podcast(PCMsg::SyncData((id, pod))));
                }
                None => {
                    let _ = tx_to_main.send(Msg::Podcast(PCMsg::NewData(pod)));
                }
            },
            Err(err) => {
                error!("get_feed_data had a Error: {:#?}", err);
                let _ = tx_to_main.send(Msg::Podcast(PCMsg::Error(feed.url.to_string(), feed)));
            }
        }
    });
}

/// Given a URL, this attempts to pull the data about a podcast and its
/// episodes from an RSS feed.
async fn get_feed_data(url: &str, mut max_retries: usize) -> Result<PodcastNoId> {
    let agent = ClientBuilder::new()
        .connect_timeout(Duration::from_secs(5))
        .build()?;

    let resp: reqwest::Response = loop {
        let response = agent.get(url).send().await;
        if let Ok(resp) = response {
            break resp;
        }
        max_retries -= 1;
        if max_retries == 0 {
            bail!("No response from feed");
        }
    };

    let channel = Channel::read_from(resp.bytes().await?.reader())?;
    Ok(parse_feed_data(channel, url))
}

/// Given a Channel with the RSS feed data, this parses the data about a
/// podcast and its episodes and returns a Podcast. There are existing
/// specifications for podcast RSS feeds that a feed should adhere to, but
/// this does try to make some attempt to account for the possibility that
/// a feed might not be valid according to the spec.
fn parse_feed_data(channel: Channel, url: &str) -> PodcastNoId {
    let title = channel.title().to_string();
    let url = url.to_string();
    let description = Some(channel.description().to_string());
    let last_checked = Utc::now();

    let mut author = None;
    let mut explicit = None;
    let mut image_url = None;
    if let Some(itunes) = channel.itunes_ext() {
        author = itunes.author().map(std::string::ToString::to_string);
        explicit = itunes.explicit().and_then(|s| {
            let ss = s.to_lowercase();
            match &ss[..] {
                "yes" | "explicit" | "true" => Some(true),
                "no" | "clean" | "false" => Some(false),
                _ => None,
            }
        });
        image_url = itunes.image().map(std::string::ToString::to_string);
    }

    let mut episodes = Vec::new();
    let items = channel.into_items();
    if !items.is_empty() {
        for item in &items {
            episodes.push(parse_episode_data(item));
        }
    }

    PodcastNoId {
        title,
        url,
        description,
        author,
        explicit,
        last_checked,
        episodes,
        image_url,
    }
}

/// Converts a single RSS item into an `EpisodeNoId`.
///
/// Missing title, enclosure URL, guid and description all become empty
/// strings, and a publication date that is absent or cannot be parsed
/// becomes `None`, so items that do not follow the podcast specifications
/// are still accepted. Duration and image are taken from the iTunes
/// extension when the item has one.
fn parse_episode_data(item: &Item) -> EpisodeNoId {
    let title = item.title().map(str::to_owned).unwrap_or_default();
    let url = item
        .enclosure()
        .map(|enclosure| enclosure.url().to_owned())
        .unwrap_or_default();
    let guid = item
        .guid()
        .map(|id| id.value().to_owned())
        .unwrap_or_default();
    let description = item.description().map(str::to_owned).unwrap_or_default();
    let pubdate = item
        .pub_date()
        .and_then(|raw| parse_from_rfc2822_with_fallback(raw).ok())
        .map(Into::into);

    let itunes = item.itunes_ext();
    let duration = itunes
        .and_then(|ext| duration_to_int(ext.duration()))
        .map(i64::from);
    let image_url = itunes.and_then(|ext| ext.image()).map(str::to_owned);

    EpisodeNoId {
        title,
        url,
        guid,
        description,
        pubdate,
        duration,
        image_url,
    }
}

/// Given a string representing an episode duration, this attempts to
/// convert to an integer representing the duration in seconds. Covers
/// formats HH:MM:SS, MM:SS, and SS.
///
/// `RE_DURATION` is not anchored, so the first run of digits found in the
/// string (plus up to two following `:digits` groups) is what gets
/// converted; surrounding text is ignored.
///
/// Returns `None` if no duration is given, if the string contains no
/// digits, if a component cannot be parsed as an `i32`, or if the total
/// number of seconds does not fit in an `i32`.
fn duration_to_int(duration: Option<&str>) -> Option<i32> {
    let captures = RE_DURATION.captures(duration?)?;

    // Group 0 is the full match; groups 1..=3 hold the components from most
    // to least significant, and groups that did not take part in the match
    // are skipped. Folding with `total * 60 + component` yields
    // `h * 3600 + m * 60 + s`, `m * 60 + s`, or `s` depending on how many
    // components are present. Checked arithmetic turns an overflow caused by
    // an absurd feed value into `None` instead of a panic or a wrapped value.
    captures
        .iter()
        .skip(1)
        .flatten()
        .try_fold(0_i32, |total, component| {
            let value: i32 = component.as_str().parse().ok()?;
            total.checked_mul(60)?.checked_add(value)
        })
}

/// Imports a list of podcasts from OPML format, reading from a file. If the `replace` flag is set, this replaces all
/// existing data in the database.
pub fn import_from_opml(db_path: &Path, config: &PodcastSettings, file: &Path) -> Result<()> {
    let xml = std::fs::read_to_string(file)
        .with_context(|| format!("Could not open OPML file: {}", file.display()))?;

    let mut podcast_list = import_opml_feeds(&xml).with_context(|| {
        "Could not properly parse OPML file -- file may be formatted improperly or corrupted."
    })?;

    if podcast_list.is_empty() {
        println!("No podcasts to import.");
        return Ok(());
    }

    let db_inst = db::Database::new(db_path)?;

    // delete database if we are replacing the data
    // if args.is_present("replace") {
    //     db_inst
    //         .clear_db()
    //         .with_context(|| "Error clearing database")?;
    // } else {
    let old_podcasts = db_inst.get_podcasts()?;

    // if URL is already in database, remove it from import
    podcast_list.retain(|pod| {
        for op in &old_podcasts {
            if pod.url == op.url {
                return false;
            }
        }
        true
    });
    // }

    // check again, now that we may have removed feeds after looking at
    // the database
    if podcast_list.is_empty() {
        println!("No podcasts to import.");
        return Ok(());
    }

    println!("Importing {} podcasts...", podcast_list.len());

    let taskpool = TaskPool::new(usize::from(config.concurrent_downloads_max.get()));
    let (tx_to_main, rx_to_main) = mpsc::channel();

    for pod in &podcast_list {
        check_feed(
            pod.clone(),
            usize::from(config.max_download_retries),
            &taskpool,
            tx_to_main.clone(),
        );
    }

    let mut msg_counter: usize = 0;
    let mut failure = false;
    while let Some(message) = rx_to_main.iter().next() {
        match message {
            Msg::Podcast(PCMsg::NewData(pod)) => {
                msg_counter += 1;
                let title = pod.title.clone();
                let db_result = db_inst.insert_podcast(&pod);
                match db_result {
                    Ok(_) => {
                        println!("Added {title}");
                    }
                    Err(_err) => {
                        failure = true;
                        error!("Error adding {title}");
                    }
                }
            }

            Msg::Podcast(PCMsg::Error(_, feed)) => {
                msg_counter += 1;
                failure = true;
                if let Some(t) = feed.title {
                    error!("Error retrieving RSS feed: {t}");
                } else {
                    error!("Error retrieving RSS feed");
                }
            }

            Msg::Podcast(PCMsg::SyncData((_id, _pod))) => {
                msg_counter += 1;
            }
            _ => {}
        }

        if msg_counter >= podcast_list.len() {
            break;
        }
    }

    if failure {
        bail!("Process finished with errors.");
    }
    println!("Import successful.");

    Ok(())
}

/// Writes every podcast stored in the database to `file` in OPML format.
///
/// The output file is created, or truncated if it already exists.
///
/// # Errors
///
/// Returns an error if the database cannot be opened or queried, if the
/// OPML document cannot be serialised, or if the output file cannot be
/// created or written.
pub fn export_to_opml(db_path: &Path, file: &Path) -> Result<()> {
    let database = Database::new(db_path)?;
    let podcasts = database.get_podcasts()?;

    let xml = export_opml_feeds(&podcasts)
        .to_string()
        .context("Could not create OPML format")?;

    let mut output = File::create(file)
        .with_context(|| format!("Could not create output file: {}", file.display()))?;

    output.write_all(xml.as_bytes()).with_context(|| {
        format!(
            "Could not copy OPML data to output file: {}",
            file.display()
        )
    })
}

/// Import a list of podcast feeds from an OPML file. Supports
/// v1.0, v1.1, and v2.0 OPML files.
fn import_opml_feeds(xml: &str) -> Result<Vec<PodcastFeed>> {
    let opml = OPML::from_str(xml)?;
    let mut feeds = Vec::new();
    for pod in opml.body.outlines {
        if pod.xml_url.is_some() {
            // match against title attribute first -- if this is
            // not set or empty, then match against the text
            // attribute; this must be set, but can be empty
            let title = pod.title.filter(|t| !t.is_empty()).or({
                if pod.text.is_empty() {
                    None
                } else {
                    Some(pod.text)
                }
            });
            feeds.push(PodcastFeed::new(None, &pod.xml_url.unwrap(), title));
        }
    }
    Ok(feeds)
}

/// Converts the current set of podcast feeds to the OPML format
fn export_opml_feeds(podcasts: &[Podcast]) -> OPML {
    let date = Utc::now();
    let mut opml = OPML {
        head: Some(Head {
            title: Some("Termusic Podcast Feeds".to_string()),
            date_created: Some(date.to_rfc2822()),
            ..Head::default()
        }),
        ..Default::default()
    };

    let mut outlines = Vec::new();

    for pod in podcasts {
        // opml.add_feed(&pod.title, &pod.url);
        outlines.push(Outline {
            text: pod.title.clone(),
            r#type: Some("rss".to_string()),
            xml_url: Some(pod.url.clone()),
            title: Some(pod.title.clone()),
            ..Outline::default()
        });
    }

    opml.body = Body { outlines };
    opml
}

/// Struct used to communicate relevant episode data to the taskpool
/// when downloading episodes.
#[derive(Debug, Clone, Eq, PartialEq)]
pub struct EpData {
    /// Identifier of the episode.
    pub id: i64,
    /// NOTE(review): presumably the identifier of the podcast this episode
    /// belongs to -- confirm against the database layer.
    pub pod_id: i64,
    /// Episode title; `download_file` derives the local file name from it.
    pub title: String,
    /// Address the episode is downloaded from.
    pub url: String,
    /// Publication date; when present, `download_file` appends it to the
    /// local file name.
    pub pubdate: Option<DateTime<Utc>>,
    /// Location of the downloaded file; filled in by `download_file` once
    /// the local file has been created.
    pub file_path: Option<PathBuf>,
}

/// Queues one download job per episode on the taskpool.
///
/// This is what the main controller calls to request new downloads; more
/// jobs may be requested while earlier ones are still running. Each job
/// sends `PCMsg::DLStart` before it begins and then the message returned by
/// `download_file` once it is done.
///
/// If `tx_to_main` is closed, no error is raised and the job carries on.
pub fn download_list(
    episodes: Vec<EpData>,
    dest: &Path,
    max_retries: usize,
    tp: &TaskPool,
    tx_to_main: &Sender<Msg>,
) {
    episodes.into_iter().for_each(|episode| {
        // Each job owns its own sender and destination directory.
        let sender = tx_to_main.clone();
        let target_dir = dest.to_path_buf();

        tp.execute(async move {
            let started = PCMsg::DLStart(episode.clone());
            let _ = sender.send(Msg::Podcast(started));

            let outcome = download_file(episode, target_dir, max_retries).await;
            let _ = sender.send(Msg::Podcast(outcome));
        });
    });
}

/// Downloads an episode into `destination_path`, returning the `PCMsg`
/// variant that describes the outcome.
///
/// The request is attempted up to `max_retries` times; a value of `0` is
/// treated like `1`, so at least one attempt is always made. A response
/// with an HTTP error status (4xx/5xx) counts as a failed attempt, so an
/// error page is never stored as if it were the episode.
///
/// The file name is the sanitized episode title, followed by the
/// publication date when one is known, with an extension derived from the
/// response's `Content-Type` header (`mp3` when the header is missing or
/// not recognised).
///
/// Outcomes:
/// * `PCMsg::DLResponseError` -- no successful response was received.
/// * `PCMsg::DLFileCreateError` -- the local file could not be created, or
///   the response body could not be read.
/// * `PCMsg::DLFileWriteError` -- writing the body to the file failed.
/// * `PCMsg::DLComplete` -- the file was written; `file_path` is set.
///
/// # Panics
///
/// Panics if the HTTP client cannot be built.
async fn download_file(
    mut ep_data: EpData,
    destination_path: PathBuf,
    mut max_retries: usize,
) -> PCMsg {
    let agent = ClientBuilder::new()
        .connect_timeout(Duration::from_secs(10))
        .build()
        .expect("reqwest client build failed");

    let response: reqwest::Response = loop {
        let attempt = agent
            .get(&ep_data.url)
            .send()
            .await
            .and_then(reqwest::Response::error_for_status);
        if let Ok(resp) = attempt {
            break resp;
        }
        // `saturating_sub` keeps a budget of 0 from underflowing, which would
        // panic in debug builds and retry almost forever in release builds.
        max_retries = max_retries.saturating_sub(1);
        if max_retries == 0 {
            return PCMsg::DLResponseError(ep_data);
        }
    };

    // figure out the file type
    let ext = match response
        .headers()
        .get("content-type")
        .and_then(|v| v.to_str().ok())
    {
        Some(content_type) => extension_for_content_type(content_type),
        None => {
            error!("The response doesn't contain a content type, using \"mp3\" as fallback!");
            "mp3"
        }
    };

    let mut file_name = sanitize_with_options(
        &ep_data.title,
        Options {
            truncate: true,
            windows: true, // for simplicity, we'll just use Windows-friendly paths for everyone
            replacement: "",
        },
    );

    if let Some(pubdate) = ep_data.pubdate {
        file_name = format!("{file_name}_{}", pubdate.format("%Y%m%d_%H%M%S"));
    }

    let mut file_path = destination_path;
    file_path.push(format!("{file_name}.{ext}"));

    let Ok(mut dst) = File::create(&file_path) else {
        return PCMsg::DLFileCreateError(ep_data);
    };

    ep_data.file_path = Some(file_path);

    let Ok(bytes) = response.bytes().await else {
        return PCMsg::DLFileCreateError(ep_data);
    };

    match std::io::copy(&mut bytes.reader(), &mut dst) {
        Ok(_) => PCMsg::DLComplete(ep_data),
        Err(_) => PCMsg::DLFileWriteError(ep_data),
    }
}

/// Maps a `Content-Type` header value to the file extension used for a
/// downloaded episode.
///
/// Parameters after `;` (for example `; codecs=...`) and letter case are
/// ignored. Unknown types, including `audio/mpeg`, map to `mp3`.
fn extension_for_content_type(content_type: &str) -> &'static str {
    let essence = content_type
        .split(';')
        .next()
        .unwrap_or("")
        .trim()
        .to_ascii_lowercase();

    match essence.as_str() {
        "audio/x-m4a" | "audio/mp4" => "m4a",
        "audio/x-matroska" => "mka",
        "audio/flac" => "flac",
        "video/quicktime" => "mov",
        "video/mp4" => "mp4",
        "video/x-m4v" => "m4v",
        "video/x-matroska" => "mkv",
        "video/webm" => "webm",
        // fallback, also covers "audio/mpeg"
        _ => "mp3",
    }
}