use crate::utils::{download, selectors::SCRIPT};
use scraper::Html;
use serde_json::Value;
use thiserror::Error;
#[derive(Debug, Error)]
pub enum ScrapeYoutubePlaylistError {
#[error("{0}")]
ReqwestError(#[from] reqwest::Error),
#[error("selector parse error")]
DeserializeError(#[from] serde_json::Error),
#[error("missing valid `ytInitialData` script")]
MissingScript,
}
#[derive(Debug, Clone, Default)]
pub struct Playlist {
pub title: String,
pub artist: String,
pub thumbnail: String,
pub tracks: Vec<PlaylistItem>,
}
impl Playlist {
#[must_use]
pub fn len(&self) -> usize {
self.tracks.len()
}
#[must_use]
pub fn is_empty(&self) -> bool {
self.tracks.is_empty()
}
}
#[derive(Debug, Clone, Default)]
pub struct PlaylistItem {
pub title: Option<String>,
pub id: Option<String>,
}
fn extract_playlist_data(json: &Value) -> Option<&Value> {
json.get("contents")?
.get("twoColumnBrowseResultsRenderer")?
.get("tabs")?
.get(0)?
.get("tabRenderer")?
.get("content")?
.get("sectionListRenderer")?
.get("contents")?
.get(0)?
.get("itemSectionRenderer")?
.get("contents")?
.get(0)?
.get("playlistVideoListRenderer")?
.get("contents")
}
fn extract_playlist_item(extracted_json: &Value) -> PlaylistItem {
fn extract_title(j: &Value) -> Option<String> {
if let Value::String(title) = j.get("title")?.get("runs")?.get(0)?.get("text")? {
Some(title.clone())
} else {
None
}
}
fn extract_id(j: &Value) -> Option<String> {
if let Some(Value::String(id)) = j.get("videoId") {
Some(id.clone())
} else {
None
}
}
if let Some(extracted_json) = extracted_json.get("playlistVideoRenderer") {
PlaylistItem {
title: extract_title(extracted_json),
id: extract_id(extracted_json),
}
} else {
PlaylistItem::default()
}
}
fn extract_title(json: &Value) -> &str {
fn remove_title_noise(raw_title: &str) -> &str {
raw_title.strip_prefix("Album – ").unwrap_or(raw_title)
}
fn extract_title_opt(json: &Value) -> Option<&str> {
json.get("header")?
.get("playlistHeaderRenderer")?
.get("title")?
.get("simpleText")?
.as_str()
.map(remove_title_noise)
}
extract_title_opt(json).unwrap_or_default()
}
fn extract_thumbnail(_json: &Value) -> &str {
""
}
fn extract_artist(json: &Value) -> &str {
fn extract_artist_opt(json: &Value) -> Option<&str> {
extract_playlist_data(json)?
.as_array()?
.first()?
.get("playlistVideoRenderer")?
.get("shortBylineText")?
.get("runs")?
.as_array()?
.first()?
.get("text")?
.as_str()
}
extract_artist_opt(json).unwrap_or_default()
}
pub fn scrape_playlist(url: &str) -> Result<Playlist, ScrapeYoutubePlaylistError> {
let resp = download(url)?.text()?;
let doc = Html::parse_document(&resp);
for script in doc.select(&SCRIPT) {
let inner = script.inner_html();
if let Some(Ok(json)) = inner
.strip_prefix("var ytInitialData = ")
.and_then(|s| {
if s.ends_with(';') {
s.strip_suffix(';')
} else {
Some(s)
}
})
.map(serde_json::from_str::<Value>)
{
if let Some(Value::Array(tracks)) = extract_playlist_data(&json) {
return Ok(Playlist {
title: extract_title(&json).to_string(),
artist: extract_artist(&json).to_string(),
thumbnail: extract_thumbnail(&json).to_string(),
tracks: tracks.iter().map(extract_playlist_item).collect(),
});
}
}
}
Err(ScrapeYoutubePlaylistError::MissingScript)
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn basic_a() {
let playlist = scrape_playlist(
r#"https://www.youtube.com/playlist?list=OLAK5uy_mZcxjzRvOZAUa2H6Pf8LVvyLDGeBSdmJQ"#,
)
.unwrap();
assert_eq!(playlist.tracks.len(), 6);
for track in playlist.tracks {
assert_ne!(track.title, None);
assert_ne!(track.id, None);
}
}
}