use std::lazy::SyncLazy;
use regex::Regex;
use reqwest::Client;
use serde::Deserialize;
use url::Url;
use crate::{Error, Id, IdBuf, PlayerResponse, VideoDescrambler, VideoInfo};
use crate::video_info::player_response::playability_status::PlayabilityStatus;
/// Fetches everything that is needed to build a [`VideoDescrambler`] for one video.
///
/// Its `Display` output has the form `VideoFetcher(<video_id>)`.
#[derive(Clone, derive_more::Display, derivative::Derivative)]
#[display(fmt = "VideoFetcher({})", video_id)]
#[derivative(Debug, PartialEq, Eq)]
pub struct VideoFetcher {
/// The id of the video this fetcher works on.
video_id: IdBuf,
/// The url produced by `video_id.watch_url()` when the fetcher is constructed.
watch_url: Url,
// The client is left out of both the `Debug` output and equality comparisons.
#[derivative(Debug = "ignore", PartialEq = "ignore")]
/// The HTTP client through which every request of this fetcher is sent.
client: Client,
}
impl VideoFetcher {
/// Builds a [`VideoFetcher`] for the video that `url` refers to.
///
/// The id is parsed from the textual form of `url` with [`Id::from_raw`]; the rest of the
/// construction is delegated to [`VideoFetcher::from_id`].
///
/// # Errors
/// Fails when [`Id::from_raw`] rejects the url, or when [`VideoFetcher::from_id`] fails.
#[inline]
#[cfg(any(feature = "regex", doc))]
#[doc(cfg(feature = "regex"))]
pub fn from_url(url: &Url) -> crate::Result<Self> {
    let video_id = Id::from_raw(url.as_str())?.into_owned();
    Self::from_id(video_id)
}
/// Builds a [`VideoFetcher`] for `video_id`, backed by a newly created [`Client`].
///
/// The client is built with its cookie store enabled.
///
/// # Errors
/// Fails when the [`Client`] cannot be built.
#[inline]
pub fn from_id(video_id: IdBuf) -> crate::Result<Self> {
    let builder = Client::builder().cookie_store(true);
    let client = builder.build()?;
    Ok(Self::from_id_with_client(video_id, client))
}
/// Builds a [`VideoFetcher`] for `video_id` that sends all of its requests through `client`.
///
/// The watch url is derived from `video_id` by calling its `watch_url()` method.
#[inline]
pub fn from_id_with_client(video_id: IdBuf, client: Client) -> Self {
    // The url has to be derived before `video_id` is moved into the struct.
    let watch_url = video_id.watch_url();
    Self { video_id, watch_url, client }
}
/// Fetches all data of the video and hands it over to a [`VideoDescrambler`].
///
/// The steps are:
/// 1. download the watch page and check the availability of the video,
/// 2. download the player JavaScript and the video info, both driven by one
///    `tokio::try_join!`,
/// 3. if the video info carries no streaming data, replace its player response with the
///    one that was extracted from the html (if any).
///
/// # Errors
/// - Any error of the requests, of [`VideoFetcher::check_availability`] or of the parsing
///   helpers is passed on.
/// - [`Error::UnexpectedResponse`] when neither the video info contains streaming data nor
///   the html contained a player response.
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
pub async fn fetch(self) -> crate::Result<VideoDescrambler> {
    let watch_html = self.get_html(&self.watch_url).await?;
    let age_restricted = is_age_restricted(&watch_html);
    Self::check_availability(&watch_html, age_restricted)?;

    let js_future = self.get_js(age_restricted, &watch_html);
    let video_info_future = self.get_video_info(age_restricted);
    let ((js, html_player_response), mut video_info) =
        tokio::try_join!(js_future, video_info_future)?;

    // Only fall back to the player response from the html when the video info is lacking
    // its streaming data.
    if video_info.player_response.streaming_data.is_none() {
        video_info.player_response = html_player_response.ok_or_else(|| {
            Error::UnexpectedResponse(
                "StreamingData is none and the watch html did not contain a valid PlayerResponse".into()
            )
        })?;
    }

    Ok(VideoDescrambler {
        video_info,
        client: self.client,
        js,
    })
}
/// Returns the id of the video this fetcher works on, as a borrowed [`Id`].
#[inline]
pub fn video_id(&self) -> Id<'_> {
self.video_id.as_borrowed()
}
/// Returns a reference to the watch url that is stored in this fetcher.
#[inline]
pub fn watch_url(&self) -> &Url {
&self.watch_url
}
/// Checks whether the video described by `watch_html` can be fetched.
///
/// Every occurrence of `playabilityStatus` (optionally quoted, followed by `:` or `=`) in
/// `watch_html` is tried in order; the first one that is followed by a JSON object which
/// deserializes into a [`PlayabilityStatus`] decides the outcome.
///
/// Returns `Ok(())` for [`PlayabilityStatus::Ok`], and for
/// [`PlayabilityStatus::LoginRequired`] when `is_age_restricted` is `true`.
///
/// This function depends on the `fetch`-gated [`json_object`] and is only needed by
/// [`VideoFetcher::fetch`], so it is gated behind the same feature.
///
/// # Errors
/// - [`Error::UnexpectedResponse`] when no occurrence yields a valid [`PlayabilityStatus`].
/// - [`Error::VideoUnavailable`], carrying the status, for every other status.
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn check_availability(watch_html: &str, is_age_restricted: bool) -> crate::Result<()> {
    static PLAYABILITY_STATUS: SyncLazy<Regex> = SyncLazy::new(||
        Regex::new(r#"["']?playabilityStatus["']?\s*[:=]\s*"#).unwrap()
    );

    let playability_status = PLAYABILITY_STATUS
        .find_iter(watch_html)
        .find_map(|m| {
            // A candidate that is not followed by a complete, deserializable JSON object
            // is skipped, and the search continues with the next occurrence.
            let json = json_object(watch_html.get(m.end()..)?).ok()?;
            serde_json::from_str::<PlayabilityStatus>(json).ok()
        })
        .ok_or_else(|| Error::UnexpectedResponse(
            "watch html did not contain a PlayabilityStatus".into()
        ))?;

    match playability_status {
        PlayabilityStatus::Ok { .. } => Ok(()),
        PlayabilityStatus::LoginRequired { .. } if is_age_restricted => Ok(()),
        ps => Err(Error::VideoUnavailable(ps)),
    }
}
/// Downloads the player JavaScript of the video.
///
/// The url of the JavaScript is extracted with [`js_url`] from the embed page (which is
/// downloaded first) when `is_age_restricted` is `true`, and from `watch_html` otherwise.
///
/// Returns the downloaded JavaScript together with the player response that [`js_url`]
/// found in the inspected html, if any.
///
/// # Errors
/// Passes on every error of the requests and of [`js_url`].
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
async fn get_js(
    &self,
    is_age_restricted: bool,
    watch_html: &str,
) -> crate::Result<(String, Option<PlayerResponse>)> {
    let (player_js_url, player_response) = if is_age_restricted {
        let embed_url = self.video_id.embed_url();
        let embed_html = self.get_html(&embed_url).await?;
        js_url(&embed_html)?
    } else {
        js_url(watch_html)?
    };

    let js = self.get_html(&player_js_url).await?;
    Ok((js, player_response))
}
/// Downloads the video info and deserializes it from its query-string representation.
///
/// The `is_age_restricted` field of the returned [`VideoInfo`] is set to the value that was
/// passed in.
///
/// # Errors
/// Passes on the error of the request or of the deserialization with `serde_qs`.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
async fn get_video_info(&self, is_age_restricted: bool) -> crate::Result<VideoInfo> {
    let url = self.get_video_info_url(is_age_restricted);
    let raw = self.get_html(&url).await?;

    let mut video_info: VideoInfo = serde_qs::from_str(raw.as_str())?;
    video_info.is_age_restricted = is_age_restricted;
    Ok(video_info)
}
/// Picks the video info url for this video.
///
/// Delegates to [`video_info_url_age_restricted`] when `is_age_restricted` is `true` and to
/// [`video_info_url`] otherwise; both receive the borrowed id and the watch url.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn get_video_info_url(&self, is_age_restricted: bool) -> Url {
    let id = self.video_id.as_borrowed();
    let watch_url = &self.watch_url;

    if !is_age_restricted {
        return video_info_url(id, watch_url);
    }
    video_info_url_age_restricted(id, watch_url)
}
/// Sends a GET request to `url` with the client of this fetcher and returns the response
/// body as text.
///
/// # Errors
/// Passes on the error of sending the request or of reading the body.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
async fn get_html(&self, url: &Url) -> crate::Result<String> {
    let response = self.client.get(url.as_str()).send().await?;
    let body = response.text().await?;
    Ok(body)
}
}
/// Returns `true` when `watch_html` contains the text `og:restrictions:age`.
///
/// The needle contains no regex metacharacters, so a plain substring search gives the same
/// answer as a regex match without compiling a regex.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn is_age_restricted(watch_html: &str) -> bool {
    watch_html.contains("og:restrictions:age")
}
/// Builds the video info url for `video_id`.
///
/// The query consists of `video_id`, `ps=default`, `eurl` (the textual form of `watch_url`)
/// and `hl=en_US`, in this order.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn video_info_url(video_id: Id<'_>, watch_url: &Url) -> Url {
    _video_info_url(&[
        ("video_id", video_id.as_str()),
        ("ps", "default"),
        ("eurl", watch_url.as_str()),
        ("hl", "en_US"),
    ])
}
/// Builds the video info url that is used when the video is age restricted.
///
/// The query consists of `video_id`, `eurl` (`https://youtube.googleapis.com/v/<video_id>`)
/// and `sts`, in this order. `sts` is the number captured by the pattern `"sts" : <digits>`
/// in the textual form of `watch_url`, or the empty string when the pattern does not match.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn video_info_url_age_restricted(video_id: Id<'_>, watch_url: &Url) -> Url {
    static PATTERN: SyncLazy<Regex> = SyncLazy::new(|| Regex::new(r#""sts"\s*:\s*(\d+)"#).unwrap());

    // NOTE(review): the pattern looks like it targets a JSON fragment, which a url is
    // unlikely to contain, so `sts` presumably always ends up empty here. Searching the
    // embed html instead would need a signature change; confirm the intent.
    let sts = PATTERN
        .captures(watch_url.as_str())
        .and_then(|caps| caps.get(1))
        .map_or("", |number| number.as_str());
    let eurl = format!("https://youtube.googleapis.com/v/{}", video_id.as_str());

    _video_info_url(&[
        ("video_id", video_id.as_str()),
        ("eurl", eurl.as_str()),
        ("sts", sts),
    ])
}
/// Appends `params` as query pairs to `https://youtube.com/get_video_info?`.
///
/// # Panics
/// Panics when the resulting url cannot be parsed.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn _video_info_url(params: &[(&str, &str)]) -> Url {
    const ENDPOINT: &str = "https://youtube.com/get_video_info?";
    Url::parse_with_params(ENDPOINT, params).unwrap()
}
/// Extracts the url of the player JavaScript from `html`.
///
/// The path is taken from the assets of the player response found by
/// [`get_ytplayer_config`]. When there is no player response, or it has no assets, the path
/// is searched directly in `html` with [`get_ytplayer_js`]. The path is then appended to
/// `https://youtube.com`.
///
/// Returns the url together with the player response, if one was found.
///
/// # Errors
/// Passes on the error of [`get_ytplayer_js`] and of parsing the url.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn js_url(html: &str) -> crate::Result<(Url, Option<PlayerResponse>)> {
    // A missing player response is not an error here; the regex fallback may still succeed.
    let player_response = get_ytplayer_config(html).ok();

    let js_path = match player_response.as_ref().and_then(|pr| pr.assets.as_ref()) {
        Some(assets) => assets.js.as_str(),
        None => get_ytplayer_js(html)?,
    };
    let url = Url::parse(&format!("https://youtube.com{}", js_path))?;

    Ok((url, player_response))
}
/// Extracts the [`PlayerResponse`] that is embedded in `html`.
///
/// Three patterns are tried in order (`ytplayer.config = `, `ytInitialPlayerResponse = ` and
/// the `PLAYER_CONFIG` entry of a `yt.setConfig(` call). The first pattern whose match is
/// followed by a JSON object that deserializes successfully wins.
///
/// # Errors
/// [`Error::UnexpectedResponse`] when none of the patterns leads to a valid player response.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn get_ytplayer_config(html: &str) -> crate::Result<PlayerResponse> {
    static CONFIG_PATTERNS: SyncLazy<[Regex; 3]> = SyncLazy::new(|| [
        Regex::new(r"ytplayer\.config\s*=\s*").unwrap(),
        Regex::new(r"ytInitialPlayerResponse\s*=\s*").unwrap(),
        Regex::new(r#"yt\.setConfig\(.*['"]PLAYER_CONFIG['"]:\s*"#).unwrap()
    ]);

    for pattern in CONFIG_PATTERNS.iter() {
        // A pattern that fails at any stage is skipped in favour of the next one.
        let attempt = parse_for_object(html, pattern).and_then(deserialize_ytplayer_config);
        if let Ok(player_response) = attempt {
            return Ok(player_response);
        }
    }

    Err(Error::UnexpectedResponse(
        "Could not find ytplayer_config in the watch html.".into()
    ))
}
/// Returns the JSON object that follows the first match of `regex` in `html`.
///
/// # Errors
/// - [`Error::Internal`] when `regex` does not match, or when the end of the match is not a
///   valid position to slice `html` at.
/// - Every error of [`json_object`].
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn parse_for_object<'a>(html: &'a str, regex: &Regex) -> crate::Result<&'a str> {
    let matched = regex
        .find(html)
        .ok_or(Error::Internal("The regex does not match"))?;
    let after_match = html
        .get(matched.end()..)
        .ok_or(Error::Internal("The regex does not match meaningful"))?;

    json_object(after_match)
}
/// Deserializes a [`PlayerResponse`] from `json`.
///
/// Two layouts are accepted: an object with an `args` field that in turn has a
/// `player_response` field, or the player response itself.
///
/// # Errors
/// Passes on the error of `serde_json` when `json` matches neither layout.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn deserialize_ytplayer_config(json: &str) -> crate::Result<PlayerResponse> {
    #[derive(Deserialize)]
    struct Args { player_response: PlayerResponse }

    #[derive(Deserialize)]
    #[serde(untagged)]
    enum PlayerConfig { Args { args: Args }, Response(PlayerResponse) }

    let config = serde_json::from_str::<PlayerConfig>(json)?;
    let player_response = match config {
        PlayerConfig::Args { args } => args.player_response,
        PlayerConfig::Response(player_response) => player_response,
    };
    Ok(player_response)
}
/// Searches `html` for the path of the player JavaScript.
///
/// The path has the form `/s/player/<segment>/<segments>/base.js`; the first occurrence is
/// returned as a slice of `html`.
///
/// # Errors
/// [`Error::UnexpectedResponse`] when `html` contains no such path.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn get_ytplayer_js(html: &str) -> crate::Result<&str> {
    static JS_URL_PATTERNS: SyncLazy<Regex> = SyncLazy::new(||
        Regex::new(r"(/s/player/[\w\d]+/[\w\d_/.]+/base\.js)").unwrap()
    );

    JS_URL_PATTERNS
        .captures(html)
        // Group 1 spans the whole pattern, so it is present whenever there is a match.
        .and_then(|caps| caps.get(1))
        .map(|path| path.as_str())
        // The message is a plain literal; no `format!` (and no allocation for it) is needed.
        .ok_or_else(|| Error::UnexpectedResponse(
            "could not extract the ytplayer-javascript url from the watch html".into()
        ))
}
/// Returns the first complete JSON object in `html` as a slice of `html`.
///
/// Everything in front of the first `{` is ignored. Starting from that brace the bytes are
/// scanned with [`is_json_object_end`] until the brace is closed again; the returned slice
/// spans from the opening to the closing brace, both included.
///
/// # Errors
/// - [`Error::Internal`] when `html` contains no `{` at all.
/// - [`Error::Internal`] when the object is never closed.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn json_object(html: &str) -> crate::Result<&str> {
    let object_start = html
        .find('{')
        .ok_or(Error::Internal("cannot parse a json object from an empty string"))?;
    let candidate = &html[object_start..];

    // The opening brace is accounted for up front, so the scan starts behind it.
    let mut stack = vec![b'{'];
    let mut skip = false;
    let offset_behind_brace = candidate
        .bytes()
        .skip(1)
        .position(|byte| is_json_object_end(byte, &mut skip, &mut stack))
        .ok_or(Error::Internal("could not find a closing delimiter"))?;

    // `position` counts from the first scanned byte, which sits at index 1 of `candidate`.
    let closing_brace = offset_behind_brace + 1;
    let full_obj = candidate
        .get(..=closing_brace)
        .expect("i must always mark the position of a valid '}' char");
    Ok(full_obj)
}
/// Feeds one byte into the scanner state and reports whether the outermost object is closed.
///
/// - `skip`: when set, `curr_char` is ignored and the flag is cleared. It gets set by a
///   backslash inside a string.
/// - `stack`: the currently open delimiters (`{`, `[` or `"`), innermost last.
///
/// Inside a string only `"` (closes the string) and `\` (sets `skip`) have an effect.
/// Outside of a string `{`, `[` and `"` open a new context, while `}` and `]` close the
/// innermost context if it matches.
///
/// Returns `true` when `stack` is empty after processing `curr_char`.
///
/// # Panics
/// Panics when `stack` is empty and `skip` is not set.
#[inline]
#[cfg(any(feature = "fetch", doc))]
#[doc(cfg(feature = "fetch"))]
fn is_json_object_end(curr_char: u8, skip: &mut bool, stack: &mut Vec<u8>) -> bool {
    if *skip {
        *skip = false;
        return false;
    }

    let context = *stack
        .last()
        .expect("stack must start with len == 1, and search must end, when len == 0");

    if context == b'"' {
        match curr_char {
            b'"' => { stack.pop(); }
            b'\\' => *skip = true,
            _ => {}
        }
    } else {
        match curr_char {
            b'{' | b'[' | b'"' => stack.push(curr_char),
            b'}' if context == b'{' => { stack.pop(); }
            b']' if context == b'[' => { stack.pop(); }
            _ => {}
        }
    }

    stack.is_empty()
}