use regex::Regex;
use reqwest::header;
use reqwest::{Client, StatusCode, Url};
use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
/// Template for the URL of a video's watch page.
///
/// `{video_id}` is a literal placeholder that is substituted with `str::replace`
/// before the request is made; it is not a `format!` argument.
pub const WATCH_URL: &str = "https://www.youtube.com/watch?v={video_id}";
/// Downloads YouTube watch-page HTML using a caller-supplied `reqwest` client.
pub struct YoutubePageFetcher {
/// HTTP client through which every request of this fetcher is sent.
client: Client,
}
impl YoutubePageFetcher {
pub fn new(client: Client) -> Self {
Self { client }
}
pub async fn fetch_video_page(
&self,
video_id: &str,
) -> Result<String, CouldNotRetrieveTranscript> {
let url = WATCH_URL.replace("{video_id}", video_id);
let response = self
.client
.get(&url)
.header("Accept-Language", "en-US")
.send()
.await
.map_err(|e| {
let mut error = CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
e.to_string(),
)),
};
if let Some(_status @ (StatusCode::FORBIDDEN | StatusCode::TOO_MANY_REQUESTS)) =
e.status()
{
error = CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::IpBlocked(None)),
};
}
error
})?;
if !response.status().is_success() {
return Err(CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
format!("YouTube returned status code: {}", response.status()),
)),
});
}
let html = response
.text()
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
e.to_string(),
)),
})?;
if html.contains("action=\"https://consent.youtube.com/s\"") {
self.create_consent_cookie(&html, video_id).await?;
let consent_response = self
.client
.get(&url)
.header("Accept-Language", "en-US")
.send()
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
e.to_string(),
)),
})?;
let html_with_consent =
consent_response
.text()
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
e.to_string(),
)),
})?;
if html_with_consent.contains("action=\"https://consent.youtube.com/s\"") {
return Err(CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::FailedToCreateConsentCookie),
});
}
Ok(html_with_consent)
} else {
Ok(html)
}
}
async fn create_consent_cookie(
&self,
html: &str,
video_id: &str,
) -> Result<(), CouldNotRetrieveTranscript> {
let re = Regex::new(r#"name="v" value="([^"]+)"#).unwrap();
if let Some(caps) = re.captures(html) {
if let Some(v_value) = caps.get(1) {
let cookie_value = format!("YES+{}", v_value.as_str());
let cookie_url = Url::parse("https://www.youtube.com").unwrap();
let cookie_str = format!(
"CONSENT={}; Domain=.youtube.com; Path=/; Max-Age=31536000",
cookie_value
);
self.client
.get(cookie_url)
.header(header::COOKIE, &cookie_str)
.send()
.await
.map_err(|_| CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::FailedToCreateConsentCookie),
})?;
return Ok(());
}
}
Err(CouldNotRetrieveTranscript {
video_id: video_id.to_string(),
reason: Some(CouldNotRetrieveTranscriptReason::FailedToCreateConsentCookie),
})
}
}