use crate::client::FetchOptions;
use crate::error::FetchError;
use crate::fetchers::Fetcher;
use crate::types::{FetchRequest, FetchResponse};
use crate::DEFAULT_USER_AGENT;
use async_trait::async_trait;
use reqwest::header::{HeaderValue, USER_AGENT};
use serde::Deserialize;
use std::time::Duration;
use url::Url;
/// Timeout for the oEmbed HTTP client; used both as the connect timeout and as
/// the overall per-request timeout.
const API_TIMEOUT: Duration = Duration::from_secs(10);
/// Fetcher for YouTube video URLs.
///
/// Recognizes `/watch?v=<id>` URLs on `youtube.com`, `www.youtube.com` and
/// `m.youtube.com`, plus `youtu.be/<id>` short links, and produces a markdown
/// summary of the video instead of the raw page.
pub struct YouTubeFetcher;
impl YouTubeFetcher {
    /// Creates a new YouTube fetcher.
    pub fn new() -> Self {
        Self
    }

    /// Returns `true` when `id` is non-empty and consists solely of ASCII
    /// letters, digits, `-` and `_`.
    ///
    /// NOTE(review): this assumes YouTube video IDs only ever use the URL-safe
    /// base64 alphabet, which matches IDs seen in practice but should be
    /// confirmed. No length is enforced.
    fn is_plausible_video_id(id: &str) -> bool {
        !id.is_empty()
            && id
                .bytes()
                .all(|b| b.is_ascii_alphanumeric() || b == b'-' || b == b'_')
    }

    /// Extracts the video ID from a YouTube URL.
    ///
    /// Supported shapes:
    /// - `https://{youtube.com,www.youtube.com,m.youtube.com}/watch?v=<id>`
    /// - `https://youtu.be/<id>`
    ///
    /// Returns `None` for any other host or path, when the ID is missing or
    /// empty, or when it contains characters outside the set accepted by
    /// `is_plausible_video_id`.
    fn parse_video_id(url: &Url) -> Option<String> {
        // Only the first path segment matters for both URL shapes, so there is
        // no need to collect all segments.
        let first_segment = url
            .path_segments()
            .and_then(|mut segments| segments.next());

        let candidate = match url.host_str()? {
            "youtube.com" | "www.youtube.com" | "m.youtube.com" => {
                if first_segment != Some("watch") {
                    return None;
                }
                url.query_pairs()
                    .find(|(k, _)| k == "v")
                    .map(|(_, v)| v.into_owned())?
            }
            "youtu.be" => first_segment?.to_string(),
            _ => return None,
        };

        // `query_pairs` percent-decodes values, so without this check an ID
        // could carry newlines, `&`, `#`, `/` or markdown syntax. The ID is
        // later interpolated verbatim into URLs and markdown output, so such
        // input must be rejected here.
        Some(candidate).filter(|id| Self::is_plausible_video_id(id))
    }
}
impl Default for YouTubeFetcher {
fn default() -> Self {
Self::new()
}
}
/// Subset of the oEmbed JSON response that this fetcher reads.
///
/// Every field is optional, so a response that omits any of them still
/// deserializes.
#[derive(Debug, Deserialize)]
struct OEmbedResponse {
/// Video title; used as the markdown heading when present.
title: Option<String>,
/// Author name; rendered as the channel name when present.
author_name: Option<String>,
/// Author URL; used as the link target for the channel name when present.
author_url: Option<String>,
}
#[async_trait]
impl Fetcher for YouTubeFetcher {
    /// Identifier of this fetcher.
    fn name(&self) -> &'static str {
        "youtube"
    }

    /// Reports whether a video ID can be extracted from `url`.
    fn matches(&self, url: &Url) -> bool {
        let video_id = Self::parse_video_id(url);
        video_id.is_some()
    }

    /// Builds a markdown summary for the video referenced by `request.url`.
    ///
    /// The oEmbed endpoint is queried on a best-effort basis: if the request
    /// fails, returns a non-success status, or the body cannot be decoded, the
    /// summary falls back to a generic title and omits the channel line. The
    /// returned response always carries status code 200.
    ///
    /// # Errors
    ///
    /// - `FetchError::InvalidUrlScheme` if `request.url` cannot be parsed.
    /// - `FetchError::FetcherError` if no video ID can be extracted.
    /// - `FetchError::ClientBuildError` if the HTTP client cannot be built.
    async fn fetch(
        &self,
        request: &FetchRequest,
        options: &FetchOptions,
    ) -> Result<FetchResponse, FetchError> {
        let parsed = Url::parse(&request.url).map_err(|_| FetchError::InvalidUrlScheme)?;
        let video_id = match Self::parse_video_id(&parsed) {
            Some(id) => id,
            None => {
                return Err(FetchError::FetcherError(
                    "Not a valid YouTube URL".to_string(),
                ))
            }
        };

        let agent = match options.user_agent.as_deref() {
            Some(custom) => custom,
            None => DEFAULT_USER_AGENT,
        };

        // Proxy settings from the environment are honoured only on request.
        let base_builder = reqwest::Client::builder()
            .connect_timeout(API_TIMEOUT)
            .timeout(API_TIMEOUT)
            .redirect(reqwest::redirect::Policy::limited(3));
        let builder = if options.respect_proxy_env {
            base_builder
        } else {
            base_builder.no_proxy()
        };
        let client = builder.build().map_err(FetchError::ClientBuildError)?;

        // A configured user agent that is not a valid header value is replaced
        // by the default one rather than failing the fetch.
        let agent_header = match HeaderValue::from_str(agent) {
            Ok(value) => value,
            Err(_) => HeaderValue::from_static(DEFAULT_USER_AGENT),
        };

        let canonical_url = format!("https://www.youtube.com/watch?v={}", video_id);
        let oembed_url = {
            let mut endpoint = Url::parse("https://www.youtube.com/oembed").unwrap();
            endpoint
                .query_pairs_mut()
                .append_pair("url", &canonical_url)
                .append_pair("format", "json");
            endpoint.to_string()
        };

        // Any failure on the oEmbed path simply leaves the metadata empty.
        let metadata = match client
            .get(&oembed_url)
            .header(USER_AGENT, agent_header)
            .send()
            .await
        {
            Ok(resp) => {
                if resp.status().is_success() {
                    resp.json::<OEmbedResponse>().await.ok()
                } else {
                    None
                }
            }
            Err(_) => None,
        };

        let (title, author, author_url) = match metadata {
            Some(meta) => (meta.title, meta.author_name, meta.author_url),
            None => (None, None, None),
        };
        let title = match title {
            Some(text) => text,
            None => format!("YouTube Video {}", video_id),
        };

        let mut markdown = format!("# {}\n\n", title);
        markdown += "## Video Info\n\n";
        // The channel line is emitted only when an author name is known; an
        // author URL without a name is ignored.
        match (author.as_deref(), author_url.as_deref()) {
            (Some(name), Some(link)) => {
                markdown += &format!("- **Channel:** [{}]({})\n", name, link);
            }
            (Some(name), None) => {
                markdown += &format!("- **Channel:** {}\n", name);
            }
            (None, _) => {}
        }
        markdown += &format!("- **Video ID:** {}\n", video_id);
        markdown += &format!("- **URL:** {}\n", canonical_url);
        markdown += &format!(
            "- **Thumbnail:** https://img.youtube.com/vi/{}/maxresdefault.jpg\n",
            video_id
        );

        let response = FetchResponse {
            url: request.url.clone(),
            status_code: 200,
            content_type: Some("text/markdown".to_string()),
            format: Some("youtube_video".to_string()),
            content: Some(markdown),
            ..Default::default()
        };
        Ok(response)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Parses `raw` as a URL and runs it through `YouTubeFetcher::parse_video_id`.
    fn video_id_of(raw: &str) -> Option<String> {
        let url = Url::parse(raw).unwrap();
        YouTubeFetcher::parse_video_id(&url)
    }

    #[test]
    fn test_parse_youtube_watch() {
        let id = video_id_of("https://www.youtube.com/watch?v=dQw4w9WgXcQ");
        assert_eq!(id, Some("dQw4w9WgXcQ".to_string()));
    }

    #[test]
    fn test_parse_youtu_be() {
        let id = video_id_of("https://youtu.be/dQw4w9WgXcQ");
        assert_eq!(id, Some("dQw4w9WgXcQ".to_string()));
    }

    #[test]
    fn test_parse_youtube_no_www() {
        let id = video_id_of("https://youtube.com/watch?v=abc123");
        assert_eq!(id, Some("abc123".to_string()));
    }

    #[test]
    fn test_rejects_non_watch() {
        let id = video_id_of("https://www.youtube.com/channel/UC123");
        assert_eq!(id, None);
    }

    #[test]
    fn test_rejects_no_v_param() {
        let id = video_id_of("https://www.youtube.com/watch?list=PL123");
        assert_eq!(id, None);
    }

    #[test]
    fn test_rejects_non_youtube() {
        let id = video_id_of("https://vimeo.com/123456");
        assert_eq!(id, None);
    }

    #[test]
    fn test_fetcher_matches() {
        let fetcher = YouTubeFetcher::new();

        let watch = Url::parse("https://www.youtube.com/watch?v=abc").unwrap();
        let short = Url::parse("https://youtu.be/abc").unwrap();
        let foreign = Url::parse("https://example.com/watch?v=abc").unwrap();

        assert!(fetcher.matches(&watch));
        assert!(fetcher.matches(&short));
        assert!(!fetcher.matches(&foreign));
    }
}