omni_dev/transcript/sources/
youtube.rs1use std::time::Duration;
11
12use async_trait::async_trait;
13
14use crate::transcript::error::Result;
15use crate::transcript::source::{FetchOpts, LanguageInfo, MediaInfo, Transcript, TranscriptSource};
16
17pub mod innertube;
18pub mod player_response;
19pub mod timedtext;
20pub mod url;
21pub mod watch_page;
22
23pub use player_response::{
24 check_playability, extract_media_info, list_languages, parse as parse_player_response,
25 select_track, CaptionTrack, PlayerResponse, SelectedTrack,
26};
27pub use timedtext::parse as parse_timedtext;
28pub use url::extract_video_id;
29
/// Origin used by [`Youtube::new`] when the caller supplies no override.
const DEFAULT_BASE_URL: &str = "https://www.youtube.com";

/// Timeout configured on the shared HTTP client.
const REQUEST_TIMEOUT: Duration = Duration::from_secs(30);

/// `User-Agent` value configured on the shared HTTP client. The literal names
/// the Android YouTube VR application on a Quest 3 device.
const USER_AGENT: &str = concat!(
    "com.google.android.apps.youtube.vr.oculus/1.62.27",
    " (Linux; U; Android 12; Quest 3) gzip",
);
48
49pub fn matches_url(input: &str) -> bool {
54 extract_video_id(input).is_ok()
55}
56
57#[derive(Debug, Clone)]
68pub struct Youtube {
69 http: reqwest::Client,
70 base_url: String,
71 visitor_data: tokio::sync::OnceCell<String>,
72}
73
74impl Youtube {
75 pub fn new() -> Result<Self> {
78 let http = reqwest::Client::builder()
79 .timeout(REQUEST_TIMEOUT)
80 .user_agent(USER_AGENT)
81 .build()?;
82 Ok(Self {
83 http,
84 base_url: DEFAULT_BASE_URL.to_string(),
85 visitor_data: tokio::sync::OnceCell::new(),
86 })
87 }
88
89 pub fn with_base_url(base_url: impl Into<String>) -> Result<Self> {
94 let http = reqwest::Client::builder()
95 .timeout(REQUEST_TIMEOUT)
96 .user_agent(USER_AGENT)
97 .build()?;
98 Ok(Self {
99 http,
100 base_url: base_url.into(),
101 visitor_data: tokio::sync::OnceCell::new(),
102 })
103 }
104
105 async fn visitor_data(&self) -> Result<&str> {
109 self.visitor_data
110 .get_or_try_init(|| watch_page::fetch_visitor_data(&self.http, &self.base_url))
111 .await
112 .map(String::as_str)
113 }
114
115 async fn load_player_response(&self, locator: &str) -> Result<PlayerResponse> {
122 let video_id = extract_video_id(locator)?;
123 let visitor_data = self.visitor_data().await?;
124 let raw =
125 innertube::fetch_player_response(&self.http, &self.base_url, &video_id, visitor_data)
126 .await?;
127 let response = parse_player_response(&raw)?;
128 check_playability(&response)?;
129 Ok(response)
130 }
131}
132
133#[async_trait]
134impl TranscriptSource for Youtube {
135 fn name(&self) -> &'static str {
136 "youtube"
137 }
138
139 fn matches(url: &str) -> bool {
140 matches_url(url)
141 }
142
143 async fn fetch(&self, locator: &str, opts: &FetchOpts) -> Result<Transcript> {
144 let response = self.load_player_response(locator).await?;
145 let selected = select_track(&response, opts)?;
146 let body = timedtext::fetch(&self.http, &selected.fetch_url).await?;
147 let cues = timedtext::parse(&body)?;
148 let locator_id = response
149 .video_details
150 .as_ref()
151 .map(|d| d.video_id.clone())
152 .unwrap_or_default();
153 Ok(Transcript {
154 source: self.name().to_string(),
155 locator_id,
156 language: selected.language.clone(),
157 kind: selected.kind,
158 cues,
159 })
160 }
161
162 async fn list_languages(&self, locator: &str) -> Result<Vec<LanguageInfo>> {
163 let response = self.load_player_response(locator).await?;
164 Ok(list_languages(&response))
165 }
166
167 async fn info(&self, locator: &str) -> Result<MediaInfo> {
168 let response = self.load_player_response(locator).await?;
169 Ok(extract_media_info(&response))
170 }
171}
172
#[cfg(test)]
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    use super::*;
    use crate::transcript::error::TranscriptError;
    use crate::transcript::format::srt;
    use crate::transcript::source::{FetchOpts, TrackKind};
    use serde_json::Value;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    const PLAYER_RESPONSE: &str = include_str!("youtube/fixtures/player_response_basic.json");
    const PLAYER_RESPONSE_AGE_GATED: &str =
        include_str!("youtube/fixtures/player_response_age_gated.json");
    const TIMEDTEXT: &str = include_str!("youtube/fixtures/timedtext_basic.json");
    const EXPECTED_SRT: &str = include_str!("youtube/fixtures/expected_basic.srt");
    const WATCH_PAGE: &str = include_str!("youtube/fixtures/watch_page_with_visitor_data.html");

    const VIDEO_ID: &str = "dQw4w9WgXcQ";

    // ---------------------------------------------------------------
    // Shared helpers
    // ---------------------------------------------------------------

    /// Returns the basic player-response fixture with every caption track's
    /// `baseUrl` redirected to `mock_uri`, so caption downloads hit the mock
    /// server instead of the real host.
    fn fixture_with_rewritten_caption_urls(mock_uri: &str) -> String {
        let mut value: Value = serde_json::from_str(PLAYER_RESPONSE).unwrap();
        let tracks = value["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]
            .as_array_mut()
            .unwrap();
        for track in tracks {
            let lang = track["languageCode"].as_str().unwrap().to_string();
            track["baseUrl"] = Value::String(format!("{mock_uri}/api/timedtext?lang={lang}"));
        }
        serde_json::to_string(&value).unwrap()
    }

    /// Builds (without mounting) the mock that serves the watch-page fixture,
    /// so callers can attach expectations such as `.expect(1)` first.
    fn watch_page_mock() -> Mock {
        Mock::given(method("GET"))
            .and(path("/watch"))
            .respond_with(ResponseTemplate::new(200).set_body_string(WATCH_PAGE))
    }

    /// Mounts the watch-page fixture on `server`.
    async fn mount_watch_page(server: &MockServer) {
        watch_page_mock().mount(server).await;
    }

    /// Mounts `response` as the reply to the player endpoint.
    async fn mount_player(server: &MockServer, response: ResponseTemplate) {
        Mock::given(method("POST"))
            .and(path(innertube::PLAYER_PATH))
            .respond_with(response)
            .mount(server)
            .await;
    }

    /// Mounts the timedtext fixture on the caption endpoint.
    async fn mount_timedtext(server: &MockServer) {
        Mock::given(method("GET"))
            .and(path("/api/timedtext"))
            .respond_with(ResponseTemplate::new(200).set_body_string(TIMEDTEXT))
            .mount(server)
            .await;
    }

    /// Mounts all three endpoints for the basic video, using `watch_page` as
    /// the watch-page mock.
    async fn mount_basic_video(server: &MockServer, watch_page: Mock) {
        watch_page.mount(server).await;
        let player_response = fixture_with_rewritten_caption_urls(&server.uri());
        mount_player(
            server,
            ResponseTemplate::new(200).set_body_string(player_response),
        )
        .await;
        mount_timedtext(server).await;
    }

    /// Starts a mock server that serves the complete basic-video flow.
    async fn mock_server_with_basic_video() -> MockServer {
        let server = MockServer::start().await;
        mount_basic_video(&server, watch_page_mock()).await;
        server
    }

    /// Builds a [`Youtube`] whose requests go to `server`.
    fn client_for(server: &MockServer) -> Youtube {
        Youtube::with_base_url(server.uri()).unwrap()
    }

    // ---------------------------------------------------------------
    // URL matching
    // ---------------------------------------------------------------

    #[test]
    fn matches_url_accepts_canonical_forms() {
        assert!(matches_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ"));
        assert!(matches_url("https://youtu.be/dQw4w9WgXcQ"));
    }

    #[test]
    fn matches_url_rejects_other_hosts() {
        assert!(!matches_url("https://vimeo.com/123456"));
        assert!(!matches_url("not a url"));
    }

    #[test]
    fn matches_url_accepts_bare_video_id() {
        assert!(matches_url(VIDEO_ID));
    }

    // ---------------------------------------------------------------
    // Offline pipeline (fixtures only, no HTTP)
    // ---------------------------------------------------------------

    #[test]
    fn end_to_end_player_response_to_srt() {
        let response = parse_player_response(PLAYER_RESPONSE).unwrap();
        check_playability(&response).unwrap();

        let opts = FetchOpts::new("en-US");
        let selected = select_track(&response, &opts).unwrap();
        assert_eq!(selected.kind, TrackKind::Manual);
        assert_eq!(selected.language, "en-US");

        let cues = parse_timedtext(TIMEDTEXT).unwrap();
        assert_eq!(cues.len(), 3);

        let video_id = response
            .video_details
            .as_ref()
            .map(|d| d.video_id.clone())
            .unwrap_or_default();
        let transcript = Transcript {
            source: "youtube".to_string(),
            locator_id: video_id,
            language: selected.language.clone(),
            kind: selected.kind,
            cues,
        };
        let rendered = srt::render(&transcript.cues);
        assert_eq!(rendered, EXPECTED_SRT);
    }

    #[test]
    fn end_to_end_translation_path_picks_target_language() {
        let response = parse_player_response(PLAYER_RESPONSE).unwrap();
        let mut opts = FetchOpts::new("ja");
        opts.translate_to = Some("fr".into());
        let selected = select_track(&response, &opts).unwrap();
        assert_eq!(selected.kind, TrackKind::Translated);
        assert_eq!(selected.language, "fr");
        assert!(selected.fetch_url.contains("tlang=fr"));
    }

    // ---------------------------------------------------------------
    // `fetch` against the mock server
    // ---------------------------------------------------------------

    #[tokio::test]
    async fn fetch_returns_transcript_assembled_from_both_endpoints() {
        let server = mock_server_with_basic_video().await;
        let yt = client_for(&server);
        let opts = FetchOpts::new("en-US");

        let transcript = yt
            .fetch(
                &format!("https://www.youtube.com/watch?v={VIDEO_ID}"),
                &opts,
            )
            .await
            .unwrap();

        assert_eq!(transcript.source, "youtube");
        assert_eq!(transcript.locator_id, VIDEO_ID);
        assert_eq!(transcript.language, "en-US");
        assert_eq!(transcript.kind, TrackKind::Manual);
        assert_eq!(transcript.cues.len(), 3);
        assert_eq!(srt::render(&transcript.cues), EXPECTED_SRT);
    }

    #[tokio::test]
    async fn fetch_accepts_bare_video_id_as_locator() {
        let server = mock_server_with_basic_video().await;
        let yt = client_for(&server);
        let opts = FetchOpts::new("en-US");

        let transcript = yt.fetch(VIDEO_ID, &opts).await.unwrap();
        assert_eq!(transcript.locator_id, VIDEO_ID);
    }

    #[tokio::test]
    async fn fetch_propagates_language_not_found() {
        let server = mock_server_with_basic_video().await;
        let yt = client_for(&server);
        let opts = FetchOpts::new("zz");

        let err = yt.fetch(VIDEO_ID, &opts).await.unwrap_err();
        assert!(matches!(err, TranscriptError::LanguageNotFound { .. }));
    }

    #[tokio::test]
    async fn fetch_surfaces_age_gated_as_playability_refused() {
        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        mount_player(
            &server,
            ResponseTemplate::new(200).set_body_string(PLAYER_RESPONSE_AGE_GATED),
        )
        .await;

        let yt = client_for(&server);
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        match err {
            TranscriptError::PlayabilityRefused { status, .. } => {
                assert_eq!(status, "LOGIN_REQUIRED");
            }
            other => panic!("wrong variant: {other:?}"),
        }
    }

    #[tokio::test]
    async fn fetch_invalid_locator_short_circuits_before_http() {
        // Nothing is mounted anywhere: the locator must be rejected before
        // any request would be attempted against this address.
        let yt = Youtube::with_base_url("http://127.0.0.1:1").unwrap();
        let err = yt
            .fetch("not-a-url", &FetchOpts::new("en"))
            .await
            .unwrap_err();
        assert!(matches!(err, TranscriptError::InvalidLocator(_)));
    }

    #[tokio::test]
    async fn fetch_surfaces_innertube_500_as_http_error() {
        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        mount_player(&server, ResponseTemplate::new(500)).await;

        let yt = client_for(&server);
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::Http(_)));
    }

    #[tokio::test]
    async fn fetch_surfaces_missing_visitor_data_as_typed_error() {
        let server = MockServer::start().await;
        // A watch page that carries no visitor-data token.
        Mock::given(method("GET"))
            .and(path("/watch"))
            .respond_with(
                ResponseTemplate::new(200).set_body_string("<html><body>no token</body></html>"),
            )
            .mount(&server)
            .await;

        let yt = client_for(&server);
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::MissingVisitorData { .. }));
    }

    #[tokio::test]
    async fn fetch_surfaces_malformed_innertube_json_as_parse_error() {
        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        mount_player(
            &server,
            ResponseTemplate::new(200).set_body_string("{ not json"),
        )
        .await;

        let yt = client_for(&server);
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::ParseError(_)));
    }

    // ---------------------------------------------------------------
    // `list_languages`, `info`, and trait plumbing
    // ---------------------------------------------------------------

    #[tokio::test]
    async fn list_languages_projects_caption_tracks() {
        let server = mock_server_with_basic_video().await;
        let yt = client_for(&server);

        let langs = yt.list_languages(VIDEO_ID).await.unwrap();
        let codes: Vec<_> = langs.iter().map(|l| l.code.as_str()).collect();
        assert!(codes.contains(&"en-US"));
        assert!(codes.contains(&"es"));
        assert!(codes.contains(&"en"));
    }

    #[tokio::test]
    async fn info_returns_video_metadata() {
        let server = mock_server_with_basic_video().await;
        let yt = client_for(&server);

        let info = yt.info(VIDEO_ID).await.unwrap();
        assert_eq!(info.source, "youtube");
        assert_eq!(info.locator_id, VIDEO_ID);
        assert_eq!(info.title, "Sample Video");
        assert_eq!(info.duration_ms, Some(212_000));
        assert_eq!(info.languages.len(), 3);
    }

    // Purely synchronous: no runtime is needed to call an associated function.
    #[test]
    fn matches_static_dispatch_through_trait() {
        assert!(<Youtube as TranscriptSource>::matches(
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        ));
        assert!(!<Youtube as TranscriptSource>::matches(
            "https://vimeo.com/1"
        ));
    }

    // `name` performs no I/O, so no mock server is started for it.
    #[test]
    fn name_is_lowercase_youtube() {
        let yt = Youtube::with_base_url("http://127.0.0.1:1").unwrap();
        assert_eq!(yt.name(), "youtube");
    }

    #[test]
    fn new_constructs_default_client() {
        let yt = Youtube::new().unwrap();
        assert_eq!(yt.base_url, DEFAULT_BASE_URL);
    }

    // ---------------------------------------------------------------
    // Visitor-data handling
    // ---------------------------------------------------------------

    #[tokio::test]
    async fn fetch_threads_visitor_data_into_innertube_body() {
        const EXPECTED_TOKEN: &str = "CgtkUTQyOFR3aV9NSSjFoYvBBjIKCgJVUxIEGgAgPg%3D%3D";

        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        let player_response = fixture_with_rewritten_caption_urls(&server.uri());

        // The responder inspects the request body, so the token is checked
        // on the exact payload the client sent to the player endpoint.
        Mock::given(method("POST"))
            .and(path(innertube::PLAYER_PATH))
            .respond_with(move |req: &wiremock::Request| {
                let parsed: Value = serde_json::from_slice(&req.body).unwrap();
                assert_eq!(
                    parsed["context"]["client"]["visitorData"],
                    Value::String(EXPECTED_TOKEN.to_string()),
                );
                ResponseTemplate::new(200).set_body_string(player_response.clone())
            })
            .expect(1)
            .mount(&server)
            .await;

        mount_timedtext(&server).await;

        let yt = client_for(&server);
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
    }

    #[tokio::test]
    async fn visitor_data_fetched_only_once_for_repeated_calls() {
        let server = MockServer::start().await;
        // `.expect(1)` on the watch page is the actual assertion here.
        mount_basic_video(&server, watch_page_mock().expect(1)).await;

        let yt = client_for(&server);
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
    }

    #[tokio::test]
    async fn visitor_data_fetched_only_once_under_concurrency() {
        let server = MockServer::start().await;
        // `.expect(1)` on the watch page is the actual assertion here.
        mount_basic_video(&server, watch_page_mock().expect(1)).await;

        let yt = client_for(&server);
        let opts = FetchOpts::new("en-US");
        let (a, b, c) = tokio::join!(
            yt.fetch(VIDEO_ID, &opts),
            yt.fetch(VIDEO_ID, &opts),
            yt.fetch(VIDEO_ID, &opts),
        );
        a.unwrap();
        b.unwrap();
        c.unwrap();
    }

    // ---------------------------------------------------------------
    // Opt-in test against the live service
    // ---------------------------------------------------------------

    #[cfg(online_tests)]
    #[tokio::test]
    async fn online_fetch_against_public_video() {
        const STABLE_VIDEO_ID: &str = "jNQXAC9IVRw";
        let yt = Youtube::new().unwrap();
        let opts = FetchOpts::new("en");
        let transcript = yt.fetch(STABLE_VIDEO_ID, &opts).await.unwrap();
        assert_eq!(transcript.source, "youtube");
        assert_eq!(transcript.locator_id, STABLE_VIDEO_ID);
        assert!(!transcript.cues.is_empty());
    }
}