Skip to main content

omni_dev/transcript/sources/
youtube.rs

1//! YouTube [`TranscriptSource`].
2//!
3//! Wires the offline parsers ([`url`], [`player_response`], [`timedtext`])
4//! into a concrete [`TranscriptSource`] backed by an HTTP client. The
5//! request shape is pinned to the `ANDROID_VR` InnerTube client (see
6//! [`innertube`]); a `visitorData` token is scraped from the watch page
7//! on first use ([`watch_page`]) and cached for the lifetime of the
8//! [`Youtube`] instance.
9
10use std::time::Duration;
11
12use async_trait::async_trait;
13
14use crate::transcript::error::Result;
15use crate::transcript::source::{FetchOpts, LanguageInfo, MediaInfo, Transcript, TranscriptSource};
16
17pub mod innertube;
18pub mod player_response;
19pub mod timedtext;
20pub mod url;
21pub mod watch_page;
22
23pub use player_response::{
24    check_playability, extract_media_info, list_languages, parse as parse_player_response,
25    select_track, CaptionTrack, PlayerResponse, SelectedTrack,
26};
27pub use timedtext::parse as parse_timedtext;
28pub use url::extract_video_id;
29
/// Origin that InnerTube and timedtext requests are sent to by default.
/// Tests swap in a `wiremock::MockServer::uri()` instead.
const DEFAULT_BASE_URL: &str = "https://www.youtube.com";

/// Per-request HTTP timeout. Chosen to line up with the 30 s timeout of
/// [`crate::atlassian::client::AtlassianClient`].
const REQUEST_TIMEOUT: Duration = Duration::from_secs(30);

/// User-Agent sent to YouTube on InnerTube `/player` calls. It has to
/// agree with the `clientName` / `clientVersion` constants in
/// [`innertube`]: YouTube cross-checks the UA against `clientName` as part
/// of bot detection, and a mismatch is one of the flagged signals. The
/// trailing `gzip` token is deliberate — the real Quest YouTube app emits
/// it.
///
/// The watch-page bootstrap in [`watch_page`] sends its own
/// browser-shaped UA, because it scrapes a public HTML page rather than
/// talking to InnerTube.
const USER_AGENT: &str = concat!(
    "com.google.android.apps.youtube.vr.oculus/1.62.27",
    " (Linux; U; Android 12; Quest 3) gzip",
);
48
49/// Whether `input` is recognised as a YouTube locator (URL or bare ID).
50///
51/// Used by the future `omni-dev transcript fetch <url>` auto-detection
52/// path and by [`TranscriptSource::matches`].
53pub fn matches_url(input: &str) -> bool {
54    extract_video_id(input).is_ok()
55}
56
57/// YouTube [`TranscriptSource`].
58///
59/// Holds a single [`reqwest::Client`] reused across the watch-page,
60/// InnerTube, and timedtext calls. Cheap to construct; in steady state
61/// it is fine to keep one instance per process.
62///
63/// On first use, a `visitorData` token is scraped from the watch page
64/// and cached in [`tokio::sync::OnceCell`]. Concurrent first-callers
65/// serialise on a single fetch rather than double-fetching, and every
66/// subsequent InnerTube `/player` POST forwards the cached token.
67#[derive(Debug, Clone)]
68pub struct Youtube {
69    http: reqwest::Client,
70    base_url: String,
71    visitor_data: tokio::sync::OnceCell<String>,
72}
73
74impl Youtube {
75    /// Construct a YouTube source with default HTTP settings (30 s timeout,
76    /// ANDROID_VR User-Agent) targeting the public YouTube origin.
77    pub fn new() -> Result<Self> {
78        let http = reqwest::Client::builder()
79            .timeout(REQUEST_TIMEOUT)
80            .user_agent(USER_AGENT)
81            .build()?;
82        Ok(Self {
83            http,
84            base_url: DEFAULT_BASE_URL.to_string(),
85            visitor_data: tokio::sync::OnceCell::new(),
86        })
87    }
88
89    /// Construct a YouTube source pointed at an alternate origin. Used by
90    /// tests to inject a `wiremock::MockServer::uri()`. The HTTP client
91    /// retains the production timeout and User-Agent so request shape
92    /// matches the real client.
93    pub fn with_base_url(base_url: impl Into<String>) -> Result<Self> {
94        let http = reqwest::Client::builder()
95            .timeout(REQUEST_TIMEOUT)
96            .user_agent(USER_AGENT)
97            .build()?;
98        Ok(Self {
99            http,
100            base_url: base_url.into(),
101            visitor_data: tokio::sync::OnceCell::new(),
102        })
103    }
104
105    /// Cached `visitorData` token. First call scrapes the watch page;
106    /// concurrent first-callers serialise on a single in-flight scrape
107    /// (`OnceCell::get_or_try_init`) rather than double-fetching.
108    async fn visitor_data(&self) -> Result<&str> {
109        self.visitor_data
110            .get_or_try_init(|| watch_page::fetch_visitor_data(&self.http, &self.base_url))
111            .await
112            .map(String::as_str)
113    }
114
115    /// Common preamble: locator → video ID → watch-page bootstrap →
116    /// InnerTube POST → `playerResponse` parse → playability check.
117    ///
118    /// `extract_video_id` runs first so an invalid locator short-circuits
119    /// before any HTTP — lazy `visitor_data` fetch only happens on a
120    /// validated locator.
121    async fn load_player_response(&self, locator: &str) -> Result<PlayerResponse> {
122        let video_id = extract_video_id(locator)?;
123        let visitor_data = self.visitor_data().await?;
124        let raw =
125            innertube::fetch_player_response(&self.http, &self.base_url, &video_id, visitor_data)
126                .await?;
127        let response = parse_player_response(&raw)?;
128        check_playability(&response)?;
129        Ok(response)
130    }
131}
132
133#[async_trait]
134impl TranscriptSource for Youtube {
135    fn name(&self) -> &'static str {
136        "youtube"
137    }
138
139    fn matches(url: &str) -> bool {
140        matches_url(url)
141    }
142
143    async fn fetch(&self, locator: &str, opts: &FetchOpts) -> Result<Transcript> {
144        let response = self.load_player_response(locator).await?;
145        let selected = select_track(&response, opts)?;
146        let body = timedtext::fetch(&self.http, &selected.fetch_url).await?;
147        let cues = timedtext::parse(&body)?;
148        let locator_id = response
149            .video_details
150            .as_ref()
151            .map(|d| d.video_id.clone())
152            .unwrap_or_default();
153        Ok(Transcript {
154            source: self.name().to_string(),
155            locator_id,
156            language: selected.language.clone(),
157            kind: selected.kind,
158            cues,
159        })
160    }
161
162    async fn list_languages(&self, locator: &str) -> Result<Vec<LanguageInfo>> {
163        let response = self.load_player_response(locator).await?;
164        Ok(list_languages(&response))
165    }
166
167    async fn info(&self, locator: &str) -> Result<MediaInfo> {
168        let response = self.load_player_response(locator).await?;
169        Ok(extract_media_info(&response))
170    }
171}
172
#[cfg(test)]
// Test code: a panic from `unwrap`/`expect` is the desired failure mode.
#[allow(clippy::unwrap_used, clippy::expect_used)]
mod tests {
    //! Two layers:
    //!
    //! 1. Offline acceptance gate — parse a checked-in `playerResponse`,
    //!    select the requested track, parse a checked-in json3 transcript,
    //!    render via [`format::srt`], and compare to a golden `.srt`.
    //!    Carried over from step 2.
    //! 2. HTTP-driven `TranscriptSource` impl tested against a
    //!    `wiremock::MockServer` serving the watch page, the InnerTube
    //!    `/player` endpoint, and the timedtext URL the player response
    //!    points at.
    //!
    //! [`format::srt`]: crate::transcript::format::srt

    use super::*;
    use crate::transcript::error::TranscriptError;
    use crate::transcript::format::srt;
    use crate::transcript::source::{FetchOpts, TrackKind};
    use serde_json::Value;
    use wiremock::matchers::{method, path};
    use wiremock::{Mock, MockServer, ResponseTemplate};

    const PLAYER_RESPONSE: &str = include_str!("youtube/fixtures/player_response_basic.json");
    const PLAYER_RESPONSE_AGE_GATED: &str =
        include_str!("youtube/fixtures/player_response_age_gated.json");
    const TIMEDTEXT: &str = include_str!("youtube/fixtures/timedtext_basic.json");
    const EXPECTED_SRT: &str = include_str!("youtube/fixtures/expected_basic.srt");

    /// Watch-page fixture used to satisfy the `visitorData` bootstrap in
    /// every wiremock-driven test below. The exact token value only
    /// matters to `fetch_threads_visitor_data_into_innertube_body`; the
    /// other tests just need the bootstrap to return *some* token so
    /// [`Youtube::load_player_response`] can proceed.
    const WATCH_PAGE: &str = include_str!("youtube/fixtures/watch_page_with_visitor_data.html");

    const VIDEO_ID: &str = "dQw4w9WgXcQ";

    /// Base URL for tests that are expected never to issue a request.
    const UNUSED_BASE_URL: &str = "http://127.0.0.1:1";

    // ── Offline acceptance gate (carried from step 2) ──

    #[test]
    fn matches_url_accepts_canonical_forms() {
        assert!(matches_url("https://www.youtube.com/watch?v=dQw4w9WgXcQ"));
        assert!(matches_url("https://youtu.be/dQw4w9WgXcQ"));
    }

    #[test]
    fn matches_url_rejects_other_hosts() {
        assert!(!matches_url("https://vimeo.com/123456"));
        assert!(!matches_url("not a url"));
    }

    #[test]
    fn matches_url_accepts_bare_video_id() {
        assert!(matches_url(VIDEO_ID));
    }

    #[test]
    fn end_to_end_player_response_to_srt() {
        let response = parse_player_response(PLAYER_RESPONSE).unwrap();
        check_playability(&response).unwrap();

        let opts = FetchOpts::new("en-US");
        let selected = select_track(&response, &opts).unwrap();
        assert_eq!(selected.kind, TrackKind::Manual);
        assert_eq!(selected.language, "en-US");

        let cues = parse_timedtext(TIMEDTEXT).unwrap();
        assert_eq!(cues.len(), 3);

        let video_id = response
            .video_details
            .as_ref()
            .map(|d| d.video_id.clone())
            .unwrap_or_default();
        let transcript = Transcript {
            source: "youtube".to_string(),
            locator_id: video_id,
            language: selected.language.clone(),
            kind: selected.kind,
            cues,
        };
        let rendered = srt::render(&transcript.cues);
        assert_eq!(rendered, EXPECTED_SRT);
    }

    #[test]
    fn end_to_end_translation_path_picks_target_language() {
        let response = parse_player_response(PLAYER_RESPONSE).unwrap();
        let mut opts = FetchOpts::new("ja");
        opts.translate_to = Some("fr".into());
        let selected = select_track(&response, &opts).unwrap();
        assert_eq!(selected.kind, TrackKind::Translated);
        assert_eq!(selected.language, "fr");
        assert!(selected.fetch_url.contains("tlang=fr"));
    }

    // ── Mock-server helpers ──

    /// Take the checked-in `player_response_basic.json` fixture and rewrite
    /// every caption track's `baseUrl` to point at the mock server, so
    /// `select_track` produces a URL the same mock will answer for the
    /// timedtext GET.
    fn fixture_with_rewritten_caption_urls(mock_uri: &str) -> String {
        let mut value: Value = serde_json::from_str(PLAYER_RESPONSE).unwrap();
        let tracks = value["captions"]["playerCaptionsTracklistRenderer"]["captionTracks"]
            .as_array_mut()
            .unwrap();
        for track in tracks {
            let lang = track["languageCode"].as_str().unwrap().to_string();
            track["baseUrl"] = Value::String(format!("{mock_uri}/api/timedtext?lang={lang}"));
        }
        serde_json::to_string(&value).unwrap()
    }

    /// Unmounted `GET /watch` mock serving [`WATCH_PAGE`]. Returned
    /// unmounted so callers can attach an `.expect(..)` before mounting.
    fn watch_page_mock() -> Mock {
        Mock::given(method("GET"))
            .and(path("/watch"))
            .respond_with(ResponseTemplate::new(200).set_body_string(WATCH_PAGE))
    }

    /// Mount the watch-page bootstrap mock onto `server`. Every
    /// [`Youtube::fetch`] / `list_languages` / `info` call in the tests
    /// below triggers this on first use (cached thereafter via
    /// `OnceCell`).
    async fn mount_watch_page(server: &MockServer) {
        watch_page_mock().mount(server).await;
    }

    /// Mount an InnerTube `/player` POST mock answering with `response`.
    async fn mount_player(server: &MockServer, response: ResponseTemplate) {
        Mock::given(method("POST"))
            .and(path(innertube::PLAYER_PATH))
            .respond_with(response)
            .mount(server)
            .await;
    }

    /// Mount a `GET /api/timedtext` mock serving the [`TIMEDTEXT`] fixture.
    async fn mount_timedtext(server: &MockServer) {
        Mock::given(method("GET"))
            .and(path("/api/timedtext"))
            .respond_with(ResponseTemplate::new(200).set_body_string(TIMEDTEXT))
            .mount(server)
            .await;
    }

    /// Mount the `/player` mock (basic fixture with caption URLs rewritten
    /// to `server`) together with the timedtext mock.
    async fn mount_basic_video(server: &MockServer) {
        let player_response = fixture_with_rewritten_caption_urls(&server.uri());
        mount_player(
            server,
            ResponseTemplate::new(200).set_body_string(player_response),
        )
        .await;
        mount_timedtext(server).await;
    }

    /// Mock server answering the watch page, `/player`, and timedtext
    /// requests for the basic fixture video.
    async fn mock_server_with_basic_video() -> MockServer {
        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        mount_basic_video(&server).await;
        server
    }

    /// Mock server answering the watch page normally and `/player` with
    /// the supplied `response`; no timedtext mock is mounted.
    async fn mock_server_with_player_response(response: ResponseTemplate) -> MockServer {
        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        mount_player(&server, response).await;
        server
    }

    /// Like [`mock_server_with_basic_video`], but the watch-page mock
    /// carries `.expect(1)`; wiremock checks the expectation when the
    /// server is dropped.
    async fn mock_server_expecting_single_watch_page_fetch() -> MockServer {
        let server = MockServer::start().await;
        watch_page_mock().expect(1).mount(&server).await;
        mount_basic_video(&server).await;
        server
    }

    // ── HTTP-driven TranscriptSource impl ──

    #[tokio::test]
    async fn fetch_returns_transcript_assembled_from_both_endpoints() {
        let server = mock_server_with_basic_video().await;
        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let opts = FetchOpts::new("en-US");

        let transcript = yt
            .fetch(
                &format!("https://www.youtube.com/watch?v={VIDEO_ID}"),
                &opts,
            )
            .await
            .unwrap();

        assert_eq!(transcript.source, "youtube");
        assert_eq!(transcript.locator_id, VIDEO_ID);
        assert_eq!(transcript.language, "en-US");
        assert_eq!(transcript.kind, TrackKind::Manual);
        assert_eq!(transcript.cues.len(), 3);
        // Render and compare to the golden SRT to catch any divergence
        // between the HTTP and offline pipelines.
        assert_eq!(srt::render(&transcript.cues), EXPECTED_SRT);
    }

    #[tokio::test]
    async fn fetch_accepts_bare_video_id_as_locator() {
        let server = mock_server_with_basic_video().await;
        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let opts = FetchOpts::new("en-US");

        let transcript = yt.fetch(VIDEO_ID, &opts).await.unwrap();
        assert_eq!(transcript.locator_id, VIDEO_ID);
    }

    #[tokio::test]
    async fn fetch_propagates_language_not_found() {
        let server = mock_server_with_basic_video().await;
        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let opts = FetchOpts::new("zz");

        let err = yt.fetch(VIDEO_ID, &opts).await.unwrap_err();
        assert!(matches!(err, TranscriptError::LanguageNotFound { .. }));
    }

    #[tokio::test]
    async fn fetch_surfaces_age_gated_as_playability_refused() {
        let server = mock_server_with_player_response(
            ResponseTemplate::new(200).set_body_string(PLAYER_RESPONSE_AGE_GATED),
        )
        .await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        match err {
            TranscriptError::PlayabilityRefused { status, .. } => {
                assert_eq!(status, "LOGIN_REQUIRED");
            }
            other => panic!("wrong variant: {other:?}"),
        }
    }

    #[tokio::test]
    async fn fetch_invalid_locator_short_circuits_before_http() {
        // No mock server needed — the call should fail at URL parsing.
        let yt = Youtube::with_base_url(UNUSED_BASE_URL).unwrap();
        let err = yt
            .fetch("not-a-url", &FetchOpts::new("en"))
            .await
            .unwrap_err();
        assert!(matches!(err, TranscriptError::InvalidLocator(_)));
    }

    #[tokio::test]
    async fn fetch_surfaces_innertube_500_as_http_error() {
        let server = mock_server_with_player_response(ResponseTemplate::new(500)).await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::Http(_)));
    }

    #[tokio::test]
    async fn list_languages_projects_caption_tracks() {
        let server = mock_server_with_basic_video().await;
        let yt = Youtube::with_base_url(server.uri()).unwrap();

        let langs = yt.list_languages(VIDEO_ID).await.unwrap();
        let codes: Vec<_> = langs.iter().map(|l| l.code.as_str()).collect();
        assert!(codes.contains(&"en-US"));
        assert!(codes.contains(&"es"));
        assert!(codes.contains(&"en"));
    }

    #[tokio::test]
    async fn info_returns_video_metadata() {
        let server = mock_server_with_basic_video().await;
        let yt = Youtube::with_base_url(server.uri()).unwrap();

        let info = yt.info(VIDEO_ID).await.unwrap();
        assert_eq!(info.source, "youtube");
        assert_eq!(info.locator_id, VIDEO_ID);
        assert_eq!(info.title, "Sample Video");
        assert_eq!(info.duration_ms, Some(212_000));
        assert_eq!(info.languages.len(), 3);
    }

    #[test]
    fn matches_static_dispatch_through_trait() {
        // Sanity check that the trait's associated function routes to
        // `matches_url` when called through fully-qualified syntax.
        assert!(<Youtube as TranscriptSource>::matches(
            "https://www.youtube.com/watch?v=dQw4w9WgXcQ"
        ));
        assert!(!<Youtube as TranscriptSource>::matches(
            "https://vimeo.com/1"
        ));
    }

    #[test]
    fn name_is_lowercase_youtube() {
        // `name()` performs no I/O, so no mock server or runtime is needed.
        let yt = Youtube::with_base_url(UNUSED_BASE_URL).unwrap();
        assert_eq!(yt.name(), "youtube");
    }

    #[test]
    fn new_constructs_default_client() {
        // Smoke test for the production constructor — exercises the
        // reqwest::Client::builder() path with the pinned timeout / UA.
        let yt = Youtube::new().unwrap();
        assert_eq!(yt.base_url, DEFAULT_BASE_URL);
    }

    #[tokio::test]
    async fn fetch_threads_visitor_data_into_innertube_body() {
        // Pin the bootstrap → InnerTube wiring: the token scraped from the
        // watch page must appear under `context.client.visitorData` on the
        // outbound /player POST. Captures the inbound JSON and asserts on
        // the value the fixture publishes.
        const EXPECTED_TOKEN: &str = "CgtkUTQyOFR3aV9NSSjFoYvBBjIKCgJVUxIEGgAgPg%3D%3D";

        let server = MockServer::start().await;
        mount_watch_page(&server).await;
        let player_response = fixture_with_rewritten_caption_urls(&server.uri());

        Mock::given(method("POST"))
            .and(path(innertube::PLAYER_PATH))
            .respond_with(move |req: &wiremock::Request| {
                let parsed: Value = serde_json::from_slice(&req.body).unwrap();
                assert_eq!(
                    parsed["context"]["client"]["visitorData"],
                    Value::String(EXPECTED_TOKEN.to_string()),
                );
                ResponseTemplate::new(200).set_body_string(player_response.clone())
            })
            .expect(1)
            .mount(&server)
            .await;

        mount_timedtext(&server).await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
    }

    #[tokio::test]
    async fn visitor_data_fetched_only_once_for_repeated_calls() {
        // Sequential calls via a single `Youtube` instance must hit the
        // watch page exactly once — `OnceCell` caches across calls.
        let server = mock_server_expecting_single_watch_page_fetch().await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
        let _ = yt.fetch(VIDEO_ID, &FetchOpts::new("en-US")).await.unwrap();
        // wiremock asserts expect(1) on server drop.
    }

    #[tokio::test]
    async fn visitor_data_fetched_only_once_under_concurrency() {
        // Concurrent first-callers must serialise on a single in-flight
        // scrape rather than each issuing their own watch-page GET.
        // `tokio::sync::OnceCell::get_or_try_init` is documented to
        // provide this guarantee; this test pins the contract.
        let server = mock_server_expecting_single_watch_page_fetch().await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let opts = FetchOpts::new("en-US");
        let (a, b, c) = tokio::join!(
            yt.fetch(VIDEO_ID, &opts),
            yt.fetch(VIDEO_ID, &opts),
            yt.fetch(VIDEO_ID, &opts),
        );
        a.unwrap();
        b.unwrap();
        c.unwrap();
        // wiremock asserts expect(1) on server drop.
    }

    #[tokio::test]
    async fn fetch_surfaces_missing_visitor_data_as_typed_error() {
        // Watch-page format has drifted (no visitorData token): the fetch
        // must propagate `MissingVisitorData` rather than fall through to
        // an unauthenticated /player call.
        let server = MockServer::start().await;
        Mock::given(method("GET"))
            .and(path("/watch"))
            .respond_with(
                ResponseTemplate::new(200).set_body_string("<html><body>no token</body></html>"),
            )
            .mount(&server)
            .await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::MissingVisitorData { .. }));
    }

    #[tokio::test]
    async fn fetch_surfaces_malformed_innertube_json_as_parse_error() {
        let server = mock_server_with_player_response(
            ResponseTemplate::new(200).set_body_string("{ not json"),
        )
        .await;

        let yt = Youtube::with_base_url(server.uri()).unwrap();
        let err = yt.fetch(VIDEO_ID, &FetchOpts::new("en")).await.unwrap_err();
        assert!(matches!(err, TranscriptError::ParseError(_)));
    }

    // ── Online integration test ──
    //
    // Hits real YouTube — gated behind the `online_tests` custom cfg
    // (declared in `Cargo.toml`'s `[lints.rust]`), *not* a cargo feature,
    // so `cargo test --all-features` does not compile or run it. CI never
    // sets the cfg; run manually with
    // `RUSTFLAGS='--cfg online_tests' cargo test online_fetch_against_public_video`.
    // Note that YouTube blocks well-known cloud / CI IPs with
    // `LOGIN_REQUIRED`, so this test passes only from a residential
    // network — it is intentionally manual-only.
    #[cfg(online_tests)]
    #[tokio::test]
    async fn online_fetch_against_public_video() {
        // "Me at the zoo" — the first YouTube video, captioned, stable.
        const STABLE_VIDEO_ID: &str = "jNQXAC9IVRw";
        let yt = Youtube::new().unwrap();
        let opts = FetchOpts::new("en");
        let transcript = yt.fetch(STABLE_VIDEO_ID, &opts).await.unwrap();
        assert_eq!(transcript.source, "youtube");
        assert_eq!(transcript.locator_id, STABLE_VIDEO_ID);
        assert!(!transcript.cues.is_empty());
    }
}