1use reqwest::Client;
2use serde_json::Value;
3use url::Url;
4
5use super::types::{ContentFormat, PageContent};
6
/// Base of the canonical watch-page URL; the video id is appended as the `v` query parameter.
const WATCH_BASE_URL: &str = "https://www.youtube.com/watch";
/// Name of the JavaScript variable searched for in the watch-page HTML to locate the inline
/// player-response JSON.
const PLAYER_RESPONSE_VAR: &str = "ytInitialPlayerResponse";
/// Value sent in the `X-YouTube-Client-Name` header of the Android VR player API request.
const ANDROID_VR_CLIENT_NAME: &str = "28";
/// Client version sent in the player API payload and the `X-YouTube-Client-Version` header.
const ANDROID_VR_CLIENT_VERSION: &str = "1.71.26";
/// User agent used for Android VR player and transcript requests.
// NOTE(review): repeats the version from `ANDROID_VR_CLIENT_VERSION`; keep the two in sync.
const ANDROID_VR_USER_AGENT: &str = "com.google.android.apps.youtube.vr.oculus/1.71.26 (Linux; U; Android 12L; eureka-user Build/SQ3A.220605.009.A1) gzip";
12
/// Newtype around a YouTube video identifier string.
///
/// Within this file instances are produced by `VideoId::from_candidate`.
#[derive(Debug, Clone, PartialEq)]
struct VideoId(String);

impl VideoId {
    /// Borrows the identifier as a string slice.
    fn as_str(&self) -> &str {
        self.0.as_str()
    }
}
21
/// One caption track advertised in a player response.
#[derive(Debug, Clone, PartialEq)]
struct CaptionTrack {
    /// Transcript download URL, taken from the track's `baseUrl` field.
    base_url: String,
    /// Language code taken from the track's `languageCode` field (e.g. `en`).
    language_code: String,
    /// Display name read from the track's `name` text; `"unknown"` when it is absent.
    name: String,
    /// `true` when the track's `kind` field is `"asr"`.
    is_generated: bool,
}
29
/// A single timed line of transcript text.
#[derive(Debug, Clone, PartialEq)]
struct TranscriptSegment {
    /// Start offset in milliseconds (`tStartMs` in JSON, `t` in XML); 0 when missing.
    start_ms: u64,
    /// Duration in milliseconds (`dDurationMs` in JSON, `d` in XML), when provided.
    duration_ms: Option<u64>,
    /// Whitespace-normalized caption text.
    text: String,
}
36
/// Video metadata collected from the player response.
///
/// Every field is optional and stored as the string found in the response; values are read
/// from `videoDetails` first, then from `microformat.playerMicroformatRenderer` where the
/// extractor defines a fallback.
#[derive(Debug, Clone, Default, PartialEq)]
struct VideoMetadata {
    /// Video title.
    title: Option<String>,
    /// Channel display name (`author` / `ownerChannelName`).
    author: Option<String>,
    /// Channel identifier (`channelId` / `externalChannelId`).
    channel_id: Option<String>,
    /// Value of `lengthSeconds`, kept as the original string.
    duration_seconds: Option<String>,
    /// Value of `viewCount`, kept as the original string.
    view_count: Option<String>,
    /// Video description (`shortDescription` or the microformat description text).
    description: Option<String>,
    /// Microformat `publishDate` value.
    publish_date: Option<String>,
    /// Microformat `uploadDate` value.
    upload_date: Option<String>,
}
48
/// Result of resolving captions for a video: the available tracks, the chosen one, and the
/// transcript segments downloaded for it.
struct CaptionSource {
    /// All usable caption tracks found in the player response.
    tracks: Vec<CaptionTrack>,
    /// The track whose transcript was downloaded.
    selected_track: CaptionTrack,
    /// Parsed transcript segments; may be empty.
    segments: Vec<TranscriptSegment>,
    /// Label of the client whose player response supplied the tracks (`"web"` or
    /// `"android_vr"` in this file).
    source_client: &'static str,
}
55
56pub async fn fetch_and_extract(client: &Client, url: &str) -> Result<PageContent, YouTubeError> {
57 let parsed_url = Url::parse(url).map_err(|err| YouTubeError::InvalidUrl(err.to_string()))?;
58 let video_id = extract_video_id(&parsed_url).ok_or(YouTubeError::UnsupportedUrl)?;
59 let requested_url = url.to_string();
60 let watch_url = canonical_watch_url(video_id.as_str());
61
62 let response = client
63 .get(watch_url.as_str())
64 .header("User-Agent", super::read::USER_AGENT)
65 .header("Accept", super::read::ACCEPT_HEADER)
66 .header("Accept-Language", "en-US,en;q=0.9")
67 .send()
68 .await
69 .map_err(|err| YouTubeError::Fetch(err.to_string()))?;
70
71 let status_code = response.status().as_u16();
72 if !response.status().is_success() {
73 return Err(YouTubeError::HttpStatus(
74 status_code,
75 response
76 .status()
77 .canonical_reason()
78 .unwrap_or("Unknown")
79 .to_string(),
80 ));
81 }
82
83 let final_url = response.url().to_string();
84 let content_type = response
85 .headers()
86 .get("content-type")
87 .and_then(|value| value.to_str().ok())
88 .map(str::to_string);
89 let html = response
90 .text()
91 .await
92 .map_err(|err| YouTubeError::Fetch(err.to_string()))?;
93 let raw_body_bytes = html.len();
94
95 let initial_player_response = extract_initial_player_response(&html)?;
96 let metadata = extract_metadata(&initial_player_response);
97 let visitor_data = extract_visitor_data(&html);
98 let caption_source = resolve_caption_source(
99 client,
100 video_id.as_str(),
101 &initial_player_response,
102 visitor_data.as_deref(),
103 )
104 .await;
105
106 let title = metadata
107 .title
108 .clone()
109 .unwrap_or_else(|| format!("YouTube video {}", video_id.as_str()));
110 let (text, diagnostics) = match caption_source {
111 Ok(caption_source) if !caption_source.segments.is_empty() => (
112 format_video_context(
113 video_id.as_str(),
114 &watch_url,
115 &metadata,
116 &caption_source.selected_track,
117 &caption_source.segments,
118 ),
119 build_diagnostics(
120 &caption_source.tracks,
121 &caption_source.selected_track,
122 caption_source.segments.len(),
123 caption_source.source_client,
124 ),
125 ),
126 Ok(caption_source) => (
127 format_metadata_only_context(video_id.as_str(), &watch_url, &metadata),
128 build_metadata_only_diagnostics(Some(&format!(
129 "Caption tracks were found, but no transcript segments were extracted from {}.",
130 caption_source.source_client
131 ))),
132 ),
133 Err(err @ (YouTubeError::CaptionTracksMissing | YouTubeError::NoUsableCaptionTrack)) => (
134 format_metadata_only_context(video_id.as_str(), &watch_url, &metadata),
135 build_metadata_only_diagnostics(Some(&err.to_string())),
136 ),
137 Err(err @ (YouTubeError::TranscriptEmpty | YouTubeError::TranscriptParse(_))) => (
138 format_metadata_only_context(video_id.as_str(), &watch_url, &metadata),
139 build_metadata_only_diagnostics(Some(&err.to_string())),
140 ),
141 Err(err) => return Err(err),
142 };
143
144 let was_redirected = final_url != watch_url;
145
146 Ok(PageContent {
147 title: Some(title),
148 content_length: text.len(),
149 text,
150 url: final_url,
151 requested_url,
152 status_code,
153 content_type,
154 format_received: ContentFormat::Html,
155 was_redirected,
156 raw_body_bytes,
157 diagnostics,
158 })
159}
160
161async fn resolve_caption_source(
162 client: &Client,
163 video_id: &str,
164 initial_player_response: &Value,
165 visitor_data: Option<&str>,
166) -> Result<CaptionSource, YouTubeError> {
167 let web_result = fetch_caption_source_from_response(
168 client,
169 initial_player_response,
170 super::read::USER_AGENT,
171 "web",
172 )
173 .await;
174 if web_result.is_ok() {
175 return web_result;
176 }
177 let web_error = web_result.err();
178
179 let Some(visitor_data) = visitor_data else {
180 return Err(web_error.unwrap_or(YouTubeError::VisitorDataMissing));
181 };
182
183 let android_vr_response =
184 fetch_android_vr_player_response(client, video_id, visitor_data).await?;
185 fetch_caption_source_from_response(
186 client,
187 &android_vr_response,
188 ANDROID_VR_USER_AGENT,
189 "android_vr",
190 )
191 .await
192}
193
194async fn fetch_caption_source_from_response(
195 client: &Client,
196 player_response: &Value,
197 user_agent: &str,
198 source_client: &'static str,
199) -> Result<CaptionSource, YouTubeError> {
200 let tracks = extract_caption_tracks(player_response)?;
201 let selected_track = select_caption_track(&tracks).ok_or(YouTubeError::NoUsableCaptionTrack)?;
202 let segments = fetch_transcript_segments(client, &selected_track, user_agent).await?;
203
204 Ok(CaptionSource {
205 tracks,
206 selected_track,
207 segments,
208 source_client,
209 })
210}
211
212async fn fetch_transcript_segments(
213 client: &Client,
214 track: &CaptionTrack,
215 user_agent: &str,
216) -> Result<Vec<TranscriptSegment>, YouTubeError> {
217 let transcript_url = caption_url_with_json3(&track.base_url)?;
218 let transcript_response = client
219 .get(transcript_url.as_str())
220 .header("User-Agent", user_agent)
221 .header(
222 "Accept",
223 "application/json,text/xml,text/plain;q=0.9,*/*;q=0.5",
224 )
225 .header("Accept-Language", "en-US,en;q=0.9")
226 .send()
227 .await
228 .map_err(|err| YouTubeError::Fetch(err.to_string()))?;
229
230 if !transcript_response.status().is_success() {
231 return Err(YouTubeError::TranscriptHttpStatus(
232 transcript_response.status().as_u16(),
233 ));
234 }
235
236 let transcript_text = transcript_response
237 .text()
238 .await
239 .map_err(|err| YouTubeError::TranscriptParse(err.to_string()))?;
240 if transcript_text.trim().is_empty() {
241 return Err(YouTubeError::TranscriptEmpty);
242 }
243
244 if transcript_text.trim_start().starts_with('<') {
245 return Ok(parse_xml_transcript(&transcript_text));
246 }
247
248 let transcript_json: Value = serde_json::from_str(&transcript_text)
249 .map_err(|err| YouTubeError::TranscriptParse(err.to_string()))?;
250 Ok(parse_json3_transcript(&transcript_json))
251}
252
/// Requests a player response for `video_id` from the `youtubei/v1/player` endpoint,
/// identifying as the Android VR client.
///
/// `visitor_data` is sent in the `X-Goog-Visitor-Id` header.
///
/// # Errors
///
/// * `Fetch` when the request fails.
/// * `PlayerApiHttpStatus` for a non-success status.
/// * `PlayerResponseParse` when the body is not valid JSON.
/// * `PlayerApiLoginRequired` when `playabilityStatus.status` is `LOGIN_REQUIRED`; the
///   message is `playabilityStatus.reason`, or `"sign-in required"` when absent.
async fn fetch_android_vr_player_response(
    client: &Client,
    video_id: &str,
    visitor_data: &str,
) -> Result<Value, YouTubeError> {
    // Client context describing the impersonated device; version and user agent come from
    // the module constants so they match the request headers below.
    let payload = serde_json::json!({
        "context": {
            "client": {
                "clientName": "ANDROID_VR",
                "clientVersion": ANDROID_VR_CLIENT_VERSION,
                "deviceMake": "Oculus",
                "deviceModel": "Quest 3",
                "androidSdkVersion": 32,
                "userAgent": ANDROID_VR_USER_AGENT,
                "osName": "Android",
                "osVersion": "12L",
                "hl": "en",
                "gl": "US"
            }
        },
        "videoId": video_id,
        "contentCheckOk": true,
        "racyCheckOk": true
    });

    let response = client
        .post("https://www.youtube.com/youtubei/v1/player")
        .header("Content-Type", "application/json")
        .header("User-Agent", ANDROID_VR_USER_AGENT)
        .header("X-YouTube-Client-Name", ANDROID_VR_CLIENT_NAME)
        .header("X-YouTube-Client-Version", ANDROID_VR_CLIENT_VERSION)
        .header("X-Goog-Visitor-Id", visitor_data)
        .header("Origin", "https://www.youtube.com")
        .json(&payload)
        .send()
        .await
        .map_err(|err| YouTubeError::Fetch(err.to_string()))?;

    if !response.status().is_success() {
        return Err(YouTubeError::PlayerApiHttpStatus(
            response.status().as_u16(),
        ));
    }

    let player_response = response
        .json::<Value>()
        .await
        .map_err(|err| YouTubeError::PlayerResponseParse(err.to_string()))?;

    // Only LOGIN_REQUIRED is rejected here; any other playability status is returned to the
    // caller unchanged.
    if player_response
        .pointer("/playabilityStatus/status")
        .and_then(Value::as_str)
        == Some("LOGIN_REQUIRED")
    {
        return Err(YouTubeError::PlayerApiLoginRequired(
            player_response
                .pointer("/playabilityStatus/reason")
                .and_then(Value::as_str)
                .unwrap_or("sign-in required")
                .to_string(),
        ));
    }

    Ok(player_response)
}
318
319pub fn is_youtube_url(url: &Url) -> bool {
320 url.host_str().is_some_and(|host| {
321 let host = host.to_ascii_lowercase();
322 host == "youtu.be"
323 || host.ends_with(".youtu.be")
324 || host == "youtube.com"
325 || host.ends_with(".youtube.com")
326 })
327}
328
329fn extract_video_id(url: &Url) -> Option<VideoId> {
330 let host = url.host_str()?.to_ascii_lowercase();
331 if host == "youtu.be" || host.ends_with(".youtu.be") {
332 return first_path_segment(url).and_then(VideoId::from_candidate);
333 }
334
335 if !(host == "youtube.com" || host.ends_with(".youtube.com")) {
336 return None;
337 }
338
339 match first_path_segment(url).as_deref() {
340 Some("watch") => url
341 .query_pairs()
342 .find_map(|(key, value)| (key == "v").then(|| value.into_owned()))
343 .and_then(VideoId::from_candidate),
344 Some("shorts" | "embed" | "live") => url
345 .path_segments()
346 .and_then(|mut segments| segments.nth(1).map(str::to_string))
347 .and_then(VideoId::from_candidate),
348 _ => None,
349 }
350}
351
352impl VideoId {
353 fn from_candidate(candidate: String) -> Option<Self> {
354 let id = candidate.trim();
355 let is_valid = id.len() == 11
356 && id
357 .bytes()
358 .all(|byte| byte.is_ascii_alphanumeric() || byte == b'_' || byte == b'-');
359 is_valid.then(|| Self(id.to_string()))
360 }
361}
362
363fn first_path_segment(url: &Url) -> Option<String> {
364 url.path_segments()
365 .and_then(|mut segments| segments.next().map(str::to_string))
366}
367
368fn canonical_watch_url(video_id: &str) -> String {
369 format!("{WATCH_BASE_URL}?v={video_id}")
370}
371
372fn extract_initial_player_response(html: &str) -> Result<Value, YouTubeError> {
373 let marker_index = html
374 .find(PLAYER_RESPONSE_VAR)
375 .ok_or(YouTubeError::PlayerResponseMissing)?;
376 let after_marker = &html[marker_index + PLAYER_RESPONSE_VAR.len()..];
377 let brace_relative = after_marker
378 .find('{')
379 .ok_or(YouTubeError::PlayerResponseMissing)?;
380 let json_start = marker_index + PLAYER_RESPONSE_VAR.len() + brace_relative;
381 let json_end = find_balanced_json_end(html, json_start)?;
382 serde_json::from_str(&html[json_start..json_end])
383 .map_err(|err| YouTubeError::PlayerResponseParse(err.to_string()))
384}
385
386fn find_balanced_json_end(text: &str, start: usize) -> Result<usize, YouTubeError> {
387 let mut depth = 0u32;
388 let mut in_string = false;
389 let mut escaped = false;
390
391 for (offset, ch) in text[start..].char_indices() {
392 if escaped {
393 escaped = false;
394 continue;
395 }
396
397 if ch == '\\' {
398 escaped = in_string;
399 continue;
400 }
401
402 if ch == '"' {
403 in_string = !in_string;
404 continue;
405 }
406
407 if in_string {
408 continue;
409 }
410
411 match ch {
412 '{' => depth += 1,
413 '}' => {
414 depth = depth.saturating_sub(1);
415 if depth == 0 {
416 return Ok(start + offset + ch.len_utf8());
417 }
418 }
419 _ => {}
420 }
421 }
422
423 Err(YouTubeError::PlayerResponseUnterminated)
424}
425
426fn extract_visitor_data(html: &str) -> Option<String> {
427 extract_quoted_json_field(html, "VISITOR_DATA")
428 .or_else(|| extract_quoted_json_field(html, "visitorData"))
429}
430
/// Returns the raw text between the quotes of the first `"key":"value"` pair found in
/// `text`.
///
/// This is a plain substring scan: the value ends at the next `"` and is returned without
/// unescaping. `None` is returned when the key or the closing quote is missing.
fn extract_quoted_json_field(text: &str, key: &str) -> Option<String> {
    let marker = format!("\"{key}\":\"");
    let (_, after_marker) = text.split_once(marker.as_str())?;
    let (value, _) = after_marker.split_once('"')?;
    Some(value.to_owned())
}
438
439fn extract_metadata(player_response: &Value) -> VideoMetadata {
440 let details = player_response.get("videoDetails").unwrap_or(&Value::Null);
441 let microformat = player_response
442 .pointer("/microformat/playerMicroformatRenderer")
443 .unwrap_or(&Value::Null);
444
445 VideoMetadata {
446 title: string_at(details, "title").or_else(|| text_runs_at(microformat, "title")),
447 author: string_at(details, "author").or_else(|| string_at(microformat, "ownerChannelName")),
448 channel_id: string_at(details, "channelId")
449 .or_else(|| string_at(microformat, "externalChannelId")),
450 duration_seconds: string_at(details, "lengthSeconds")
451 .or_else(|| string_at(microformat, "lengthSeconds")),
452 view_count: string_at(details, "viewCount").or_else(|| string_at(microformat, "viewCount")),
453 description: string_at(details, "shortDescription")
454 .or_else(|| text_runs_at(microformat, "description")),
455 publish_date: string_at(microformat, "publishDate"),
456 upload_date: string_at(microformat, "uploadDate"),
457 }
458}
459
460fn extract_caption_tracks(player_response: &Value) -> Result<Vec<CaptionTrack>, YouTubeError> {
461 let tracks = player_response
462 .pointer("/captions/playerCaptionsTracklistRenderer/captionTracks")
463 .and_then(Value::as_array)
464 .ok_or(YouTubeError::CaptionTracksMissing)?;
465
466 let parsed = tracks
467 .iter()
468 .filter_map(|track| {
469 Some(CaptionTrack {
470 base_url: string_at(track, "baseUrl")?,
471 language_code: string_at(track, "languageCode")?,
472 name: text_runs_at(track, "name").unwrap_or_else(|| "unknown".to_string()),
473 is_generated: string_at(track, "kind").as_deref() == Some("asr"),
474 })
475 })
476 .collect::<Vec<_>>();
477
478 if parsed.is_empty() {
479 return Err(YouTubeError::NoUsableCaptionTrack);
480 }
481
482 Ok(parsed)
483}
484
485fn select_caption_track(tracks: &[CaptionTrack]) -> Option<CaptionTrack> {
486 tracks
487 .iter()
488 .find(|track| is_english(&track.language_code) && !track.is_generated)
489 .or_else(|| tracks.iter().find(|track| is_english(&track.language_code)))
490 .or_else(|| tracks.first())
491 .cloned()
492}
493
/// Returns `true` for the language code `en` and any `en-…` variant (such as `en-US` or
/// `en-orig`), compared case-insensitively.
fn is_english(language_code: &str) -> bool {
    let lowered = language_code.to_ascii_lowercase();
    match lowered.strip_prefix("en") {
        Some(rest) => rest.is_empty() || rest.starts_with('-'),
        None => false,
    }
}
498
499fn caption_url_with_json3(base_url: &str) -> Result<String, YouTubeError> {
500 let mut url =
501 Url::parse(base_url).map_err(|err| YouTubeError::InvalidCaptionUrl(err.to_string()))?;
502 {
503 let has_fmt = url.query_pairs().any(|(key, _)| key == "fmt");
504 if !has_fmt {
505 url.query_pairs_mut().append_pair("fmt", "json3");
506 }
507 }
508 Ok(url.to_string())
509}
510
511fn parse_json3_transcript(value: &Value) -> Vec<TranscriptSegment> {
512 value
513 .get("events")
514 .or_else(|| value.get("aAppend"))
515 .and_then(Value::as_array)
516 .into_iter()
517 .flatten()
518 .filter_map(parse_json3_event)
519 .collect()
520}
521
522fn parse_json3_event(event: &Value) -> Option<TranscriptSegment> {
523 let text = event
524 .get("segs")?
525 .as_array()?
526 .iter()
527 .filter_map(|seg| seg.get("utf8").and_then(Value::as_str))
528 .collect::<String>();
529 let text = normalize_transcript_text(&text);
530 if text.is_empty() || is_noise_segment(&text) {
531 return None;
532 }
533
534 Some(TranscriptSegment {
535 start_ms: event.get("tStartMs").and_then(Value::as_u64).unwrap_or(0),
536 duration_ms: event.get("dDurationMs").and_then(Value::as_u64),
537 text,
538 })
539}
540
541fn parse_xml_transcript(text: &str) -> Vec<TranscriptSegment> {
542 text.split("<p ")
543 .skip(1)
544 .filter_map(parse_xml_paragraph)
545 .collect()
546}
547
548fn parse_xml_paragraph(fragment: &str) -> Option<TranscriptSegment> {
549 let tag_end = fragment.find('>')?;
550 let attrs = &fragment[..tag_end];
551 let body = &fragment[tag_end + 1..fragment.find("</p>")?];
552 let text = normalize_transcript_text(&strip_xml_tags(body));
553 if text.is_empty() || is_noise_segment(&text) {
554 return None;
555 }
556
557 Some(TranscriptSegment {
558 start_ms: extract_xml_time_ms(attrs, "t").unwrap_or(0),
559 duration_ms: extract_xml_time_ms(attrs, "d"),
560 text,
561 })
562}
563
/// Reads the integer value of the attribute `key` from an XML attribute string such as
/// `t="1000" d="2000"`.
///
/// Only a whole attribute name matches: `key="` must sit at the start of `attrs` or directly
/// after whitespace, so looking up `d` is not confused by `id="7"`.
///
/// Returns `None` when the attribute is absent, its value is unterminated, or the value is
/// not an unsigned integer.
fn extract_xml_time_ms(attrs: &str, key: &str) -> Option<u64> {
    let marker = format!(r#"{key}=""#);
    // When a match turns out to be the tail of a longer name, resume one character later.
    let step = marker.chars().next().map_or(1, char::len_utf8);
    let mut search_from = 0;

    while let Some(relative) = attrs[search_from..].find(&marker) {
        let name_start = search_from + relative;
        let at_name_boundary = attrs[..name_start]
            .chars()
            .next_back()
            .map_or(true, char::is_whitespace);

        if at_name_boundary {
            let tail = &attrs[name_start + marker.len()..];
            let end = tail.find('"')?;
            return tail[..end].parse().ok();
        }

        search_from = name_start + step;
    }

    None
}
571
/// Removes XML tags from `text` and decodes entity references in the remaining content.
///
/// * Everything between `<` and `>` is dropped; a stray `>` outside a tag is dropped too.
/// * The five predefined XML entities (`amp`, `lt`, `gt`, `quot`, `apos`) and numeric
///   character references (decimal `&#39;`, hexadecimal `&#x27;`) are decoded.
/// * An `&` that does not start a recognised reference is kept literally, so a bare
///   ampersand or an unknown entity never swallows the text that follows it.
fn strip_xml_tags(text: &str) -> String {
    let mut out = String::with_capacity(text.len());
    let mut in_tag = false;
    let mut rest = text;

    while let Some(ch) = rest.chars().next() {
        rest = &rest[ch.len_utf8()..];
        match ch {
            '<' => in_tag = true,
            '>' => in_tag = false,
            _ if in_tag => {}
            '&' => match decode_xml_entity(rest) {
                Some((decoded, consumed)) => {
                    out.push(decoded);
                    rest = &rest[consumed..];
                }
                None => out.push('&'),
            },
            _ => out.push(ch),
        }
    }

    out
}

/// Decodes the entity reference whose text (after the `&`) starts `after_ampersand`.
///
/// Returns the decoded character and the number of bytes consumed, including the
/// terminating `;`, or `None` when the text does not begin with a recognised reference.
fn decode_xml_entity(after_ampersand: &str) -> Option<(char, usize)> {
    // Longest reference body worth scanning for; keeps a bare `&` from searching far ahead.
    const MAX_ENTITY_LEN: usize = 10;

    let semicolon = after_ampersand
        .bytes()
        .take(MAX_ENTITY_LEN + 1)
        .position(|byte| byte == b';')?;
    let name = &after_ampersand[..semicolon];

    let decoded = match name {
        "amp" => '&',
        "lt" => '<',
        "gt" => '>',
        "quot" => '"',
        "apos" => '\'',
        _ => {
            let reference = name.strip_prefix('#')?;
            let hex = reference
                .strip_prefix('x')
                .or_else(|| reference.strip_prefix('X'));
            let (radix, digits) = match hex {
                Some(hex_digits) => (16, hex_digits),
                None => (10, reference),
            };
            // Reject signs and empty digit runs that the integer parser would accept.
            if digits.is_empty() || !digits.chars().all(|digit| digit.is_digit(radix)) {
                return None;
            }
            char::from_u32(u32::from_str_radix(digits, radix).ok()?)?
        }
    };

    Some((decoded, semicolon + 1))
}
607
/// Collapses every run of whitespace in `text` into a single space and trims both ends.
fn normalize_transcript_text(text: &str) -> String {
    let mut normalized = String::with_capacity(text.len());
    for word in text.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized
}
611
/// Returns `true` when the trimmed text is nothing but a non-speech marker: `[music]`,
/// `[applause]`, `[laughter]` (ASCII case-insensitive) or a lone `♪` / `♫`.
fn is_noise_segment(text: &str) -> bool {
    const NOISE_MARKERS: [&str; 5] = ["[music]", "[applause]", "[laughter]", "♪", "♫"];

    let candidate = text.trim();
    NOISE_MARKERS
        .iter()
        .any(|marker| candidate.eq_ignore_ascii_case(marker))
}
619
620fn format_video_context(
621 video_id: &str,
622 canonical_url: &str,
623 metadata: &VideoMetadata,
624 track: &CaptionTrack,
625 segments: &[TranscriptSegment],
626) -> String {
627 let mut output = String::new();
628 output.push_str("# YouTube Video Context\n\n");
629 output.push_str("## Source\n");
630 output.push_str(&format!("- URL: {canonical_url}\n"));
631 output.push_str(&format!("- Video ID: {video_id}\n"));
632 push_optional(&mut output, "- Title", metadata.title.as_deref());
633 push_optional(&mut output, "- Channel", metadata.author.as_deref());
634 push_optional(&mut output, "- Channel ID", metadata.channel_id.as_deref());
635 push_optional(
636 &mut output,
637 "- Duration seconds",
638 metadata.duration_seconds.as_deref(),
639 );
640 push_optional(&mut output, "- Views", metadata.view_count.as_deref());
641 push_optional(&mut output, "- Published", metadata.publish_date.as_deref());
642 push_optional(&mut output, "- Uploaded", metadata.upload_date.as_deref());
643
644 output.push_str("\n## Transcript Track\n");
645 output.push_str(&format!("- Language: {}\n", track.language_code));
646 output.push_str(&format!("- Name: {}\n", track.name));
647 output.push_str(&format!("- Auto-generated: {}\n", track.is_generated));
648
649 if let Some(description) = metadata.description.as_deref() {
650 output.push_str("\n## Description\n");
651 output.push_str(description.trim());
652 output.push('\n');
653 }
654
655 output.push_str("\n## Transcript\n");
656 for segment in segments {
657 output.push_str(&format!(
658 "[{}] {}\n",
659 format_timestamp(segment.start_ms),
660 segment.text
661 ));
662 }
663
664 output.trim().to_string()
665}
666
667fn format_metadata_only_context(
668 video_id: &str,
669 canonical_url: &str,
670 metadata: &VideoMetadata,
671) -> String {
672 let mut output = String::new();
673 output.push_str("# YouTube Video Context\n\n");
674 output.push_str("## Source\n");
675 output.push_str(&format!("- URL: {canonical_url}\n"));
676 output.push_str(&format!("- Video ID: {video_id}\n"));
677 push_optional(&mut output, "- Title", metadata.title.as_deref());
678 push_optional(&mut output, "- Channel", metadata.author.as_deref());
679 push_optional(&mut output, "- Channel ID", metadata.channel_id.as_deref());
680 push_optional(
681 &mut output,
682 "- Duration seconds",
683 metadata.duration_seconds.as_deref(),
684 );
685 push_optional(&mut output, "- Views", metadata.view_count.as_deref());
686 push_optional(&mut output, "- Published", metadata.publish_date.as_deref());
687 push_optional(&mut output, "- Uploaded", metadata.upload_date.as_deref());
688
689 if let Some(description) = metadata.description.as_deref() {
690 output.push_str("\n## Description\n");
691 output.push_str(description.trim());
692 output.push('\n');
693 }
694
695 output.push_str("\n## Transcript\n");
696 output.push_str("Transcript unavailable. YouTube metadata was extracted, but no usable caption/transcript body was available for this video.\n");
697 output.trim().to_string()
698}
699
700fn build_diagnostics(
701 tracks: &[CaptionTrack],
702 selected_track: &CaptionTrack,
703 segment_count: usize,
704 source_client: &str,
705) -> Vec<String> {
706 vec![
707 "YouTube extraction used native HTTP transcript path; no video/audio was downloaded."
708 .to_string(),
709 format!(
710 "Selected caption track from {source_client}: {} ({}, auto-generated: {}).",
711 selected_track.name, selected_track.language_code, selected_track.is_generated
712 ),
713 format!(
714 "Found {} caption track(s), extracted {} transcript segment(s).",
715 tracks.len(),
716 segment_count
717 ),
718 ]
719}
720
/// Builds the diagnostic lines for a metadata-only result. A third line stating the reason
/// is appended when `reason` is present and not blank.
fn build_metadata_only_diagnostics(reason: Option<&str>) -> Vec<String> {
    let fixed_lines = [
        "YouTube extraction used native HTTP metadata path; no video/audio was downloaded.",
        "Transcript unavailable; returning metadata-only YouTube context.",
    ];
    let reason_line = match reason {
        Some(reason) if !reason.trim().is_empty() => {
            Some(format!("Transcript unavailable reason: {reason}"))
        }
        _ => None,
    };

    fixed_lines
        .iter()
        .map(|line| line.to_string())
        .chain(reason_line)
        .collect()
}
732
/// Appends `"<label>: <value>\n"` to `output` when `value` is present and not blank; the
/// value is trimmed before it is written. Otherwise `output` is left untouched.
fn push_optional(output: &mut String, label: &str, value: Option<&str>) {
    let Some(trimmed) = value.map(str::trim) else {
        return;
    };
    if trimmed.is_empty() {
        return;
    }

    output.push_str(label);
    output.push_str(": ");
    output.push_str(trimmed);
    output.push('\n');
}
738
/// Formats a millisecond offset as `MM:SS`, or `HH:MM:SS` once it reaches one hour.
/// Sub-second precision is discarded.
fn format_timestamp(ms: u64) -> String {
    let whole_seconds = ms / 1000;
    let (hours, within_hour) = (whole_seconds / 3600, whole_seconds % 3600);
    let (minutes, seconds) = (within_hour / 60, within_hour % 60);

    match hours {
        0 => format!("{minutes:02}:{seconds:02}"),
        _ => format!("{hours:02}:{minutes:02}:{seconds:02}"),
    }
}
751
752fn string_at(value: &Value, key: &str) -> Option<String> {
753 value
754 .get(key)
755 .and_then(Value::as_str)
756 .filter(|value| !value.is_empty())
757 .map(str::to_string)
758}
759
760fn text_runs_at(value: &Value, key: &str) -> Option<String> {
761 let text_value = value.get(key)?;
762 if let Some(simple_text) = text_value.get("simpleText").and_then(Value::as_str) {
763 return Some(simple_text.to_string());
764 }
765 let runs = text_value.get("runs")?.as_array()?;
766 let text = runs
767 .iter()
768 .filter_map(|run| run.get("text").and_then(Value::as_str))
769 .collect::<String>();
770 (!text.is_empty()).then_some(text)
771}
772
/// Errors produced while fetching a YouTube page and extracting its metadata and captions.
///
/// Implements [`std::fmt::Display`] for user-facing messages and [`std::error::Error`] so
/// the type works with `Box<dyn Error>` and the wider error-handling ecosystem.
#[derive(Debug)]
pub enum YouTubeError {
    /// The input could not be parsed as a URL; carries the parser's message.
    InvalidUrl(String),
    /// The URL parsed but is not a recognised YouTube video URL.
    UnsupportedUrl,
    /// An HTTP request failed or its body could not be read; carries the client's message.
    Fetch(String),
    /// The watch page returned a non-success status (code and canonical reason).
    HttpStatus(u16, String),
    /// The player-response marker or its opening brace was not found in the page.
    PlayerResponseMissing,
    /// The player-response JSON had no balanced closing brace.
    PlayerResponseUnterminated,
    /// The player-response JSON failed to parse; carries the parser's message.
    PlayerResponseParse(String),
    /// The player API returned a non-success status.
    PlayerApiHttpStatus(u16),
    /// The player API reported `LOGIN_REQUIRED`; carries the reported reason.
    PlayerApiLoginRequired(String),
    /// No visitor data was available.
    VisitorDataMissing,
    /// The player response did not contain a caption track list.
    CaptionTracksMissing,
    /// The caption track list contained no usable entry.
    NoUsableCaptionTrack,
    /// A caption track URL could not be parsed; carries the parser's message.
    InvalidCaptionUrl(String),
    /// The transcript request returned a non-success status.
    TranscriptHttpStatus(u16),
    /// The transcript body could not be read or parsed; carries the underlying message.
    TranscriptParse(String),
    /// The transcript body was blank.
    TranscriptEmpty,
}

impl std::fmt::Display for YouTubeError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        match self {
            Self::InvalidUrl(msg) => write!(f, "Invalid YouTube URL: {msg}"),
            Self::UnsupportedUrl => write!(f, "Unsupported YouTube URL"),
            Self::Fetch(msg) => write!(f, "YouTube fetch failed: {msg}"),
            Self::HttpStatus(code, reason) => write!(f, "YouTube returned HTTP {code} {reason}"),
            Self::PlayerResponseMissing => write!(f, "YouTube player response not found"),
            Self::PlayerResponseUnterminated => {
                write!(f, "YouTube player response JSON was unterminated")
            }
            Self::PlayerResponseParse(msg) => {
                write!(f, "YouTube player response parse failed: {msg}")
            }
            Self::PlayerApiHttpStatus(code) => write!(f, "YouTube player API returned HTTP {code}"),
            Self::PlayerApiLoginRequired(reason) => {
                write!(f, "YouTube player API required login: {reason}")
            }
            Self::VisitorDataMissing => write!(f, "YouTube visitor data was unavailable"),
            Self::CaptionTracksMissing => write!(f, "YouTube captions are unavailable for this video"),
            Self::NoUsableCaptionTrack => write!(f, "No usable YouTube caption track found"),
            Self::InvalidCaptionUrl(msg) => write!(f, "Invalid YouTube caption URL: {msg}"),
            Self::TranscriptHttpStatus(code) => write!(f, "YouTube transcript returned HTTP {code}"),
            Self::TranscriptParse(msg) => write!(f, "YouTube transcript parse failed: {msg}"),
            Self::TranscriptEmpty => write!(f, "YouTube transcript was empty; caption track metadata was found but YouTube returned no caption body for this client"),
        }
    }
}

// Underlying causes are stored as rendered messages, so there is no `source()` to expose.
impl std::error::Error for YouTubeError {}
821
/// Unit tests for the URL, player-response, caption and formatting helpers, plus an ignored
/// network smoke test.
#[cfg(test)]
pub(crate) mod tests {
    use super::*;
    use serde_json::json;

    #[test]
    fn youtube_extracts_video_ids_from_common_urls() {
        let cases = [
            ("https://www.youtube.com/watch?v=McO_xcf4IYw", "McO_xcf4IYw"),
            ("https://youtu.be/McO_xcf4IYw?t=12", "McO_xcf4IYw"),
            ("https://www.youtube.com/shorts/McO_xcf4IYw", "McO_xcf4IYw"),
            ("https://www.youtube.com/embed/McO_xcf4IYw", "McO_xcf4IYw"),
        ];

        for (url, expected) in cases {
            let parsed = Url::parse(url).unwrap();
            assert_eq!(extract_video_id(&parsed).unwrap().as_str(), expected);
        }
    }

    #[test]
    fn youtube_rejects_invalid_video_ids() {
        let parsed =
            Url::parse("https://www.youtube.com/watch?v=not-valid-because-too-long").unwrap();
        assert!(extract_video_id(&parsed).is_none());
    }

    #[test]
    fn youtube_extracts_balanced_player_response() {
        // Braces and an escaped quote inside string values must not end the object early.
        let html = r#"<script>var ytInitialPlayerResponse = {"videoDetails":{"title":"A } in string","shortDescription":"escaped \" brace }"},"captions":{}};</script>"#;
        let response = extract_initial_player_response(html).unwrap();
        assert_eq!(response["videoDetails"]["title"], "A } in string");
        assert_eq!(
            response["videoDetails"]["shortDescription"],
            "escaped \" brace }"
        );
    }

    #[test]
    fn youtube_extracts_visitor_data() {
        let html = r#"ytcfg.set({"VISITOR_DATA":"visitor-token","other":true});"#;
        assert_eq!(extract_visitor_data(html).as_deref(), Some("visitor-token"));
    }

    #[test]
    fn youtube_selects_manual_english_before_auto_english() {
        let tracks = vec![
            CaptionTrack {
                base_url: "https://example.com/fr".into(),
                language_code: "fr".into(),
                name: "French".into(),
                is_generated: false,
            },
            CaptionTrack {
                base_url: "https://example.com/en-auto".into(),
                language_code: "en".into(),
                name: "English auto".into(),
                is_generated: true,
            },
            CaptionTrack {
                base_url: "https://example.com/en".into(),
                language_code: "en".into(),
                name: "English".into(),
                is_generated: false,
            },
        ];

        let selected = select_caption_track(&tracks).unwrap();
        assert_eq!(selected.base_url, "https://example.com/en");
    }

    #[test]
    fn youtube_parses_json3_transcript_segments() {
        // The whitespace-only and `[Music]` events must be filtered out.
        let transcript = json!({
            "events": [
                {"tStartMs": 0, "dDurationMs": 1000, "segs": [{"utf8": "Hello "}, {"utf8": "world"}]},
                {"tStartMs": 1000, "segs": [{"utf8": "\n"}]},
                {"tStartMs": 2000, "segs": [{"utf8": "[Music]"}]},
                {"tStartMs": 3000, "dDurationMs": 500, "segs": [{"utf8": "next line"}]}
            ]
        });

        let segments = parse_json3_transcript(&transcript);
        assert_eq!(segments.len(), 2);
        assert_eq!(segments[0].text, "Hello world");
        assert_eq!(segments[0].start_ms, 0);
        assert_eq!(segments[1].text, "next line");
    }

    #[test]
    fn youtube_parses_xml_transcript_segments() {
        // The ampersand is written as an entity so the fixture is well-formed XML.
        let transcript = r#"<?xml version="1.0" ?><timedtext><body><p t="1000" d="2000">Hello &amp; <s>world</s></p><p t="3000" d="1000">[Music]</p></body></timedtext>"#;
        let segments = parse_xml_transcript(transcript);
        assert_eq!(segments.len(), 1);
        assert_eq!(segments[0].start_ms, 1000);
        assert_eq!(segments[0].duration_ms, Some(2000));
        assert_eq!(segments[0].text, "Hello & world");
    }

    #[test]
    fn youtube_adds_json3_format_to_caption_url() {
        let url =
            caption_url_with_json3("https://www.youtube.com/api/timedtext?v=abc&lang=en").unwrap();
        assert!(url.contains("fmt=json3"));
    }

    #[test]
    fn youtube_formats_metadata_only_context_with_clear_transcript_marker() {
        let metadata = VideoMetadata {
            title: Some("No captions example".into()),
            author: Some("Example Channel".into()),
            ..VideoMetadata::default()
        };
        let text = format_metadata_only_context(
            "McO_xcf4IYw",
            "https://www.youtube.com/watch?v=McO_xcf4IYw",
            &metadata,
        );
        assert!(text.contains("No captions example"));
        assert!(text.contains("Transcript unavailable"));
    }

    #[test]
    fn youtube_metadata_only_diagnostics_include_reason() {
        let diagnostics = build_metadata_only_diagnostics(Some("captions disabled"));
        assert!(diagnostics
            .iter()
            .any(|line| line.contains("metadata-only")));
        assert!(diagnostics
            .iter()
            .any(|line| line.contains("captions disabled")));
    }

    #[tokio::test]
    #[ignore = "network smoke test for YouTube extraction"]
    async fn youtube_reads_sample_video_over_http() {
        let client = reqwest::Client::builder()
            .redirect(reqwest::redirect::Policy::limited(10))
            .build()
            .unwrap();
        let page = fetch_and_extract(&client, "https://www.youtube.com/watch?v=McO_xcf4IYw")
            .await
            .unwrap();
        assert!(page.text.contains("# YouTube Video Context"));
        assert!(page.text.contains("- Video ID: McO_xcf4IYw"));
        assert!(page.text.contains("## Transcript"));
        assert!(page.content_length > 1000);
    }
}