1use serde::Deserialize;
15
16use super::{fetch, html_to_text, url_guard};
17
/// InnerTube "player" endpoint that `innertube_player` POSTs to.
const INNERTUBE_PLAYER: &str = "https://www.youtube.com/youtubei/v1/player";

// Single source of truth for the Android client version. Both the JSON
// `clientVersion` field and the User-Agent string embed it, and keeping two
// hand-written copies invites them drifting apart on the next version bump.
macro_rules! android_client_version {
    () => {
        "20.10.38"
    };
}

/// Version reported as `clientVersion` in the InnerTube request context.
const ANDROID_CLIENT_VERSION: &str = android_client_version!();

/// User-Agent sent with the InnerTube request; always embeds
/// `ANDROID_CLIENT_VERSION`.
const ANDROID_UA: &str = concat!(
    "com.google.android.youtube/",
    android_client_version!(),
    " (Linux; U; Android 14) gzip"
);
23
/// A video transcript flattened into plain text, as produced by
/// `fetch_transcript`.
///
/// Derives `PartialEq`/`Eq` so callers and tests can compare transcripts
/// directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Transcript {
    /// The video id the transcript was requested for.
    pub video_id: String,
    /// Video title from the player response; `None` when missing or empty.
    pub title: Option<String>,
    /// Canonical `https://www.youtube.com/watch?v=<id>` URL for the video.
    pub source_url: String,
    /// All caption lines joined with single spaces.
    pub full_text: String,
}
32
33pub fn video_id(url: &str) -> Option<String> {
35 let safe = url_guard::validate(url).ok()?;
36 let host = safe.host.to_ascii_lowercase();
37 let (path, query) = split_path_query(path_and_query(&safe));
38
39 if host == "youtu.be" || host.ends_with(".youtu.be") {
40 return clean_id(path.trim_start_matches('/'));
41 }
42 if host == "youtube.com" || host.ends_with(".youtube.com") {
43 if path.starts_with("/watch") {
44 if let Some(v) = query_param(query, "v") {
45 return clean_id(&v);
46 }
47 }
48 for prefix in ["/shorts/", "/embed/", "/v/", "/live/"] {
49 if let Some(rest) = path.strip_prefix(prefix) {
50 return clean_id(rest);
51 }
52 }
53 }
54 None
55}
56
57pub fn fetch_transcript(video_id: &str, timeout_secs: u64) -> Result<Transcript, String> {
59 let player = innertube_player(video_id, timeout_secs)?;
60
61 let tracks = player.caption_tracks();
62 if tracks.is_empty() {
63 return Err(format!(
64 "no captions available for video {video_id}{}",
65 player.unavailable_reason()
66 ));
67 }
68
69 let track = select_caption_track(&tracks);
70 let url = json3_url(&track.base_url);
71 let data = fetch::fetch(&url, fetch::DEFAULT_MAX_BYTES, timeout_secs)?;
72 if data.status >= 400 {
73 return Err(format!(
74 "failed to download transcript (HTTP {})",
75 data.status
76 ));
77 }
78
79 let full_text = parse_timedtext(&data.body_text())?;
80 if full_text.trim().is_empty() {
81 return Err(format!("transcript for video {video_id} was empty"));
82 }
83
84 Ok(Transcript {
85 video_id: video_id.to_string(),
86 title: player.title(),
87 source_url: format!("https://www.youtube.com/watch?v={video_id}"),
88 full_text,
89 })
90}
91
92fn innertube_player(video_id: &str, timeout_secs: u64) -> Result<PlayerResponse, String> {
95 let body = serde_json::json!({
96 "context": {
97 "client": {
98 "clientName": "ANDROID",
99 "clientVersion": ANDROID_CLIENT_VERSION,
100 "androidSdkVersion": 34,
101 "hl": "en"
102 }
103 },
104 "videoId": video_id
105 })
106 .to_string();
107
108 let resp = fetch::post(
109 INNERTUBE_PLAYER,
110 "application/json",
111 ANDROID_UA,
112 &body,
113 fetch::DEFAULT_MAX_BYTES,
114 timeout_secs,
115 )?;
116 if resp.status >= 400 {
117 return Err(format!("InnerTube player returned HTTP {}", resp.status));
118 }
119
120 serde_json::from_str::<PlayerResponse>(&resp.body_text())
121 .map_err(|e| format!("could not parse InnerTube player response: {e}"))
122}
123
124#[derive(Deserialize)]
125struct PlayerResponse {
126 captions: Option<CaptionsBlock>,
127 #[serde(rename = "videoDetails")]
128 video_details: Option<VideoDetails>,
129 #[serde(rename = "playabilityStatus")]
130 playability: Option<Playability>,
131}
132
133impl PlayerResponse {
134 fn caption_tracks(&self) -> Vec<CaptionTrack> {
135 self.captions
136 .as_ref()
137 .and_then(|c| c.renderer.as_ref())
138 .map(|r| r.caption_tracks.clone())
139 .unwrap_or_default()
140 }
141
142 fn title(&self) -> Option<String> {
143 self.video_details
144 .as_ref()
145 .and_then(|v| v.title.clone())
146 .filter(|t| !t.is_empty())
147 }
148
149 fn unavailable_reason(&self) -> String {
151 match self.playability.as_ref() {
152 Some(p) if p.status.as_deref().is_some_and(|s| s != "OK") => {
153 let status = p.status.as_deref().unwrap_or("");
154 let reason = p.reason.as_deref().unwrap_or("");
155 format!(
156 " ({status}{}{reason})",
157 if reason.is_empty() { "" } else { ": " }
158 )
159 }
160 _ => " (captions disabled or none published)".to_string(),
161 }
162 }
163}
164
/// The `captions` object of the player response.
#[derive(Deserialize)]
struct CaptionsBlock {
    /// Renderer holding the caption track list; may be absent.
    #[serde(rename = "playerCaptionsTracklistRenderer")]
    renderer: Option<TracklistRenderer>,
}
170
171#[derive(Deserialize)]
172struct TracklistRenderer {
173 #[serde(rename = "captionTracks", default)]
174 caption_tracks: Vec<CaptionTrack>,
175}
176
/// The `videoDetails` object of the player response; only the title is read.
#[derive(Deserialize)]
struct VideoDetails {
    /// Video title, if the response includes one.
    title: Option<String>,
}
181
/// The `playabilityStatus` object of the player response.
#[derive(Deserialize)]
struct Playability {
    /// Status string; `PlayerResponse::unavailable_reason` treats any value
    /// other than `"OK"` as a failure worth reporting.
    status: Option<String>,
    /// Explanation text, included in the error suffix when non-empty.
    reason: Option<String>,
}
187
188#[derive(Deserialize, Clone)]
191struct CaptionTrack {
192 #[serde(rename = "baseUrl")]
193 base_url: String,
194 #[serde(rename = "languageCode")]
195 language_code: Option<String>,
196 kind: Option<String>,
197}
198
199impl CaptionTrack {
200 fn is_english(&self) -> bool {
201 self.language_code
202 .as_deref()
203 .is_some_and(|c| c.starts_with("en"))
204 }
205
206 fn is_auto_generated(&self) -> bool {
207 self.kind.as_deref() == Some("asr")
208 }
209}
210
211fn select_caption_track(tracks: &[CaptionTrack]) -> &CaptionTrack {
214 tracks
215 .iter()
216 .find(|t| t.is_english() && !t.is_auto_generated())
217 .or_else(|| tracks.iter().find(|t| t.is_english()))
218 .or_else(|| tracks.iter().find(|t| !t.is_auto_generated()))
219 .unwrap_or(&tracks[0])
220}
221
/// Rewrites a caption track URL so that it requests the `json3` format.
///
/// Every existing `fmt` parameter is removed, wherever it sits in the query
/// string, and a single `fmt=json3` is appended. A URL without a query string
/// gets one (`?fmt=json3`). Empty query segments and any `#fragment` are
/// dropped; other parameters keep their order.
fn json3_url(base_url: &str) -> String {
    // A fragment is never sent to the server, and appending after it would
    // put `fmt` inside the fragment.
    let without_fragment = base_url.split('#').next().unwrap_or(base_url);

    // Split on the first `?` so that a leading `fmt=` parameter is seen as a
    // parameter rather than as part of "<path>?fmt=...".
    let (base, query) = without_fragment
        .split_once('?')
        .unwrap_or((without_fragment, ""));

    let mut out = String::with_capacity(base_url.len() + "?fmt=json3".len());
    out.push_str(base);
    out.push('?');
    let kept = query
        .split('&')
        .filter(|seg| !seg.is_empty() && *seg != "fmt" && !seg.starts_with("fmt="));
    for seg in kept {
        out.push_str(seg);
        out.push('&');
    }
    out.push_str("fmt=json3");
    out
}
231
232fn parse_timedtext(body: &str) -> Result<String, String> {
235 let trimmed = body.trim_start();
236 if trimmed.starts_with('{') {
237 parse_json3(body)
238 } else if trimmed.starts_with('<') {
239 Ok(parse_srv3_xml(body))
240 } else {
241 Err("transcript response was neither JSON3 nor srv3/XML".to_string())
242 }
243}
244
/// Top level of a JSON3 timedtext payload, limited to what `parse_json3` reads.
#[derive(Deserialize)]
struct Json3 {
    /// Caption events; a missing `events` key deserializes to an empty list.
    #[serde(default)]
    events: Vec<Json3Event>,
}
250
/// One entry of the JSON3 `events` array.
#[derive(Deserialize)]
struct Json3Event {
    /// Text segments of the event; events without `segs` deserialize to an
    /// empty list.
    #[serde(default)]
    segs: Vec<Json3Seg>,
}
256
/// One text segment inside a JSON3 event.
#[derive(Deserialize)]
struct Json3Seg {
    /// Segment text; defaults to the empty string when the key is missing.
    #[serde(default)]
    utf8: String,
}
262
263fn parse_json3(body: &str) -> Result<String, String> {
264 let parsed: Json3 =
265 serde_json::from_str(body).map_err(|e| format!("could not parse transcript json: {e}"))?;
266
267 let mut out = String::new();
268 for event in parsed.events {
269 let line: String = event.segs.iter().map(|s| s.utf8.as_str()).collect();
270 let line = line.replace('\n', " ");
271 let line = line.trim();
272 if line.is_empty() {
273 continue;
274 }
275 if !out.is_empty() {
276 out.push(' ');
277 }
278 out.push_str(line);
279 }
280 Ok(out)
281}
282
283fn parse_srv3_xml(xml: &str) -> String {
285 let mut raw = String::with_capacity(xml.len() / 2);
286 let mut in_tag = false;
287 let mut tag = String::new();
288 for c in xml.chars() {
289 match c {
290 '<' => {
291 in_tag = true;
292 tag.clear();
293 }
294 '>' => {
295 in_tag = false;
296 if tag.starts_with("/p") {
298 raw.push('\n');
299 }
300 }
301 _ if in_tag => tag.push(c),
302 _ => raw.push(c),
303 }
304 }
305
306 let decoded = html_to_text::decode_entities(&raw);
307 let mut out = String::new();
308 for line in decoded.split('\n') {
309 let line = line.trim();
310 if line.is_empty() {
311 continue;
312 }
313 if !out.is_empty() {
314 out.push(' ');
315 }
316 out.push_str(line);
317 }
318 out
319}
320
321fn path_and_query(safe: &url_guard::SafeUrl) -> &str {
324 let prefix = safe.scheme.len() + 3 + safe.authority.len();
325 safe.normalized.get(prefix..).unwrap_or("")
326}
327
/// Splits a `path?query#fragment` string into `(path, query)`.
///
/// The fragment is discarded, the query is everything after the first `?`
/// (empty when there is none), and an empty path is reported as `"/"`.
fn split_path_query(pq: &str) -> (&str, &str) {
    let without_fragment = match pq.find('#') {
        Some(hash) => &pq[..hash],
        None => pq,
    };
    let (path, query) = without_fragment
        .split_once('?')
        .unwrap_or((without_fragment, ""));
    let path = if path.is_empty() { "/" } else { path };
    (path, query)
}
335
/// Returns the raw value of the first `key=value` pair in `query` whose key
/// equals `key`.
///
/// Pairs without `=` are ignored and no percent-decoding is performed.
fn query_param(query: &str, key: &str) -> Option<String> {
    query
        .split('&')
        .filter_map(|pair| pair.split_once('='))
        .find(|(name, _)| *name == key)
        .map(|(_, value)| value.to_string())
}
346
/// Keeps the leading run of id characters (ASCII letters, digits, `-`, `_`)
/// from `raw`, returning `None` when that run is empty.
fn clean_id(raw: &str) -> Option<String> {
    let is_id_char = |c: char| c.is_ascii_alphanumeric() || c == '-' || c == '_';
    // Byte offset of the first character that cannot be part of an id.
    let end = raw.find(|c: char| !is_id_char(c)).unwrap_or(raw.len());
    let id = &raw[..end];
    (!id.is_empty()).then(|| id.to_owned())
}
358
// Unit tests for the pure helpers in this module; nothing here touches the
// network.
#[cfg(test)]
mod tests {
    use super::*;

    // `watch?v=<id>` URLs: extra query parameters after `v` are ignored.
    #[test]
    fn extracts_id_from_watch_url() {
        assert_eq!(
            video_id("https://www.youtube.com/watch?v=dQw4w9WgXcQ&t=42"),
            Some("dQw4w9WgXcQ".to_string())
        );
    }

    // Path-based forms: `youtu.be/<id>`, `/shorts/<id>` and `/embed/<id>`.
    #[test]
    fn extracts_id_from_short_and_shorts_and_embed() {
        assert_eq!(
            video_id("https://youtu.be/dQw4w9WgXcQ?si=abc"),
            Some("dQw4w9WgXcQ".to_string())
        );
        assert_eq!(
            video_id("https://www.youtube.com/shorts/abc123DEF45"),
            Some("abc123DEF45".to_string())
        );
        assert_eq!(
            video_id("https://www.youtube.com/embed/xyz789ABCde"),
            Some("xyz789ABCde".to_string())
        );
    }

    // Hosts other than YouTube yield no id, even with a `watch?v=` shape.
    #[test]
    fn non_youtube_url_returns_none() {
        assert_eq!(video_id("https://example.com/watch?v=abc"), None);
        assert_eq!(video_id("https://vimeo.com/12345"), None);
    }

    // A manually authored English track beats an earlier auto-generated one.
    #[test]
    fn selects_manual_english_track_over_asr() {
        let tracks = vec![
            CaptionTrack {
                base_url: "https://t/asr".into(),
                language_code: Some("en".into()),
                kind: Some("asr".into()),
            },
            CaptionTrack {
                base_url: "https://t/manual".into(),
                language_code: Some("en".into()),
                kind: None,
            },
            CaptionTrack {
                base_url: "https://t/de".into(),
                language_code: Some("de".into()),
                kind: None,
            },
        ];
        assert_eq!(select_caption_track(&tracks).base_url, "https://t/manual");
    }

    // With no English track at all, the only available track is used.
    #[test]
    fn selects_any_when_no_english() {
        let tracks = vec![CaptionTrack {
            base_url: "https://t/fr".into(),
            language_code: Some("fr".into()),
            kind: Some("asr".into()),
        }];
        assert_eq!(select_caption_track(&tracks).base_url, "https://t/fr");
    }

    // An existing `fmt` parameter is replaced; a missing one is appended.
    #[test]
    fn json3_url_forces_format() {
        assert_eq!(
            json3_url("https://yt/api/timedtext?v=x&ei=y&fmt=srv3&hl=en"),
            "https://yt/api/timedtext?v=x&ei=y&hl=en&fmt=json3"
        );
        assert_eq!(
            json3_url("https://yt/api/timedtext?v=x"),
            "https://yt/api/timedtext?v=x&fmt=json3"
        );
    }

    // Segments are concatenated per event, newlines become spaces, and an
    // event containing only a newline is dropped.
    #[test]
    fn parses_json3_into_joined_text() {
        let body = r#"{"events":[
            {"tStartMs":0,"segs":[{"utf8":"Hello"},{"utf8":" world"}]},
            {"tStartMs":1000,"segs":[{"utf8":"second\n"},{"utf8":"line"}]},
            {"tStartMs":2000,"segs":[{"utf8":"\n"}]}
        ]}"#;
        assert_eq!(parse_json3(body).unwrap(), "Hello world second line");
    }

    // Tags (including nested `<s>`) are stripped and `<p>` lines are joined
    // with single spaces.
    #[test]
    fn parses_srv3_xml_into_joined_text() {
        let xml = r#"<?xml version="1.0" encoding="utf-8" ?><timedtext format="3">
<body>
<p t="0" d="1680">We're no strangers</p>
<p t="1680" d="2000">to <s>love</s></p>
</body></timedtext>"#;
        assert_eq!(parse_srv3_xml(xml), "We're no strangers to love");
    }

    // Bodies that start with neither `{` nor `<` are rejected; an empty
    // event list parses to an empty string.
    #[test]
    fn parse_timedtext_dispatches_on_shape() {
        assert!(parse_timedtext("not json or xml").is_err());
        assert_eq!(parse_timedtext("{\"events\":[]}").unwrap(), "");
    }
}