yt_transcript_rs/
innertube_client.rs

1use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
2use reqwest::Client;
3use serde_json::{json, Value};
4
5/// InnerTube API client for fetching YouTube transcript data
6///
7/// This client uses YouTube's internal InnerTube API instead of the legacy
8/// transcript URLs to fetch caption data. This approach is more reliable
9/// and doesn't require cookie authentication for public videos.
10pub struct InnerTubeClient {
11    client: Client,
12}
13
14impl InnerTubeClient {
15    pub fn new(client: Client) -> Self {
16        Self { client }
17    }
18
19    /// Fetch transcript data using YouTube's InnerTube API
20    pub async fn get_transcript_data(
21        &self,
22        video_id: &str,
23    ) -> Result<Value, CouldNotRetrieveTranscript> {
24        let url = "https://www.youtube.com/youtubei/v1/get_transcript";
25
26        let payload = json!({
27            "context": {
28                "client": {
29                    "clientName": "WEB",
30                    "clientVersion": "2.20231219.04.00",
31                    "hl": "en",
32                    "gl": "US"
33                }
34            },
35            "params": self.encode_transcript_params(video_id)
36        });
37
38        let response = self.client
39            .post(url)
40            .json(&payload)
41            .header("Content-Type", "application/json")
42            .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
43            .send()
44            .await
45            .map_err(|e| CouldNotRetrieveTranscript {
46                video_id: video_id.to_string(),
47                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
48                    format!("InnerTube API request failed: {}", e)
49                )),
50            })?;
51
52        if !response.status().is_success() {
53            return Err(CouldNotRetrieveTranscript {
54                video_id: video_id.to_string(),
55                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
56                    format!("InnerTube API returned status: {}", response.status()),
57                )),
58            });
59        }
60
61        let data: Value = response
62            .json()
63            .await
64            .map_err(|e| CouldNotRetrieveTranscript {
65                video_id: video_id.to_string(),
66                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
67                    format!("Failed to parse InnerTube response: {}", e),
68                )),
69            })?;
70
71        Ok(data)
72    }
73
74    /// Encode transcript parameters for InnerTube API
75    /// This is a simplified version - the actual encoding may be more complex
76    fn encode_transcript_params(&self, video_id: &str) -> String {
77        // For now, we'll try a simple approach
78        // In the real implementation, this would need proper base64 encoding
79        // of the video ID and other parameters
80        use base64::{engine::general_purpose, Engine as _};
81
82        let params = json!({
83            "videoId": video_id
84        });
85
86        general_purpose::STANDARD.encode(params.to_string())
87    }
88
89    /// Alternative approach: try to get transcript list first
90    pub async fn get_transcript_list(
91        &self,
92        video_id: &str,
93    ) -> Result<Value, CouldNotRetrieveTranscript> {
94        // Try the player API first to get available captions
95        let url = "https://www.youtube.com/youtubei/v1/player";
96
97        let payload = json!({
98            "context": {
99                "client": {
100                    "clientName": "WEB",
101                    "clientVersion": "2.20231219.04.00",
102                    "hl": "en",
103                    "gl": "US"
104                }
105            },
106            "videoId": video_id
107        });
108
109        let response = self.client
110            .post(url)
111            .json(&payload)
112            .header("Content-Type", "application/json")
113            .header("User-Agent", "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
114            .send()
115            .await
116            .map_err(|e| CouldNotRetrieveTranscript {
117                video_id: video_id.to_string(),
118                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
119                    format!("InnerTube player API request failed: {}", e)
120                )),
121            })?;
122
123        if !response.status().is_success() {
124            return Err(CouldNotRetrieveTranscript {
125                video_id: video_id.to_string(),
126                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
127                    format!(
128                        "InnerTube player API returned status: {}",
129                        response.status()
130                    ),
131                )),
132            });
133        }
134
135        let data: Value = response
136            .json()
137            .await
138            .map_err(|e| CouldNotRetrieveTranscript {
139                video_id: video_id.to_string(),
140                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
141                    format!("Failed to parse InnerTube player response: {}", e),
142                )),
143            })?;
144
145        Ok(data)
146    }
147}