Skip to main content

tail_fin_youtube/
lib.rs

1pub mod parsing;
2pub mod site;
3pub mod types;
4pub mod util;
5
6use tail_fin_common::page::ensure_on_domain;
7use tail_fin_common::BrowserSession;
8use tail_fin_common::TailFinError;
9use tokio::sync::OnceCell;
10
11pub use site::YoutubeSite;
12pub use types::{Channel, Comment, InnerTubeContext, TranscriptSegment, Video};
13pub use util::{extract_channel_id, extract_video_id};
14
15/// Browser-based YouTube client.
16///
17/// Calls InnerTube API endpoints via browser eval fetch.
18/// InnerTube context is lazily extracted on first API call.
19pub struct YouTubeClient {
20    session: BrowserSession,
21    inner_tube: OnceCell<InnerTubeContext>,
22}
23
24impl YouTubeClient {
25    /// Wrap a browser session for YouTube API access.
26    pub fn new(session: BrowserSession) -> Self {
27        Self {
28            session,
29            inner_tube: OnceCell::new(),
30        }
31    }
32
33    /// Bootstrap InnerTube context from the page if not already cached.
34    async fn ensure_innertube(&self) -> Result<&InnerTubeContext, TailFinError> {
35        self.inner_tube
36            .get_or_try_init(|| async {
37                ensure_on_domain(&self.session, &["www.youtube.com"]).await?;
38                let result = self
39                    .session
40                    .eval(EXTRACT_INNERTUBE_JS)
41                    .await
42                    .map_err(TailFinError::Browser)?;
43
44                let parsed = if let Some(s) = result.as_str() {
45                    serde_json::from_str::<serde_json::Value>(s)?
46                } else if result.is_object() {
47                    result
48                } else {
49                    return Err(TailFinError::Parse(
50                        "Failed to extract InnerTube config from page".into(),
51                    ));
52                };
53
54                let api_key = parsed
55                    .get("apiKey")
56                    .and_then(|v| v.as_str())
57                    .ok_or_else(|| TailFinError::Parse("Missing INNERTUBE_API_KEY".into()))?
58                    .to_string();
59                let context = parsed
60                    .get("context")
61                    .cloned()
62                    .ok_or_else(|| TailFinError::Parse("Missing INNERTUBE_CONTEXT".into()))?;
63
64                Ok(InnerTubeContext { api_key, context })
65            })
66            .await
67    }
68
69    /// Make an InnerTube API call via browser eval fetch.
70    ///
71    /// Uses session.eval() directly instead of page_fetch_with_body to avoid
72    /// timeout issues on sequential calls.
73    async fn innertube_request(
74        &self,
75        endpoint: &str,
76        extra_body: serde_json::Value,
77    ) -> Result<serde_json::Value, TailFinError> {
78        let ctx = self.ensure_innertube().await?;
79        let api_key = ctx.api_key.clone();
80        let context = ctx.context.clone();
81
82        let mut body = extra_body;
83        body.as_object_mut()
84            .ok_or_else(|| TailFinError::Parse("body must be an object".into()))?
85            .insert("context".to_string(), context);
86
87        let url = format!(
88            "https://www.youtube.com/youtubei/v1/{}?key={}&prettyPrint=false",
89            endpoint, api_key
90        );
91
92        let body_json = serde_json::to_string(&body).unwrap_or_default();
93        let url_json = serde_json::to_string(&url).unwrap_or_default();
94
95        let js = format!(
96            r#"(async () => {{
97                const resp = await fetch({url}, {{
98                    method: 'POST',
99                    headers: {{ 'Content-Type': 'application/json' }},
100                    credentials: 'include',
101                    body: {body}
102                }});
103                if (!resp.ok) return {{ __error: true, status: resp.status, statusText: resp.statusText }};
104                return await resp.json();
105            }})()"#,
106            url = url_json,
107            body = serde_json::to_string(&body_json).unwrap_or_default(),
108        );
109
110        let result = self
111            .session
112            .eval(&js)
113            .await
114            .map_err(TailFinError::Browser)?;
115
116        if result
117            .get("__error")
118            .and_then(|v| v.as_bool())
119            .unwrap_or(false)
120        {
121            let status = result.get("status").and_then(|v| v.as_u64()).unwrap_or(0);
122            let text = result
123                .get("statusText")
124                .and_then(|v| v.as_str())
125                .unwrap_or("unknown");
126            return Err(TailFinError::Api(format!("HTTP {} {}", status, text)));
127        }
128
129        Ok(result)
130    }
131
132    /// Search for videos.
133    pub async fn search(&self, query: &str, count: usize) -> Result<Vec<Video>, TailFinError> {
134        let body = serde_json::json!({ "query": query });
135        let data = self.innertube_request("search", body).await?;
136        Ok(parsing::parse_search_results(&data, count))
137    }
138
139    /// Get video details.
140    pub async fn video(&self, video_id: &str) -> Result<Option<Video>, TailFinError> {
141        let body = serde_json::json!({ "videoId": video_id });
142        let data = self.innertube_request("next", body).await?;
143        Ok(parsing::parse_video_detail(&data))
144    }
145
146    /// Get channel info. If input starts with `@`, resolves via resolve_url first.
147    pub async fn channel(&self, channel_input: &str) -> Result<Option<Channel>, TailFinError> {
148        let browse_id = if channel_input.starts_with('@') {
149            // Resolve @handle to browseId via resolve_url endpoint
150            let body = serde_json::json!({
151                "url": format!("https://www.youtube.com/{}", channel_input),
152            });
153            let data = self
154                .innertube_request("navigation/resolve_url", body)
155                .await?;
156            data.pointer("/endpoint/browseEndpoint/browseId")
157                .and_then(|v| v.as_str())
158                .ok_or_else(|| {
159                    TailFinError::Parse(format!(
160                        "Could not resolve handle '{}' to channel ID",
161                        channel_input
162                    ))
163                })?
164                .to_string()
165        } else {
166            channel_input.to_string()
167        };
168
169        let body = serde_json::json!({ "browseId": browse_id });
170        let data = self.innertube_request("browse", body).await?;
171        Ok(parsing::parse_channel(&data))
172    }
173
174    /// Get video comments.
175    ///
176    /// Follows OpenCLI's approach: two-step fetch in a single browser eval
177    /// to avoid page_fetch timeout issues on sequential calls.
178    pub async fn comments(
179        &self,
180        video_id: &str,
181        count: usize,
182    ) -> Result<Vec<Comment>, TailFinError> {
183        let ctx = self.ensure_innertube().await?;
184        let api_key = ctx.api_key.clone();
185        let context_json = serde_json::to_string(&ctx.context).unwrap_or_default();
186
187        // Do both API calls in a single eval to avoid sequential page_fetch timeouts
188        let js = format!(
189            r#"(async () => {{
190                const apiKey = {api_key};
191                const context = {context};
192                // Step 1: Get continuation token
193                const nextResp = await fetch(
194                    `https://www.youtube.com/youtubei/v1/next?key=${{apiKey}}&prettyPrint=false`,
195                    {{
196                        method: 'POST',
197                        headers: {{ 'Content-Type': 'application/json' }},
198                        credentials: 'include',
199                        body: JSON.stringify({{ context, videoId: {video_id} }})
200                    }}
201                );
202                const nextData = await nextResp.json();
203                // Find comment continuation token (targetId === 'comments-section')
204                const contents = nextData?.contents?.twoColumnWatchNextResults?.results?.results?.contents;
205                let token = null;
206                if (contents) {{
207                    for (const item of contents) {{
208                        if (item?.itemSectionRenderer?.targetId === 'comments-section') {{
209                            token = item.itemSectionRenderer.contents?.[0]
210                                ?.continuationItemRenderer?.continuationEndpoint
211                                ?.continuationCommand?.token;
212                            break;
213                        }}
214                    }}
215                }}
216                if (!token) return {{ error: 'no_token' }};
217                // Step 2: Fetch comments
218                const commResp = await fetch(
219                    `https://www.youtube.com/youtubei/v1/next?key=${{apiKey}}&prettyPrint=false`,
220                    {{
221                        method: 'POST',
222                        headers: {{ 'Content-Type': 'application/json' }},
223                        credentials: 'include',
224                        body: JSON.stringify({{ context, continuation: token }})
225                    }}
226                );
227                return await commResp.json();
228            }})()"#,
229            api_key = serde_json::to_string(&api_key).unwrap_or_default(),
230            context = context_json,
231            video_id = serde_json::to_string(video_id).unwrap_or_default(),
232        );
233
234        let data = self
235            .session
236            .eval(&js)
237            .await
238            .map_err(TailFinError::Browser)?;
239
240        if data.get("error").is_some() {
241            return Ok(vec![]);
242        }
243
244        let mut comments = parsing::parse_comments_from_mutations(&data, count);
245        if comments.is_empty() {
246            comments = parsing::parse_comments(&data, count);
247        }
248        Ok(comments)
249    }
250
251    /// Get trending videos (via YouTube's trending channel).
252    pub async fn trending(&self, count: usize) -> Result<Vec<Video>, TailFinError> {
253        // YouTube removed FEtrending; use the trending channel browseId instead
254        let body = serde_json::json!({
255            "browseId": "UC4R8DWoMoI7CAwX8_LjQHig",
256            "params": "EgdsaXZldGFikgEDCKEK",
257        });
258        let data = self.innertube_request("browse", body).await?;
259        Ok(parsing::parse_trending(&data, count))
260    }
261
262    /// Get video transcript.
263    ///
264    /// Follows OpenCLI's approach: call /youtubei/v1/player with Android client
265    /// context to get caption track URLs (bypasses PoToken), then fetch the
266    /// caption XML and parse timestamps + text.
267    pub async fn transcript(&self, video_id: &str) -> Result<Vec<TranscriptSegment>, TailFinError> {
268        let ctx = self.ensure_innertube().await?;
269        let api_key = ctx.api_key.clone();
270
271        // Use Android client context to bypass PoToken requirement
272        let body = serde_json::json!({
273            "context": {
274                "client": {
275                    "clientName": "ANDROID",
276                    "clientVersion": "20.10.38",
277                }
278            },
279            "videoId": video_id,
280        });
281
282        let url = format!(
283            "https://www.youtube.com/youtubei/v1/player?key={}&prettyPrint=false",
284            api_key
285        );
286        let body_json = serde_json::to_string(&body).unwrap_or_default();
287        let url_json = serde_json::to_string(&url).unwrap_or_default();
288
289        let js = format!(
290            r#"(async () => {{
291                const resp = await fetch({url}, {{
292                    method: 'POST',
293                    headers: {{ 'Content-Type': 'application/json' }},
294                    credentials: 'include',
295                    body: {body}
296                }});
297                if (!resp.ok) return {{ __error: true, status: resp.status }};
298                return await resp.json();
299            }})()"#,
300            url = url_json,
301            body = serde_json::to_string(&body_json).unwrap_or_default(),
302        );
303
304        let data = self
305            .session
306            .eval(&js)
307            .await
308            .map_err(TailFinError::Browser)?;
309        if data.get("__error").is_some() {
310            return Err(TailFinError::Api("Failed to fetch player data".into()));
311        }
312
313        // Extract caption track URL from player response
314        let caption_url = data
315            .pointer("/captions/playerCaptionsTracklistRenderer/captionTracks")
316            .and_then(|v| v.as_array())
317            .and_then(|tracks| {
318                // Prefer manual captions, fallback to auto-generated
319                tracks.iter().find_map(|t| {
320                    t.get("baseUrl")
321                        .and_then(|v| v.as_str())
322                        .map(|s| s.to_string())
323                })
324            })
325            .ok_or_else(|| TailFinError::Api("No captions available for this video".into()))?;
326
327        // Fetch the caption XML via browser
328        let xml_js = format!(
329            r#"(async () => {{
330                const resp = await fetch({}, {{ credentials: "include" }});
331                return await resp.text();
332            }})()"#,
333            serde_json::to_string(&caption_url).unwrap_or_default()
334        );
335        let xml_result = self
336            .session
337            .eval(&xml_js)
338            .await
339            .map_err(TailFinError::Browser)?;
340        let xml = xml_result.as_str().unwrap_or("");
341
342        Ok(parsing::parse_caption_xml(xml))
343    }
344
345    /// Get subscriptions (requires login).
346    pub async fn subscriptions(&self, count: usize) -> Result<Vec<Channel>, TailFinError> {
347        let body = serde_json::json!({ "browseId": "FEchannels" });
348        let data = self.innertube_request("browse", body).await?;
349        Ok(parsing::parse_subscriptions(&data, count))
350    }
351}
352
/// JavaScript to extract InnerTube API key and context from YouTube page.
///
/// Tries `window.ytcfg` first, then falls back to parsing inline `<script>`
/// tags for whichever of the two values is still missing.
///
/// The script evaluates to:
/// - `null` when no API key can be found,
/// - `{ apiKey, context }` when both values were recovered,
/// - `{ apiKey }` (no `context` key) when the context could not be recovered,
///   so the caller sees it as missing rather than as an explicit `null`.
const EXTRACT_INNERTUBE_JS: &str = r#"(() => {
    // Prefer the ytcfg global when the page exposes it.
    const cfg = window.ytcfg;
    let apiKey = null;
    let context = null;
    if (cfg && typeof cfg.get === 'function') {
        apiKey = cfg.get('INNERTUBE_API_KEY') || null;
        context = cfg.get('INNERTUBE_CONTEXT') || null;
        if (apiKey && context) return { apiKey, context };
    }
    // Fallback: recover whatever is still missing from inline script tags.
    const scripts = Array.from(document.querySelectorAll('script'));
    if (!apiKey) {
        for (const s of scripts) {
            const m = (s.textContent || '').match(/"INNERTUBE_API_KEY"\s*:\s*"([^"]+)"/);
            if (m) { apiKey = m[1]; break; }
        }
    }
    if (!apiKey) return null;
    if (!context) {
        for (const s of scripts) {
            const m = (s.textContent || '').match(/"INNERTUBE_CONTEXT"\s*:\s*(\{[\s\S]*?\})\s*,\s*"INNERTUBE/);
            if (m) { try { context = JSON.parse(m[1]); } catch (e) {} break; }
        }
    }
    // Omit `context` entirely when it could not be recovered.
    return context ? { apiKey, context } : { apiKey };
})()"#;