1pub mod parsing;
2pub mod site;
3pub mod types;
4pub mod util;
5
6use tail_fin_common::page::ensure_on_domain;
7use tail_fin_common::BrowserSession;
8use tail_fin_common::TailFinError;
9use tokio::sync::OnceCell;
10
11pub use site::YoutubeSite;
12pub use types::{Channel, Comment, InnerTubeContext, TranscriptSegment, Video};
13pub use util::{extract_channel_id, extract_video_id};
14
15pub struct YouTubeClient {
20 session: BrowserSession,
21 inner_tube: OnceCell<InnerTubeContext>,
22}
23
24impl YouTubeClient {
25 pub fn new(session: BrowserSession) -> Self {
27 Self {
28 session,
29 inner_tube: OnceCell::new(),
30 }
31 }
32
33 async fn ensure_innertube(&self) -> Result<&InnerTubeContext, TailFinError> {
35 self.inner_tube
36 .get_or_try_init(|| async {
37 ensure_on_domain(&self.session, &["www.youtube.com"]).await?;
38 let result = self
39 .session
40 .eval(EXTRACT_INNERTUBE_JS)
41 .await
42 .map_err(TailFinError::Browser)?;
43
44 let parsed = if let Some(s) = result.as_str() {
45 serde_json::from_str::<serde_json::Value>(s)?
46 } else if result.is_object() {
47 result
48 } else {
49 return Err(TailFinError::Parse(
50 "Failed to extract InnerTube config from page".into(),
51 ));
52 };
53
54 let api_key = parsed
55 .get("apiKey")
56 .and_then(|v| v.as_str())
57 .ok_or_else(|| TailFinError::Parse("Missing INNERTUBE_API_KEY".into()))?
58 .to_string();
59 let context = parsed
60 .get("context")
61 .cloned()
62 .ok_or_else(|| TailFinError::Parse("Missing INNERTUBE_CONTEXT".into()))?;
63
64 Ok(InnerTubeContext { api_key, context })
65 })
66 .await
67 }
68
69 async fn innertube_request(
74 &self,
75 endpoint: &str,
76 extra_body: serde_json::Value,
77 ) -> Result<serde_json::Value, TailFinError> {
78 let ctx = self.ensure_innertube().await?;
79 let api_key = ctx.api_key.clone();
80 let context = ctx.context.clone();
81
82 let mut body = extra_body;
83 body.as_object_mut()
84 .ok_or_else(|| TailFinError::Parse("body must be an object".into()))?
85 .insert("context".to_string(), context);
86
87 let url = format!(
88 "https://www.youtube.com/youtubei/v1/{}?key={}&prettyPrint=false",
89 endpoint, api_key
90 );
91
92 let body_json = serde_json::to_string(&body).unwrap_or_default();
93 let url_json = serde_json::to_string(&url).unwrap_or_default();
94
95 let js = format!(
96 r#"(async () => {{
97 const resp = await fetch({url}, {{
98 method: 'POST',
99 headers: {{ 'Content-Type': 'application/json' }},
100 credentials: 'include',
101 body: {body}
102 }});
103 if (!resp.ok) return {{ __error: true, status: resp.status, statusText: resp.statusText }};
104 return await resp.json();
105 }})()"#,
106 url = url_json,
107 body = serde_json::to_string(&body_json).unwrap_or_default(),
108 );
109
110 let result = self
111 .session
112 .eval(&js)
113 .await
114 .map_err(TailFinError::Browser)?;
115
116 if result
117 .get("__error")
118 .and_then(|v| v.as_bool())
119 .unwrap_or(false)
120 {
121 let status = result.get("status").and_then(|v| v.as_u64()).unwrap_or(0);
122 let text = result
123 .get("statusText")
124 .and_then(|v| v.as_str())
125 .unwrap_or("unknown");
126 return Err(TailFinError::Api(format!("HTTP {} {}", status, text)));
127 }
128
129 Ok(result)
130 }
131
132 pub async fn search(&self, query: &str, count: usize) -> Result<Vec<Video>, TailFinError> {
134 let body = serde_json::json!({ "query": query });
135 let data = self.innertube_request("search", body).await?;
136 Ok(parsing::parse_search_results(&data, count))
137 }
138
139 pub async fn video(&self, video_id: &str) -> Result<Option<Video>, TailFinError> {
141 let body = serde_json::json!({ "videoId": video_id });
142 let data = self.innertube_request("next", body).await?;
143 Ok(parsing::parse_video_detail(&data))
144 }
145
146 pub async fn channel(&self, channel_input: &str) -> Result<Option<Channel>, TailFinError> {
148 let browse_id = if channel_input.starts_with('@') {
149 let body = serde_json::json!({
151 "url": format!("https://www.youtube.com/{}", channel_input),
152 });
153 let data = self
154 .innertube_request("navigation/resolve_url", body)
155 .await?;
156 data.pointer("/endpoint/browseEndpoint/browseId")
157 .and_then(|v| v.as_str())
158 .ok_or_else(|| {
159 TailFinError::Parse(format!(
160 "Could not resolve handle '{}' to channel ID",
161 channel_input
162 ))
163 })?
164 .to_string()
165 } else {
166 channel_input.to_string()
167 };
168
169 let body = serde_json::json!({ "browseId": browse_id });
170 let data = self.innertube_request("browse", body).await?;
171 Ok(parsing::parse_channel(&data))
172 }
173
174 pub async fn comments(
179 &self,
180 video_id: &str,
181 count: usize,
182 ) -> Result<Vec<Comment>, TailFinError> {
183 let ctx = self.ensure_innertube().await?;
184 let api_key = ctx.api_key.clone();
185 let context_json = serde_json::to_string(&ctx.context).unwrap_or_default();
186
187 let js = format!(
189 r#"(async () => {{
190 const apiKey = {api_key};
191 const context = {context};
192 // Step 1: Get continuation token
193 const nextResp = await fetch(
194 `https://www.youtube.com/youtubei/v1/next?key=${{apiKey}}&prettyPrint=false`,
195 {{
196 method: 'POST',
197 headers: {{ 'Content-Type': 'application/json' }},
198 credentials: 'include',
199 body: JSON.stringify({{ context, videoId: {video_id} }})
200 }}
201 );
202 const nextData = await nextResp.json();
203 // Find comment continuation token (targetId === 'comments-section')
204 const contents = nextData?.contents?.twoColumnWatchNextResults?.results?.results?.contents;
205 let token = null;
206 if (contents) {{
207 for (const item of contents) {{
208 if (item?.itemSectionRenderer?.targetId === 'comments-section') {{
209 token = item.itemSectionRenderer.contents?.[0]
210 ?.continuationItemRenderer?.continuationEndpoint
211 ?.continuationCommand?.token;
212 break;
213 }}
214 }}
215 }}
216 if (!token) return {{ error: 'no_token' }};
217 // Step 2: Fetch comments
218 const commResp = await fetch(
219 `https://www.youtube.com/youtubei/v1/next?key=${{apiKey}}&prettyPrint=false`,
220 {{
221 method: 'POST',
222 headers: {{ 'Content-Type': 'application/json' }},
223 credentials: 'include',
224 body: JSON.stringify({{ context, continuation: token }})
225 }}
226 );
227 return await commResp.json();
228 }})()"#,
229 api_key = serde_json::to_string(&api_key).unwrap_or_default(),
230 context = context_json,
231 video_id = serde_json::to_string(video_id).unwrap_or_default(),
232 );
233
234 let data = self
235 .session
236 .eval(&js)
237 .await
238 .map_err(TailFinError::Browser)?;
239
240 if data.get("error").is_some() {
241 return Ok(vec![]);
242 }
243
244 let mut comments = parsing::parse_comments_from_mutations(&data, count);
245 if comments.is_empty() {
246 comments = parsing::parse_comments(&data, count);
247 }
248 Ok(comments)
249 }
250
251 pub async fn trending(&self, count: usize) -> Result<Vec<Video>, TailFinError> {
253 let body = serde_json::json!({
255 "browseId": "UC4R8DWoMoI7CAwX8_LjQHig",
256 "params": "EgdsaXZldGFikgEDCKEK",
257 });
258 let data = self.innertube_request("browse", body).await?;
259 Ok(parsing::parse_trending(&data, count))
260 }
261
262 pub async fn transcript(&self, video_id: &str) -> Result<Vec<TranscriptSegment>, TailFinError> {
268 let ctx = self.ensure_innertube().await?;
269 let api_key = ctx.api_key.clone();
270
271 let body = serde_json::json!({
273 "context": {
274 "client": {
275 "clientName": "ANDROID",
276 "clientVersion": "20.10.38",
277 }
278 },
279 "videoId": video_id,
280 });
281
282 let url = format!(
283 "https://www.youtube.com/youtubei/v1/player?key={}&prettyPrint=false",
284 api_key
285 );
286 let body_json = serde_json::to_string(&body).unwrap_or_default();
287 let url_json = serde_json::to_string(&url).unwrap_or_default();
288
289 let js = format!(
290 r#"(async () => {{
291 const resp = await fetch({url}, {{
292 method: 'POST',
293 headers: {{ 'Content-Type': 'application/json' }},
294 credentials: 'include',
295 body: {body}
296 }});
297 if (!resp.ok) return {{ __error: true, status: resp.status }};
298 return await resp.json();
299 }})()"#,
300 url = url_json,
301 body = serde_json::to_string(&body_json).unwrap_or_default(),
302 );
303
304 let data = self
305 .session
306 .eval(&js)
307 .await
308 .map_err(TailFinError::Browser)?;
309 if data.get("__error").is_some() {
310 return Err(TailFinError::Api("Failed to fetch player data".into()));
311 }
312
313 let caption_url = data
315 .pointer("/captions/playerCaptionsTracklistRenderer/captionTracks")
316 .and_then(|v| v.as_array())
317 .and_then(|tracks| {
318 tracks.iter().find_map(|t| {
320 t.get("baseUrl")
321 .and_then(|v| v.as_str())
322 .map(|s| s.to_string())
323 })
324 })
325 .ok_or_else(|| TailFinError::Api("No captions available for this video".into()))?;
326
327 let xml_js = format!(
329 r#"(async () => {{
330 const resp = await fetch({}, {{ credentials: "include" }});
331 return await resp.text();
332 }})()"#,
333 serde_json::to_string(&caption_url).unwrap_or_default()
334 );
335 let xml_result = self
336 .session
337 .eval(&xml_js)
338 .await
339 .map_err(TailFinError::Browser)?;
340 let xml = xml_result.as_str().unwrap_or("");
341
342 Ok(parsing::parse_caption_xml(xml))
343 }
344
345 pub async fn subscriptions(&self, count: usize) -> Result<Vec<Channel>, TailFinError> {
347 let body = serde_json::json!({ "browseId": "FEchannels" });
348 let data = self.innertube_request("browse", body).await?;
349 Ok(parsing::parse_subscriptions(&data, count))
350 }
351}
352
/// JavaScript evaluated in the page to recover the InnerTube API key and
/// client context.
///
/// The script evaluates to `null` when no API key can be found, to
/// `{ apiKey, context }` when both values are available, and to `{ apiKey }`
/// when only the key is. The `context` key is left out rather than set to
/// `null`, so the caller sees a missing context as missing.
///
/// `window.ytcfg` is consulted first; whichever value it does not supply is
/// then searched for in the text of the page's `<script>` tags.
const EXTRACT_INNERTUBE_JS: &str = r#"(() => {
    let apiKey = null;
    let context = null;

    // Try ytcfg global first
    const cfg = window.ytcfg;
    if (cfg && typeof cfg.get === 'function') {
        apiKey = cfg.get('INNERTUBE_API_KEY') || null;
        context = cfg.get('INNERTUBE_CONTEXT') || null;
    }
    if (apiKey && context) return { apiKey, context };

    // Fallback: extract whatever is still missing from inline script tags
    const scripts = Array.from(document.querySelectorAll('script'));
    if (!apiKey) {
        for (const s of scripts) {
            const text = s.textContent || '';
            const m = text.match(/"INNERTUBE_API_KEY"\s*:\s*"([^"]+)"/);
            if (m) { apiKey = m[1]; break; }
        }
    }
    if (!apiKey) return null;

    if (!context) {
        for (const s of scripts) {
            const text = s.textContent || '';
            const m = text.match(/"INNERTUBE_CONTEXT"\s*:\s*(\{[\s\S]*?\})\s*,\s*"INNERTUBE/);
            if (!m) continue;
            // Keep scanning when a candidate is not valid JSON; a later
            // script tag may still hold a usable copy.
            try { context = JSON.parse(m[1]); } catch (e) { context = null; }
            if (context) break;
        }
    }
    return context ? { apiKey, context } : { apiKey };
})()"#;