yt_transcript_rs/api.rs
1use reqwest::Client;
2use std::path::Path;
3use std::sync::Arc;
4
5use crate::cookie_jar_loader::CookieJarLoader;
6#[cfg(feature = "ci")]
7use crate::errors::CouldNotRetrieveTranscriptReason;
8use crate::errors::{CookieError, CouldNotRetrieveTranscript};
9use crate::models::{MicroformatData, VideoDetails};
10use crate::proxies::ProxyConfig;
11#[cfg(not(feature = "ci"))]
12use crate::video_data_fetcher::VideoDataFetcher;
13use crate::{FetchedTranscript, TranscriptList};
14
15/// # YouTubeTranscriptApi
16///
17/// The main interface for retrieving YouTube video transcripts and metadata.
18///
19/// This API provides methods to:
20/// - Fetch transcripts from YouTube videos in various languages
21/// - List all available transcript languages for a video
22/// - Retrieve detailed video metadata
23///
24/// The API supports advanced features like:
25/// - Custom HTTP clients and proxies for handling geo-restrictions
26/// - Cookie management for accessing restricted content
27/// - Preserving text formatting in transcripts
28///
29/// ## Simple Usage Example
30///
31/// ```rust,no_run
32/// use yt_transcript_rs::api::YouTubeTranscriptApi;
33///
34/// #[tokio::main]
35/// async fn main() -> Result<(), Box<dyn std::error::Error>> {
36/// // Create a new API instance with default settings
37/// let api = YouTubeTranscriptApi::new(None, None, None)?;
38///
39/// // Fetch an English transcript
40/// let transcript = api.fetch_transcript(
41/// "dQw4w9WgXcQ", // Video ID
42/// &["en"], // Preferred languages
43/// false // Don't preserve formatting
44/// ).await?;
45///
46/// // Print each snippet of the transcript
47/// for snippet in transcript.parts() {
48/// println!("[{:.1}s]: {}", snippet.start, snippet.text);
49/// }
50///
51/// Ok(())
52/// }
53/// ```
54#[derive(Clone)]
55pub struct YouTubeTranscriptApi {
56 /// The internal data fetcher used to retrieve information from YouTube
57 #[cfg(not(feature = "ci"))]
58 fetcher: Arc<VideoDataFetcher>,
59 #[cfg(feature = "ci")]
60 client: Client,
61}
62
63impl YouTubeTranscriptApi {
64 /// Creates a new YouTube Transcript API instance.
65 ///
66 /// This method initializes an API instance with optional customizations for
67 /// cookies, proxies, and HTTP client settings.
68 ///
69 /// # Parameters
70 ///
71 /// * `cookie_path` - Optional path to a Netscape-format cookie file for authenticated requests
72 /// * `proxy_config` - Optional proxy configuration for routing requests through a proxy service
73 /// * `http_client` - Optional pre-configured HTTP client to use instead of the default one
74 ///
75 /// # Returns
76 ///
77 /// * `Result<Self, CookieError>` - A new API instance or a cookie-related error
78 ///
79 /// # Errors
80 ///
81 /// This function will return an error if:
82 /// - The cookie file exists but cannot be read or parsed
83 /// - The cookie file is not in the expected Netscape format
84 ///
85 /// # Examples
86 ///
87 /// ## Basic usage with default settings
88 ///
89 /// ```rust,no_run
90 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
91 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
92 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
93 /// # Ok(())
94 /// # }
95 /// ```
96 ///
97 /// ## Using a cookie file for authenticated access
98 ///
99 /// ```rust,no_run
100 /// # use std::path::Path;
101 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
102 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
103 /// let cookie_path = Path::new("path/to/cookies.txt");
104 /// let api = YouTubeTranscriptApi::new(Some(&cookie_path), None, None)?;
105 /// # Ok(())
106 /// # }
107 /// ```
108 ///
109 /// ## Using a proxy to bypass geographical restrictions
110 ///
111 /// ```rust,no_run
112 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
113 /// # use yt_transcript_rs::proxies::GenericProxyConfig;
114 /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
115 /// // Create a proxy configuration
116 /// let proxy = GenericProxyConfig::new(
117 /// Some("http://proxy.example.com:8080".to_string()),
118 /// None
119 /// )?;
120 ///
121 /// let api = YouTubeTranscriptApi::new(
122 /// None,
123 /// Some(Box::new(proxy)),
124 /// None
125 /// )?;
126 /// # Ok(())
127 /// # }
128 /// ```
129 pub fn new(
130 cookie_path: Option<&Path>,
131 proxy_config: Option<Box<dyn ProxyConfig + Send + Sync>>,
132 http_client: Option<Client>,
133 ) -> Result<Self, CookieError> {
134 let client = match http_client {
135 Some(client) => client,
136 None => {
137 let mut builder = Client::builder()
138 .user_agent("Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")
139 .default_headers({
140 let mut headers = reqwest::header::HeaderMap::new();
141 headers.insert(
142 reqwest::header::ACCEPT_LANGUAGE,
143 reqwest::header::HeaderValue::from_static("en-US"),
144 );
145 headers
146 });
147
148 // Add cookie jar if needed
149 if let Some(cookie_path) = cookie_path {
150 let cookie_jar = CookieJarLoader::load_cookie_jar(cookie_path)?;
151 let cookie_jar = Arc::new(cookie_jar);
152 builder = builder.cookie_store(true).cookie_provider(cookie_jar);
153 }
154
155 // Add proxy configuration if needed
156 if let Some(proxy_config_ref) = &proxy_config {
157 // Convert the proxy configuration to a map first to avoid borrowing issues
158 let proxy_map = proxy_config_ref.to_requests_dict();
159
160 let proxies = reqwest::Proxy::custom(move |url| {
161 if url.scheme() == "http" {
162 if let Some(http_proxy) = proxy_map.get("http") {
163 return Some(http_proxy.clone());
164 }
165 } else if url.scheme() == "https" {
166 if let Some(https_proxy) = proxy_map.get("https") {
167 return Some(https_proxy.clone());
168 }
169 }
170
171 None
172 });
173
174 builder = builder.proxy(proxies);
175
176 // Disable keep-alive if needed
177 if proxy_config_ref.prevent_keeping_connections_alive() {
178 builder = builder.connection_verbose(true).tcp_keepalive(None);
179
180 let mut headers = reqwest::header::HeaderMap::new();
181 headers.insert(
182 reqwest::header::CONNECTION,
183 reqwest::header::HeaderValue::from_static("close"),
184 );
185 builder = builder.default_headers(headers);
186 }
187 }
188
189 builder.build().unwrap()
190 }
191 };
192
193 #[cfg(not(feature = "ci"))]
194 let fetcher = Arc::new(VideoDataFetcher::new(client.clone(), proxy_config));
195
196 Ok(Self {
197 #[cfg(not(feature = "ci"))]
198 fetcher,
199 #[cfg(feature = "ci")]
200 client,
201 })
202 }
203
204 /// Fetches a transcript for a YouTube video in the specified languages.
205 ///
206 /// This method attempts to retrieve a transcript in the first available language
207 /// from the provided list of language preferences. If none of the specified languages
208 /// are available, an error is returned.
209 ///
210 /// # Parameters
211 ///
212 /// * `video_id` - The YouTube video ID (e.g., "dQw4w9WgXcQ" from https://www.youtube.com/watch?v=dQw4w9WgXcQ)
213 /// * `languages` - A list of language codes in order of preference (e.g., ["en", "es", "fr"])
214 /// * `preserve_formatting` - Whether to preserve HTML formatting in the transcript text
215 ///
216 /// # Returns
217 ///
218 /// * `Result<FetchedTranscript, CouldNotRetrieveTranscript>` - The transcript or an error
219 ///
220 /// # Errors
221 ///
222 /// This method will return an error if:
223 /// - The video does not exist or is private
224 /// - The video has no transcripts available
225 /// - None of the requested languages are available
226 /// - Network issues prevent fetching the transcript
227 ///
228 /// # Examples
229 ///
230 /// ## Basic usage - get English transcript
231 ///
232 /// ```rust,no_run
233 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
234 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
235 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
236 ///
237 /// // Fetch English transcript
238 /// let transcript = api.fetch_transcript(
239 /// "dQw4w9WgXcQ", // Video ID
240 /// &["en"], // Try English
241 /// false // Don't preserve formatting
242 /// ).await?;
243 ///
244 /// println!("Full transcript text: {}", transcript.text());
245 /// # Ok(())
246 /// # }
247 /// ```
248 ///
249 /// ## Multiple language preferences with formatting preserved
250 ///
251 /// ```rust,no_run
252 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
253 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
254 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
255 ///
/// // Try English first, then Spanish, then US English ("en-US")
257 /// let transcript = api.fetch_transcript(
258 /// "dQw4w9WgXcQ",
259 /// &["en", "es", "en-US"],
260 /// true // Preserve formatting like <b>bold</b> text
261 /// ).await?;
262 ///
263 /// // Print each segment with timing information
264 /// for snippet in transcript.parts() {
265 /// println!("[{:.1}s-{:.1}s]: {}",
266 /// snippet.start,
267 /// snippet.start + snippet.duration,
268 /// snippet.text);
269 /// }
270 /// # Ok(())
271 /// # }
272 /// ```
#[cfg(feature = "ci")]
pub async fn fetch_transcript(
    &self,
    video_id: &str,
    languages: &[&str],
    _preserve_formatting: bool,
) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
    // `ci` stand-in: answers from `crate::tests::mocks` instead of going through
    // the data fetcher. `_preserve_formatting` is accepted only to keep the
    // signature identical to the non-`ci` variant.

    // The sentinel ID is reported as an unavailable video.
    if video_id == crate::tests::test_utils::NON_EXISTENT_VIDEO_ID {
        return Err(CouldNotRetrieveTranscript {
            video_id: video_id.to_string(),
            reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
        });
    }

    // The mock transcript is created for the first preferred language. An empty
    // preference list yields an error rather than an out-of-bounds panic.
    match languages.first() {
        Some(&language) => Ok(crate::tests::mocks::create_mock_fetched_transcript(
            video_id, language,
        )),
        None => Err(CouldNotRetrieveTranscript {
            video_id: video_id.to_string(),
            reason: None,
        }),
    }
}
291
292 #[cfg(not(feature = "ci"))]
293 pub async fn fetch_transcript(
294 &self,
295 video_id: &str,
296 languages: &[&str],
297 preserve_formatting: bool,
298 ) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
299 let transcript_list = self.list_transcripts(video_id).await?;
300 let transcript = transcript_list.find_transcript(languages)?;
301 transcript.fetch(preserve_formatting).await
302 }
303
304 /// Lists all available transcripts for a YouTube video.
305 ///
306 /// This method retrieves information about all available transcripts for a video,
307 /// including both manual and automatically generated captions in all languages.
308 ///
309 /// # Parameters
310 ///
311 /// * `video_id` - The YouTube video ID (e.g., "dQw4w9WgXcQ")
312 ///
313 /// # Returns
314 ///
315 /// * `Result<TranscriptList, CouldNotRetrieveTranscript>` - A list of available transcripts or an error
316 ///
317 /// # Errors
318 ///
319 /// This method will return an error if:
320 /// - The video does not exist or is private
321 /// - The video has no transcripts available
322 /// - Network issues prevent fetching the transcript list
323 ///
324 /// # Examples
325 ///
326 /// ```rust,no_run
327 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
328 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
329 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
330 ///
331 /// // Get all available transcripts
332 /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
333 ///
334 /// // Print information about each available transcript
335 /// for transcript in transcript_list.transcripts() {
336 /// println!("Language: {} ({}) - {} generated",
337 /// transcript.language(),
338 /// transcript.language_code(),
339 /// if transcript.is_generated() { "Auto" } else { "Manually" });
340 /// }
341 /// # Ok(())
342 /// # }
343 /// ```
#[cfg(feature = "ci")]
pub async fn list_transcripts(
    &self,
    video_id: &str,
) -> Result<TranscriptList, CouldNotRetrieveTranscript> {
    // `ci` stand-in: answers from `crate::tests::mocks`.
    let is_missing_video = video_id == crate::tests::test_utils::NON_EXISTENT_VIDEO_ID;

    if is_missing_video {
        // The sentinel ID is reported as an unavailable video.
        Err(CouldNotRetrieveTranscript {
            video_id: video_id.to_owned(),
            reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
        })
    } else {
        // Any other ID gets the mock transcript list, built around a clone of
        // the stored client.
        let client = self.client.clone();
        Ok(crate::tests::mocks::create_mock_transcript_list(client))
    }
}
362
363 #[cfg(not(feature = "ci"))]
364 pub async fn list_transcripts(
365 &self,
366 video_id: &str,
367 ) -> Result<TranscriptList, CouldNotRetrieveTranscript> {
368 self.fetcher.fetch_transcript_list(video_id).await
369 }
370
371 /// Fetches detailed metadata about a YouTube video.
372 ///
373 /// This method retrieves comprehensive information about a video, including its
374 /// title, author, view count, description, thumbnails, and other metadata.
375 ///
376 /// # Parameters
377 ///
378 /// * `video_id` - The YouTube video ID (e.g., "dQw4w9WgXcQ")
379 ///
380 /// # Returns
381 ///
382 /// * `Result<VideoDetails, CouldNotRetrieveTranscript>` - Video details or an error
383 ///
384 /// # Errors
385 ///
386 /// This method will return an error if:
387 /// - The video does not exist or is private
388 /// - Network issues prevent fetching the video details
389 /// - The YouTube page structure has changed and details cannot be extracted
390 ///
391 /// # Examples
392 ///
393 /// ```rust,no_run
394 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
395 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
396 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
397 ///
398 /// // Fetch details about a video
399 /// let details = api.fetch_video_details("dQw4w9WgXcQ").await?;
400 ///
401 /// // Print basic information
402 /// println!("Title: {}", details.title);
403 /// println!("Channel: {}", details.author);
404 /// println!("Views: {}", details.view_count);
405 /// println!("Duration: {} seconds", details.length_seconds);
406 ///
407 /// // Print keywords if available
408 /// if let Some(keywords) = &details.keywords {
409 /// println!("Keywords: {}", keywords.join(", "));
410 /// }
411 ///
412 /// // Get the highest quality thumbnail
413 /// if let Some(best_thumb) = details.thumbnails.iter()
414 /// .max_by_key(|t| t.width * t.height) {
415 /// println!("Best thumbnail: {} ({}x{})",
416 /// best_thumb.url, best_thumb.width, best_thumb.height);
417 /// }
418 /// # Ok(())
419 /// # }
420 /// ```
#[cfg(feature = "ci")]
pub async fn fetch_video_details(
    &self,
    video_id: &str,
) -> Result<VideoDetails, CouldNotRetrieveTranscript> {
    // `ci` stand-in: answers from `crate::tests::mocks`.
    let is_missing_video = video_id == crate::tests::test_utils::NON_EXISTENT_VIDEO_ID;

    if is_missing_video {
        // The sentinel ID is reported as an unavailable video.
        Err(CouldNotRetrieveTranscript {
            video_id: video_id.to_owned(),
            reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
        })
    } else {
        // Any other ID gets the mock video details.
        Ok(crate::tests::mocks::create_mock_video_details())
    }
}
437
438 #[cfg(not(feature = "ci"))]
439 pub async fn fetch_video_details(
440 &self,
441 video_id: &str,
442 ) -> Result<VideoDetails, CouldNotRetrieveTranscript> {
443 self.fetcher.fetch_video_details(video_id).await
444 }
445
446 /// Fetches microformat data for a YouTube video.
447 ///
448 /// This method retrieves additional metadata about a video that's not included
449 /// in the main video details, such as available countries, category, and embed information.
450 ///
451 /// # Parameters
452 ///
453 /// * `video_id` - The YouTube video ID (e.g., "dQw4w9WgXcQ")
454 ///
455 /// # Returns
456 ///
457 /// * `Result<MicroformatData, CouldNotRetrieveTranscript>` - Microformat data or an error
458 ///
459 /// # Errors
460 ///
461 /// This method will return an error if:
462 /// - The video does not exist or is private
463 /// - Network issues prevent fetching the data
464 /// - The YouTube page structure has changed and data cannot be extracted
465 ///
466 /// # Examples
467 ///
468 /// ```rust,no_run
469 /// # use yt_transcript_rs::api::YouTubeTranscriptApi;
470 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
471 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
472 ///
473 /// // Fetch microformat data about a video
474 /// let microformat = api.fetch_microformat("dQw4w9WgXcQ").await?;
475 ///
476 /// // Check if the video is unlisted
477 /// if let Some(is_unlisted) = microformat.is_unlisted {
478 /// println!("Video is unlisted: {}", is_unlisted);
479 /// }
480 ///
481 /// // Get video category
482 /// if let Some(category) = microformat.category {
483 /// println!("Video category: {}", category);
484 /// }
485 ///
486 /// // Check availability by country
487 /// if let Some(countries) = microformat.available_countries {
488 /// println!("Video available in {} countries", countries.len());
489 /// if countries.contains(&"US".to_string()) {
490 /// println!("Video is available in the United States");
491 /// }
492 /// }
493 /// # Ok(())
494 /// # }
495 /// ```
#[cfg(feature = "ci")]
pub async fn fetch_microformat(
    &self,
    video_id: &str,
) -> Result<MicroformatData, CouldNotRetrieveTranscript> {
    // `ci` stand-in: answers from `crate::tests::mocks`.
    let is_missing_video = video_id == crate::tests::test_utils::NON_EXISTENT_VIDEO_ID;

    if is_missing_video {
        // The sentinel ID is reported as an unavailable video.
        Err(CouldNotRetrieveTranscript {
            video_id: video_id.to_owned(),
            reason: Some(CouldNotRetrieveTranscriptReason::VideoUnavailable),
        })
    } else {
        // Any other ID gets the mock microformat data.
        Ok(crate::tests::mocks::create_mock_microformat_data())
    }
}
512
513 #[cfg(not(feature = "ci"))]
514 pub async fn fetch_microformat(
515 &self,
516 video_id: &str,
517 ) -> Result<MicroformatData, CouldNotRetrieveTranscript> {
518 self.fetcher.fetch_microformat(video_id).await
519 }
520}