yt_transcript_rs/
transcript.rs

1use reqwest::Client;
2use std::collections::HashMap;
3use std::fmt;
4
5use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
6use crate::fetched_transcript::FetchedTranscript;
7use crate::models::TranslationLanguage;
8use crate::transcript_parser::TranscriptParser;
9
10/// # Transcript
11///
12/// Represents a YouTube transcript that can be fetched or translated.
13///
14/// This struct contains the metadata and access URLs for a transcript but not
15/// the actual transcript text content. It serves as a handle to retrieve the
16/// full transcript text when needed.
17///
18/// A `Transcript` object can represent:
19/// - A native transcript in its original language
20/// - A translatable transcript that can be converted to other languages
21/// - A manually created transcript (more accurate, created by humans)
22/// - An automatically generated transcript (created by YouTube's speech recognition)
23///
24/// ## Usage Example
25///
26/// ```rust,no_run
27/// # use yt_transcript_rs::YouTubeTranscriptApi;
28/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
29/// let api = YouTubeTranscriptApi::new(None, None, None)?;
30/// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
31///
32/// // Find an English transcript
33/// let transcript = transcript_list.find_transcript(&["en"])?;
34///
35/// // Check if it can be translated
36/// if transcript.is_translatable() {
37///     // Translate to Spanish
38///     let spanish = transcript.translate("es")?;
39///     
40///     // Fetch the translated content
41///     let fetched = spanish.fetch(false).await?;
42///     println!("Spanish transcript: {}", fetched.text());
43/// }
44///
45/// // Or fetch the original transcript
46/// let fetched = transcript.fetch(false).await?;
47/// println!("Original transcript: {}", fetched.text());
48/// # Ok(())
49/// # }
50/// ```
51#[derive(Debug, Clone)]
52pub struct Transcript {
53    /// HTTP client for making requests to YouTube
54    pub client: Client,
55
56    /// The YouTube video ID this transcript belongs to
57    pub video_id: String,
58
59    /// URL to fetch the transcript content from YouTube
60    pub url: String,
61
62    /// Full human-readable language name (e.g., "English")
63    pub language: String,
64
65    /// Language code (e.g., "en", "en-US", "es")
66    pub language_code: String,
67
68    /// Whether this transcript was automatically generated by YouTube
69    pub is_generated: bool,
70
71    /// List of languages this transcript can be translated to
72    pub translation_languages: Vec<TranslationLanguage>,
73
74    /// Mapping of language codes to language names for available translations
75    pub translation_languages_map: HashMap<String, String>,
76}
77
78impl Transcript {
79    /// Creates a new transcript instance.
80    ///
81    /// This constructor creates a transcript object that can be used to fetch
82    /// the actual transcript content or to generate translations.
83    ///
84    /// # Parameters
85    ///
86    /// * `client` - HTTP client for making requests to YouTube
87    /// * `video_id` - YouTube video ID
88    /// * `url` - URL to fetch the transcript content
89    /// * `language` - Human-readable language name (e.g., "English")
90    /// * `language_code` - Language code (e.g., "en", "en-US")
91    /// * `is_generated` - Whether this transcript was automatically generated
92    /// * `translation_languages` - List of languages this transcript can be translated to
93    ///
94    /// # Returns
95    ///
96    /// A new `Transcript` instance
97    ///
98    /// # Example (internal usage)
99    ///
100    /// ```rust,no_run
101    /// # use reqwest::Client;
102    /// # use yt_transcript_rs::transcript::Transcript;
103    /// # use yt_transcript_rs::models::TranslationLanguage;
104    /// # fn example() {
105    /// let client = Client::new();
106    ///
107    /// // Create a transcript for English
108    /// let transcript = Transcript::new(
109    ///     client,
110    ///     "dQw4w9WgXcQ".to_string(),
111    ///     "https://www.youtube.com/api/timedtext?...".to_string(),
112    ///     "English".to_string(),
113    ///     "en".to_string(),
114    ///     false, // Not automatically generated
115    ///     vec![
116    ///         TranslationLanguage {
117    ///             language: "Spanish".to_string(),
118    ///             language_code: "es".to_string()
119    ///         }
120    ///     ]
121    /// );
122    /// # }
123    /// ```
124    pub fn new(
125        client: Client,
126        video_id: String,
127        url: String,
128        language: String,
129        language_code: String,
130        is_generated: bool,
131        translation_languages: Vec<TranslationLanguage>,
132    ) -> Self {
133        let translation_languages_map = translation_languages
134            .iter()
135            .map(|lang| (lang.language_code.clone(), lang.language.clone()))
136            .collect();
137
138        Self {
139            client,
140            video_id,
141            url,
142            language,
143            language_code,
144            is_generated,
145            translation_languages,
146            translation_languages_map,
147        }
148    }
149
150    /// Fetches the actual transcript content from YouTube.
151    ///
152    /// This method retrieves the transcript text and timing information from YouTube
153    /// and returns it as a structured `FetchedTranscript` object.
154    ///
155    /// # Parameters
156    ///
157    /// * `preserve_formatting` - Whether to preserve HTML formatting in the transcript
158    ///   (e.g., bold, italic, etc.)
159    ///
160    /// # Returns
161    ///
162    /// * `Result<FetchedTranscript, CouldNotRetrieveTranscript>` - The fetched transcript or an error
163    ///
164    /// # Errors
165    ///
166    /// This method will return an error if:
167    /// - The network request to YouTube fails
168    /// - YouTube returns a non-OK status code
169    /// - The transcript data cannot be parsed
170    ///
171    /// # Example
172    ///
173    /// ```rust,no_run
174    /// # use yt_transcript_rs::YouTubeTranscriptApi;
175    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
176    /// let api = YouTubeTranscriptApi::new(None, None, None)?;
177    /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
178    /// let transcript = transcript_list.find_transcript(&["en"])?;
179    ///
180    /// // Fetch without preserving formatting
181    /// let plain_transcript = transcript.fetch(false).await?;
182    ///
183    /// // Fetch and preserve HTML formatting like <b>bold</b> text
184    /// let formatted_transcript = transcript.fetch(true).await?;
185    ///
186    /// // Access the full text
187    /// println!("Transcript: {}", plain_transcript.text());
188    ///
189    /// // Or iterate through individual segments
190    /// for segment in plain_transcript.parts() {
191    ///     println!("[{:.1}s]: {}", segment.start, segment.text);
192    /// }
193    /// # Ok(())
194    /// # }
195    /// ```
196    pub async fn fetch(
197        &self,
198        preserve_formatting: bool,
199    ) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
200        let response =
201            self.client
202                .get(&self.url)
203                .send()
204                .await
205                .map_err(|e| CouldNotRetrieveTranscript {
206                    video_id: self.video_id.clone(),
207                    reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
208                        e.to_string(),
209                    )),
210                })?;
211
212        if response.status() != reqwest::StatusCode::OK {
213            return Err(CouldNotRetrieveTranscript {
214                video_id: self.video_id.clone(),
215                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
216                    format!("YouTube returned status code: {}", response.status()),
217                )),
218            });
219        }
220
221        let text = response
222            .text()
223            .await
224            .map_err(|e| CouldNotRetrieveTranscript {
225                video_id: self.video_id.clone(),
226                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
227                    e.to_string(),
228                )),
229            })?;
230
231        let snippets = TranscriptParser::new(preserve_formatting)
232            .parse(&text.clone())
233            .map_err(|_| CouldNotRetrieveTranscript {
234                video_id: self.video_id.clone(),
235                reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable),
236            })?;
237
238        Ok(FetchedTranscript {
239            snippets,
240            video_id: self.video_id.clone(),
241            language: self.language.clone(),
242            language_code: self.language_code.clone(),
243            is_generated: self.is_generated,
244        })
245    }
246
247    /// Checks if this transcript can be translated to other languages.
248    ///
249    /// This method determines whether YouTube offers translation capabilities
250    /// for this transcript. Not all transcripts are translatable.
251    ///
252    /// # Returns
253    ///
254    /// * `bool` - `true` if this transcript can be translated, `false` otherwise
255    ///
256    /// # Example
257    ///
258    /// ```rust,no_run
259    /// # use yt_transcript_rs::YouTubeTranscriptApi;
260    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
261    /// let api = YouTubeTranscriptApi::new(None, None, None)?;
262    /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
263    /// let transcript = transcript_list.find_transcript(&["en"])?;
264    ///
265    /// if transcript.is_translatable() {
266    ///     println!("This transcript can be translated to other languages");
267    ///     
268    ///     // Available translation languages
269    ///     for lang in &transcript.translation_languages {
270    ///         println!("- {} ({})", lang.language, lang.language_code);
271    ///     }
272    /// } else {
273    ///     println!("This transcript cannot be translated");
274    /// }
275    /// # Ok(())
276    /// # }
277    /// ```
278    pub fn is_translatable(&self) -> bool {
279        !self.translation_languages.is_empty()
280    }
281
282    /// Creates a translated version of this transcript in the specified language.
283    ///
284    /// This method creates a new `Transcript` instance representing the same content
285    /// but translated to the requested language. Note that this doesn't actually perform
286    /// the translation yet - the translation happens when you call `fetch()` on the
287    /// returned transcript.
288    ///
289    /// # Parameters
290    ///
291    /// * `language_code` - The language code to translate to (e.g., "es" for Spanish)
292    ///
293    /// # Returns
294    ///
295    /// * `Result<Self, CouldNotRetrieveTranscript>` - A new transcript object representing
296    ///   the translation, or an error
297    ///
298    /// # Errors
299    ///
300    /// This method will return an error if:
301    /// - The transcript is not translatable
302    /// - The requested language is not available for translation
303    ///
304    /// # Example
305    ///
306    /// ```rust,no_run
307    /// # use yt_transcript_rs::YouTubeTranscriptApi;
308    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
309    /// let api = YouTubeTranscriptApi::new(None, None, None)?;
310    /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
311    /// let transcript = transcript_list.find_transcript(&["en"])?;
312    ///
313    /// // Create Spanish translation
314    /// if transcript.is_translatable() {
315    ///     let spanish = transcript.translate("es")?;
316    ///     
317    ///     // Now fetch the Spanish translation
318    ///     let spanish_content = spanish.fetch(false).await?;
319    ///     println!("Spanish: {}", spanish_content.text());
320    ///     
321    ///     // Create Japanese translation
322    ///     let japanese = transcript.translate("ja")?;
323    ///     let japanese_content = japanese.fetch(false).await?;
324    ///     println!("Japanese: {}", japanese_content.text());
325    /// }
326    /// # Ok(())
327    /// # }
328    /// ```
329    pub fn translate(&self, language_code: &str) -> Result<Self, CouldNotRetrieveTranscript> {
330        if !self.is_translatable() {
331            return Err(CouldNotRetrieveTranscript {
332                video_id: self.video_id.clone(),
333                reason: Some(CouldNotRetrieveTranscriptReason::NotTranslatable),
334            });
335        }
336
337        if !self.translation_languages_map.contains_key(language_code) {
338            return Err(CouldNotRetrieveTranscript {
339                video_id: self.video_id.clone(),
340                reason: Some(CouldNotRetrieveTranscriptReason::TranslationLanguageNotAvailable),
341            });
342        }
343
344        let language = self
345            .translation_languages_map
346            .get(language_code)
347            .unwrap()
348            .clone();
349        let url = format!("{}&tlang={}", self.url, language_code);
350
351        Ok(Transcript::new(
352            self.client.clone(),
353            self.video_id.clone(),
354            url,
355            language,
356            language_code.to_string(),
357            true,
358            vec![],
359        ))
360    }
361
362    /// Returns the full human-readable language name of this transcript.
363    ///
364    /// # Returns
365    ///
366    /// * `&str` - The language name (e.g., "English", "EspaƱol")
367    pub fn language(&self) -> &str {
368        &self.language
369    }
370
371    /// Returns the language code of this transcript.
372    ///
373    /// # Returns
374    ///
375    /// * `&str` - The language code (e.g., "en", "es", "fr-CA")
376    pub fn language_code(&self) -> &str {
377        &self.language_code
378    }
379
380    /// Checks if this transcript was automatically generated by YouTube.
381    ///
382    /// # Returns
383    ///
384    /// * `bool` - `true` if automatically generated, `false` if manually created
385    pub fn is_generated(&self) -> bool {
386        self.is_generated
387    }
388}
389
390impl fmt::Display for Transcript {
391    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
392        let translation_desc = if self.is_translatable() {
393            "[TRANSLATABLE]"
394        } else {
395            ""
396        };
397        write!(
398            f,
399            "{} ({}){}",
400            self.language_code, self.language, translation_desc
401        )
402    }
403}