yt_transcript_rs/transcript.rs
1use reqwest::Client;
2use std::collections::HashMap;
3use std::fmt;
4
5use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
6use crate::fetched_transcript::FetchedTranscript;
7use crate::models::TranslationLanguage;
8use crate::transcript_parser::TranscriptParser;
9
10/// # Transcript
11///
12/// Represents a YouTube transcript that can be fetched or translated.
13///
14/// This struct contains the metadata and access URLs for a transcript but not
15/// the actual transcript text content. It serves as a handle to retrieve the
16/// full transcript text when needed.
17///
18/// A `Transcript` object can represent:
19/// - A native transcript in its original language
20/// - A translatable transcript that can be converted to other languages
21/// - A manually created transcript (more accurate, created by humans)
22/// - An automatically generated transcript (created by YouTube's speech recognition)
23///
24/// ## Usage Example
25///
26/// ```rust,no_run
27/// # use yt_transcript_rs::YouTubeTranscriptApi;
28/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
29/// let api = YouTubeTranscriptApi::new(None, None, None)?;
30/// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
31///
32/// // Find an English transcript
33/// let transcript = transcript_list.find_transcript(&["en"])?;
34///
35/// // Check if it can be translated
36/// if transcript.is_translatable() {
37/// // Translate to Spanish
38/// let spanish = transcript.translate("es")?;
39///
40/// // Fetch the translated content
41/// let fetched = spanish.fetch(false).await?;
42/// println!("Spanish transcript: {}", fetched.text());
43/// }
44///
45/// // Or fetch the original transcript
46/// let fetched = transcript.fetch(false).await?;
47/// println!("Original transcript: {}", fetched.text());
48/// # Ok(())
49/// # }
50/// ```
51#[derive(Debug, Clone)]
52pub struct Transcript {
53 /// HTTP client for making requests to YouTube
54 pub client: Client,
55
56 /// The YouTube video ID this transcript belongs to
57 pub video_id: String,
58
59 /// URL to fetch the transcript content from YouTube
60 pub url: String,
61
62 /// Full human-readable language name (e.g., "English")
63 pub language: String,
64
65 /// Language code (e.g., "en", "en-US", "es")
66 pub language_code: String,
67
68 /// Whether this transcript was automatically generated by YouTube
69 pub is_generated: bool,
70
71 /// List of languages this transcript can be translated to
72 pub translation_languages: Vec<TranslationLanguage>,
73
74 /// Mapping of language codes to language names for available translations
75 pub translation_languages_map: HashMap<String, String>,
76}
77
78impl Transcript {
79 /// Creates a new transcript instance.
80 ///
81 /// This constructor creates a transcript object that can be used to fetch
82 /// the actual transcript content or to generate translations.
83 ///
84 /// # Parameters
85 ///
86 /// * `client` - HTTP client for making requests to YouTube
87 /// * `video_id` - YouTube video ID
88 /// * `url` - URL to fetch the transcript content
89 /// * `language` - Human-readable language name (e.g., "English")
90 /// * `language_code` - Language code (e.g., "en", "en-US")
91 /// * `is_generated` - Whether this transcript was automatically generated
92 /// * `translation_languages` - List of languages this transcript can be translated to
93 ///
94 /// # Returns
95 ///
96 /// A new `Transcript` instance
97 ///
98 /// # Example (internal usage)
99 ///
100 /// ```rust,no_run
101 /// # use reqwest::Client;
102 /// # use yt_transcript_rs::transcript::Transcript;
103 /// # use yt_transcript_rs::models::TranslationLanguage;
104 /// # fn example() {
105 /// let client = Client::new();
106 ///
107 /// // Create a transcript for English
108 /// let transcript = Transcript::new(
109 /// client,
110 /// "dQw4w9WgXcQ".to_string(),
111 /// "https://www.youtube.com/api/timedtext?...".to_string(),
112 /// "English".to_string(),
113 /// "en".to_string(),
114 /// false, // Not automatically generated
115 /// vec![
116 /// TranslationLanguage {
117 /// language: "Spanish".to_string(),
118 /// language_code: "es".to_string()
119 /// }
120 /// ]
121 /// );
122 /// # }
123 /// ```
124 pub fn new(
125 client: Client,
126 video_id: String,
127 url: String,
128 language: String,
129 language_code: String,
130 is_generated: bool,
131 translation_languages: Vec<TranslationLanguage>,
132 ) -> Self {
133 let translation_languages_map = translation_languages
134 .iter()
135 .map(|lang| (lang.language_code.clone(), lang.language.clone()))
136 .collect();
137
138 Self {
139 client,
140 video_id,
141 url,
142 language,
143 language_code,
144 is_generated,
145 translation_languages,
146 translation_languages_map,
147 }
148 }
149
150 /// Fetches the actual transcript content from YouTube.
151 ///
152 /// This method retrieves the transcript text and timing information from YouTube
153 /// and returns it as a structured `FetchedTranscript` object.
154 ///
155 /// # Parameters
156 ///
157 /// * `preserve_formatting` - Whether to preserve HTML formatting in the transcript
158 /// (e.g., bold, italic, etc.)
159 ///
160 /// # Returns
161 ///
162 /// * `Result<FetchedTranscript, CouldNotRetrieveTranscript>` - The fetched transcript or an error
163 ///
164 /// # Errors
165 ///
166 /// This method will return an error if:
167 /// - The network request to YouTube fails
168 /// - YouTube returns a non-OK status code
169 /// - The transcript data cannot be parsed
170 ///
171 /// # Example
172 ///
173 /// ```rust,no_run
174 /// # use yt_transcript_rs::YouTubeTranscriptApi;
175 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
176 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
177 /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
178 /// let transcript = transcript_list.find_transcript(&["en"])?;
179 ///
180 /// // Fetch without preserving formatting
181 /// let plain_transcript = transcript.fetch(false).await?;
182 ///
183 /// // Fetch and preserve HTML formatting like <b>bold</b> text
184 /// let formatted_transcript = transcript.fetch(true).await?;
185 ///
186 /// // Access the full text
187 /// println!("Transcript: {}", plain_transcript.text());
188 ///
189 /// // Or iterate through individual segments
190 /// for segment in plain_transcript.parts() {
191 /// println!("[{:.1}s]: {}", segment.start, segment.text);
192 /// }
193 /// # Ok(())
194 /// # }
195 /// ```
196 pub async fn fetch(
197 &self,
198 preserve_formatting: bool,
199 ) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
200 let response =
201 self.client
202 .get(&self.url)
203 .send()
204 .await
205 .map_err(|e| CouldNotRetrieveTranscript {
206 video_id: self.video_id.clone(),
207 reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
208 e.to_string(),
209 )),
210 })?;
211
212 if response.status() != reqwest::StatusCode::OK {
213 return Err(CouldNotRetrieveTranscript {
214 video_id: self.video_id.clone(),
215 reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
216 format!("YouTube returned status code: {}", response.status()),
217 )),
218 });
219 }
220
221 let text = response
222 .text()
223 .await
224 .map_err(|e| CouldNotRetrieveTranscript {
225 video_id: self.video_id.clone(),
226 reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
227 e.to_string(),
228 )),
229 })?;
230
231 let snippets = TranscriptParser::new(preserve_formatting)
232 .parse(&text.clone())
233 .map_err(|_| CouldNotRetrieveTranscript {
234 video_id: self.video_id.clone(),
235 reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable),
236 })?;
237
238 Ok(FetchedTranscript {
239 snippets,
240 video_id: self.video_id.clone(),
241 language: self.language.clone(),
242 language_code: self.language_code.clone(),
243 is_generated: self.is_generated,
244 })
245 }
246
247 /// Checks if this transcript can be translated to other languages.
248 ///
249 /// This method determines whether YouTube offers translation capabilities
250 /// for this transcript. Not all transcripts are translatable.
251 ///
252 /// # Returns
253 ///
254 /// * `bool` - `true` if this transcript can be translated, `false` otherwise
255 ///
256 /// # Example
257 ///
258 /// ```rust,no_run
259 /// # use yt_transcript_rs::YouTubeTranscriptApi;
260 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
261 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
262 /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
263 /// let transcript = transcript_list.find_transcript(&["en"])?;
264 ///
265 /// if transcript.is_translatable() {
266 /// println!("This transcript can be translated to other languages");
267 ///
268 /// // Available translation languages
269 /// for lang in &transcript.translation_languages {
270 /// println!("- {} ({})", lang.language, lang.language_code);
271 /// }
272 /// } else {
273 /// println!("This transcript cannot be translated");
274 /// }
275 /// # Ok(())
276 /// # }
277 /// ```
278 pub fn is_translatable(&self) -> bool {
279 !self.translation_languages.is_empty()
280 }
281
282 /// Creates a translated version of this transcript in the specified language.
283 ///
284 /// This method creates a new `Transcript` instance representing the same content
285 /// but translated to the requested language. Note that this doesn't actually perform
286 /// the translation yet - the translation happens when you call `fetch()` on the
287 /// returned transcript.
288 ///
289 /// # Parameters
290 ///
291 /// * `language_code` - The language code to translate to (e.g., "es" for Spanish)
292 ///
293 /// # Returns
294 ///
295 /// * `Result<Self, CouldNotRetrieveTranscript>` - A new transcript object representing
296 /// the translation, or an error
297 ///
298 /// # Errors
299 ///
300 /// This method will return an error if:
301 /// - The transcript is not translatable
302 /// - The requested language is not available for translation
303 ///
304 /// # Example
305 ///
306 /// ```rust,no_run
307 /// # use yt_transcript_rs::YouTubeTranscriptApi;
308 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
309 /// let api = YouTubeTranscriptApi::new(None, None, None)?;
310 /// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
311 /// let transcript = transcript_list.find_transcript(&["en"])?;
312 ///
313 /// // Create Spanish translation
314 /// if transcript.is_translatable() {
315 /// let spanish = transcript.translate("es")?;
316 ///
317 /// // Now fetch the Spanish translation
318 /// let spanish_content = spanish.fetch(false).await?;
319 /// println!("Spanish: {}", spanish_content.text());
320 ///
321 /// // Create Japanese translation
322 /// let japanese = transcript.translate("ja")?;
323 /// let japanese_content = japanese.fetch(false).await?;
324 /// println!("Japanese: {}", japanese_content.text());
325 /// }
326 /// # Ok(())
327 /// # }
328 /// ```
329 pub fn translate(&self, language_code: &str) -> Result<Self, CouldNotRetrieveTranscript> {
330 if !self.is_translatable() {
331 return Err(CouldNotRetrieveTranscript {
332 video_id: self.video_id.clone(),
333 reason: Some(CouldNotRetrieveTranscriptReason::NotTranslatable),
334 });
335 }
336
337 if !self.translation_languages_map.contains_key(language_code) {
338 return Err(CouldNotRetrieveTranscript {
339 video_id: self.video_id.clone(),
340 reason: Some(CouldNotRetrieveTranscriptReason::TranslationLanguageNotAvailable),
341 });
342 }
343
344 let language = self
345 .translation_languages_map
346 .get(language_code)
347 .unwrap()
348 .clone();
349 let url = format!("{}&tlang={}", self.url, language_code);
350
351 Ok(Transcript::new(
352 self.client.clone(),
353 self.video_id.clone(),
354 url,
355 language,
356 language_code.to_string(),
357 true,
358 vec![],
359 ))
360 }
361
362 /// Returns the full human-readable language name of this transcript.
363 ///
364 /// # Returns
365 ///
366 /// * `&str` - The language name (e.g., "English", "EspaƱol")
367 pub fn language(&self) -> &str {
368 &self.language
369 }
370
371 /// Returns the language code of this transcript.
372 ///
373 /// # Returns
374 ///
375 /// * `&str` - The language code (e.g., "en", "es", "fr-CA")
376 pub fn language_code(&self) -> &str {
377 &self.language_code
378 }
379
380 /// Checks if this transcript was automatically generated by YouTube.
381 ///
382 /// # Returns
383 ///
384 /// * `bool` - `true` if automatically generated, `false` if manually created
385 pub fn is_generated(&self) -> bool {
386 self.is_generated
387 }
388}
389
390impl fmt::Display for Transcript {
391 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
392 let translation_desc = if self.is_translatable() {
393 "[TRANSLATABLE]"
394 } else {
395 ""
396 };
397 write!(
398 f,
399 "{} ({}){}",
400 self.language_code, self.language, translation_desc
401 )
402 }
403}