yt_transcript_rs/fetched_transcript.rs
1use serde::{Deserialize, Serialize};
2/// Fetched transcript representation and processing.
3///
4/// This module contains the `FetchedTranscript` type, which represents a fully retrieved
5/// transcript from YouTube including all text segments with their timing information.
6/// Unlike the `Transcript` type which serves as a handle for fetching, this type
7/// contains the actual transcript content.
8///
9/// The module provides methods for working with complete transcripts, including
10/// accessing individual segments, formatting the full text, and serializing to
11/// various formats.
12use std::collections::HashMap;
13use std::iter::Iterator;
14use std::vec::IntoIter;
15
16use crate::models::FetchedTranscriptSnippet;
17
18/// A complete transcript with all the snippets and metadata.
19///
20/// This struct represents a successfully fetched transcript from YouTube,
21/// containing both the full text content (divided into timed segments) and
22/// metadata about the transcript.
23///
24/// A `FetchedTranscript` is typically obtained by calling `fetch()` on a `Transcript`
25/// object. It provides the actual transcript content, whereas `Transcript` is more
26/// like a handle for fetching.
27///
28/// # Features
29///
30/// * Contains all text segments with their timing information
31/// * Provides metadata about the transcript (language, source, etc.)
32/// * Can be iterated over to access individual segments
33/// * Supports conversion to various formats for storage or display
34///
35/// # Example
36///
37/// ```rust,no_run
38/// # use yt_transcript_rs::YouTubeTranscriptApi;
39/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
40/// let api = YouTubeTranscriptApi::new(None, None, None)?;
41/// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
42/// let transcript = transcript_list.find_transcript(&["en"])?;
43///
44/// // Fetch the actual transcript content
45/// let fetched = transcript.fetch(false).await?;
46///
47/// // Access the full text
48/// println!("Full transcript: {}", fetched.text());
49///
50/// // Or work with individual segments
51/// for segment in &fetched {
52/// println!("[{:.1}s - {:.1}s]: {}",
53/// segment.start,
54/// segment.start + segment.duration,
55/// segment.text);
56/// }
57/// # Ok(())
58/// # }
59/// ```
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct FetchedTranscript {
62 /// The list of transcript snippets (text segments with timing information).
63 pub snippets: Vec<FetchedTranscriptSnippet>,
64
65 /// YouTube video ID this transcript belongs to.
66 pub video_id: String,
67
68 /// Human-readable language name (e.g., "English", "Español").
69 pub language: String,
70
71 /// Language code (e.g., "en", "fr", "es-MX").
72 pub language_code: String,
73
74 /// Whether this transcript was automatically generated by YouTube.
75 ///
76 /// `true` indicates an auto-generated transcript (using speech recognition),
77 /// while `false` indicates a manually created transcript (typically more accurate).
78 pub is_generated: bool,
79}
80
81impl FetchedTranscript {
82 /// Converts the transcript to a raw data format suitable for serialization.
83 ///
84 /// This method transforms the transcript into a vector of hashmaps containing
85 /// the text, start time, and duration for each segment. This format is useful
86 /// for JSON serialization or for integrating with other systems.
87 ///
88 /// # Returns
89 ///
90 /// A vector of hashmaps, each representing one transcript segment with keys:
91 /// - "text": The segment text
92 /// - "start": The start time in seconds
93 /// - "duration": The segment duration in seconds
94 ///
95 /// # Example
96 ///
97 /// ```rust,no_run
98 /// # use yt_transcript_rs::YouTubeTranscriptApi;
99 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
100 /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
101 /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
102 /// # let transcript = transcript_list.find_transcript(&["en"])?;
103 /// # let fetched = transcript.fetch(false).await?;
104 /// // Convert to raw data (array of objects)
105 /// let raw_data = fetched.to_raw_data();
106 ///
107 /// // Serialize to JSON
108 /// let json = serde_json::to_string_pretty(&raw_data)?;
109 /// println!("JSON transcript:\n{}", json);
110 /// # Ok(())
111 /// # }
112 /// ```
113 pub fn to_raw_data(&self) -> Vec<HashMap<String, serde_json::Value>> {
114 self.snippets
115 .iter()
116 .map(|snippet| {
117 let mut map = HashMap::new();
118 map.insert(
119 "text".to_string(),
120 serde_json::Value::String(snippet.text.clone()),
121 );
122 map.insert(
123 "start".to_string(),
124 serde_json::Value::Number(serde_json::Number::from_f64(snippet.start).unwrap()),
125 );
126 map.insert(
127 "duration".to_string(),
128 serde_json::Value::Number(
129 serde_json::Number::from_f64(snippet.duration).unwrap(),
130 ),
131 );
132 map
133 })
134 .collect()
135 }
136
137 /// Returns the full transcript text as a single string.
138 ///
139 /// This method combines all transcript segments into a single string,
140 /// with each segment separated by a space.
141 ///
142 /// # Returns
143 ///
144 /// A String containing the full transcript text.
145 ///
146 /// # Example
147 ///
148 /// ```rust,no_run
149 /// # use yt_transcript_rs::YouTubeTranscriptApi;
150 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
151 /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
152 /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
153 /// # let transcript = transcript_list.find_transcript(&["en"])?;
154 /// let fetched = transcript.fetch(false).await?;
155 ///
156 /// // Get the full text as a single string
157 /// let full_text = fetched.text();
158 /// println!("Transcript: {}", full_text);
159 /// # Ok(())
160 /// # }
161 /// ```
162 pub fn text(&self) -> String {
163 self.snippets
164 .iter()
165 .map(|snippet| snippet.text.clone())
166 .collect::<Vec<String>>()
167 .join(" ")
168 }
169
170 /// Returns a reference to the individual transcript segments.
171 ///
172 /// This method provides access to the raw transcript segments, each containing
173 /// text with its corresponding timing information.
174 ///
175 /// # Returns
176 ///
177 /// A slice of `FetchedTranscriptSnippet` objects.
178 ///
179 /// # Example
180 ///
181 /// ```rust,no_run
182 /// # use yt_transcript_rs::YouTubeTranscriptApi;
183 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
184 /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
185 /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
186 /// # let transcript = transcript_list.find_transcript(&["en"])?;
187 /// let fetched = transcript.fetch(false).await?;
188 ///
189 /// // Access individual segments
190 /// for segment in fetched.parts() {
191 /// // Find segments mentioning a specific word
192 /// if segment.text.to_lowercase().contains("never") {
193 /// println!("Found at {}s: {}", segment.start, segment.text);
194 /// }
195 /// }
196 /// # Ok(())
197 /// # }
198 /// ```
199 pub fn parts(&self) -> &[FetchedTranscriptSnippet] {
200 &self.snippets
201 }
202
203 /// Returns the language of this transcript.
204 ///
205 /// # Returns
206 ///
207 /// The human-readable language name (e.g., "English", "Español")
208 pub fn language(&self) -> &str {
209 &self.language
210 }
211
212 /// Returns the language code of this transcript.
213 ///
214 /// # Returns
215 ///
216 /// The language code (e.g., "en", "es", "fr-CA")
217 pub fn language_code(&self) -> &str {
218 &self.language_code
219 }
220
221 /// Returns whether this transcript was automatically generated.
222 ///
223 /// # Returns
224 ///
225 /// `true` if automatically generated by YouTube, `false` if manually created
226 pub fn is_generated(&self) -> bool {
227 self.is_generated
228 }
229
230 /// Returns the total duration of the transcript in seconds.
231 ///
232 /// This calculates the end time of the last segment in the transcript.
233 ///
234 /// # Returns
235 ///
236 /// The total duration in seconds as a f64, or 0.0 if the transcript is empty.
237 ///
238 /// # Example
239 ///
240 /// ```rust,no_run
241 /// # use yt_transcript_rs::YouTubeTranscriptApi;
242 /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
243 /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
244 /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
245 /// # let transcript = transcript_list.find_transcript(&["en"])?;
246 /// let fetched = transcript.fetch(false).await?;
247 ///
248 /// println!("Video duration: {:.2} seconds", fetched.duration());
249 /// # Ok(())
250 /// # }
251 /// ```
252 pub fn duration(&self) -> f64 {
253 if self.snippets.is_empty() {
254 return 0.0;
255 }
256
257 let last = &self.snippets[self.snippets.len() - 1];
258 last.start + last.duration
259 }
260}
261
262impl IntoIterator for FetchedTranscript {
263 type Item = FetchedTranscriptSnippet;
264 type IntoIter = IntoIter<Self::Item>;
265
266 /// Creates an iterator that takes ownership of the transcript.
267 ///
268 /// This allows iterating over and consuming the transcript segments.
269 fn into_iter(self) -> Self::IntoIter {
270 self.snippets.into_iter()
271 }
272}
273
274impl<'a> IntoIterator for &'a FetchedTranscript {
275 type Item = &'a FetchedTranscriptSnippet;
276 type IntoIter = std::slice::Iter<'a, FetchedTranscriptSnippet>;
277
278 /// Creates an iterator that borrows the transcript.
279 ///
280 /// This allows iterating over the transcript segments without taking ownership.
281 fn into_iter(self) -> Self::IntoIter {
282 self.snippets.iter()
283 }
284}