yt_transcript_rs/
fetched_transcript.rs

1use serde::{Deserialize, Serialize};
2/// Fetched transcript representation and processing.
3///
4/// This module contains the `FetchedTranscript` type, which represents a fully retrieved
5/// transcript from YouTube including all text segments with their timing information.
6/// Unlike the `Transcript` type which serves as a handle for fetching, this type
7/// contains the actual transcript content.
8///
9/// The module provides methods for working with complete transcripts, including
10/// accessing individual segments, formatting the full text, and serializing to
11/// various formats.
12use std::collections::HashMap;
13use std::iter::Iterator;
14use std::vec::IntoIter;
15
16use crate::models::FetchedTranscriptSnippet;
17
18/// A complete transcript with all the snippets and metadata.
19///
20/// This struct represents a successfully fetched transcript from YouTube,
21/// containing both the full text content (divided into timed segments) and
22/// metadata about the transcript.
23///
24/// A `FetchedTranscript` is typically obtained by calling `fetch()` on a `Transcript`
25/// object. It provides the actual transcript content, whereas `Transcript` is more
26/// like a handle for fetching.
27///
28/// # Features
29///
30/// * Contains all text segments with their timing information
31/// * Provides metadata about the transcript (language, source, etc.)
32/// * Can be iterated over to access individual segments
33/// * Supports conversion to various formats for storage or display
34///
35/// # Example
36///
37/// ```rust,no_run
38/// # use yt_transcript_rs::YouTubeTranscriptApi;
39/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
40/// let api = YouTubeTranscriptApi::new(None, None, None)?;
41/// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
42/// let transcript = transcript_list.find_transcript(&["en"])?;
43///
44/// // Fetch the actual transcript content
45/// let fetched = transcript.fetch(false).await?;
46///
47/// // Access the full text
48/// println!("Full transcript: {}", fetched.text());
49///
50/// // Or work with individual segments
51/// for segment in &fetched {
52///     println!("[{:.1}s - {:.1}s]: {}",
53///         segment.start,
54///         segment.start + segment.duration,
55///         segment.text);
56/// }
57/// # Ok(())
58/// # }
59/// ```
60#[derive(Debug, Clone, Serialize, Deserialize)]
61pub struct FetchedTranscript {
62    /// The list of transcript snippets (text segments with timing information).
63    pub snippets: Vec<FetchedTranscriptSnippet>,
64
65    /// YouTube video ID this transcript belongs to.
66    pub video_id: String,
67
68    /// Human-readable language name (e.g., "English", "Español").
69    pub language: String,
70
71    /// Language code (e.g., "en", "fr", "es-MX").
72    pub language_code: String,
73
74    /// Whether this transcript was automatically generated by YouTube.
75    ///
76    /// `true` indicates an auto-generated transcript (using speech recognition),
77    /// while `false` indicates a manually created transcript (typically more accurate).
78    pub is_generated: bool,
79}
80
81impl FetchedTranscript {
82    /// Converts the transcript to a raw data format suitable for serialization.
83    ///
84    /// This method transforms the transcript into a vector of hashmaps containing
85    /// the text, start time, and duration for each segment. This format is useful
86    /// for JSON serialization or for integrating with other systems.
87    ///
88    /// # Returns
89    ///
90    /// A vector of hashmaps, each representing one transcript segment with keys:
91    /// - "text": The segment text
92    /// - "start": The start time in seconds
93    /// - "duration": The segment duration in seconds
94    ///
95    /// # Example
96    ///
97    /// ```rust,no_run
98    /// # use yt_transcript_rs::YouTubeTranscriptApi;
99    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
100    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
101    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
102    /// # let transcript = transcript_list.find_transcript(&["en"])?;
103    /// # let fetched = transcript.fetch(false).await?;
104    /// // Convert to raw data (array of objects)
105    /// let raw_data = fetched.to_raw_data();
106    ///
107    /// // Serialize to JSON
108    /// let json = serde_json::to_string_pretty(&raw_data)?;
109    /// println!("JSON transcript:\n{}", json);
110    /// # Ok(())
111    /// # }
112    /// ```
113    pub fn to_raw_data(&self) -> Vec<HashMap<String, serde_json::Value>> {
114        self.snippets
115            .iter()
116            .map(|snippet| {
117                let mut map = HashMap::new();
118                map.insert(
119                    "text".to_string(),
120                    serde_json::Value::String(snippet.text.clone()),
121                );
122                map.insert(
123                    "start".to_string(),
124                    serde_json::Value::Number(serde_json::Number::from_f64(snippet.start).unwrap()),
125                );
126                map.insert(
127                    "duration".to_string(),
128                    serde_json::Value::Number(
129                        serde_json::Number::from_f64(snippet.duration).unwrap(),
130                    ),
131                );
132                map
133            })
134            .collect()
135    }
136
137    /// Returns the full transcript text as a single string.
138    ///
139    /// This method combines all transcript segments into a single string,
140    /// with each segment separated by a space.
141    ///
142    /// # Returns
143    ///
144    /// A String containing the full transcript text.
145    ///
146    /// # Example
147    ///
148    /// ```rust,no_run
149    /// # use yt_transcript_rs::YouTubeTranscriptApi;
150    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
151    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
152    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
153    /// # let transcript = transcript_list.find_transcript(&["en"])?;
154    /// let fetched = transcript.fetch(false).await?;
155    ///
156    /// // Get the full text as a single string
157    /// let full_text = fetched.text();
158    /// println!("Transcript: {}", full_text);
159    /// # Ok(())
160    /// # }
161    /// ```
162    pub fn text(&self) -> String {
163        self.snippets
164            .iter()
165            .map(|snippet| snippet.text.clone())
166            .collect::<Vec<String>>()
167            .join(" ")
168    }
169
170    /// Returns a reference to the individual transcript segments.
171    ///
172    /// This method provides access to the raw transcript segments, each containing
173    /// text with its corresponding timing information.
174    ///
175    /// # Returns
176    ///
177    /// A slice of `FetchedTranscriptSnippet` objects.
178    ///
179    /// # Example
180    ///
181    /// ```rust,no_run
182    /// # use yt_transcript_rs::YouTubeTranscriptApi;
183    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
184    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
185    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
186    /// # let transcript = transcript_list.find_transcript(&["en"])?;
187    /// let fetched = transcript.fetch(false).await?;
188    ///
189    /// // Access individual segments
190    /// for segment in fetched.parts() {
191    ///     // Find segments mentioning a specific word
192    ///     if segment.text.to_lowercase().contains("never") {
193    ///         println!("Found at {}s: {}", segment.start, segment.text);
194    ///     }
195    /// }
196    /// # Ok(())
197    /// # }
198    /// ```
199    pub fn parts(&self) -> &[FetchedTranscriptSnippet] {
200        &self.snippets
201    }
202
203    /// Returns the language of this transcript.
204    ///
205    /// # Returns
206    ///
207    /// The human-readable language name (e.g., "English", "Español")
208    pub fn language(&self) -> &str {
209        &self.language
210    }
211
212    /// Returns the language code of this transcript.
213    ///
214    /// # Returns
215    ///
216    /// The language code (e.g., "en", "es", "fr-CA")
217    pub fn language_code(&self) -> &str {
218        &self.language_code
219    }
220
221    /// Returns whether this transcript was automatically generated.
222    ///
223    /// # Returns
224    ///
225    /// `true` if automatically generated by YouTube, `false` if manually created
226    pub fn is_generated(&self) -> bool {
227        self.is_generated
228    }
229
230    /// Returns the total duration of the transcript in seconds.
231    ///
232    /// This calculates the end time of the last segment in the transcript.
233    ///
234    /// # Returns
235    ///
236    /// The total duration in seconds as a f64, or 0.0 if the transcript is empty.
237    ///
238    /// # Example
239    ///
240    /// ```rust,no_run
241    /// # use yt_transcript_rs::YouTubeTranscriptApi;
242    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
243    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
244    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
245    /// # let transcript = transcript_list.find_transcript(&["en"])?;
246    /// let fetched = transcript.fetch(false).await?;
247    ///
248    /// println!("Video duration: {:.2} seconds", fetched.duration());
249    /// # Ok(())
250    /// # }
251    /// ```
252    pub fn duration(&self) -> f64 {
253        if self.snippets.is_empty() {
254            return 0.0;
255        }
256
257        let last = &self.snippets[self.snippets.len() - 1];
258        last.start + last.duration
259    }
260}
261
262impl IntoIterator for FetchedTranscript {
263    type Item = FetchedTranscriptSnippet;
264    type IntoIter = IntoIter<Self::Item>;
265
266    /// Creates an iterator that takes ownership of the transcript.
267    ///
268    /// This allows iterating over and consuming the transcript segments.
269    fn into_iter(self) -> Self::IntoIter {
270        self.snippets.into_iter()
271    }
272}
273
274impl<'a> IntoIterator for &'a FetchedTranscript {
275    type Item = &'a FetchedTranscriptSnippet;
276    type IntoIter = std::slice::Iter<'a, FetchedTranscriptSnippet>;
277
278    /// Creates an iterator that borrows the transcript.
279    ///
280    /// This allows iterating over the transcript segments without taking ownership.
281    fn into_iter(self) -> Self::IntoIter {
282        self.snippets.iter()
283    }
284}