// yt-transcript-rs 0.1.1
//
// A Rust library for fetching and working with YouTube video transcripts.
// Documentation
use serde::{Deserialize, Serialize};
/// Fetched transcript representation and processing.
///
/// This module contains the `FetchedTranscript` type, which represents a fully retrieved
/// transcript from YouTube including all text segments with their timing information.
/// Unlike the `Transcript` type, which serves as a handle for fetching, this type
/// contains the actual transcript content.
///
/// The module provides methods for working with complete transcripts, including
/// accessing individual segments, formatting the full text, and serializing to
/// various formats.
use std::collections::HashMap;
use std::iter::Iterator;
use std::vec::IntoIter;

use crate::models::FetchedTranscriptSnippet;

/// A complete transcript with all the snippets and metadata.
///
/// This struct represents a successfully fetched transcript from YouTube,
/// containing both the full text content (divided into timed segments) and
/// metadata about the transcript.
///
/// A `FetchedTranscript` is typically obtained by calling `fetch()` on a `Transcript`
/// object. It provides the actual transcript content, whereas `Transcript` is more
/// like a handle for fetching.
///
/// # Features
///
/// * Contains all text segments with their timing information
/// * Provides metadata about the transcript (language, source, etc.)
/// * Can be iterated over to access individual segments
/// * Supports conversion to various formats for storage or display
///
/// # Example
///
/// ```rust,no_run
/// # use yt_transcript_rs::YouTubeTranscriptApi;
/// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
/// let api = YouTubeTranscriptApi::new(None, None, None)?;
/// let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
/// let transcript = transcript_list.find_transcript(&["en"])?;
///
/// // Fetch the actual transcript content
/// let fetched = transcript.fetch(false).await?;
///
/// // Access the full text
/// println!("Full transcript: {}", fetched.text());
///
/// // Or work with individual segments
/// for segment in &fetched {
///     println!("[{:.1}s - {:.1}s]: {}",
///         segment.start,
///         segment.start + segment.duration,
///         segment.text);
/// }
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FetchedTranscript {
    /// The list of transcript snippets (text segments with timing information).
    ///
    /// The order of this vector is significant: `text()` joins the snippets in
    /// this order, and `duration()` is computed from the last element only.
    pub snippets: Vec<FetchedTranscriptSnippet>,

    /// YouTube video ID this transcript belongs to.
    pub video_id: String,

    /// Human-readable language name (e.g., "English", "Español").
    ///
    /// Also exposed through the `language()` accessor.
    pub language: String,

    /// Language code (e.g., "en", "fr", "es-MX").
    ///
    /// Also exposed through the `language_code()` accessor.
    pub language_code: String,

    /// Whether this transcript was automatically generated by YouTube.
    ///
    /// `true` indicates an auto-generated transcript (using speech recognition),
    /// while `false` indicates a manually created transcript (typically more accurate).
    ///
    /// Also exposed through the `is_generated()` accessor.
    pub is_generated: bool,
}

impl FetchedTranscript {
    /// Converts a time value in seconds into a JSON value.
    ///
    /// JSON has no representation for NaN or infinite numbers, so
    /// `serde_json::Number::from_f64` returns `None` for them. Such values are
    /// mapped to `null` instead of panicking.
    fn seconds_to_json(seconds: f64) -> serde_json::Value {
        serde_json::Number::from_f64(seconds)
            .map_or(serde_json::Value::Null, serde_json::Value::Number)
    }

    /// Converts the transcript to a raw data format suitable for serialization.
    ///
    /// This method transforms the transcript into a vector of hashmaps containing
    /// the text, start time, and duration for each segment. This format is useful
    /// for JSON serialization or for integrating with other systems.
    ///
    /// # Returns
    ///
    /// A vector of hashmaps, each representing one transcript segment with keys:
    /// - "text": The segment text
    /// - "start": The start time in seconds
    /// - "duration": The segment duration in seconds
    ///
    /// A `start` or `duration` that is NaN or infinite cannot be represented as a
    /// JSON number and is emitted as JSON `null`; this method never panics.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use yt_transcript_rs::YouTubeTranscriptApi;
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
    /// # let transcript = transcript_list.find_transcript(&["en"])?;
    /// # let fetched = transcript.fetch(false).await?;
    /// // Convert to raw data (array of objects)
    /// let raw_data = fetched.to_raw_data();
    ///
    /// // Serialize to JSON
    /// let json = serde_json::to_string_pretty(&raw_data)?;
    /// println!("JSON transcript:\n{}", json);
    /// # Ok(())
    /// # }
    /// ```
    pub fn to_raw_data(&self) -> Vec<HashMap<String, serde_json::Value>> {
        self.snippets
            .iter()
            .map(|snippet| {
                // Exactly three keys are inserted per segment.
                let mut map = HashMap::with_capacity(3);
                map.insert(
                    "text".to_string(),
                    serde_json::Value::String(snippet.text.clone()),
                );
                map.insert("start".to_string(), Self::seconds_to_json(snippet.start));
                map.insert(
                    "duration".to_string(),
                    Self::seconds_to_json(snippet.duration),
                );
                map
            })
            .collect()
    }

    /// Returns the full transcript text as a single string.
    ///
    /// This method combines all transcript segments into a single string,
    /// with each segment separated by a space.
    ///
    /// # Returns
    ///
    /// A String containing the full transcript text. An empty transcript yields
    /// an empty string.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use yt_transcript_rs::YouTubeTranscriptApi;
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
    /// # let transcript = transcript_list.find_transcript(&["en"])?;
    /// let fetched = transcript.fetch(false).await?;
    ///
    /// // Get the full text as a single string
    /// let full_text = fetched.text();
    /// println!("Transcript: {}", full_text);
    /// # Ok(())
    /// # }
    /// ```
    pub fn text(&self) -> String {
        // Join borrowed slices: only the final string is allocated, rather than
        // one clone per snippet.
        self.snippets
            .iter()
            .map(|snippet| snippet.text.as_str())
            .collect::<Vec<&str>>()
            .join(" ")
    }

    /// Returns a reference to the individual transcript segments.
    ///
    /// This method provides access to the raw transcript segments, each containing
    /// text with its corresponding timing information.
    ///
    /// # Returns
    ///
    /// A slice of `FetchedTranscriptSnippet` objects.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use yt_transcript_rs::YouTubeTranscriptApi;
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
    /// # let transcript = transcript_list.find_transcript(&["en"])?;
    /// let fetched = transcript.fetch(false).await?;
    ///
    /// // Access individual segments
    /// for segment in fetched.parts() {
    ///     // Find segments mentioning a specific word
    ///     if segment.text.to_lowercase().contains("never") {
    ///         println!("Found at {}s: {}", segment.start, segment.text);
    ///     }
    /// }
    /// # Ok(())
    /// # }
    /// ```
    pub fn parts(&self) -> &[FetchedTranscriptSnippet] {
        &self.snippets
    }

    /// Returns the language of this transcript.
    ///
    /// # Returns
    ///
    /// The human-readable language name (e.g., "English", "Español")
    pub fn language(&self) -> &str {
        &self.language
    }

    /// Returns the language code of this transcript.
    ///
    /// # Returns
    ///
    /// The language code (e.g., "en", "es", "fr-CA")
    pub fn language_code(&self) -> &str {
        &self.language_code
    }

    /// Returns whether this transcript was automatically generated.
    ///
    /// # Returns
    ///
    /// `true` if automatically generated by YouTube, `false` if manually created
    pub fn is_generated(&self) -> bool {
        self.is_generated
    }

    /// Returns the total duration of the transcript in seconds.
    ///
    /// This calculates the end time of the last segment in the transcript
    /// (its `start` plus its `duration`); it is not a sum over all segments.
    ///
    /// # Returns
    ///
    /// The total duration in seconds as a f64, or 0.0 if the transcript is empty.
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use yt_transcript_rs::YouTubeTranscriptApi;
    /// # async fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// # let api = YouTubeTranscriptApi::new(None, None, None)?;
    /// # let transcript_list = api.list_transcripts("dQw4w9WgXcQ").await?;
    /// # let transcript = transcript_list.find_transcript(&["en"])?;
    /// let fetched = transcript.fetch(false).await?;
    ///
    /// println!("Video duration: {:.2} seconds", fetched.duration());
    /// # Ok(())
    /// # }
    /// ```
    pub fn duration(&self) -> f64 {
        // `last()` is `None` for an empty transcript, which maps to 0.0.
        self.snippets
            .last()
            .map_or(0.0, |last| last.start + last.duration)
    }
}

impl IntoIterator for FetchedTranscript {
    type Item = FetchedTranscriptSnippet;
    type IntoIter = IntoIter<FetchedTranscriptSnippet>;

    /// Consumes the transcript and yields its snippets by value.
    ///
    /// Only the snippet vector is kept; the remaining fields are dropped.
    fn into_iter(self) -> Self::IntoIter {
        let Self { snippets, .. } = self;
        snippets.into_iter()
    }
}

impl<'a> IntoIterator for &'a FetchedTranscript {
    type Item = &'a FetchedTranscriptSnippet;
    type IntoIter = std::slice::Iter<'a, FetchedTranscriptSnippet>;

    /// Yields shared references to the snippets, leaving the transcript intact.
    ///
    /// This is what makes `for segment in &fetched { ... }` work.
    fn into_iter(self) -> Self::IntoIter {
        let segments: &'a [FetchedTranscriptSnippet] = self.snippets.as_slice();
        segments.iter()
    }
}