// yt-transcript-rs 0.1.8
//
// A Rust library for fetching and working with YouTube video transcripts.
// Documentation
use crate::errors::CookieError;
use reqwest::cookie::Jar;
use std::path::Path;
use std::sync::Arc;

/// Utility for loading cookie jars from Netscape-format cookie files.
///
/// # CookieJarLoader
///
/// This component provides functionality to load cookies from files, which is necessary
/// for accessing age-restricted YouTube content or when authentication is required.
/// The loader validates cookie file formats and creates cookie jars that can be used
/// with HTTP requests.
///
/// `CookieJarLoader` carries no state; all functionality is exposed through associated
/// functions, so there is never a need to construct a value of this type.
///
/// ## Cookie File Format
///
/// The loader expects cookies in the standard Netscape/Mozilla format:
/// ```text
/// # Netscape HTTP Cookie File
/// # https://curl.se/docs/http-cookies.html
/// # This file was generated by another application
/// .youtube.com   TRUE    /   TRUE    1723157402  SID     AI2OI93JsldfLLPslke93LK
/// .youtube.com   TRUE    /   TRUE    1723231432  HSID    A91JKShflp20Jp2lk34WE
/// ```
///
/// Each line contains tab-separated fields in this order (the example above is shown
/// with spaces for readability; real files must use tab characters):
/// 1. Domain
/// 2. Flag (whether the cookie can be sent to subdomains)
/// 3. Path
/// 4. Secure flag
/// 5. Expiration timestamp (read past, but not applied to the loaded cookie)
/// 6. Name
/// 7. Value
///
/// ## Use Cases
///
/// - Accessing age-restricted YouTube videos
/// - Retrieving transcripts from videos that require authentication
/// - Using cookies from a browser session for authenticated requests
///
/// ## Usage with YouTubeTranscriptApi
///
/// ```rust,no_run
/// use std::path::Path;
/// use yt_transcript_rs::YouTubeTranscriptApi;
/// use yt_transcript_rs::CookieJarLoader;
///
/// # fn example() -> Result<(), Box<dyn std::error::Error>> {
/// // Path to your cookie file (exported from a browser)
/// let cookie_path = Path::new("path/to/cookies.txt");
///
/// // Create an API instance with the cookie file
/// let api = YouTubeTranscriptApi::new(
///     Some(&cookie_path),
///     None,  // No proxy
///     None   // Default HTTP client
/// )?;
///
/// // Now you can access age-restricted or authenticated-only content
/// # Ok(())
/// # }
/// ```
#[derive(Debug, Clone, Copy, Default)]
pub struct CookieJarLoader;

impl CookieJarLoader {
    /// Marker that curl-style exports place directly in front of the domain field to
    /// flag an HttpOnly cookie. Such lines start with `#` but are cookies, not comments.
    const HTTP_ONLY_PREFIX: &'static str = "#HttpOnly_";

    /// Loads a cookie jar from a file path.
    ///
    /// This method reads a Netscape-format cookie file, validates its contents,
    /// and creates a cookie jar that can be used with HTTP requests.
    ///
    /// Lines that are comments, blank, have fewer than seven tab-separated fields, or
    /// whose domain/path do not form a parseable URL are skipped. Lines prefixed with
    /// `#HttpOnly_` are treated as HttpOnly cookies rather than comments. The
    /// expiration field is ignored: no `Expires`/`Max-Age` attribute is emitted.
    ///
    /// # Parameters
    ///
    /// * `cookie_path` - Path to the Netscape-format cookie file
    ///
    /// # Returns
    ///
    /// * `Result<Jar, CookieError>` - A configured cookie jar or an error
    ///
    /// # Errors
    ///
    /// This method will return a `CookieError` if:
    /// - The file doesn't exist or can't be read (`PathInvalid`)
    /// - The file is empty or doesn't contain valid cookies (`Invalid`)
    /// - The file format doesn't match the expected Netscape format (`Invalid`)
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use std::path::Path;
    /// # use yt_transcript_rs::CookieJarLoader;
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let cookie_path = Path::new("cookies.txt");
    /// let jar = CookieJarLoader::load_cookie_jar(&cookie_path)?;
    ///
    /// // Now the jar can be used with a reqwest client
    /// let client = reqwest::Client::builder()
    ///     .cookie_store(true)
    ///     .cookie_provider(std::sync::Arc::new(jar))
    ///     .build()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn load_cookie_jar(cookie_path: &Path) -> Result<Jar, CookieError> {
        // A missing path fails the read just like an unreadable one, and both map to
        // `PathInvalid`, so no separate `exists()` probe is needed.
        let content = std::fs::read_to_string(cookie_path)
            .map_err(|_| CookieError::PathInvalid(cookie_path.display().to_string()))?;

        if content.trim().is_empty() {
            return Err(CookieError::Invalid(cookie_path.display().to_string()));
        }

        let jar = Jar::default();
        let mut has_cookies = false;

        for (cookie, origin) in content.lines().filter_map(Self::parse_cookie_line) {
            // The origin is assembled from file contents, so it may not be a valid URL
            // (e.g. an empty or garbled domain). Skip such lines instead of panicking.
            if let Ok(url) = origin.parse() {
                jar.add_cookie_str(&cookie, &url);
                has_cookies = true;
            }
        }

        if !has_cookies {
            return Err(CookieError::Invalid(cookie_path.display().to_string()));
        }

        Ok(jar)
    }

    /// Converts one line of a Netscape cookie file into a `Set-Cookie`-style string
    /// and the origin URL (as text) it should be registered under.
    ///
    /// Returns `None` for comments, blank lines and lines with fewer than seven
    /// tab-separated fields.
    fn parse_cookie_line(line: &str) -> Option<(String, String)> {
        // Peel off the HttpOnly marker first; otherwise the line would be mistaken for
        // a comment and the cookie dropped.
        let (line, http_only) = match line.strip_prefix(Self::HTTP_ONLY_PREFIX) {
            Some(rest) => (rest, true),
            None => (line, false),
        };

        if line.starts_with('#') || line.trim().is_empty() {
            return None;
        }

        let fields: Vec<&str> = line.split('\t').collect();
        let (raw_domain, subdomain_flag, path, secure_flag, name, value) =
            match fields.as_slice() {
                &[domain, flag, path, secure, _expiry, name, value, ..] => {
                    (domain, flag, path, secure, name, value)
                }
                _ => return None,
            };

        let secure = secure_flag == "TRUE";
        // A leading dot is file-format notation for "applies to subdomains"; it is not
        // part of the host name a request would carry.
        let host = raw_domain.trim_start_matches('.');
        let covers_subdomains = subdomain_flag == "TRUE" || raw_domain.starts_with('.');
        let has_absolute_path = path.starts_with('/');

        let mut cookie = format!("{}={}", name, value);
        if covers_subdomains {
            // Without an explicit Domain attribute the cookie is bound to the exact
            // host of the origin URL only.
            cookie.push_str("; Domain=");
            cookie.push_str(host);
        }
        if has_absolute_path {
            cookie.push_str("; Path=");
            cookie.push_str(path);
        }
        if secure {
            cookie.push_str("; Secure");
        }
        if http_only {
            cookie.push_str("; HttpOnly");
        }

        let origin = format!(
            "{}://{}{}",
            if secure { "https" } else { "http" },
            host,
            if has_absolute_path { path } else { "/" }
        );

        Some((cookie, origin))
    }

    /// Creates an Arc-wrapped cookie jar from a file path.
    ///
    /// This is a convenience method that wraps the cookie jar in an `Arc`
    /// (Atomic Reference Counter), which is the form reqwest's `cookie_provider`
    /// accepts.
    ///
    /// # Parameters
    ///
    /// * `cookie_path` - Path to the Netscape-format cookie file
    ///
    /// # Returns
    ///
    /// * `Result<Arc<Jar>, CookieError>` - An Arc-wrapped cookie jar or an error
    ///
    /// # Errors
    ///
    /// This method will return a `CookieError` if:
    /// - The file doesn't exist or can't be read (`PathInvalid`)
    /// - The file is empty or doesn't contain valid cookies (`Invalid`)
    /// - The file format doesn't match the expected Netscape format (`Invalid`)
    ///
    /// # Example
    ///
    /// ```rust,no_run
    /// # use std::path::Path;
    /// # use yt_transcript_rs::CookieJarLoader;
    /// # fn example() -> Result<(), Box<dyn std::error::Error>> {
    /// let cookie_path = Path::new("cookies.txt");
    ///
    /// // Create an Arc-wrapped jar directly (preferred for use with reqwest)
    /// let jar = CookieJarLoader::create_cookie_jar(&cookie_path)?;
    ///
    /// // Use with a reqwest client
    /// let client = reqwest::Client::builder()
    ///     .cookie_store(true)
    ///     .cookie_provider(jar.clone())
    ///     .build()?;
    /// # Ok(())
    /// # }
    /// ```
    pub fn create_cookie_jar(cookie_path: &Path) -> Result<Arc<Jar>, CookieError> {
        Self::load_cookie_jar(cookie_path).map(Arc::new)
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use std::io::Write;
    use tempfile::NamedTempFile;

    /// Path used by the tests that need a file which is guaranteed to be absent.
    const MISSING_PATH: &str = "/this/path/does/not/exist.txt";

    /// Writes the given lines (each terminated by `\n`) to a fresh temporary file.
    ///
    /// An empty slice produces an empty file.
    fn cookie_file(lines: &[&str]) -> NamedTempFile {
        let mut body = String::new();
        for line in lines {
            body.push_str(line);
            body.push('\n');
        }

        let mut tmp = NamedTempFile::new().unwrap();
        tmp.write_all(body.as_bytes()).unwrap();
        tmp.flush().unwrap();
        tmp
    }

    #[test]
    fn test_load_valid_cookie_file() {
        // Two well-formed cookie lines below the usual header comments.
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            "# This is a generated cookie file",
            ".youtube.com\tTRUE\t/\tTRUE\t1723157402\tSID\tTestSessionId123",
            ".youtube.com\tTRUE\t/\tTRUE\t1723231432\tHSID\tTestHash456",
        ]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());
        assert!(loaded.is_ok(), "Should successfully load valid cookie file");
    }

    #[test]
    fn test_load_nonexistent_file() {
        let loaded = CookieJarLoader::load_cookie_jar(Path::new(MISSING_PATH));

        assert!(loaded.is_err(), "Should fail with non-existent file");
        assert!(
            matches!(loaded, Err(CookieError::PathInvalid(_))),
            "Expected PathInvalid error for non-existent file"
        );
    }

    #[test]
    fn test_load_empty_file() {
        // No lines at all: the file exists but is zero bytes long.
        let file = cookie_file(&[]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());

        assert!(loaded.is_err(), "Should fail with empty file");
        assert!(
            matches!(loaded, Err(CookieError::Invalid(_))),
            "Expected Invalid error for empty file"
        );
    }

    #[test]
    fn test_load_file_with_only_comments() {
        // Comments and a blank line, but not a single cookie entry.
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            "# This file only contains comments",
            "# No actual cookies here",
            "",
            "# Another comment line",
        ]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());

        assert!(
            loaded.is_err(),
            "Should fail with file containing only comments"
        );
        assert!(
            matches!(loaded, Err(CookieError::Invalid(_))),
            "Expected Invalid error for file with only comments"
        );
    }

    #[test]
    fn test_load_malformed_cookie_file() {
        // Every non-comment line has fewer than the seven required fields.
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            "# This is a malformed cookie file",
            ".youtube.com\tMISSING_FIELDS",
            "invalid_format_line",
            ".google.com\tTRUE\t/\tTRUE\tMISSING_NAME_AND_VALUE",
        ]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());

        assert!(loaded.is_err(), "Should fail with malformed cookie file");
        assert!(
            matches!(loaded, Err(CookieError::Invalid(_))),
            "Expected Invalid error for malformed cookie file"
        );
    }

    #[test]
    fn test_load_mixed_valid_invalid_cookies() {
        // Valid entries interleaved with lines that are too short to be cookies.
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            "# This file has mixed valid and invalid cookies",
            ".youtube.com\tTRUE\t/\tTRUE\t1723157402\tSID\tValidCookie123",
            "invalid_line_with_no_tabs",
            ".google.com\tINVALID\tFORMAT\tMISSING_FIELDS",
            ".example.com\tTRUE\t/\tTRUE\t1723157402\tTEST\tAnotherValidCookie",
        ]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());
        assert!(
            loaded.is_ok(),
            "Should load file with at least some valid cookies"
        );
    }

    #[test]
    fn test_create_cookie_jar() {
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            ".youtube.com\tTRUE\t/\tTRUE\t1723157402\tSID\tTestSessionId123",
        ]);

        let shared = CookieJarLoader::create_cookie_jar(file.path());
        assert!(
            shared.is_ok(),
            "Should successfully create Arc-wrapped cookie jar"
        );

        // Unwrapping is enough here: the return type already guarantees the `Arc`,
        // and the jar's contents are not inspected by this test.
        let _jar = shared.unwrap();
    }

    #[test]
    fn test_create_cookie_jar_invalid_path() {
        let shared = CookieJarLoader::create_cookie_jar(Path::new(MISSING_PATH));

        assert!(shared.is_err(), "Should fail with non-existent file");
        assert!(
            matches!(shared, Err(CookieError::PathInvalid(_))),
            "Expected PathInvalid error for non-existent file"
        );
    }

    #[test]
    fn test_secure_vs_insecure_cookies() {
        // One entry with the secure flag set and one without.
        let file = cookie_file(&[
            "# Netscape HTTP Cookie File",
            ".example.com\tTRUE\t/\tTRUE\t1723157402\tSECURE\tSecureCookieValue",
            ".example.com\tTRUE\t/\tFALSE\t1723157402\tINSECURE\tInsecureCookieValue",
        ]);

        let loaded = CookieJarLoader::load_cookie_jar(file.path());
        assert!(loaded.is_ok(), "Should successfully load cookie file");

        // Only successful loading is checked; how the jar later applies the secure
        // flag is not asserted here.
    }
}