use reqwest::Client;
use std::collections::HashMap;
use std::fmt;
use crate::errors::{CouldNotRetrieveTranscript, CouldNotRetrieveTranscriptReason};
use crate::fetched_transcript::FetchedTranscript;
use crate::innertube_client::InnerTubeClient;
use crate::models::TranslationLanguage;
use crate::transcript_parser::TranscriptParser;
/// Metadata describing a single transcript (caption track) of a video.
///
/// The value holds no transcript text itself; `fetch` downloads and parses
/// the actual snippets.
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct Transcript {
/// Identifier of the video; `fetch` passes it to the InnerTube client to
/// obtain the caption track list.
pub video_id: String,
/// URL associated with this transcript. `translate` appends
/// `&tlang=<code>` to it when producing a translated copy.
pub url: String,
/// Language name printed in parentheses by the `Display` impl
/// (presumably human-readable, e.g. "English" — confirm against the producer).
pub language: String,
/// Language code that `fetch` compares against each caption track's
/// `languageCode` entry.
pub language_code: String,
/// Flag copied into the `FetchedTranscript` returned by `fetch`
/// (presumably true for auto-generated captions — confirm against the producer).
pub is_generated: bool,
/// Languages this transcript can be translated into; the transcript counts
/// as translatable when this list is non-empty.
pub translation_languages: Vec<TranslationLanguage>,
/// Lookup from translation language code to language name, built by `new`
/// from `translation_languages`.
pub translation_languages_map: HashMap<String, String>,
}
impl Transcript {
pub fn new(
video_id: String,
url: String,
language: String,
language_code: String,
is_generated: bool,
translation_languages: Vec<TranslationLanguage>,
) -> Self {
let translation_languages_map = translation_languages
.iter()
.map(|lang| (lang.language_code.clone(), lang.language.clone()))
.collect();
Self {
video_id,
url,
language,
language_code,
is_generated,
translation_languages,
translation_languages_map,
}
}
pub async fn fetch(
&self,
client: &Client,
preserve_formatting: bool,
) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
let innertube_client = InnerTubeClient::new(client.clone());
let data = innertube_client
.get_transcript_list(&self.video_id)
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
format!("InnerTube API failed: {}", e),
)),
})?;
let captions = data
.get("captions")
.ok_or_else(|| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
"No captions found in InnerTube response".to_string(),
)),
})?;
let player_captions_renderer =
captions
.get("playerCaptionsTracklistRenderer")
.ok_or_else(|| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
"No playerCaptionsTracklistRenderer found".to_string(),
)),
})?;
let caption_tracks = player_captions_renderer
.get("captionTracks")
.and_then(|ct| ct.as_array())
.ok_or_else(|| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
"No caption tracks found in InnerTube response".to_string(),
)),
})?;
let mut matching_url = None;
for track in caption_tracks {
if let Some(language_code) = track.get("languageCode").and_then(|lc| lc.as_str()) {
if language_code == self.language_code {
if let Some(base_url) = track.get("baseUrl").and_then(|url| url.as_str()) {
matching_url = Some(base_url.to_string());
break;
}
}
}
}
let transcript_url = matching_url.ok_or_else(|| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::NoTranscriptFound {
requested_language_codes: vec![self.language_code.clone()],
transcript_data: crate::transcript_list::TranscriptList::new(
self.video_id.clone(),
HashMap::new(),
HashMap::new(),
vec![],
),
}),
})?;
let response =
client
.get(&transcript_url)
.send()
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
format!("Failed to fetch transcript: {}", e),
)),
})?;
if response.status() != reqwest::StatusCode::OK {
return Err(CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
format!("YouTube returned status code {}", response.status()),
)),
});
}
let text = response
.text()
.await
.map_err(|e| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
format!("Failed to read transcript response: {}", e),
)),
})?;
if text.is_empty() {
return Err(CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeRequestFailed(
"YouTube returned empty transcript content. This may indicate additional restrictions or API changes.".to_string()
)),
});
}
let snippets = TranscriptParser::new(preserve_formatting)
.parse(&text)
.map_err(|e| CouldNotRetrieveTranscript {
video_id: self.video_id.clone(),
reason: Some(CouldNotRetrieveTranscriptReason::YouTubeDataUnparsable(
format!("Failed to parse transcript XML: {}", e),
)),
})?;
Ok(FetchedTranscript {
snippets,
video_id: self.video_id.clone(),
language: self.language.clone(),
language_code: self.language_code.clone(),
is_generated: self.is_generated,
})
}
/// Returns `true` when at least one translation language is listed for this
/// transcript.
pub fn is_translatable(&self) -> bool {
    self.translation_languages.first().is_some()
}
/// Produces a copy of this transcript that targets another language.
///
/// The copy keeps `video_id`, `is_generated` and the translation tables,
/// takes its `language` from `translation_languages_map`, sets
/// `language_code` to the requested code, and gets `&tlang=<language_code>`
/// appended to `url`. No network request is made.
///
/// # Errors
/// Returns `CouldNotRetrieveTranscript` with
/// * `TranslationUnavailable` when the transcript lists no translation
///   languages at all;
/// * `TranslationLanguageUnavailable` when `language_code` is not among the
///   listed translation languages (the message enumerates the available ones).
pub fn translate(&self, language_code: &str) -> Result<Self, CouldNotRetrieveTranscript> {
    if !self.is_translatable() {
        return Err(CouldNotRetrieveTranscript {
            video_id: self.video_id.clone(),
            reason: Some(CouldNotRetrieveTranscriptReason::TranslationUnavailable(
                "This transcript cannot be translated".to_string(),
            )),
        });
    }

    // A single map lookup both validates the code and yields the language
    // name, so no `unwrap` is needed afterwards.
    let language = match self.translation_languages_map.get(language_code) {
        Some(language) => language.clone(),
        None => {
            let available_langs = self
                .translation_languages
                .iter()
                .map(|l| format!("{} ({})", l.language, l.language_code))
                .collect::<Vec<_>>()
                .join(", ");
            return Err(CouldNotRetrieveTranscript {
                video_id: self.video_id.clone(),
                reason: Some(
                    CouldNotRetrieveTranscriptReason::TranslationLanguageUnavailable(format!(
                        "Translation to '{}' is not available. Available languages: {}",
                        language_code, available_langs
                    )),
                ),
            });
        }
    };

    Ok(Self {
        video_id: self.video_id.clone(),
        url: format!("{}&tlang={}", self.url, language_code),
        language,
        language_code: language_code.to_string(),
        is_generated: self.is_generated,
        translation_languages: self.translation_languages.clone(),
        translation_languages_map: self.translation_languages_map.clone(),
    })
}
/// Convenience wrapper: calls `translate` for `language_code`, then `fetch`
/// on the resulting transcript.
///
/// # Errors
/// Propagates the error from `translate` without issuing any request, or the
/// error from `fetch` on the translated transcript.
pub async fn translate_and_fetch(
    &self,
    client: &Client,
    language_code: &str,
    preserve_formatting: bool,
) -> Result<FetchedTranscript, CouldNotRetrieveTranscript> {
    self.translate(language_code)?
        .fetch(client, preserve_formatting)
        .await
}
/// Borrows the transcript's `language` field.
pub fn language(&self) -> &str {
    self.language.as_str()
}
/// Borrows the transcript's `language_code` field.
pub fn language_code(&self) -> &str {
    self.language_code.as_str()
}
pub fn is_generated(&self) -> bool {
self.is_generated
}
}
/// Formats the transcript as `<language_code> (<language>)`, immediately
/// followed by `[TRANSLATABLE]` when translation languages are available.
impl fmt::Display for Transcript {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The marker is appended without a separating space.
        let marker = if self.is_translatable() {
            "[TRANSLATABLE]"
        } else {
            ""
        };
        f.write_fmt(format_args!(
            "{} ({}){}",
            self.language_code, self.language, marker
        ))
    }
}