use serde::{Deserialize, Serialize};
use std::collections::HashMap;
use std::iter::Iterator;
use std::vec::IntoIter;
use crate::models::FetchedTranscriptSnippet;
/// A transcript together with metadata describing where it came from.
///
/// Serializes and deserializes through serde with the field names used below.
#[derive(Clone, Debug, Deserialize, Serialize)]
pub struct FetchedTranscript {
    /// The snippets that make up the transcript, in stored order.
    pub snippets: Vec<FetchedTranscriptSnippet>,
    /// Identifier of the video this transcript belongs to.
    pub video_id: String,
    /// Language name (the test fixture in this file uses `"English"`).
    pub language: String,
    /// Language code (the test fixture in this file uses `"en"`).
    pub language_code: String,
    /// Flag exposed via `is_generated()`; presumably marks automatically
    /// generated transcripts (confirm against the producer of this value).
    pub is_generated: bool,
}
impl FetchedTranscript {
    /// Converts a float into a JSON value without panicking.
    ///
    /// JSON numbers cannot represent NaN or ±infinity, so
    /// `serde_json::Number::from_f64` returns `None` for them; those inputs
    /// are mapped to `serde_json::Value::Null`.
    fn json_number(value: f64) -> serde_json::Value {
        serde_json::Number::from_f64(value)
            .map_or(serde_json::Value::Null, serde_json::Value::Number)
    }

    /// Converts every snippet into a map with the keys `"text"`, `"start"`
    /// and `"duration"`.
    ///
    /// The returned vector has one entry per snippet, in snippet order.
    /// Non-finite `start`/`duration` values are emitted as JSON `null`
    /// instead of causing a panic.
    pub fn to_raw_data(&self) -> Vec<HashMap<String, serde_json::Value>> {
        self.snippets
            .iter()
            .map(|snippet| {
                // Exactly three keys are inserted per snippet.
                let mut map = HashMap::with_capacity(3);
                map.insert(
                    "text".to_string(),
                    serde_json::Value::String(snippet.text.clone()),
                );
                map.insert("start".to_string(), Self::json_number(snippet.start));
                map.insert(
                    "duration".to_string(),
                    Self::json_number(snippet.duration),
                );
                map
            })
            .collect()
    }

    /// Returns the text of all snippets joined with a single space.
    ///
    /// An empty transcript yields an empty string.
    pub fn text(&self) -> String {
        // Borrow each snippet's text; only the joined result is allocated.
        self.snippets
            .iter()
            .map(|snippet| snippet.text.as_str())
            .collect::<Vec<&str>>()
            .join(" ")
    }

    /// Returns the snippets as a borrowed slice.
    pub fn parts(&self) -> &[FetchedTranscriptSnippet] {
        &self.snippets
    }

    /// Returns the `language` field.
    pub fn language(&self) -> &str {
        &self.language
    }

    /// Returns the `language_code` field.
    pub fn language_code(&self) -> &str {
        &self.language_code
    }

    /// Returns the `is_generated` flag.
    pub fn is_generated(&self) -> bool {
        self.is_generated
    }

    /// Returns `start + duration` of the last snippet, or `0.0` when the
    /// transcript has no snippets.
    ///
    /// Only the final snippet is inspected; this assumes snippets are stored
    /// in chronological order (not enforced here).
    pub fn duration(&self) -> f64 {
        self.snippets
            .last()
            .map_or(0.0, |last| last.start + last.duration)
    }
}
impl IntoIterator for FetchedTranscript {
type Item = FetchedTranscriptSnippet;
type IntoIter = IntoIter<Self::Item>;
fn into_iter(self) -> Self::IntoIter {
self.snippets.into_iter()
}
}
/// Borrowing iteration: yields a shared reference to each snippet, so the
/// transcript remains usable afterwards.
impl<'a> IntoIterator for &'a FetchedTranscript {
    type Item = &'a FetchedTranscriptSnippet;
    type IntoIter = std::slice::Iter<'a, FetchedTranscriptSnippet>;

    fn into_iter(self) -> Self::IntoIter {
        self.snippets.as_slice().iter()
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use serde_json::json;

    /// Builds the three-snippet fixture shared by the tests in this module.
    fn create_test_transcript() -> FetchedTranscript {
        let snippet = |text: &str, start: f64, duration: f64| FetchedTranscriptSnippet {
            text: text.to_string(),
            start,
            duration,
        };

        FetchedTranscript {
            snippets: vec![
                snippet("Hello world", 0.0, 3.5),
                snippet("This is a test", 3.5, 2.8),
                snippet("of the transcript system", 6.3, 4.2),
            ],
            video_id: "test123".to_string(),
            language: "English".to_string(),
            language_code: "en".to_string(),
            is_generated: false,
        }
    }

    /// `to_raw_data` emits one map per snippet with the expected values.
    #[test]
    fn test_to_raw_data() {
        let raw_data = create_test_transcript().to_raw_data();
        assert_eq!(raw_data.len(), 3);

        // Indexing panics on a missing key, just like `get(..).unwrap()`.
        let first = &raw_data[0];
        assert_eq!(first["text"], json!("Hello world"));
        assert_eq!(first["start"], json!(0.0));
        assert_eq!(first["duration"], json!(3.5));

        let third = &raw_data[2];
        assert_eq!(third["text"], json!("of the transcript system"));
        assert_eq!(third["start"], json!(6.3));
        assert_eq!(third["duration"], json!(4.2));
    }

    /// `text` joins the snippet texts with single spaces.
    #[test]
    fn test_text() {
        let expected = "Hello world This is a test of the transcript system";
        assert_eq!(create_test_transcript().text(), expected);
    }

    /// `parts` exposes the snippets unchanged.
    #[test]
    fn test_parts() {
        let transcript = create_test_transcript();
        let snippets = transcript.parts();
        assert_eq!(snippets.len(), 3);
        assert_eq!(snippets[0].text, "Hello world");
        assert_eq!(snippets[1].start, 3.5);
        assert_eq!(snippets[2].duration, 4.2);
    }

    /// The metadata getters return the fixture values.
    #[test]
    fn test_language_getters() {
        let transcript = create_test_transcript();
        assert_eq!(transcript.language(), "English");
        assert_eq!(transcript.language_code(), "en");
        assert!(!transcript.is_generated());
    }

    /// `duration` is the end of the last snippet, or zero when empty.
    #[test]
    fn test_duration() {
        assert_eq!(create_test_transcript().duration(), 10.5);

        let empty_transcript = FetchedTranscript {
            snippets: Vec::new(),
            video_id: "empty123".to_string(),
            language: "English".to_string(),
            language_code: "en".to_string(),
            is_generated: false,
        };
        assert_eq!(empty_transcript.duration(), 0.0);
    }

    /// Both the borrowing and the consuming iterator visit every snippet.
    #[test]
    fn test_into_iterator() {
        let transcript = create_test_transcript();

        // Borrowing iteration leaves `transcript` available afterwards.
        let mut visited = 0;
        for snippet in &transcript {
            assert!(snippet.start >= 0.0);
            assert!(snippet.duration > 0.0);
            assert!(!snippet.text.is_empty());
            visited += 1;
        }
        assert_eq!(visited, 3);

        // Consuming iteration yields the snippets by value, in order.
        let segments = transcript
            .into_iter()
            .collect::<Vec<FetchedTranscriptSnippet>>();
        let texts: Vec<&str> = segments.iter().map(|s| s.text.as_str()).collect();
        assert_eq!(
            texts,
            ["Hello world", "This is a test", "of the transcript system"]
        );
    }

    /// A transcript survives a JSON round trip.
    #[test]
    fn test_serialization() {
        let serialized = serde_json::to_string(&create_test_transcript()).unwrap();

        let expected_fragments = [
            "\"video_id\":\"test123\"",
            "\"language\":\"English\"",
            "\"language_code\":\"en\"",
            "\"is_generated\":false",
        ];
        for fragment in &expected_fragments {
            assert!(serialized.contains(*fragment));
        }

        let deserialized: FetchedTranscript = serde_json::from_str(&serialized).unwrap();
        assert_eq!(deserialized.video_id, "test123");
        assert_eq!(deserialized.language, "English");
        assert_eq!(deserialized.snippets.len(), 3);
        assert_eq!(deserialized.snippets[0].text, "Hello world");
    }
}