Skip to main content

vidsage_core/commentary/
generator.rs

1//! Commentary generator implementations
2
3use super::CommentaryStyle;
4use crate::video::metadata::VideoFormat;
5use crate::video::VideoMetadata;
6use chrono::{DateTime, Utc};
7use serde::{Deserialize, Serialize};
8use uuid::Uuid;
9
10/// Commentary input structure
11#[derive(Debug, Clone, Serialize, Deserialize)]
12pub struct CommentaryInput {
13    /// Video metadata
14    pub video_metadata: VideoMetadata,
15
16    /// Commentary style
17    pub style: CommentaryStyle,
18
19    /// Target language
20    pub language: String,
21
22    /// Maximum commentary length in characters
23    pub max_length: Option<usize>,
24
25    /// Minimum commentary length in characters
26    pub min_length: Option<usize>,
27
28    /// Custom prompts or instructions
29    pub custom_instructions: Option<String>,
30
31    /// Whether to include timestamps
32    pub include_timestamps: bool,
33
34    /// Whether to include keywords
35    pub include_keywords: bool,
36}
37
38impl Default for CommentaryInput {
39    fn default() -> Self {
40        Self {
41            video_metadata: VideoMetadata::new(
42                "Test Video".to_string(),
43                chrono::Duration::seconds(0),
44                (1920, 1080),
45                VideoFormat::MP4,
46            ),
47            style: CommentaryStyle::Professional,
48            language: "en".to_string(),
49            max_length: Some(1000),
50            min_length: Some(500),
51            custom_instructions: None,
52            include_timestamps: false,
53            include_keywords: true,
54        }
55    }
56}
57
58/// Commentary structure
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct Commentary {
61    /// Unique identifier for the commentary
62    pub id: String,
63
64    /// Associated video ID
65    pub video_id: String,
66
67    /// Commentary content
68    pub content: String,
69
70    /// Commentary style
71    pub style: CommentaryStyle,
72
73    /// Commentary language
74    pub language: String,
75
76    /// Quality score (0.0-1.0)
77    pub quality_score: f64,
78
79    /// Generated keywords
80    pub keywords: Vec<String>,
81
82    /// Generated timestamps (if enabled)
83    pub timestamps: Vec<(f64, String)>, // (seconds, caption)
84
85    /// Creation timestamp
86    pub created_at: DateTime<Utc>,
87
88    /// Last updated timestamp
89    pub updated_at: DateTime<Utc>,
90}
91
92impl Commentary {
93    /// Create a new Commentary instance
94    pub fn new(
95        video_id: String,
96        content: String,
97        style: CommentaryStyle,
98        language: String,
99    ) -> Self {
100        let now = Utc::now();
101        Self {
102            id: Uuid::new_v4().to_string(),
103            video_id,
104            content,
105            style,
106            language,
107            quality_score: 0.0,
108            keywords: Vec::new(),
109            timestamps: Vec::new(),
110            created_at: now,
111            updated_at: now,
112        }
113    }
114
115    /// Update the updated_at timestamp
116    pub fn update_timestamp(&mut self) {
117        self.updated_at = Utc::now();
118    }
119
120    /// Set the quality score
121    pub fn set_quality_score(&mut self, score: f64) {
122        self.quality_score = score.clamp(0.0, 1.0);
123    }
124}
125
126/// Commentary output structure
127#[derive(Debug, Clone, Serialize, Deserialize)]
128pub struct CommentaryOutput {
129    /// Generated commentary
130    pub commentary: Commentary,
131
132    /// Generation time in seconds
133    pub generation_time: f64,
134
135    /// Number of tokens used
136    pub tokens_used: u32,
137
138    /// Whether the commentary was truncated
139    pub truncated: bool,
140
141    /// Suggested improvements
142    pub suggested_improvements: Vec<String>,
143}
144
145/// OpenAI API configuration
146#[derive(Debug, Clone, Serialize, Deserialize)]
147pub struct OpenAIConfig {
148    /// OpenAI API key
149    pub api_key: String,
150    /// OpenAI API endpoint
151    pub api_endpoint: String,
152    /// Default model to use
153    pub default_model: String,
154    /// Default temperature
155    pub default_temperature: f32,
156    /// Default max tokens
157    pub default_max_tokens: u32,
158}
159
160impl Default for OpenAIConfig {
161    fn default() -> Self {
162        Self {
163            api_key: String::new(),
164            api_endpoint: "https://api.openai.com/v1/chat/completions".to_string(),
165            default_model: "gpt-3.5-turbo".to_string(),
166            default_temperature: 0.7,
167            default_max_tokens: 1000,
168        }
169    }
170}
171
172/// OpenAI chat message structure
173#[derive(Debug, Serialize, Deserialize)]
174struct ChatMessage {
175    #[serde(rename = "role")]
176    role: String,
177    #[serde(rename = "content")]
178    content: String,
179}
180
181/// OpenAI chat completion request
182#[derive(Debug, Serialize, Deserialize)]
183struct ChatCompletionRequest {
184    #[serde(rename = "model")]
185    model: String,
186    #[serde(rename = "messages")]
187    messages: Vec<ChatMessage>,
188    #[serde(rename = "temperature")]
189    temperature: f32,
190    #[serde(rename = "max_tokens")]
191    max_tokens: u32,
192    #[serde(rename = "top_p")]
193    top_p: f32,
194    #[serde(rename = "frequency_penalty")]
195    frequency_penalty: f32,
196    #[serde(rename = "presence_penalty")]
197    presence_penalty: f32,
198}
199
200/// OpenAI chat completion response
201#[derive(Debug, Serialize, Deserialize)]
202struct ChatCompletionResponse {
203    #[serde(rename = "id")]
204    id: String,
205    #[serde(rename = "object")]
206    object: String,
207    #[serde(rename = "created")]
208    created: u64,
209    #[serde(rename = "model")]
210    model: String,
211    #[serde(rename = "usage")]
212    usage: Usage,
213    #[serde(rename = "choices")]
214    choices: Vec<Choice>,
215}
216
217/// Usage information in response
218#[derive(Debug, Serialize, Deserialize)]
219struct Usage {
220    #[serde(rename = "prompt_tokens")]
221    prompt_tokens: u32,
222    #[serde(rename = "completion_tokens")]
223    completion_tokens: u32,
224    #[serde(rename = "total_tokens")]
225    total_tokens: u32,
226}
227
228/// Choice in response
229#[derive(Debug, Serialize, Deserialize)]
230struct Choice {
231    #[serde(rename = "message")]
232    message: ChatMessage,
233    #[serde(rename = "finish_reason")]
234    finish_reason: String,
235    #[serde(rename = "index")]
236    index: u32,
237}
238
239/// OpenAI commentary generator implementation
240#[derive(Clone)]
241pub struct OpenAIGenerator {
242    config: OpenAIConfig,
243    client: reqwest::Client,
244}
245
246impl OpenAIGenerator {
247    /// Create a new OpenAIGenerator instance
248    pub fn new(api_key: String) -> Self {
249        let config = OpenAIConfig {
250            api_key,
251            ..Default::default()
252        };
253
254        let client = reqwest::Client::new();
255
256        Self { config, client }
257    }
258
259    /// Create a new OpenAIGenerator instance with custom config
260    pub fn new_with_config(config: OpenAIConfig) -> Self {
261        let client = reqwest::Client::new();
262
263        Self { config, client }
264    }
265
266    /// Build the prompt for generating commentary
267    fn build_prompt(&self, input: &CommentaryInput) -> String {
268        let style_desc = match input.style {
269            CommentaryStyle::Professional => "professional, formal, technical commentary",
270            CommentaryStyle::Casual => "relaxed, conversational, friendly style",
271            CommentaryStyle::Educational => "informative, teaching-focused, clear explanations",
272            CommentaryStyle::Entertaining => "engaging, humorous, entertaining style",
273            CommentaryStyle::Analytical => "data-driven, detailed analysis, objective",
274            CommentaryStyle::Storytelling => "narrative, storytelling approach, engaging",
275            CommentaryStyle::Poetic => "creative, poetic language, artistic",
276            CommentaryStyle::Technical => "highly technical, detailed explanations, precise",
277        };
278
279        let mut prompt = format!(
280            "Generate {} for a video with the following metadata:\n\n",
281            style_desc
282        );
283
284        prompt.push_str(&format!("Title: {}\n", input.video_metadata.title));
285        prompt.push_str(&format!(
286            "Duration: {} seconds\n",
287            input.video_metadata.duration.num_seconds()
288        ));
289        prompt.push_str(&format!(
290            "Resolution: {}x{}\n",
291            input.video_metadata.resolution.0, input.video_metadata.resolution.1
292        ));
293        prompt.push_str(&format!("Format: {:?}\n", input.video_metadata.format));
294        prompt.push_str(&format!(
295            "Frame rate: {:.2} fps\n",
296            input.video_metadata.frame_rate
297        ));
298        prompt.push_str(&format!(
299            "Video codec: {}\n",
300            input.video_metadata.video_codec
301        ));
302        prompt.push_str(&format!(
303            "Audio codec: {}\n",
304            input.video_metadata.audio_codec
305        ));
306
307        if let Some(instructions) = &input.custom_instructions {
308            prompt.push_str(&format!("\nAdditional instructions: {}\n", instructions));
309        }
310
311        prompt.push_str(
312            "\nGenerate a comprehensive commentary that covers the key aspects of this video.",
313        );
314
315        if input.include_keywords {
316            prompt
317                .push_str(" Also, include 5-7 relevant keywords at the end, separated by commas.");
318        }
319
320        if input.include_timestamps {
321            prompt.push_str(" Include timestamps for key points in the format [0:00] Description.");
322        }
323
324        prompt
325    }
326
327    /// Extract keywords from commentary content
328    fn extract_keywords(&self, content: &str) -> Vec<String> {
329        // Simple keyword extraction for now - will be improved later
330        if let Some(keywords_section) = content.split("Keywords: ").nth(1) {
331            keywords_section
332                .split(", ")
333                .map(|k| k.trim().to_string())
334                .collect()
335        } else {
336            Vec::new()
337        }
338    }
339
340    /// Extract timestamps from commentary content
341    fn extract_timestamps(&self, content: &str) -> Vec<(f64, String)> {
342        // Simple timestamp extraction for now - will be improved later
343        let mut timestamps = Vec::new();
344
345        for line in content.lines() {
346            if let Some(timestamp_part) = line.split("[").nth(1) {
347                if let Some((time_str, desc)) = timestamp_part.split_once("] ") {
348                    if let Some((minutes, seconds)) = time_str.split_once(":") {
349                        if let (Ok(mins), Ok(secs)) =
350                            (minutes.parse::<f64>(), seconds.parse::<f64>())
351                        {
352                            let total_seconds = mins * 60.0 + secs;
353                            timestamps.push((total_seconds, desc.trim().to_string()));
354                        }
355                    }
356                }
357            }
358        }
359
360        timestamps
361    }
362}
363
364#[async_trait::async_trait]
365impl super::traits::CommentaryGenerator for OpenAIGenerator {
366    async fn generate_commentary(&self, input: CommentaryInput) -> crate::Result<CommentaryOutput> {
367        let start_time = std::time::Instant::now();
368
369        // Build the prompt
370        let prompt = self.build_prompt(&input);
371
372        // Create chat messages
373        let messages = vec![
374            ChatMessage {
375                role: "system".to_string(),
376                content: "You are an AI assistant specialized in generating high-quality video commentaries.".to_string(),
377            },
378            ChatMessage {
379                role: "user".to_string(),
380                content: prompt,
381            },
382        ];
383
384        // Create request
385        let request = ChatCompletionRequest {
386            model: self.config.default_model.clone(),
387            messages,
388            temperature: self.config.default_temperature,
389            max_tokens: self.config.default_max_tokens,
390            top_p: 1.0,
391            frequency_penalty: 0.0,
392            presence_penalty: 0.0,
393        };
394
395        // Send request to OpenAI API
396        let response = self
397            .client
398            .post(&self.config.api_endpoint)
399            .header("Authorization", format!("Bearer {}", self.config.api_key))
400            .header("Content-Type", "application/json")
401            .json(&request)
402            .send()
403            .await
404            .map_err(|e| crate::error::CoreError::NetworkError(e.to_string()))?;
405
406        // Parse response
407        let completion_response = response
408            .json::<ChatCompletionResponse>()
409            .await
410            .map_err(|e| crate::error::CoreError::JsonError(e.to_string()))?;
411
412        // Extract commentary content
413        let choice = completion_response
414            .choices
415            .into_iter()
416            .next()
417            .ok_or_else(|| {
418                crate::error::CoreError::CommentaryError(
419                    "No choices returned from OpenAI API".to_string(),
420                )
421            })?;
422
423        let content = choice.message.content;
424        let truncated = choice.finish_reason == "length";
425        let tokens_used = completion_response.usage.total_tokens;
426
427        // Create commentary
428        let mut commentary = Commentary::new(
429            input.video_metadata.id.clone(),
430            content.clone(),
431            input.style,
432            input.language.clone(),
433        );
434
435        // Extract keywords if enabled
436        if input.include_keywords {
437            commentary.keywords = self.extract_keywords(&content);
438        }
439
440        // Extract timestamps if enabled
441        if input.include_timestamps {
442            commentary.timestamps = self.extract_timestamps(&content);
443        }
444
445        // Calculate generation time
446        let generation_time = start_time.elapsed().as_secs_f64();
447
448        // Create output
449        let output = CommentaryOutput {
450            commentary,
451            generation_time,
452            tokens_used,
453            truncated,
454            suggested_improvements: Vec::new(),
455        };
456
457        Ok(output)
458    }
459
460    async fn evaluate_commentary(&self, commentary: &Commentary) -> crate::Result<f64> {
461        // Build evaluation prompt
462        let prompt = format!(
463            "Evaluate the quality of the following video commentary on a scale of 0.0 to 1.0, \
464            where 1.0 is perfect. Consider relevance, clarity, engagement, and style appropriateness. \
465            Only return a single floating point number without any explanation.\n\n{}",
466            commentary.content
467        );
468
469        // Create chat messages
470        let messages = vec![
471            ChatMessage {
472                role: "system".to_string(),
473                content: "You are an AI assistant specialized in evaluating video commentaries."
474                    .to_string(),
475            },
476            ChatMessage {
477                role: "user".to_string(),
478                content: prompt,
479            },
480        ];
481
482        // Create request
483        let request = ChatCompletionRequest {
484            model: self.config.default_model.clone(),
485            messages,
486            temperature: 0.0, // More deterministic for evaluation
487            max_tokens: 10,
488            top_p: 1.0,
489            frequency_penalty: 0.0,
490            presence_penalty: 0.0,
491        };
492
493        // Send request to OpenAI API
494        let response = self
495            .client
496            .post(&self.config.api_endpoint)
497            .header("Authorization", format!("Bearer {}", self.config.api_key))
498            .header("Content-Type", "application/json")
499            .json(&request)
500            .send()
501            .await
502            .map_err(|e| crate::error::CoreError::NetworkError(e.to_string()))?;
503
504        // Parse response
505        let completion_response = response
506            .json::<ChatCompletionResponse>()
507            .await
508            .map_err(|e| crate::error::CoreError::JsonError(e.to_string()))?;
509
510        // Extract score
511        let choice = completion_response
512            .choices
513            .into_iter()
514            .next()
515            .ok_or_else(|| {
516                crate::error::CoreError::CommentaryError(
517                    "No choices returned from OpenAI API".to_string(),
518                )
519            })?;
520
521        let score_str = choice.message.content.trim();
522        let score = score_str.parse::<f64>().map_err(|e| {
523            crate::error::CoreError::CommentaryError(format!("Failed to parse score: {}", e))
524        })?;
525
526        Ok(score.clamp(0.0, 1.0))
527    }
528
529    async fn improve_commentary(
530        &self,
531        commentary: &Commentary,
532        feedback: &str,
533    ) -> crate::Result<Commentary> {
534        // Build improvement prompt
535        let prompt = format!(
536            "Improve the following video commentary based on the feedback provided. \
537            Maintain the same style and language.\n\nCommentary:\n{}\n\nFeedback:\n{}",
538            commentary.content, feedback
539        );
540
541        // Create chat messages
542        let messages = vec![
543            ChatMessage {
544                role: "system".to_string(),
545                content: "You are an AI assistant specialized in improving video commentaries based on feedback.".to_string(),
546            },
547            ChatMessage {
548                role: "user".to_string(),
549                content: prompt,
550            },
551        ];
552
553        // Create request
554        let request = ChatCompletionRequest {
555            model: self.config.default_model.clone(),
556            messages,
557            temperature: self.config.default_temperature,
558            max_tokens: self.config.default_max_tokens,
559            top_p: 1.0,
560            frequency_penalty: 0.0,
561            presence_penalty: 0.0,
562        };
563
564        // Send request to OpenAI API
565        let response = self
566            .client
567            .post(&self.config.api_endpoint)
568            .header("Authorization", format!("Bearer {}", self.config.api_key))
569            .header("Content-Type", "application/json")
570            .json(&request)
571            .send()
572            .await
573            .map_err(|e| crate::error::CoreError::NetworkError(e.to_string()))?;
574
575        // Parse response
576        let completion_response = response
577            .json::<ChatCompletionResponse>()
578            .await
579            .map_err(|e| crate::error::CoreError::JsonError(e.to_string()))?;
580
581        // Extract improved content
582        let choice = completion_response
583            .choices
584            .into_iter()
585            .next()
586            .ok_or_else(|| {
587                crate::error::CoreError::CommentaryError(
588                    "No choices returned from OpenAI API".to_string(),
589                )
590            })?;
591
592        let improved_content = choice.message.content;
593
594        // Create improved commentary
595        let mut improved_commentary = Commentary::new(
596            commentary.video_id.clone(),
597            improved_content.clone(),
598            commentary.style,
599            commentary.language.clone(),
600        );
601
602        // Extract keywords and timestamps if present
603        improved_commentary.keywords = self.extract_keywords(&improved_content);
604        improved_commentary.timestamps = self.extract_timestamps(&improved_content);
605
606        Ok(improved_commentary)
607    }
608
609    async fn generate_multiple(
610        &self,
611        input: CommentaryInput,
612        styles: Vec<CommentaryStyle>,
613    ) -> crate::Result<Vec<CommentaryOutput>> {
614        let mut handles = Vec::new();
615
616        for style in styles {
617            let mut input_with_style = input.clone();
618            input_with_style.style = style;
619            let generator = self.clone();
620
621            let handle =
622                tokio::spawn(async move { generator.generate_commentary(input_with_style).await });
623
624            handles.push(handle);
625        }
626
627        let mut results = Vec::new();
628        for handle in handles {
629            let result = handle.await.map_err(|e| {
630                crate::error::CoreError::InternalError(format!("Task failed: {}", e))
631            })??;
632            results.push(result);
633        }
634
635        Ok(results)
636    }
637}
638
#[cfg(test)]
mod tests {
    use super::*;

    /// Build the commentary fixture shared by most tests.
    fn sample_commentary() -> Commentary {
        Commentary::new(
            "test-video-id".to_string(),
            "This is a test commentary".to_string(),
            CommentaryStyle::Professional,
            "en".to_string(),
        )
    }

    #[test]
    fn test_commentary_input_default() {
        let defaults = CommentaryInput::default();

        assert_eq!(defaults.video_metadata.title, "Test Video");
        assert_eq!(defaults.style, CommentaryStyle::Professional);
        assert_eq!(defaults.language, "en");
        assert_eq!(defaults.max_length, Some(1000));
        assert_eq!(defaults.min_length, Some(500));
        assert!(!defaults.include_timestamps);
        assert!(defaults.include_keywords);
    }

    #[test]
    fn test_commentary_creation() {
        let created = sample_commentary();

        assert_eq!(created.video_id, "test-video-id");
        assert_eq!(created.content, "This is a test commentary");
        assert_eq!(created.style, CommentaryStyle::Professional);
        assert_eq!(created.language, "en");
        assert_eq!(created.quality_score, 0.0);
        assert!(created.keywords.is_empty());
        assert!(created.timestamps.is_empty());
        assert_eq!(created.created_at, created.updated_at);
    }

    #[test]
    fn test_commentary_update_timestamp() {
        let mut subject = sample_commentary();
        let before = subject.updated_at;

        // Give the clock time to advance so the comparison below is strict.
        std::thread::sleep(std::time::Duration::from_millis(10));
        subject.update_timestamp();

        assert!(subject.updated_at > before);
    }

    #[test]
    fn test_commentary_set_quality_score() {
        let mut subject = sample_commentary();

        // In-range scores are stored unchanged.
        subject.set_quality_score(0.75);
        assert_eq!(subject.quality_score, 0.75);

        // Scores below the range are raised to 0.0.
        subject.set_quality_score(-0.5);
        assert_eq!(subject.quality_score, 0.0);

        // Scores above the range are lowered to 1.0.
        subject.set_quality_score(1.5);
        assert_eq!(subject.quality_score, 1.0);
    }

    #[test]
    fn test_commentary_serialization() {
        let original = sample_commentary();

        let json = serde_json::to_string(&original).unwrap();
        let restored: Commentary = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.id, original.id);
        assert_eq!(restored.video_id, original.video_id);
        assert_eq!(restored.content, original.content);
        assert_eq!(restored.style, original.style);
        assert_eq!(restored.language, original.language);
    }

    #[test]
    fn test_commentary_output_serialization() {
        let original = CommentaryOutput {
            commentary: sample_commentary(),
            generation_time: 2.5,
            tokens_used: 100,
            truncated: false,
            suggested_improvements: vec![
                "Add more details".to_string(),
                "Improve flow".to_string(),
            ],
        };

        let json = serde_json::to_string(&original).unwrap();
        let restored: CommentaryOutput = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.generation_time, original.generation_time);
        assert_eq!(restored.tokens_used, original.tokens_used);
        assert_eq!(restored.truncated, original.truncated);
        assert_eq!(
            restored.suggested_improvements,
            original.suggested_improvements
        );
    }
}
767}