use serde::{Deserialize, Serialize};
use std::collections::HashMap;
/// Feature bundle describing one piece of content.
///
/// `categorical_features` and `numerical_features` turn a subset of these
/// fields into flat vectors; the remaining fields are only stored here.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct ContentFeatures {
/// Category labels; emitted verbatim by `categorical_features`.
pub categories: Vec<String>,
/// Tag labels; emitted verbatim by `categorical_features`, after the categories.
pub tags: Vec<String>,
/// Duration in milliseconds (per the field name); `numerical_features`
/// divides it by 3_600_000 and treats `None` as 0.
pub duration_ms: Option<i64>,
/// Language identifier; emitted as `lang:<value>` when present.
/// NOTE(review): the expected code format (e.g. ISO 639) is not visible here.
pub language: Option<String>,
/// Year; emitted as `year:<value>` when present.
/// NOTE(review): presumably the release year - confirm with the producer of this data.
pub year: Option<u16>,
/// Content type label; emitted as `type:<value>`. `ContentFeatures::new` sets it to `"video"`.
pub content_type: String,
/// Technical quality attributes (resolution, bitrate, HDR, ...).
pub quality_features: QualityFeatures,
/// Audience engagement counters and rates.
pub engagement_features: EngagementFeatures,
/// Textual attributes such as the title and description.
pub text_features: TextFeatures,
/// Caller-defined numeric features keyed by name; their values are appended
/// to the end of the vector built by `numerical_features`.
pub custom_features: HashMap<String, f32>,
}
/// Technical quality attributes of a piece of content.
///
/// The derived `Default` leaves every optional field as `None` and `has_hdr` as `false`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct QualityFeatures {
/// Resolution label.
/// NOTE(review): the string format (e.g. "1080p" vs "1920x1080") is not fixed by this file.
pub resolution: Option<String>,
/// Bitrate; log-scaled by `ContentFeatures::numerical_features`.
/// NOTE(review): the unit (bps vs kbps) is not visible here - confirm.
pub bitrate: Option<u32>,
/// Frame rate; stored only, not part of the numerical vector built in this file.
pub framerate: Option<f32>,
/// Whether the content has HDR; encoded as 0.0 / 1.0 in the numerical vector.
pub has_hdr: bool,
/// Audio quality label; stored only, not interpreted in this file.
pub audio_quality: Option<String>,
}
/// Audience engagement statistics for a piece of content.
///
/// The derived `Default` zeroes every counter and leaves the optional rates as `None`.
#[derive(Debug, Clone, Serialize, Deserialize, Default)]
pub struct EngagementFeatures {
/// Average rating; `ContentFeatures::numerical_features` divides it by 5.0.
/// NOTE(review): this suggests a 0-5 scale - confirm against the data source.
pub avg_rating: Option<f32>,
/// Number of views; log-scaled in the numerical vector.
pub view_count: u64,
/// Number of likes; log-scaled in the numerical vector.
pub like_count: u64,
/// Number of comments; stored only, not part of the numerical vector built in this file.
pub comment_count: u64,
/// Number of shares; stored only, not part of the numerical vector built in this file.
pub share_count: u64,
/// Completion rate; passed through unscaled, with `None` treated as 0.0.
/// NOTE(review): presumably a fraction in [0, 1] - confirm.
pub completion_rate: Option<f32>,
}
/// Textual attributes of a piece of content.
///
/// `ContentFeatures::new` fills in only `title`; the optional fields start as
/// `None` and `entities` starts empty.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct TextFeatures {
/// Title of the content.
pub title: String,
/// Optional description text.
pub description: Option<String>,
/// Optional transcript text.
pub transcript: Option<String>,
/// Entity strings associated with the content.
/// NOTE(review): how these are produced (e.g. by NER) is not visible in this file.
pub entities: Vec<String>,
}
impl ContentFeatures {
    /// Number of fixed slots at the front of the vector returned by
    /// [`ContentFeatures::numerical_features`].
    const FIXED_NUMERICAL_FEATURES: usize = 7;

    /// Creates a feature set holding only a title and its categories.
    ///
    /// Every other field starts at a neutral value: empty collections, `None`
    /// for optional fields, default quality/engagement features, and a
    /// `content_type` of `"video"`.
    #[must_use]
    pub fn new(title: String, categories: Vec<String>) -> Self {
        Self {
            categories,
            tags: Vec::new(),
            duration_ms: None,
            language: None,
            year: None,
            content_type: String::from("video"),
            quality_features: QualityFeatures::default(),
            engagement_features: EngagementFeatures::default(),
            text_features: TextFeatures {
                title,
                description: None,
                transcript: None,
                entities: Vec::new(),
            },
            custom_features: HashMap::new(),
        }
    }

    /// Returns the categorical features as a flat list of strings.
    ///
    /// Order: all categories, all tags, `lang:<language>` (if set),
    /// `year:<year>` (if set), and finally `type:<content_type>`.
    #[must_use]
    pub fn categorical_features(&self) -> Vec<String> {
        let optional_entries =
            usize::from(self.language.is_some()) + usize::from(self.year.is_some());
        let mut features = Vec::with_capacity(
            self.categories.len() + self.tags.len() + optional_entries + 1,
        );

        // Clone element-wise into the output instead of cloning each whole
        // vector first and then moving it.
        features.extend_from_slice(&self.categories);
        features.extend_from_slice(&self.tags);

        if let Some(lang) = &self.language {
            features.push(format!("lang:{lang}"));
        }
        if let Some(year) = self.year {
            features.push(format!("year:{year}"));
        }
        features.push(format!("type:{}", self.content_type));
        features
    }

    /// Returns the numerical features as a flat vector.
    ///
    /// Layout of the fixed prefix:
    ///
    /// | index | value                                         |
    /// |-------|-----------------------------------------------|
    /// | 0     | `duration_ms / 3_600_000` (0 when unset)      |
    /// | 1     | `avg_rating / 5` (0 when unset)               |
    /// | 2     | `max(ln(view_count), 0) / 20`                 |
    /// | 3     | `max(ln(like_count), 0) / 15`                 |
    /// | 4     | `completion_rate` (0 when unset)              |
    /// | 5     | `max(ln(bitrate), 0) / 15` (0 when unset)     |
    /// | 6     | `1.0` if `has_hdr`, else `0.0`                |
    ///
    /// The values of `custom_features` follow, ordered by key so that equal
    /// feature sets always produce identical vectors (`HashMap` iteration
    /// order is arbitrary and differs between map instances).
    #[must_use]
    pub fn numerical_features(&self) -> Vec<f32> {
        let engagement = &self.engagement_features;
        let quality = &self.quality_features;

        let mut features =
            Vec::with_capacity(Self::FIXED_NUMERICAL_FEATURES + self.custom_features.len());

        // The `as f32` casts are deliberately lossy: only the magnitude matters here.
        features.push(self.duration_ms.unwrap_or(0) as f32 / 3_600_000.0);
        features.push(engagement.avg_rating.unwrap_or(0.0) / 5.0);
        features.push(Self::log_scaled(engagement.view_count as f32, 20.0));
        features.push(Self::log_scaled(engagement.like_count as f32, 15.0));
        features.push(engagement.completion_rate.unwrap_or(0.0));
        // Clamped like the counters above, so a bitrate of 0 yields 0.0 rather than -inf.
        features.push(
            quality
                .bitrate
                .map_or(0.0, |bitrate| Self::log_scaled(bitrate as f32, 15.0)),
        );
        features.push(f32::from(quality.has_hdr));

        let mut custom: Vec<(&String, &f32)> = self.custom_features.iter().collect();
        custom.sort_unstable_by(|left, right| left.0.cmp(right.0));
        features.extend(custom.into_iter().map(|(_, value)| *value));

        features
    }

    /// Natural log of `raw`, clamped below at 0 (so `ln(0) = -inf` maps to 0),
    /// then divided by `divisor`.
    fn log_scaled(raw: f32, divisor: f32) -> f32 {
        raw.ln().max(0.0) / divisor
    }
}
/// Stateless entry point for deriving [`ContentFeatures`] from content metadata.
pub struct FeatureExtractor;

impl FeatureExtractor {
    /// Builds the features for `_metadata`.
    ///
    /// NOTE(review): this is currently a placeholder. The metadata argument is
    /// not read, and every call returns the same features: title `"Sample"`
    /// and the single category `"category"`.
    #[must_use]
    pub fn extract(_metadata: &crate::ContentMetadata) -> ContentFeatures {
        let title = "Sample".to_owned();
        let categories = vec!["category".to_owned()];
        ContentFeatures::new(title, categories)
    }
}
#[cfg(test)]
mod tests {
    use super::*;

    /// `new` keeps the supplied title and categories.
    #[test]
    fn test_content_features_creation() {
        let categories = vec!["Action".to_owned(), "Adventure".to_owned()];
        let features = ContentFeatures::new("Test Video".to_owned(), categories);

        assert_eq!(features.categories.len(), 2);
        assert_eq!(features.text_features.title, "Test Video");
    }

    /// Categories show up in the categorical feature list.
    #[test]
    fn test_categorical_features() {
        let features = ContentFeatures {
            language: Some("en".to_owned()),
            year: Some(2024),
            ..ContentFeatures::new("Test".to_owned(), vec!["Drama".to_owned()])
        };

        let categorical = features.categorical_features();

        assert!(!categorical.is_empty());
        assert!(categorical.iter().any(|feature| feature == "Drama"));
    }

    /// Even a bare feature set yields a non-empty numerical vector.
    #[test]
    fn test_numerical_features() {
        let numerical = ContentFeatures::new("Test".to_owned(), Vec::new()).numerical_features();

        assert!(!numerical.is_empty());
    }

    /// Default quality features carry no HDR flag and no resolution.
    #[test]
    fn test_quality_features_default() {
        let QualityFeatures {
            has_hdr,
            resolution,
            ..
        } = QualityFeatures::default();

        assert!(!has_hdr);
        assert!(resolution.is_none());
    }

    /// Default engagement counters start at zero.
    #[test]
    fn test_engagement_features_default() {
        let EngagementFeatures {
            view_count,
            like_count,
            ..
        } = EngagementFeatures::default();

        assert_eq!(view_count, 0);
        assert_eq!(like_count, 0);
    }
}