// kino_frequency/types.rs

//! Core types for frequency analysis.
2
3use serde::{Deserialize, Serialize};
4
/// Raw audio data extracted from a video file.
#[derive(Debug, Clone)]
pub struct AudioData {
    /// PCM samples normalized to [-1.0, 1.0]
    pub samples: Vec<f32>,
    /// Sample rate in Hz
    pub sample_rate: u32,
    /// Number of audio channels
    pub channels: u32,
    /// Duration in seconds
    pub duration_secs: f64,
}

impl AudioData {
    /// Create new audio data from samples.
    ///
    /// `channels` is set to 1 and `duration_secs` is derived as
    /// `samples.len() / sample_rate`. A `sample_rate` of 0 yields a duration
    /// of 0.0 instead of an infinite or NaN value.
    pub fn new(samples: Vec<f32>, sample_rate: u32) -> Self {
        let duration_secs = if sample_rate == 0 {
            0.0
        } else {
            samples.len() as f64 / f64::from(sample_rate)
        };
        Self {
            samples,
            sample_rate,
            channels: 1,
            duration_secs,
        }
    }

    /// Get a slice of samples for a specific time range.
    ///
    /// Times are converted to sample indices as `seconds * sample_rate`
    /// (`channels` is not factored in) and clamped to the available samples.
    /// Never panics: a negative or NaN time maps to index 0, and a range whose
    /// start lies at or beyond its end produces an empty slice.
    pub fn slice(&self, start_secs: f64, end_secs: f64) -> &[f32] {
        let rate = f64::from(self.sample_rate);
        let available = self.samples.len();
        // Float-to-integer `as` casts saturate: negative and NaN become 0,
        // values past `usize::MAX` become `usize::MAX`.
        let end_idx = ((end_secs * rate) as usize).min(available);
        // Capping the start at the end keeps `start..end` a valid range even
        // when the caller passes a reversed interval.
        let start_idx = ((start_secs * rate) as usize).min(end_idx);
        &self.samples[start_idx..end_idx]
    }

    /// Get number of samples.
    pub fn len(&self) -> usize {
        self.samples.len()
    }

    /// Check if audio data is empty.
    pub fn is_empty(&self) -> bool {
        self.samples.is_empty()
    }
}
47
48/// A dominant frequency detected in the audio.
49#[derive(Debug, Clone, Serialize, Deserialize)]
50pub struct DominantFrequency {
51    /// Frequency in Hz
52    pub frequency_hz: f32,
53    /// Magnitude (normalized 0-1)
54    pub magnitude: f32,
55    /// Rank (1 = highest magnitude)
56    pub rank: usize,
57}
58
59/// Complete frequency analysis results.
60#[derive(Debug, Clone)]
61pub struct FrequencyAnalysis {
62    /// Full magnitude spectrum
63    pub spectrum: Vec<f32>,
64    /// Frequency bins (Hz)
65    pub frequencies: Vec<f32>,
66    /// Spectral centroid (brightness)
67    pub spectral_centroid: f32,
68    /// Spectral rolloff (95% energy point)
69    pub spectral_rolloff: f32,
70    /// Spectral flatness (tonality measure)
71    pub spectral_flatness: f32,
72    /// Band energies (sub-bass, bass, low-mid, mid, high-mid, high)
73    pub band_energies: BandEnergies,
74    /// Zero crossing rate
75    pub zero_crossing_rate: f32,
76}
77
78/// Energy distribution across frequency bands.
79#[derive(Debug, Clone, Default, Serialize, Deserialize)]
80pub struct BandEnergies {
81    /// Sub-bass: 20-60 Hz
82    pub sub_bass: f32,
83    /// Bass: 60-250 Hz
84    pub bass: f32,
85    /// Low-mid: 250-500 Hz
86    pub low_mid: f32,
87    /// Mid: 500-2000 Hz
88    pub mid: f32,
89    /// High-mid: 2000-4000 Hz
90    pub high_mid: f32,
91    /// High: 4000-20000 Hz
92    pub high: f32,
93}
94
95impl BandEnergies {
96    /// Create band energies from a spectrum and frequency bins.
97    pub fn from_spectrum(spectrum: &[f32], frequencies: &[f32]) -> Self {
98        let bands = [
99            (20.0, 60.0),     // sub_bass
100            (60.0, 250.0),    // bass
101            (250.0, 500.0),   // low_mid
102            (500.0, 2000.0),  // mid
103            (2000.0, 4000.0), // high_mid
104            (4000.0, 20000.0), // high
105        ];
106
107        let mut energies = [0.0f32; 6];
108
109        for (i, (low, high)) in bands.iter().enumerate() {
110            for (j, &freq) in frequencies.iter().enumerate() {
111                if freq >= *low && freq < *high {
112                    energies[i] += spectrum[j];
113                }
114            }
115        }
116
117        // Normalize
118        let total: f32 = energies.iter().sum();
119        if total > 0.0 {
120            for e in &mut energies {
121                *e /= total;
122            }
123        }
124
125        Self {
126            sub_bass: energies[0],
127            bass: energies[1],
128            low_mid: energies[2],
129            mid: energies[3],
130            high_mid: energies[4],
131            high: energies[5],
132        }
133    }
134
135    /// Convert to a vector for ML features.
136    pub fn to_vec(&self) -> Vec<f32> {
137        vec![
138            self.sub_bass,
139            self.bass,
140            self.low_mid,
141            self.mid,
142            self.high_mid,
143            self.high,
144        ]
145    }
146}
147
148/// Compact frequency signature for similarity matching.
149#[derive(Debug, Clone, Serialize, Deserialize)]
150pub struct FrequencySignature {
151    /// 128-dimensional feature vector (mel-scale inspired)
152    pub features: Vec<f32>,
153    /// Band energies
154    pub band_energies: BandEnergies,
155    /// Spectral centroid
156    pub centroid: f32,
157    /// Spectral flatness
158    pub flatness: f32,
159}
160
161impl FrequencySignature {
162    /// Compute cosine similarity with another signature.
163    pub fn similarity(&self, other: &FrequencySignature) -> f32 {
164        if self.features.len() != other.features.len() {
165            return 0.0;
166        }
167
168        let dot: f32 = self.features.iter()
169            .zip(other.features.iter())
170            .map(|(a, b)| a * b)
171            .sum();
172
173        let norm_a: f32 = self.features.iter().map(|x| x * x).sum::<f32>().sqrt();
174        let norm_b: f32 = other.features.iter().map(|x| x * x).sum::<f32>().sqrt();
175
176        if norm_a == 0.0 || norm_b == 0.0 {
177            return 0.0;
178        }
179
180        dot / (norm_a * norm_b)
181    }
182}
183
184/// Audio fingerprint for content verification.
185#[derive(Debug, Clone, Serialize, Deserialize)]
186pub struct AudioFingerprint {
187    /// SHA-256 hash of fingerprint data
188    pub hash: String,
189    /// Version of fingerprinting algorithm
190    pub version: u32,
191    /// Fingerprint constellation points
192    pub points: Vec<FingerprintPoint>,
193    /// Duration of analyzed audio in seconds
194    pub duration_secs: f64,
195}
196
197/// A single point in the fingerprint constellation.
198#[derive(Debug, Clone, Serialize, Deserialize)]
199pub struct FingerprintPoint {
200    /// Time offset in frames
201    pub time_offset: u32,
202    /// Frequency bin index
203    pub freq_bin: u32,
204    /// Amplitude (quantized)
205    pub amplitude: u8,
206}
207
208/// Content tag with confidence score.
209#[derive(Debug, Clone, Serialize, Deserialize)]
210pub struct ContentTag {
211    /// Tag label
212    pub label: String,
213    /// Confidence score (0-1)
214    pub confidence: f32,
215}
216
/// Settings for the video processing pipeline.
#[derive(Clone, Debug)]
pub struct ProcessingConfig {
    /// Sample rate the analysis targets.
    pub sample_rate: u32,
    /// Whether fingerprint generation is enabled.
    pub enable_fingerprint: bool,
    /// Whether auto-tagging is enabled.
    pub enable_tagging: bool,
    /// Whether thumbnail selection is enabled.
    pub enable_thumbnail: bool,
    /// Whether signature generation is enabled.
    pub enable_signature: bool,
}

impl Default for ProcessingConfig {
    /// Default configuration: a 44100 sample rate with every stage enabled.
    fn default() -> Self {
        let enabled = true;
        ProcessingConfig {
            sample_rate: 44_100,
            enable_fingerprint: enabled,
            enable_tagging: enabled,
            enable_thumbnail: enabled,
            enable_signature: enabled,
        }
    }
}
243
244/// Result of complete video processing.
245#[derive(Debug, Clone, Serialize, Deserialize)]
246pub struct ProcessingResult {
247    /// Unique content identifier
248    pub content_id: String,
249    /// Audio fingerprint (if enabled)
250    pub fingerprint: Option<AudioFingerprint>,
251    /// Content tags (if enabled)
252    pub tags: Vec<ContentTag>,
253    /// Optimal thumbnail timestamp in seconds (if enabled)
254    pub thumbnail_timestamp: Option<f64>,
255    /// Frequency signature (if enabled)
256    pub signature: Option<FrequencySignature>,
257    /// Top dominant frequencies
258    pub dominant_frequencies: Vec<DominantFrequency>,
259}
260
/// Quality measurements of a single frame, used when choosing a thumbnail.
#[derive(Clone, Debug)]
pub struct FrameQuality {
    /// Position of the frame, in seconds.
    pub timestamp: f64,
    /// Sharpness score (FFT-based).
    pub sharpness: f32,
    /// Contrast score.
    pub contrast: f32,
    /// How many faces were detected.
    pub face_count: u32,
    /// Overall quality score.
    pub score: f32,
}
275
276/// Recommendation with similarity score.
277#[derive(Debug, Clone, Serialize, Deserialize)]
278pub struct Recommendation {
279    /// Content ID of recommended item
280    pub content_id: String,
281    /// Similarity score (0-1)
282    pub similarity: f32,
283    /// Matching features that contributed to similarity
284    pub matching_features: Vec<String>,
285}