subx_cli/services/audio/
analyzer.rs1use crate::services::audio::{AudioData, AudioEnvelope};
4use crate::{Result, error::SubXError};
5use aus::{AudioFile, WindowType, analysis, operations, spectrum};
6use std::path::Path;
7
/// Audio analyzer built on top of the `aus` crate.
///
/// Holds the framing parameters shared by the envelope and spectral
/// analysis routines implemented on this type.
#[derive(Debug, Clone)]
pub struct AusAudioAnalyzer {
    /// Sample rate supplied to [`AusAudioAnalyzer::new`]; it is copied into
    /// the envelopes this analyzer produces.
    sample_rate: u32,
    /// Window size, in samples, handed to the short-time Fourier transform.
    window_size: usize,
    /// Hop size, in samples: the STFT hop and the chunk length used for
    /// envelope RMS values.
    hop_size: usize,
}
14
15impl AusAudioAnalyzer {
16 pub fn new(sample_rate: u32) -> Self {
18 Self {
19 sample_rate,
20 window_size: 1024,
21 hop_size: 512,
22 }
23 }
24
25 pub async fn load_audio_file<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioFile> {
27 let path = audio_path.as_ref();
28 let path_str = path
29 .to_str()
30 .ok_or_else(|| SubXError::audio_processing("無法轉換路徑為 UTF-8 字串"))?;
31 let mut audio_file = aus::read(path_str)?;
32 if audio_file.num_channels > 1 {
33 aus::mixdown(&mut audio_file);
34 }
35 Ok(audio_file)
36 }
37
38 pub async fn load_audio_data<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioData> {
40 let audio_file = self.load_audio_file(audio_path).await?;
41 let samples: Vec<f32> = audio_file.samples[0].iter().map(|&x| x as f32).collect();
42 Ok(AudioData {
43 samples,
44 sample_rate: audio_file.sample_rate,
45 channels: audio_file.num_channels,
46 duration: audio_file.duration as f32,
47 })
48 }
49
50 pub async fn extract_envelope<P: AsRef<Path>>(&self, audio_path: P) -> Result<AudioEnvelope> {
52 let audio_file = self.load_audio_file(audio_path).await?;
53 let samples = &audio_file.samples[0];
54 let mut energy_samples = Vec::new();
55 for chunk in samples.chunks(self.hop_size) {
56 let rms_energy = operations::rms(chunk);
57 energy_samples.push(rms_energy as f32);
58 }
59 let duration = audio_file.duration as f32;
60 Ok(AudioEnvelope {
61 samples: energy_samples,
62 sample_rate: self.sample_rate,
63 duration,
64 })
65 }
66
67 pub fn detect_dialogue(
69 &self,
70 envelope: &AudioEnvelope,
71 threshold: f32,
72 ) -> Vec<crate::services::audio::DialogueSegment> {
73 let mut segments = Vec::new();
74 let mut in_dialogue = false;
75 let mut start = 0.0;
76 let time_per_sample = envelope.duration / envelope.samples.len() as f32;
77
78 for (i, &e) in envelope.samples.iter().enumerate() {
79 let t = i as f32 * time_per_sample;
80 if e > threshold && !in_dialogue {
81 in_dialogue = true;
82 start = t;
83 } else if e <= threshold && in_dialogue {
84 in_dialogue = false;
85 if t - start > 0.5 {
86 segments.push(crate::services::audio::DialogueSegment {
87 start_time: start,
88 end_time: t,
89 intensity: e,
90 });
91 }
92 }
93 }
94
95 segments
96 }
97
98 pub async fn analyze_audio_features(&self, audio_file: &AudioFile) -> Result<AudioFeatures> {
100 let samples = &audio_file.samples[0];
101 let stft_result = spectrum::rstft(
102 samples,
103 self.window_size,
104 self.hop_size,
105 WindowType::Hanning,
106 );
107
108 let mut features = Vec::new();
109 for frame in stft_result.iter() {
110 let (magnitude_spectrum, _) = spectrum::complex_to_polar_rfft(frame);
111 let frequencies = spectrum::rfftfreq(self.window_size, audio_file.sample_rate);
112
113 let spectral_centroid = analysis::spectral_centroid(&magnitude_spectrum, &frequencies);
114 let spectral_entropy = analysis::spectral_entropy(&magnitude_spectrum);
115 let zero_crossing_rate = analysis::zero_crossing_rate(samples, audio_file.sample_rate);
116
117 features.push(FrameFeatures {
118 spectral_centroid: spectral_centroid as f32,
119 spectral_entropy: spectral_entropy as f32,
120 zero_crossing_rate: zero_crossing_rate as f32,
121 });
122 }
123
124 Ok(AudioFeatures { frames: features })
125 }
126}
127
128#[derive(Debug, Clone)]
130pub struct AudioFeatures {
131 pub frames: Vec<FrameFeatures>,
132}
133
/// Features describing a single analysis frame.
///
/// The default value has every feature set to `0.0`.
#[derive(Debug, Clone, PartialEq, Default)]
pub struct FrameFeatures {
    /// Spectral centroid of the frame's magnitude spectrum.
    pub spectral_centroid: f32,
    /// Spectral entropy of the frame's magnitude spectrum.
    pub spectral_entropy: f32,
    /// Zero-crossing rate associated with the frame.
    pub zero_crossing_rate: f32,
}