1use image::{Rgba, RgbaImage};
2use memmap2::Mmap;
3use mime::Mime;
4use rustfft::num_complex::Complex;
5use rustfft::num_traits::Zero;
6use rustfft::{Fft, FftPlanner};
7use semdiff_core::fs::FileLeaf;
8use semdiff_core::{Diff, DiffCalculator, MayUnsupported};
9use std::f32::consts::PI;
10use std::fmt::{Debug, Formatter};
11use std::io::Cursor;
12use std::ops::Range;
13use std::sync::{Arc, LazyLock};
14use std::{convert, iter};
15use symphonia::core::audio::AudioSpec;
16use symphonia::core::codecs::audio::AudioDecoderOptions;
17use symphonia::core::errors::Error as SymphoniaError;
18use symphonia::core::formats::probe::Hint;
19use symphonia::core::formats::{FormatOptions, TrackType};
20use symphonia::core::io::MediaSourceStream;
21use symphonia::core::meta::MetadataOptions;
22use thiserror::Error;
23
24pub mod report_html;
25pub mod report_json;
26pub mod report_summary;
27
28#[cfg(test)]
29mod tests;
30
/// Width in pixels of every rendered waveform image.
const WAVEFORM_WIDTH: u32 = 1024;
/// Height in pixels of every rendered waveform image.
const WAVEFORM_HEIGHT: u32 = 256;
/// Width in pixels of every rendered spectrogram and spectrogram-diff image.
const SPECTROGRAM_WIDTH: u32 = 1024;
/// Height in pixels of every rendered spectrogram and spectrogram-diff image.
const SPECTROGRAM_HEIGHT: u32 = 256;
/// Number of frequency bins stored per spectrogram frame.
const SPECTROGRAM_DATA_HEIGHT: usize = 1024;
/// Number of samples fed to each FFT; twice the number of stored bins.
const FFT_WINDOW_SIZE: usize = SPECTROGRAM_DATA_HEIGHT * 2;
/// Small positive floor that keeps denominators and logarithm arguments away from zero.
const LOG_EPSILON: f32 = 1e-6;
38
39pub struct AudioDiffReporter {
40 spectrogram_analyzer: SpectrogramAnalyzer,
41}
42
43impl Default for AudioDiffReporter {
44 fn default() -> Self {
45 Self::new()
46 }
47}
48
49impl AudioDiffReporter {
50 pub fn new() -> AudioDiffReporter {
51 AudioDiffReporter {
52 spectrogram_analyzer: SpectrogramAnalyzer::new(),
53 }
54 }
55
56 fn build_audio_data(&self, kind: Mime, content: Arc<Mmap>) -> Result<AudioData, AudioDecodeError> {
57 let decoded = self.spectrogram_analyzer.decode_audio(&kind, &content)?;
58 let stat = AudioStat::from_one(&decoded);
59 Ok(build_audio_data_from_decoded(kind, content, &decoded, &stat))
60 }
61}
62
63#[derive(Debug)]
64pub enum AudioDiffStatus {
65 Equal(AudioDiffDetail),
66 Different(AudioDiffDetail),
67 Incomparable,
68}
69
70impl AudioDiffStatus {
71 pub fn as_str(&self) -> &'static str {
72 match self {
73 AudioDiffStatus::Equal(_) => "equal",
74 AudioDiffStatus::Different(_) => "different",
75 AudioDiffStatus::Incomparable => "incomparable",
76 }
77 }
78}
79
80#[derive(Debug)]
81pub struct AudioDiff {
82 status: AudioDiffStatus,
83 expected: AudioData,
84 actual: AudioData,
85}
86
87impl Diff for AudioDiff {
88 fn equal(&self) -> bool {
89 matches!(self.status, AudioDiffStatus::Equal(_))
90 }
91}
92
93impl AudioDiff {
94 fn status(&self) -> &AudioDiffStatus {
95 &self.status
96 }
97
98 fn expected(&self) -> &AudioData {
99 &self.expected
100 }
101
102 fn actual(&self) -> &AudioData {
103 &self.actual
104 }
105
106 fn diff_detail(&self) -> Option<&AudioDiffDetail> {
107 match &self.status {
108 AudioDiffStatus::Equal(detail) | AudioDiffStatus::Different(detail) => Some(detail),
109 AudioDiffStatus::Incomparable => None,
110 }
111 }
112}
113
/// Detail of a completed comparison: one spectrogram-diff image per channel
/// plus the numeric summary.
#[derive(Debug)]
pub struct AudioDiffDetail {
    spectrogram_diff: Vec<RgbaImage>,
    stat: AudioDiffStat,
}

impl AudioDiffDetail {
    /// Spectrogram-diff images, one per compared channel.
    fn spectrogram_diff(&self) -> &[RgbaImage] {
        &self.spectrogram_diff
    }

    /// Numeric summary of the comparison.
    fn stat(&self) -> &AudioDiffStat {
        &self.stat
    }
}
129
/// Numeric summary of an audio comparison.
#[derive(Debug, Clone, Copy)]
pub struct AudioDiffStat {
    /// Fraction of compared spectrogram cells whose difference exceeded the
    /// spectral tolerance, averaged over channels.
    pub spectrogram_diff_rate: f64,
    /// Sample shift that was applied to align the two signals before comparing.
    pub shift_samples: i32,
    /// Largest per-channel absolute difference of the level computed by `loudness_db`
    /// (an RMS level in dB; despite the name no LUFS weighting is applied there).
    pub lufs_diff_db: f32,
}
136
/// Decoded metadata and rendered images of one audio file, together with the
/// file's raw bytes.
#[derive(Debug)]
pub struct AudioData {
    mime: Mime,
    sample_rate: u32,
    channels: u16,
    duration_seconds: f32,
    // One image per decoded channel.
    waveform: Vec<RgbaImage>,
    // One image per decoded channel.
    spectrogram: Vec<RgbaImage>,
    content: Arc<Mmap>,
}

impl AudioData {
    /// MIME type the file was decoded as.
    fn mime(&self) -> &Mime {
        &self.mime
    }

    /// Sample rate reported by the decoder.
    fn sample_rate(&self) -> u32 {
        self.sample_rate
    }

    /// Channel count reported by the decoder.
    fn channels(&self) -> u16 {
        self.channels
    }

    /// Duration in seconds, derived from the longest decoded channel.
    fn duration_seconds(&self) -> f32 {
        self.duration_seconds
    }

    /// Rendered waveform images, one per channel.
    fn waveform(&self) -> &[RgbaImage] {
        &self.waveform
    }

    /// Rendered spectrogram images, one per channel.
    fn spectrogram(&self) -> &[RgbaImage] {
        &self.spectrogram
    }

    /// Raw bytes of the original file.
    fn content(&self) -> &[u8] {
        &self.content
    }
}
177
/// Reasons why decoding an audio file can fail.
#[derive(Debug, Error)]
pub enum AudioDecodeError {
    /// Any error reported by Symphonia while probing, demuxing or decoding.
    #[error("symphonia error: {0}")]
    Symphonia(#[from] SymphoniaError),
    /// The container has no default audio track.
    #[error("no default audio track")]
    NoDefaultTrack,
    /// The track has no codec parameters, or they are not audio parameters.
    #[error("missing audio codec parameters")]
    MissingAudioCodecParameters,
    /// No signal specification could be determined, or its sample rate is zero.
    #[error("missing sample rate")]
    MissingSampleRate,
}
189
190#[derive(Default)]
191pub struct AudioDiffCalculator {
192 shift_tolerance_seconds: f32,
193 lufs_tolerance_db: f32,
194 spectral_tolerance: f32,
195 spectrogram_diff_rate_tolerance: f64,
196 spectrogram_analyzer: SpectrogramAnalyzer,
197}
198
199impl Debug for AudioDiffCalculator {
200 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
201 f.debug_struct("AudioDiffCalculator")
202 .field("shift_tolerance_seconds", &self.shift_tolerance_seconds)
203 .field("lufs_tolerance_db", &self.lufs_tolerance_db)
204 .field("spectral_tolerance", &self.spectral_tolerance)
205 .field("spectrogram_diff_rate_tolerance", &self.spectrogram_diff_rate_tolerance)
206 .finish()
207 }
208}
209
210impl AudioDiffCalculator {
211 fn diff_decoded(&self, expected: &AudioDecoded, actual: &AudioDecoded) -> AudioDiffStatus {
212 if (expected.sample_rate, expected.channels) != (actual.sample_rate, actual.channels) {
213 return AudioDiffStatus::Incomparable;
214 }
215
216 let sample_rate = expected.sample_rate;
217 let max_shift_samples = (self.shift_tolerance_seconds * sample_rate as f32).round() as i32;
218 let (aligned_expected, aligned_actual, shift_samples) =
219 align_samples(expected.samples.clone(), actual.samples.clone(), max_shift_samples);
220
221 let expected_spectrogram = aligned_expected
222 .iter()
223 .map(|channel| self.spectrogram_analyzer.compute(channel))
224 .collect::<Vec<_>>();
225 let actual_spectrogram = aligned_actual
226 .iter()
227 .map(|channel| self.spectrogram_analyzer.compute(channel))
228 .collect::<Vec<_>>();
229
230 let (spectrogram_diff, spectrogram_diff_rate) =
231 self.build_diff_images(&expected_spectrogram, &actual_spectrogram);
232
233 let lufs_diff_db = summarize_channel_metrics(&aligned_expected, &aligned_actual);
234
235 let detail = AudioDiffDetail {
236 spectrogram_diff,
237 stat: AudioDiffStat {
238 spectrogram_diff_rate,
239 shift_samples,
240 lufs_diff_db,
241 },
242 };
243
244 let equal =
245 lufs_diff_db <= self.lufs_tolerance_db && spectrogram_diff_rate <= self.spectrogram_diff_rate_tolerance;
246 if equal {
247 AudioDiffStatus::Equal(detail)
248 } else {
249 AudioDiffStatus::Different(detail)
250 }
251 }
252}
253
254impl DiffCalculator<FileLeaf> for AudioDiffCalculator {
255 type Error = convert::Infallible;
256 type Diff = AudioDiff;
257
258 fn diff(
259 &self,
260 _name: &str,
261 expected: FileLeaf,
262 actual: FileLeaf,
263 ) -> Result<MayUnsupported<Self::Diff>, Self::Error> {
264 if !is_audio_kind(&expected.kind) || !is_audio_kind(&actual.kind) {
265 return Ok(MayUnsupported::Unsupported);
266 }
267 let Ok(expected_decoded) = self
268 .spectrogram_analyzer
269 .decode_audio(&expected.kind, expected.content.as_ref())
270 else {
271 return Ok(MayUnsupported::Unsupported);
272 };
273 let Ok(actual_decoded) = self
274 .spectrogram_analyzer
275 .decode_audio(&actual.kind, actual.content.as_ref())
276 else {
277 return Ok(MayUnsupported::Unsupported);
278 };
279 let stat_decoded = AudioStat::from_pair(&expected_decoded, &actual_decoded);
280 let expected_data =
281 build_audio_data_from_decoded(expected.kind, expected.content, &expected_decoded, &stat_decoded);
282 let actual_data = build_audio_data_from_decoded(actual.kind, actual.content, &actual_decoded, &stat_decoded);
283 let status = self.diff_decoded(&expected_decoded, &actual_decoded);
284
285 Ok(MayUnsupported::Ok(AudioDiff {
286 status,
287 expected: expected_data,
288 actual: actual_data,
289 }))
290 }
291}
292
/// Value ranges used to scale rendered waveform and spectrogram images.
#[derive(Debug)]
struct AudioStat {
    /// Largest absolute sample value.
    signal_max: f32,
    /// Smallest spectrogram value.
    spectrogram_min: f32,
    /// Largest spectrogram value.
    spectrogram_max: f32,
    /// Duration in seconds that the full image width represents.
    duration: f32,
}
300
301impl AudioStat {
302 fn from_one(decoded: &AudioDecoded) -> AudioStat {
303 let signal_max = decoded
304 .samples
305 .iter()
306 .flatten()
307 .copied()
308 .map(f32::abs)
309 .fold(0.0, f32::max);
310 let (spectrogram_min, spectrogram_max) = decoded
311 .spectrograms
312 .iter()
313 .flatten()
314 .flatten()
315 .copied()
316 .fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), v| {
317 (v.min(min), v.max(max))
318 });
319 let duration = decoded.duration_seconds;
320 AudioStat {
321 signal_max,
322 spectrogram_min,
323 spectrogram_max,
324 duration,
325 }
326 }
327
328 fn from_pair(expected: &AudioDecoded, actual: &AudioDecoded) -> AudioStat {
329 let signal_max = expected
330 .samples
331 .iter()
332 .chain(actual.samples.iter())
333 .flatten()
334 .copied()
335 .map(f32::abs)
336 .fold(0.0, f32::max);
337 let (spectrogram_min, spectrogram_max) = expected
338 .spectrograms
339 .iter()
340 .chain(actual.spectrograms.iter())
341 .flatten()
342 .flatten()
343 .copied()
344 .filter(|v| v.is_finite())
345 .fold((f32::INFINITY, f32::NEG_INFINITY), |(min, max), v| {
346 (v.min(min), v.max(max))
347 });
348 let duration = expected.duration_seconds.max(actual.duration_seconds);
349 AudioStat {
350 signal_max,
351 spectrogram_min,
352 spectrogram_max,
353 duration,
354 }
355 }
356}
357
358impl AudioDiffCalculator {
359 pub fn new(
360 shift_tolerance_seconds: f32,
361 lufs_tolerance_db: f32,
362 spectral_tolerance: f32,
363 spectrogram_diff_rate_tolerance: f64,
364 ) -> Self {
365 Self {
366 shift_tolerance_seconds,
367 lufs_tolerance_db,
368 spectral_tolerance,
369 spectrogram_diff_rate_tolerance,
370 spectrogram_analyzer: SpectrogramAnalyzer::new(),
371 }
372 }
373
374 fn build_diff_images(
375 &self,
376 expected: &[Vec<[f32; SPECTROGRAM_DATA_HEIGHT]>],
377 actual: &[Vec<[f32; SPECTROGRAM_DATA_HEIGHT]>],
378 ) -> (Vec<RgbaImage>, f64) {
379 assert_eq!(expected.len(), actual.len());
380 let mut diff_images = Vec::with_capacity(expected.len());
381 let mut diff_rate_sum = 0.0;
382 for (expected_frame, actual_frame) in expected.iter().zip(actual.iter()) {
383 let (diff_image, diff_rate) = self.diff_spectrograms(expected_frame, actual_frame);
384 diff_images.push(diff_image);
385 diff_rate_sum += diff_rate;
386 }
387 (diff_images, diff_rate_sum / expected.len() as f64)
388 }
389
    /// Compares two single-channel spectrograms cell by cell.
    ///
    /// Returns a `SPECTROGRAM_WIDTH` x `SPECTROGRAM_HEIGHT` image whose alpha channel
    /// encodes, per pixel, the fraction of underlying cells that differ by more than
    /// `spectral_tolerance`, together with the overall fraction of differing cells
    /// (`0.0` when nothing was compared).
    ///
    /// Frames present on only one side always count as different.
    fn diff_spectrograms(
        &self,
        expected: &[[f32; SPECTROGRAM_DATA_HEIGHT]],
        actual: &[[f32; SPECTROGRAM_DATA_HEIGHT]],
    ) -> (RgbaImage, f64) {
        // Compare over the longer of the two so that extra frames are not ignored.
        let spectrogram_len = expected.len().max(actual.len());
        let mut diff_image = RgbaImage::from_pixel(SPECTROGRAM_WIDTH, SPECTROGRAM_HEIGHT, Rgba([255, 255, 255, 0]));
        let mut diff_count = 0usize;
        let mut total_count = 0usize;
        assert!(SPECTROGRAM_DATA_HEIGHT >= SPECTROGRAM_HEIGHT as usize);
        if spectrogram_len >= SPECTROGRAM_WIDTH as usize {
            // At least one frame per pixel column: each column aggregates a run of frames.
            for x in 0..SPECTROGRAM_WIDTH {
                let x_range = x as usize * spectrogram_len / SPECTROGRAM_WIDTH as usize
                    ..(x + 1) as usize * spectrogram_len / SPECTROGRAM_WIDTH as usize;
                for y in 0..SPECTROGRAM_HEIGHT {
                    let y_range = spectrogram_log_bin_range(y);

                    // Number of differing cells inside this pixel.
                    let mut diff_sum = 0usize;
                    for y in y_range.clone() {
                        for x in x_range.clone() {
                            let expected = expected.get(x).map(|x| x[y]);
                            let actual = actual.get(x).map(|x| x[y]);
                            // A missing frame turns the difference into +inf, which exceeds any tolerance.
                            let diff = (expected.unwrap_or(f32::INFINITY) - actual.unwrap_or(f32::NEG_INFINITY)).abs();
                            total_count += 1;
                            if diff > self.spectral_tolerance {
                                diff_sum += 1;
                                diff_count += 1;
                            }
                        }
                    }
                    // Row 0 of the data is drawn at the bottom of the image.
                    diff_image.put_pixel(
                        x,
                        SPECTROGRAM_HEIGHT - y - 1,
                        Rgba([
                            255,
                            255,
                            255,
                            (diff_sum as f64 / (x_range.len() * y_range.len()) as f64 * 255.0) as u8,
                        ]),
                    );
                }
            }
        } else {
            // Fewer frames than pixel columns: each frame is stretched over a run of columns.
            for x in 0..spectrogram_len {
                let image_x_range = x as u32 * SPECTROGRAM_WIDTH / spectrogram_len as u32
                    ..(x + 1) as u32 * SPECTROGRAM_WIDTH / spectrogram_len as u32;
                for y in 0..SPECTROGRAM_HEIGHT {
                    let y_range = spectrogram_log_bin_range(y);
                    let mut diff_sum = 0usize;
                    for y in y_range.clone() {
                        let expected = expected.get(x).map(|x| x[y]);
                        let actual = actual.get(x).map(|x| x[y]);
                        // A missing frame turns the difference into +inf, which exceeds any tolerance.
                        let diff = (expected.unwrap_or(f32::INFINITY) - actual.unwrap_or(f32::NEG_INFINITY)).abs();
                        total_count += 1;
                        if diff > self.spectral_tolerance {
                            diff_sum += 1;
                            diff_count += 1;
                        }
                    }
                    let color = Rgba([255, 255, 255, (diff_sum as f64 / y_range.len() as f64 * 255.0) as u8]);
                    for x in image_x_range.clone() {
                        diff_image.put_pixel(x, SPECTROGRAM_HEIGHT - y - 1, color);
                    }
                }
            }
        }
        let diff_rate = if total_count == 0 {
            0.0
        } else {
            diff_count as f64 / total_count as f64
        };
        (diff_image, diff_rate)
    }
463}
464
465pub fn audio_extension(kind: &Mime) -> Option<&'static str> {
466 match kind.essence_str() {
467 "audio/mpeg" => Some("mp3"),
468 "audio/wav" | "audio/x-wav" => Some("wav"),
469 "audio/flac" => Some("flac"),
470 "audio/ogg" | "application/ogg" => Some("ogg"),
471 "audio/opus" => Some("opus"),
472 "audio/webm" => Some("webm"),
473 "audio/aac" => Some("aac"),
474 "audio/mp4" | "video/mp4" => Some("m4a"),
475 "audio/x-m4a" => Some("m4a"),
476 _ => mime_guess::get_mime_extensions(kind).and_then(|exts| exts.first().copied()),
477 }
478}
479
480fn is_audio_kind(kind: &Mime) -> bool {
481 kind.type_() == mime::AUDIO || kind.type_() == mime::VIDEO
482}
483
484fn build_audio_data_from_decoded(
485 mime: Mime,
486 content: Arc<Mmap>,
487 decoded: &AudioDecoded,
488 stat: &AudioStat,
489) -> AudioData {
490 let waveform = render_waveforms(&decoded.samples, stat, decoded.sample_rate);
491 let spectrogram = render_spectrograms(&decoded.spectrograms, stat, decoded.sample_rate);
492 AudioData {
493 mime,
494 sample_rate: decoded.sample_rate,
495 channels: decoded.channels,
496 duration_seconds: decoded.duration_seconds,
497 waveform,
498 spectrogram,
499 content,
500 }
501}
502
/// Fully decoded audio of one file.
struct AudioDecoded {
    /// Sample rate taken from the signal specification.
    sample_rate: u32,
    /// Channel count taken from the signal specification.
    channels: u16,
    /// Length of the longest channel divided by the sample rate.
    duration_seconds: f32,
    /// Planar samples: one vector per decoded channel.
    samples: Vec<Vec<f32>>,
    /// One spectrogram (a sequence of frames) per decoded channel.
    spectrograms: Vec<Vec<[f32; SPECTROGRAM_DATA_HEIGHT]>>,
}
510
511fn align_samples(
512 mut expected: Vec<Vec<f32>>,
513 mut actual: Vec<Vec<f32>>,
514 max_shift_samples: i32,
515) -> (Vec<Vec<f32>>, Vec<Vec<f32>>, i32) {
516 assert_eq!(expected.len(), actual.len());
517 let best_shift = (-max_shift_samples..=max_shift_samples)
518 .map(|shift| {
519 let score_sum = expected
520 .iter()
521 .zip(actual.iter())
522 .map(|(expected_channel, actual_channel)| {
523 let (expected_slice, actual_slice) = overlap_slices(expected_channel, actual_channel, shift);
524 normalized_correlation(expected_slice, actual_slice)
525 })
526 .sum::<f32>();
527 (shift, score_sum)
528 })
529 .min_by(|&(_, score1), &(_, score2)| score1.partial_cmp(&score2).unwrap())
530 .map_or(0, |(shift, _)| shift);
531
532 for (expected, actual) in expected.iter_mut().zip(actual.iter_mut()) {
533 let (expected_range, actual_range) = overlap_range(expected.len(), actual.len(), best_shift);
534 expected.drain(..expected_range.start.min(expected.len()));
535 actual.drain(..actual_range.start.min(actual.len()));
536 }
537
538 (expected, actual, best_shift)
539}
540
541fn summarize_channel_metrics(expected: &[Vec<f32>], actual: &[Vec<f32>]) -> f32 {
542 let channel_count = expected.len().min(actual.len());
543 if channel_count == 0 {
544 return f32::INFINITY;
545 }
546 let mut max_lufs_diff = 0.0f32;
547 for channel_index in 0..channel_count {
548 let expected_channel = &expected[channel_index];
549 let actual_channel = &actual[channel_index];
550 if expected_channel.is_empty() || actual_channel.is_empty() {
551 continue;
552 }
553 let expected_lufs = loudness_db(expected_channel);
554 let actual_lufs = loudness_db(actual_channel);
555 max_lufs_diff = max_lufs_diff.max((expected_lufs - actual_lufs).abs());
556 }
557 max_lufs_diff
558}
559
560fn render_waveforms(samples: &[Vec<f32>], stat: &AudioStat, sample_rate: u32) -> Vec<RgbaImage> {
561 samples
562 .iter()
563 .map(|channel| render_waveform(channel, stat, sample_rate))
564 .collect()
565}
566
/// Computes the index ranges of the overlapping region of two signals when
/// `actual` is offset by `shift` samples relative to `expected`.
///
/// For `shift >= 0` the overlap starts at `expected[0]` / `actual[shift]`; for a
/// negative shift it starts at `expected[-shift]` / `actual[0]`. Both returned
/// ranges have the same length.
///
/// The offset is clamped to the length of the slice it indexes, so the returned
/// ranges are always valid for slices of the given lengths, even when `|shift|`
/// exceeds them (the overlap is then empty). `i32::MIN` is handled without
/// overflow.
fn overlap_range(expected_len: usize, actual_len: usize, shift: i32) -> (Range<usize>, Range<usize>) {
    let magnitude = usize::try_from(shift.unsigned_abs()).unwrap_or(usize::MAX);
    if shift >= 0 {
        let offset = magnitude.min(actual_len);
        let len = expected_len.min(actual_len - offset);
        (0..len, offset..offset + len)
    } else {
        let offset = magnitude.min(expected_len);
        let len = actual_len.min(expected_len - offset);
        (offset..offset + len, 0..len)
    }
}
578
579fn overlap_slices<'a>(expected: &'a [f32], actual: &'a [f32], shift: i32) -> (&'a [f32], &'a [f32]) {
580 let (expected_range, actual_range) = overlap_range(expected.len(), actual.len(), shift);
581 (&expected[expected_range], &actual[actual_range])
582}
583
584fn normalized_correlation(expected: &[f32], actual: &[f32]) -> f32 {
585 assert_eq!(expected.len(), actual.len());
586 let mut dot = 0.0f32;
587 let mut expected_power = 0.0f32;
588 let mut actual_power = 0.0f32;
589 for (&e, &a) in expected.iter().zip(actual.iter()) {
590 dot += e * a;
591 expected_power += e * e;
592 actual_power += a * a;
593 }
594 let denom = (expected_power.sqrt() * actual_power.sqrt()).max(LOG_EPSILON);
595 dot / denom
596}
597
598fn loudness_db(samples: &[f32]) -> f32 {
599 if samples.is_empty() {
600 return -100.0;
601 }
602 let power = samples.iter().map(|sample| sample * sample).sum::<f32>() / samples.len() as f32;
603 let rms = power.sqrt();
604 20.0 * rms.max(LOG_EPSILON).log10()
605}
606
607fn render_waveform(samples: &[f32], stat: &AudioStat, sample_rate: u32) -> RgbaImage {
608 const WAVEFORM_COLOR: Rgba<u8> = Rgba([255, 255, 255, 255]);
609 let clip = (stat.signal_max * 1.2).clamp(LOG_EPSILON, 1.0);
610 let mut image = RgbaImage::from_pixel(WAVEFORM_WIDTH, WAVEFORM_HEIGHT, Rgba([255, 255, 255, 0]));
611 if stat.duration <= 0.0 || sample_rate == 0 {
612 return image;
613 }
614 let to_y = |value: f32| {
615 let normalized = (value + clip) / (2.0 * clip);
616 ((normalized * WAVEFORM_HEIGHT as f32).round() as u32).min(WAVEFORM_HEIGHT - 1)
617 };
618 let duration = stat.duration;
619 for x in 0..WAVEFORM_WIDTH {
620 let start_time = x as f32 * duration / WAVEFORM_WIDTH as f32;
621 let end_time = (x + 1) as f32 * duration / WAVEFORM_WIDTH as f32;
622 let start = (start_time * sample_rate as f32).floor() as usize;
623 let end = (end_time * sample_rate as f32).ceil() as usize;
624 let start = start.min(samples.len());
625 let end = end.min(samples.len());
626 if end <= start {
627 continue;
628 }
629 let (min, max) = samples[start..end]
630 .iter()
631 .fold((1.0f32, -1.0f32), |(min, max), &s| (min.min(s), max.max(s)));
632 let y_min = to_y(min);
633 let y_max = to_y(max);
634 for y in y_min..=y_max {
635 image.put_pixel(x, WAVEFORM_HEIGHT - y - 1, WAVEFORM_COLOR);
636 }
637 }
638 image
639}
640
641fn render_spectrograms(
642 spectrograms: &[Vec<[f32; SPECTROGRAM_DATA_HEIGHT]>],
643 stat: &AudioStat,
644 sample_rate: u32,
645) -> Vec<RgbaImage> {
646 spectrograms
647 .iter()
648 .map(|channel| render_spectrogram(channel, stat, sample_rate))
649 .collect()
650}
651
/// Returns the range of spectrogram data bins that image row `y` (counted from
/// the low-frequency end) aggregates, using a logarithmic frequency axis.
///
/// The table for all `SPECTROGRAM_HEIGHT` rows is computed once on first use.
///
/// # Panics
/// Panics if `y >= SPECTROGRAM_HEIGHT`.
fn spectrogram_log_bin_range(y: u32) -> Range<usize> {
    static RANGES: LazyLock<[Range<usize>; SPECTROGRAM_HEIGHT as usize]> = LazyLock::new(|| {
        // Row position p (0..1) and data bin b are related by b = A * (B^p - 1),
        // i.e. p = log_B(b / A + 1); A is chosen so that p = 1 maps to the last bin.
        const B: f64 = 20.0;
        const A: f64 = SPECTROGRAM_DATA_HEIGHT as f64 / (B - 1.0);
        let mut ranges = [const { 0usize..0 }; SPECTROGRAM_HEIGHT as usize];
        let mut wrote = 0;
        // Low end: while a single data bin still spans at least one image row,
        // assign that one bin to every row it covers.
        for y in 0..SPECTROGRAM_DATA_HEIGHT {
            let p1 = f64::log(1.0 / A * y as f64 + 1.0, B);
            let p2 = f64::log(1.0 / A * (y + 1) as f64 + 1.0, B);
            let range = p1 * SPECTROGRAM_HEIGHT as f64..p2 * SPECTROGRAM_HEIGHT as f64;
            if range.end - range.start < 1.0 {
                break;
            }
            ranges[range.start.round() as usize..range.end.round() as usize].fill(y..y + 1);
            wrote = y + 1;
        }
        // Remaining rows: each row covers the data bins between its two edges.
        // NOTE(review): `wrote` counts data bins handled above, but is used here to skip
        // image rows; rows between `wrote` and the last row filled above are recomputed
        // by this loop. Confirm this is intended.
        for (y, slot) in ranges
            .iter_mut()
            .enumerate()
            .take(SPECTROGRAM_HEIGHT as usize)
            .skip(wrote)
        {
            let range = (A * (f64::powf(B, y as f64 / SPECTROGRAM_HEIGHT as f64) - 1.0)).round() as usize
                ..(A * (f64::powf(B, (y + 1) as f64 / SPECTROGRAM_HEIGHT as f64) - 1.0)).round() as usize;
            *slot = range;
        }
        ranges
    });
    RANGES[y as usize].clone()
}
682
/// Decodes audio and computes spectrograms with a pre-planned FFT.
struct SpectrogramAnalyzer {
    /// Forward FFT planned for `FFT_WINDOW_SIZE` points.
    fft: Arc<dyn Fft<f32>>,
    /// Window coefficients, `FFT_WINDOW_SIZE` entries, multiplied onto each frame.
    window: Box<[f32]>,
}

impl Default for SpectrogramAnalyzer {
    /// Equivalent to `SpectrogramAnalyzer::new`.
    fn default() -> Self {
        Self::new()
    }
}
693
694impl SpectrogramAnalyzer {
695 fn new() -> SpectrogramAnalyzer {
696 let fft = FftPlanner::<f32>::new().plan_fft_forward(FFT_WINDOW_SIZE);
697 let window = (0..FFT_WINDOW_SIZE)
698 .map(|i| (PI * i as f32 / (FFT_WINDOW_SIZE - 1) as f32).sin())
699 .collect();
700 SpectrogramAnalyzer { fft, window }
701 }
702
    /// Decodes the default audio track of `content` into planar `f32` samples and
    /// computes a spectrogram for every channel.
    ///
    /// `mime` is only used to derive a file-extension hint for format probing.
    ///
    /// # Errors
    /// - [`AudioDecodeError::Symphonia`] for probing, demuxing or decoding failures.
    /// - [`AudioDecodeError::NoDefaultTrack`] if there is no default audio track.
    /// - [`AudioDecodeError::MissingAudioCodecParameters`] if the track lacks audio
    ///   codec parameters.
    /// - [`AudioDecodeError::MissingSampleRate`] if no signal specification could be
    ///   determined or its rate is zero.
    fn decode_audio(&self, mime: &Mime, content: &[u8]) -> Result<AudioDecoded, AudioDecodeError> {
        let mut hint = Hint::new();
        if let Some(extension) = audio_extension(mime) {
            hint.with_extension(extension);
        }

        // The media source is boxed and handed to Symphonia, so the bytes are copied
        // into an owned buffer first.
        let owned = content.to_vec();
        let mss = MediaSourceStream::new(Box::new(Cursor::new(owned)), Default::default());
        let mut format =
            symphonia::default::get_probe().probe(&hint, mss, FormatOptions::default(), MetadataOptions::default())?;
        let track = format
            .default_track(TrackType::Audio)
            .ok_or(AudioDecodeError::NoDefaultTrack)?;
        let track_id = track.id;
        let codec_params = track
            .codec_params
            .clone()
            .ok_or(AudioDecodeError::MissingAudioCodecParameters)?;
        let audio_codec_params = codec_params
            .audio()
            .ok_or(AudioDecodeError::MissingAudioCodecParameters)?;
        let mut decoder =
            symphonia::default::get_codecs().make_audio_decoder(audio_codec_params, &AudioDecoderOptions::default())?;

        // One growing vector per channel plane.
        let mut samples = Vec::<Vec<f32>>::new();
        // Prefer the specification from the codec parameters; otherwise it is taken
        // from the first decoded packet below.
        let mut signal_spec = match (audio_codec_params.sample_rate, audio_codec_params.channels.clone()) {
            (Some(rate), Some(channels)) => Some(AudioSpec::new(rate, channels)),
            _ => None,
        };
        loop {
            let packet = match format.next_packet() {
                Ok(Some(packet)) => packet,
                // No more packets.
                Ok(None) => break,
                Err(SymphoniaError::ResetRequired) => {
                    decoder.reset();
                    continue;
                }
                Err(err) => return Err(err.into()),
            };
            // Ignore packets that belong to other tracks.
            if packet.track_id != track_id {
                continue;
            }
            let decoded = decoder.decode(&packet)?;
            if signal_spec.is_none() {
                signal_spec = Some(decoded.spec().clone());
            }
            let mut packet_samples = Vec::<Vec<f32>>::new();
            decoded.copy_to_vecs_planar(&mut packet_samples);
            // Keep exactly as many planes as the current packet provides, then append
            // each packet plane to the matching channel.
            samples.resize_with(packet_samples.len(), Vec::new);
            for (plane, samples) in packet_samples.into_iter().zip(samples.iter_mut()) {
                samples.extend(plane);
            }
        }

        let Some(signal_spec) = signal_spec else {
            return Err(AudioDecodeError::MissingSampleRate);
        };

        // The duration is based on the longest channel.
        let max_len = samples.iter().map(|channel| channel.len()).max().unwrap_or(0);
        let sample_rate = signal_spec.rate();
        if sample_rate == 0 {
            return Err(AudioDecodeError::MissingSampleRate);
        }
        let duration_seconds = max_len as f32 / sample_rate as f32;

        let spectrograms = samples.iter().map(|sample| self.compute(sample)).collect::<Vec<_>>();

        Ok(AudioDecoded {
            sample_rate,
            channels: signal_spec.channels().count() as u16,
            duration_seconds,
            samples,
            spectrograms,
        })
    }
778
    /// Computes the log-power spectrogram of one channel.
    ///
    /// Frames start every `FFT_WINDOW_SIZE / 2` samples, are `FFT_WINDOW_SIZE`
    /// samples long (zero-padded past the end of the input) and are multiplied by
    /// the analyzer's window before the FFT. Each output frame holds
    /// `log10(max(|X[k]|^2, 1e-32))` for the first `SPECTROGRAM_DATA_HEIGHT` bins.
    ///
    /// A frame is produced for every start offset up to and including
    /// `samples.len()`, so the result is never empty: empty input yields one
    /// all-padding frame.
    fn compute(&self, samples: &[f32]) -> Vec<[f32; SPECTROGRAM_DATA_HEIGHT]> {
        // Work buffer of exactly FFT_WINDOW_SIZE complex values, allocated on the heap.
        let mut buffer =
            Box::<[Complex<f32>; FFT_WINDOW_SIZE]>::try_from(vec![Complex::zero(); FFT_WINDOW_SIZE]).unwrap();
        let mut scratch = vec![Complex::zero(); self.fft.get_inplace_scratch_len()];
        let mut result = Vec::with_capacity(samples.len() / (FFT_WINDOW_SIZE / 2));
        for i in 0.. {
            // `get` returns `None` once the start offset lies beyond the end of the input.
            let Some(samples) = samples.get(i * (FFT_WINDOW_SIZE / 2)..) else {
                break;
            };
            // Fill the buffer with windowed samples, padding with zeros when the
            // remaining input is shorter than one window.
            buffer
                .iter_mut()
                .zip(
                    samples
                        .iter()
                        .copied()
                        .chain(iter::repeat(0.0))
                        .zip(self.window.iter().copied()),
                )
                .for_each(|(slot, (s, w))| *slot = Complex::from(s * w));
            self.fft.process_with_scratch(&mut *buffer, &mut scratch);
            result.push([0.0; SPECTROGRAM_DATA_HEIGHT]);
            // Only the first SPECTROGRAM_DATA_HEIGHT bins are kept; the power is
            // floored so the logarithm stays finite for silent bins.
            result
                .last_mut()
                .unwrap()
                .iter_mut()
                .zip(buffer.iter().copied())
                .for_each(|(slot, b)| *slot = b.norm_sqr().max(1e-32).log10());
        }
        result
    }
809}
810
/// Renders one channel's spectrogram as a `SPECTROGRAM_WIDTH` x `SPECTROGRAM_HEIGHT`
/// image.
///
/// Every pixel is white with an alpha proportional to the mean spectrogram value
/// of the frames and data bins it covers, scaled from `stat.spectrogram_min` to
/// `stat.spectrogram_max`. The full width represents `stat.duration` seconds and
/// rows follow the logarithmic bin mapping of `spectrogram_log_bin_range`, lowest
/// bins at the bottom. A blank image is returned for an empty spectrogram, a
/// non-positive duration or a zero sample rate.
fn render_spectrogram(spectrogram: &[[f32; SPECTROGRAM_DATA_HEIGHT]], stat: &AudioStat, sample_rate: u32) -> RgbaImage {
    let mut image = RgbaImage::from_pixel(SPECTROGRAM_WIDTH, SPECTROGRAM_HEIGHT, Rgba([255, 255, 255, 0]));
    if spectrogram.is_empty() || stat.duration <= 0.0 || sample_rate == 0 {
        return image;
    }

    // Floor the range so a flat spectrogram does not divide by zero.
    let value_range = (stat.spectrogram_max - stat.spectrogram_min).max(LOG_EPSILON);
    let map_value = |v: f32| (v - stat.spectrogram_min) / value_range;
    assert!(SPECTROGRAM_HEIGHT <= SPECTROGRAM_DATA_HEIGHT as u32);
    let duration = stat.duration;
    // Consecutive frames start half a window apart.
    let hop_samples = (FFT_WINDOW_SIZE / 2) as f32;
    let frame_duration = hop_samples / sample_rate as f32;
    if frame_duration <= 0.0 {
        return image;
    }

    for x in 0..SPECTROGRAM_WIDTH {
        // Frames whose start time falls into this column's time slice.
        let start_time = x as f32 * duration / SPECTROGRAM_WIDTH as f32;
        let end_time = (x + 1) as f32 * duration / SPECTROGRAM_WIDTH as f32;
        let start = (start_time / frame_duration).floor() as usize;
        let end = (end_time / frame_duration).ceil() as usize;
        let start = start.min(spectrogram.len());
        let end = end.min(spectrogram.len());
        if end <= start {
            // Column lies past the end of this channel; leave it transparent.
            continue;
        }

        for y in 0..SPECTROGRAM_HEIGHT {
            let y_range = spectrogram_log_bin_range(y);
            let sum = spectrogram[start..end]
                .iter()
                .flat_map(|spec| spec[y_range.clone()].iter().copied())
                .sum::<f32>();
            // Mean over all covered frames and bins.
            let value = sum / ((end - start) * y_range.len()) as f32;
            let intensity = map_value(value);
            // Row 0 of the data is drawn at the bottom of the image.
            image.put_pixel(
                x,
                SPECTROGRAM_HEIGHT - y - 1,
                Rgba([255, 255, 255, (intensity * 255.0) as u8]),
            );
        }
    }
    image
}