use alloc::vec::Vec;
use libm::log10f;
use crate::dsp::peaks::{Peak, PeakPicker, PeakPickerConfig};
use crate::dsp::stft::{ShortTimeFFT, StftConfig};
use crate::dsp::windows::WindowKind;
use crate::{
AfpError, AudioBuffer, Fingerprinter, Result, SampleRate, StreamingFingerprinter, TimestampMs,
};
/// One landmark hash together with the frame index of the anchor peak it was
/// built from.
///
/// The type is `#[repr(C)]` and derives `bytemuck::Pod` / `bytemuck::Zeroable`,
/// so the field order below is part of its layout and must not be changed.
#[repr(C)]
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash, bytemuck::Pod, bytemuck::Zeroable)]
pub struct WangHash {
/// Packed hash word. From the most significant bits down: quantised anchor
/// frequency (9 bits), quantised target frequency (9 bits), frame delta
/// between target and anchor (14 bits).
pub hash: u32,
/// Frame index (`t_frame`) of the anchor peak.
pub t_anchor: u32,
}
/// Result of one offline extraction: the landmark hashes plus the frame rate
/// needed to convert anchor frame indices back into time.
#[derive(Clone, Debug)]
pub struct WangFingerprint {
/// Landmark hashes. `Wang::extract` returns them sorted by `(t_anchor, hash)`.
pub hashes: Vec<WangHash>,
/// Number of analysis frames per second of audio; `Wang::extract` always
/// sets this to `WANG_FRAMES_PER_SEC`.
pub frames_per_sec: f32,
}
/// Tunable parameters of the Wang landmark fingerprinter.
#[derive(Debug, Clone)]
pub struct WangConfig {
    /// Maximum number of target peaks paired with each anchor peak.
    pub fan_out: u16,
    /// Largest frame delta (target frame minus anchor frame) a target may have.
    pub target_zone_t: u16,
    /// Largest absolute frequency-bin distance between anchor and target.
    pub target_zone_f: u16,
    /// Peak density handed to the peak picker as its `target_per_sec`.
    pub peaks_per_sec: u16,
    /// Threshold handed to the peak picker as its `min_magnitude`; the picker
    /// is run on a `20 * log10` spectrogram, so the value is in dB.
    pub min_anchor_mag_db: f32,
}

impl Default for WangConfig {
    /// Returns the stock parameter set: fan-out 10, a 63-frame by 64-bin
    /// target zone, 30 peaks per second and a -50 dB magnitude threshold.
    fn default() -> Self {
        WangConfig {
            min_anchor_mag_db: -50.0,
            peaks_per_sec: 30,
            target_zone_f: 64,
            target_zone_t: 63,
            fan_out: 10,
        }
    }
}
/// FFT size passed to the STFT as `n_fft`.
const WANG_N_FFT: usize = 1024;
/// Hop between consecutive STFT frames, in samples.
const WANG_HOP: usize = 128;
/// The only input sample rate (Hz) that `Wang::extract` accepts.
const WANG_SR: u32 = 8_000;
/// Analysis frames per second of audio: `WANG_SR / WANG_HOP` = 62.5.
const WANG_FRAMES_PER_SEC: f32 = WANG_SR as f32 / WANG_HOP as f32;
/// Number of frequency buckets that `quantise_freq` maps bin indices onto.
const WANG_FREQ_BUCKETS: u32 = 512;
/// Neighbourhood size given to the peak picker for both its time and its
/// frequency axis.
const WANG_PEAK_NEIGHBOURHOOD: usize = 15;
/// Lower clamp applied to magnitudes before `log10`, which bounds the log
/// spectrum at 20 * log10(1e-6) = -120 dB.
const WANG_LOG_FLOOR: f32 = 1e-6;
/// Offline landmark fingerprinter: STFT magnitude, then peak picking, then
/// anchor/target pairing into [`WangHash`] values.
pub struct Wang {
    /// Pairing and peak-picking parameters.
    cfg: WangConfig,
    /// STFT engine configured in [`Wang::new`].
    stft: ShortTimeFFT,
}

impl Default for Wang {
    /// Builds a fingerprinter from `WangConfig::default()`.
    fn default() -> Self {
        Wang::new(WangConfig::default())
    }
}

impl Wang {
    /// Creates a fingerprinter with the given configuration.
    ///
    /// The STFT always uses a `WANG_N_FFT`-point Hann window with a hop of
    /// `WANG_HOP` samples and `center: false`.
    #[must_use]
    pub fn new(cfg: WangConfig) -> Self {
        let stft_cfg = StftConfig {
            n_fft: WANG_N_FFT,
            hop: WANG_HOP,
            window: WindowKind::Hann,
            center: false,
        };
        Wang {
            stft: ShortTimeFFT::new(stft_cfg),
            cfg,
        }
    }
}
impl Fingerprinter for Wang {
    type Output = WangFingerprint;
    type Config = WangConfig;

    /// Stable identifier of this algorithm and version.
    fn name(&self) -> &'static str {
        "wang-v1"
    }

    /// Configuration this instance was built with.
    fn config(&self) -> &Self::Config {
        &self.cfg
    }

    /// Sample rate (Hz) that `extract` requires.
    fn required_sample_rate(&self) -> u32 {
        WANG_SR
    }

    /// Minimum input length in samples: two seconds at `WANG_SR`.
    fn min_samples(&self) -> usize {
        2 * WANG_SR as usize
    }

    /// Computes the landmark fingerprint of `audio`.
    ///
    /// # Errors
    ///
    /// * `AfpError::UnsupportedSampleRate` if `audio` is not sampled at `WANG_SR`.
    /// * `AfpError::AudioTooShort` if it holds fewer than `min_samples()` samples.
    fn extract(&mut self, audio: AudioBuffer<'_>) -> Result<Self::Output> {
        let hz = audio.rate.hz();
        if hz != WANG_SR {
            return Err(AfpError::UnsupportedSampleRate(hz));
        }
        let needed = self.min_samples();
        let got = audio.samples.len();
        if got < needed {
            return Err(AfpError::AudioTooShort { needed, got });
        }

        let spec = self.stft.magnitude(audio.samples);
        let n_frames = spec.len();

        let hashes = if n_frames == 0 {
            // No frames means there is nothing to pick peaks from.
            Vec::new()
        } else {
            let n_bins = self.stft.n_bins();

            // Flatten the spectrogram frame by frame into a dB-scaled buffer;
            // the floor keeps log10 finite for zero magnitudes.
            let mut log_spec: Vec<f32> = Vec::with_capacity(n_frames * n_bins);
            for frame in &spec {
                log_spec.extend(
                    frame
                        .iter()
                        .map(|&m| 20.0 * log10f(m.max(WANG_LOG_FLOOR))),
                );
            }

            let picker = PeakPicker::new(PeakPickerConfig {
                neighborhood_t: WANG_PEAK_NEIGHBOURHOOD,
                neighborhood_f: WANG_PEAK_NEIGHBOURHOOD,
                min_magnitude: self.cfg.min_anchor_mag_db,
                target_per_sec: self.cfg.peaks_per_sec as usize,
            });
            let peaks = picker.pick(&log_spec, n_frames, n_bins, WANG_FRAMES_PER_SEC);

            // Emit hashes in a canonical order: by anchor frame, then hash word.
            let mut paired = build_hashes(&peaks, &self.cfg);
            paired.sort_unstable_by(|a, b| (a.t_anchor, a.hash).cmp(&(b.t_anchor, b.hash)));
            paired
        };

        Ok(WangFingerprint {
            hashes,
            frames_per_sec: WANG_FRAMES_PER_SEC,
        })
    }
}
/// Pairs every anchor peak with up to `cfg.fan_out` later peaks inside its
/// target zone and packs each pair into a [`WangHash`].
///
/// A peak is a candidate target for an anchor when it lies 1 to
/// `cfg.target_zone_t` frames after the anchor and at most `cfg.target_zone_f`
/// bins away in frequency. Candidates are ranked by descending magnitude
/// (ties broken by `(t_frame, f_bin)`) and the best `fan_out` are kept.
///
/// The frame delta is stored in a 14-bit field, so the effective time zone is
/// capped at `0x3FFF` frames: a larger delta cannot be represented and would
/// otherwise wrap around and masquerade as a small one.
///
/// `peaks` must be ordered by ascending `t_frame`; the scan for targets stops
/// at the first peak that lies beyond the time zone.
///
/// Hashes are returned grouped by anchor in input order, and within an anchor
/// in target rank order.
fn build_hashes(peaks: &[Peak], cfg: &WangConfig) -> Vec<WangHash> {
    // Field widths of the packed hash word: 9 + 9 + 14 = 32 bits.
    const FREQ_MASK: u32 = 0x1FF;
    const DT_MASK: u32 = 0x3FFF;

    let fan_out = usize::from(cfg.fan_out);
    // Never accept a delta that does not fit into the 14-bit dt field.
    let target_zone_t = i64::from(cfg.target_zone_t).min(i64::from(DT_MASK));
    let target_zone_f = i32::from(cfg.target_zone_f);

    // An anchor can pair with at most every other peak, whatever `fan_out` says.
    let per_anchor = fan_out.min(peaks.len().saturating_sub(1));
    let mut hashes = Vec::with_capacity(peaks.len().saturating_mul(per_anchor));
    // Scratch list of candidate targets, reused across anchors.
    let mut targets: Vec<&Peak> = Vec::with_capacity(64);

    for (i, anchor) in peaks.iter().enumerate() {
        targets.clear();
        for target in &peaks[i + 1..] {
            let dt = i64::from(target.t_frame) - i64::from(anchor.t_frame);
            if dt < 1 {
                // Same frame as the anchor: not a valid target.
                continue;
            }
            if dt > target_zone_t {
                // Peaks are time-ordered, so everything after this is too far.
                break;
            }
            let df = i32::from(target.f_bin) - i32::from(anchor.f_bin);
            if df.abs() > target_zone_f {
                continue;
            }
            targets.push(target);
        }

        // Strongest targets first; the secondary key makes the order deterministic.
        targets.sort_unstable_by(|a, b| {
            b.mag
                .partial_cmp(&a.mag)
                .unwrap_or(core::cmp::Ordering::Equal)
                .then_with(|| (a.t_frame, a.f_bin).cmp(&(b.t_frame, b.f_bin)))
        });
        targets.truncate(fan_out);

        // The anchor bucket is the same for every pair of this anchor.
        let f_a_q = quantise_freq(anchor.f_bin) & FREQ_MASK;
        for target in &targets {
            let f_b_q = quantise_freq(target.f_bin) & FREQ_MASK;
            // The filter above guarantees 1 <= dt <= DT_MASK, so this cannot
            // underflow and the mask below does not discard information.
            let dt = target.t_frame - anchor.t_frame;
            hashes.push(WangHash {
                hash: (f_a_q << 23) | (f_b_q << 14) | (dt & DT_MASK),
                t_anchor: anchor.t_frame,
            });
        }
    }
    hashes
}
/// Maps a spectrogram bin index onto one of `WANG_FREQ_BUCKETS` frequency
/// buckets by linear scaling: `bin * WANG_FREQ_BUCKETS / 513`, rounded down.
///
/// Bins `0..=512` land in `0..WANG_FREQ_BUCKETS`.
#[inline]
fn quantise_freq(bin: u16) -> u32 {
    // NOTE(review): 513 is presumably the STFT bin count for a 1024-point FFT
    // (n_fft / 2 + 1); confirm against `ShortTimeFFT::n_bins`.
    const N_BINS: u32 = 513;
    let scaled = u32::from(bin) * WANG_FREQ_BUCKETS;
    scaled / N_BINS
}
/// Streaming front end for the Wang fingerprinter: audio is pushed in chunks
/// and hashes are handed out once their anchor frame is old enough.
pub struct StreamingWang {
    /// Parameters used for every extraction run.
    cfg: WangConfig,
    /// All samples received so far, in arrival order.
    accumulated: Vec<f32>,
    /// First anchor frame whose hashes have not been emitted yet.
    next_anchor_frame: u32,
}

impl Default for StreamingWang {
    /// Builds a streaming fingerprinter from `WangConfig::default()`.
    fn default() -> Self {
        StreamingWang::new(WangConfig::default())
    }
}
impl StreamingWang {
    /// Creates an empty streaming fingerprinter with the given configuration.
    #[must_use]
    pub fn new(cfg: WangConfig) -> Self {
        Self {
            cfg,
            accumulated: Vec::new(),
            next_anchor_frame: 0,
        }
    }

    /// Configuration this instance was built with.
    #[must_use]
    pub fn config(&self) -> &WangConfig {
        &self.cfg
    }

    /// Number of complete analysis frames covered by the buffered samples:
    /// zero until a full window is available, then one more per hop.
    fn frames_buffered(&self) -> u32 {
        match self.accumulated.len().checked_sub(WANG_N_FFT) {
            Some(beyond_first) => (beyond_first / WANG_HOP + 1) as u32,
            None => 0,
        }
    }

    /// Number of newest frames that are held back from emission: the target
    /// zone length, plus the peak-picking neighbourhood, plus one second of
    /// frames rounded up.
    fn lookahead_frames(&self) -> u32 {
        // Exact integer ceiling of WANG_SR / WANG_HOP (63 for 8000 / 128).
        // This keeps the computation free of float rounding helpers, which
        // `core` alone does not provide.
        let hop = WANG_HOP as u32;
        let frames_per_sec_ceil = (WANG_SR + hop - 1) / hop;
        u32::from(self.cfg.target_zone_t) + WANG_PEAK_NEIGHBOURHOOD as u32 + frames_per_sec_ceil
    }

    /// Emits every hash whose anchor frame lies in
    /// `next_anchor_frame..cutoff`, stamped with the anchor's start time in
    /// milliseconds, and advances `next_anchor_frame` to `cutoff`.
    ///
    /// The whole accumulated buffer is re-fingerprinted on each call. If that
    /// extraction fails (for example because too little audio is buffered),
    /// nothing is emitted and `next_anchor_frame` is left untouched so the
    /// frames can still be emitted later.
    fn drain_up_to(&mut self, cutoff: u32) -> Vec<(TimestampMs, WangHash)> {
        let first = self.next_anchor_frame;
        if cutoff <= first {
            return Vec::new();
        }

        let mut wang = Wang::new(self.cfg.clone());
        let audio = AudioBuffer {
            samples: &self.accumulated,
            rate: SampleRate::HZ_8000,
        };
        let result = match wang.extract(audio) {
            Ok(fingerprint) => fingerprint,
            Err(_) => return Vec::new(),
        };

        let emitted: Vec<(TimestampMs, WangHash)> = result
            .hashes
            .into_iter()
            .filter(|h| (first..cutoff).contains(&h.t_anchor))
            .map(|h| {
                // Frame index -> sample offset -> milliseconds.
                let t_ms = u64::from(h.t_anchor) * WANG_HOP as u64 * 1000 / u64::from(WANG_SR);
                (TimestampMs(t_ms), h)
            })
            .collect();

        self.next_anchor_frame = cutoff;
        emitted
    }
}
impl StreamingFingerprinter for StreamingWang {
    type Frame = WangHash;

    /// Appends `samples` to the internal buffer and returns the hashes whose
    /// anchor frame has fallen out of the look-ahead window since the
    /// previous call.
    fn push(&mut self, samples: &[f32]) -> Vec<(TimestampMs, Self::Frame)> {
        self.accumulated.extend_from_slice(samples);
        // Frames younger than the look-ahead are held back.
        let cutoff = self
            .frames_buffered()
            .saturating_sub(self.lookahead_frames());
        self.drain_up_to(cutoff)
    }

    /// Returns every hash that has not been emitted yet, without holding any
    /// frames back.
    fn flush(&mut self) -> Vec<(TimestampMs, Self::Frame)> {
        self.drain_up_to(u32::MAX)
    }

    /// Duration of the look-ahead window in milliseconds.
    fn latency_ms(&self) -> u32 {
        // Widen before multiplying: with a large `target_zone_t` the product
        // `frames * hop * 1000` no longer fits into a `u32`.
        let ms = u64::from(self.lookahead_frames()) * WANG_HOP as u64 * 1000 / u64::from(WANG_SR);
        ms.min(u64::from(u32::MAX)) as u32
    }
}
// Unit tests for the offline (`Wang`) and streaming (`StreamingWang`)
// fingerprinters and for the hash-packing helpers.
#[cfg(test)]
mod tests {
use super::*;
use crate::SampleRate;
use alloc::vec;
use core::f32::consts::PI;
// Deterministic test signal at 8 kHz: an 880 Hz tone (amplitude 0.5) plus a
// 1320 Hz tone (amplitude 0.3) plus low-level noise from a seeded
// shift-xor generator, so that different seeds give different signals.
fn synthetic_audio(seed: u32, len: usize) -> Vec<f32> {
let mut out = Vec::with_capacity(len);
// The generator state must be non-zero, otherwise it would stay at zero.
let mut x: u32 = seed.max(1);
for n in 0..len {
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
let noise = ((x as i32 as f32) / (i32::MAX as f32)) * 0.05;
let t = n as f32 / 8_000.0;
let s = 0.5 * libm::sinf(2.0 * PI * 880.0 * t)
+ 0.3 * libm::sinf(2.0 * PI * 1320.0 * t)
+ noise;
out.push(s);
}
out
}
// Input at 16 kHz must be rejected with the offending rate in the error.
#[test]
fn rejects_wrong_sample_rate() {
let mut fp = Wang::default();
let samples = vec![0.0_f32; 16_000];
let buf = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_16000,
};
match fp.extract(buf) {
Err(AfpError::UnsupportedSampleRate(16_000)) => {}
other => panic!("expected UnsupportedSampleRate(16000), got {other:?}"),
}
}
// One second of audio is below the two-second minimum.
#[test]
fn rejects_short_audio() {
let mut fp = Wang::default();
let samples = vec![0.0_f32; 8_000]; let buf = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
match fp.extract(buf) {
Err(AfpError::AudioTooShort {
needed: 16_000,
got: 8_000,
}) => {}
other => panic!("expected AudioTooShort, got {other:?}"),
}
}
// Silence yields no hashes but still reports the frame rate.
#[test]
fn silence_gives_empty_fingerprint() {
let mut fp = Wang::default();
let samples = vec![0.0_f32; 8_000 * 3];
let buf = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let fpr = fp.extract(buf).unwrap();
assert_eq!(fpr.frames_per_sec, 62.5);
assert!(fpr.hashes.is_empty());
}
// A tonal signal yields hashes, and they come out sorted by
// (t_anchor, hash).
#[test]
fn synthetic_signal_produces_hashes() {
let mut fp = Wang::default();
let samples = synthetic_audio(0xC0FFEE, 8_000 * 5);
let buf = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let fpr = fp.extract(buf).unwrap();
assert!(!fpr.hashes.is_empty(), "expected hashes from a 5s tone");
for w in fpr.hashes.windows(2) {
assert!((w[0].t_anchor, w[0].hash) <= (w[1].t_anchor, w[1].hash));
}
}
// Two independent fingerprinters produce identical output for the same input.
#[test]
fn extraction_is_deterministic() {
let samples = synthetic_audio(0xDEAD, 8_000 * 4);
let mut fp1 = Wang::default();
let buf1 = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let f1 = fp1.extract(buf1).unwrap();
let mut fp2 = Wang::default();
let buf2 = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let f2 = fp2.extract(buf2).unwrap();
assert_eq!(f1.hashes.len(), f2.hashes.len());
for (a, b) in f1.hashes.iter().zip(f2.hashes.iter()) {
assert_eq!(a, b);
}
}
// Signals that differ only in their noise seed must not fingerprint the same.
// The same `Wang` instance is reused for both extractions.
#[test]
fn different_signals_diverge() {
let samples_a = synthetic_audio(0x1111, 8_000 * 3);
let samples_b = synthetic_audio(0x2222, 8_000 * 3);
let mut fp = Wang::default();
let fa = fp
.extract(AudioBuffer {
samples: &samples_a,
rate: SampleRate::HZ_8000,
})
.unwrap();
let fb = fp
.extract(AudioBuffer {
samples: &samples_b,
rate: SampleRate::HZ_8000,
})
.unwrap();
assert_ne!(fa.hashes, fb.hashes);
}
// Unpacking a hash gives back the quantised frequencies and the frame delta.
#[test]
fn hash_packing_round_trips() {
let peaks = alloc::vec![
Peak {
t_frame: 100,
f_bin: 50,
_pad: 0,
mag: -10.0
},
Peak {
t_frame: 110,
f_bin: 70,
_pad: 0,
mag: -12.0
},
];
let cfg = WangConfig::default();
let hashes = build_hashes(&peaks, &cfg);
assert_eq!(hashes.len(), 1);
let h = hashes[0].hash;
// Layout: 9 bits anchor frequency | 9 bits target frequency | 14 bits dt.
let f_a_q = (h >> 23) & 0x1FF;
let f_b_q = (h >> 14) & 0x1FF;
let dt = h & 0x3FFF;
assert_eq!(f_a_q, quantise_freq(50));
assert_eq!(f_b_q, quantise_freq(70));
assert_eq!(dt, 10);
assert_eq!(hashes[0].t_anchor, 100);
}
// Default look-ahead: (63 + 15 + 63) frames * 128 samples / 8000 Hz = 2256 ms.
#[test]
fn streaming_latency_matches_lookahead() {
let s = StreamingWang::default();
assert_eq!(s.latency_ms(), 2_256);
}
// Pushing nothing and flushing an empty stream both emit nothing.
#[test]
fn streaming_empty_push_is_empty() {
let mut s = StreamingWang::default();
assert!(s.push(&[]).is_empty());
assert!(s.flush().is_empty());
}
// Four seconds of silence emit no hashes, neither on push nor on flush.
#[test]
fn streaming_silence_emits_nothing() {
let mut s = StreamingWang::default();
let zeros = vec![0.0_f32; 8_000 * 4];
assert!(s.push(&zeros).is_empty());
assert!(s.flush().is_empty());
}
// Splits `total` into pseudo-random chunk lengths, each at least 1 and below
// `max_chunk`, that sum to exactly `total`.
fn chunk_sizes(seed: u32, total: usize, max_chunk: usize) -> Vec<usize> {
let mut x = seed.max(1);
let mut out = Vec::new();
let mut remaining = total;
while remaining > 0 {
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
let n = ((x as usize) % max_chunk).max(1).min(remaining);
out.push(n);
remaining -= n;
}
out
}
// Feeding the audio in irregular chunks and flushing must give exactly the
// hashes of a single offline extraction (compared after sorting both sides).
#[test]
fn streaming_offline_equivalence() {
let samples = synthetic_audio(0xBEEF, 8_000 * 6);
let mut offline = Wang::default();
let off = offline
.extract(AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
})
.unwrap();
let mut streaming = StreamingWang::default();
let mut online = Vec::new();
let mut cursor = 0;
for n in chunk_sizes(0xCAFE, samples.len(), 4_000) {
let end = cursor + n;
online.extend(
streaming
.push(&samples[cursor..end])
.into_iter()
.map(|(_, h)| h),
);
cursor = end;
}
online.extend(streaming.flush().into_iter().map(|(_, h)| h));
let mut a: Vec<WangHash> = off.hashes;
let mut b: Vec<WangHash> = online;
a.sort_unstable_by_key(|h| (h.t_anchor, h.hash));
b.sort_unstable_by_key(|h| (h.t_anchor, h.hash));
assert_eq!(a.len(), b.len(), "hash count mismatch");
assert_eq!(a, b, "hash sequences differ");
}
// Lowering `fan_out` from 10 to 3 must strictly reduce the number of hashes.
#[test]
fn smaller_fan_out_yields_fewer_hashes() {
let samples = synthetic_audio(0xFEED, 8_000 * 4);
let buf_a = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let buf_b = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
};
let mut wide = Wang::new(WangConfig {
fan_out: 10,
..WangConfig::default()
});
let mut narrow = Wang::new(WangConfig {
fan_out: 3,
..WangConfig::default()
});
let f_wide = wide.extract(buf_a).unwrap();
let f_narrow = narrow.extract(buf_b).unwrap();
assert!(
f_narrow.hashes.len() < f_wide.hashes.len(),
"narrow={} wide={}",
f_narrow.hashes.len(),
f_wide.hashes.len(),
);
}
// Quantisation is monotonic over bins 0..=512 and stays below the bucket count.
#[test]
fn quantise_freq_covers_full_range() {
assert_eq!(quantise_freq(0), 0);
assert!(quantise_freq(512) < WANG_FREQ_BUCKETS);
let mut prev = 0;
for b in 0..513_u16 {
let q = quantise_freq(b);
assert!(q >= prev);
assert!(q < WANG_FREQ_BUCKETS);
prev = q;
}
}
// Worst-case chunking: one sample per push must still match offline output.
#[test]
fn streaming_with_one_sample_chunks_still_matches_offline() {
let samples = synthetic_audio(0xABCD, 8_000 * 3);
let mut offline = Wang::default();
let off = offline
.extract(AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_8000,
})
.unwrap();
let mut s = StreamingWang::default();
let mut online = Vec::new();
for &sample in &samples {
online.extend(s.push(&[sample]).into_iter().map(|(_, h)| h));
}
online.extend(s.flush().into_iter().map(|(_, h)| h));
let mut a = off.hashes;
let mut b = online;
a.sort_unstable_by_key(|h| (h.t_anchor, h.hash));
b.sort_unstable_by_key(|h| (h.t_anchor, h.hash));
assert_eq!(a, b);
}
// With the default 63-frame by 64-bin zone, only the pair (0,100) -> (5,110)
// qualifies: same-frame peaks, peaks more than 64 bins away and the peak at
// frame 70 are all excluded.
#[test]
fn target_zone_filters_far_peaks() {
let peaks = alloc::vec![
Peak {
t_frame: 0,
f_bin: 100,
_pad: 0,
mag: 0.0
},
Peak {
t_frame: 0,
f_bin: 200,
_pad: 0,
mag: 0.0
},
Peak {
t_frame: 70,
f_bin: 100,
_pad: 0,
mag: 0.0
},
Peak {
t_frame: 5,
f_bin: 110,
_pad: 0,
mag: 0.0
},
Peak {
t_frame: 5,
f_bin: 300,
_pad: 0,
mag: 0.0
},
];
// `build_hashes` scans forward in time, so the peaks are put in
// (t_frame, f_bin) order first.
let mut sorted = peaks;
sorted.sort_unstable_by_key(|p| (p.t_frame, p.f_bin));
let cfg = WangConfig::default();
let hashes = build_hashes(&sorted, &cfg);
assert_eq!(hashes.len(), 1);
assert_eq!(hashes[0].t_anchor, 0);
}
}