use alloc::vec::Vec;
use libm::powf;
use crate::dsp::stft::{ShortTimeFFT, StftConfig};
use crate::dsp::windows::WindowKind;
use crate::{AfpError, AudioBuffer, Fingerprinter, Result, StreamingFingerprinter, TimestampMs};
/// Fingerprint returned by [`Haitsma`]'s `extract`.
#[derive(Clone, Debug)]
pub struct HaitsmaFingerprint {
/// One 32-bit hash per pair of consecutive STFT frames, in time order.
/// Empty when fewer than two frames were available.
pub frames: Vec<u32>,
/// Number of entries in `frames` per second of audio; `extract` always
/// sets this to `HAITSMA_SR / HAITSMA_HOP` (78.125).
pub frames_per_sec: f32,
}
/// Frequency range covered by the log-spaced energy bands.
#[derive(Clone, Debug)]
pub struct HaitsmaConfig {
/// Lower edge of the lowest band, in Hz. `Haitsma::new` panics unless this
/// is strictly positive.
pub fmin: f32,
/// Upper edge of the highest band, in Hz. `Haitsma::new` panics unless this
/// is greater than `fmin` and below half the fixed sample rate.
pub fmax: f32,
}
impl Default for HaitsmaConfig {
    /// Returns the default analysis range: 300 Hz up to 2 kHz.
    fn default() -> Self {
        let (fmin, fmax) = (300.0, 2_000.0);
        Self { fmin, fmax }
    }
}
/// STFT window length in samples; the streaming extractor also waits for this
/// many buffered samples before analysing a frame.
const HAITSMA_N_FFT: usize = 2048;
/// Number of samples between the starts of consecutive STFT frames.
const HAITSMA_HOP: usize = 64;
/// The only input sample rate accepted by `extract`, in Hz.
const HAITSMA_SR: u32 = 5_000;
/// Hashes produced per second of audio: `5000 / 64 = 78.125`.
const HAITSMA_FRAMES_PER_SEC: f32 = HAITSMA_SR as f32 / HAITSMA_HOP as f32;
/// Number of energy bands per frame; the 32 differences between adjacent
/// bands supply the 32 bits of each hash.
const HAITSMA_N_BANDS: usize = 33;
/// Offline fingerprinter that hashes a whole buffer in one `extract` call.
pub struct Haitsma {
// Band range this instance was built with; returned by `config()`.
cfg: HaitsmaConfig,
// STFT engine created in `new` with the fixed window length and hop.
stft: ShortTimeFFT,
// For each FFT bin, the band it contributes to, or `None` when the bin's
// frequency lies outside the configured range.
bin_to_band: Vec<Option<u8>>,
}
impl Default for Haitsma {
    /// Builds a fingerprinter from the default [`HaitsmaConfig`].
    fn default() -> Self {
        let cfg = HaitsmaConfig::default();
        Self::new(cfg)
    }
}
impl Haitsma {
    /// Creates a fingerprinter for the band range described by `cfg`.
    ///
    /// The STFT uses a Hann window of `HAITSMA_N_FFT` samples, a hop of
    /// `HAITSMA_HOP` samples and no centering. The bin-to-band lookup table is
    /// computed once here.
    ///
    /// # Panics
    ///
    /// Panics when `cfg.fmin` is not strictly positive, when `cfg.fmax` does
    /// not exceed `cfg.fmin`, or when `cfg.fmax` is not below half of
    /// `HAITSMA_SR`.
    #[must_use]
    pub fn new(cfg: HaitsmaConfig) -> Self {
        let (lo, hi) = (cfg.fmin, cfg.fmax);
        let nyquist = HAITSMA_SR as f32 / 2.0;

        // Checked in this order so the first violated rule is the one reported.
        assert!(lo > 0.0, "fmin must be positive");
        assert!(hi > lo, "fmax must exceed fmin");
        assert!(
            hi < nyquist,
            "fmax must be below Nyquist ({} Hz)",
            HAITSMA_SR / 2
        );

        let stft_cfg = StftConfig {
            n_fft: HAITSMA_N_FFT,
            hop: HAITSMA_HOP,
            window: WindowKind::Hann,
            center: false,
        };
        let stft = ShortTimeFFT::new(stft_cfg);
        let bin_to_band = build_bin_to_band(&cfg, stft.n_bins());

        Self {
            cfg,
            stft,
            bin_to_band,
        }
    }
}
impl Fingerprinter for Haitsma {
    type Output = HaitsmaFingerprint;
    type Config = HaitsmaConfig;

    /// Stable identifier of this algorithm.
    fn name(&self) -> &'static str {
        "haitsma-v1"
    }

    /// Band range this instance was constructed with.
    fn config(&self) -> &Self::Config {
        &self.cfg
    }

    /// Sample rate (Hz) the input must have.
    fn required_sample_rate(&self) -> u32 {
        HAITSMA_SR
    }

    /// Minimum input length: two seconds at the required sample rate.
    fn min_samples(&self) -> usize {
        2 * HAITSMA_SR as usize
    }

    /// Computes one 32-bit hash per pair of consecutive STFT frames.
    ///
    /// # Errors
    ///
    /// * `AfpError::UnsupportedSampleRate` when the buffer is not at
    ///   `HAITSMA_SR`.
    /// * `AfpError::AudioTooShort` when the buffer holds fewer than
    ///   `min_samples()` samples.
    fn extract(&mut self, audio: AudioBuffer<'_>) -> Result<Self::Output> {
        let rate_hz = audio.rate.hz();
        if rate_hz != HAITSMA_SR {
            return Err(AfpError::UnsupportedSampleRate(rate_hz));
        }

        let needed = self.min_samples();
        let got = audio.samples.len();
        if got < needed {
            return Err(AfpError::AudioTooShort { needed, got });
        }

        let (power_flat, n_frames, n_bins) = self.stft.power_flat(audio.samples);
        // A hash needs a frame and its predecessor.
        if n_frames < 2 {
            return Ok(HaitsmaFingerprint {
                frames: Vec::new(),
                frames_per_sec: HAITSMA_FRAMES_PER_SEC,
            });
        }

        // Collapse every frame's power spectrum into per-band energy sums.
        let bin_to_band = &self.bin_to_band;
        let energies: Vec<[f32; HAITSMA_N_BANDS]> = (0..n_frames)
            .map(|frame| {
                let row = &power_flat[frame * n_bins..(frame + 1) * n_bins];
                let mut acc = [0.0_f32; HAITSMA_N_BANDS];
                for (bin, &power) in row.iter().enumerate() {
                    if let Some(band) = bin_to_band[bin] {
                        acc[usize::from(band)] += power;
                    }
                }
                acc
            })
            .collect();

        // Each hash compares a frame (pair[1]) with the one before it (pair[0]).
        let frames: Vec<u32> = energies
            .windows(2)
            .map(|pair| pack_frame_bits(&pair[1], &pair[0]))
            .collect();

        Ok(HaitsmaFingerprint {
            frames,
            frames_per_sec: HAITSMA_FRAMES_PER_SEC,
        })
    }
}
/// Packs the 32 band-difference signs of one frame transition into a `u32`.
///
/// For each band `b` in `0..32`, the bit is set when the energy difference
/// between band `b` and band `b + 1` is larger in `curr` than in `prev`.
/// Band 0 lands in the most significant bit and band 31 in the least
/// significant bit.
fn pack_frame_bits(curr: &[f32; HAITSMA_N_BANDS], prev: &[f32; HAITSMA_N_BANDS]) -> u32 {
    (0..32_usize)
        .filter(|&band| {
            let now = curr[band] - curr[band + 1];
            let before = prev[band] - prev[band + 1];
            now - before > 0.0
        })
        .fold(0_u32, |hash, band| hash | (1_u32 << (31 - band)))
}
/// Builds the lookup table from FFT bin index to energy-band index.
///
/// The range `cfg.fmin..cfg.fmax` is split into `HAITSMA_N_BANDS` bands whose
/// edges are spaced geometrically. A bin's frequency is its index times
/// `HAITSMA_SR / HAITSMA_N_FFT`. Each bin maps to the band whose half-open
/// interval `[lower, upper)` contains that frequency, or to `None` when it
/// falls outside the overall range.
///
/// Returns a vector with exactly `n_bins` entries.
fn build_bin_to_band(cfg: &HaitsmaConfig, n_bins: usize) -> Vec<Option<u8>> {
    // Edge k sits at fmin * (fmax / fmin)^(k / N_BANDS).
    let ratio = cfg.fmax / cfg.fmin;
    let mut edges = [0.0_f32; HAITSMA_N_BANDS + 1];
    for (k, edge) in edges.iter_mut().enumerate() {
        let frac = k as f32 / HAITSMA_N_BANDS as f32;
        *edge = cfg.fmin * powf(ratio, frac);
    }
    let (lowest, highest) = (edges[0], edges[HAITSMA_N_BANDS]);

    let bin_hz = HAITSMA_SR as f32 / HAITSMA_N_FFT as f32;
    (0..n_bins)
        .map(|bin| {
            let freq = bin as f32 * bin_hz;
            if freq < lowest || freq >= highest {
                return None;
            }
            // First band whose [lower, upper) interval holds this frequency.
            edges
                .windows(2)
                .position(|pair| freq >= pair[0] && freq < pair[1])
                .map(|band| band as u8)
        })
        .collect()
}
/// Incremental fingerprinter that accepts audio in arbitrary-sized chunks
/// through `push` and emits timestamped hashes as frames become available.
pub struct StreamingHaitsma {
// Band range this instance was built with; returned by `config()`.
cfg: HaitsmaConfig,
// STFT engine used to compute one frame's power spectrum at a time.
stft: ShortTimeFFT,
// Samples received but not yet hopped past; `push` appends to it and
// removes one hop per analysed frame.
sample_carry: Vec<f32>,
// For each FFT bin, the band it contributes to, or `None` when unmapped.
bin_to_band: Vec<Option<u8>>,
// Scratch buffer holding the power spectrum of the frame being analysed.
frame_power: Vec<f32>,
// False until the first frame has been analysed; no hash is emitted for
// that frame because it has no predecessor.
has_prev: bool,
// Band energies of the most recently analysed frame.
prev_energy: [f32; HAITSMA_N_BANDS],
// Frame index used for the next emitted hash's timestamp; starts at 1.
next_frame_idx: u32,
// Hashes produced but not yet handed to the caller; emptied by `push`
// and `flush`.
pending: Vec<(TimestampMs, u32)>,
}
impl Default for StreamingHaitsma {
    /// Builds a streaming fingerprinter from the default [`HaitsmaConfig`].
    fn default() -> Self {
        let cfg = HaitsmaConfig::default();
        Self::new(cfg)
    }
}
impl StreamingHaitsma {
#[must_use]
pub fn new(cfg: HaitsmaConfig) -> Self {
let stft = ShortTimeFFT::new(StftConfig {
n_fft: HAITSMA_N_FFT,
hop: HAITSMA_HOP,
window: WindowKind::Hann,
center: false,
});
let bin_to_band = build_bin_to_band(&cfg, stft.n_bins());
let n_bins = stft.n_bins();
Self {
cfg,
stft,
sample_carry: Vec::new(),
bin_to_band,
frame_power: alloc::vec![0.0_f32; n_bins],
has_prev: false,
prev_energy: [0.0_f32; HAITSMA_N_BANDS],
next_frame_idx: 1,
pending: Vec::new(),
}
}
#[must_use]
pub fn config(&self) -> &HaitsmaConfig {
&self.cfg
}
}
impl StreamingFingerprinter for StreamingHaitsma {
    type Frame = u32;

    /// Feeds `samples` into the stream and returns every hash completed so far.
    ///
    /// Samples are buffered until a full window of `HAITSMA_N_FFT` is
    /// available; the window then advances by `HAITSMA_HOP` per analysed frame.
    /// The first analysed frame only primes the state and emits no hash. Each
    /// returned timestamp is the start offset, in milliseconds, of the newer
    /// frame of the compared pair.
    fn push(&mut self, samples: &[f32]) -> Vec<(TimestampMs, Self::Frame)> {
        self.sample_carry.extend_from_slice(samples);

        let mut consumed = 0_usize;
        while consumed + HAITSMA_N_FFT <= self.sample_carry.len() {
            let window = &self.sample_carry[consumed..consumed + HAITSMA_N_FFT];
            self.stft.process_frame_power(window, &mut self.frame_power);

            // Collapse the power spectrum into per-band energy sums.
            let mut energy = [0.0_f32; HAITSMA_N_BANDS];
            for (bin, &power) in self.frame_power.iter().enumerate() {
                if let Some(band) = self.bin_to_band[bin] {
                    energy[usize::from(band)] += power;
                }
            }

            // `replace` yields the old flag: false only for the very first frame.
            if core::mem::replace(&mut self.has_prev, true) {
                let hash = pack_frame_bits(&energy, &self.prev_energy);
                let t_ms = u64::from(self.next_frame_idx) * HAITSMA_HOP as u64 * 1000
                    / u64::from(HAITSMA_SR);
                self.pending.push((TimestampMs(t_ms), hash));
                self.next_frame_idx += 1;
            }

            self.prev_energy = energy;
            consumed += HAITSMA_HOP;
        }

        // Keep only the samples that later frames still need.
        if consumed > 0 {
            self.sample_carry.drain(..consumed);
        }
        core::mem::take(&mut self.pending)
    }

    /// Returns any hashes still queued; buffered samples are left untouched.
    fn flush(&mut self) -> Vec<(TimestampMs, Self::Frame)> {
        core::mem::take(&mut self.pending)
    }

    /// Duration of one analysis window in whole milliseconds.
    fn latency_ms(&self) -> u32 {
        HAITSMA_N_FFT as u32 * 1000 / HAITSMA_SR
    }
}
/// Unit tests for the offline and streaming Haitsma fingerprinters.
#[cfg(test)]
mod tests {
use super::*;
use crate::SampleRate;
use alloc::vec;
use core::f32::consts::PI;
/// Deterministic test signal: 600 Hz and 1200 Hz sines plus low-level
/// xorshift noise seeded by `seed`, sampled at 5 kHz.
fn synthetic_audio(seed: u32, len: usize) -> Vec<f32> {
let mut out = Vec::with_capacity(len);
// xorshift state must be non-zero, otherwise it stays at zero forever.
let mut x: u32 = seed.max(1);
for n in 0..len {
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
let noise = ((x as i32 as f32) / (i32::MAX as f32)) * 0.05;
let t = n as f32 / 5_000.0;
let s = 0.5 * libm::sinf(2.0 * PI * 600.0 * t)
+ 0.3 * libm::sinf(2.0 * PI * 1200.0 * t)
+ noise;
out.push(s);
}
out
}
/// Splits `total` into pseudo-random chunk lengths, each at least 1 and
/// below `max_chunk`, that sum exactly to `total`.
fn chunk_sizes(seed: u32, total: usize, max_chunk: usize) -> Vec<usize> {
let mut x = seed.max(1);
let mut out = Vec::new();
let mut remaining = total;
while remaining > 0 {
x ^= x << 13;
x ^= x >> 17;
x ^= x << 5;
let n = ((x as usize) % max_chunk).max(1).min(remaining);
out.push(n);
remaining -= n;
}
out
}
/// The 5 kHz sample rate the fingerprinter requires.
fn sr_5khz() -> SampleRate {
SampleRate::new(5_000).unwrap()
}
/// Input at 16 kHz must be rejected with `UnsupportedSampleRate`.
#[test]
fn rejects_wrong_sample_rate() {
let mut fp = Haitsma::default();
let samples = vec![0.0_f32; 10_000];
let buf = AudioBuffer {
samples: &samples,
rate: SampleRate::HZ_16000,
};
match fp.extract(buf) {
Err(AfpError::UnsupportedSampleRate(16_000)) => {}
other => panic!("expected UnsupportedSampleRate(16000), got {other:?}"),
}
}
/// One second of audio is below the two-second minimum.
#[test]
fn rejects_short_audio() {
let mut fp = Haitsma::default();
let samples = vec![0.0_f32; 5_000];
let buf = AudioBuffer {
samples: &samples,
rate: sr_5khz(),
};
match fp.extract(buf) {
Err(AfpError::AudioTooShort {
needed: 10_000,
got: 5_000,
}) => {}
other => panic!("expected AudioTooShort, got {other:?}"),
}
}
/// All-zero input yields equal band energies everywhere, so no bit is set.
#[test]
fn silence_gives_all_zero_frames() {
let mut fp = Haitsma::default();
let samples = vec![0.0_f32; 5_000 * 3];
let buf = AudioBuffer {
samples: &samples,
rate: sr_5khz(),
};
let fpr = fp.extract(buf).unwrap();
assert_eq!(fpr.frames_per_sec, 78.125);
assert!(!fpr.frames.is_empty());
for &h in &fpr.frames {
assert_eq!(h, 0, "silence should produce zero hash");
}
}
/// A tonal signal with noise must set bits in more than a quarter of the
/// hashes.
#[test]
fn synthetic_signal_produces_nonzero_hashes() {
let mut fp = Haitsma::default();
let samples = synthetic_audio(0xC0FFEE, 5_000 * 4);
let buf = AudioBuffer {
samples: &samples,
rate: sr_5khz(),
};
let fpr = fp.extract(buf).unwrap();
assert!(!fpr.frames.is_empty());
let nonzero = fpr.frames.iter().filter(|&&h| h != 0).count();
assert!(
nonzero > fpr.frames.len() / 4,
"expected most frames to have at least one bit set, got {nonzero}/{}",
fpr.frames.len()
);
}
/// Two fresh extractors given the same samples produce the same hashes.
#[test]
fn extraction_is_deterministic() {
let samples = synthetic_audio(0xDEAD, 5_000 * 3);
let mut fp1 = Haitsma::default();
let f1 = fp1
.extract(AudioBuffer {
samples: &samples,
rate: sr_5khz(),
})
.unwrap();
let mut fp2 = Haitsma::default();
let f2 = fp2
.extract(AudioBuffer {
samples: &samples,
rate: sr_5khz(),
})
.unwrap();
assert_eq!(f1.frames, f2.frames);
}
/// Signals that differ only in their noise seed yield different hashes;
/// the same extractor instance is reused for both.
#[test]
fn different_signals_diverge() {
let a = synthetic_audio(0x1111, 5_000 * 3);
let b = synthetic_audio(0x2222, 5_000 * 3);
let mut fp = Haitsma::default();
let fa = fp
.extract(AudioBuffer {
samples: &a,
rate: sr_5khz(),
})
.unwrap();
let fb = fp
.extract(AudioBuffer {
samples: &b,
rate: sr_5khz(),
})
.unwrap();
assert_ne!(fa.frames, fb.frames);
}
/// Energy only in band 0 sets only the most significant bit.
#[test]
fn pack_frame_bits_msb_zero_band_layout() {
let mut curr = [0.0_f32; HAITSMA_N_BANDS];
let prev = [0.0_f32; HAITSMA_N_BANDS];
curr[0] = 1.0;
let h = pack_frame_bits(&curr, &prev);
assert_eq!(h, 1 << 31);
}
/// Energy only in band 31 sets only the least significant bit.
#[test]
fn band_31_lives_in_the_lsb() {
let mut curr = [0.0_f32; HAITSMA_N_BANDS];
let prev = [0.0_f32; HAITSMA_N_BANDS];
curr[31] = 1.0;
let h = pack_frame_bits(&curr, &prev);
assert_eq!(h, 1);
}
/// 2048 samples at 5 kHz is 409.6 ms, truncated to 409 by integer division.
#[test]
fn streaming_latency_matches_n_fft() {
let s = StreamingHaitsma::default();
assert_eq!(s.latency_ms(), 409);
}
/// With the default range every band receives at least one FFT bin, and a
/// bin near 100 Hz is left unmapped.
#[test]
fn band_lookup_table_covers_in_band_frequencies() {
let cfg = HaitsmaConfig::default();
let n_bins = HAITSMA_N_FFT / 2 + 1;
let lookup = build_bin_to_band(&cfg, n_bins);
assert_eq!(lookup.len(), n_bins);
let bin_hz = HAITSMA_SR as f32 / HAITSMA_N_FFT as f32;
let mut hit_per_band = [false; HAITSMA_N_BANDS];
for &b in &lookup {
if let Some(b) = b {
hit_per_band[b as usize] = true;
}
}
for (i, &h) in hit_per_band.iter().enumerate() {
assert!(h, "band {i} has no FFT bins");
}
let bin_at_100hz = (100.0 / bin_hz) as usize;
assert!(
lookup[bin_at_100hz].is_none(),
"100 Hz should be below fmin=300"
);
}
/// A non-default but valid band range still produces hashes.
#[test]
fn custom_band_range() {
let cfg = HaitsmaConfig {
fmin: 500.0,
fmax: 1500.0,
};
let mut h = Haitsma::new(cfg.clone());
let samples = synthetic_audio(0xC0FFEE, 5_000 * 3);
let buf = AudioBuffer {
samples: &samples,
rate: sr_5khz(),
};
let f = h.extract(buf).unwrap();
assert!(!f.frames.is_empty());
}
/// `fmax == fmin` is rejected by the constructor.
#[test]
#[should_panic(expected = "fmax must exceed fmin")]
fn invalid_band_range_panics() {
let _ = Haitsma::new(HaitsmaConfig {
fmin: 1000.0,
fmax: 1000.0,
});
}
/// `fmax` above 2500 Hz (half of 5 kHz) is rejected by the constructor.
#[test]
#[should_panic(expected = "below Nyquist")]
fn fmax_above_nyquist_panics() {
let _ = Haitsma::new(HaitsmaConfig {
fmin: 300.0,
fmax: 3_000.0,
});
}
/// Feeding the same audio in irregular chunks must reproduce the offline
/// hash sequence exactly; timestamps are ignored here.
#[test]
fn streaming_offline_equivalence() {
let samples = synthetic_audio(0xBEEF, 5_000 * 5);
let mut offline = Haitsma::default();
let off = offline
.extract(AudioBuffer {
samples: &samples,
rate: sr_5khz(),
})
.unwrap();
let mut streaming = StreamingHaitsma::default();
let mut online: Vec<u32> = Vec::new();
let mut cursor = 0;
for n in chunk_sizes(0xCAFE, samples.len(), 3_000) {
let end = cursor + n;
online.extend(
streaming
.push(&samples[cursor..end])
.into_iter()
.map(|(_, h)| h),
);
cursor = end;
}
online.extend(streaming.flush().into_iter().map(|(_, h)| h));
assert_eq!(off.frames, online, "streaming != offline frame sequence");
}
/// Over 30 s of input pushed in 256-sample chunks, the carry buffer stays
/// below one window and `pending` is empty after every push.
#[test]
fn streaming_state_stays_bounded_under_long_input() {
let secs = 30usize;
let samples = synthetic_audio(13, HAITSMA_SR as usize * secs);
let chunk = 256usize;
let mut s = StreamingHaitsma::default();
let mut peak_carry = 0usize;
let mut start = 0usize;
while start < samples.len() {
let end = (start + chunk).min(samples.len());
let _ = s.push(&samples[start..end]);
peak_carry = peak_carry.max(s.sample_carry.len());
assert!(s.sample_carry.len() < HAITSMA_N_FFT);
assert_eq!(s.pending.len(), 0, "pending leaked between pushes");
start = end;
}
assert!(peak_carry < HAITSMA_N_FFT, "peak_carry {peak_carry}");
// The carry should really be used: it must come within one hop of a
// full window at some point.
assert!(
peak_carry >= HAITSMA_N_FFT - HAITSMA_HOP,
"expected the carry to fill close to N_FFT under continuous input, got {peak_carry}",
);
let _ = s.flush();
assert_eq!(s.pending.len(), 0, "pending after flush");
}
}