stratum_dsp/
lib.rs

1//! # Stratum DSP
2//!
3//! A professional-grade audio analysis engine for DJ applications, providing
4//! accurate BPM detection, key detection, and beat tracking.
5//!
6//! ## Features
7//!
8//! - **BPM Detection**: Multi-method onset detection with autocorrelation and comb filterbank
9//! - **Key Detection**: Chroma-based analysis with Krumhansl-Kessler template matching
10//! - **Beat Tracking**: HMM-based beat grid generation with tempo drift correction
11//! - **ML Refinement**: Optional ONNX model for edge case correction (Phase 2)
12//!
13//! ## Quick Start
14//!
15//! ```no_run
16//! use stratum_dsp::{analyze_audio, AnalysisConfig};
17//!
18//! // Load audio samples (mono, f32, normalized)
19//! let samples: Vec<f32> = vec![]; // Your audio data
20//! let sample_rate = 44100;
21//!
22//! // Analyze
23//! let result = analyze_audio(&samples, sample_rate, AnalysisConfig::default())?;
24//!
25//! println!("BPM: {:.2} (confidence: {:.2})", result.bpm, result.bpm_confidence);
26//! println!("Key: {:?} (confidence: {:.2})", result.key, result.key_confidence);
27//! # Ok::<(), stratum_dsp::AnalysisError>(())
28//! ```
29//!
30//! ## Architecture
31//!
32//! The analysis pipeline follows this flow:
33//!
34//! ```text
35//! Audio Input -> Preprocessing -> Feature Extraction -> Analysis -> ML Refinement -> Output
36//! ```
37//!
38//! See the [module documentation](https://docs.rs/stratum-dsp) for details.
39
40#![warn(missing_docs)]
41#![warn(clippy::all)]
42
43pub mod analysis;
44pub mod config;
45pub mod error;
46pub mod features;
47pub mod preprocessing;
48
49#[cfg(feature = "ml")]
50pub mod ml;
51
52// Re-export main types
53pub use analysis::result::{AnalysisResult, AnalysisMetadata, BeatGrid, Key, KeyType};
54pub use analysis::confidence::{AnalysisConfidence, compute_confidence};
55pub use config::AnalysisConfig;
56pub use error::AnalysisError;
57
58/// Main analysis function
59///
60/// Analyzes audio samples and returns comprehensive analysis results including
61/// BPM, key, beat grid, and confidence scores.
62///
63/// # Arguments
64///
65/// * `samples` - Mono audio samples, normalized to [-1.0, 1.0]
66/// * `sample_rate` - Sample rate in Hz (typically 44100 or 48000)
67/// * `config` - Analysis configuration parameters
68///
69/// # Returns
70///
71/// `AnalysisResult` containing BPM, key, beat grid, and confidence metrics
72///
73/// # Errors
74///
75/// Returns `AnalysisError` if analysis fails (invalid input, processing error, etc.)
76///
77/// # Example
78///
79/// ```no_run
80/// use stratum_dsp::{analyze_audio, AnalysisConfig};
81///
82/// let samples = vec![0.0f32; 44100 * 30]; // 30 seconds of silence
83/// let result = analyze_audio(&samples, 44100, AnalysisConfig::default())?;
84/// # Ok::<(), stratum_dsp::AnalysisError>(())
85/// ```
86pub fn analyze_audio(
87    samples: &[f32],
88    sample_rate: u32,
89    config: AnalysisConfig,
90) -> Result<AnalysisResult, AnalysisError> {
91    use std::time::Instant;
92    let start_time = Instant::now();
93    
94    log::debug!("Starting audio analysis: {} samples at {} Hz", samples.len(), sample_rate);
95    
96    if samples.is_empty() {
97        return Err(AnalysisError::InvalidInput("Empty audio samples".to_string()));
98    }
99    
100    if sample_rate == 0 {
101        return Err(AnalysisError::InvalidInput("Invalid sample rate".to_string()));
102    }
103    
104    // Phase 1A: Preprocessing
105    let mut processed_samples = samples.to_vec();
106    
107    // 1. Normalization
108    use preprocessing::normalization::{normalize, NormalizationConfig};
109    if config.enable_normalization {
110        let norm_config = NormalizationConfig {
111            method: config.normalization,
112            target_loudness_lufs: -14.0, // Default target
113            max_headroom_db: 1.0,
114        };
115        let _loudness_metadata = normalize(&mut processed_samples, norm_config, sample_rate as f32)?;
116    } else {
117        log::debug!("Skipping normalization (enable_normalization=false)");
118    }
119    
120    // 2. Silence detection and trimming
121    use preprocessing::silence::{detect_and_trim, SilenceDetector};
122    let (trimmed_samples, _silence_regions) = if config.enable_silence_trimming {
123        let silence_detector = SilenceDetector {
124            threshold_db: config.min_amplitude_db,
125            min_duration_ms: 500,
126            frame_size: config.frame_size,
127        };
128        detect_and_trim(&processed_samples, sample_rate, silence_detector)?
129    } else {
130        log::debug!("Skipping silence trimming (enable_silence_trimming=false)");
131        (processed_samples.clone(), Vec::new())
132    };
133    
134    if trimmed_samples.is_empty() {
135        return Err(AnalysisError::ProcessingError("Audio is entirely silent after trimming".to_string()));
136    }
137    
138    // Phase 1A: Onset Detection
139    // Note: energy flux is the only detector that works directly on the samples.
140    // Spectral methods (spectral_flux, hfc, hpss) require the STFT computed below and run in the onset-consensus step.
141    use features::onset::energy_flux::detect_energy_flux_onsets;
142    
143    let energy_onsets = detect_energy_flux_onsets(
144        &trimmed_samples,
145        config.frame_size,
146        config.hop_size,
147        -20.0, // threshold_db
148    )?;
149    
150    log::debug!("Detected {} onsets using energy flux", energy_onsets.len());
151    
152    // Phase 1F: Tempogram-based BPM Detection (replaces Phase 1B period estimation)
153    // Compute STFT once (used by tempogram BPM and STFT-based onset detectors)
154    use features::chroma::extractor::compute_stft;
155    let magnitude_spec_frames = compute_stft(
156        &trimmed_samples,
157        config.frame_size,
158        config.hop_size,
159    )?;
160
161    // Onset consensus (improves beat tracking + legacy BPM fallback robustness)
162    //
163    // Important: Tempogram BPM does NOT use these onsets, but the legacy BPM estimator and
164    // beat tracker do. Since we will compare / integrate legacy + tempogram estimates,
165    // we want the best onsets we can get.
166    let mut onsets_for_legacy: Vec<usize> = energy_onsets.clone();
167    let mut onsets_for_beat_tracking: Vec<usize> = energy_onsets.clone();
168
169    if config.enable_onset_consensus && !magnitude_spec_frames.is_empty() {
170        use features::onset::consensus::{vote_onsets, OnsetConsensus};
171        use features::onset::hfc::detect_hfc_onsets;
172        use features::onset::spectral_flux::detect_spectral_flux_onsets;
173
174        let to_samples = |frames: Vec<usize>, hop_size: usize, n_samples: usize| -> Vec<usize> {
175            let mut out: Vec<usize> = frames
176                .into_iter()
177                .map(|f| f.saturating_mul(hop_size))
178                .filter(|&s| s < n_samples)
179                .collect();
180            out.sort_unstable();
181            out.dedup();
182            out
183        };
184
185        let spectral_onsets_frames = match detect_spectral_flux_onsets(
186            &magnitude_spec_frames,
187            config.onset_threshold_percentile,
188        ) {
189            Ok(v) => v,
190            Err(e) => {
191                log::warn!("Spectral flux onset detection failed: {}", e);
192                Vec::new()
193            }
194        };
195        let spectral_onsets_samples = to_samples(
196            spectral_onsets_frames,
197            config.hop_size,
198            trimmed_samples.len(),
199        );
200
201        let hfc_onsets_frames = match detect_hfc_onsets(
202            &magnitude_spec_frames,
203            sample_rate,
204            config.onset_threshold_percentile,
205        ) {
206            Ok(v) => v,
207            Err(e) => {
208                log::warn!("HFC onset detection failed: {}", e);
209                Vec::new()
210            }
211        };
212        let hfc_onsets_samples = to_samples(hfc_onsets_frames, config.hop_size, trimmed_samples.len());
213
214        let hpss_onsets_samples = if config.enable_hpss_onsets {
215            use features::onset::hpss::{detect_hpss_onsets, hpss_decompose};
216            match hpss_decompose(&magnitude_spec_frames, config.hpss_margin)
217                .and_then(|(_, p)| detect_hpss_onsets(&p, config.onset_threshold_percentile))
218            {
219                Ok(hpss_frames) => to_samples(hpss_frames, config.hop_size, trimmed_samples.len()),
220                Err(e) => {
221                    log::warn!("HPSS onset detection failed: {}", e);
222                    Vec::new()
223                }
224            }
225        } else {
226            Vec::new()
227        };
228
229        log::debug!(
230            "Onset detectors: energy_flux(samples)={}, spectral_flux(samples)={}, hfc(samples)={}, hpss(samples)={}",
231            energy_onsets.len(),
232            spectral_onsets_samples.len(),
233            hfc_onsets_samples.len(),
234            hpss_onsets_samples.len()
235        );
236
237        let consensus = OnsetConsensus {
238            energy_flux: energy_onsets.clone(),
239            spectral_flux: spectral_onsets_samples,
240            hfc: hfc_onsets_samples,
241            hpss: hpss_onsets_samples,
242        };
243
244        match vote_onsets(
245            consensus,
246            config.onset_consensus_weights,
247            config.onset_consensus_tolerance_ms,
248            sample_rate,
249        ) {
250            Ok(candidates) => {
251                // Default policy: prefer onsets confirmed by >=2 methods.
252                // If that yields nothing, fall back to the full clustered set (>=1 method).
253                let mut strong: Vec<usize> = candidates
254                    .iter()
255                    .filter(|c| c.voted_by >= 2)
256                    .map(|c| c.time_samples)
257                    .collect();
258                strong.sort_unstable();
259                strong.dedup();
260
261                let mut any: Vec<usize> = candidates.iter().map(|c| c.time_samples).collect();
262                any.sort_unstable();
263                any.dedup();
264
265                let chosen = if !strong.is_empty() { strong } else { any };
266                if !chosen.is_empty() {
267                    log::debug!(
268                        "Onset consensus: chosen {} onsets (strong>=2 methods: {}, total_clusters: {})",
269                        chosen.len(),
270                        candidates.iter().filter(|c| c.voted_by >= 2).count(),
271                        candidates.len()
272                    );
273                    onsets_for_legacy = chosen.clone();
274                    onsets_for_beat_tracking = chosen;
275                } else {
276                    log::debug!("Onset consensus produced no candidates; using energy-flux onsets");
277                }
278            }
279            Err(e) => {
280                log::warn!("Onset consensus voting failed: {}", e);
281            }
282        }
283    }
284    
285    // BPM estimation: tempogram (Phase 1F) + legacy (Phase 1B), optionally fused
286    let legacy_estimate = {
287        use features::period::{estimate_bpm, estimate_bpm_with_guardrails, LegacyBpmGuardrails};
288        if onsets_for_legacy.len() >= 2 {
289            if config.enable_legacy_bpm_guardrails {
290                let guardrails = LegacyBpmGuardrails {
291                    preferred_min: config.legacy_bpm_preferred_min,
292                    preferred_max: config.legacy_bpm_preferred_max,
293                    soft_min: config.legacy_bpm_soft_min,
294                    soft_max: config.legacy_bpm_soft_max,
295                    mul_preferred: config.legacy_bpm_conf_mul_preferred,
296                    mul_soft: config.legacy_bpm_conf_mul_soft,
297                    mul_extreme: config.legacy_bpm_conf_mul_extreme,
298                };
299                estimate_bpm_with_guardrails(
300                    &onsets_for_legacy,
301                    sample_rate,
302                    config.hop_size,
303                    config.min_bpm,
304                    config.max_bpm,
305                    config.bpm_resolution,
306                    guardrails,
307                )?
308            } else {
309                estimate_bpm(
310                    &onsets_for_legacy,
311                    sample_rate,
312                    config.hop_size,
313                    config.min_bpm,
314                    config.max_bpm,
315                    config.bpm_resolution,
316                )?
317            }
318        } else {
319            None
320        }
321    };
322
323    let mut tempogram_candidates: Option<Vec<crate::analysis::result::TempoCandidateDebug>> = None;
324    let mut tempogram_multi_res_triggered: Option<bool> = None;
325    let mut tempogram_multi_res_used: Option<bool> = None;
326    let mut tempogram_percussive_triggered: Option<bool> = None;
327    let mut tempogram_percussive_used: Option<bool> = None;
328
329    let tempogram_estimate = if !config.force_legacy_bpm && !magnitude_spec_frames.is_empty() {
330        use crate::analysis::result::TempoCandidateDebug;
331        use features::period::multi_resolution::multi_resolution_tempogram_from_samples;
332        use features::period::tempogram::{
333            estimate_bpm_tempogram,
334            estimate_bpm_tempogram_band_fusion,
335            estimate_bpm_tempogram_with_candidates,
336            estimate_bpm_tempogram_with_candidates_band_fusion,
337            TempogramBandFusionConfig,
338        };
339
340        let band_cfg = TempogramBandFusionConfig {
341            enabled: config.enable_tempogram_band_fusion,
342            low_max_hz: config.tempogram_band_low_max_hz,
343            mid_max_hz: config.tempogram_band_mid_max_hz,
344            high_max_hz: config.tempogram_band_high_max_hz,
345            w_full: config.tempogram_band_w_full,
346            w_low: config.tempogram_band_w_low,
347            w_mid: config.tempogram_band_w_mid,
348            w_high: config.tempogram_band_w_high,
349            seed_only: config.tempogram_band_seed_only,
350            support_threshold: config.tempogram_band_support_threshold,
351            consensus_bonus: config.tempogram_band_consensus_bonus,
352            enable_mel: config.enable_tempogram_mel_novelty,
353            mel_n_mels: config.tempogram_mel_n_mels,
354            mel_fmin_hz: config.tempogram_mel_fmin_hz,
355            mel_fmax_hz: config.tempogram_mel_fmax_hz,
356            mel_max_filter_bins: config.tempogram_mel_max_filter_bins,
357            w_mel: config.tempogram_mel_weight,
358            novelty_w_spectral: config.tempogram_novelty_w_spectral,
359            novelty_w_energy: config.tempogram_novelty_w_energy,
360            novelty_w_hfc: config.tempogram_novelty_w_hfc,
361            novelty_local_mean_window: config.tempogram_novelty_local_mean_window,
362            novelty_smooth_window: config.tempogram_novelty_smooth_window,
363            debug_track_id: config.debug_track_id,
364            debug_gt_bpm: config.debug_gt_bpm,
365            debug_top_n: config.debug_top_n,
366            superflux_max_filter_bins: config.tempogram_superflux_max_filter_bins,
367        };
368
369        let use_aux_variants = config.enable_tempogram_band_fusion
370            || config.enable_tempogram_mel_novelty
371            || config.tempogram_band_consensus_bonus > 0.0;
372
373        if config.enable_tempogram_multi_resolution {
374            // Run single-resolution tempogram first; only escalate to multi-resolution
375            // when the result looks ambiguous (prevents global regressions).
376            let base_top_n = config
377                .tempogram_candidates_top_n
378                .max(config.tempogram_multi_res_top_k)
379                .max(10);
380
381            let base_call = if use_aux_variants {
382                estimate_bpm_tempogram_with_candidates_band_fusion(
383                    &magnitude_spec_frames,
384                    sample_rate,
385                    config.hop_size as u32,
386                    config.min_bpm,
387                    config.max_bpm,
388                    config.bpm_resolution,
389                    base_top_n,
390                    band_cfg.clone(),
391                )
392            } else {
393                estimate_bpm_tempogram_with_candidates(
394                    &magnitude_spec_frames,
395                    sample_rate,
396                    config.hop_size as u32,
397                    config.min_bpm,
398                    config.max_bpm,
399                    config.bpm_resolution,
400                    base_top_n,
401                )
402            };
403
404            match base_call {
405                Ok((base_est, base_cands)) => {
406                    let trap_low = base_est.bpm >= 55.0 && base_est.bpm <= 80.0;
407                    let trap_high = base_est.bpm >= 170.0 && base_est.bpm <= 200.0;
408
409                    // Additional ambiguity detection: if the single-resolution candidate list already
410                    // contains strong tempo-family alternatives (2× or 1/2×), escalate to multi-res.
411                    //
412                    // This catches cases like GT~184 predicted ~92 where base BPM is not in our
413                    // original “trap” window but is a classic half-time error.
414                    fn cand_support(
415                        cands: &[features::period::tempogram::TempogramCandidateDebug],
416                        bpm: f32,
417                        tol: f32,
418                    ) -> f32 {
419                        let mut best = 0.0f32;
420                        for c in cands {
421                            if (c.bpm - bpm).abs() <= tol {
422                                best = best.max(c.score);
423                            }
424                        }
425                        best
426                    }
427
428                    let tol = 2.0f32.max(config.bpm_resolution);
429                    let s_base = cand_support(&base_cands, base_est.bpm, tol);
430                    let s_2x = cand_support(&base_cands, base_est.bpm * 2.0, tol);
431                    let s_half = cand_support(&base_cands, base_est.bpm * 0.5, tol);
432                    let family_competes = (s_2x > 0.0 && s_2x >= s_base * 0.90)
433                        || (s_half > 0.0 && s_half >= s_base * 0.90);
434
435                    // IMPORTANT: Our tempogram "confidence" is currently conservative and can be low even
436                    // for correct tempos. If we use a generic confidence threshold, we end up escalating
437                    // on nearly every track (catastrophic for performance, especially with HPSS).
438                    //
439                    // For now, only escalate on targeted signals: the known tempo-family trap zones, a competing
440                    // 2×/½× candidate (`family_competes`), or the escape hatch below. We'll widen this later once we have a better uncertainty measure.
441                    // Escape hatch: if confidence/agreement is poor and a 2× fold would land in the
442                    // high trap zone (or a 1/2× fold would land in the low trap zone), escalate even
443                    // if the candidate list didn’t surface it strongly (prevents missing half-time errors).
444                    // Only use the "fold_into_trap" escape hatch for the missed half-time case:
445                    // base ~90, true ~180. Do NOT trigger on base ~120 (since 120/2=60 is common and
446                    // would cause unnecessary multi-res runs / regressions).
447                    let fold_into_trap = base_est.bpm * 2.0 >= 170.0 && base_est.bpm * 2.0 <= 200.0;
448                    let weak_base = base_est.method_agreement == 0 || base_est.confidence < 0.06;
449
450                    let ambiguous = trap_low || trap_high || family_competes || (weak_base && fold_into_trap);
451                    // Instrumentation: "triggered" means we *considered* escalation, not just "base looks ambiguous".
452                    tempogram_multi_res_triggered = Some(ambiguous);
453
454                    if let Some(track_id) = config.debug_track_id {
455                        eprintln!("\n=== DEBUG base tempogram (track_id={}) ===", track_id);
456                        if let Some(gt) = config.debug_gt_bpm {
457                            eprintln!("GT bpm: {:.3}", gt);
458                        }
459                        eprintln!(
460                            "base_est: bpm={:.2} conf={:.4} agree={} (trap_low={} trap_high={} ambiguous={})",
461                            base_est.bpm,
462                            base_est.confidence,
463                            base_est.method_agreement,
464                            trap_low,
465                            trap_high,
466                            ambiguous
467                        );
468                        eprintln!(
469                            "ambiguity signals: family_competes={} (s_base={:.4} s_2x={:.4} s_half={:.4}) weak_base={} fold_into_trap={}",
470                            family_competes,
471                            s_base,
472                            s_2x,
473                            s_half,
474                            weak_base,
475                            fold_into_trap
476                        );
477                        if !ambiguous {
478                            eprintln!("NOTE: multi-res not run (outside trap zones).");
479                        }
480                    }
481
482                    let mut chosen_est = base_est.clone();
483                    let mut chosen_cands = base_cands;
484                    let mut used_mr = false;
485
486                    if ambiguous {
487                        match multi_resolution_tempogram_from_samples(
488                            &trimmed_samples,
489                            sample_rate,
490                            config.frame_size,
491                            config.min_bpm,
492                            config.max_bpm,
493                            config.bpm_resolution,
494                            config.tempogram_multi_res_top_k,
495                            config.tempogram_multi_res_w512,
496                            config.tempogram_multi_res_w256,
497                            config.tempogram_multi_res_w1024,
498                            config.tempogram_multi_res_structural_discount,
499                            config.tempogram_multi_res_double_time_512_factor,
500                            config.tempogram_multi_res_margin_threshold,
501                            config.tempogram_multi_res_use_human_prior,
502                            Some(band_cfg.clone()),
503                        ) {
504                            Ok((mr_est, mr_cands_512)) => {
505                                let mr_est_log = mr_est.clone();
506                                // Choose multi-res only if it provides stronger evidence or
507                                // a safer tempo-family choice in the trap regions.
508                                let rel = if base_est.bpm > 1e-6 {
509                                    (mr_est.bpm / base_est.bpm).max(base_est.bpm / mr_est.bpm)
510                                } else {
511                                    1.0
512                                };
513                                let family_related =
514                                    (rel - 2.0).abs() < 0.05 || (rel - 1.5).abs() < 0.05 || (rel - (4.0 / 3.0)).abs() < 0.05;
515
516                                // Hard safety rule: do not “promote” a sane in-range tempo into an extreme
517                                // high tempo (e.g., 120 -> 240). Multi-res should primarily resolve
518                                // octave *folding* errors, not create them.
519                                let forbid_promote_high = base_est.bpm <= 180.0 && mr_est.bpm > 180.0;
520
521                                let mr_better = !forbid_promote_high
522                                    && (mr_est.confidence >= (base_est.confidence + 0.05)
523                                        || (mr_est.method_agreement > base_est.method_agreement
524                                            && mr_est.confidence >= base_est.confidence * 0.90)
525                                        || ((trap_low || trap_high)
526                                            && family_related
527                                            && mr_est.confidence >= base_est.confidence * 0.88
528                                            // Additional safety: only accept family moves that land in a
529                                            // typical music/DJ tempo band unless base was already extreme.
530                                            && ((mr_est.bpm >= 70.0 && mr_est.bpm <= 180.0) || base_est.bpm > 180.0)));
531
532                                if mr_better {
533                                    chosen_est = mr_est;
534                                    chosen_cands = mr_cands_512;
535                                    used_mr = true;
536                                }
537
538                                if let Some(track_id) = config.debug_track_id {
539                                    eprintln!("\n=== DEBUG multi-res decision (track_id={}) ===", track_id);
540                                    if let Some(gt) = config.debug_gt_bpm {
541                                        eprintln!("GT bpm: {:.3}", gt);
542                                    }
543                                    eprintln!("base_est: bpm={:.2} conf={:.4} agree={}", base_est.bpm, base_est.confidence, base_est.method_agreement);
544                                    eprintln!("mr_est:   bpm={:.2} conf={:.4} agree={}", mr_est_log.bpm, mr_est_log.confidence, mr_est_log.method_agreement);
545                                    eprintln!("ambiguous(trap_low||trap_high)={}", ambiguous);
546                                    eprintln!("rel={:.3} family_related={} forbid_promote_high={}", rel, family_related, forbid_promote_high);
547                                    eprintln!("mr_better={} used_mr={}", mr_better, used_mr);
548                                }
549                            }
550                            Err(e) => {
551                                log::debug!("Multi-resolution escalation skipped (failed): {}", e);
552                            }
553                        }
554                    }
555                    tempogram_multi_res_used = Some(used_mr);
556
557                    // Percussive-only fallback (HPSS) for ambiguous cases (generation improvement).
558                    //
559                    // Important: HPSS is expensive. We only run it when we are in the classic
560                    // low-tempo ambiguity zone where sustained harmonic content commonly causes
561                    // half/double-time traps.
562                    let percussive_needed = ambiguous && trap_low;
563                    tempogram_percussive_triggered = Some(percussive_needed);
564
565                    if config.enable_tempogram_percussive_fallback && percussive_needed {
566                        use features::onset::hpss::hpss_decompose;
567
568                        // Decompose the already computed spectrogram at the base hop_size.
569                        match hpss_decompose(&magnitude_spec_frames, config.hpss_margin) {
570                            Ok((_h, p)) => {
571                                // Re-run tempogram on percussive component.
572                                let p_call = if use_aux_variants {
573                                    estimate_bpm_tempogram_with_candidates_band_fusion(
574                                        &p,
575                                        sample_rate,
576                                        config.hop_size as u32,
577                                        config.min_bpm,
578                                        config.max_bpm,
579                                        config.bpm_resolution,
580                                        base_top_n,
581                                        band_cfg.clone(),
582                                    )
583                                } else {
584                                    estimate_bpm_tempogram_with_candidates(
585                                        &p,
586                                        sample_rate,
587                                        config.hop_size as u32,
588                                        config.min_bpm,
589                                        config.max_bpm,
590                                        config.bpm_resolution,
591                                        base_top_n,
592                                    )
593                                };
594
595                                match p_call {
596                                    Ok((p_est, p_cands)) => {
597                                        // Accept percussive estimate only when it is a tempo-family move
598                                        // and does not promote sane tempos into extremes.
599                                        let rel = if chosen_est.bpm > 1e-6 {
600                                            (p_est.bpm / chosen_est.bpm).max(chosen_est.bpm / p_est.bpm)
601                                        } else {
602                                            1.0
603                                        };
604                                        let family_related = (rel - 2.0).abs() < 0.05
605                                            || (rel - 1.5).abs() < 0.05
606                                            || (rel - (4.0 / 3.0)).abs() < 0.05
607                                            || (rel - (3.0 / 2.0)).abs() < 0.05
608                                            || (rel - (2.0 / 3.0)).abs() < 0.05
609                                            || (rel - (3.0 / 4.0)).abs() < 0.05;
610
611                                        let forbid_promote_high = chosen_est.bpm <= 180.0 && p_est.bpm > 180.0;
612
613                                        // Slightly more permissive acceptance in the low-tempo trap region:
614                                        // if percussive yields a coherent 2× tempo in a common range, take it
615                                        // even if confidence is only marginally better.
616                                        let base_low_trap = trap_low || base_est.bpm < 95.0;
617                                        let percussive_in_common = p_est.bpm >= 70.0 && p_est.bpm <= 180.0;
618
619                                        let p_better = !forbid_promote_high
620                                            && family_related
621                                            && percussive_in_common
622                                            && (p_est.confidence >= chosen_est.confidence + 0.04
623                                                || (base_low_trap && p_est.confidence >= chosen_est.confidence * 0.85)
624                                                || (p_est.method_agreement > chosen_est.method_agreement
625                                                    && p_est.confidence >= chosen_est.confidence * 0.92));
626
627                                        if p_better {
628                                            chosen_est = p_est;
629                                            chosen_cands = p_cands;
630                                            tempogram_percussive_used = Some(true);
631                                        } else {
632                                            tempogram_percussive_used = Some(false);
633                                        }
634                                    }
635                                    Err(e) => {
636                                        log::debug!("Percussive tempogram fallback failed: {}", e);
637                                        tempogram_percussive_used = Some(false);
638                                    }
639                                }
640                            }
641                            Err(e) => {
642                                log::debug!("HPSS decomposition for percussive tempogram failed: {}", e);
643                                tempogram_percussive_used = Some(false);
644                            }
645                        }
646                    } else if config.enable_tempogram_percussive_fallback {
647                        tempogram_percussive_used = Some(false);
648                    }
649
650                    if config.emit_tempogram_candidates {
651                        tempogram_candidates = Some(
652                            chosen_cands
653                                .into_iter()
654                                .map(|c| TempoCandidateDebug {
655                                    bpm: c.bpm,
656                                    score: c.score,
657                                    fft_norm: c.fft_norm,
658                                    autocorr_norm: c.autocorr_norm,
659                                    selected: c.selected,
660                                })
661                                .collect(),
662                        );
663                    }
664
665                    log::debug!(
666                        "Tempogram BPM estimate: {:.2} (confidence: {:.3}, method_agreement: {}, multi_res={})",
667                        chosen_est.bpm,
668                        chosen_est.confidence,
669                        chosen_est.method_agreement,
670                        ambiguous
671                    );
672
673                    Some(chosen_est)
674                }
675                Err(e) => {
676                    log::warn!("Tempogram BPM detection failed: {}", e);
677                    None
678                }
679            }
680        } else if config.emit_tempogram_candidates {
681            let call = if use_aux_variants {
682                estimate_bpm_tempogram_with_candidates_band_fusion(
683                    &magnitude_spec_frames,
684                    sample_rate,
685                    config.hop_size as u32,
686                    config.min_bpm,
687                    config.max_bpm,
688                    config.bpm_resolution,
689                    config.tempogram_candidates_top_n,
690                    band_cfg.clone(),
691                )
692            } else {
693                estimate_bpm_tempogram_with_candidates(
694                    &magnitude_spec_frames,
695                    sample_rate,
696                    config.hop_size as u32,
697                    config.min_bpm,
698                    config.max_bpm,
699                    config.bpm_resolution,
700                    config.tempogram_candidates_top_n,
701                )
702            };
703
704            match call {
705                Ok((estimate, cands)) => {
706                    tempogram_candidates = Some(
707                        cands.into_iter()
708                            .map(|c| TempoCandidateDebug {
709                                bpm: c.bpm,
710                                score: c.score,
711                                fft_norm: c.fft_norm,
712                                autocorr_norm: c.autocorr_norm,
713                                selected: c.selected,
714                            })
715                            .collect(),
716                    );
717                    log::debug!(
718                        "Tempogram BPM estimate: {:.2} (confidence: {:.3}, method_agreement: {}, candidates_emitted={})",
719                        estimate.bpm,
720                        estimate.confidence,
721                        estimate.method_agreement,
722                        tempogram_candidates.as_ref().map(|v| v.len()).unwrap_or(0)
723                    );
724                    Some(estimate)
725                }
726                Err(e) => {
727                    log::warn!("Tempogram BPM detection failed: {}", e);
728                    None
729                }
730            }
731        } else {
732            let call = if use_aux_variants {
733                estimate_bpm_tempogram_band_fusion(
734                    &magnitude_spec_frames,
735                    sample_rate,
736                    config.hop_size as u32,
737                    config.min_bpm,
738                    config.max_bpm,
739                    config.bpm_resolution,
740                    band_cfg.clone(),
741                )
742            } else {
743                estimate_bpm_tempogram(
744                    &magnitude_spec_frames,
745                    sample_rate,
746                    config.hop_size as u32,
747                    config.min_bpm,
748                    config.max_bpm,
749                    config.bpm_resolution,
750                )
751            };
752
753            match call {
754                Ok(estimate) => {
755                    log::debug!(
756                        "Tempogram BPM estimate: {:.2} (confidence: {:.3}, method_agreement: {})",
757                        estimate.bpm,
758                        estimate.confidence,
759                        estimate.method_agreement
760                    );
761                    Some(estimate)
762                }
763                Err(e) => {
764                    log::warn!("Tempogram BPM detection failed: {}", e);
765                    None
766                }
767            }
768        }
769    } else {
770        if config.force_legacy_bpm {
771            log::debug!("Forcing legacy BPM estimation (force_legacy_bpm=true)");
772        } else if magnitude_spec_frames.is_empty() {
773            log::warn!("Could not compute STFT for tempogram");
774        }
775        None
776    };
777
778    let (bpm, bpm_confidence) = if config.force_legacy_bpm {
779        legacy_estimate
780            .as_ref()
781            .map(|e| (e.bpm, e.confidence))
782            .unwrap_or((0.0, 0.0))
783    } else if config.enable_bpm_fusion {
784        // Fusion (safe validator mode):
785        // - **Never** override the tempogram BPM (so fusion cannot regress BPM accuracy).
786        // - Use legacy only to adjust *confidence* and emit diagnostics.
787        let (t_bpm, t_conf, t_agree) = tempogram_estimate
788            .as_ref()
789            .map(|e| (e.bpm, e.confidence, e.method_agreement))
790            .unwrap_or((0.0, 0.0, 0));
791        let (l_bpm, l_conf_raw) = legacy_estimate
792            .as_ref()
793            .map(|e| (e.bpm, e.confidence))
794            .unwrap_or((0.0, 0.0));
795        let l_conf = l_conf_raw.clamp(0.0, 1.0);
796
797        // If tempogram is unavailable, fall back to legacy (guardrailed).
798        if t_bpm <= 0.0 {
799            legacy_estimate
800                .as_ref()
801                .map(|e| (e.bpm, e.confidence))
802                .unwrap_or((0.0, 0.0))
803        } else {
804            let tol = 2.0f32;
805            let mut conf = t_conf.clamp(0.0, 1.0);
806
807            // Agreement / validation scoring between legacy and tempogram BPMs.
808            let agreement = if l_bpm > 0.0 {
809                // Allow common metrical ambiguity relations without forcing an override.
810                let diffs = [
811                    (l_bpm - t_bpm).abs(),
812                    (l_bpm - (t_bpm * 0.5)).abs(),
813                    (l_bpm - (t_bpm * 2.0)).abs(),
814                    (l_bpm - (t_bpm * (2.0 / 3.0))).abs(),
815                    (l_bpm - (t_bpm * (3.0 / 2.0))).abs(),
816                ];
817                diffs.into_iter().any(|d| d <= tol)
818            } else {
819                false
820            };
821
822            if agreement {
823                // Modest boost when legacy is consistent (even if it’s at a different metrical level).
824                let boost = 0.12 * l_conf;
825                conf = (conf + boost).clamp(0.0, 1.0);
826                log::debug!(
827                    "BPM fusion (validator): tempogram {:.2} kept; legacy {:.2} validates (agree≈true); conf {:.3}->{:.3}; temp_agree={}",
828                    t_bpm,
829                    l_bpm,
830                    t_conf,
831                    conf,
832                    t_agree
833                );
834            } else if l_bpm > 0.0 {
835                // If legacy strongly disagrees, slightly down-weight confidence.
836                // This helps downstream beat-tracking avoid over-trusting borderline tempos,
837                // while preserving the tempogram BPM choice.
838                conf = (conf * 0.90).clamp(0.0, 1.0);
839                log::debug!(
840                    "BPM fusion (validator): tempogram {:.2} kept; legacy {:.2} disagrees; conf {:.3}->{:.3}; temp_agree={}",
841                    t_bpm,
842                    l_bpm,
843                    t_conf,
844                    conf,
845                    t_agree
846                );
847            } else {
848                log::debug!(
849                    "BPM fusion (validator): tempogram {:.2} kept; no legacy estimate available; temp_agree={}",
850                    t_bpm,
851                    t_agree
852                );
853            }
854
855            (t_bpm, conf)
856        }
857    } else {
858        // Default behavior: tempogram first; legacy fallback only if tempogram fails.
859        tempogram_estimate
860            .as_ref()
861            .map(|e| (e.bpm, e.confidence))
862            .or_else(|| legacy_estimate.as_ref().map(|e| (e.bpm, e.confidence)))
863            .unwrap_or((0.0, 0.0))
864    };
865    
866    if bpm == 0.0 {
867        log::warn!("Could not estimate BPM: tempogram and legacy methods both failed");
868    } else {
869        log::debug!("Estimated BPM: {:.2} (confidence: {:.3})", bpm, bpm_confidence);
870    }
871    
872    // Phase 1C: Beat Tracking
873    let (beat_grid, grid_stability) = if bpm > 0.0 && onsets_for_beat_tracking.len() >= 2 {
874        // Convert onsets from sample indices to seconds
875        let onsets_seconds: Vec<f32> = onsets_for_beat_tracking
876            .iter()
877            .map(|&sample_idx| sample_idx as f32 / sample_rate as f32)
878            .collect();
879
880        // Generate beat grid using HMM Viterbi algorithm
881        use features::beat_tracking::generate_beat_grid;
882        match generate_beat_grid(bpm, bpm_confidence, &onsets_seconds, sample_rate) {
883            Ok((grid, stability)) => {
884                log::debug!(
885                    "Beat grid generated: {} beats, {} downbeats, stability={:.3}",
886                    grid.beats.len(),
887                    grid.downbeats.len(),
888                    stability
889                );
890                (grid, stability)
891            }
892            Err(e) => {
893                log::warn!("Beat tracking failed: {}, using empty grid", e);
894                (
895                    BeatGrid {
896                        downbeats: vec![],
897                        beats: vec![],
898                        bars: vec![],
899                    },
900                    0.0,
901                )
902            }
903        }
904    } else {
905        log::debug!("Skipping beat tracking: BPM={:.2}, onsets={}", bpm, energy_onsets.len());
906        (
907            BeatGrid {
908                downbeats: vec![],
909                beats: vec![],
910                bars: vec![],
911            },
912            0.0,
913        )
914    };
915    
916    // Phase 1D: Key Detection
917    let (key, key_confidence, key_clarity) = if trimmed_samples.len() >= config.frame_size {
918        // Extract chroma vectors with configurable options
919        //
920        // IMPORTANT: We already computed the STFT magnitudes for tempogram BPM. Reuse them here
921        // to avoid a second STFT pass (and to enable spectrogram-domain conditioning for key).
922        use features::chroma::extractor::{
923            convert_linear_to_log_frequency_spectrogram,
924            extract_beat_synchronous_chroma,
925            extract_chroma_from_log_frequency_spectrogram,
926            extract_chroma_from_spectrogram_with_options_and_energy,
927            extract_chroma_from_spectrogram_with_options_and_energy_tuned,
928            extract_hpcp_from_spectrogram_with_options_and_energy_tuned,
929            extract_hpcp_bass_blend_from_spectrogram_with_options_and_energy_tuned,
930            estimate_tuning_offset_semitones_from_spectrogram,
931            harmonic_spectrogram_hpss_median_mask,
932            harmonic_spectrogram_time_mask,
933            smooth_spectrogram_time,
934        };
935        use features::chroma::normalization::sharpen_chroma;
936        use features::chroma::smoothing::smooth_chroma;
937        use features::key::{
938            compute_key_clarity, detect_key_ensemble, detect_key_multi_scale, detect_key_weighted, detect_key_weighted_mode_heuristic,
939            KeyDetectionResult, KeyTemplates,
940        };
941        
942        // Key-only STFT override (optional): allow higher frequency resolution for key detection.
943        let key_fft_size = if config.enable_key_stft_override {
944            config.key_stft_frame_size.max(256)
945        } else {
946            config.frame_size
947        };
948        let key_hop_size = if config.enable_key_stft_override {
949            config.key_stft_hop_size.max(1)
950        } else {
951            config.hop_size
952        };
953
954        let key_spec_frames = if config.enable_key_stft_override {
955            match compute_stft(&trimmed_samples, key_fft_size, key_hop_size) {
956                Ok(s) => s,
957                Err(e) => {
958                    log::warn!("Key-only STFT override failed: {}, falling back to shared STFT", e);
959                    magnitude_spec_frames.clone()
960                }
961            }
962        } else {
963            magnitude_spec_frames.clone()
964        };
965
966        // Key-only spectrogram conditioning: suppress percussive broadband transients.
967        let spec_for_key = if !key_spec_frames.is_empty() {
968            if config.enable_key_hpss_harmonic {
969                match harmonic_spectrogram_hpss_median_mask(
970                    &key_spec_frames,
971                    sample_rate,
972                    key_fft_size,
973                    100.0,
974                    5000.0,
975                    config.key_hpss_frame_step,
976                    config.key_hpss_time_margin,
977                    config.key_hpss_freq_margin,
978                    config.key_hpss_mask_power,
979                ) {
980                    Ok(s) => s,
981                    Err(e) => {
982                        log::warn!("Key HPSS harmonic mask failed: {}, falling back", e);
983                        key_spec_frames.clone()
984                    }
985                }
986            } else if config.enable_key_harmonic_mask {
987                match harmonic_spectrogram_time_mask(
988                    &key_spec_frames,
989                    config.key_spectrogram_smooth_margin,
990                    config.key_harmonic_mask_power,
991                ) {
992                    Ok(s) => s,
993                    Err(e) => {
994                        log::warn!("Key harmonic mask failed: {}, falling back", e);
995                        key_spec_frames.clone()
996                    }
997                }
998            } else if config.enable_key_spectrogram_time_smoothing {
999                match smooth_spectrogram_time(&key_spec_frames, config.key_spectrogram_smooth_margin) {
1000                    Ok(s) => s,
1001                    Err(e) => {
1002                        log::warn!("Key spectrogram time smoothing failed: {}, using raw spectrogram", e);
1003                        key_spec_frames.clone()
1004                    }
1005                }
1006            } else {
1007                key_spec_frames.clone()
1008            }
1009        } else {
1010            key_spec_frames.clone()
1011        };
1012
1013        // Optional: convert to log-frequency (semitone-aligned) spectrogram for key detection.
1014        // When enabled, HPCP is disabled (HPCP requires frequency information for harmonic summation).
1015        let (use_log_freq, log_freq_spec, semitone_offset) = if config.enable_key_log_frequency && !spec_for_key.is_empty() {
1016            match convert_linear_to_log_frequency_spectrogram(
1017                &spec_for_key,
1018                sample_rate,
1019                key_fft_size,
1020                100.0,
1021                5000.0,
1022            ) {
1023                Ok(log_spec) => {
1024                    // Compute semitone offset of first bin
1025                    let fmin: f32 = 100.0;
1026                    let semitone_min: f32 = 12.0 * (fmin / 440.0).log2() + 57.0;
1027                    let semitone_bin_min = semitone_min.floor() as i32;
1028                    (true, log_spec, semitone_bin_min)
1029                }
1030                Err(e) => {
1031                    log::warn!("Key log-frequency conversion failed: {}, falling back to linear", e);
1032                    (false, vec![], 0)
1033                }
1034            }
1035        } else {
1036            (false, vec![], 0)
1037        };
1038
1039        // Optional: estimate tuning offset (in semitones) and apply during chroma mapping.
        // Note: tuning estimation runs on the linear spectrogram only; it is skipped (offset 0.0) when the log-frequency path is active.
1041        let tuning_offset = if config.enable_key_tuning_compensation && !spec_for_key.is_empty() && !use_log_freq {
1042            match estimate_tuning_offset_semitones_from_spectrogram(
1043                &spec_for_key,
1044                sample_rate,
1045                key_fft_size,
1046                80.0,
1047                2000.0,
1048                config.key_tuning_frame_step,
1049                config.key_tuning_peak_rel_threshold,
1050            ) {
1051                Ok(d) => d.clamp(
1052                    -config.key_tuning_max_abs_semitones.abs(),
1053                    config.key_tuning_max_abs_semitones.abs(),
1054                ),
1055                Err(e) => {
1056                    log::warn!("Key tuning estimation failed: {}", e);
1057                    0.0
1058                }
1059            }
1060        } else {
1061            0.0
1062        };
1063
1064        let chroma_call = if config.enable_key_beat_synchronous && !beat_grid.beats.is_empty() && !use_log_freq {
1065            // Beat-synchronous chroma: align chroma windows to beat boundaries
1066            extract_beat_synchronous_chroma(
1067                &spec_for_key,
1068                sample_rate,
1069                key_fft_size,
1070                key_hop_size,
1071                &beat_grid.beats,
1072                config.soft_chroma_mapping,
1073                config.soft_mapping_sigma,
1074                tuning_offset,
1075            )
1076        } else if use_log_freq {
1077            // Extract chroma from log-frequency spectrogram (each bin is already a semitone)
1078            extract_chroma_from_log_frequency_spectrogram(&log_freq_spec, semitone_offset)
1079                .map(|chroma_vecs| {
1080                    // Compute frame energies from log-frequency spectrogram
1081                    let energies: Vec<f32> = log_freq_spec
1082                        .iter()
1083                        .map(|frame| frame.iter().map(|&x| x * x).sum())
1084                        .collect();
1085                    (chroma_vecs, energies)
1086                })
1087        } else if config.enable_key_hpcp {
1088            if config.enable_key_hpcp_bass_blend {
1089                extract_hpcp_bass_blend_from_spectrogram_with_options_and_energy_tuned(
1090                    &spec_for_key,
1091                    sample_rate,
1092                    key_fft_size,
1093                    config.soft_mapping_sigma,
1094                    tuning_offset,
1095                    config.key_hpcp_peaks_per_frame,
1096                    config.key_hpcp_num_harmonics,
1097                    config.key_hpcp_harmonic_decay,
1098                    config.key_hpcp_mag_power,
1099                    config.enable_key_hpcp_whitening,
1100                    config.key_hpcp_whitening_smooth_bins,
1101                    config.key_hpcp_bass_fmin_hz,
1102                    config.key_hpcp_bass_fmax_hz,
1103                    config.key_hpcp_bass_weight,
1104                )
1105            } else {
1106                extract_hpcp_from_spectrogram_with_options_and_energy_tuned(
1107                    &spec_for_key,
1108                    sample_rate,
1109                    key_fft_size,
1110                    config.soft_mapping_sigma,
1111                    tuning_offset,
1112                    config.key_hpcp_peaks_per_frame,
1113                    config.key_hpcp_num_harmonics,
1114                    config.key_hpcp_harmonic_decay,
1115                    config.key_hpcp_mag_power,
1116                    config.enable_key_hpcp_whitening,
1117                    config.key_hpcp_whitening_smooth_bins,
1118                )
1119            }
1120        } else if config.enable_key_tuning_compensation && tuning_offset.abs() > 1e-6 {
1121            extract_chroma_from_spectrogram_with_options_and_energy_tuned(
1122                &spec_for_key,
1123                sample_rate,
1124                key_fft_size,
1125                config.soft_chroma_mapping,
1126                config.soft_mapping_sigma,
1127                tuning_offset,
1128            )
1129        } else {
1130            extract_chroma_from_spectrogram_with_options_and_energy(
1131                &spec_for_key,
1132                sample_rate,
1133                key_fft_size,
1134                config.soft_chroma_mapping,
1135                config.soft_mapping_sigma,
1136            )
1137        };
1138
1139        match chroma_call {
1140            Ok((mut chroma_vectors, frame_energies)) => {
1141                // Apply chroma sharpening if enabled (power > 1.0)
1142                if config.chroma_sharpening_power > 1.0 {
1143                    for chroma in &mut chroma_vectors {
1144                        *chroma = sharpen_chroma(chroma, config.chroma_sharpening_power);
1145                    }
1146                    log::debug!("Applied chroma sharpening with power {:.2}", config.chroma_sharpening_power);
1147                }
1148                
1149                // Apply temporal smoothing (optional but recommended)
1150                if chroma_vectors.len() > 5 {
1151                    chroma_vectors = smooth_chroma(&chroma_vectors, 5);
1152                }
1153
1154                // Optional edge trimming: remove intro/outro frames (often beat-only / percussive),
1155                // focusing key detection on the more harmonically informative middle section.
1156                let (chroma_slice, energy_slice): (&[Vec<f32>], &[f32]) = if config.enable_key_edge_trim
1157                    && chroma_vectors.len() == frame_energies.len()
1158                    && chroma_vectors.len() >= 200
1159                {
1160                    let frac = config.key_edge_trim_fraction.clamp(0.0, 0.49);
1161                    let n = chroma_vectors.len();
1162                    let start = ((n as f32) * frac).round() as usize;
1163                    let end = ((n as f32) * (1.0 - frac)).round() as usize;
1164                    if end > start + 50 && end <= n {
1165                        (&chroma_vectors[start..end], &frame_energies[start..end])
1166                    } else {
1167                        (&chroma_vectors[..], &frame_energies[..])
1168                    }
1169                } else {
1170                    (&chroma_vectors[..], &frame_energies[..])
1171                };
1172
1173                // Build per-frame weights (optional): normalize energy by median, combine with tonalness.
1174                fn chroma_tonalness(chroma: &[f32]) -> f32 {
1175                    let sum: f32 = chroma.iter().sum();
1176                    if sum <= 1e-12 {
1177                        return 0.0;
1178                    }
1179                    let mut entropy = 0.0f32;
1180                    for &x in chroma {
1181                        let p = x / sum;
1182                        if p > 1e-12 {
1183                            entropy -= p * p.ln();
1184                        }
1185                    }
1186                    let max_entropy = (12.0f32).ln();
1187                    let t = 1.0 - (entropy / max_entropy);
1188                    t.clamp(0.0, 1.0)
1189                }
1190                
1191                let mut frame_weights: Option<Vec<f32>> = if config.enable_key_frame_weighting
1192                    && !energy_slice.is_empty()
1193                    && energy_slice.len() == chroma_slice.len()
1194                {
1195                    // Median energy for scale normalization.
1196                    let mut sorted = energy_slice.to_vec();
1197                    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
1198                    let median = sorted[sorted.len() / 2].max(1e-12);
1199
1200                    let mut weights = Vec::with_capacity(chroma_slice.len());
1201                    for (ch, &e) in chroma_slice.iter().zip(energy_slice.iter()) {
1202                        let tonal = chroma_tonalness(ch);
1203                        let tonal = if tonal < config.key_min_tonalness { 0.0 } else { tonal };
1204                        let e_norm = (e / median).max(0.0);
1205                        let w_t = tonal.powf(config.key_tonalness_power.max(0.0));
1206                        let w_e = e_norm.powf(config.key_energy_power.max(0.0));
1207                        weights.push((w_t * w_e).max(0.0));
1208                    }
1209                    Some(weights)
1210                } else {
1211                    None
1212                };
1213
1214                // Safety: if weighting zeroes out essentially everything, fall back to unweighted.
1215                if let Some(w) = frame_weights.as_ref() {
1216                    let sum_w: f32 = w.iter().sum();
1217                    let used = w.iter().filter(|&&x| x > 0.0).count();
1218                    if sum_w <= 1e-12 || used < 10 {
1219                        frame_weights = None;
1220                    }
1221                }
1222
1223                // Optional: ensemble key detection (combine K-K and Temperley template scores).
1224                let key_call: Result<KeyDetectionResult, AnalysisError> = if config.enable_key_ensemble {
1225                    detect_key_ensemble(
1226                        chroma_slice,
1227                        frame_weights.as_deref(),
1228                        config.key_ensemble_kk_weight,
1229                        config.key_ensemble_temperley_weight,
1230                    )
1231                // Optional: multi-scale key detection (ensemble voting across multiple time scales).
1232                } else {
1233                    // Detect key using templates (selected template set)
1234                    let templates = KeyTemplates::new_with_template_set(config.key_template_set);
1235                    
1236                    if config.enable_key_multi_scale
1237                        && !config.key_multi_scale_lengths.is_empty()
1238                        && chroma_slice.len() >= *config.key_multi_scale_lengths.iter().min().unwrap_or(&1)
1239                    {
1240                        detect_key_multi_scale(
1241                            chroma_slice,
1242                            &templates,
1243                            frame_weights.as_deref(),
1244                            &config.key_multi_scale_lengths,
1245                            config.key_multi_scale_hop.max(1),
1246                            config.key_multi_scale_min_clarity.clamp(0.0, 1.0),
1247                            if config.key_multi_scale_weights.is_empty() {
1248                                None
1249                            } else {
1250                                Some(&config.key_multi_scale_weights)
1251                            },
1252                            config.enable_key_mode_heuristic,
1253                            config.key_mode_third_ratio_margin,
1254                            if config.enable_key_mode_heuristic {
1255                                config.key_mode_flip_min_score_ratio
1256                            } else {
1257                                0.0
1258                            },
1259                            config.enable_key_minor_harmonic_bonus,
1260                            config.key_minor_leading_tone_bonus_weight,
1261                        )
1262                    // Optional: segment voting (windowed detection + clarity-weighted score accumulation).
1263                    } else if config.enable_key_segment_voting
1264                    && chroma_slice.len() >= config.key_segment_len_frames.max(1)
1265                    && config.key_segment_len_frames >= 120
1266                    && config.key_segment_hop_frames >= 1
1267                {
1268                    let seg_len = config.key_segment_len_frames.min(chroma_slice.len());
1269                    let hop = config.key_segment_hop_frames.min(seg_len).max(1);
1270                    let min_clarity = config.key_segment_min_clarity.clamp(0.0, 1.0);
1271
1272                    let mut acc_scores: Vec<(Key, f32)> = Vec::with_capacity(24);
1273                    // init score table
1274                    for k in 0..12 {
1275                        acc_scores.push((Key::Major(k as u32), 0.0));
1276                    }
1277                    for k in 0..12 {
1278                        acc_scores.push((Key::Minor(k as u32), 0.0));
1279                    }
1280
1281                    let mut used_segments = 0usize;
1282                    let mut start = 0usize;
1283                    while start + seg_len <= chroma_slice.len() {
1284                        let seg = &chroma_slice[start..start + seg_len];
1285                        let wseg = frame_weights
1286                            .as_ref()
1287                            .map(|w| &w[start..start + seg_len]);
1288                        let seg_res = if config.enable_key_mode_heuristic || config.enable_key_minor_harmonic_bonus {
1289                            detect_key_weighted_mode_heuristic(
1290                                seg,
1291                                &templates,
1292                                wseg,
1293                                config.key_mode_third_ratio_margin,
1294                                if config.enable_key_mode_heuristic {
1295                                    config.key_mode_flip_min_score_ratio
1296                                } else {
1297                                    0.0
1298                                },
1299                                config.enable_key_minor_harmonic_bonus,
1300                                config.key_minor_leading_tone_bonus_weight,
1301                            )?
1302                        } else {
1303                            detect_key_weighted(seg, &templates, wseg)?
1304                        };
1305                        let seg_clarity = compute_key_clarity(&seg_res.all_scores);
1306                        if seg_clarity >= min_clarity {
1307                            used_segments += 1;
1308                            // Add all scores, weighted by clarity
1309                            for (k, s) in seg_res.all_scores.iter() {
1310                                if let Some((_kk, dst)) = acc_scores.iter_mut().find(|(kk, _)| kk == k) {
1311                                    *dst += *s * seg_clarity;
1312                                }
1313                            }
1314                        }
1315                        start += hop;
1316                    }
1317
1318                    if used_segments == 0 {
1319                        if config.enable_key_mode_heuristic || config.enable_key_minor_harmonic_bonus {
1320                            detect_key_weighted_mode_heuristic(
1321                                chroma_slice,
1322                                &templates,
1323                                frame_weights.as_deref(),
1324                                config.key_mode_third_ratio_margin,
1325                                if config.enable_key_mode_heuristic {
1326                                    config.key_mode_flip_min_score_ratio
1327                                } else {
1328                                    0.0
1329                                },
1330                                config.enable_key_minor_harmonic_bonus,
1331                                config.key_minor_leading_tone_bonus_weight,
1332                            )
1333                        } else {
1334                            detect_key_weighted(chroma_slice, &templates, frame_weights.as_deref())
1335                        }
1336                    } else {
1337                        // Sort accumulated scores and build a KeyDetectionResult.
1338                        acc_scores.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap());
1339                        let (best_key, best_score) = acc_scores[0];
1340                        let second_score = if acc_scores.len() > 1 { acc_scores[1].1 } else { 0.0 };
1341                        let confidence = if best_score > 0.0 {
1342                            ((best_score - second_score) / best_score).max(0.0).min(1.0)
1343                        } else {
1344                            0.0
1345                        };
1346                        let top_n = 3usize.min(acc_scores.len());
1347                        let top_keys = acc_scores.iter().take(top_n).cloned().collect::<Vec<_>>();
1348                        Ok(KeyDetectionResult {
1349                            key: best_key,
1350                            confidence,
1351                            all_scores: acc_scores,
1352                            top_keys,
1353                        })
1354                    }
1355                } else if config.enable_key_mode_heuristic || config.enable_key_minor_harmonic_bonus {
1356                    detect_key_weighted_mode_heuristic(
1357                        chroma_slice,
1358                        &templates,
1359                        frame_weights.as_deref(),
1360                        config.key_mode_third_ratio_margin,
1361                        if config.enable_key_mode_heuristic {
1362                            config.key_mode_flip_min_score_ratio
1363                        } else {
1364                            0.0
1365                        },
1366                        config.enable_key_minor_harmonic_bonus,
1367                        config.key_minor_leading_tone_bonus_weight,
1368                    )
1369                } else {
1370                    detect_key_weighted(chroma_slice, &templates, frame_weights.as_deref())
1371                }
1372            };
1373
1374                match key_call {
1375                    Ok(key_result) => {
1376                        // Compute key clarity
1377                        let clarity = compute_key_clarity(&key_result.all_scores);
1378                        
1379                        log::debug!("Detected key: {:?}, confidence: {:.3}, clarity: {:.3}",
1380                                   key_result.key, key_result.confidence, clarity);
1381
1382                        // Optional debug dump to stderr (captured by validation harness)
1383                        if let Some(track_id) = config.debug_track_id {
1384                            // Weighted pitch-class summary (for diagnosing collapses)
1385                            let mut agg = vec![0.0f32; 12];
1386                            let mut used = 0usize;
1387                            for (idx, ch) in chroma_slice.iter().enumerate() {
1388                                let w = frame_weights
1389                                    .as_ref()
1390                                    .and_then(|v| v.get(idx).copied())
1391                                    .unwrap_or(1.0);
1392                                if w <= 0.0 {
1393                                    continue;
1394                                }
1395                                used += 1;
1396                                for i in 0..12 {
1397                                    agg[i] += w * ch[i];
1398                                }
1399                            }
1400                            let sum_agg: f32 = agg.iter().sum();
1401                            if sum_agg > 1e-12 {
1402                                for x in agg.iter_mut() {
1403                                    *x /= sum_agg;
1404                                }
1405                            }
1406                            let mut pcs: Vec<(usize, f32)> = agg.iter().cloned().enumerate().collect();
1407                            pcs.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
1408
1409                            eprintln!("\n=== DEBUG key (track_id={}) ===", track_id);
1410                            eprintln!(
1411                                "key={} conf={:.4} clarity={:.4} frames={} used_frames={} soft_mapping={} sigma={:.3} harmonic_mask={} mask_p={:.2} tuning={:.4} time_smooth={} margin={} edge_trim={} trim_frac={:.2}",
1412                                key_result.key.name(),
1413                                key_result.confidence,
1414                                clarity,
1415                                chroma_slice.len(),
1416                                used,
1417                                config.soft_chroma_mapping,
1418                                config.soft_mapping_sigma,
1419                                config.enable_key_harmonic_mask,
1420                                config.key_harmonic_mask_power,
1421                                tuning_offset,
1422                                config.enable_key_spectrogram_time_smoothing,
1423                                config.key_spectrogram_smooth_margin,
1424                                config.enable_key_edge_trim,
1425                                config.key_edge_trim_fraction
1426                            );
1427                            eprintln!(
1428                                "top_keys: {}",
1429                                key_result
1430                                    .top_keys
1431                                    .iter()
1432                                    .map(|(k, s)| format!("{}:{:.4}", k.name(), s))
1433                                    .collect::<Vec<_>>()
1434                                    .join(", ")
1435                            );
1436                            let note_names = ["C","C#","D","D#","E","F","F#","G","G#","A","A#","B"];
1437                            eprintln!(
1438                                "top_pitch_classes(weighted): {}",
1439                                pcs.iter()
1440                                    .take(6)
1441                                    .map(|(i, v)| format!("{}:{:.3}", note_names[*i], v))
1442                                    .collect::<Vec<_>>()
1443                                    .join(", ")
1444                            );
1445                        }
1446                        
1447                        (key_result.key, key_result.confidence, clarity)
1448                    }
1449                    Err(e) => {
1450                        log::warn!("Key detection failed: {}, using default", e);
1451                        (Key::Major(0), 0.0, 0.0)
1452                    }
1453                }
1454            }
1455            Err(e) => {
1456                log::warn!("Chroma extraction failed: {}, using default key", e);
1457                (Key::Major(0), 0.0, 0.0)
1458            }
1459        }
1460    } else {
1461        log::debug!("Skipping key detection: insufficient samples (need at least {} samples)",
1462                   config.frame_size);
1463        (Key::Major(0), 0.0, 0.0)
1464    };
1465    
1466    let processing_time_ms = start_time.elapsed().as_secs_f32() * 1000.0;
1467    
1468    // Build confidence warnings
1469    let mut confidence_warnings = Vec::new();
1470    let mut flags = Vec::new();
1471    
1472    if bpm == 0.0 {
1473        confidence_warnings.push("BPM detection failed: insufficient onsets or estimation error".to_string());
1474    }
1475    if grid_stability < 0.5 {
1476        confidence_warnings.push(format!("Low beat grid stability: {:.2} (may indicate tempo variation)", grid_stability));
1477    }
1478    if key_confidence < 0.3 {
1479        confidence_warnings.push(format!("Low key detection confidence: {:.2} (may indicate ambiguous or atonal music)", key_confidence));
1480    }
1481    if key_clarity < 0.2 {
1482        confidence_warnings.push(format!("Low key clarity: {:.2} (track may be atonal or have weak tonality)", key_clarity));
1483        flags.push(crate::analysis::result::AnalysisFlag::WeakTonality);
1484    }
1485    
1486    // Phase 1E: Build result and compute comprehensive confidence scores
1487    let result = AnalysisResult {
1488        bpm,
1489        bpm_confidence,
1490        key,
1491        key_confidence,
1492        key_clarity,
1493        beat_grid,
1494        grid_stability,
1495        metadata: AnalysisMetadata {
1496            duration_seconds: trimmed_samples.len() as f32 / sample_rate as f32,
1497            sample_rate,
1498            processing_time_ms,
1499            algorithm_version: "0.1.0-alpha".to_string(),
1500            onset_method_consensus: if energy_onsets.is_empty() { 0.0 } else { 1.0 },
1501            methods_used: vec!["energy_flux".to_string(), "chroma_extraction".to_string(), "key_detection".to_string()],
1502            flags,
1503            confidence_warnings,
1504            tempogram_candidates,
1505            tempogram_multi_res_triggered,
1506            tempogram_multi_res_used,
1507            tempogram_percussive_triggered,
1508            tempogram_percussive_used,
1509        },
1510    };
1511    
1512    // Phase 1E: Compute comprehensive confidence scores
1513    use analysis::confidence::compute_confidence;
1514    let confidence = compute_confidence(&result);
1515    log::debug!(
1516        "Analysis complete: BPM={:.2} (conf={:.3}), Key={:?} (conf={:.3}), Overall confidence={:.3}",
1517        result.bpm,
1518        confidence.bpm_confidence,
1519        result.key,
1520        confidence.key_confidence,
1521        confidence.overall_confidence
1522    );
1523    
1524    // Return result with Phase 1E confidence scoring integrated
1525    Ok(result)
1526}
1527
stratum_dsp/lib.rs

stratum_dsp/
lib.rs