Struct AnalysisConfig

Source

pub struct AnalysisConfig {
    pub min_amplitude_db: f32,
    pub normalization: NormalizationMethod,
    pub enable_normalization: bool,
    pub enable_silence_trimming: bool,
    pub enable_onset_consensus: bool,
    pub onset_threshold_percentile: f32,
    pub onset_consensus_tolerance_ms: u32,
    pub onset_consensus_weights: [f32; 4],
    pub enable_hpss_onsets: bool,
    pub hpss_margin: usize,
    pub force_legacy_bpm: bool,
    pub enable_bpm_fusion: bool,
    pub enable_legacy_bpm_guardrails: bool,
    pub enable_tempogram_multi_resolution: bool,
    pub tempogram_multi_res_top_k: usize,
    pub tempogram_multi_res_w512: f32,
    pub tempogram_multi_res_w256: f32,
    pub tempogram_multi_res_w1024: f32,
    pub tempogram_multi_res_structural_discount: f32,
    pub tempogram_multi_res_double_time_512_factor: f32,
    pub tempogram_multi_res_margin_threshold: f32,
    pub tempogram_multi_res_use_human_prior: bool,
    pub enable_tempogram_percussive_fallback: bool,
    pub enable_tempogram_band_fusion: bool,
    pub tempogram_band_low_max_hz: f32,
    pub tempogram_band_mid_max_hz: f32,
    pub tempogram_band_high_max_hz: f32,
    pub tempogram_band_w_full: f32,
    pub tempogram_band_w_low: f32,
    pub tempogram_band_w_mid: f32,
    pub tempogram_band_w_high: f32,
    pub tempogram_band_seed_only: bool,
    pub tempogram_band_support_threshold: f32,
    pub tempogram_band_consensus_bonus: f32,
    pub tempogram_novelty_w_spectral: f32,
    pub tempogram_novelty_w_energy: f32,
    pub tempogram_novelty_w_hfc: f32,
    pub tempogram_novelty_local_mean_window: usize,
    pub tempogram_novelty_smooth_window: usize,
    pub debug_track_id: Option<u32>,
    pub debug_gt_bpm: Option<f32>,
    pub debug_top_n: usize,
    pub enable_tempogram_mel_novelty: bool,
    pub tempogram_mel_n_mels: usize,
    pub tempogram_mel_fmin_hz: f32,
    pub tempogram_mel_fmax_hz: f32,
    pub tempogram_mel_max_filter_bins: usize,
    pub tempogram_mel_weight: f32,
    pub tempogram_superflux_max_filter_bins: usize,
    pub emit_tempogram_candidates: bool,
    pub tempogram_candidates_top_n: usize,
    pub legacy_bpm_preferred_min: f32,
    pub legacy_bpm_preferred_max: f32,
    pub legacy_bpm_soft_min: f32,
    pub legacy_bpm_soft_max: f32,
    pub legacy_bpm_conf_mul_preferred: f32,
    pub legacy_bpm_conf_mul_soft: f32,
    pub legacy_bpm_conf_mul_extreme: f32,
    pub min_bpm: f32,
    pub max_bpm: f32,
    pub bpm_resolution: f32,
    pub frame_size: usize,
    pub hop_size: usize,
    pub center_frequency: f32,
    pub soft_chroma_mapping: bool,
    pub soft_mapping_sigma: f32,
    pub chroma_sharpening_power: f32,
    pub enable_key_spectrogram_time_smoothing: bool,
    pub key_spectrogram_smooth_margin: usize,
    pub enable_key_frame_weighting: bool,
    pub key_min_tonalness: f32,
    pub key_tonalness_power: f32,
    pub key_energy_power: f32,
    pub enable_key_harmonic_mask: bool,
    pub key_harmonic_mask_power: f32,
    pub enable_key_hpss_harmonic: bool,
    pub key_hpss_frame_step: usize,
    pub key_hpss_time_margin: usize,
    pub key_hpss_freq_margin: usize,
    pub key_hpss_mask_power: f32,
    pub enable_key_stft_override: bool,
    pub key_stft_frame_size: usize,
    pub key_stft_hop_size: usize,
    pub enable_key_log_frequency: bool,
    pub enable_key_beat_synchronous: bool,
    pub enable_key_multi_scale: bool,
    pub key_template_set: TemplateSet,
    pub enable_key_ensemble: bool,
    pub key_ensemble_kk_weight: f32,
    pub key_ensemble_temperley_weight: f32,
    pub enable_key_median: bool,
    pub key_median_segment_length_frames: usize,
    pub key_median_segment_hop_frames: usize,
    pub key_median_min_segments: usize,
    pub key_multi_scale_lengths: Vec<usize>,
    pub key_multi_scale_hop: usize,
    pub key_multi_scale_min_clarity: f32,
    pub key_multi_scale_weights: Vec<f32>,
    pub enable_key_tuning_compensation: bool,
    pub key_tuning_max_abs_semitones: f32,
    pub key_tuning_frame_step: usize,
    pub key_tuning_peak_rel_threshold: f32,
    pub enable_key_edge_trim: bool,
    pub key_edge_trim_fraction: f32,
    pub enable_key_segment_voting: bool,
    pub key_segment_len_frames: usize,
    pub key_segment_hop_frames: usize,
    pub key_segment_min_clarity: f32,
    pub enable_key_mode_heuristic: bool,
    pub key_mode_third_ratio_margin: f32,
    pub key_mode_flip_min_score_ratio: f32,
    pub enable_key_hpcp: bool,
    pub key_hpcp_peaks_per_frame: usize,
    pub key_hpcp_num_harmonics: usize,
    pub key_hpcp_harmonic_decay: f32,
    pub key_hpcp_mag_power: f32,
    pub enable_key_hpcp_whitening: bool,
    pub key_hpcp_whitening_smooth_bins: usize,
    pub enable_key_hpcp_bass_blend: bool,
    pub key_hpcp_bass_fmin_hz: f32,
    pub key_hpcp_bass_fmax_hz: f32,
    pub key_hpcp_bass_weight: f32,
    pub enable_key_minor_harmonic_bonus: bool,
    pub key_minor_leading_tone_bonus_weight: f32,
}

Expand description

Analysis configuration parameters

Fields§

§min_amplitude_db: f32

Silence detection threshold in dB (default: -40.0). Frames with RMS below this threshold are considered silent.

§normalization: NormalizationMethod

Normalization method to use (default: Peak)

§enable_normalization: bool

Enable normalization step (default: true)

§enable_silence_trimming: bool

Enable silence detection + trimming step (default: true)

§enable_onset_consensus: bool

Enable multi-detector onset consensus (energy flux + spectral flux + HFC + optional HPSS) (default: true)

Note: Tempogram BPM does not use this onset list, but legacy BPM + beat tracking do.

§onset_threshold_percentile: f32

Threshold percentile for STFT-based onset detectors (spectral flux / HFC / HPSS) (default: 0.80). Range: [0.0, 1.0].

§onset_consensus_tolerance_ms: u32

Onset clustering tolerance window in milliseconds for consensus voting (default: 50 ms)

§onset_consensus_weights: [f32; 4]

Consensus method weights [energy_flux, spectral_flux, hfc, hpss] (default: equal weights)

§enable_hpss_onsets: bool

Enable HPSS-based onset detector inside consensus (default: false; more expensive)

§hpss_margin: usize

HPSS median-filter margin (default: 10). Typical values: 5–20.

§force_legacy_bpm: bool

Force legacy BPM estimation (Phase 1B autocorrelation + comb filter) and skip tempogram. Default: false.

Intended for A/B validation and hybrid/consensus experimentation.

§enable_bpm_fusion: bool

Enable BPM fusion (compute tempogram + legacy in parallel, then choose using consensus logic). Default: false (tempogram-only unless it fails, then legacy fallback).

§enable_legacy_bpm_guardrails: bool

Enable legacy BPM guardrails (soft confidence caps by tempo range). Default: true.

§enable_tempogram_multi_resolution: bool

Enable true multi-resolution tempogram BPM estimation.

When enabled, BPM estimation recomputes STFT at hop sizes {256, 512, 1024} and fuses candidates using a cross-resolution scoring rule. This is intended to reduce metrical-level (T vs 2T vs T/2) errors.

Default: true (Phase 1F tuning path).

§tempogram_multi_res_top_k: usize

Multi-resolution fusion: number of hop=512 candidates to consider as anchors. Default: 10.

§tempogram_multi_res_w512: f32

Multi-resolution fusion weight for hop=512 (global beat).

§tempogram_multi_res_w256: f32

Multi-resolution fusion weight for hop=256 (fine transients).

§tempogram_multi_res_w1024: f32

Multi-resolution fusion weight for hop=1024 (structural/metre level).

§tempogram_multi_res_structural_discount: f32

Structural discount factor applied when hop=1024 supports 2T instead of T.

§tempogram_multi_res_double_time_512_factor: f32

Factor applied to hop=512 support when evaluating the 2T / T/2 hypotheses.

§tempogram_multi_res_margin_threshold: f32

Minimum score margin (absolute) required to switch between T / 2T / T/2 hypotheses.

§tempogram_multi_res_use_human_prior: bool

Enable a gentle human-tempo prior as a tie-breaker (only when scores are very close). Default: false.

§enable_tempogram_percussive_fallback: bool

Enable HPSS percussive-only tempogram fallback (ambiguous-only).

This computes an HPSS decomposition on the (already computed) STFT magnitudes and re-runs tempogram on the percussive component. Intended to reduce low-tempo half/double-time traps caused by sustained harmonic energy.

Default: true (Phase 1F tuning path).

§enable_tempogram_band_fusion: bool

Enable multi-band novelty fusion inside the tempogram estimator.

This computes novelty curves over low/mid/high frequency bands, runs the tempogram on each, then fuses their support when scoring BPM candidates. This is primarily intended to improve candidate generation (getting GT into top-N candidates), which is currently the limiting factor after metrical selection improvements.

Default: true (Phase 1F tuning path).

§tempogram_band_low_max_hz: f32

Band split cutoffs (Hz). Bands are: low=[~0..low_max], mid=[low_max..mid_max], high=[mid_max..high_max]. If tempogram_band_high_max_hz <= 0, high extends to Nyquist.

§tempogram_band_mid_max_hz: f32

Upper cutoff for the mid band (Hz).

§tempogram_band_high_max_hz: f32

Upper cutoff for the high band (Hz). If <= 0, uses Nyquist.

§tempogram_band_w_full: f32

Weight for the full-band tempogram contribution when band-score fusion is enabled.

§tempogram_band_w_low: f32

Weight for the low band contribution.

§tempogram_band_w_mid: f32

Weight for the mid band contribution.

§tempogram_band_w_high: f32

Weight for the high band contribution.

§tempogram_band_seed_only: bool

If true, multi-band tempograms contribute only to candidate seeding (peak proposals), while final candidate scoring remains full-band-only.

This is the safer default: high-frequency bands often emphasize subdivisions (hi-hats), which can otherwise increase 2× / 3:2 metrical errors if they directly affect scoring.

§tempogram_band_support_threshold: f32

Minimum per-band normalized support required to count as “supporting” a BPM candidate for band-consensus scoring.

Range: [0, 1]. Default: 0.25.

§tempogram_band_consensus_bonus: f32

Bonus multiplier applied when multiple bands support the same BPM candidate.

This is a lightweight “consensus” heuristic intended to reduce metrical/subdivision errors (e.g., a 2× tempo supported only by the high band should not win over a tempo supported by low+mid bands).

Score adjustment: score *= (1 + bonus * max(0, support_bands - 1)).

§tempogram_novelty_w_spectral: f32

Tempogram novelty weights for combining {spectral, energy, HFC}.

§tempogram_novelty_w_energy: f32

Tempogram novelty weight for energy flux.

§tempogram_novelty_w_hfc: f32

Tempogram novelty weight for HFC.

§tempogram_novelty_local_mean_window: usize

Tempogram novelty conditioning: local-mean window size.

§tempogram_novelty_smooth_window: usize

Tempogram novelty moving-average smoothing window (frames). Use 0/1 to disable.

§debug_track_id: Option<u32>

Debug: if set, the analyze_file example will pass this track ID through to the multi-resolution fusion so it can print detailed scoring diagnostics.

§debug_gt_bpm: Option<f32>

Debug: optional ground-truth BPM passed alongside debug_track_id.

§debug_top_n: usize

Debug: number of top candidates per hop to print when debug_track_id is set.

§enable_tempogram_mel_novelty: bool

Enable log-mel novelty tempogram as an additional candidate generator/support signal.

This computes a log-mel SuperFlux-style novelty curve, then runs the tempogram on it. The resulting candidates are used for seeding and for the consensus bonus logic.

§tempogram_mel_n_mels: usize

Mel band count used by log-mel novelty.

§tempogram_mel_fmin_hz: f32

Minimum mel frequency (Hz).

§tempogram_mel_fmax_hz: f32

Maximum mel frequency (Hz). If <= 0, uses Nyquist.

§tempogram_mel_max_filter_bins: usize

Max-filter neighborhood radius in mel bins (SuperFlux-style reference).

§tempogram_mel_weight: f32

Weight for mel variant when band scoring fusion is enabled (seed_only=false).

§tempogram_superflux_max_filter_bins: usize

SuperFlux max-filter neighborhood radius (bins) used by the tempogram novelty extractor.

§emit_tempogram_candidates: bool

Emit tempogram BPM candidate list (top-N) into AnalysisMetadata for validation/tuning.

Default: false (avoid bloating outputs in normal use).

§tempogram_candidates_top_n: usize

Number of tempogram candidates to emit when emit_tempogram_candidates is enabled. Default: 10.

§legacy_bpm_preferred_min: f32

Legacy guardrails: preferred BPM range lower bound (default: 75; the full preferred range is 75–150).

§legacy_bpm_preferred_max: f32

Legacy guardrails: preferred BPM range upper bound (default: 150).

§legacy_bpm_soft_min: f32

Legacy guardrails: soft BPM range lower bound (default: 60; the full soft range is 60–180). Values in [soft_min, preferred_min) or (preferred_max, soft_max] get a medium cap.

§legacy_bpm_soft_max: f32

Legacy guardrails: soft BPM range upper bound (default: 180).

§legacy_bpm_conf_mul_preferred: f32

Legacy guardrails: confidence caps by range.

- preferred: inside [preferred_min, preferred_max]
- soft: inside [soft_min, soft_max] but outside preferred
- extreme: outside [soft_min, soft_max]

Multiplier semantics: these are applied as confidence *= multiplier to legacy candidates/estimates (softly biasing the selection).

§legacy_bpm_conf_mul_soft: f32

Legacy guardrails: confidence multiplier for the soft band (default: 0.50).

§legacy_bpm_conf_mul_extreme: f32

Legacy guardrails: confidence multiplier for extremes (default: 0.10).

§min_bpm: f32

Minimum BPM to consider (default: 60.0)

§max_bpm: f32

Maximum BPM to consider (default: 180.0)

§bpm_resolution: f32

BPM resolution for comb filterbank (default: 1.0)

§frame_size: usize

Frame size for STFT (default: 2048)

§hop_size: usize

Hop size for STFT (default: 512)

§center_frequency: f32

Center frequency for chroma extraction (default: 440.0 Hz, A4)

§soft_chroma_mapping: bool

Enable soft chroma mapping (default: true). Soft mapping spreads frequency bins to neighboring semitones for robustness.

§soft_mapping_sigma: f32

Soft mapping standard deviation in semitones (default: 0.5). Lower values = sharper mapping, higher values = more spread.

§chroma_sharpening_power: f32

Chroma sharpening power (default: 1.0 = no sharpening; 1.5–2.0 recommended). Power > 1.0 emphasizes prominent semitones, improving key detection.

§enable_key_spectrogram_time_smoothing: bool

Enable a lightweight percussive-suppression step for key detection by time-smoothing the STFT magnitude spectrogram prior to chroma extraction.

This is HPSS-inspired (harmonic content is sustained in time; percussive is transient), but uses a cheap moving-average rather than full iterative HPSS.

Default: true.

§key_spectrogram_smooth_margin: usize

Half-window size (in frames) for the key spectrogram time-smoothing. Effective window length is 2*margin + 1.

Default: 12 (≈ 12 * hop_size samples ≈ 140 ms at 44.1kHz with hop=512).

§enable_key_frame_weighting: bool

Enable weighted key aggregation (frame weights based on tonality + energy). Default: true.

§key_min_tonalness: f32

Minimum per-frame “tonalness” required to include the frame in key aggregation. Tonalness is computed from chroma entropy and mapped to [0, 1]. Default: 0.10.

§key_tonalness_power: f32

Exponent applied to tonalness when building frame weights (>= 0). Default: 2.0.

§key_energy_power: f32

Exponent applied to normalized frame energy when building frame weights (>= 0). Default: 0.50 (square-root weighting).

§enable_key_harmonic_mask: bool

Enable a harmonic-emphasized spectrogram for key detection via a time-smoothing-derived soft mask (cheap HPSS-inspired).

If enabled, key detection uses harmonic_spectrogram_time_mask() instead of raw/time-smoothed magnitudes when extracting chroma.

Default: true.

§key_harmonic_mask_power: f32

Soft-mask exponent (p) for harmonic masking (>= 1.0). Higher values produce harder masks. Default: 2.0.

§enable_key_hpss_harmonic: bool

Enable median-filter HPSS harmonic extraction for key detection (key-only).

This is a more literature-standard HPSS step than harmonic_spectrogram_time_mask(). We compute time- and frequency-median estimates on a time-downsampled, band-limited spectrogram, build a soft mask, then apply it to the full-resolution spectrogram.

Default: false (opt-in; more expensive).

§key_hpss_frame_step: usize

Time-downsampling step for key HPSS (>= 1). Values like 2–6 greatly reduce cost. Default: 4.

§key_hpss_time_margin: usize

Half-window size (in downsampled frames) for the HPSS harmonic (time) median filter. Effective window length is 2*margin + 1 (in downsampled frames). Default: 8.

§key_hpss_freq_margin: usize

Half-window size (in frequency bins) for the HPSS percussive (frequency) median filter. Effective window length is 2*margin + 1 bins. Default: 8.

§key_hpss_mask_power: f32

Soft-mask exponent (p) for HPSS masking (>= 1.0). Higher values produce harder masks. Default: 2.0.

§enable_key_stft_override: bool

Enable a key-only STFT override (compute a separate STFT for key detection).

Rationale: key detection benefits from higher frequency resolution than BPM/onset work. A larger FFT size improves pitch precision at low frequencies where semitone spacing is small.

Default: false (keep single shared STFT by default).

§key_stft_frame_size: usize

FFT frame size used for key-only STFT when enable_key_stft_override is true. Default: 8192.

§key_stft_hop_size: usize

Hop size used for key-only STFT when enable_key_stft_override is true. Default: 512.

§enable_key_log_frequency: bool

Enable log-frequency (semitone-aligned) spectrogram for key detection.

This converts the linear STFT magnitude spectrogram into a log-frequency representation where each bin corresponds to one semitone. This provides better pitch-class resolution than mapping linear FFT bins to semitones, especially at low frequencies.

When enabled, chroma extraction works directly on semitone bins (no frequency-to-semitone mapping needed). HPCP is disabled when log-frequency is enabled (HPCP requires frequency information for harmonic summation).

Default: false (use linear STFT with frequency-to-semitone mapping).

§enable_key_beat_synchronous: bool

Enable beat-synchronous chroma extraction for key detection.

This aligns chroma windows to beat boundaries instead of fixed-time frames, improving harmonic coherence by aligning to musical structure. For each beat interval, chroma vectors from all STFT frames within that interval are averaged.

Requires a valid beat grid (falls back to frame-based chroma if beat grid is unavailable). HPCP is disabled when beat-synchronous is enabled (HPCP requires frame-based processing).

Default: false (use frame-based chroma extraction).

§enable_key_multi_scale: bool

Enable multi-scale key detection (ensemble voting across multiple time scales).

This runs key detection at multiple segment lengths (short, medium, long) and aggregates results using clarity-weighted voting. This captures both local and global key information, improving robustness on tracks with key changes or varying harmonic stability.

Default: false (use single-scale detection).

§key_template_set: TemplateSet

Template set to use for key detection.

- KrumhanslKessler: Krumhansl-Kessler (1982) templates (empirical, from listening experiments)
- Temperley: Temperley (1999) templates (statistical, from corpus analysis)

Default: KrumhanslKessler.

§enable_key_ensemble: bool

Enable ensemble key detection (combine K-K and Temperley template scores).

This runs key detection with both template sets and combines their scores using weighted voting. This ensemble approach can improve robustness by leveraging complementary strengths of different template sets.

Default: false (use single template set).

§key_ensemble_kk_weight: f32

Weight for Krumhansl-Kessler scores in ensemble detection.

Default: 0.5 (equal weight with Temperley).

§key_ensemble_temperley_weight: f32

Weight for Temperley scores in ensemble detection.

Default: 0.5 (equal weight with K-K).

§enable_key_median: bool

Enable median key detection (detect key from multiple short segments and select median).

This divides the track into multiple short overlapping segments, detects key for each segment, and selects the median key (most common key across segments). This helps handle brief modulations, breakdowns, or ambiguous sections.

Default: false (use global key detection).

§key_median_segment_length_frames: usize

Segment length (in frames) for median key detection.

Default: 480 (≈ 5.6 s at 44.1kHz with hop=512).

§key_median_segment_hop_frames: usize

Segment hop size (in frames) for median key detection.

Default: 120 (≈ 1.4 s at 44.1kHz with hop=512).

§key_median_min_segments: usize

Minimum number of segments required for median key detection.

If fewer segments are available, falls back to global detection.

Default: 3.

§key_multi_scale_lengths: Vec<usize>

Segment lengths (in frames) for multi-scale key detection. Multiple scales are processed and aggregated with clarity-weighted voting. Default: [120, 360, 720] (approximately 1.4 s, 4.2 s, 8.4 s at 44.1kHz with hop=512).

§key_multi_scale_hop: usize

Hop size (in frames) between segments for multi-scale detection. Default: 60 (approximately 0.7 s at 44.1kHz with hop=512).

§key_multi_scale_min_clarity: f32

Minimum clarity threshold for including a segment in multi-scale aggregation. Default: 0.20.

§key_multi_scale_weights: Vec<f32>

Optional weights for each scale in multi-scale detection (if empty, all scales weighted equally). Length should match key_multi_scale_lengths. Default: empty (equal weights).

§enable_key_tuning_compensation: bool

Enable per-track tuning compensation for key detection.

This estimates a global detuning offset (in semitones, relative to A4=440Hz) from the key spectrogram, then shifts semitone mapping by that offset during chroma extraction.

Default: true.

§key_tuning_max_abs_semitones: f32

Maximum absolute tuning correction to apply (semitones). Default: 0.25.

§key_tuning_frame_step: usize

Frame subsampling step used for tuning estimation (>= 1). Default: 20.

§key_tuning_peak_rel_threshold: f32

Relative threshold (fraction of per-frame peak) for selecting bins used in tuning estimation. Default: 0.35.

§enable_key_edge_trim: bool

Enable trimming the first/last fraction of frames for key detection.

DJ tracks often have long beat-only intros/outros; trimming edges reduces percussive bias without affecting tempo (tempo uses its own pipeline).

Default: true.

§key_edge_trim_fraction: f32

Fraction (0..0.49) to trim from the start and end (symmetric) when enable_key_edge_trim is true. Default: 0.15 (use middle 70%).

§enable_key_segment_voting: bool

Enable segment voting for key detection (windowed key detection + score accumulation).

Rationale: long-form DJ tracks can modulate, have breakdowns, or contain beat-only sections. Segment voting helps focus on harmonically stable portions without requiring full key-change tracking.

Default: true.

§key_segment_len_frames: usize

Segment length in chroma frames for key voting. Default: 1024 (~11.9s at 44.1kHz, hop=512).

§key_segment_hop_frames: usize

Segment hop/stride in frames for key voting. Default: 512 (~50% overlap).

§key_segment_min_clarity: f32

Minimum clarity required to include a segment in voting (0..1). Default: 0.20.

§enable_key_mode_heuristic: bool

Enable a conservative mode heuristic to reduce minor→major mistakes.

Uses the 3rd degree (minor third vs major third) from the aggregated chroma to potentially flip parallel mode, gated by a score-ratio threshold.

Default: true.

§key_mode_third_ratio_margin: f32

Required ratio margin for the 3rd-degree test (>=0). If p(min3) > p(maj3) * (1+margin) we prefer minor (and vice versa for major). Default: 0.05.

§key_mode_flip_min_score_ratio: f32

Only flip parallel mode if the alternate mode’s template score is at least this ratio of the best mode’s score (0..1). Default: 0.92.

§enable_key_hpcp: bool

Enable HPCP-style pitch-class profile extraction for key detection.

This uses spectral peak picking + harmonic summation to form a more robust tonal profile than raw STFT-bin chroma on real-world mixes.

Default: false (experimental).

§key_hpcp_peaks_per_frame: usize

Number of spectral peaks per frame used for HPCP extraction. Default: 24.

§key_hpcp_num_harmonics: usize

Number of harmonics per peak used for HPCP extraction. Default: 4.

§key_hpcp_harmonic_decay: f32

Harmonic decay factor applied per harmonic (0..1). Lower values emphasize fundamentals. Default: 0.60.

§key_hpcp_mag_power: f32

Magnitude compression exponent for peak weights (0..1]. Default: 0.50 (sqrt).

§enable_key_hpcp_whitening: bool

Enable spectral whitening (per-frame frequency-domain normalization) for HPCP peak picking.

This suppresses timbral formants and broadband coloration, helping peaks corresponding to harmonic partials stand out more consistently across mixes.

Default: false.

§key_hpcp_whitening_smooth_bins: usize

Frequency smoothing window (in FFT bins) for HPCP whitening. Larger values whiten more aggressively (more timbre suppression), but can also amplify noise.

Default: 31.

§enable_key_hpcp_bass_blend: bool

Enable a bass-band HPCP blend (tonic reinforcement).

Relative major/minor share pitch classes; bass/tonic emphasis can disambiguate mode in dance music where the bassline strongly implies the tonic.

Default: true.

§key_hpcp_bass_fmin_hz: f32

Bass-band lower cutoff (Hz) for bass HPCP. Default: 55.0.

§key_hpcp_bass_fmax_hz: f32

Bass-band upper cutoff (Hz) for bass HPCP. Default: 300.0.

§key_hpcp_bass_weight: f32

Blend weight for bass HPCP (0..1). Final PCP = normalize((1 - w) * full + w * bass). Default: 0.35.

§enable_key_minor_harmonic_bonus: bool

Enable a minor-key harmonic bonus (leading-tone vs flat-7) when scoring templates.

Many dance tracks in minor heavily use harmonic minor gestures (raised 7th). This bonus nudges minor candidates whose pitch-class distribution supports a leading-tone.

Default: true.

§key_minor_leading_tone_bonus_weight: f32

Weight for the minor harmonic bonus. Internally scaled by the sum of frame weights so it is comparable to the template-score scale.

Default: 0.8.