rlx_ir/ops/audio_ops.rs
1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! Audio frontend graph builders.
17
18use crate::{Graph, NodeId, Op};
19
20impl Graph {
21 /// Log-mel spectrogram (Whisper-style) from RLX FFT block-layout spectrum.
22 ///
23 /// * **spectrum** — `[..., 2*n_fft]` with re plane then im plane (same as `Op::Fft` output).
24 /// * **filters** — `[n_mels, n_bins]` mel filterbank (`n_bins = n_fft/2 + 1`).
25 ///
26 /// Output: `[..., n_mels]`.
27 pub fn log_mel(&mut self, spectrum: NodeId, filters: NodeId) -> NodeId {
28 let spec_shape = self.shape(spectrum).clone();
29 let filt_shape = self.shape(filters).clone();
30 let out = crate::audio::log_mel_output_shape(&spec_shape, &filt_shape)
31 .unwrap_or_else(|e| panic!("log_mel shape error: {e}"));
32 self.push(Op::LogMel, vec![spectrum, filters], out, None)
33 }
34
35 /// VJP of [`log_mel`] w.r.t. `spectrum`.
36 pub fn log_mel_backward(&mut self, spectrum: NodeId, filters: NodeId, dy: NodeId) -> NodeId {
37 let spec_shape = self.shape(spectrum).clone();
38 self.push(
39 Op::LogMelBackward,
40 vec![spectrum, filters, dy],
41 spec_shape,
42 None,
43 )
44 }
45}