Skip to main content

rlx_ir/ops/
audio_ops.rs

1// RLX — versatile ML compiler + runtime.
2// Copyright (C) 2026 Eugene Hauptmann, Nataliya Kosmyna.
3//
4// This program is free software: you can redistribute it and/or modify
5// it under the terms of the GNU General Public License as published by
6// the Free Software Foundation, version 3.
7//
8// This program is distributed in the hope that it will be useful,
9// but WITHOUT ANY WARRANTY; without even the implied warranty of
10// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11// GNU General Public License for more details.
12//
13// You should have received a copy of the GNU General Public License
14// along with this program. If not, see <https://www.gnu.org/licenses/>.
15
16//! Audio frontend graph builders.
17
18use crate::{Graph, NodeId, Op};
19
20impl Graph {
21    /// Log-mel spectrogram (Whisper-style) from RLX FFT block-layout spectrum.
22    ///
23    /// * **spectrum** — `[..., 2*n_fft]` with re plane then im plane (same as `Op::Fft` output).
24    /// * **filters** — `[n_mels, n_bins]` mel filterbank (`n_bins = n_fft/2 + 1`).
25    ///
26    /// Output: `[..., n_mels]`.
27    pub fn log_mel(&mut self, spectrum: NodeId, filters: NodeId) -> NodeId {
28        let spec_shape = self.shape(spectrum).clone();
29        let filt_shape = self.shape(filters).clone();
30        let out = crate::audio::log_mel_output_shape(&spec_shape, &filt_shape)
31            .unwrap_or_else(|e| panic!("log_mel shape error: {e}"));
32        self.push(Op::LogMel, vec![spectrum, filters], out, None)
33    }
34
35    /// VJP of [`log_mel`] w.r.t. `spectrum`.
36    pub fn log_mel_backward(&mut self, spectrum: NodeId, filters: NodeId, dy: NodeId) -> NodeId {
37        let spec_shape = self.shape(spectrum).clone();
38        self.push(
39            Op::LogMelBackward,
40            vec![spectrum, filters, dy],
41            spec_shape,
42            None,
43        )
44    }
45}