sonogram/
builder.rs

1/*
2 * Copyright (C) Simon Werner, 2022.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18use std::f32;
19#[cfg(feature = "png")]
20use std::path::Path;
21
22use crate::errors::SonogramError;
23use crate::window_fn;
24use crate::SpecCompute;
25
/// Signature of a windowing function: takes two `usize` arguments and
/// returns the window weight as an `f32`.
// NOTE(review): the arguments are presumably (sample index, window length) —
// confirm against the functions in `window_fn`.
type WindowFn = fn(usize, usize) -> f32;

///
/// A builder struct that will output a spectrogram creator ([`SpecCompute`])
/// when complete.  The builder requires the number of FFT bins, at a
/// minimum.  Sample data must then be supplied, either by loading a .wav
/// file or directly from a `Vec<i16>` / `Vec<f32>` memory object.
///
/// # Example
///
/// ```ignore
///   let mut spectrograph = SpecOptionsBuilder::new(512)
///     .set_window_fn(window_fn::blackman_harris)
///     .load_data_from_file(std::path::Path::new("test.wav"))?
///     .build()?;
/// ```
///
#[derive(Clone, Debug)]
pub struct SpecOptionsBuilder {
    // Inputs
    /// Our time-domain data (audio samples).
    data: Vec<f32>,
    /// The sample rate of the wav data.
    sample_rate: u32,
    /// The audio channel (1-based) to read when importing a WAV file.
    channel: u16,
    /// How much to scale the sample amplitude by.
    scale_factor: Option<f32>,
    /// Normalise the samples to between -1.0...1.0.
    do_normalise: bool,
    /// Downsample the samples by a given amount.
    downsample_divisor: Option<usize>,

    // FFT info
    /// The number of FFT bins.
    num_bins: usize,
    /// How far to step between each window function.
    step_size: usize,
    /// The windowing function to use.
    window_fn: WindowFn,
}
57
58impl SpecOptionsBuilder {
59    /// Create a new SpecOptionsBuilder.  The final height and width of
60    /// the spectrogram must be supplied.  Before the `build` function
61    /// can be called a `load_data_from_*` function needs to be called.
62    ///
63    /// # Arguments
64    ///  
65    ///  * `num_bins` - Number of bins in the discrete fourier transform (FFT)
66    ///
67    pub fn new(num_bins: usize) -> Self {
68        SpecOptionsBuilder {
69            data: vec![],
70            sample_rate: 11025,
71            channel: 1,
72            scale_factor: None,
73            do_normalise: false,
74            downsample_divisor: None,
75            num_bins,
76            window_fn: window_fn::rectangular,
77            step_size: num_bins,
78        }
79    }
80
81    /// Load a .wav file to memory and use that file as the input.
82    ///
83    /// # Arguments
84    ///
85    ///  * `fname` - The path to the file.
86    ///
87    #[cfg(feature = "hound")]
88    pub fn load_data_from_file(self, fname: &Path) -> Result<Self, SonogramError> {
89        let mut reader = hound::WavReader::open(fname)?;
90
91        // Can only handle 16 bit data
92        // TODO: Add more data here
93        if 16 != reader.spec().bits_per_sample {
94            return Err(SonogramError::InvalidCodec);
95        }
96
97        if self.channel > reader.spec().channels {
98            return Err(SonogramError::InvalidChannel);
99        }
100
101        let data: Vec<i16> = {
102            let first_sample = self.channel as usize - 1;
103            let step_size = reader.spec().channels as usize;
104            let mut s = reader.samples();
105
106            // TODO: replace this with .advanced_by in the future
107            for _ in 0..first_sample {
108                s.next();
109            }
110
111            s.step_by(step_size).map(|x| x.unwrap()).collect()
112        };
113        let sample_rate = reader.spec().sample_rate;
114
115        Ok(self.load_data_from_memory(data, sample_rate))
116    }
117
118    /// Load data directly from memory - i16 version.
119    ///
120    /// # Arguments
121    ///
122    ///  * `data` - The raw wavform data that will be converted to a spectrogram.
123    ///  * `sample_rate` - The sample rate, in Hz, of the data.
124    ///
125    pub fn load_data_from_memory(mut self, data: Vec<i16>, sample_rate: u32) -> Self {
126        self.data = data.iter().map(|&x| x as f32 / (i16::MAX as f32)).collect();
127        self.sample_rate = sample_rate;
128        self
129    }
130
131    /// Load data directly from memory - f32 version.
132    ///
133    /// # Arguments
134    ///
135    ///  * `data` - The raw wavform data that will be converted to a spectrogram. Samples must be in the range -1.0 to 1.0.
136    ///  * `sample_rate` - The sample rate, in Hz, of the data.
137    ///
138    pub fn load_data_from_memory_f32(mut self, data: Vec<f32>, sample_rate: u32) -> Self {
139        self.data = data;
140        self.sample_rate = sample_rate;
141        self
142    }
143
144    ///
145    /// Down sample the data by the given divisor.  This is a cheap way of
146    /// improving the performance of the FFT.
147    ///
148    /// # Arguments
149    ///
150    ///  * `divisor` - How much to reduce the data by.
151    ///
152    pub fn downsample(mut self, divisor: usize) -> Self {
153        self.downsample_divisor = Some(divisor);
154        self
155    }
156
157    ///
158    /// Set the audio channel to use when importing a WAV file.
159    /// By default this is 1.
160    ///
161    pub fn channel(mut self, channel: u16) -> Self {
162        self.channel = channel;
163        self
164    }
165
166    ///
167    /// Normalise all the sample values to range from -1.0 to 1.0.
168    ///
169    pub fn normalise(mut self) -> Self {
170        self.do_normalise = true;
171        self
172    }
173
174    ///
175    /// Scale the sample data by the given amount.
176    ///
177    pub fn scale(mut self, scale_factor: f32) -> Self {
178        self.scale_factor = Some(scale_factor);
179        self
180    }
181
182    /// A window function describes the type of window to use during the
183    /// DFT (discrete fourier transform).  See
184    /// (here)[https://en.wikipedia.org/wiki/Window_function] for more details.
185    ///
186    /// # Arguments
187    ///
188    ///  * `window` - The window function to be used.
189    ///
190    pub fn set_window_fn(mut self, window_fn: WindowFn) -> Self {
191        self.window_fn = window_fn;
192        self
193    }
194
195    ///
196    /// This is the step size (as the number of samples) between each
197    /// application of the window function.  A smaller step size may
198    /// increase the smoothness of the sample, but take more time.  The default
199    /// step size, if not set, is the same as the number of FFT bins.  This
200    /// there is no overlap between windows and it most cases will suit your
201    /// needs.
202    ///
203    pub fn set_step_size(mut self, step_size: usize) -> Self {
204        self.step_size = step_size;
205        self
206    }
207
208    ///
209    /// The final method to be called.  This will create an instance of
210    /// [Spectrograph].
211    ///
212    pub fn build(mut self) -> Result<SpecCompute, SonogramError> {
213        if self.data.is_empty() {
214            // SpecOptionsBuilder requires data to be loaded
215            return Err(SonogramError::IncompleteData);
216        }
217
218        if self.channel == 0 {
219            // The channel must be an integer 1 or greater
220            return Err(SonogramError::InvalidChannel);
221        }
222
223        //
224        // Do downsample
225        //
226
227        if let Some(divisor) = self.downsample_divisor {
228            if divisor == 0 {
229                return Err(SonogramError::InvalidDivisor);
230            }
231
232            if divisor > 1 {
233                for (j, i) in (0..self.data.len() - divisor).step_by(divisor).enumerate() {
234                    let sum: f32 = self.data[i..i + divisor].iter().fold(0.0, |mut sum, &val| {
235                        sum += val;
236                        sum
237                    });
238                    let avg = sum / (divisor as f32);
239
240                    self.data[j] = avg;
241                }
242                self.data.resize(self.data.len() / divisor, 0.0);
243                self.sample_rate /= divisor as u32;
244            }
245        }
246
247        //
248        // Normalise
249        //
250
251        if self.do_normalise {
252            let max = self
253                .data
254                .iter()
255                .reduce(|max, x| if x > max { x } else { max })
256                .unwrap();
257
258            let norm = 1.0 / max;
259            for x in self.data.iter_mut() {
260                *x *= norm;
261            }
262        }
263
264        //
265        // Apply the scale factor
266        //
267
268        if let Some(scale_factor) = self.scale_factor {
269            for x in self.data.iter_mut() {
270                *x *= scale_factor;
271            }
272        }
273
274        Ok(SpecCompute::new(
275            self.num_bins,
276            self.step_size,
277            self.data,
278            self.window_fn,
279        ))
280    }
281}