sonogram/builder.rs
1/*
2 * Copyright (C) Simon Werner, 2022.
3 *
4 * This program is free software; you can redistribute it and/or modify
5 * it under the terms of the GNU General Public License as published by
6 * the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, see <http://www.gnu.org/licenses/>.
16 */
17
18use std::f32;
19#[cfg(feature = "png")]
20use std::path::Path;
21
22use crate::errors::SonogramError;
23use crate::window_fn;
24use crate::SpecCompute;
25
/// Signature shared by all windowing functions accepted by the builder.
///
/// The two `usize` arguments and their meaning are defined by the functions
/// in `crate::window_fn`; the result is the window weight as an `f32`.
type WindowFn = fn(usize, usize) -> f32;

///
/// A builder struct that will output a spectrogram creator when complete.
/// This builder requires the number of FFT bins, at a minimum. Sample data
/// must then be loaded, either from a .wav file, or directly from a
/// `Vec<i16>` or `Vec<f32>` memory object, before `build` is called.
///
/// # Example
///
/// ```ignore
/// let mut spectrograph = SpecOptionsBuilder::new(512)
///     .set_window_fn(window_fn::blackman_harris)
///     .load_data_from_file(&std::path::Path::new("test.wav"))?
///     .build()?;
/// ```
///
#[derive(Debug, Clone)]
pub struct SpecOptionsBuilder {
    // Inputs
    /// Our time-domain data (audio samples).
    data: Vec<f32>,
    /// The sample rate of the wav data, in Hz.
    sample_rate: u32,
    /// The audio channel to read from a .wav file (1-based).
    channel: u16,
    /// How much to scale the sample amplitude by, if requested.
    scale_factor: Option<f32>,
    /// Normalise the samples to between -1.0...1.0.
    do_normalise: bool,
    /// Downsample the samples by a given amount, if requested.
    downsample_divisor: Option<usize>,

    // FFT info
    /// The number of FFT bins.
    num_bins: usize,
    /// How far to step between each application of the window function.
    step_size: usize,
    /// The windowing function to use.
    window_fn: WindowFn,
}
57
58impl SpecOptionsBuilder {
59 /// Create a new SpecOptionsBuilder. The final height and width of
60 /// the spectrogram must be supplied. Before the `build` function
61 /// can be called a `load_data_from_*` function needs to be called.
62 ///
63 /// # Arguments
64 ///
65 /// * `num_bins` - Number of bins in the discrete fourier transform (FFT)
66 ///
67 pub fn new(num_bins: usize) -> Self {
68 SpecOptionsBuilder {
69 data: vec![],
70 sample_rate: 11025,
71 channel: 1,
72 scale_factor: None,
73 do_normalise: false,
74 downsample_divisor: None,
75 num_bins,
76 window_fn: window_fn::rectangular,
77 step_size: num_bins,
78 }
79 }
80
81 /// Load a .wav file to memory and use that file as the input.
82 ///
83 /// # Arguments
84 ///
85 /// * `fname` - The path to the file.
86 ///
87 #[cfg(feature = "hound")]
88 pub fn load_data_from_file(self, fname: &Path) -> Result<Self, SonogramError> {
89 let mut reader = hound::WavReader::open(fname)?;
90
91 // Can only handle 16 bit data
92 // TODO: Add more data here
93 if 16 != reader.spec().bits_per_sample {
94 return Err(SonogramError::InvalidCodec);
95 }
96
97 if self.channel > reader.spec().channels {
98 return Err(SonogramError::InvalidChannel);
99 }
100
101 let data: Vec<i16> = {
102 let first_sample = self.channel as usize - 1;
103 let step_size = reader.spec().channels as usize;
104 let mut s = reader.samples();
105
106 // TODO: replace this with .advanced_by in the future
107 for _ in 0..first_sample {
108 s.next();
109 }
110
111 s.step_by(step_size).map(|x| x.unwrap()).collect()
112 };
113 let sample_rate = reader.spec().sample_rate;
114
115 Ok(self.load_data_from_memory(data, sample_rate))
116 }
117
118 /// Load data directly from memory - i16 version.
119 ///
120 /// # Arguments
121 ///
122 /// * `data` - The raw wavform data that will be converted to a spectrogram.
123 /// * `sample_rate` - The sample rate, in Hz, of the data.
124 ///
125 pub fn load_data_from_memory(mut self, data: Vec<i16>, sample_rate: u32) -> Self {
126 self.data = data.iter().map(|&x| x as f32 / (i16::MAX as f32)).collect();
127 self.sample_rate = sample_rate;
128 self
129 }
130
131 /// Load data directly from memory - f32 version.
132 ///
133 /// # Arguments
134 ///
135 /// * `data` - The raw wavform data that will be converted to a spectrogram. Samples must be in the range -1.0 to 1.0.
136 /// * `sample_rate` - The sample rate, in Hz, of the data.
137 ///
138 pub fn load_data_from_memory_f32(mut self, data: Vec<f32>, sample_rate: u32) -> Self {
139 self.data = data;
140 self.sample_rate = sample_rate;
141 self
142 }
143
144 ///
145 /// Down sample the data by the given divisor. This is a cheap way of
146 /// improving the performance of the FFT.
147 ///
148 /// # Arguments
149 ///
150 /// * `divisor` - How much to reduce the data by.
151 ///
152 pub fn downsample(mut self, divisor: usize) -> Self {
153 self.downsample_divisor = Some(divisor);
154 self
155 }
156
157 ///
158 /// Set the audio channel to use when importing a WAV file.
159 /// By default this is 1.
160 ///
161 pub fn channel(mut self, channel: u16) -> Self {
162 self.channel = channel;
163 self
164 }
165
166 ///
167 /// Normalise all the sample values to range from -1.0 to 1.0.
168 ///
169 pub fn normalise(mut self) -> Self {
170 self.do_normalise = true;
171 self
172 }
173
174 ///
175 /// Scale the sample data by the given amount.
176 ///
177 pub fn scale(mut self, scale_factor: f32) -> Self {
178 self.scale_factor = Some(scale_factor);
179 self
180 }
181
182 /// A window function describes the type of window to use during the
183 /// DFT (discrete fourier transform). See
184 /// (here)[https://en.wikipedia.org/wiki/Window_function] for more details.
185 ///
186 /// # Arguments
187 ///
188 /// * `window` - The window function to be used.
189 ///
190 pub fn set_window_fn(mut self, window_fn: WindowFn) -> Self {
191 self.window_fn = window_fn;
192 self
193 }
194
195 ///
196 /// This is the step size (as the number of samples) between each
197 /// application of the window function. A smaller step size may
198 /// increase the smoothness of the sample, but take more time. The default
199 /// step size, if not set, is the same as the number of FFT bins. This
200 /// there is no overlap between windows and it most cases will suit your
201 /// needs.
202 ///
203 pub fn set_step_size(mut self, step_size: usize) -> Self {
204 self.step_size = step_size;
205 self
206 }
207
208 ///
209 /// The final method to be called. This will create an instance of
210 /// [Spectrograph].
211 ///
212 pub fn build(mut self) -> Result<SpecCompute, SonogramError> {
213 if self.data.is_empty() {
214 // SpecOptionsBuilder requires data to be loaded
215 return Err(SonogramError::IncompleteData);
216 }
217
218 if self.channel == 0 {
219 // The channel must be an integer 1 or greater
220 return Err(SonogramError::InvalidChannel);
221 }
222
223 //
224 // Do downsample
225 //
226
227 if let Some(divisor) = self.downsample_divisor {
228 if divisor == 0 {
229 return Err(SonogramError::InvalidDivisor);
230 }
231
232 if divisor > 1 {
233 for (j, i) in (0..self.data.len() - divisor).step_by(divisor).enumerate() {
234 let sum: f32 = self.data[i..i + divisor].iter().fold(0.0, |mut sum, &val| {
235 sum += val;
236 sum
237 });
238 let avg = sum / (divisor as f32);
239
240 self.data[j] = avg;
241 }
242 self.data.resize(self.data.len() / divisor, 0.0);
243 self.sample_rate /= divisor as u32;
244 }
245 }
246
247 //
248 // Normalise
249 //
250
251 if self.do_normalise {
252 let max = self
253 .data
254 .iter()
255 .reduce(|max, x| if x > max { x } else { max })
256 .unwrap();
257
258 let norm = 1.0 / max;
259 for x in self.data.iter_mut() {
260 *x *= norm;
261 }
262 }
263
264 //
265 // Apply the scale factor
266 //
267
268 if let Some(scale_factor) = self.scale_factor {
269 for x in self.data.iter_mut() {
270 *x *= scale_factor;
271 }
272 }
273
274 Ok(SpecCompute::new(
275 self.num_bins,
276 self.step_size,
277 self.data,
278 self.window_fn,
279 ))
280 }
281}