stabilizer_stream/
psd.rs

1use idsp::hbf::{Filter, HbfDecCascade};
2use rustfft::{num_complex::Complex, Fft, FftPlanner};
3use std::sync::Arc;
4
5/// Window kernel
6///
7/// <https://holometer.fnal.gov/GH_FFT.pdf>
8/// <https://gist.github.com/endolith/c4b8e1e3c630a260424123b4e9d964c4>
9/// <https://docs.google.com/spreadsheets/d/1glvo-y1tqCiYwK0QQWhB4AAcDFiK_C_0M4SeA0Uyqdc/edit>
10#[derive(Copy, Clone, Debug, PartialEq)]
11pub struct Window<const N: usize> {
12    pub win: [f32; N],
13    /// Mean squared
14    pub power: f32,
15    /// Normalized effective noise bandwidth (in bins)
16    pub nenbw: f32,
17    /// Optimal overlap
18    pub overlap: usize,
19}
20
21impl<const N: usize> Window<N> {
22    /// Rectangular window
23    pub fn rectangular() -> Self {
24        assert!(N > 0);
25        Self {
26            win: [1.0; N],
27            power: 1.0,
28            nenbw: 1.0,
29            overlap: 0,
30        }
31    }
32
33    /// Hann window
34    ///
35    /// This is the "numerical" version of the window with period `N`, `win[0] = win[N]`
36    /// (conceptually), specifically `win[0] != win[win.len() - 1]`.
37    /// Matplotlib's `matplotlib.mlab.window_hanning()` (but not scipy.signal.get_window())
38    /// uses the symetric one of period `N-1`, with `win[0] = win[N - 1] = 0`
39    /// which looses a lot of useful properties (exact nenbw() and power() independent of `N`,
40    /// exact optimal overlap etc)
41    pub fn hann() -> Self {
42        assert!(N > 0);
43        let df = core::f32::consts::PI / N as f32;
44        let mut win = [0.0; N];
45        for (i, w) in win.iter_mut().enumerate() {
46            *w = (df * i as f32).sin().powi(2);
47        }
48        Self {
49            win,
50            power: 0.25,
51            nenbw: 1.5,
52            overlap: N / 2,
53        }
54    }
55}
56
57/// Detrend method
58#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, clap::ValueEnum)]
59pub enum Detrend {
60    /// No detrending
61    #[default]
62    None,
63    /// Subtract the midpoint of each segment
64    Midpoint,
65    /// Remove linear interpolation between first and last item for each segment
66    Span,
67    /// Remove the mean of the segment
68    Mean,
69    /// Remove the linear regression of each segment
70    Linear,
71}
72
73impl core::fmt::Display for Detrend {
74    fn fmt(&self, f: &mut core::fmt::Formatter) -> core::fmt::Result {
75        core::fmt::Debug::fmt(self, f)
76    }
77}
78
79impl Detrend {
80    pub fn apply<const N: usize>(&self, x: &[f32; N], win: &Window<N>) -> [Complex<f32>; N] {
81        // apply detrending, window, make complex
82        let mut c = [Complex::default(); N];
83
84        match self {
85            Detrend::None => {
86                for ((c, x), w) in c.iter_mut().zip(x.iter()).zip(win.win.iter()) {
87                    c.re = x * w;
88                    c.im = 0.0;
89                }
90            }
91            Detrend::Midpoint => {
92                let offset = x[N / 2];
93                for ((c, x), w) in c.iter_mut().zip(x.iter()).zip(win.win.iter()) {
94                    c.re = (x - offset) * w;
95                    c.im = 0.0;
96                }
97            }
98            Detrend::Span => {
99                let mut offset = x[0];
100                let slope = (x[N - 1] - x[0]) / (N - 1) as f32;
101                for ((c, x), w) in c.iter_mut().zip(x.iter()).zip(win.win.iter()) {
102                    c.re = (x - offset) * w;
103                    c.im = 0.0;
104                    offset += slope;
105                }
106            }
107            Detrend::Mean => {
108                let offset = x.iter().sum::<f32>() / N as f32;
109                for ((c, x), w) in c.iter_mut().zip(x.iter()).zip(win.win.iter()) {
110                    c.re = (x - offset) * w;
111                    c.im = 0.0;
112                }
113            }
114            Detrend::Linear => unimplemented!(),
115        };
116        c
117    }
118}
119
120/// Power spectral density accumulator and decimator
121///
122/// One stage in [PsdCascade].
123#[derive(Clone)]
124pub struct Psd<const N: usize> {
125    hbf: HbfDecCascade,
126    buf: [f32; N],
127    idx: usize,
128    spectrum: [f32; N], // using only the positive half N/2 + 1
129    count: u32,
130    drain: usize,
131    fft: Arc<dyn Fft<f32>>,
132    win: Arc<Window<N>>,
133    detrend: Detrend,
134    avg: u32,
135}
136
137impl<const N: usize> Psd<N> {
138    pub fn new(fft: Arc<dyn Fft<f32>>, win: Arc<Window<N>>) -> Self {
139        let hbf = HbfDecCascade::default();
140        assert_eq!(N, fft.len());
141        // check fft and decimation block size compatibility
142        assert!(N >= 2); // Nyquist and DC distinction
143        let mut s = Self {
144            hbf,
145            buf: [0.0; N],
146            idx: 0,
147            spectrum: [0.0; N],
148            count: 0,
149            fft,
150            win,
151            detrend: Detrend::default(),
152            drain: 0,
153            avg: u32::MAX,
154        };
155        s.set_stage_depth(0);
156        s
157    }
158
159    pub fn set_avg(&mut self, avg: u32) {
160        self.avg = avg;
161    }
162
163    pub fn set_detrend(&mut self, d: Detrend) {
164        self.detrend = d;
165    }
166
167    pub fn set_stage_depth(&mut self, n: usize) {
168        self.hbf.set_depth(n);
169        self.drain = self.hbf.response_length() as _;
170    }
171}
172
173pub trait PsdStage {
174    /// Process items
175    ///
176    /// Unused items are buffered.
177    /// Full FFT blocks are processed.
178    /// Overlap is kept.
179    /// Decimation is performed on fully processed input items.
180    ///
181    /// Note: When feeding more than ~N*1e6 items expect loss of accuracy
182    /// due to rounding errors on accumulation.
183    ///
184    /// Note: Also be aware of the usual accuracy limitation of the item data type
185    ///
186    /// # Args
187    /// * `x`: input items
188    /// * `y`: output items
189    ///
190    /// # Returns
191    /// number if items written to `y`
192    fn process<'a>(&mut self, x: &[f32], y: &'a mut [f32]) -> &'a mut [f32];
193    /// Return the positive frequency half of the spectrum
194    fn spectrum(&self) -> &[f32];
195    /// PSD normalization factor
196    ///
197    /// one-sided
198    fn gain(&self) -> f32;
199    /// Number of averages
200    fn count(&self) -> u32;
201    /// Currently buffered input items
202    fn buf(&self) -> &[f32];
203}
204
205impl<const N: usize> PsdStage for Psd<N> {
206    fn process<'a>(&mut self, mut x: &[f32], y: &'a mut [f32]) -> &'a mut [f32] {
207        let mut n = 0;
208        // TODO: this could be made faster with less copying for internal segments of x
209        while !x.is_empty() {
210            // load
211            let take = x.len().min(self.buf.len() - self.idx);
212            let chunk;
213            (chunk, x) = x.split_at(take);
214            self.buf[self.idx..][..take].copy_from_slice(chunk);
215            self.idx += take;
216            if self.idx < N {
217                break;
218            }
219
220            // detrend and window
221            let mut c = self.detrend.apply(&self.buf, &self.win);
222            // fft in-place
223            self.fft.process(&mut c);
224
225            // normalize and keep for EWMA
226            let g = if self.count > self.avg {
227                let g = self.avg as f32 / self.count as f32;
228                self.count = self.avg;
229                g
230            } else {
231                1.0
232            };
233            // convert positive frequency spectrum to power and accumulate
234            for (c, p) in c[..N / 2 + 1]
235                .iter()
236                .zip(self.spectrum[..N / 2 + 1].iter_mut())
237            {
238                *p = g * *p + c.norm_sqr();
239            }
240
241            let start = if self.count == 0 {
242                // decimate entire segment into lower half, keep overlap later
243                0
244            } else {
245                // keep overlap
246                self.buf.copy_within(N - self.win.overlap..N, 0);
247                // decimate only new items into third quarter
248                self.win.overlap
249            };
250
251            // decimate
252            let mut yi = self.hbf.process_block(None, &mut self.buf[start..]);
253            // drain decimator impulse response to initial state (zeros)
254            let skip = self.drain.min(yi.len());
255            self.drain -= skip;
256            yi = &mut yi[skip..];
257            // yield l
258            y[n..][..yi.len()].copy_from_slice(yi);
259            n += yi.len();
260
261            if self.count == 0 {
262                // keep overlap after decimating entire segment
263                self.buf.copy_within(N - self.win.overlap..N, 0);
264            }
265
266            self.count += 1;
267            self.idx = self.win.overlap;
268        }
269        &mut y[..n]
270    }
271
272    fn spectrum(&self) -> &[f32] {
273        &self.spectrum[..N / 2 + 1]
274    }
275
276    fn count(&self) -> u32 {
277        self.count
278    }
279
280    fn gain(&self) -> f32 {
281        // 2 for one-sided
282        // overlap is compensated by counting
283        (N as u32 / 2 * self.count) as f32 * self.win.nenbw * self.win.power
284    }
285
286    fn buf(&self) -> &[f32] {
287        &self.buf[..self.idx]
288    }
289}
290
291/// Stage break information
292#[derive(Copy, Clone, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
293pub struct Break {
294    /// Start index in PSD and frequencies
295    pub start: usize,
296    /// Number of averages
297    pub count: u32,
298    /// Averaging limit
299    pub avg: u32,
300    /// Highes FFT bin (at `start`)
301    pub highest_bin: usize,
302    /// FFT size
303    pub fft_size: usize,
304    /// The decimation power of two
305    pub decimation: usize,
306    /// Unprocessed number of input samples (includes overlap)
307    pub pending: usize,
308    /// Total number of samples processed (excluding overlap, ignoring averaging)
309    pub processed: usize,
310}
311
312impl Break {
313    /// Compute PSD bin center frequencies from stage breaks.
314    pub fn frequencies(b: &[Self], opts: &MergeOpts) -> Vec<f32> {
315        let Some(bi) = b.last() else { return vec![] };
316        let mut f = Vec::with_capacity(bi.start + bi.highest_bin);
317        for bi in b.iter() {
318            if opts.remove_overlap {
319                f.truncate(bi.start);
320            }
321            let df = 1.0 / bi.effective_fft_size() as f32;
322            f.extend((0..bi.highest_bin).rev().map(|f| f as f32 * df));
323        }
324        assert_eq!(f.len(), bi.start + bi.highest_bin);
325        debug_assert_eq!(f.first(), Some(&0.5));
326        debug_assert_eq!(f.last(), Some(&0.0));
327        f
328    }
329
330    pub fn effective_fft_size(&self) -> usize {
331        self.fft_size << self.decimation
332    }
333}
334
335/// PSD segment merge options
336#[derive(Copy, Clone, Debug, PartialEq, Eq)]
337pub struct MergeOpts {
338    /// Remove low resolution bins
339    pub remove_overlap: bool,
340    /// Minimum averaging level
341    pub min_count: u32,
342}
343
344impl Default for MergeOpts {
345    fn default() -> Self {
346        Self {
347            remove_overlap: true,
348            min_count: 1,
349        }
350    }
351}
352
353/// Averaging options
354#[derive(Copy, Clone, Debug, PartialEq, Eq)]
355pub struct AvgOpts {
356    /// Scale averaging with decimation
357    pub scale: bool,
358    /// Averaging
359    pub count: u32,
360}
361
362impl Default for AvgOpts {
363    fn default() -> Self {
364        Self {
365            scale: false,
366            count: u32::MAX,
367        }
368    }
369}
370
371/// Online power spectral density estimation
372///
373/// This performs efficient long term power spectral density monitoring in real time.
374/// The idea is to perform FFTs over relatively short windows and simultaneously decimate
375/// the time domain data, everything in multiple stages, then
376/// stitch together the FFT bins from the different stages.
377/// This allows arbitrarily large effective FFTs sizes in practice with only
378/// logarithmically increasing memory and cpu consumption. And it makes available PSD data
379/// from higher frequency stages immediately to get rid of the delay in
380/// recording and computing large FFTs. The effective full FFT size grows in real-time,
381/// is unlimited, and does not need to be fixed.
382/// This is well defined with the caveat that spur power (bin power not dominated by noise)
383/// depends on the stage-dependent bin width.
384/// This also typically what some modern signal analyzers or noise metrology instruments do.
385///
386/// See also [`csdl`](https://github.com/jordens/csdl) or
387/// [LPSD](https://doi.org/10.1016/j.measurement.2005.10.010).
388///
389/// Infinite averaging
390/// Incremental updates
391/// Automatic FFT stage extension
392#[derive(Clone)]
393pub struct PsdCascade<const N: usize> {
394    stages: Vec<Psd<N>>,
395    fft: Arc<dyn Fft<f32>>,
396    stage_depth: usize,
397    detrend: Detrend,
398    win: Arc<Window<N>>,
399    avg: AvgOpts,
400}
401
402impl<const N: usize> PsdCascade<N> {
403    /// Create a new Psd instance
404    ///
405    /// fft_size: size of the FFT blocks and the window
406    /// stage_length: number of decimation stages. rate change per stage is 1 << stage_length
407    /// detrend: [Detrend] method
408    pub fn new(stage_depth: usize) -> Self {
409        assert!(stage_depth > 0);
410        Self {
411            stages: Vec::with_capacity(4),
412            fft: FftPlanner::new().plan_fft_forward(N),
413            stage_depth,
414            detrend: Detrend::None,
415            win: Arc::new(Window::hann()),
416            avg: AvgOpts::default(),
417        }
418    }
419
420    pub fn set_avg(&mut self, avg: AvgOpts) {
421        self.avg = avg;
422        for (i, stage) in self.stages.iter_mut().enumerate() {
423            stage.set_avg(
424                self.avg.count
425                    >> if self.avg.scale {
426                        self.stage_depth * i
427                    } else {
428                        0
429                    },
430            );
431        }
432    }
433
434    pub fn set_detrend(&mut self, d: Detrend) {
435        self.detrend = d;
436        for stage in self.stages.iter_mut() {
437            stage.set_detrend(self.detrend);
438        }
439    }
440
441    fn get_or_add(&mut self, i: usize) -> &mut Psd<N> {
442        while i >= self.stages.len() {
443            let mut stage = Psd::new(self.fft.clone(), self.win.clone());
444            stage.set_stage_depth(self.stage_depth);
445            stage.set_detrend(self.detrend);
446            stage.set_avg(
447                self.avg.count
448                    >> if self.avg.scale {
449                        self.stage_depth * i
450                    } else {
451                        0
452                    },
453            );
454            self.stages.push(stage);
455        }
456        &mut self.stages[i]
457    }
458
459    /// Process input items
460    pub fn process(&mut self, x: &[f32]) {
461        let mut a = ([0f32; N], [0f32; N]);
462        let (mut y, mut z) = (&mut a.0, &mut a.1);
463        for mut x in x.chunks(N << self.stage_depth) {
464            let mut i = 0;
465            while !x.is_empty() {
466                let n = self.get_or_add(i).process(x, y).len();
467                core::mem::swap(&mut z, &mut y);
468                x = &z[..n];
469                i += 1;
470            }
471        }
472    }
473
474    /// Return the PSD and a Vec of segement break information
475    ///
476    /// # Args
477    /// * `min_count`: minimum number of averages to include in output, if zero, also return
478    ///   bins that would otherwise be masked by lower stage bins.
479    ///
480    /// # Returns
481    /// * `psd`: `Vec` normalized reversed (Nyquist first, DC last)
482    /// * `breaks`: `Vec` of stage breaks
483    pub fn psd(&self, opts: &MergeOpts) -> (Vec<f32>, Vec<Break>) {
484        let mut p = Vec::with_capacity(self.stages.len() * (N / 2 + 1));
485        let mut b = Vec::with_capacity(self.stages.len());
486        let mut n = 0;
487        for stage in self.stages.iter().take_while(|s| s.count >= opts.min_count) {
488            let mut pi = stage.spectrum();
489            // a stage yields frequency bins 0..N/2 from DC up to its nyquist
490            // 0..floor(0.4*N) is its passband if it was preceeded by a decimator
491            // 0..floor(0.4*N)/R is the passband of the next lower stage
492            // hence take bins ceil(floor(0.4*N)/R)..floor(0.4*N) from a non-edge stage
493            if !p.is_empty() {
494                // not the first stage
495                // remove transition band of previous stage's decimator, floor
496                let f_pass = 2 * N / 5;
497                pi = &pi[..f_pass];
498                if opts.remove_overlap {
499                    // remove low f bins from previous stage, ceil
500                    let d = stage.hbf.depth();
501                    let f_low = (f_pass + (1 << d) - 1) >> d;
502                    p.truncate(p.len() - f_low);
503                }
504            }
505            b.push(Break {
506                start: p.len(),
507                count: stage.count(),
508                avg: stage.avg,
509                highest_bin: pi.len(),
510                fft_size: N,
511                decimation: n,
512                processed: ((N - stage.win.overlap) * stage.count() as usize
513                    + stage.win.overlap * stage.count().min(1) as usize),
514                pending: stage.buf().len(),
515            });
516            let g = (1 << n) as f32 / stage.gain();
517            p.extend(pi.iter().rev().map(|pi| pi * g));
518            n += stage.hbf.depth();
519        }
520        // Do not "correct" DC and Nyquist bins.
521        // Common psd algorithms argue that as both only contribute once to the one-sided
522        // spectrum, they should be scaled by 0.5.
523        // This would match matplotlib and matlab but is a highly questionable step usually done to
524        // satisfy a oversimplified Parseval check.
525        // The DC and Nyquist bins must not be scaled by 0.5, simply because modulation with
526        // a frequency that is not exactly DC or Nyquist
527        // but still contributes to those bins would be counted wrong. This is always the case
528        // for noise (not spurs). In turn take care when doing Parseval checks.
529        // See also Heinzel, RĂ¼diger, Shilling:
530        // "Spectrum and spectral density estimation by the Discrete Fourier transform (DFT),
531        // including a comprehensive list of window functions and some new flat-top windows.";
532        // 2002
533        // if let Some(p) = p.first_mut() {
534        //     *p *= 0.5;
535        // }
536        // if let Some(p) = p.last_mut() {
537        //    *p *= 0.5;
538        // }
539        (p, b)
540    }
541}
542
543#[cfg(test)]
544mod test {
545    use super::*;
546
547    /// 36 insns per item: > 200 MS/s per skylake core
548    #[test]
549    #[ignore]
550    fn insn() {
551        let mut s = PsdCascade::<{ 1 << 9 }>::new(3);
552        let x: Vec<_> = (0..1 << 16).map(|_| rand::random::<f32>() - 0.5).collect();
553        for _ in 0..(1 << 12) {
554            // + 293
555            s.process(&x);
556        }
557    }
558
559    /// full accuracy tests
560    #[test]
561    fn exact() {
562        const N: usize = 4;
563        let mut s = Psd::<N>::new(
564            FftPlanner::new().plan_fft_forward(N),
565            Arc::new(Window::rectangular()),
566        );
567        let x = vec![1.0; N];
568        let mut y = vec![0.0; N];
569        let y = s.process(&x, &mut y);
570        assert_eq!(y, &x[..N]);
571        println!("{:?}, {}", s.spectrum(), s.gain());
572
573        let mut s = PsdCascade::<N>::new(1);
574        s.process(&x);
575        let merge_opts = MergeOpts {
576            remove_overlap: false,
577            min_count: 0,
578        };
579        let (p, b) = s.psd(&merge_opts);
580        let f = Break::frequencies(&b, &merge_opts);
581        println!("{:?}, {:?}", p, f);
582        assert!(p
583            .iter()
584            .zip([0.0, 4.0 / 3.0, 16.0 / 3.0].iter())
585            .all(|(p, p0)| (p - p0).abs() < 1e-7));
586        assert!(f
587            .iter()
588            .zip([0.5, 0.25, 0.0].iter())
589            .all(|(p, p0)| (p - p0).abs() < 1e-7));
590    }
591
592    #[test]
593    fn test() {
594        assert_eq!(idsp::hbf::HBF_PASSBAND, 0.4);
595
596        // make uniform noise, with zero mean and rms = 1 ignore the epsilon.
597        let x: Vec<_> = (0..1 << 16)
598            .map(|_| (rand::random::<f32>() - 0.5) * 12f32.sqrt())
599            .collect();
600        let xm = x.iter().map(|x| *x as f64).sum::<f64>() as f32 / x.len() as f32;
601        // mean is 0, take 10 sigma here and elsewhere
602        assert!(xm.abs() < 10.0 / (x.len() as f32).sqrt());
603        let xv = x.iter().map(|x| (x * x) as f64).sum::<f64>() as f32 / x.len() as f32;
604        // variance is 1
605        assert!((xv - 1.0).abs() < 10.0 / (x.len() as f32).sqrt());
606
607        const N: usize = 1 << 9;
608        let n = 3;
609        let mut s = Psd::<N>::new(
610            FftPlanner::new().plan_fft_forward(N),
611            Arc::new(Window::hann()),
612        );
613        s.set_stage_depth(n);
614        let mut y = vec![0.0; x.len() >> n];
615        let y = s.process(&x, &mut y[..]);
616
617        let mut hbf = HbfDecCascade::default();
618        hbf.set_depth(n);
619        assert_eq!(y.len(), (x.len() >> n) - hbf.response_length());
620        let g = 1.0 / s.gain();
621        let p: Vec<_> = s.spectrum().iter().map(|p| p * g).collect();
622        // psd of a stage
623        assert!(
624            p.iter()
625                // 0.5 for one-sided spectrum
626                .all(|p| (p * 0.5 - 1.0).abs() < 10.0 / (s.count() as f32).sqrt()),
627            "{:?}",
628            &p[..]
629        );
630
631        let mut d = PsdCascade::<N>::new(n);
632        d.process(&x);
633        let (p, b) = d.psd(&MergeOpts::default());
634        // do not tweak DC and Nyquist!
635        let n = p.len();
636        for (i, bi) in b.iter().enumerate() {
637            // let (start, count, high, size) = bi.into();
638            let end = b.get(i + 1).map(|bi| bi.start).unwrap_or(n);
639            let pi = &p[bi.start..end];
640            // psd of the cascade
641            assert!(pi
642                .iter()
643                // 0.5 for one-sided spectrum
644                .all(|p| (p * 0.5 - 1.0).abs() < 10.0 / (bi.count as f32).sqrt()));
645        }
646    }
647}