stream_dct/
lib.rs

1//! A Rust library for allocation-limited computation of the Discrete Cosine Transform.
2//!
3//! 1D DCTs are allocation-free but 2D requires allocation.
4//!
5//! Features:
6//!
7//! * `simd`: use SIMD types to speed computation (2D DCT only)
8//! * `cos-approx`: use a Taylor series approximation of cosine instead of the stdlib
9//! implementation (which is usually much slower but also higher precision)
10
11use std::f64::consts::{PI, SQRT_2};
12use std::ops::Range;
13
14/// An allocation-free one-dimensional Discrete Cosine Transform.
15///
16/// Each iteration produces the next DCT value in the sequence.
17#[derive(Clone, Debug)]
18pub struct DCT1D<'a> {
19    data: &'a [f64],
20    curr: Range<usize>,
21}
22
23impl<'a> DCT1D<'a> {
24    /// Create a new DCT 1D adaptor from a 1D vector of data.
25    pub fn new(data: &[f64]) -> DCT1D {
26        let curr = 0 .. data.len();
27
28        DCT1D {
29            data: data,
30            curr: curr,
31        }
32    }
33
34    // Converted from the C implementation here:
35    // http://unix4lyfe.org/dct/listing2.c
36    // Source page:
37    // http://unix4lyfe.org/dct/ (Accessed 8/10/2014)
38    fn next_dct_val(&mut self) -> Option<f64> {
39        self.curr.next().map(|u| {
40            let mut z = 0.0;
41
42            let data_len = self.data.len();
43
44            for (x_idx, &x) in self.data.iter().enumerate() {
45                z += x * cos(
46                    PI * u as f64 * (2 * x_idx + 1) as f64 
47                    / (2 * data_len) as f64
48                );
49            } 
50
51            if u == 0 {
52                z *= 1.0 / SQRT_2;
53            }
54
55            z / 2.0
56        })
57    }
58}
59
60impl<'a> Iterator for DCT1D<'a> {
61    type Item = f64;
62
63    fn next(&mut self) -> Option<f64> {
64        self.next_dct_val()
65    }
66}
67
68#[inline(always)]
69fn cos(x: f64) -> f64 {
70    if cfg!(feature = "cos-approx") {
71        let x = (x.abs() + PI) % (2.0 * PI) - PI;
72
73        // Approximate the cosine of `val` using a 4-term Taylor series.
74        // Can be expanded for higher precision.
75        let x2 = x.powi(2);
76        let x4 = x.powi(4);
77        let x6 = x.powi(6);
78        let x8 = x.powi(8);
79
80        1.0 - (x2 / 2.0) + (x4 / 24.0) - (x6 / 720.0) + (x8 / 40320.0)
81    } else {
82        x.cos()
83    }
84}
85
86/// Perform a 2D DCT on a 1D-packed vector with a given rowstride.
87///
88/// E.g. a vector of length 9 with a rowstride of 3 will be processed as a 3x3 matrix.
89///
90/// Returns a vector of the same size packed in the same way.
91pub fn dct_2d(packed_2d: &[f64], rowstride: usize) -> Vec<f64> {
92    assert_eq!(packed_2d.len() % rowstride, 0);
93
94    let mut row_dct: Vec<f64> = packed_2d
95        .chunks(rowstride)
96        .flat_map(DCT1D::new)
97        .collect();
98
99    swap_rows_columns(&mut row_dct, rowstride);
100
101    let mut column_dct: Vec<f64> = packed_2d
102        .chunks(rowstride)
103        .flat_map(DCT1D::new)
104        .collect();
105
106    swap_rows_columns(&mut column_dct, rowstride);
107
108    column_dct
109}
110
111fn swap_rows_columns(data: &mut [f64], rowstride: usize) {
112    let height = data.len() / rowstride;
113
114    for y in 0 .. height {
115        for x in 0 .. rowstride {
116            data.swap(y * rowstride + x, x * rowstride + y);
117        }
118    }    
119}
120
121#[cfg_attr(all(test, feature = "cos-approx"), test)]
122#[cfg_attr(not(all(test, feature = "cos-approx")), allow(dead_code))]
123fn test_cos_approx() {
124    use std::f64::consts::PI;
125
126    const ERROR: f64 = 0.05;
127
128    fn test_cos_approx(x: f64) {
129        let approx = cos(x);
130        let cos = x.cos();
131
132        assert!(
133            approx.abs_sub(x.cos()) <= ERROR, 
134            "Approximation cos({x}) = {approx} was outside a tolerance of {error}; control value: {cos}",
135            x = x, approx = approx, error = ERROR, cos = cos,
136        );
137    }
138
139    let test_values = [PI, PI / 2.0, PI / 4.0, 1.0, -1.0, 2.0 * PI, 3.0 * PI, 4.0 / 3.0 * PI];
140
141    for &x in &test_values {
142        test_cos_approx(x);
143        test_cos_approx(-x);
144    }
145}
146
147/*
148#[cfg(feature = "simd")]
149mod dct_simd {
150    use simdty::f64x2;
151
152    use std::f64::consts::{PI, SQRT_2};
153    
154    macro_rules! valx2 ( ($val:expr) => ( ::simdty::f64x2($val, $val) ) );
155
156    const PI: f64x2 = valx2!(PI);
157    const ONE_DIV_SQRT_2: f64x2 = valx2!(1 / SQRT_2);
158    const SQRT_2: f64x2 = valx2!(SQRT_2);
159
160    pub dct_rows(vals: &[Vec<f64>]) -> Vec<Vec<f64>> {
161        let mut out = Vec::with_capacity(vals.len());
162
163        for pair in vals.iter().chunks(2) {
164            if pair.len() == 2 {
165                let vals = pair[0].iter().cloned().zip(pair[1].iter().cloned())
166                    .map(f64x2)
167                    .collect();
168
169                dct_1dx2(vals);
170
171
172        
173        }
174    }
175
176    fn dct_1dx2(vec: Vec<f64x2>) -> Vec<f64x2> {
177        let mut out = Vec::with_capacity(vec.len());
178
179        for u in 0 .. vec.len() {
180            let mut z = valx2!(0.0);
181
182            for x in 0 .. vec.len() {
183                z += vec[x] * cos_approx(
184                    PI * valx2!(
185                        u as f64 * (2 * x + 1) as f64 
186                            / (2 * vec.len()) as f64
187                    )
188                );
189            }
190
191            if u == 0 {
192                z *= ONE_DIV_SQRT_2;
193            }
194
195            out.insert(u, z / valx2!(2.0));
196        }
197
198        out 
199    }
200
201    fn cos_approx(x2: f64x2) -> f64x2 {
202        #[inline(always)]
203        fn powi(val: f64x2, pow: i32) -> f64x2 {
204            unsafe { llvmint::powi_v2f64(val, pow) }
205        }
206
207        let x2 = powi(val, 2);
208        let x4 = powi(val, 4);
209        let x6 = powi(val, 6);
210        let x8 = powi(val, 8);
211
212        valx2!(1.0) - (x2 / valx2!(2.0)) + (x4 / valx2!(24.0)) 
213            - (x6 / valx2!(720.0)) + (x8 / valx2!(40320.0))
214    }
215}
216*/
217
stream_dct/lib.rs

stream_dct/
lib.rs