1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
// SPDX-License-Identifier: MPL-2.0

//! Fourier transform for 2D data such as images.

use rustfft::FftDirection;
use rustfft::{num_complex::Complex, FftPlanner};

/// Compute the 2D Fourier transform of an image buffer.
///
/// The image buffer is considered to be stored in row major order.
/// After the 2D FFT has been applied, the buffer contains the transposed
/// of the Fourier transform since one transposition is needed to process
/// the columns of the image buffer.
///
/// The transformation is not normalized.
/// To normalize the output, you should multiply it by 1 / sqrt( width * height ).
/// If the transformed buffer is intended to be processed
/// and then converted back into an image with an inverse Fourier transform,
/// it is more efficient to multiply at the end by 1 / (width * height).
///
/// Remark: an allocation the size of the image buffer is performed for the transposition,
/// as well as scratch buffers while performing the rows and columns FFTs.
pub fn fft_2d(width: usize, height: usize, img_buffer: &mut [Complex<f64>]) {
    fft_2d_with_direction(width, height, img_buffer, FftDirection::Forward)
}

/// Compute the inverse 2D Fourier transform to get back an image buffer.
///
/// After the inverse 2D FFT has been applied, the image buffer contains the transposed
/// of the inverse Fourier transform since one transposition is needed to process
/// the columns of the buffer.
///
/// The transformation is not normalized.
/// To normalize the output, you should multiply it by 1 / sqrt( width * height ).
/// If this is used as a pair of FFT followed by inverse FFT,
/// is is more efficient to normalize only once by 1 / (width * height) at the end.
///
/// Remark: an allocation the size of the image buffer is performed for the transposition,
/// as well as scratch buffers while performing the rows and columns FFTs.
pub fn ifft_2d(width: usize, height: usize, img_buffer: &mut [Complex<f64>]) {
    fft_2d_with_direction(width, height, img_buffer, FftDirection::Inverse)
}

/// Compute the 2D Fourier transform or inverse transform of an image buffer.
///
/// The image buffer is considered to be stored in row major order.
/// After the 2D FFT has been applied, the buffer contains the transposed
/// of the Fourier transform since one transposition is needed to process
/// the columns of the image buffer.
///
/// The transformation is not normalized.
/// To normalize the output, you should multiply it by 1 / sqrt( width * height ).
/// If this is used as a pair of FFT followed by inverse FFT,
/// is is more efficient to normalize only once by 1 / (width * height) at the end.
///
/// Remark: an allocation the size of the image buffer is performed for the transposition,
/// as well as a scratch buffer while performing the rows and columns FFTs.
fn fft_2d_with_direction(
    width: usize,
    height: usize,
    img_buffer: &mut [Complex<f64>],
    direction: FftDirection,
) {
    // Compute the FFT of each row of the image.
    let mut planner = FftPlanner::new();
    let fft_width = planner.plan_fft(width, direction);
    let mut scratch = vec![Complex::default(); fft_width.get_inplace_scratch_len()];
    for row_buffer in img_buffer.chunks_exact_mut(width) {
        fft_width.process_with_scratch(row_buffer, &mut scratch);
    }

    // Transpose the image to be able to compute the FFT on the other dimension.
    let mut transposed = transpose(width, height, img_buffer);
    let fft_height = planner.plan_fft(height, direction);
    scratch.resize(fft_height.get_outofplace_scratch_len(), Complex::default());
    for (tr_buf, col_buf) in transposed
        .chunks_exact_mut(height)
        .zip(img_buffer.chunks_exact_mut(height))
    {
        fft_height.process_outofplace_with_scratch(tr_buf, col_buf, &mut scratch);
    }
}

fn transpose<T: Copy + Default>(width: usize, height: usize, matrix: &[T]) -> Vec<T> {
    let mut ind = 0;
    let mut ind_tr;
    let mut transposed = vec![T::default(); matrix.len()];
    for row in 0..height {
        ind_tr = row;
        for _ in 0..width {
            transposed[ind_tr] = matrix[ind];
            ind += 1;
            ind_tr += height;
        }
    }
    transposed
}

/// Inverse operation of the quadrants shift performed by fftshift.
///
/// It is different than fftshift if one dimension has an odd length.
pub fn ifftshift<T: Copy + Default>(width: usize, height: usize, matrix: &[T]) -> Vec<T> {
    // TODO: do actual code instead of relying on fftshift.
    let is_even = |length| length % 2 == 0;
    assert!(is_even(width), "Need a dedicated implementation");
    assert!(is_even(height), "Need a dedicated implementation");
    fftshift(width, height, matrix)
}

/// Shift the 4 quadrants of a Fourier transform to have all the low frequencies
/// at the center of the image.
pub fn fftshift<T: Copy + Default>(width: usize, height: usize, matrix: &[T]) -> Vec<T> {
    let mut shifted = vec![T::default(); matrix.len()];
    let half_width = width / 2;
    let half_height = height / 2;
    // Shift top and bottom quadrants.
    for row in 0..half_height {
        // top
        let mrow_start = row * width;
        let m_row = &matrix[mrow_start..mrow_start + width];
        // bottom
        let srow_start = mrow_start + (height - half_height) * width;
        let s_row = &mut shifted[srow_start..srow_start + width];
        // swap left and right
        s_row[width - half_width..width].copy_from_slice(&m_row[0..half_width]);
        s_row[0..width - half_width].copy_from_slice(&m_row[half_width..width]);
    }
    // Shift bottom and top quadrants.
    for row in half_height..height {
        // bottom
        let mrow_start = row * width;
        let m_row = &matrix[mrow_start..mrow_start + width];
        // top
        let srow_start = (row - half_height) * width;
        let s_row = &mut shifted[srow_start..srow_start + width];
        // swap left and right
        s_row[width - half_width..width].copy_from_slice(&m_row[0..half_width]);
        s_row[0..width - half_width].copy_from_slice(&m_row[half_width..width]);
    }
    shifted
}

// Sine and Cosine transforms ##################################################

#[cfg(feature = "rustdct")]
/// Cosine and Sine transforms.
pub mod dcst {

    use super::transpose;
    use rustdct::DctPlanner;

    /// Compute the 2D cosine transform of an image buffer.
    ///
    /// The image buffer is considered to be stored in row major order.
    /// After the 2D DCT has been applied, the buffer contains the transposed
    /// of the cosine transform since one transposition is needed to process
    /// the columns of the image buffer.
    ///
    /// The transformation is not normalized.
    /// To normalize the output, you should multiply it by 2 / sqrt( width * height ).
    /// If this is used as a pair of DCT followed by inverse DCT,
    /// is is more efficient to normalize only once at the end.
    ///
    /// Remark: an allocation the size of the image buffer is performed for the transposition,
    /// as well as a scratch buffer while performing the rows and columns transforms.
    pub fn dct_2d(width: usize, height: usize, img_buffer: &mut [f64]) {
        // Compute the FFT of each row of the image.
        let mut planner = DctPlanner::new();
        let dct_width = planner.plan_dct2(width);
        let mut scratch = vec![0.0; dct_width.get_scratch_len()];
        for row_buffer in img_buffer.chunks_exact_mut(width) {
            dct_width.process_dct2_with_scratch(row_buffer, &mut scratch);
        }

        // Transpose the image to be able to compute the FFT on the other dimension.
        let mut transposed = transpose(width, height, img_buffer);
        let dct_height = planner.plan_dct2(height);
        scratch.resize(dct_height.get_scratch_len(), 0.0);
        for column_buffer in transposed.chunks_exact_mut(height) {
            dct_height.process_dct2_with_scratch(column_buffer, &mut scratch);
        }
        img_buffer.copy_from_slice(&transposed);
    }

    /// Parallel version of [`dct_2d`].
    ///
    /// This uses rayon internally, see the rayon crate docs to control the level
    /// of parallelism.
    #[cfg(feature = "parallel")]
    pub fn par_dct_2d(width: usize, height: usize, img_buffer: &mut [f64]) {
        use rayon::prelude::{ParallelIterator, ParallelSliceMut};

        let mut planner = DctPlanner::new();
        let dct_width = planner.plan_dct2(width);

        img_buffer
            .par_chunks_exact_mut(width)
            .for_each(|row_buffer| {
                dct_width.process_dct2(row_buffer);
            });

        let mut transposed = transpose(width, height, img_buffer);
        let dct_height = planner.plan_dct2(height);

        transposed
            .par_chunks_exact_mut(height)
            .for_each(|column_buffer| {
                dct_height.process_dct2(column_buffer);
            });

        img_buffer.copy_from_slice(&transposed);
    }

    /// Compute the inverse 2D cosine transform of an image buffer.
    ///
    /// The image buffer is considered to be stored in row major order.
    /// After the 2D iDCT has been applied, the buffer contains the transposed
    /// of the cosine transform since one transposition is needed to process
    /// the columns of the image buffer.
    ///
    /// The transformation is not normalized.
    /// To normalize the output, you should multiply it by 2 / sqrt( width * height ).
    /// If this is used as a pair of DCT followed by inverse DCT,
    /// is is more efficient to normalize only once at the end.
    ///
    /// Remark: an allocation the size of the image buffer is performed for the transposition,
    /// as well as a scratch buffer while performing the rows and columns transforms.
    pub fn idct_2d(width: usize, height: usize, img_buffer: &mut [f64]) {
        // Compute the FFT of each row of the image.
        let mut planner = DctPlanner::new();
        let dct_width = planner.plan_dct3(width);
        let mut scratch = vec![0.0; dct_width.get_scratch_len()];
        for row_buffer in img_buffer.chunks_exact_mut(width) {
            dct_width.process_dct3_with_scratch(row_buffer, &mut scratch);
        }

        // Transpose the image to be able to compute the FFT on the other dimension.
        let mut transposed = transpose(width, height, img_buffer);
        let dct_height = planner.plan_dct3(height);
        scratch.resize(dct_height.get_scratch_len(), 0.0);
        for column_buffer in transposed.chunks_exact_mut(height) {
            dct_height.process_dct3_with_scratch(column_buffer, &mut scratch);
        }
        img_buffer.copy_from_slice(&transposed);
    }

    /// Parallel version of [`dct_2d`].
    ///
    /// This uses rayon internally, see the rayon crate docs to control the level
    /// of parallelism.
    #[cfg(feature = "parallel")]
    pub fn par_idct_2d(width: usize, height: usize, img_buffer: &mut [f64]) {
        use rayon::prelude::{ParallelIterator, ParallelSliceMut};

        let mut planner = DctPlanner::new();
        let dct_width = planner.plan_dct3(width);
        img_buffer
            .par_chunks_exact_mut(width)
            .for_each(|row_buffer| {
                dct_width.process_dct3(row_buffer);
            });

        let mut transposed = transpose(width, height, img_buffer);
        let dct_height = planner.plan_dct3(height);
        transposed
            .par_chunks_exact_mut(height)
            .for_each(|column_buffer| {
                dct_height.process_dct3(column_buffer);
            });
        img_buffer.copy_from_slice(&transposed);
    }
}

#[cfg(test)]
#[cfg(feature = "rustdct")]
#[cfg(feature = "parallel")]
mod tests {
    use super::*;

    #[test]
    fn test_identical_par_dct_result() {
        let test_vec = vec![
            54.75, 0.25, 69.39, 121.95, 15.86, 17.24, 77.48, 108.55, 127.40, 93.14, 49.28, 61.86,
            55.75, 47.64, 28.32, 35.08, 92.85, 66.36, 94.34, 12.58, 50.07, 66.83, 101.12, 67.24,
            111.74, 12.77, 114.64, 122.66, 86.15, 122.18, 33.94, 120.62, 107.30, 76.17, 99.48,
            44.19, 86.03, 113.70, 28.54, 110.29, 80.88, 127.94, 14.04, 70.76, 80.95, 79.83, 56.34,
            11.44, 65.98, 107.16, 54.12, 92.06, 5.32, 47.41, 83.55, 46.60, 17.94, 23.93, 56.11,
            64.69, 87.37, 47.92, 61.87, 63.50, 40.83, 53.61, 57.16, 18.06, 1.11, 51.35, 53.03,
            98.74, 43.84, 104.86, 52.87, 103.40, 114.36, 77.39, 45.10, 19.30, 90.93, 4.71, 95.27,
            26.99, 68.58, 112.49, 114.11, 11.85, 124.35, 28.06, 31.43, 12.53, 57.44, 63.72, 126.73,
            97.03, 97.45, 90.99, 15.45, 86.07, 27.62, 25.03, 106.54, 79.98, 49.95, 96.92, 124.75,
            80.09, 127.06, 84.39, 120.42, 124.40, 15.50, 121.84, 105.86, 24.44, 81.38, 111.54,
            27.66, 1.35, 119.06, 71.15, 108.78, 8.80, 19.83, 27.76, 75.44, 35.15,
        ];
        let mut non_para = test_vec.clone();
        let mut parallel = test_vec.clone();
        dcst::dct_2d(16, 8, &mut non_para);
        dcst::par_dct_2d(16, 8, &mut parallel);
        assert_eq!(non_para, parallel);
    }
    #[test]
    fn test_identical_par_idct_result() {
        let test_vec = vec![
            72.16, 47.41, 122.96, 52.90, 36.35, 98.84, 84.12, 34.52, 61.06, 112.66, 39.91, 67.93,
            84.70, 127.92, 13.63, 107.69, 4.49, 13.85, 124.56, 30.33, 105.12, 90.85, 75.41, 121.80,
            90.34, 105.42, 49.07, 14.55, 14.52, 33.06, 112.38, 46.69, 125.16, 73.96, 125.25, 7.11,
            4.42, 38.53, 105.64, 73.45, 43.45, 64.49, 7.68, 85.51, 109.86, 15.45, 122.59, 113.16,
            64.02, 117.34, 113.04, 56.70, 99.40, 120.27, 51.70, 11.26, 44.75, 1.58, 34.81, 30.54,
            6.71, 62.75, 72.62, 108.74, 17.51, 54.30, 44.35, 13.97, 26.33, 86.44, 34.88, 105.40,
            67.61, 22.40, 6.95, 48.64, 7.90, 76.50, 35.04, 29.92, 123.98, 83.62, 3.96, 75.32,
            42.37, 21.68, 23.58, 98.29, 19.81, 20.84, 110.50, 112.61, 92.65, 30.85, 113.19, 56.70,
            46.94, 107.89, 92.47, 12.77, 34.66, 0.95, 127.74, 53.54, 56.52, 106.35, 25.50, 52.36,
            100.60, 13.80, 19.96, 101.19, 58.99, 85.71, 30.79, 41.23, 56.03, 68.65, 46.78, 36.18,
            30.12, 63.23, 25.27, 93.14, 39.77, 72.21, 7.03, 62.79,
        ];
        let mut non_para = test_vec.clone();
        let mut parallel = test_vec.clone();
        dcst::idct_2d(16, 8, &mut non_para);
        dcst::par_idct_2d(16, 8, &mut parallel);
        assert_eq!(non_para, parallel);
    }
}