phasm-core 0.2.1

Pure-Rust steganography engine — hide encrypted messages in JPEG photos
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
// Copyright (c) 2026 Christoph Gaffga
// SPDX-License-Identifier: GPL-3.0-only
// https://github.com/cgaffga/phasmcore

//! Deterministic 2D FFT/IFFT using only WASM-intrinsic f64 operations.
//!
//! Replaces `rustfft` with an in-house implementation:
//! - Radix-2 Cooley-Tukey for power-of-2 sizes
//! - Bluestein's chirp-z transform for arbitrary sizes
//! All twiddle factors computed via `det_sincos()`.
//!
//! Memory optimizations (Phase 3):
//! - P1a: Column FFTs use gather-FFT-scatter with a single column buffer
//!   instead of a full transposed copy (saves ~186 MB for 4032x3024).
//! - P1b: All FFT data uses f32 (Complex32) — template detection only needs
//!   coarse peak finding. Twiddle factors still computed in f64 via
//!   `det_sincos()` then cast to f32.
//! - P2a: BluesteinPlan precomputes chirp factors and FFT(b_hat) for reuse
//!   across all rows/columns of the same length.

use num_complex::Complex;
use crate::det_math::{det_sincos, det_hypot};
use std::f64::consts::PI;

/// Complex32 type alias for f32 complex numbers.
pub type Complex32 = Complex<f32>;

/// 2D complex spectrum using f32 for memory efficiency.
pub struct Spectrum2D {
    pub data: Vec<Complex32>,
    pub width: usize,
    pub height: usize,
}

// ──────────────────────────────────────────────────────────────────────────
// Bluestein plan: precomputed chirp factors for reuse (P2a)
// ──────────────────────────────────────────────────────────────────────────

/// Precomputed Bluestein chirp factors and FFT(b_hat) for a given (n, sign).
///
/// Eliminates redundant chirp computation and FFT(b) calls when processing
/// many rows or columns of the same length.
struct BluesteinPlan {
    n: usize,
    m: usize, // next_pow2(2*n - 1)
    chirp: Vec<Complex32>,
    b_hat: Vec<Complex32>, // FFT of padded conjugate chirp
}

impl BluesteinPlan {
    /// Create a new Bluestein plan for length `n` and direction `sign`.
    fn new(n: usize, sign: f64) -> Self {
        let m = next_pow2(2 * n - 1);

        // Chirp factors: w_k = exp(sign * i * pi * k^2 / n)
        let mut chirp = vec![Complex32::new(0.0, 0.0); n];
        for k in 0..n {
            let angle = sign * PI * (k as f64 * k as f64) / n as f64;
            let (s, c) = det_sincos(angle);
            chirp[k] = Complex32::new(c as f32, s as f32);
        }

        // b[k] = chirp[k], with wrap-around for negative indices, zero-padded
        let mut b = vec![Complex32::new(0.0, 0.0); m];
        b[0] = chirp[0];
        for k in 1..n {
            b[k] = chirp[k];
            b[m - k] = chirp[k];
        }

        // Precompute FFT(b)
        fft_radix2_f32(&mut b, -1.0);

        BluesteinPlan { n, m, chirp, b_hat: b }
    }

    /// Execute Bluestein FFT using precomputed plan.
    fn execute(&self, input: &[Complex32]) -> Vec<Complex32> {
        debug_assert_eq!(input.len(), self.n);

        // a[k] = x[k] * conj(chirp[k]), zero-padded to length m
        let mut a = vec![Complex32::new(0.0, 0.0); self.m];
        for k in 0..self.n {
            a[k] = input[k] * self.chirp[k].conj();
        }

        // Convolve: A = FFT(a), C = IFFT(A * B_hat)
        fft_radix2_f32(&mut a, -1.0);
        for i in 0..self.m {
            a[i] *= self.b_hat[i];
        }
        fft_radix2_f32(&mut a, 1.0);

        // Normalize radix-2 inverse and apply chirp
        let inv_m = 1.0 / self.m as f32;
        let mut result = vec![Complex32::new(0.0, 0.0); self.n];
        for k in 0..self.n {
            result[k] = a[k] * inv_m * self.chirp[k].conj();
        }

        result
    }
}

// ──────────────────────────────────────────────────────────────────────────
// 1D FFT primitives (f32)
// ──────────────────────────────────────────────────────────────────────────

/// Next power of 2 >= n.
fn next_pow2(n: usize) -> usize {
    let mut p = 1;
    while p < n {
        p <<= 1;
    }
    p
}

/// In-place radix-2 Cooley-Tukey FFT for f32.  `data.len()` must be a power of 2.
/// `sign`: -1.0 for forward FFT, +1.0 for inverse FFT.
fn fft_radix2_f32(data: &mut [Complex32], sign: f64) {
    let n = data.len();
    debug_assert!(n.is_power_of_two());
    if n <= 1 {
        return;
    }

    // Bit-reversal permutation
    let mut j = 0usize;
    for i in 1..n {
        let mut bit = n >> 1;
        while j & bit != 0 {
            j ^= bit;
            bit >>= 1;
        }
        j ^= bit;
        if i < j {
            data.swap(i, j);
        }
    }

    // Pre-allocate the twiddle buffer once; largest stage needs n/2 entries.
    let mut twiddles: Vec<Complex32> = Vec::with_capacity(n / 2);

    // Butterfly stages
    let mut len = 2;
    while len <= n {
        let half = len / 2;
        let angle_step = sign * PI / half as f64;

        // Pre-compute twiddles for this stage. The previous code recomputed
        // (s, c) for every (start, k) pair, but the values depend only on
        // (half, k) — within a stage of width `len`, each twiddle was being
        // recomputed `n / len` times. With this cache, det_sincos is called
        // `half` times per stage instead of `(n / len) * half = n / 2`. Total
        // det_sincos calls across all stages drop from `~(n / 2) · log2(n)`
        // to `~n` — a `log2(n)` reduction (≈12× on 4096-point row FFTs).
        twiddles.clear();
        twiddles.extend((0..half).map(|k| {
            let angle = angle_step * k as f64;
            let (s, c) = det_sincos(angle);
            Complex32::new(c as f32, s as f32)
        }));

        for start in (0..n).step_by(len) {
            for k in 0..half {
                let w = twiddles[k];
                let u = data[start + k];
                let v = data[start + k + half] * w;
                data[start + k] = u + v;
                data[start + k + half] = u - v;
            }
        }
        len <<= 1;
    }
}

/// 1D FFT for arbitrary length using f32.
/// Uses BluesteinPlan if available (for 2D FFT reuse), or creates one on the fly.
/// `sign`: -1.0 for forward, +1.0 for inverse.
fn fft1d_f32_with_plan(input: &[Complex32], sign: f64, plan: Option<&BluesteinPlan>) -> Vec<Complex32> {
    let n = input.len();
    if n == 0 {
        return vec![];
    }
    if n == 1 {
        return input.to_vec();
    }
    if n.is_power_of_two() {
        let mut buf = input.to_vec();
        fft_radix2_f32(&mut buf, sign);
        return buf;
    }

    // Use precomputed plan if available
    if let Some(p) = plan {
        debug_assert_eq!(p.n, n);
        return p.execute(input);
    }

    // Fallback: create a temporary plan
    let temp_plan = BluesteinPlan::new(n, sign);
    temp_plan.execute(input)
}

/// 1D forward FFT (arbitrary length, f32).
#[allow(dead_code)]
fn fft1d_f32(data: &[Complex32]) -> Vec<Complex32> {
    fft1d_f32_with_plan(data, -1.0, None)
}

/// 1D inverse FFT (arbitrary length, f32) — unnormalized.
#[allow(dead_code)]
fn ifft1d_f32(data: &[Complex32]) -> Vec<Complex32> {
    fft1d_f32_with_plan(data, 1.0, None)
}

// ──────────────────────────────────────────────────────────────────────────
// 2D FFT / IFFT — public API (f32, memory-optimized)
// ──────────────────────────────────────────────────────────────────────────

/// Real-valued pixel array -> 2D complex spectrum (f32).
///
/// The input is a row-major f64 array of size `width * height`.
/// Uses gather-FFT-scatter for columns (P1a) and precomputed Bluestein
/// plans for chirp reuse (P2a).
pub fn fft2d(pixels: &[f64], width: usize, height: usize) -> Spectrum2D {
    assert_eq!(pixels.len(), width * height);

    let mut data: Vec<Complex32> = pixels.iter().map(|&v| Complex32::new(v as f32, 0.0)).collect();

    // P2a: Precompute Bluestein plans for row and column lengths (if non-power-of-2)
    let row_plan = if !width.is_power_of_two() && width > 1 {
        Some(BluesteinPlan::new(width, -1.0))
    } else {
        None
    };
    let col_plan = if !height.is_power_of_two() && height > 1 {
        Some(BluesteinPlan::new(height, -1.0))
    } else {
        None
    };

    // FFT each row
    for row in 0..height {
        let start = row * width;
        let row_data = &data[start..start + width];
        let transformed = fft1d_f32_with_plan(row_data, -1.0, row_plan.as_ref());
        data[start..start + width].copy_from_slice(&transformed);
    }

    // P1a: FFT each column using gather-FFT-scatter with a single column buffer.
    // No full transposed buffer needed.
    let mut col_buf = vec![Complex32::new(0.0, 0.0); height];
    for col in 0..width {
        // Gather column
        for r in 0..height {
            col_buf[r] = data[r * width + col];
        }
        // FFT
        let transformed = fft1d_f32_with_plan(&col_buf, -1.0, col_plan.as_ref());
        // Scatter back
        for r in 0..height {
            data[r * width + col] = transformed[r];
        }
    }

    Spectrum2D { data, width, height }
}

/// 2D complex spectrum -> real-valued pixel array.
///
/// Takes the real parts after inverse FFT, normalized by `1/(width*height)`.
/// Uses gather-IFFT-scatter for columns (P1a) and precomputed Bluestein
/// plans for chirp reuse (P2a).
pub fn ifft2d(spectrum: &Spectrum2D) -> Vec<f64> {
    let width = spectrum.width;
    let height = spectrum.height;
    let mut data = spectrum.data.clone();

    // P2a: Precompute Bluestein plans for row and column lengths (if non-power-of-2)
    let row_plan = if !width.is_power_of_two() && width > 1 {
        Some(BluesteinPlan::new(width, 1.0))
    } else {
        None
    };
    let col_plan = if !height.is_power_of_two() && height > 1 {
        Some(BluesteinPlan::new(height, 1.0))
    } else {
        None
    };

    // IFFT each row
    for row in 0..height {
        let start = row * width;
        let row_data = &data[start..start + width];
        let transformed = fft1d_f32_with_plan(row_data, 1.0, row_plan.as_ref());
        data[start..start + width].copy_from_slice(&transformed);
    }

    // P1a: IFFT each column using gather-IFFT-scatter with a single column buffer.
    let mut col_buf = vec![Complex32::new(0.0, 0.0); height];
    for col in 0..width {
        // Gather column
        for r in 0..height {
            col_buf[r] = data[r * width + col];
        }
        // IFFT
        let transformed = fft1d_f32_with_plan(&col_buf, 1.0, col_plan.as_ref());
        // Scatter back
        for r in 0..height {
            data[r * width + col] = transformed[r];
        }
    }

    // Normalize and extract real parts
    let norm = 1.0 / (width * height) as f64;
    let mut result = vec![0.0f64; width * height];
    for i in 0..data.len() {
        result[i] = data[i].re as f64 * norm;
    }

    result
}

/// Compute magnitude of each spectrum element (returns f32).
pub fn magnitude_spectrum(spectrum: &Spectrum2D) -> Vec<f32> {
    spectrum.data.iter().map(|c| {
        // Use f64 det_hypot for precision, cast result to f32
        det_hypot(c.re as f64, c.im as f64) as f32
    }).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fft_ifft_roundtrip() {
        let width = 16;
        let height = 16;
        let pixels: Vec<f64> = (0..width * height).map(|i| (i as f64) * 0.1 + 50.0).collect();

        let spectrum = fft2d(&pixels, width, height);
        let recovered = ifft2d(&spectrum);

        for i in 0..pixels.len() {
            assert!(
                (pixels[i] - recovered[i]).abs() < 1e-3,
                "Mismatch at {i}: expected {}, got {}",
                pixels[i],
                recovered[i]
            );
        }
    }

    #[test]
    fn fft_ifft_roundtrip_non_pow2() {
        // Test with non-power-of-2 dimensions (Bluestein path)
        let width = 12;
        let height = 10;
        let pixels: Vec<f64> = (0..width * height).map(|i| (i as f64) * 0.3 + 20.0).collect();

        let spectrum = fft2d(&pixels, width, height);
        let recovered = ifft2d(&spectrum);

        for i in 0..pixels.len() {
            assert!(
                (pixels[i] - recovered[i]).abs() < 0.1,
                "Mismatch at {i}: expected {}, got {}",
                pixels[i],
                recovered[i]
            );
        }
    }

    #[test]
    fn parseval_theorem() {
        let width = 8;
        let height = 8;
        let pixels: Vec<f64> = (0..width * height).map(|i| ((i * 7 + 3) % 256) as f64).collect();

        let spatial_energy: f64 = pixels.iter().map(|v| v * v).sum();

        let spectrum = fft2d(&pixels, width, height);
        let freq_energy: f64 = spectrum.data.iter().map(|c| {
            let re = c.re as f64;
            let im = c.im as f64;
            re * re + im * im
        }).sum();

        let n = (width * height) as f64;
        // Relaxed tolerance for f32 spectrum
        assert!(
            (spatial_energy - freq_energy / n).abs() < 10.0,
            "Parseval's theorem violated: spatial={spatial_energy}, freq/N={}", freq_energy / n
        );
    }

    #[test]
    fn dc_component_is_sum() {
        let width = 4;
        let height = 4;
        let pixels = vec![1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0,
                          9.0, 10.0, 11.0, 12.0, 13.0, 14.0, 15.0, 16.0];

        let spectrum = fft2d(&pixels, width, height);

        let expected_dc: f64 = pixels.iter().sum();
        // Relaxed tolerance for f32
        assert!(
            (spectrum.data[0].re as f64 - expected_dc).abs() < 0.1,
            "DC component should be sum of all pixels: expected {expected_dc}, got {}",
            spectrum.data[0].re
        );
        assert!((spectrum.data[0].im as f64).abs() < 0.1);
    }

    #[test]
    fn fft1d_basic() {
        // FFT of [1, 0, 0, 0] should be [1, 1, 1, 1]
        let input = vec![
            Complex32::new(1.0, 0.0),
            Complex32::new(0.0, 0.0),
            Complex32::new(0.0, 0.0),
            Complex32::new(0.0, 0.0),
        ];
        let output = fft1d_f32(&input);
        for k in 0..4 {
            assert!((output[k].re - 1.0).abs() < 1e-5, "Re[{k}]={}", output[k].re);
            assert!(output[k].im.abs() < 1e-5, "Im[{k}]={}", output[k].im);
        }
    }

    #[test]
    fn bluestein_matches_radix2() {
        // For power-of-2 size, Bluestein plan should give same result as radix-2
        let n = 8;
        let input: Vec<Complex32> = (0..n).map(|i| Complex32::new((i * 3 + 1) as f32, (i * 2) as f32)).collect();

        let mut radix2_buf = input.clone();
        fft_radix2_f32(&mut radix2_buf, -1.0);

        // Test via plan
        let _plan = BluesteinPlan::new(n, -1.0);
        // For power-of-2, fft1d_f32_with_plan uses radix-2 directly, not the plan.
        // Test the plan directly on a non-power-of-2 size instead.
        let n2 = 7;
        let input2: Vec<Complex32> = (0..n2).map(|i| Complex32::new((i * 3 + 1) as f32, (i * 2) as f32)).collect();
        let plan2 = BluesteinPlan::new(n2, -1.0);
        let result_plan = plan2.execute(&input2);
        let result_direct = fft1d_f32(&input2);
        for k in 0..n2 {
            assert!(
                (result_plan[k].re - result_direct[k].re).abs() < 1e-3 &&
                (result_plan[k].im - result_direct[k].im).abs() < 1e-3,
                "Plan vs direct mismatch at {k}: plan={}, direct={}",
                result_plan[k], result_direct[k]
            );
        }

        // Also verify radix-2 results haven't changed (basic sanity)
        let result_r2 = fft1d_f32(&input);
        for k in 0..n {
            assert!(
                (radix2_buf[k].re - result_r2[k].re).abs() < 1e-3 &&
                (radix2_buf[k].im - result_r2[k].im).abs() < 1e-3,
                "Mismatch at {k}: radix2={}, fft1d={}",
                radix2_buf[k], result_r2[k]
            );
        }
    }

    #[test]
    fn bluestein_plan_reuse() {
        // Verify that reusing a BluesteinPlan gives the same result each time
        let n = 13; // non-power-of-2
        let plan = BluesteinPlan::new(n, -1.0);

        let input1: Vec<Complex32> = (0..n).map(|i| Complex32::new(i as f32, 0.0)).collect();
        let input2: Vec<Complex32> = (0..n).map(|i| Complex32::new(0.0, i as f32)).collect();

        let r1a = plan.execute(&input1);
        let r2 = plan.execute(&input2);
        let r1b = plan.execute(&input1);

        for k in 0..n {
            assert!(
                (r1a[k].re - r1b[k].re).abs() < 1e-5 &&
                (r1a[k].im - r1b[k].im).abs() < 1e-5,
                "Plan reuse gave different results at {k}: first={}, second={}",
                r1a[k], r1b[k]
            );
        }
        // Just verify r2 didn't corrupt anything (no specific value check needed)
        assert_eq!(r2.len(), n);
    }
}