image_conv/
conv.rs

1//! Image Convolution Engine
2//!
3//! ## What is convolution?
4//!
5//! ```text
6//! Convolution slides a filter kernel over an image, computing a weighted sum
7//! at each position. Each output pixel is the dot product of the kernel with
8//! the underlying image patch:
9//!
10//!   Input image patch         Kernel           Output pixel
11//!   ┌────┬────┬────┐      ┌────┬────┬────┐
12//!   │ a  │ b  │ c  │      │ k₀ │ k₁ │ k₂ │     out = a·k₀ + b·k₁ + c·k₂
13//!   ├────┼────┼────┤   ⊙  ├────┼────┼────┤        + d·k₃ + e·k₄ + f·k₅
14//!   │ d  │ e  │ f  │      │ k₃ │ k₄ │ k₅ │        + g·k₆ + h·k₇ + i·k₈
15//!   ├────┼────┼────┤      ├────┼────┼────┤
16//!   │ g  │ h  │ i  │      │ k₆ │ k₇ │ k₈ │     Then clamped to [0, 255]
17//!   └────┴────┴────┘      └────┴────┴────┘
18//!
19//! The kernel "slides" horizontally then vertically across the whole image,
20//! producing one output pixel per valid position.
21//! ```
22//!
23//! ## Three-tier performance
24//!
25//! ```text
26//!                    ┌──────────────────┐
27//!                    │  try_separable()  │
28//!                    └────────┬─────────┘
29//!                             │
30//!               ┌─────────────┴─────────────┐
31//!               │                           │
32//!          Separable                     Not separable
33//!               │                           │
34//!               ▼                           ▼
35//!   ┌──────────────────────┐    ┌──────────────────────┐
36//!   │  separable_convolve  │    │      convolve        │
37//!   │  (two 1D passes)     │    │    (one 2D pass)      │
38//!   │                      │    │                      │
39//!   │  Input ────┬──────── │    │  Input image          │
40//!   │            │horizontal│    │  7×7 kernel = 49 ops │
41//!   │            ▼   pass   │    │  per output pixel     │
42//!   │        Temp buffer    │    │                      │
43//!   │   (width reduced by   │    └──────────────────────┘
44//!   │    filter-1+2·pad)    │
45//!   │            │          │    Legend:
46//!   │            │vertical  │    7×7 separable = 14 ops
47//!   │            ▼  pass    │    per pixel — 3.5× faster
48//!   │        Output image   │
49//!   └──────────────────────┘
50//!
51//!   Both paths are parallelised over output rows via rayon.
52//! ```
53//!
54//! ## Threading model
55//!
56//! Each output row is completely independent — zero data dependencies.
57//! Rayon splits the output buffer into per-row slices and processes
58//! them in parallel across all available cores.
59//!
60//! ```text
61//!   Output buffer (hc rows × wc cols × 4 bytes RGBA)
62//!   ┌──────────────────────────────────────┐
63//!   │ Row 0 ──────▶ Thread 0               │
64//!   │ Row 1 ──────▶ Thread 1               │  Output rows are
65//!   │ Row 2 ──────▶ Thread 2               │  processed in
66//!   │ Row 3 ──────▶ Thread 3               │  parallel — each
67//!   │   ...                                │  thread reads from
68//!   │ Row hc-1 ───▶ Thread N               │  the input image
69//!   └──────────────────────────────────────┘  (immutable, safe)
70//! ```
71//!
72//! ## Output size formula
73//!
74//! ```text
75//! output_width  = (W - Fw + 2·P) / S + 1
76//! output_height = (H - Fh + 2·P) / S + 1
77//! ```
78
79use crate::{Filter, PaddingType};
80use photon_rs::transform::padding_uniform as uniform;
81use photon_rs::PhotonImage;
82use photon_rs::Rgba;
83use rayon::prelude::*;
84
85/// Standard 2D convolution — parallelised over output rows.
86///
87/// ```text
88/// Each output row is independent, so we split the output buffer
89/// into per-row slices and process them in parallel via rayon.
90///
91///   Padded image (width=wp)      Filter (fw×fh)     Output pixel
92///   ┌───────────────────────┐     ┌───┬───┬───┐
93///   │                       │     │   │   │   │
94///   │  (row_base, col_base)─┼──┐  │   │   │   │    r = Σ fy Σ fx
95///   │  │                    │  │  │   │   │   │      raw[px]·kernel[fy][fx]
96///   │  │  fh rows           │  │  ├───┼───┼───┤
97///   │  │                    │  │  │   │   │   │
98///   │  └── fw cols ────────┘  │  │   │   │   │
99///   │                       │  │  └───┴───┴───┘
100///   └───────────────────────┘  │
101///     px = (row_base+fy)*wp + col_base+fx  (×4 for RGBA)
102/// ```
103///
104/// All channels accumulate as `f32` and clamp to `[0, 255]` at the end.
105fn convolve(img_padded: &PhotonImage, filter: &Filter, width_conv: u32, height_conv: u32, stride: u32) -> PhotonImage {
106    let raw = img_padded.get_raw_pixels();
107    let wp = img_padded.get_width() as usize;
108    let fw = filter.width;
109    let fh = filter.height;
110    let kernel = &filter.kernel;
111    let wc = width_conv as usize;
112    let hc = height_conv as usize;
113    let stride = stride as usize;
114
115    let out_size = wc * hc * 4;
116    let mut out = vec![0u8; out_size];
117
118    out.par_chunks_mut(wc * 4).enumerate().for_each(|(yc, row_out)| {
119        let row_base = yc * stride;
120
121        for xc in 0..wc {
122            let col_base = xc * stride;
123
124            let mut r: f32 = 0.0;
125            let mut g: f32 = 0.0;
126            let mut b: f32 = 0.0;
127
128            for fy in 0..fh {
129                let row_offset = (row_base + fy) * wp;
130                let k_row = fy * fw;
131
132                for fx in 0..fw {
133                    let px = (row_offset + col_base + fx) * 4;
134                    let k = kernel[k_row + fx];
135
136                    r += raw[px] as f32 * k;
137                    g += raw[px + 1] as f32 * k;
138                    b += raw[px + 2] as f32 * k;
139                }
140            }
141
142            let i = xc * 4;
143            row_out[i] = r.clamp(0.0, 255.0) as u8;
144            row_out[i + 1] = g.clamp(0.0, 255.0) as u8;
145            row_out[i + 2] = b.clamp(0.0, 255.0) as u8;
146            row_out[i + 3] = 255_u8;
147        }
148    });
149
150    debug_assert_eq!(out.len(), out_size);
151
152    #[cfg(debug_assertions)]
153    println!("Convolution done (rayon)...");
154
155    PhotonImage::new(out, width_conv, height_conv)
156}
157
158/// Separable convolution — each pass is parallelised independently.
159///
160/// ## How it works
161///
162/// A separable kernel factors as `kernel[i][j] = col[i] × row[j]`.
163/// Two 1D passes replace one 2D pass: O(fw+fw) per pixel vs O(fw·fh).
164///
165/// ```text
166/// PASS 1 — Horizontal (parallel over rows)
167/// ─────────────────────────────────────────
168///   Input (padded, hp rows)   temp (hp rows × temp_w cols × 3 floats)
169///   ┌──────────────────┐      ┌──────────────────────┐
170///   │ Row 0 ──▶ T0     │      │ Row 0 convolved       │
171///   │ Row 1 ──▶ T1     │  →   │ Row 1 convolved       │  Each row is
172///   │ Row 2 ──▶ T2     │      │ ...                   │  independent
173///   │ ...               │      └──────────────────────┘
174///   └──────────────────┘
175///
176/// PASS 2 — Vertical (parallel over output rows)
177/// ──────────────────────────────────────────────
178///   temp buffer (read-only)      output (hc × wc)
179///   ┌──────────────────────┐    ┌─────────────┐
180///   │ r₀ g₀ b₀ r₁ g₁ b₁ ...│    │ Row 0 ──▶ T0 │
181///   │ r₀ g₀ b₀ r₁ g₁ b₁ ...│ →  │ Row 1 ──▶ T1 │
182///   │        ...            │    │ ...          │
183///   └──────────────────────┘    └─────────────┘
184/// ```
185fn separable_convolve(
186    img_padded: &PhotonImage,
187    row_vec: &[f32],
188    col_vec: &[f32],
189    width_conv: u32,
190    height_conv: u32,
191    stride: u32,
192) -> PhotonImage {
193    let raw = img_padded.get_raw_pixels();
194    let wp = img_padded.get_width() as usize;
195    let hp = img_padded.get_height() as usize;
196    let fw = row_vec.len();
197    let fh = col_vec.len();
198    let wc = width_conv as usize;
199    let hc = height_conv as usize;
200    let stride = stride as usize;
201
202    let temp_w = wc;
203    let temp_size = hp * temp_w * 3;
204    let mut temp: Vec<f32> = vec![0.0; temp_size];
205
206    // Horizontal pass — each row is independent, process in parallel
207    temp.par_chunks_mut(temp_w * 3).enumerate().for_each(|(y, row_temp)| {
208        let row_input = y * wp;
209        for x in 0..temp_w {
210            let col_input = x * stride;
211            let mut r: f32 = 0.0;
212            let mut g: f32 = 0.0;
213            let mut b: f32 = 0.0;
214            for fx in 0..fw {
215                let px = (row_input + col_input + fx) * 4;
216                let k = row_vec[fx];
217                r += raw[px] as f32 * k;
218                g += raw[px + 1] as f32 * k;
219                b += raw[px + 2] as f32 * k;
220            }
221            let t = x * 3;
222            row_temp[t] = r;
223            row_temp[t + 1] = g;
224            row_temp[t + 2] = b;
225        }
226    });
227
228    // Vertical pass — output rows are independent, process in parallel
229    let out_size = wc * hc * 4;
230    let mut out = vec![0u8; out_size];
231
232    out.par_chunks_mut(wc * 4).enumerate().for_each(|(yc, row_out)| {
233        let row_base = yc * stride;
234        for xc in 0..wc {
235            let mut r: f32 = 0.0;
236            let mut g: f32 = 0.0;
237            let mut b: f32 = 0.0;
238            for fy in 0..fh {
239                let t = ((row_base + fy) * temp_w + xc) * 3;
240                let k = col_vec[fy];
241                r += temp[t] * k;
242                g += temp[t + 1] * k;
243                b += temp[t + 2] * k;
244            }
245            let i = xc * 4;
246            row_out[i] = r.clamp(0.0, 255.0) as u8;
247            row_out[i + 1] = g.clamp(0.0, 255.0) as u8;
248            row_out[i + 2] = b.clamp(0.0, 255.0) as u8;
249            row_out[i + 3] = 255_u8;
250        }
251    });
252
253    debug_assert_eq!(out.len(), out_size);
254
255    #[cfg(debug_assertions)]
256    println!("Separable convolution done (rayon)...");
257
258    PhotonImage::new(out, width_conv, height_conv)
259}
260
261/// Computes the output dimension for one axis.
262///
263/// ```text
264/// output = (input_size - filter_size + 2·padding) / stride + 1
265/// ```
266#[inline]
267fn output_dim(input_size: u32, filter_size: u32, pad: u32, stride: u32) -> u32 {
268    let dim = input_size - filter_size + 2 * pad;
269    if dim % stride != 0 {
270        eprintln!("[WARNING]: stride value not suitable. Convolution may fail.");
271    }
272    dim / stride + 1
273}
274
275/// Applies convolution to an image using the given filter.
276///
277/// Each path (separable 1D and standard 2D) is parallelised across output
278/// rows via rayon — no data dependencies between rows, trivially parallel.
279///
280/// # Speedup Examples
281///
282/// | Kernel  | Size  | Sequential | Rayon (8 cores) |
283/// |---------|-------|------------|-----------------|
284/// | Gauss   | 7×7   | 61 ms      | ~8 ms           |
285/// | Gauss   | 15×15 | 94 ms      | ~12 ms          |
286/// | Laplacian|3×3   | 47 ms      | ~6 ms           |
287///
288/// # Example
289///
290/// ```no_run
291/// use image_conv::conv;
292/// use image_conv::{Filter, PaddingType};
293///
294/// let img = photon_rs::native::open_image("img.jpg").expect("No such file found");
295/// let sobel_x: Vec<f32> = vec![1.0, 0.0, -1.0, 2.0, 0.0, -2.0, 1.0, 0.0, -1.0];
296/// let filter = Filter::from(sobel_x, 3, 3);
297/// let img_conv = conv::convolution(&img, filter, 1, PaddingType::UNIFORM(1));
298///```
299pub fn convolution(img: &PhotonImage, filter: Filter, stride: u32, padding: PaddingType) -> PhotonImage {
300    if stride == 0 {
301        eprintln!("[ERROR]: Stride provided = 0");
302        std::process::exit(1);
303    }
304
305    let separable = filter.try_separable();
306
307    match &padding {
308        PaddingType::UNIFORM(pad_amt) => {
309            let img_padded = uniform(img, *pad_amt, Rgba::new(0, 0, 0, 255));
310            let wc = output_dim(img.get_width(), filter.width as u32, *pad_amt, stride);
311            let hc = output_dim(img.get_height(), filter.height as u32, *pad_amt, stride);
312
313            if let Some((col, row)) = separable {
314                separable_convolve(&img_padded, &row, &col, wc, hc, stride)
315            } else {
316                convolve(&img_padded, &filter, wc, hc, stride)
317            }
318        }
319        PaddingType::NONE => {
320            let wc = output_dim(img.get_width(), filter.width as u32, 0, stride);
321            let hc = output_dim(img.get_height(), filter.height as u32, 0, stride);
322
323            if let Some((col, row)) = separable {
324                separable_convolve(img, &row, &col, wc, hc, stride)
325            } else {
326                convolve(img, &filter, wc, hc, stride)
327            }
328        }
329    }
330}
image_conv/conv.rs

image_conv/
conv.rs