stackblur_iter/
lib.rs

1//! A fast, iterative, correct approach to Stackblur, resulting in a very smooth
2//! and high-quality output, with no edge bleeding.
3//!
4//! This crate implements a tweaked version of the Stackblur algorithm requiring
5//! `radius * 2 + 2` elements of space rather than `radius * 2 + 1`, which is a
6//! small tradeoff for much-increased visual quality.
7//!
8//! The algorithm is exposed as an iterator ([`StackBlur`]) that can wrap any
9//! other iterator that yields elements of [`StackBlurrable`]. The [`StackBlur`]
10//! will then yield elements blurred by the specified radius.
11//!
12//! ## Benefits of this crate
13//!
//! Stackblur is essentially constant-time with respect to the radius.
//! Regardless of the radius, it always performs only 1 scan over the input
//! iterator and outputs exactly the same number of elements.
17//!
18//! Additionally, it produces results that are comparable to slow and expensive
19//! Gaussian blurs. As opposed to box blur which uses a basic rolling average,
20//! Stackblur uses a weighted average where each output pixel is affected more
21//! strongly by the inputs that were closest to it.
22//!
23//! Despite that, Stackblur does not perform much worse compared to naive box
24//! blurs, and is quite cheap compared to full Gaussian blurs, at least for the
25//! CPU. The implementation in this crate will most likely beat most unoptimized
26//! blurs you can find on crates.io, as well as some optimized ones, and it is
27//! extremely flexible and generic.
28//!
29//! For a full explanation of the improvements made to the Stackblur algorithm,
30//! see the [`iter`] module.
31//!
32//! ## Comparison to the `stackblur` crate
33//!
34//! `stackblur` suffers from edge bleeding and flexibility problems. For
35//! example, it can only operate on buffers of 32-bit integers, and expects them
36//! to be packed linear ARGB pixels. Additionally, it cannot operate on a 2D
37//! subslice of a buffer (like `imgref` allows for this crate), and it does not
38//! offer any streaming iterators or documentation. And it also only supports
39//! a blur radius of up to 255.
40//!
41//! ## Usage
42//!
43//! Aside from [`StackBlurrable`] and [`StackBlur`] which host their own
44//! documentation, there are helper functions like [`blur`] and [`blur_argb`]
45//! that can be used to interact with 2D image buffers, due to the fact that
46//! doing so manually involves unsafe code (if you want no-copy).
47
48#![cfg_attr(feature = "simd", feature(portable_simd))]
49#![cfg_attr(test, feature(test))]
50
51use std::collections::VecDeque;
52#[cfg(any(doc, feature = "simd"))]
53use std::simd::{LaneCount, SupportedLaneCount};
54
55pub extern crate imgref;
56
57use imgref::ImgRefMut;
58
59#[cfg(test)]
60mod test;
61
62pub mod traits;
63pub mod iter;
64mod color;
65
66use traits::StackBlurrable;
67use iter::StackBlur;
68use color::Argb;
69
70/// Blurs a buffer, assuming one element per pixel.
71///
72/// The provided closures are used to convert from the buffer's native pixel
73/// format to [`StackBlurrable`] values that can be consumed by [`StackBlur`].
74pub fn blur<T, B: StackBlurrable>(
75	buffer: &mut ImgRefMut<T>,
76	radius: usize,
77	mut to_blurrable: impl FnMut(&T) -> B,
78	mut to_pixel: impl FnMut(B) -> T
79) {
80	use imgref_iter::traits::{ImgIter, ImgIterMut, ImgIterPtrMut};
81	use imgref_iter::iter::{IterWindows, IterWindowsPtrMut};
82
83	let mut ops = VecDeque::new();
84
85	// This is needed to avoid Undefined Behavior. Writing to the rows of the
86	// must be done before constructing the columns iterators, because otherwise
87	// the writes would invalidate their borrows. However I don't want to
88	// duplicate this loop, so make it a closure.
89	let mut blur_windows = |writer: IterWindowsPtrMut<T>, reader: IterWindows<T>, mut ops: VecDeque<B>| {
90		for (write, read) in writer.zip(reader) {
91			let mut blur = StackBlur::new(read.map(&mut to_blurrable), radius, ops);
92			write.for_each(|place| unsafe { *place = to_pixel(blur.next().unwrap()) });
93			ops = blur.into_ops();
94		}
95
96		ops
97	};
98
99	let buffer_ptr = buffer.as_mut_ptr();
100	ops = blur_windows(unsafe { buffer_ptr.iter_rows_ptr_mut() }, buffer.iter_rows(), ops);
101	blur_windows(unsafe { buffer_ptr.iter_cols_ptr_mut() }, buffer.iter_cols(), ops);
102}
103
/// Parallel counterpart of [`blur`], assuming one element per pixel.
///
/// Rows are blurred first and columns second, with the lines of each pass
/// distributed over rayon's threads. `to_blurrable` converts a pixel from the
/// buffer's native format into a [`StackBlurrable`] value that [`StackBlur`]
/// can consume, and `to_pixel` converts each blurred value back into a native
/// pixel.
#[cfg(any(doc, feature = "rayon"))]
pub fn par_blur<T: Send + Sync, B: StackBlurrable + Send + Sync>(
	buffer: &mut ImgRefMut<T>,
	radius: usize,
	to_blurrable: impl Fn(&T) -> B + Sync,
	to_pixel: impl Fn(B) -> T + Sync
) {
	use imgref_iter::traits::{ImgIter, ImgIterMut, ImgIterPtrMut};
	use imgref_iter::iter::{IterWindows, IterWindowsPtrMut};
	#[cfg(not(doc))]
	use rayon::iter::{ParallelBridge, ParallelIterator};

	// One queue per rayon thread. Each task looks its slot up through
	// `rayon::current_thread_index()`, takes the queue out for the duration of
	// one line and puts it back afterwards.
	let mut slots = vec![Some(VecDeque::new()); rayon::current_num_threads()];
	let slots_ptr = unsafe { unique::Unique::new_unchecked(slots.as_mut_ptr()) };

	let run_pass = |dst: IterWindowsPtrMut<T>, src: IterWindows<T>| {
		dst.zip(src).par_bridge().for_each(|(out_line, in_line)| {
			let slot = unsafe { &mut *slots_ptr.as_ptr().add(rayon::current_thread_index().unwrap()) };
			let scratch = slot.take().unwrap();

			let mut blurred = StackBlur::new(in_line.map(&to_blurrable), radius, scratch);
			for place in out_line {
				unsafe { *place = to_pixel(blurred.next().unwrap()) };
			}

			*slot = Some(blurred.into_ops());
		});
	};

	let raw = buffer.as_mut_ptr();

	// Rows first, columns second: the second pass reads what the first wrote.
	run_pass(unsafe { raw.iter_rows_ptr_mut() }, buffer.iter_rows());
	run_pass(unsafe { raw.iter_cols_ptr_mut() }, buffer.iter_cols());
}
137
138/// Blurs a buffer with SIMD, assuming one element per pixel.
139///
140/// The provided closures are used to convert from the buffer's native pixel
141/// format to [`StackBlurrable`] values that can be consumed by [`StackBlur`].
142#[cfg(any(doc, feature = "simd"))]
143pub fn simd_blur<T, Bsimd: StackBlurrable, Bsingle: StackBlurrable, const LANES: usize>(
144	buffer: &mut ImgRefMut<T>,
145	radius: usize,
146	mut to_blurrable_simd: impl FnMut([&T; LANES]) -> Bsimd,
147	mut to_pixel_simd: impl FnMut(Bsimd) -> [T; LANES],
148	mut to_blurrable_single: impl FnMut(&T) -> Bsingle,
149	mut to_pixel_single: impl FnMut(Bsingle) -> T
150) where LaneCount<LANES>: SupportedLaneCount {
151	#[cfg(not(doc))]
152	use imgref_iter::traits::{ImgIterMut, ImgSimdIter, ImgSimdIterPtrMut};
153	#[cfg(not(doc))]
154	use imgref_iter::iter::{SimdIterWindow, SimdIterWindowPtrMut, SimdIterWindows, SimdIterWindowsPtrMut};
155
156	let mut ops_simd = VecDeque::new();
157	let mut ops_single = VecDeque::new();
158
159	let mut simd_blur_windows = |writer: SimdIterWindowsPtrMut<T, LANES>, reader: SimdIterWindows<T, LANES>, mut ops_simd: VecDeque<Bsimd>, mut ops_single: VecDeque<Bsingle>| {
160		for (write, read) in writer.zip(reader) {
161			match (write, read) {
162				(SimdIterWindowPtrMut::Simd(write), SimdIterWindow::Simd(read)) => {
163					let mut blur = StackBlur::new(read.map(&mut to_blurrable_simd), radius, ops_simd);
164					write.for_each(|place| place.into_iter().zip(to_pixel_simd(blur.next().unwrap())).for_each(|(place, pixel)| unsafe { *place = pixel }));
165					ops_simd = blur.into_ops();
166				}
167
168				(SimdIterWindowPtrMut::Single(write), SimdIterWindow::Single(read)) => {
169					let mut blur = StackBlur::new(read.map(&mut to_blurrable_single), radius, ops_single);
170					write.for_each(|place| unsafe { *place = to_pixel_single(blur.next().unwrap()) });
171					ops_single = blur.into_ops();
172				}
173
174				_ => unreachable!()
175			}
176		}
177
178		(ops_simd, ops_single)
179	};
180
181	let buffer_ptr = buffer.as_mut_ptr();
182	(ops_simd, ops_single) = simd_blur_windows(unsafe { buffer_ptr.simd_iter_rows_ptr_mut::<LANES>() }, buffer.simd_iter_rows::<LANES>(), ops_simd, ops_single);
183	simd_blur_windows(unsafe { buffer_ptr.simd_iter_cols_ptr_mut::<LANES>() }, buffer.simd_iter_cols::<LANES>(), ops_simd, ops_single);
184}
185
186/// Blurs a buffer with SIMD in parallel, assuming one element per pixel.
187///
188/// The provided closures are used to convert from the buffer's native pixel
189/// format to [`StackBlurrable`] values that can be consumed by [`StackBlur`].
190#[cfg(any(doc, all(feature = "rayon", feature = "simd")))]
191pub fn par_simd_blur<T: Send + Sync, Bsimd: StackBlurrable + Send + Sync, Bsingle: StackBlurrable + Send + Sync, const LANES: usize>(
192	buffer: &mut ImgRefMut<T>,
193	radius: usize,
194	to_blurrable_simd: impl Fn([&T; LANES]) -> Bsimd + Sync,
195	to_pixel_simd: impl Fn(Bsimd) -> [T; LANES] + Sync,
196	to_blurrable_single: impl Fn(&T) -> Bsingle + Sync,
197	to_pixel_single: impl Fn(Bsingle) -> T + Sync
198) where LaneCount<LANES>: SupportedLaneCount {
199	#[cfg(not(doc))]
200	use imgref_iter::traits::{ImgIterMut, ImgSimdIter, ImgSimdIterPtrMut};
201	#[cfg(not(doc))]
202	use rayon::iter::{ParallelBridge, ParallelIterator};
203	#[cfg(not(doc))]
204	use imgref_iter::iter::{SimdIterWindow, SimdIterWindowPtrMut, SimdIterWindows, SimdIterWindowsPtrMut};
205
206	let mut opses_simd = vec![Some(VecDeque::new()); rayon::current_num_threads()];
207	let opses_simd_ptr = unsafe { unique::Unique::new_unchecked(opses_simd.as_mut_ptr()) };
208
209	let mut opses_single = vec![Some(VecDeque::new()); rayon::current_num_threads()];
210	let opses_single_ptr = unsafe { unique::Unique::new_unchecked(opses_single.as_mut_ptr()) };
211
212	let par_simd_blur_windows = |writer: SimdIterWindowsPtrMut<T, LANES>, reader: SimdIterWindows<T, LANES>| {
213		writer.zip(reader).par_bridge().for_each(|(write, read)| match (write, read) {
214			(SimdIterWindowPtrMut::Simd(write), SimdIterWindow::Simd(read)) => {
215				let ops_ref = unsafe { &mut *opses_simd_ptr.as_ptr().add(rayon::current_thread_index().unwrap()) };
216				let ops = ops_ref.take().unwrap();
217				let mut blur = StackBlur::new(read.map(&to_blurrable_simd), radius, ops);
218				write.for_each(|place| place.into_iter().zip(to_pixel_simd(blur.next().unwrap())).for_each(|(place, pixel)| unsafe { *place = pixel }));
219				ops_ref.replace(blur.into_ops());
220			}
221
222			(SimdIterWindowPtrMut::Single(write), SimdIterWindow::Single(read)) => {
223				let ops_ref = unsafe { &mut *opses_single_ptr.as_ptr().add(rayon::current_thread_index().unwrap()) };
224				let ops = ops_ref.take().unwrap();
225				let mut blur = StackBlur::new(read.map(&to_blurrable_single), radius, ops);
226				write.for_each(|place| unsafe { *place = to_pixel_single(blur.next().unwrap()) });
227				ops_ref.replace(blur.into_ops());
228			}
229
230			_ => unreachable!()
231		});
232	};
233
234	let buffer_ptr = buffer.as_mut_ptr();
235	par_simd_blur_windows(unsafe { buffer_ptr.simd_iter_rows_ptr_mut::<LANES>() }, buffer.simd_iter_rows::<LANES>());
236	par_simd_blur_windows(unsafe { buffer_ptr.simd_iter_cols_ptr_mut::<LANES>() }, buffer.simd_iter_cols::<LANES>());
237}
238
239/// Blurs a buffer of 32-bit packed ARGB pixels (0xAARRGGBB).
240///
241/// This is a version of [`blur`] with pre-filled conversion routines that
242/// provide good results for blur radii <= 4096. Larger radii may overflow.
243///
244/// Note that this function is *linear*. For sRGB, see [`blur_srgb`].
245pub fn blur_argb(buffer: &mut ImgRefMut<u32>, radius: usize) {
246	blur(buffer, radius, |i| Argb::from_u32(*i), Argb::to_u32);
247}
248
/// Blurs a buffer whose pixels are 32-bit packed sRGB values (0xAARRGGBB).
///
/// This is [`blur`] with the conversion routines already filled in. Results
/// are good for blur radii <= 1536; larger radii may overflow.
///
/// The blending done here uses *sRGB*. Use [`blur_argb`] for linear.
#[cfg(any(doc, feature = "blend-srgb"))]
pub fn blur_srgb(buffer: &mut ImgRefMut<u32>, radius: usize) {
	blur(
		buffer,
		radius,
		|&px| Argb::from_u32_srgb(px),
		Argb::to_u32_srgb
	);
}
259
/// Blurs a buffer whose pixels are 32-bit packed ARGB values (0xAARRGGBB),
/// in parallel.
///
/// This is [`par_blur`] with the conversion routines already filled in.
/// Results are good for blur radii <= 4096; larger radii may overflow.
///
/// The blending done here is *linear*. Use [`par_blur_srgb`] for sRGB.
#[cfg(any(doc, feature = "rayon"))]
pub fn par_blur_argb(buffer: &mut ImgRefMut<u32>, radius: usize) {
	par_blur(
		buffer,
		radius,
		|&px| Argb::from_u32(px),
		Argb::to_u32
	);
}
270
/// Blurs a buffer whose pixels are 32-bit packed sRGB values (0xAARRGGBB),
/// in parallel.
///
/// This is [`par_blur`] with the conversion routines already filled in.
/// Results are good for blur radii <= 1536; larger radii may overflow.
///
/// The blending done here uses *sRGB*. Use [`par_blur_argb`] for linear.
#[cfg(any(doc, all(feature = "rayon", feature = "blend-srgb")))]
pub fn par_blur_srgb(buffer: &mut ImgRefMut<u32>, radius: usize) {
	par_blur(
		buffer,
		radius,
		|&px| Argb::from_u32_srgb(px),
		Argb::to_u32_srgb
	);
}
281
282/// Blurs a buffer of 32-bit packed ARGB pixels (0xAARRGGBB) with SIMD.
283///
284/// This is a version of [`simd_blur`] with pre-filled conversion routines that
285/// provide good results for blur radii <= 4096. Larger radii may overflow.
286///
287/// Note that this function is *linear*. For sRGB, see [`simd_blur_srgb`].
288#[cfg(any(doc, feature = "simd"))]
289pub fn simd_blur_argb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) where LaneCount<LANES>: SupportedLaneCount {
290	simd_blur(buffer, radius,
291		|i: [&u32; LANES]| Argb::from_u32xN(i.map(u32::clone)), Argb::to_u32xN,
292		|i| Argb::from_u32(*i), Argb::to_u32
293	);
294}
295
296/// Blurs a buffer of 32-bit packed sRGB pixels (0xAARRGGBB) with SIMD.
297///
298/// This is a version of [`simd_blur`] with pre-filled conversion routines that
299/// provide good results for blur radii <= 1536. Larger radii may overflow.
300///
301/// Note that this function uses *sRGB*. For linear, see [`simd_blur_argb`].
302#[cfg(any(doc, all(feature = "simd", feature = "blend-srgb")))]
303pub fn simd_blur_srgb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) where LaneCount<LANES>: SupportedLaneCount {
304	simd_blur(buffer, radius,
305		|i: [&u32; LANES]| Argb::from_u32xN_srgb(i.map(u32::clone)), Argb::to_u32xN_srgb,
306		|i| Argb::from_u32_srgb(*i), Argb::to_u32_srgb
307	);
308}
309
310/// Blurs a buffer of 32-bit packed ARGB pixels (0xAARRGGBB) with SIMD in
311/// parallel.
312///
313/// This is a version of [`par_simd_blur`] with pre-filled conversion routines
314/// that provide good results for blur radii <= 4096. Larger radii may overflow.
315///
316/// Note that this function is *linear*. For sRGB, see [`par_simd_blur_srgb`].
317#[cfg(any(doc, all(feature = "rayon", feature = "simd")))]
318pub fn par_simd_blur_argb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) where LaneCount<LANES>: SupportedLaneCount {
319	par_simd_blur(buffer, radius,
320		|i: [&u32; LANES]| Argb::from_u32xN(i.map(u32::clone)), Argb::to_u32xN,
321		|i| Argb::from_u32(*i), Argb::to_u32
322	);
323}
324
325/// Blurs a buffer of 32-bit packed sRGB pixels (0xAARRGGBB) with SIMD in
326/// parallel.
327///
328/// This is a version of [`par_simd_blur`] with pre-filled conversion routines
329/// that provide good results for blur radii <= 1536. Larger radii may overflow.
330///
331/// Note that this function uses *sRGB*. For linear, see [`par_simd_blur_argb`].
332#[cfg(any(doc, all(feature = "rayon", feature = "simd", feature = "blend-srgb")))]
333pub fn par_simd_blur_srgb<const LANES: usize>(buffer: &mut ImgRefMut<u32>, radius: usize) where LaneCount<LANES>: SupportedLaneCount {
334	par_simd_blur(buffer, radius,
335		|i: [&u32; LANES]| Argb::from_u32xN_srgb(i.map(u32::clone)), Argb::to_u32xN_srgb,
336		|i| Argb::from_u32_srgb(*i), Argb::to_u32_srgb
337	);
338}