// blittle 0.3.0 - Docs.rs

use std::slice::{from_raw_parts, from_raw_parts_mut};

use crate::{ClippedRect, get_index};
use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
pub use rayon::max_num_threads;

/// Blit using multiple threads by splitting the clipped region into groups of rows and copying each group in parallel.
///
/// This can be either slower or faster than `blit` depending on the size of `src` and the number of threads you want/can use.
/// Adjust `num_threads` accordingly:
///
/// - You don't want this to be more than the max number of threads available.
/// - If you use too many threads for small images, this function can be slower than `blit` due to the overhead of spawning/joining threads.
///
/// # Parameters
///
/// - `src`: The source bytes.
/// - `dst`: The destination bytes, written in place.
/// - `rect`: The clipped blit region. Its `src_size`, `src_size_clipped`, `dst_size`, and `dst_position_clipped` fields are read.
/// - `stride`: Multiplier applied to widths to get row lengths in bytes (presumably bytes per pixel).
/// - `num_threads`: Upper bound on the number of row groups that are handed to rayon. A value of `0` is treated as `1`.
///
/// The rows are split into at most `num_threads` groups of (nearly) equal size.
/// How many of those groups actually run at the same time is decided by rayon's thread pool.
pub fn blit_multi_threaded(
    src: &[u8],
    dst: &mut [u8],
    rect: &ClippedRect,
    stride: usize,
    num_threads: usize,
) {
    // Capture the lengths before taking raw pointers so the buffers are not touched
    // through `src`/`dst` again while the derived row slices are alive.
    let src_len = src.len();
    let dst_len = dst.len();
    let src_ptr = src.as_ptr();
    let dst_ptr = dst.as_mut_ptr();
    // Length in bytes of one clipped row.
    let row_len = rect.src_size_clipped.w * stride;

    // Pair up every clipped source row with the destination row it is copied into.
    let rows = (0..rect.src_size_clipped.h)
        .map(|src_y| {
            let src_index = get_index(0, src_y, rect.src_size.w, stride);
            let dst_index = get_index(
                rect.dst_position_clipped.x,
                rect.dst_position_clipped.y + src_y,
                rect.dst_size.w,
                stride,
            );
            debug_assert!(src_index + row_len <= src_len);
            debug_assert!(dst_index + row_len <= dst_len);
            // SAFETY: Each slice must lie entirely inside its buffer, i.e.
            // `index + row_len <= len` (checked above in debug builds), and the
            // destination rows must not overlap each other. Both are assumed to be
            // guaranteed by `rect` having been clipped against `src` and `dst`
            // (NOTE(review): `ClippedRect` is defined elsewhere — confirm).
            unsafe {
                (
                    from_raw_parts(src_ptr.add(src_index), row_len),
                    from_raw_parts_mut(dst_ptr.add(dst_index), row_len),
                )
            }
        })
        .collect::<Vec<(&[u8], &mut [u8])>>();

    // Split the *rows* (not the bytes of `src`) into at most `num_threads` groups.
    // Both clamps keep the chunk size non-zero: rayon's `chunks` panics on zero,
    // and a zero `num_threads` would otherwise divide by zero.
    let chunk_size = rows.len().div_ceil(num_threads.max(1)).max(1);
    rows.into_par_iter().chunks(chunk_size).for_each(|chunk| {
        for (src_row, dst_row) in chunk {
            dst_row.copy_from_slice(src_row);
        }
    });
}