use std::ffi::c_void;
use after_effects as ae;
use crate::types::{Configuration, FrameParams};
/// C-ABI entry point of a per-pixel CPU kernel.
///
/// As invoked in this file, the arguments are, in order:
/// 1. the pixel `x` coordinate,
/// 2. the pixel `y` coordinate,
/// 3. a pointer to the three-slot buffer pointer array
///    (filled as `[outgoing, incoming, dest]` by the render entry points),
/// 4. a type-erased pointer to the frame's `FrameParams`,
/// 5. a type-erased pointer to the kernel-specific user parameters.
pub type CpuDispatchFn = unsafe extern "C" fn(u32, u32, *const *const c_void, *const c_void, *const c_void);
/// C-ABI entry point of a row-range ("tile") CPU kernel.
///
/// As invoked in this file, the arguments are, in order:
/// 1. `y0`, the first row of the range,
/// 2. `y1`, the end row of the range, clamped to the frame height
///    (NOTE(review): looks like an exclusive bound — confirm against the kernels),
/// 3. the frame width,
/// 4. a pointer to the three-slot buffer pointer array
///    (filled as `[outgoing, incoming, dest]` by the render entry points),
/// 5. a type-erased pointer to the frame's `FrameParams`,
/// 6. a type-erased pointer to the kernel-specific user parameters.
pub type CpuDispatchTileFn = unsafe extern "C" fn(u32, u32, u32, *const *const c_void, *const c_void, *const c_void);
/// Bundle of the three raw buffer pointers handed to CPU kernels.
///
/// The render entry points in this file fill the slots as
/// `[outgoing, incoming, dest]`; a slot may be null when the corresponding
/// source buffer is absent.
#[derive(Copy, Clone, Debug)]
pub(crate) struct SafeBuffers(pub(crate) [*const c_void; 3]);
// SAFETY / NOTE(review): raw pointers are neither `Send` nor `Sync` by
// default, so these impls are a manual promise that moving and sharing the
// addresses across threads is acceptable. Nothing in this type enforces that;
// presumably callers keep the buffers alive for the whole dispatch and the
// kernels coordinate their writes — confirm against the call sites.
unsafe impl Send for SafeBuffers {}
unsafe impl Sync for SafeBuffers {}
/// Returns the numeric pixel-layout code for `layer`.
///
/// The result is:
/// * `3` for the VUYA / VUYX / VUYP Premiere formats whose name ends in `709`,
/// * `2` for the remaining VUYA / VUYX / VUYP Premiere formats,
/// * `1` in every other case: any other Premiere format, a failed pixel-format
///   query, or a host that is not Premiere.
///
/// The pixel format is only queried when the host reports itself as Premiere.
pub fn pixel_layout_from_format(in_data: &ae::InData, layer: &ae::Layer) -> u32 {
    // Layout code used whenever no VUY-family format is identified.
    const FALLBACK_LAYOUT: u32 = 1;

    if !in_data.is_premiere() {
        return FALLBACK_LAYOUT;
    }

    let format = match layer.pr_pixel_format() {
        Ok(format) => format,
        // A failed query is deliberately treated like an unknown format.
        Err(_) => return FALLBACK_LAYOUT,
    };

    match format {
        ae::pr::PixelFormat::Vuya4444_8u
        | ae::pr::PixelFormat::Vuyx4444_8u
        | ae::pr::PixelFormat::Vuyp4444_8u
        | ae::pr::PixelFormat::Vuya4444_16u
        | ae::pr::PixelFormat::Vuya4444_32f
        | ae::pr::PixelFormat::Vuyx4444_32f
        | ae::pr::PixelFormat::Vuyp4444_32f => 2,
        ae::pr::PixelFormat::Vuya4444_8u709
        | ae::pr::PixelFormat::Vuyx4444_8u709
        | ae::pr::PixelFormat::Vuyp4444_8u709
        | ae::pr::PixelFormat::Vuya4444_32f709
        | ae::pr::PixelFormat::Vuyx4444_32f709
        | ae::pr::PixelFormat::Vuyp4444_32f709 => 3,
        _ => FALLBACK_LAYOUT,
    }
}
/// Computes the size in bytes of one pixel of `layer`.
///
/// On a Premiere host the size is derived from the layer's Premiere pixel
/// format: `4` for the `8u` formats, `8` for the `16u` formats and `16` for
/// the `32f` / `32fLinear` formats. On any other host it is derived from the
/// layer's world type: `4` for `U8`, `8` for `U15`, `16` for `F32`.
///
/// # Errors
///
/// * Propagates the error of the pixel-format query on a Premiere host.
/// * `ae::Error::InvalidParms` for a Premiere pixel format not listed here.
/// * `ae::Error::Generic` for a world type not listed here.
pub fn compute_bpp(in_data: &ae::InData, layer: &ae::Layer) -> Result<u32, ae::Error> {
    if !in_data.is_premiere() {
        return match layer.world_type() {
            ae::aegp::WorldType::U8 => Ok(4),
            ae::aegp::WorldType::U15 => Ok(8),
            ae::aegp::WorldType::F32 => Ok(16),
            _ => Err(ae::Error::Generic),
        };
    }

    let bytes_per_pixel = match layer.pr_pixel_format()? {
        // 8-bit channels.
        ae::pr::PixelFormat::Bgra4444_8u
        | ae::pr::PixelFormat::Bgrx4444_8u
        | ae::pr::PixelFormat::Bgrp4444_8u
        | ae::pr::PixelFormat::Argb4444_8u
        | ae::pr::PixelFormat::Xrgb4444_8u
        | ae::pr::PixelFormat::Prgb4444_8u
        | ae::pr::PixelFormat::Vuya4444_8u
        | ae::pr::PixelFormat::Vuyx4444_8u
        | ae::pr::PixelFormat::Vuyp4444_8u
        | ae::pr::PixelFormat::Vuya4444_8u709
        | ae::pr::PixelFormat::Vuyx4444_8u709
        | ae::pr::PixelFormat::Vuyp4444_8u709 => 4,
        // 16-bit channels.
        ae::pr::PixelFormat::Bgra4444_16u
        | ae::pr::PixelFormat::Bgrx4444_16u
        | ae::pr::PixelFormat::Bgrp4444_16u
        | ae::pr::PixelFormat::Argb4444_16u
        | ae::pr::PixelFormat::Xrgb4444_16u
        | ae::pr::PixelFormat::Prgb4444_16u
        | ae::pr::PixelFormat::Vuya4444_16u => 8,
        // 32-bit float channels, including the linear variants.
        ae::pr::PixelFormat::Bgra4444_32f
        | ae::pr::PixelFormat::Bgrx4444_32f
        | ae::pr::PixelFormat::Bgrp4444_32f
        | ae::pr::PixelFormat::Argb4444_32f
        | ae::pr::PixelFormat::Xrgb4444_32f
        | ae::pr::PixelFormat::Prgb4444_32f
        | ae::pr::PixelFormat::Vuya4444_32f
        | ae::pr::PixelFormat::Vuyx4444_32f
        | ae::pr::PixelFormat::Vuyp4444_32f
        | ae::pr::PixelFormat::Vuya4444_32f709
        | ae::pr::PixelFormat::Vuyx4444_32f709
        | ae::pr::PixelFormat::Vuyp4444_32f709
        | ae::pr::PixelFormat::Bgra4444_32fLinear
        | ae::pr::PixelFormat::Bgrx4444_32fLinear
        | ae::pr::PixelFormat::Bgrp4444_32fLinear
        | ae::pr::PixelFormat::Argb4444_32fLinear
        | ae::pr::PixelFormat::Xrgb4444_32fLinear
        | ae::pr::PixelFormat::Prgb4444_32fLinear => 16,
        _ => return Err(ae::Error::InvalidParms),
    };
    Ok(bytes_per_pixel)
}
/// Runs a CPU kernel over the frame described by `config` and records timing.
///
/// Two dispatch paths exist:
/// * **AE iterate** — chosen when the host is not Premiere and the configured
///   width/height equal those of `out_layer`; `dispatch_fn` is invoked through
///   the layer iteration helper (`ae_dispatch`).
/// * **Rayon** — chosen otherwise; the destination buffer is viewed as
///   `height * pitch_bytes` bytes and split into row ranges that are handed to
///   `dispatch_tile_fn` (`rayon_dispatch_tile`). A null destination pointer or
///   a zero-sized buffer yields an empty slice instead.
///
/// A frame with zero width or height returns `Ok(())` without doing anything,
/// including diagnostics.
///
/// # Parameters
/// * `kernel_name` — label used for timing and dispatch diagnostics.
/// * `in_data` — host data; supplies the host kind and the current time.
/// * `in_layer` / `out_layer` — layers used by the AE iterate path.
/// * `config` — frame geometry and raw buffer pointers.
/// * `dispatch_fn` / `dispatch_tile_fn` — per-pixel and per-row-range kernels.
/// * `user_params` — kernel parameters, passed to the kernel as an opaque pointer.
///
/// # Errors
/// Returns whatever the AE iterate path returns; the Rayon path always
/// yields `Ok(())`.
pub fn render_cpu<P: Copy + Sync>(
    kernel_name: &'static str,
    in_data: &ae::InData,
    in_layer: &ae::Layer,
    out_layer: &mut ae::Layer,
    config: &Configuration,
    dispatch_fn: CpuDispatchFn,
    dispatch_tile_fn: CpuDispatchTileFn,
    user_params: &P,
) -> Result<(), ae::Error> {
    use crate::cpu::diag;
    use std::time::Instant;

    let (frame_w, frame_h) = (config.width, config.height);
    if frame_w == 0 || frame_h == 0 {
        return Ok(());
    }

    let dispatch_guard = diag::DispatchGuard::enter();
    let setup_clock = Instant::now();

    // Missing source buffers are represented by null pointers.
    let outgoing_ptr = config.outgoing_data.unwrap_or(std::ptr::null_mut()) as *const c_void;
    let incoming_ptr = config.incoming_data.unwrap_or(std::ptr::null_mut()) as *const c_void;
    let dest_ptr = config.dest_data as *const c_void;
    let kernel_buffers = SafeBuffers([outgoing_ptr, incoming_ptr, dest_ptr]);

    // Current time divided by the time scale; a zero scale maps to 0.0
    // rather than dividing by zero.
    let time_scale = in_data.time_scale();
    let time = if time_scale == 0 {
        0.0
    } else {
        in_data.current_time() as f32 / time_scale as f32
    };

    let out_desc = crate::types::make_outgoing_desc(config);
    let in_desc = crate::types::make_texture_desc(
        config.incoming_width,
        config.incoming_height,
        config.incoming_pitch_px as u32,
        config.bytes_per_pixel,
        config.pixel_layout,
    );
    let dst_desc = crate::types::make_texture_desc(
        frame_w,
        frame_h,
        config.dest_pitch_px as u32,
        config.bytes_per_pixel,
        config.pixel_layout,
    );
    let frame = FrameParams {
        out_desc,
        in_desc,
        dst_desc,
        width: frame_w,
        height: frame_h,
        time,
        progress: config.progress,
    };

    let use_ae_iterate = !in_data.is_premiere()
        && frame_w == out_layer.width() as u32
        && frame_h == out_layer.height() as u32;

    // Everything up to here counts as setup; the dispatch itself is timed separately.
    let setup_ns = setup_clock.elapsed().as_nanos() as u64;
    let body_clock = Instant::now();

    let path;
    let chunk_rows;
    let result;
    if use_ae_iterate {
        path = diag::DispatchPath::AeIterate;
        chunk_rows = 1u32;
        result = ae_dispatch(in_layer, out_layer, kernel_buffers, frame, user_params, dispatch_fn);
    } else {
        let stride_bytes = frame.dst_desc.pitch_bytes as usize;
        let dest_len = (frame_h as usize) * stride_bytes;
        let dest_bytes: &mut [u8] = if dest_len == 0 || dest_ptr.is_null() {
            &mut []
        } else {
            // SAFETY (assumed contract): `config.dest_data` is expected to
            // point to at least `height * pitch_bytes` writable bytes.
            unsafe { std::slice::from_raw_parts_mut(dest_ptr as *mut u8, dest_len) }
        };
        chunk_rows = rayon_dispatch_tile(
            frame_w,
            kernel_buffers,
            frame,
            user_params,
            dispatch_tile_fn,
            dest_bytes,
            stride_bytes,
        );
        path = diag::DispatchPath::Rayon;
        result = Ok(());
    }

    let body_ns = body_clock.elapsed().as_nanos() as u64;
    crate::timing::record(kernel_name, crate::types::Backend::Cpu, setup_ns + body_ns);
    diag::log_dispatch(
        kernel_name,
        path,
        frame_w,
        frame_h,
        chunk_rows,
        setup_ns,
        body_ns,
        dispatch_guard.concurrent_at_entry(),
    );
    drop(dispatch_guard);
    result
}
/// Drives the per-pixel kernel through the layer iteration helper.
///
/// Calls `in_layer.iterate_with` over rows `0..tp.height` with no area
/// restriction and forwards every `(x, y)` callback to `dispatch_fn`, passing
/// the buffer pointer array, the frame parameters and the user parameters as
/// raw pointers. The pixel references supplied by the iteration callback are
/// ignored.
///
/// # Parameters
/// * `in_layer` / `out_layer` — layers handed to `iterate_with`.
/// * `buffers` — the three raw buffer pointers exposed to the kernel.
/// * `tp` — frame parameters; moved into the callback so the pointer given to
///   the kernel stays valid for the whole iteration.
/// * `user_params` — kernel parameters, passed as an opaque pointer.
/// * `dispatch_fn` — the per-pixel kernel.
///
/// # Errors
/// Returns the result of `iterate_with`; the callback itself never fails.
fn ae_dispatch<P: Copy + Sync>(
    in_layer: &ae::Layer,
    out_layer: &mut ae::Layer,
    buffers: SafeBuffers,
    tp: FrameParams,
    user_params: &P,
    dispatch_fn: CpuDispatchFn,
) -> Result<(), ae::Error> {
    // The callback carries no mutable state: a write-only "first call" flag
    // used to live here, but it was never read and only added unsynchronised
    // mutation to a callback the host may run concurrently.
    in_layer.iterate_with(
        out_layer,
        0,
        tp.height as i32,
        None,
        move |x: i32, y: i32, _pixel: ae::GenericPixel, _out_pixel: ae::GenericPixelMut| {
            // SAFETY (assumed contract): `buffers` and `tp` are owned by this
            // closure and `user_params` is borrowed for the whole call, so the
            // pointers are valid while the kernel runs; the kernel is expected
            // to interpret them with the matching types.
            unsafe {
                dispatch_fn(
                    x as u32,
                    y as u32,
                    buffers.0.as_ptr(),
                    &tp as *const _ as *const c_void,
                    user_params as *const _ as *const c_void,
                );
            }
            Ok(())
        },
    )
}
/// Picks how many rows each parallel task should process.
///
/// The frame is split into roughly four tasks per pool worker: the result is
/// `ceil(height / (4 * workers))`, never less than `1`.
///
/// The ceiling division is written as quotient plus "has remainder" rather
/// than `(height + tasks - 1) / tasks`, because the latter overflows `u32`
/// for heights close to `u32::MAX`.
#[inline]
fn compute_rows_per_task(height: u32) -> u32 {
    let threads = crate::cpu::pool::worker_count().max(1) as u32;
    // The cast above could truncate to 0 for an absurd worker count, so the
    // divisor is clamped again after scaling.
    let target_tasks = threads.saturating_mul(4).max(1);
    let full_rows = height / target_tasks;
    let has_partial = u32::from(height % target_tasks != 0);
    (full_rows + has_partial).max(1)
}
/// Splits the frame into row ranges and runs the tile kernel on each in parallel.
///
/// `out_buf` is partitioned into chunks of `rows_per_task * out_stride_bytes`
/// bytes. The chunks are used only to derive the work split: each chunk index
/// is turned into a row range `y0..y1` (with `y1` clamped to `tp.height`) and
/// passed to `dispatch_tile_fn` together with the raw buffer pointers, the
/// frame parameters and the user parameters. This function does not write to
/// the chunk contents itself.
///
/// # Parameters
/// * `width` — frame width forwarded to the kernel.
/// * `buffers` — the three raw buffer pointers exposed to the kernel.
/// * `tp` — frame parameters; kept alive here for the whole parallel loop.
/// * `user_params` — kernel parameters, passed as an opaque pointer.
/// * `dispatch_tile_fn` — the row-range kernel.
/// * `out_buf` — byte view of the destination used to partition the work.
/// * `out_stride_bytes` — size in bytes of one destination row.
///
/// # Returns
/// The number of rows assigned to each task.
///
/// If `out_stride_bytes` is zero no chunking is possible; the function then
/// returns without dispatching anything instead of panicking inside rayon.
pub(crate) fn rayon_dispatch_tile<P: Copy + Sync>(
    width: u32,
    buffers: SafeBuffers,
    tp: FrameParams,
    user_params: &P,
    dispatch_tile_fn: CpuDispatchTileFn,
    out_buf: &mut [u8],
    out_stride_bytes: usize,
) -> u32 {
    use rayon::prelude::*;
    // Raw pointers are not `Send`, so they cross into the parallel closure as
    // plain addresses and are turned back into pointers on the worker side.
    let buf_ptr = buffers.0.as_ptr() as usize;
    let tp_ptr = &tp as *const _ as usize;
    let up_ptr = user_params as *const _ as usize;
    let height = tp.height as usize;
    let rows_per_task = compute_rows_per_task(tp.height) as usize;
    let chunk_bytes = rows_per_task * out_stride_bytes;
    crate::cpu::pool::ensure_initialized();
    if chunk_bytes == 0 {
        // `par_chunks_mut` panics on a zero chunk size; a zero row stride
        // means there is nothing to partition, so bail out cleanly.
        return rows_per_task as u32;
    }
    out_buf.par_chunks_mut(chunk_bytes).enumerate().for_each(move |(chunk_idx, _chunk_bytes)| {
        let y0 = (chunk_idx * rows_per_task) as u32;
        let y1 = ((chunk_idx * rows_per_task + rows_per_task).min(height)) as u32;
        // SAFETY (assumed contract): the addresses refer to `buffers`, `tp`
        // and `user_params`, all of which outlive this blocking `for_each`;
        // the kernel is expected to confine its writes to rows `y0..y1`.
        unsafe {
            dispatch_tile_fn(
                y0,
                y1,
                width,
                buf_ptr as *const *const c_void,
                tp_ptr as *const c_void,
                up_ptr as *const c_void,
            );
        }
    });
    rows_per_task as u32
}
/// Runs a tile kernel over the frame described by `config` without any host layers.
///
/// The destination is viewed as `height * pitch_bytes` bytes and handed to
/// `rayon_dispatch_tile`, which invokes `dispatch_tile_fn` on row ranges. When
/// the destination pointer is null or the buffer size is zero, no kernel is
/// dispatched and the logged chunk size stays at `1`. Timing and dispatch
/// diagnostics are recorded in both cases.
///
/// A frame with zero width or height returns immediately, before any
/// diagnostics are recorded.
///
/// # Parameters
/// * `kernel_name` — label used for timing and dispatch diagnostics.
/// * `config` — frame geometry, time, progress and raw buffer pointers.
/// * `dispatch_tile_fn` — the row-range kernel.
/// * `user_params` — kernel parameters, passed to the kernel as an opaque pointer.
///
/// # Safety
/// `config.dest_data` must be null or valid for reads and writes of
/// `height * pitch_bytes` bytes for the duration of the call, because a
/// mutable byte slice of that length is created from it. The outgoing and
/// incoming pointers are forwarded to the kernel as-is; presumably they must
/// be valid for whatever the kernel reads — confirm against the kernels.
pub unsafe fn render_cpu_direct<P: Copy + Sync>(
    kernel_name: &'static str,
    config: &Configuration,
    dispatch_tile_fn: CpuDispatchTileFn,
    user_params: &P,
) {
    use crate::cpu::diag;
    use std::time::Instant;

    let (frame_w, frame_h) = (config.width, config.height);
    if frame_w == 0 || frame_h == 0 {
        return;
    }

    let dispatch_guard = diag::DispatchGuard::enter();
    let setup_clock = Instant::now();

    // Missing source buffers are represented by null pointers.
    let outgoing_ptr = config.outgoing_data.unwrap_or(std::ptr::null_mut()) as *const c_void;
    let incoming_ptr = config.incoming_data.unwrap_or(std::ptr::null_mut()) as *const c_void;
    let dest_ptr = config.dest_data as *const c_void;
    let kernel_buffers = SafeBuffers([outgoing_ptr, incoming_ptr, dest_ptr]);

    let out_desc = crate::types::make_outgoing_desc(config);
    let in_desc = crate::types::make_texture_desc(
        config.incoming_width,
        config.incoming_height,
        config.incoming_pitch_px as u32,
        config.bytes_per_pixel,
        config.pixel_layout,
    );
    let dst_desc = crate::types::make_texture_desc(
        frame_w,
        frame_h,
        config.dest_pitch_px as u32,
        config.bytes_per_pixel,
        config.pixel_layout,
    );
    let frame = FrameParams {
        out_desc,
        in_desc,
        dst_desc,
        width: frame_w,
        height: frame_h,
        time: config.time,
        progress: config.progress,
    };

    let stride_bytes = frame.dst_desc.pitch_bytes as usize;
    let dest_len = (frame_h as usize) * stride_bytes;

    // Everything up to here counts as setup; the dispatch itself is timed separately.
    let setup_ns = setup_clock.elapsed().as_nanos() as u64;
    let body_clock = Instant::now();

    let chunk_rows = if dest_len == 0 || dest_ptr.is_null() {
        // Nothing to render into: skip the dispatch but still log below.
        1u32
    } else {
        // SAFETY: the caller guarantees `dest_data` is valid for `dest_len` bytes.
        let dest_bytes = unsafe { std::slice::from_raw_parts_mut(dest_ptr as *mut u8, dest_len) };
        rayon_dispatch_tile(
            frame_w,
            kernel_buffers,
            frame,
            user_params,
            dispatch_tile_fn,
            dest_bytes,
            stride_bytes,
        )
    };

    let body_ns = body_clock.elapsed().as_nanos() as u64;
    crate::timing::record(kernel_name, crate::types::Backend::Cpu, setup_ns + body_ns);
    diag::log_dispatch(
        kernel_name,
        diag::DispatchPath::Direct,
        frame_w,
        frame_h,
        chunk_rows,
        setup_ns,
        body_ns,
        dispatch_guard.concurrent_at_entry(),
    );
    drop(dispatch_guard);
}