pub mod scissor;
pub mod sdf;
pub use scissor::{clip_poly_to_rect, ink_outside_rect};
use vello_cpu::color::PremulRgba8;
use vello_cpu::Pixmap;
use super::camera::Camera;
use super::prim::{LineInstance, QuadInstance};
use super::{Backend, Canvas, Frame, Renderer};
/// CPU rasterization target: a pixel buffer plus the primitives queued for the next raster pass.
pub struct CpuCanvas {
/// Backing pixel store (`PremulRgba8` per pixel) that raster passes write into.
pixmap: Pixmap,
/// Canvas width in pixels; also used as the row length when slicing the pixmap data.
width: u32,
/// Canvas height in pixels, i.e. the number of scanlines rastered.
height: u32,
/// Camera supplied at construction and returned unchanged by `Canvas::camera`.
camera: Camera,
/// Clear colour, premultiplied once in `new`; each row is filled with it before primitives are blended.
background: PremulRgba8,
/// Quads queued by `push_quads`; taken (emptied) by the next raster pass.
quads: Vec<QuadInstance>,
/// Lines queued by `push_lines`; taken (emptied) by the next raster pass.
lines: Vec<LineInstance>,
}
impl CpuCanvas {
    /// Creates an empty canvas with the given size, camera and straight-alpha RGBA background.
    ///
    /// The backing `Pixmap` is addressed with `u16` dimensions, so `width` and `height`
    /// saturate at `u16::MAX`. The saturated values are what the canvas stores and what
    /// every produced [`Frame`] reports, which keeps the pixmap, the row slicing and the
    /// output buffer in agreement (a wrapping `as u16` cast would leave them out of sync).
    pub fn new(width: u32, height: u32, camera: Camera, background: [u8; 4]) -> Self {
        let px_w = u16::try_from(width).unwrap_or(u16::MAX);
        let px_h = u16::try_from(height).unwrap_or(u16::MAX);
        let pixmap = Pixmap::new(px_w, px_h);
        Self {
            pixmap,
            width: u32::from(px_w),
            height: u32::from(px_h),
            camera,
            background: premul(background),
            quads: Vec::new(),
            lines: Vec::new(),
        }
    }

    /// Borrows the premultiplied pixel buffer as last rastered.
    pub fn pixmap(&self) -> &Pixmap {
        &self.pixmap
    }

    /// Rasters the queued primitives and returns the frame, passing `0` as the worker count.
    // NOTE(review): `0` presumably lets `gatling_scanlines` choose the worker count — confirm.
    pub fn rasterize(&mut self) -> Frame {
        self.rasterize_with_workers(0)
    }

    /// Rasters the queued primitives with `workers` and converts the result to a [`Frame`].
    ///
    /// The line and quad queues are empty afterwards.
    pub fn rasterize_with_workers(&mut self, workers: usize) -> Frame {
        self.raster_only(workers);
        self.frame_with_workers(workers)
    }

    /// Rasters the queued primitives into the pixmap without producing a [`Frame`].
    ///
    /// The line and quad queues are empty afterwards.
    pub fn raster_only(&mut self, workers: usize) {
        // Take the queues out of `self` so they can be borrowed immutably while the
        // pixmap is borrowed mutably.
        let lines = std::mem::take(&mut self.lines);
        let quads = std::mem::take(&mut self.quads);
        self.raster_batches(&lines, &quads, workers);
    }

    /// Pixel count at or above which a frame is treated as worth parallelising.
    const PARALLEL_PX_THRESHOLD: usize = 64 * 1024;

    /// Returns `true` when a `w` x `h` frame reaches [`Self::PARALLEL_PX_THRESHOLD`].
    #[inline]
    fn parallel_frame(w: u32, h: u32) -> bool {
        (w as usize * h as usize) >= Self::PARALLEL_PX_THRESHOLD
    }

    /// `min_rows` argument for `gatling_scanlines`: `1` for large frames, `usize::MAX` otherwise.
    // NOTE(review): presumably the minimum rows per task, so `usize::MAX` keeps small
    // frames on a single thread — confirm against `gatling_scanlines`.
    #[inline]
    fn min_rows_for(w: u32, h: u32) -> usize {
        if Self::parallel_frame(w, h) {
            1
        } else {
            usize::MAX
        }
    }

    /// Clears every scanline to the background and blends the bucketed primitives into it.
    ///
    /// Does nothing for a zero-sized canvas.
    fn raster_batches(&mut self, lines: &[LineInstance], quads: &[QuadInstance], workers: usize) {
        let w = self.width;
        let h = self.height;
        if w == 0 || h == 0 {
            return;
        }
        let bg = self.background;
        // Bucket primitives per row once so each scanline only visits what can touch it.
        let buckets = YBuckets::build(lines, quads, h);
        let min_rows = Self::min_rows_for(w, h);
        let data = self.pixmap.data_mut();
        znippy_zoomies::gatling_forkjoin::gatling_scanlines(
            data,
            h as usize,
            w as usize,
            workers,
            min_rows,
            |y, row| {
                for px in row.iter_mut() {
                    *px = bg;
                }
                let (li, qi) = buckets.row(y);
                raster_row(row, y as u32, w, lines, quads, li, qi);
            },
        );
    }

    /// Converts the current pixmap to a straight-alpha RGBA8 [`Frame`], passing `0` as the worker count.
    pub fn frame(&self) -> Frame {
        self.frame_with_workers(0)
    }

    /// Converts the current pixmap to a straight-alpha RGBA8 [`Frame`] using `workers`.
    ///
    /// A zero-sized canvas yields a frame with an empty `rgba` buffer.
    pub fn frame_with_workers(&self, workers: usize) -> Frame {
        let w = self.width as usize;
        let h = self.height as usize;
        if w == 0 || h == 0 {
            return Frame { width: self.width, height: self.height, rgba: Vec::new() };
        }
        let len = w * h * 4;
        // Zero-initialise the output instead of exposing uninitialised spare capacity as
        // `&mut [u8]`: that is undefined behaviour, and any row the scanline driver did
        // not write would leak uninitialised bytes into the frame.
        let mut rgba = vec![0u8; len];
        let src = self.pixmap.data();
        let min_rows = Self::min_rows_for(self.width, self.height);
        znippy_zoomies::gatling_forkjoin::gatling_scanlines(
            rgba.as_mut_slice(),
            h,
            w * 4,
            workers,
            min_rows,
            |y, out_row| {
                let src_row = &src[y * w..(y + 1) * w];
                for (p, o) in src_row.iter().zip(out_row.chunks_exact_mut(4)) {
                    unpremul_into(p, o);
                }
            },
        );
        Frame { width: self.width, height: self.height, rgba }
    }
}
/// Writes the straight-alpha RGBA bytes of premultiplied pixel `p` into `o[0..4]`.
///
/// A fully transparent pixel becomes four zero bytes. Otherwise each colour channel is
/// divided by alpha with round-to-nearest and capped at 255; alpha is copied through.
/// Panics if `o` holds fewer than four bytes.
#[inline]
fn unpremul_into(p: &PremulRgba8, o: &mut [u8]) {
    let alpha = u32::from(p.a);
    let straight = |channel: u8| -> u8 {
        if alpha == 0 {
            0
        } else {
            ((u32::from(channel) * 255 + alpha / 2) / alpha).min(255) as u8
        }
    };
    // When alpha is zero `p.a` is itself 0, so the last slot is right in both cases.
    let rgba = [straight(p.r), straight(p.g), straight(p.b), p.a];
    for (slot, value) in rgba.into_iter().enumerate() {
        o[slot] = value;
    }
}
/// Per-scanline lookup of the primitives whose vertical extent reaches that row.
///
/// For row `y`, the line indices are `line_idx[line_off[y]..line_off[y + 1]]` and the
/// quad indices are `quad_idx[quad_off[y]..quad_off[y + 1]]`. Both offset tables hold
/// `h + 1` entries, and indices within a row keep the primitives' submission order.
struct YBuckets {
    line_idx: Vec<u32>,
    line_off: Vec<u32>,
    quad_idx: Vec<u32>,
    quad_off: Vec<u32>,
}

impl YBuckets {
    /// Maps the vertical extent `[mny, mxy]` to the half-open range of rows whose pixel
    /// centre (`y + 0.5`) lies inside it, clamped to `0..h`. The range may be empty.
    #[inline]
    fn span(mny: f32, mxy: f32, h: u32) -> (u32, u32) {
        let first = ((mny - 0.5).ceil().max(0.0) as u32).min(h);
        let end = (((mxy - 0.5).floor() + 1.0).max(0.0) as u32).min(h);
        (first, end.max(first))
    }

    /// Buckets `lines` and `quads` by the rows of an `h`-row frame.
    fn build(lines: &[LineInstance], quads: &[QuadInstance], h: u32) -> Self {
        let rows = h as usize;
        let (line_off, line_idx) = Self::bucket(lines, rows, |line: &LineInstance| {
            let (_, top, _, bottom) = line.bounds();
            Self::span(top, bottom, h)
        });
        let (quad_off, quad_idx) = Self::bucket(quads, rows, |quad: &QuadInstance| {
            let cy = quad.center[1];
            let half = quad.half_extent();
            Self::span(cy - half, cy + half, h)
        });
        Self { line_idx, line_off, quad_idx, quad_off }
    }

    /// Counts items per row, prefix-sums the counts into offsets, then scatters item indices.
    ///
    /// Returns `(offsets, indices)` where `offsets` has `rows + 1` entries.
    fn bucket<T>(
        items: &[T],
        rows: usize,
        span_of: impl Fn(&T) -> (u32, u32),
    ) -> (Vec<u32>, Vec<u32>) {
        // Pass 1: per-row counts, stored one slot to the right so the running sum
        // below turns them into start offsets.
        let mut offsets = vec![0u32; rows + 1];
        for item in items {
            let (y0, y1) = span_of(item);
            for count in &mut offsets[y0 as usize + 1..y1 as usize + 1] {
                *count += 1;
            }
        }
        let mut running = 0u32;
        for slot in offsets.iter_mut() {
            running += *slot;
            *slot = running;
        }

        // Pass 2: write each item's index into every row it covers, in submission order.
        let mut indices = vec![0u32; offsets[rows] as usize];
        let mut cursor = offsets.clone();
        for (i, item) in items.iter().enumerate() {
            let (y0, y1) = span_of(item);
            for next in &mut cursor[y0 as usize..y1 as usize] {
                indices[*next as usize] = i as u32;
                *next += 1;
            }
        }
        (offsets, indices)
    }

    /// Returns `(line indices, quad indices)` for row `y`; panics if `y` is not below `h`.
    #[inline]
    fn row(&self, y: usize) -> (&[u32], &[u32]) {
        let line_range = self.line_off[y] as usize..self.line_off[y + 1] as usize;
        let quad_range = self.quad_off[y] as usize..self.quad_off[y + 1] as usize;
        (&self.line_idx[line_range], &self.quad_idx[quad_range])
    }
}
/// Blends the selected lines, then the selected quads, into scanline `y`.
///
/// `row` is the scanline's pixels (indexed by x, expected to hold `w` entries).
/// `line_idx` / `quad_idx` choose which entries of `lines` / `quads` are visited, in order.
/// Coverage is sampled at each pixel centre, and only positive coverage is blended.
fn raster_row(
    row: &mut [PremulRgba8],
    y: u32,
    w: u32,
    lines: &[LineInstance],
    quads: &[QuadInstance],
    line_idx: &[u32],
    quad_idx: &[u32],
) {
    // Pixel columns overlapped by the horizontal extent `[lo, hi]`, clipped to `0..w`.
    let columns = |lo: f32, hi: f32| -> std::ops::Range<u32> {
        let start = lo.floor().max(0.0) as u32;
        let end = hi.ceil().min(w as f32) as u32;
        start..end
    };
    let centre_y = y as f32 + 0.5;

    for line in line_idx.iter().map(|&i| &lines[i as usize]) {
        let (left, _, right, _) = line.bounds();
        for x in columns(left, right) {
            let coverage = sdf::line_coverage(line, [x as f32 + 0.5, centre_y]);
            if coverage > 0.0 {
                blend_px(&mut row[x as usize], line.color, coverage);
            }
        }
    }

    for quad in quad_idx.iter().map(|&i| &quads[i as usize]) {
        let half = quad.half_extent();
        let (cx, cy) = (quad.center[0], quad.center[1]);
        // The vertical offset is the same for every pixel of this row.
        let dy = centre_y - cy;
        for x in columns(cx - half, cx + half) {
            let dx = x as f32 + 0.5 - cx;
            let coverage = sdf::quad_coverage(quad, dx, dy);
            if coverage > 0.0 {
                blend_px(&mut row[x as usize], quad.color, coverage);
            }
        }
    }
}
/// Source-over blends `color` (straight RGBA in `0.0..=1.0`) scaled by `coverage` onto `dst`.
///
/// The effective source alpha is `color[3] * coverage` clamped to `0..=1`; the source
/// colour is premultiplied by it and composited over the premultiplied destination.
#[inline]
fn blend_px(dst: &mut PremulRgba8, color: [f32; 4], coverage: f32) {
    let src_alpha = (color[3] * coverage).clamp(0.0, 1.0);
    let keep = 1.0 - src_alpha;
    // Composite one premultiplied source channel over the stored 8-bit channel.
    let over = |src: f32, under: u8| -> u8 {
        let under = under as f32 / 255.0;
        ((src + under * keep) * 255.0).round().clamp(0.0, 255.0) as u8
    };
    *dst = PremulRgba8 {
        r: over(color[0] * src_alpha, dst.r),
        g: over(color[1] * src_alpha, dst.g),
        b: over(color[2] * src_alpha, dst.b),
        a: over(src_alpha, dst.a),
    };
}
/// Converts straight-alpha RGBA bytes into a premultiplied pixel.
///
/// Each colour channel is scaled by `alpha / 255` with round-to-nearest; alpha is kept as is.
fn premul(c: [u8; 4]) -> PremulRgba8 {
    let [r, g, b, a] = c;
    let alpha = u32::from(a);
    let scale = |channel: u8| -> u8 { ((u32::from(channel) * alpha + 127) / 255) as u8 };
    PremulRgba8 { r: scale(r), g: scale(g), b: scale(b), a }
}
// `Canvas` implementation: pushes only queue primitives; nothing is drawn until one of
// the rasterize methods takes the queues.
impl Canvas for CpuCanvas {
/// Appends copies of `quads` to the pending quad queue.
fn push_quads(&mut self, quads: &[QuadInstance]) {
self.quads.extend_from_slice(quads);
}
/// Appends copies of `lines` to the pending line queue.
fn push_lines(&mut self, lines: &[LineInstance]) {
self.lines.extend_from_slice(lines);
}
/// Returns the camera this canvas was constructed with.
fn camera(&self) -> &Camera {
&self.camera
}
}
/// `Renderer` backed by `CpuCanvas`: holds at most one in-flight canvas between `begin` and `present`.
pub struct CpuRenderer {
/// Straight-alpha RGBA background handed to every canvas created by `begin`.
background: [u8; 4],
/// Canvas for the frame currently being recorded; `None` until `begin` is called and again after `present`.
canvas: Option<CpuCanvas>,
}
impl CpuRenderer {
/// Creates a renderer with the given straight-alpha RGBA background and no active canvas.
pub fn new(background: [u8; 4]) -> Self {
Self { background, canvas: None }
}
}
impl Default for CpuRenderer {
/// Builds a renderer with the opaque background RGBA `(12, 14, 20, 255)`.
fn default() -> Self {
Self::new([12, 14, 20, 255])
}
}
impl Renderer for CpuRenderer {
    /// Starts a new frame: replaces any in-flight canvas with a fresh one and returns it.
    fn begin(&mut self, width: u32, height: u32, camera: Camera) -> &mut dyn Canvas {
        let fresh = CpuCanvas::new(width, height, camera, self.background);
        // `Option::insert` stores the canvas (dropping any previous one) and hands back
        // a mutable reference to it.
        self.canvas.insert(fresh)
    }

    /// Rasters and returns the in-flight frame, leaving the renderer without a canvas.
    ///
    /// Returns an empty `0 x 0` frame when `begin` has not been called since the last present.
    fn present(&mut self) -> Frame {
        match self.canvas.take() {
            Some(mut canvas) => canvas.rasterize(),
            None => Frame { width: 0, height: 0, rgba: Vec::new() },
        }
    }

    /// Identifies this renderer as the CPU backend.
    fn backend(&self) -> Backend {
        Backend::CpuVello
    }
}
#[cfg(test)]
impl CpuCanvas {
    /// Test-only reference rasterizer: same per-row work as the fork-join path, but run
    /// row by row in a plain loop on the calling thread.
    fn rasterize_sequential(&mut self) -> Frame {
        let (w, h) = (self.width, self.height);
        let bg = self.background;
        let lines = std::mem::take(&mut self.lines);
        let quads = std::mem::take(&mut self.quads);
        let buckets = YBuckets::build(&lines, &quads, h);
        let data = self.pixmap.data_mut();
        for y in 0..h {
            let start = (y * w) as usize;
            let row = &mut data[start..start + w as usize];
            // Clear the scanline before blending this row's primitives.
            row.fill(bg);
            let (li, qi) = buckets.row(y as usize);
            raster_row(row, y, w, &lines, &quads, li, qi);
        }
        self.frame()
    }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::render::prim::{shape, CircleInstance, LineInstance, MarkerInstance, RingInstance};
// The fork-join raster path must produce exactly the same bytes as the plain
// row-by-row reference. 512 x 512 is above PARALLEL_PX_THRESHOLD, so `rasterize`
// takes the `min_rows = 1` branch.
#[test]
fn parallel_raster_matches_sequential() {
let (w, h) = (512u32, 512u32);
// Builds an identical canvas each time: 400 lines plus 400 quads cycling through
// circle, ring and diamond-marker shapes at deterministic positions.
let mk = || {
let mut c = CpuCanvas::new(w, h, Camera::default(), [9, 11, 16, 255]);
let mut quads = Vec::new();
let mut lines = Vec::new();
for i in 0..400u32 {
let x = (i * 37 % 500 + 6) as f32;
let y = (i * 53 % 500 + 6) as f32;
let col = [(i % 7) as f32 / 7.0, (i % 5) as f32 / 5.0, (i % 3) as f32 / 3.0, 0.7];
if i % 3 == 0 {
quads.push(CircleInstance { center: [x, y], radius: 14.0, color: col, aa: 1.5 }.lower());
} else if i % 3 == 1 {
quads.push(RingInstance { center: [x, y], radius: 16.0, inner: 8.0, color: col, aa: 1.5 }.lower());
} else {
quads.push(MarkerInstance { center: [x, y], radius: 12.0, corner: 2.0, color: col, aa: 1.0, shape: shape::DIAMOND }.lower());
}
lines.push(LineInstance::round([x, y], [x + 40.0, y + 25.0], 3.0, 1.5, [col[0], col[1], col[2], 0.6]));
}
c.push_lines(&lines);
c.push_quads(&quads);
c
};
let parallel = mk().rasterize();
let sequential = mk().rasterize_sequential();
assert_eq!(parallel.width, sequential.width);
assert_eq!(parallel.rgba.len(), sequential.rgba.len());
assert_eq!(parallel.rgba, sequential.rgba, "GATLING parallel raster is bit-identical to sequential");
// Guards against both paths trivially agreeing on an empty frame.
assert!(parallel.lit_px() > 50_000, "real content rastered, got {}", parallel.lit_px());
}
// Y-bucketing must not drop or add work: the bucketed result has to equal a
// brute-force pass that offers every primitive to every row.
#[test]
fn ybucket_raster_matches_brute_force_all_primitives() {
let (w, h) = (300u32, 200u32);
let mut quads = Vec::new();
let mut lines = Vec::new();
for i in 0..120u32 {
let x = (i * 41 % 290 + 4) as f32;
// y runs from -30 to well past h, so some primitives lie partly or wholly off-frame.
let y = (i as f32 * 7.3) - 30.0;
let col = [(i % 7) as f32 / 7.0, (i % 4) as f32 / 4.0, (i % 3) as f32 / 3.0, 0.65];
quads.push(CircleInstance { center: [x, y], radius: 12.0, color: col, aa: 1.5 }.lower());
lines.push(LineInstance::round([x, y], [x + 30.0, y + 50.0], 3.0, 1.5, [col[0], col[1], col[2], 0.5]));
}
let mut c = CpuCanvas::new(w, h, Camera::default(), [10, 12, 18, 255]);
c.push_lines(&lines);
c.push_quads(&quads);
let bucketed = c.rasterize();
// Brute-force reference: same background, but every row sees the full index lists.
let bg = premul([10, 12, 18, 255]);
let mut pm = Pixmap::new(w as u16, h as u16);
let all_l: Vec<u32> = (0..lines.len() as u32).collect();
let all_q: Vec<u32> = (0..quads.len() as u32).collect();
let data = pm.data_mut();
for y in 0..h {
let row = &mut data[(y * w) as usize..((y + 1) * w) as usize];
for px in row.iter_mut() {
*px = bg;
}
raster_row(row, y, w, &lines, &quads, &all_l, &all_q);
}
// Convert the reference pixmap to straight-alpha bytes the same way `frame` does.
let brute: Vec<u8> = data
.iter()
.flat_map(|p| {
let mut o = [0u8; 4];
unpremul_into(p, &mut o);
o
})
.collect();
assert_eq!(bucketed.rgba.len(), brute.len());
assert_eq!(bucketed.rgba, brute, "y-bucketed raster == brute-force-all-primitives");
assert!(bucketed.lit_px() > 1_000, "real content drawn, got {}", bucketed.lit_px());
}
// Smoke test: an opaque red circle at the centre of a black 64 x 64 canvas.
#[test]
fn cpu_canvas_lights_pixels_inside_a_circle() {
let cam = Camera::default();
let mut canvas = CpuCanvas::new(64, 64, cam, [0, 0, 0, 255]);
let c = CircleInstance { center: [32.0, 32.0], radius: 10.0, color: [1.0, 0.0, 0.0, 1.0], aa: 1.0 };
canvas.push_quads(&[c.lower()]);
let frame = canvas.rasterize();
assert_eq!(frame.rgba.len(), 64 * 64 * 4);
// Byte offset of pixel (32, 32) in the RGBA buffer.
let i = ((32 * 64 + 32) * 4) as usize;
assert!(frame.rgba[i] > 200 && frame.rgba[i + 1] < 50, "centre is red");
// Red channel of pixel (0, 0), far outside the circle.
let c0 = 0;
assert!(frame.rgba[c0] < 10, "corner stays background");
}
// Drives the renderer through the `Renderer` / `Canvas` traits: begin, push, present.
#[test]
fn cpu_renderer_round_trips_through_the_seam() {
let mut r = CpuRenderer::new([0, 0, 0, 255]);
let canvas = r.begin(48, 48, Camera::default());
let ring = RingInstance { center: [24.0, 24.0], radius: 12.0, inner: 6.0, color: [0.0, 1.0, 0.0, 1.0], aa: 1.0 };
canvas.push_quads(&[ring.lower()]);
assert_eq!(r.backend(), Backend::CpuVello);
let frame = r.present();
// Pixel 9 px right of centre: between the `inner` (6) and `radius` (12) values of the ring.
let band = (((24) * 48 + (24 + 9)) * 4) as usize;
assert!(frame.rgba[band + 1] > 200, "ring band green");
// Centre pixel: inside the ring's hole, so it should keep the black background.
let hole = ((24 * 48 + 24) * 4) as usize;
assert!(frame.rgba[hole + 1] < 50, "ring hole is background");
}
}