#[cfg(not(feature = "std"))]
use crate::kurbo::common::FloatFuncs as _;
use crate::kurbo::{CubicBez, Line, ParamCurve, ParamCurveNearest, PathEl, Point, QuadBez};
use crate::{
flatten::{SQRT_TOL, TOL, TOL_2},
geometry::RectU16,
kurbo::Affine,
tile::Tile,
};
use alloc::vec::Vec;
use bytemuck::{Pod, Zeroable};
use fearless_simd::*;
pub(crate) enum LinePathEl {
MoveTo(Point),
LineTo(Point),
}
pub(crate) trait Callback {
fn callback(&mut self, el: LinePathEl);
}
#[inline(always)]
pub(crate) fn flatten<S: Simd>(
simd: S,
path: impl IntoIterator<Item = PathEl>,
affine: Affine,
callback: &mut impl Callback,
flatten_ctx: &mut FlattenCtx,
cull_bbox: RectU16,
) {
flatten_ctx.flattened_cubics.clear();
let left = cull_bbox.x0 as f64;
let top = ((cull_bbox.y0 / Tile::HEIGHT) * Tile::HEIGHT) as f64;
let right = cull_bbox.x1 as f64;
let bottom = cull_bbox.y1 as f64;
let mut path = path.into_iter();
let Some(first_el) = path.next() else {
return;
};
let first_el = affine * first_el;
let PathEl::MoveTo(start_pt) = first_el else {
debug_assert!(
matches!(first_el, PathEl::MoveTo(_)),
"Non-empty paths must begin with `PathEl::MoveTo`, got {first_el:?}"
);
return;
};
let mut start_pt = start_pt;
let mut last_pt = start_pt;
callback.callback(LinePathEl::MoveTo(start_pt));
for el in path {
match affine * el {
PathEl::MoveTo(p) => {
if last_pt != start_pt {
callback.callback(LinePathEl::LineTo(start_pt));
}
last_pt = p;
start_pt = p;
callback.callback(LinePathEl::MoveTo(p));
}
PathEl::LineTo(p) => {
last_pt = p;
callback.callback(LinePathEl::LineTo(p));
}
PathEl::QuadTo(p1, p2) => {
let p0 = last_pt;
let line = Line::new(p0, p2);
if [p0, p1, p2].into_iter().all(|p| p.x > right)
|| [p0, p1, p2].into_iter().all(|p| p.y < top)
|| [p0, p1, p2].into_iter().all(|p| p.y > bottom)
{
callback.callback(LinePathEl::MoveTo(p2));
}
else if [p0, p1, p2].into_iter().all(|p| p.x < left)
|| line.nearest(p1, 0.).distance_sq <= 4. * TOL_2
{
callback.callback(LinePathEl::LineTo(p2));
} else {
let q = QuadBez::new(p0, p1, p2);
let params = q.estimate_subdiv(SQRT_TOL);
let n = ((0.5 / SQRT_TOL * params.val).ceil() as usize).max(1);
let step = 1.0 / (n as f64);
for i in 1..n {
let u = (i as f64) * step;
let t = q.determine_subdiv_t(¶ms, u);
let p = q.eval(t);
callback.callback(LinePathEl::LineTo(p));
}
callback.callback(LinePathEl::LineTo(p2));
}
last_pt = p2;
}
PathEl::CurveTo(p1, p2, p3) => {
let p0 = last_pt;
let line = Line::new(p0, p3);
if [p0, p1, p2, p3].into_iter().all(|p| p.x > right)
|| [p0, p1, p2, p3].into_iter().all(|p| p.y < top)
|| [p0, p1, p2, p3].into_iter().all(|p| p.y > bottom)
{
callback.callback(LinePathEl::MoveTo(p3));
}
else if [p0, p1, p2, p3].into_iter().all(|p| p.x < left)
|| f64::max(
line.nearest(p1, 0.).distance_sq,
line.nearest(p2, 0.).distance_sq,
) <= 16. / 9. * TOL_2
{
callback.callback(LinePathEl::LineTo(p3));
} else {
let c = CubicBez::new(p0, p1, p2, p3);
let max = flatten_cubic_simd(simd, c, flatten_ctx);
for p in &flatten_ctx.flattened_cubics[1..max] {
callback.callback(LinePathEl::LineTo(Point::new(p.x as f64, p.y as f64)));
}
}
last_pt = p3;
}
PathEl::ClosePath => {
if last_pt != start_pt {
callback.callback(LinePathEl::LineTo(start_pt));
last_pt = start_pt;
}
}
}
}
if last_pt != start_pt {
callback.callback(LinePathEl::LineTo(start_pt));
}
}
fn approx_parabola_integral(x: f64) -> f64 {
const D: f64 = 0.67;
x / (1.0 - D + (D.powi(4) + 0.25 * x * x).sqrt().sqrt())
}
fn approx_parabola_inv_integral(x: f64) -> f64 {
const B: f64 = 0.39;
x * (1.0 - B + (B * B + 0.25 * x * x).sqrt())
}
impl FlattenParamsExt for QuadBez {
#[inline(always)]
fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams {
let d01 = self.p1 - self.p0;
let d12 = self.p2 - self.p1;
let dd = d01 - d12;
let cross = (self.p2 - self.p0).cross(dd);
let x0 = d01.dot(dd) * cross.recip();
let x2 = d12.dot(dd) * cross.recip();
let scale = (cross / (dd.hypot() * (x2 - x0))).abs();
let a0 = approx_parabola_integral(x0);
let a2 = approx_parabola_integral(x2);
let val = if scale.is_finite() {
let da = (a2 - a0).abs();
let sqrt_scale = scale.sqrt();
if x0.signum() == x2.signum() {
da * sqrt_scale
} else {
let xmin = sqrt_tol / sqrt_scale;
sqrt_tol * da / approx_parabola_integral(xmin)
}
} else {
0.0
};
let u0 = approx_parabola_inv_integral(a0);
let u2 = approx_parabola_inv_integral(a2);
let uscale = (u2 - u0).recip();
FlattenParams {
a0,
a2,
u0,
uscale,
val,
}
}
#[inline(always)]
fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64 {
let a = params.a0 + (params.a2 - params.a0) * x;
let u = approx_parabola_inv_integral(a);
(u - params.u0) * params.uscale
}
}
trait FlattenParamsExt {
fn estimate_subdiv(&self, sqrt_tol: f64) -> FlattenParams;
fn determine_subdiv_t(&self, params: &FlattenParams, x: f64) -> f64;
}
#[derive(Clone, Copy, Debug, Default, Zeroable, Pod)]
#[repr(C)]
struct Point32 {
x: f32,
y: f32,
}
struct FlattenParams {
a0: f64,
a2: f64,
u0: f64,
uscale: f64,
val: f64,
}
const MAX_QUADS: usize = 16;
#[derive(Default, Debug)]
pub struct FlattenCtx {
even_pts: [Point32; MAX_QUADS + 4],
odd_pts: [Point32; MAX_QUADS],
a0: [f32; MAX_QUADS],
da: [f32; MAX_QUADS],
u0: [f32; MAX_QUADS],
uscale: [f32; MAX_QUADS],
val: [f32; MAX_QUADS],
n_quads: usize,
flattened_cubics: Vec<Point32>,
}
#[inline(always)]
fn is_finite_simd<S: Simd>(x: f32x4<S>) -> mask32x4<S> {
let simd = x.simd;
let x_abs = x.abs();
let reinterpreted = u32x4::from_bytes(x_abs.to_bytes());
simd.simd_lt_u32x4(reinterpreted, u32x4::splat(simd, 0x7f80_0000))
}
#[inline(always)]
fn approx_parabola_integral_simd<S: Simd, F: SimdFloat<S, Element = f32>>(x: F) -> F {
let simd = x.witness();
const D: f32 = 0.67;
const D_POWI_4: f32 = 0.201_511_2;
let temp = F::splat(x.witness(), 0.25)
.mul_add(x * x, F::splat(simd, D_POWI_4))
.sqrt()
.sqrt();
let denom = temp + (1. - D);
x / denom
}
#[inline(always)]
fn approx_parabola_inv_integral_simd<S: Simd>(x: f32x8<S>) -> f32x8<S> {
let simd = x.simd;
const B: f32 = 0.39;
const ONE_MINUS_B: f32 = 1.0 - B;
let temp = f32x8::splat(simd, 0.25).mul_add(x * x, B * B).sqrt();
let factor = f32x8::splat(simd, ONE_MINUS_B) + temp;
x * factor
}
#[inline(always)]
fn pt_splat_simd<S: Simd>(simd: S, pt: Point32) -> f32x8<S> {
let p_f64: f64 = bytemuck::cast(pt);
simd.reinterpret_f32_f64x4(f64x4::splat(simd, p_f64))
}
#[inline(always)]
fn eval_cubics_simd<S: Simd>(simd: S, c: &CubicBez, n: usize, result: &mut FlattenCtx) {
result.n_quads = n;
let dt = 0.5 / n as f32;
let p0p1 = f32x4::from_slice(
simd,
&[c.p0.x as f32, c.p0.y as f32, c.p1.x as f32, c.p1.y as f32],
);
let p2p3 = f32x4::from_slice(
simd,
&[c.p2.x as f32, c.p2.y as f32, c.p3.x as f32, c.p3.y as f32],
);
let split_single = |input: f32x4<S>| {
let t1 = simd.reinterpret_f64_f32x4(input);
let p0 = simd.zip_low_f64x2(t1, t1);
let p1 = simd.zip_high_f64x2(t1, t1);
let p0 = simd.reinterpret_f32_f64x2(p0);
let p1 = simd.reinterpret_f32_f64x2(p1);
(f32x8::block_splat(p0), f32x8::block_splat(p1))
};
let (p0_128, p1_128) = split_single(p0p1);
let (p2_128, p3_128) = split_single(p2p3);
let coeff_a = (p1_128 - p2_128).mul_add(3.0, p3_128 - p0_128);
let coeff_b = p1_128.mul_add(-2.0, p0_128 + p2_128) * 3.0;
let coeff_c = (p1_128 - p0_128) * 3.0;
let coeff_d = p0_128;
let iota = f32x8::from_slice(simd, &[0.0, 0.0, 2.0, 2.0, 1.0, 1.0, 3.0, 3.0]);
let step = iota * dt;
let mut t = step;
let t_inc = f32x8::splat(simd, 4.0 * dt);
let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.even_pts);
let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut result.odd_pts);
for i in 0..n.div_ceil(2) {
let evaluated = (coeff_a.mul_add(t, coeff_b))
.mul_add(t, coeff_c)
.mul_add(t, coeff_d);
let (low, high) = simd.split_f32x8(evaluated);
low.store_slice(&mut even_pts[i * 4..][..4]);
high.store_slice(&mut odd_pts[i * 4..][..4]);
t += t_inc;
}
p3_128.store_slice(&mut even_pts[n * 2..][..8]);
}
#[inline(always)]
fn estimate_subdiv_simd<S: Simd>(simd: S, sqrt_tol: f32, ctx: &mut FlattenCtx) {
let n = ctx.n_quads;
let even_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.even_pts);
let odd_pts: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.odd_pts);
for i in 0..n.div_ceil(4) {
let p0 = f32x8::from_slice(simd, &even_pts[i * 8..][..8]);
let p_onehalf = f32x8::from_slice(simd, &odd_pts[i * 8..][..8]);
let p2 = f32x8::from_slice(simd, &even_pts[(i * 8 + 2)..][..8]);
let x = p0 * -0.5;
let x1 = p_onehalf.mul_add(2.0, x);
let p1 = p2.mul_add(-0.5, x1);
p1.store_slice(&mut odd_pts[(i * 8)..][..8]);
let d01 = p1 - p0;
let d12 = p2 - p1;
let d01x = simd.unzip_low_f32x8(d01, d01);
let d01y = simd.unzip_high_f32x8(d01, d01);
let d12x = simd.unzip_low_f32x8(d12, d12);
let d12y = simd.unzip_high_f32x8(d12, d12);
let ddx = d01x - d12x;
let ddy = d01y - d12y;
let d02x = d01x + d12x;
let d02y = d01y + d12y;
let cross = ddx.mul_add(-d02y, d02x * ddy);
let x0_x2_a = {
let (d01x_low, _) = simd.split_f32x8(d01x);
let (d12x_low, _) = simd.split_f32x8(d12x);
simd.combine_f32x4(d12x_low, d01x_low) * ddx
};
let temp1 = {
let (d12y_low, _) = simd.split_f32x8(d12y);
let (d01y_low, _) = simd.split_f32x8(d01y);
simd.combine_f32x4(d12y_low, d01y_low)
};
let x0_x2_num = temp1.mul_add(ddy, x0_x2_a);
let x0_x2 = x0_x2_num / cross;
let (ddx_low, _) = simd.split_f32x8(ddx);
let (ddy_low, _) = simd.split_f32x8(ddy);
let dd_hypot = ddy_low.mul_add(ddy_low, ddx_low * ddx_low).sqrt();
let (x0, x2) = simd.split_f32x8(x0_x2);
let scale_denom = dd_hypot * (x2 - x0);
let (temp2, _) = simd.split_f32x8(cross);
let scale = (temp2 / scale_denom).abs();
let a0_a2 = approx_parabola_integral_simd(x0_x2);
let (a0, a2) = simd.split_f32x8(a0_a2);
let da = a2 - a0;
let da_abs = da.abs();
let sqrt_scale = scale.sqrt();
let temp3 = simd.or_i32x4(x0.bitcast(), x2.bitcast());
let mask = simd.simd_ge_i32x4(temp3, i32x4::splat(simd, 0));
let noncusp = da_abs * sqrt_scale;
let xmin = sqrt_tol / sqrt_scale;
let approxint = approx_parabola_integral_simd(xmin);
let cusp = (sqrt_tol * da_abs) / approxint;
let val_raw = simd.select_f32x4(mask, noncusp, cusp);
let finite_mask = is_finite_simd(val_raw);
let val = simd.select_f32x4(finite_mask, val_raw, f32x4::splat(simd, 0.0));
let u0_u2 = approx_parabola_inv_integral_simd(a0_a2);
let (u0, u2) = simd.split_f32x8(u0_u2);
let uscale_a = u2 - u0;
let uscale = 1.0 / uscale_a;
a0.store_slice(&mut ctx.a0[i * 4..][..4]);
da.store_slice(&mut ctx.da[i * 4..][..4]);
u0.store_slice(&mut ctx.u0[i * 4..][..4]);
uscale.store_slice(&mut ctx.uscale[i * 4..][..4]);
val.store_slice(&mut ctx.val[i * 4..][..4]);
}
}
#[inline(always)]
fn output_lines_simd<S: Simd>(
simd: S,
ctx: &mut FlattenCtx,
i: usize,
x0: f32,
dx: f32,
n: usize,
start_idx: usize,
) {
let p0 = pt_splat_simd(simd, ctx.even_pts[i]);
let p1 = pt_splat_simd(simd, ctx.odd_pts[i]);
let p2 = pt_splat_simd(simd, ctx.even_pts[i + 1]);
const IOTA2: [f32; 8] = [0., 0., 1., 1., 2., 2., 3., 3.];
let iota2 = f32x8::from_slice(simd, IOTA2.as_ref());
let x = iota2.mul_add(dx, f32x8::splat(simd, x0));
let da = f32x8::splat(simd, ctx.da[i]);
let mut a = da.mul_add(x, f32x8::splat(simd, ctx.a0[i]));
let a_inc = 4.0 * dx * da;
let uscale = f32x8::splat(simd, ctx.uscale[i]);
let u0 = f32x8::splat(simd, ctx.u0[i]);
let coeff_a = p1.mul_add(-2.0, p0) + p2;
let coeff_b = (p1 - p0) * 2.0;
let coeff_c = p0;
let out: &mut [f32] = bytemuck::cast_slice_mut(&mut ctx.flattened_cubics[start_idx..]);
for j in 0..n.div_ceil(4) {
let u = approx_parabola_inv_integral_simd(a);
let t = (u - u0) * uscale;
let p = coeff_a.mul_add(t, coeff_b).mul_add(t, coeff_c);
p.store_slice(&mut out[j * 8..][..8]);
a += a_inc;
}
}
#[inline(always)]
fn flatten_cubic_simd<S: Simd>(simd: S, c: CubicBez, ctx: &mut FlattenCtx) -> usize {
let n_quads = estimate_num_quads(c, TOL as f32);
eval_cubics_simd(simd, &c, n_quads, ctx);
let tol = (TOL as f32) * (1.0 - TO_QUAD_TOL);
let sqrt_tol = tol.sqrt();
estimate_subdiv_simd(simd, sqrt_tol, ctx);
let sum: f32 = ctx.val[..n_quads].iter().sum();
let n = ((0.5 * sum / sqrt_tol).ceil() as usize).max(1);
let target_len = n + 4;
if target_len > ctx.flattened_cubics.len() {
ctx.flattened_cubics.resize(target_len, Point32::default());
}
let step = sum / (n as f32);
let step_recip = 1.0 / step;
let mut val_sum = 0.0;
let mut last_n = 0;
let mut x0base = 0.0;
for i in 0..n_quads {
let val = ctx.val[i];
val_sum += val;
let this_n = val_sum * step_recip;
let this_n_next = 1.0 + this_n.floor();
let dn = this_n_next as usize - last_n;
if dn > 0 {
let dx = step / val;
let x0 = x0base * dx;
output_lines_simd(simd, ctx, i, x0, dx, dn, last_n);
}
x0base = this_n_next - this_n;
last_n = this_n_next as usize;
}
ctx.flattened_cubics[n] = ctx.even_pts[n_quads];
n + 1
}
#[inline(always)]
fn estimate_num_quads(c: CubicBez, accuracy: f32) -> usize {
let q_accuracy = (accuracy * TO_QUAD_TOL) as f64;
let max_hypot2 = 432.0 * q_accuracy * q_accuracy;
let p1x2 = c.p1.to_vec2() * 3.0 - c.p0.to_vec2();
let p2x2 = c.p2.to_vec2() * 3.0 - c.p3.to_vec2();
let err = (p2x2 - p1x2).hypot2();
let err_div = err / max_hypot2;
estimate(err_div)
}
const TO_QUAD_TOL: f32 = 0.1;
#[inline(always)]
fn estimate(err_div: f64) -> usize {
const LUT: [f64; MAX_QUADS] = [
1.0, 64.0, 729.0, 4096.0, 15625.0, 46656.0, 117649.0, 262144.0, 531441.0, 1000000.0,
1771561.0, 2985984.0, 4826809.0, 7529536.0, 11390625.0, 16777216.0,
];
#[expect(clippy::needless_range_loop, reason = "better clarity")]
for i in 0..MAX_QUADS {
if err_div <= LUT[i] {
return i + 1;
}
}
MAX_QUADS
}
#[cfg(test)]
mod tests {
use crate::flatten_simd::{MAX_QUADS, estimate};
fn old_estimate(err_div: f64) -> usize {
let n_quads = (err_div.powf(1. / 6.0).ceil() as usize).max(1);
n_quads.min(MAX_QUADS)
}
#[test]
#[ignore]
fn accuracy() {
for i in 0..u32::MAX {
let num = f32::from_bits(i);
if num.is_finite() {
assert_eq!(old_estimate(num as f64), estimate(num as f64), "{num}");
}
}
}
}