forma-render 0.1.3

// Copyright 2022 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

let WORKGROUP_SIZE = 256u;
let MAX_INVOCATION_COUNT = 16u;

let TILE_WIDTH: i32 = 16;
let TILE_WIDTH_SHIFT: u32 = 4u;
let TILE_HEIGHT: i32 = 4;
let TILE_HEIGHT_SHIFT: u32 = 2u;

let PIXEL_WIDTH: i32 = 16;
let PIXEL_SHIFT: u32 = 4u;

let MAX_WIDTH_SHIFT = 16u;
let MAX_HEIGHT_SHIFT = 15u;

let TILE_BIAS = 1;
let LAYER_ID_BIT_SIZE = 21u;
let DOUBLE_AREA_MULTIPLIER_BIT_SIZE = 6u;
let COVER_BIT_SIZE = 6u;

let NONE: u32 = 0xffffffffu;

// black-box methods that ensure precise results in the presence of ffast-math.

fn bbAdd(x: f32, y: f32) -> f32 {
    return select(x, x + y, y != 0.0);
}

fn bbSub(x: f32, y: f32) -> f32 {
    return select(x, x - y, y != 0.0);
}

fn bbMul(x: f32, y: f32) -> f32 {
    return select(x, x * y, y != 1.0);
}

fn bbDiv(x: f32, y: f32) -> f32 {
    return select(x, x / y, y != 1.0);
}

fn twoSum(x: f32, y: f32) -> vec2<f32> {
    let r = bbAdd(x, y);
    let t = bbSub(r, x);
    let e = bbAdd(bbSub(x, bbSub(r, t)), bbSub(y, t));
    
    return vec2(r, e);
}

fn twoSumQuick(x: f32, y: f32) -> vec2<f32> {
    let r = bbAdd(x, y);
    let e = bbSub(y, bbSub(r, x));
    
    return vec2(r, e);
}

fn twoDifference(x: f32, y: f32) -> vec2<f32> {
    let r = bbSub(x, y);
    let t = bbSub(r, x);
    let e = bbSub(bbSub(x,  bbSub(r, t)), bbAdd(y, t));

    return vec2(r, e);
}

fn twoProduct(x: f32, y: f32) -> vec2<f32> {
    let r = bbMul(x, y);
    let e = fma(x, y, -r);
    
    return vec2(r, e);
}

// A "float-float" adaptiation of the double-double arithmetic. Adapted from
// https://github.com/sukop/doubledouble.
struct ff64 {
    hi: f32,
    lo: f32,
}

fn ff64F32(val: f32) -> ff64 {
    return ff64(val, 0.0);
}

fn add(x: ff64, y: ff64) -> ff64 {
    var r = twoSum(x.hi, y.hi);
    r.y = bbAdd(r.y, bbAdd(x.lo, y.lo));
    r = twoSumQuick(r.x, r.y);
    
    return ff64(r.x, r.y);
}

fn sub(x: ff64, y: ff64) -> ff64 {
    var r = twoDifference(x.hi, y.hi);
    r.y = bbAdd(r.y, bbSub(x.lo, y.lo));
    r = twoSumQuick(r.x, r.y);
    
    return ff64(r.x, r.y);
}

fn mul(x: ff64, y: ff64) -> ff64 {
    var r = twoProduct(x.hi, y.hi);
    r.y = bbAdd(r.y, bbAdd(bbMul(x.hi, y.lo), bbMul(x.lo, y.hi)));
    r = twoSumQuick(r.x, r.y);
    
    return ff64(r.x, r.y);
}

fn div(x: ff64, y: ff64) -> ff64 {
    let r = bbDiv(x.hi, y.hi);
    let s = twoProduct(r, y.hi);
    let e = bbDiv(
        bbSub(bbAdd(bbSub(bbSub(x.hi, s.x), s.y), x.lo), bbMul(r, y.lo)),
        y.hi,
    );
    let v = twoSumQuick(r, e);
    
    return ff64(v.x, v.y);
}

fn ff64Ceil(val: ff64) -> f32 {
    let ceilHi = ceil(val.hi);
    let ceilLo = ceil(val.lo);

    return select(
        ceilHi + ceilLo,
        ceilHi,
        ceilHi > val.hi,
    );
}

struct Config {
    lines_len: u32,
    segments_len: u32,
}

struct PixelSegment {
    lo: u32,
    hi: u32,
}

struct Line {
    a: f32,
    b: f32,
    c: f32,
    d: f32,
    x0: f32,
    y0: f32,
    dx: f32,
    dy: f32,
    order: u32,
}

@group(0) @binding(0) var<uniform> config: Config;
@group(0) @binding(1) var<storage> points: array<vec2<f32>>;
@group(0) @binding(2) var<storage> orders: array<u32>;
@group(0) @binding(3) var<storage, write> lines_out: array<Line>;
@group(0) @binding(4) var<storage> lines_in: array<Line>;
@group(0) @binding(5) var<storage> line_lens: array<u32>;
@group(0) @binding(6) var<storage, write> segments: array<PixelSegment>;

fn prepareLine(p0: vec2<f32>, p1: vec2<f32>, order: u32) -> Line {
    if order == NONE {
        return Line(0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0u);
    }

    let dp = p1 - p0;
    let dp_recip = 1.0 / dp;

    let t_offset = select(
        vec2(0.0, 0.0),
        max((ceil(p0) - p0) * dp_recip, -fract(p0) * dp_recip),
        dp != vec2<f32>(0.0, 0.0),
    );

    let a = abs(dp_recip.x);
    let b = abs(dp_recip.y);
    let c = t_offset.x;
    let d = t_offset.y;

    // Converting to sub-pixel by multiplying with `PIXEL_WIDTH`.
    let sp0 = p0 * f32(PIXEL_WIDTH);
    let sdp = dp * f32(PIXEL_WIDTH);

    return Line(a, b, c, d, sp0.x, sp0.y, sdp.x, sdp.y, order);
}

// Binary search.
fn findLineId(segment_index: u32, line_len: u32) -> u32 {
    // Let line_id be min(i) ∀ i ∈ [0; segments_len) where
    // lines_orders[i] > segment_index. Binary search in range [lo; hi].
    var lo: u32 = 0u;
    var hi: u32 = line_len;

    for (var i = line_len; i > 0u; i >>= 1u){
        let mid: u32 = lo + ((hi - lo) >> 1u);
        let is_greater = line_lens[mid] > segment_index;

        lo = select(mid + 1u, lo, is_greater);
        hi = select(hi, mid, is_greater);
    }

    return lo;
}

// Returns true when the value equals +Inf or -Inf.
fn isInf(v: f32) -> bool {
    let clear_sign_mask = 0x7fffffffu;
    let infinity = 0x7f800000u;
    return (bitcast<u32>(v) & clear_sign_mask) == infinity;
}

// Rounds to closest integer or towards -Inf if there is a draw.
fn roundToI32(v: f32) -> i32 {
    return i32(floor(v + 0.5));
}

fn packPixelSegment(
    layer_id: u32,
    tile_x: i32,
    tile_y: i32,
    local_x: u32,
    local_y: u32,
    double_area_multiplier: u32,
    cover: i32,
) -> PixelSegment {
    var seg: PixelSegment;

    seg.hi = u32(max(0, tile_y + TILE_BIAS)) <<
        (32u - (MAX_HEIGHT_SHIFT - TILE_HEIGHT_SHIFT));

    seg.hi = insertBits(
        seg.hi,
        u32(max(0, tile_x + TILE_BIAS)),
        32u - (MAX_WIDTH_SHIFT - TILE_WIDTH_SHIFT) -
            (MAX_HEIGHT_SHIFT - TILE_HEIGHT_SHIFT),
        MAX_WIDTH_SHIFT - TILE_WIDTH_SHIFT,
    );

    seg.hi = insertBits(
        seg.hi,
        layer_id >> (32u - TILE_WIDTH_SHIFT - TILE_HEIGHT_SHIFT -
            DOUBLE_AREA_MULTIPLIER_BIT_SIZE - COVER_BIT_SIZE),
        0u,
        32u - (MAX_WIDTH_SHIFT - TILE_WIDTH_SHIFT) -
            (MAX_HEIGHT_SHIFT - TILE_HEIGHT_SHIFT),
    );

    seg.lo = layer_id <<
        (TILE_WIDTH_SHIFT + TILE_HEIGHT_SHIFT +
        DOUBLE_AREA_MULTIPLIER_BIT_SIZE + COVER_BIT_SIZE);

    seg.lo =  insertBits(
        seg.lo,
        local_x,
        TILE_HEIGHT_SHIFT + DOUBLE_AREA_MULTIPLIER_BIT_SIZE + COVER_BIT_SIZE,
        TILE_WIDTH_SHIFT,
    );

    seg.lo = insertBits(
        seg.lo,
        local_y,
        DOUBLE_AREA_MULTIPLIER_BIT_SIZE + COVER_BIT_SIZE,
        TILE_HEIGHT_SHIFT,
    );

    seg.lo = insertBits(
        seg.lo,
        double_area_multiplier,
        COVER_BIT_SIZE,
        DOUBLE_AREA_MULTIPLIER_BIT_SIZE,
    );

    seg.lo = insertBits(
        seg.lo,
        u32(cover),
        0u,
        COVER_BIT_SIZE,
    );

    return seg;
}

// This finds the ith term in the ordered union of two sequences:
//
// 1. a * t + c
// 2. b * t + d
//
// It works by estimating the amount of items that came from sequence 1 and
// sequence 2 such that the next item will be the ith. This results in two
// indices from each sequence. The final step is to simply pick the smaller one
// which naturally comes next.
fn find(i: i32, a_over_a_b: ff64, b_over_a_b: ff64, c_d_over_a_b: ff64, a: f32, b: f32, c: f32, d: f32) -> f32 {
    let i = f32(i);

    // Index estimation requires extra bits of information to work correctly for
    // cases where e.g. a + b would lose information.
    let ja = select(
        ff64Ceil(sub(mul(b_over_a_b, ff64F32(i)), c_d_over_a_b)),
        i,
        isInf(b),
    );
    let jb = select(
        ff64Ceil(add(mul(a_over_a_b, ff64F32(i)), c_d_over_a_b)),
        i,
        isInf(a),
    );

    let guess_a = a * ja + c;
    let guess_b = b * jb + d;

    return min(guess_a, guess_b);
}

fn computePixelSegment(li: u32, pi: u32) -> PixelSegment {
    let line_ = lines_in[li];
    let a = line_.a;
    let b = line_.b;
    let c = line_.c;
    let d = line_.d;

    let i: i32 = i32(pi) - i32(c != 0.0) - i32(d != 0.0);

    let i0 = i;
    let i1 = i + 1;

    let sum_recip = div(ff64F32(1.0), add(ff64F32(a), ff64F32(b)));
    let a_over_a_b = mul(ff64F32(a), sum_recip);
    let b_over_a_b = mul(ff64F32(b), sum_recip);
    let c_d_over_a_b = mul(sub(ff64F32(c), ff64F32(d)), sum_recip);

    let t0 = max(
        find(i0, a_over_a_b, b_over_a_b, c_d_over_a_b, a, b, c, d),
        0.0,
    );
    let t1 = min(
        find(i1, a_over_a_b, b_over_a_b, c_d_over_a_b, a, b, c, d),
        1.0,
    );

    let x0f = t0 * line_.dx + line_.x0;
    let y0f = t0 * line_.dy + line_.y0;
    let x1f = t1 * line_.dx + line_.x0;
    let y1f = t1 * line_.dy + line_.y0;

    let x0_sub: i32 = roundToI32(x0f);
    let x1_sub: i32 = roundToI32(x1f);
    let y0_sub: i32 = roundToI32(y0f);
    let y1_sub: i32 = roundToI32(y1f);

    let border_x: i32 = min(x0_sub, x1_sub) >> PIXEL_SHIFT;
    let border_y: i32 = min(y0_sub, y1_sub) >> PIXEL_SHIFT;

    let tile_x: i32 = (border_x >> TILE_WIDTH_SHIFT);
    let tile_y: i32 = (border_y >> TILE_HEIGHT_SHIFT);
    let local_x: u32 = u32(border_x & (TILE_WIDTH - 1));
    let local_y: u32 = u32(border_y & (TILE_HEIGHT - 1));

    let border = (border_x << PIXEL_SHIFT) + PIXEL_WIDTH;
    let height = y1_sub - y0_sub;

    let double_area_multiplier: u32 =
        u32(abs(x1_sub - x0_sub) + 2 * (border - max(x0_sub, x1_sub)));
    let cover: i32 = height;

    return packPixelSegment(
        line_.order,
        tile_x,
        tile_y,
        local_x,
        local_y,
        double_area_multiplier,
        cover,
    );
}

@compute @workgroup_size(256)
fn prepareLines(
    @builtin(global_invocation_id) global_id_vec: vec3<u32>
) {
    for (
        var line_index = global_id_vec.x;
        line_index < config.lines_len;
        line_index += WORKGROUP_SIZE * MAX_INVOCATION_COUNT
    ) {

        let p0 = points[line_index];
        let p1 = points[line_index + 1u];
        let order = orders[line_index];

        lines_out[line_index] = prepareLine(p0, p1, order);
    }
}

@compute @workgroup_size(256)
fn rasterize(
    @builtin(global_invocation_id) global_id_vec: vec3<u32>
) {
    for (
        var segment_index = global_id_vec.x;
        segment_index < config.segments_len;
        segment_index += WORKGROUP_SIZE * MAX_INVOCATION_COUNT
    ) {

        let li = findLineId(segment_index, config.lines_len);
        let pi = segment_index - select(
            0u,
            line_lens[max(0u, li - 1u)],
            li > 0u,
        );

        segments[segment_index] = computePixelSegment(li, pi);

        // Set segment beyond the last line with padding to u64 maximal value,
        // so that it stays at the end of the buffer after sort, and the painter
        // can ignore them based on the exact segment count.
        if segment_index >= config.segments_len {
            segments[segment_index] = PixelSegment(0xffffffffu, 0xffffffffu);
        }
    }
}