facett-core 0.1.10

// **GPU compute particles** (feature `wgpu`) — storage-buffer boids stepped on the
// GPU and drawn as soft additive points into an HDR target. Two stages:
//  - `cs_step`: O(n²) Reynolds flocking (separation / alignment / cohesion), reads
//    `src`, writes `dst` (ping-pong, no read/write hazard). Deterministic given the
//    same input + dt + step count (FC-7).
//  - `pt_vs`/`pt_fs`: instanced unit quad per particle, soft radial falloff,
//    premultiplied so an additive (`src=ONE,dst=ONE`) blend accumulates glow.

// One boid as stored in the particle storage buffers. The same layout is read by
// the compute stage (`src`), written by it (`dst`), and read by the vertex stage
// (`points`), so field order and types must stay in sync with the host-side buffer.
struct Particle {
    // Position. `cs_step` wraps each component with `wrap01` before storing it, and
    // `pt_vs` maps it to NDC assuming the unit square.
    pos: vec2<f32>,
    // Velocity in position units per unit of `dt` (`cs_step` integrates
    // `pos += vel * dt`); its length is capped at `max_speed` by `cs_step`.
    vel: vec2<f32>,
};

// Simulation parameters consumed by `cs_step`, packed into three vec4 slots.
// Every value is an f32; the particle count is converted with `u32(...)` in the shader.
struct SimU {
    // x = dt, y = count (as f32), z = max_speed, w = sep_radius
    a: vec4<f32>,
    // x = align_radius, y = cohesion_radius, z = sep_w, w = align_w
    b: vec4<f32>,
    // x = cohesion_w, y..w unused
    c: vec4<f32>,
};

// Compute-stage bindings (used by `cs_step` only).
// SIM: per-step simulation parameters.
@group(0) @binding(0) var<uniform> SIM: SimU;
// src: particle state read by this step (read-only).
@group(0) @binding(1) var<storage, read> src: array<Particle>;
// dst: particle state produced by this step; `cs_step` writes element i only.
// NOTE(review): the host presumably swaps src/dst between steps (ping-pong) — confirm.
@group(0) @binding(2) var<storage, read_write> dst: array<Particle>;

// Wraps `v` onto the unit torus and returns a value in the half-open range [0,1).
//
// `v - floor(v)` alone is not enough in f32: for a tiny negative input such as
// -1e-9 the exact result (0.999999999) is not representable and rounds up to
// exactly 1.0, which escapes the intended range. 1.0 and 0.0 are the same point
// on the torus, so that case is folded back to 0.0.
fn wrap01(v: f32) -> f32 {
    let wrapped = v - floor(v);
    // select(f, t, cond) yields `t` when `cond` is true.
    return select(wrapped, 0.0, wrapped >= 1.0);
}

// Compute stage: advances particle `gid.x` by one timestep of Reynolds flocking.
//
// Each invocation reads every particle in `src` (O(n) per particle, O(n²) per
// step), accumulates separation / alignment / cohesion contributions from the
// neighbours inside the respective radii, integrates velocity and position with
// `dt`, caps the speed at `max_speed`, wraps the position with `wrap01`, and
// writes the result to `dst[gid.x]`. `src` is never written, so invocations do
// not race with each other.
//
// The domain is a unit torus: neighbour offsets use the minimum-image
// convention so that particles on opposite sides of the wrap seam see each
// other, consistent with the toroidal position wrap.
@compute @workgroup_size(64)
fn cs_step(@builtin(global_invocation_id) gid: vec3<u32>) {
    let i = gid.x;
    // The count comes from a uniform; never let it exceed what the bound buffers
    // actually hold, so neither the neighbour loop nor the final store can index
    // out of bounds.
    let n = min(u32(SIM.a.y), min(arrayLength(&src), arrayLength(&dst)));
    // The dispatch is rounded up to whole workgroups; surplus invocations exit.
    if (i >= n) { return; }

    let dt = SIM.a.x;
    let max_speed = SIM.a.z;
    let sep_r = SIM.a.w;
    let align_r = SIM.b.x;
    let coh_r = SIM.b.y;
    let sep_w = SIM.b.z;
    let align_w = SIM.b.w;
    let coh_w = SIM.c.x;

    let me = src[i];
    var sep = vec2<f32>(0.0, 0.0);
    var align_sum = vec2<f32>(0.0, 0.0);
    // Sum of (wrapped) offsets to cohesion neighbours, not of absolute positions:
    // averaging absolute positions is meaningless across the wrap seam.
    var coh_sum = vec2<f32>(0.0, 0.0);
    var align_n = 0.0;
    var coh_n = 0.0;

    for (var j = 0u; j < n; j = j + 1u) {
        if (j == i) { continue; }
        let o = src[j];
        // Minimum-image offset on the unit torus: subtracting the rounded raw
        // difference picks the shortest of the wrapped displacements per axis.
        let raw = o.pos - me.pos;
        let d = raw - round(raw);
        let dist = length(d);
        // dist > 0 guards the division for coincident particles.
        if (dist > 0.0 && dist < sep_r) {
            sep = sep - d / (dist * dist); // push away, stronger when closer
        }
        if (dist < align_r) {
            align_sum = align_sum + o.vel;
            align_n = align_n + 1.0;
        }
        if (dist < coh_r) {
            coh_sum = coh_sum + d;
            coh_n = coh_n + 1.0;
        }
    }

    var acc = sep * sep_w;
    if (align_n > 0.0) {
        // Steer towards the neighbours' mean velocity.
        acc = acc + (align_sum / align_n - me.vel) * align_w;
    }
    if (coh_n > 0.0) {
        // Mean offset == vector from this particle to the neighbours' centroid.
        acc = acc + (coh_sum / coh_n) * coh_w;
    }

    var vel = me.vel + acc * dt;
    let sp = length(vel);
    if (sp > max_speed) {
        // Rescale to max_speed while keeping the direction.
        vel = vel / sp * max_speed;
    }
    var pos = me.pos + vel * dt;
    pos = vec2<f32>(wrap01(pos.x), wrap01(pos.y));

    dst[i].pos = pos;
    dst[i].vel = vel;
}

// ───────────────────────────── render ─────────────────────────────
// Draw parameters consumed by `pt_vs` and `pt_fs`.
struct DrawU {
    // x = point_size (px), y = intensity, z = viewport_w, w = viewport_h
    // `pt_vs` scales quad corners at ±1 by point_size, so the quad spans
    // 2 × point_size pixels per axis. `pt_fs` multiplies the colour by intensity.
    a: vec4<f32>,
    // tint rgb + speed_to_warm (mix factor for fast = warmer)
    // rgb is the colour at zero speed; `speed * w`, clamped to [0,1], is the mix
    // factor towards the warm colour hard-coded in `pt_vs`.
    tint: vec4<f32>,
};

// Render-stage bindings (used by `pt_vs` / `pt_fs`). They reuse group 0 bindings
// 0 and 1, which the compute bindings also occupy.
// NOTE(review): presumably the render pipeline has its own bind group layout — confirm.
// DRAW: draw parameters.
@group(0) @binding(0) var<uniform> DRAW: DrawU;
// points: particle state to draw, indexed by instance in `pt_vs` (read-only).
@group(0) @binding(1) var<storage, read> points: array<Particle>;

// Vertex → fragment interface for the particle quads.
struct VOut {
    // Clip-space position of the quad corner (z = 0, w = 1 as written by `pt_vs`).
    @builtin(position) clip: vec4<f32>,
    // Quad-local coordinate: ±1 at the corners, interpolated across the quad;
    // `pt_fs` uses its length as the radial distance from the particle centre.
    @location(0) local: vec2<f32>,
    // Per-particle colour computed in `pt_vs` (tint blended towards warm by speed).
    @location(1) color: vec3<f32>,
};

// Vertex stage: emits one corner of the screen-aligned quad for particle `inst`.
//
// `vi` selects one of six corners (two triangles covering [-1,1]²). The particle
// centre is mapped from the unit square to NDC with y flipped, then the corner
// is offset by `point_size` pixels converted to NDC via the viewport size.
// The colour is the tint blended towards a fixed warm colour by speed.
@vertex
fn pt_vs(@builtin(vertex_index) vi: u32, @builtin(instance_index) inst: u32) -> VOut {
    // Triangle list: (BL, BR, TL) and (TL, BR, TR) in quad-local space.
    var quad = array<vec2<f32>, 6>(
        vec2<f32>(-1.0, -1.0),
        vec2<f32>(1.0, -1.0),
        vec2<f32>(-1.0, 1.0),
        vec2<f32>(-1.0, 1.0),
        vec2<f32>(1.0, -1.0),
        vec2<f32>(1.0, 1.0),
    );
    let corner = quad[vi];
    let particle = points[inst];

    // Unit-square position → NDC centre (x: [0,1] → [-1,1], y: [0,1] → [1,-1]).
    let centre = vec2<f32>(particle.pos.x * 2.0 - 1.0, 1.0 - particle.pos.y * 2.0);

    // Corner offset in pixels, then in NDC (one pixel is 2 / viewport NDC units;
    // y is negated to match the flipped y axis above).
    let offset_px = corner * DRAW.a.x;
    let viewport = DRAW.a.zw;
    let offset_ndc = vec2<f32>(
        offset_px.x / viewport.x * 2.0,
        -offset_px.y / viewport.y * 2.0,
    );

    // Cool tint at rest, blending to a fixed warm colour as speed rises.
    let warmth = clamp(length(particle.vel) * DRAW.tint.w, 0.0, 1.0);
    let colour = mix(DRAW.tint.rgb, vec3<f32>(1.0, 0.6, 0.25), warmth);

    return VOut(vec4<f32>(centre + offset_ndc, 0.0, 1.0), corner, colour);
}

// Fragment stage: shades one pixel of a particle quad as a soft round glow.
//
// Alpha is a gaussian-like falloff of the distance from the quad centre, cut to
// zero outside the unit circle. RGB is the particle colour already multiplied by
// that alpha (and by the intensity), i.e. premultiplied, so an additive blend
// simply sums the contributions.
@fragment
fn pt_fs(in: VOut) -> @location(0) vec4<f32> {
    let radius = length(in.local);
    // exp(-3.5 r²): 1 at the centre, about 0.03 at the unit circle.
    let falloff = exp(-radius * radius * 3.5);
    // step(edge, x) is 1 when edge <= x: keeps r <= 1, zeroes the quad corners.
    let inside = step(radius, 1.0);
    let alpha = falloff * inside;
    let glow = in.color * alpha * DRAW.a.y;
    return vec4<f32>(glow, alpha);
}