// **GPU compute particles** (feature `wgpu`) — storage-buffer boids stepped on the
// GPU and drawn as soft additive points into an HDR target. Two stages:
// - `cs_step`: O(n²) Reynolds flocking (separation / alignment / cohesion), reads
// `src`, writes `dst` (ping-pong, no read/write hazard). Deterministic given the
// same input + dt + step count (FC-7).
// - `pt_vs`/`pt_fs`: instanced unit quad per particle, soft radial falloff,
// premultiplied so an additive (`src=ONE,dst=ONE`) blend accumulates glow.
// One boid. This is the element type of the `src`, `dst` and `points` storage arrays.
struct Particle {
// Position. `cs_step` passes each component through `wrap01` before storing it, and
// `pt_vs` maps it to NDC as if it spanned 0..1.
pos: vec2<f32>,
// Velocity. `cs_step` integrates position as `pos + vel * dt`; `pt_vs` uses its
// length to pick the colour.
vel: vec2<f32>,
};
// Simulation parameters for `cs_step`, packed into vec4 lanes.
struct SimU {
// x = dt, y = count (as f32), z = max_speed, w = sep_radius
// `count` is converted with `u32(...)` and used as the particle loop bound.
a: vec4<f32>,
// x = align_radius, y = cohesion_radius, z = sep_w, w = align_w
// The `*_w` lanes are the weights applied to each steering term.
b: vec4<f32>,
// x = cohesion_w, y..w unused
c: vec4<f32>,
};
// Resources used by the compute entry point `cs_step`.
// `DRAW` / `points` further down reuse group 0, bindings 0 and 1; in the visible
// source each entry point references only one of the two sets.
// Simulation parameters (see `SimU`).
@group(0) @binding(0) var<uniform> SIM: SimU;
// Particle state read by `cs_step` (read-only).
@group(0) @binding(1) var<storage, read> src: array<Particle>;
// Particle state written by `cs_step`; the ping-pong partner of `src`.
@group(0) @binding(2) var<storage, read_write> dst: array<Particle>;
// Wraps `v` toroidally into the half-open interval [0, 1).
//
// `v - floor(v)` alone is not enough: for a tiny negative `v` (e.g. -1e-9)
// floor(v) is -1 and the f32 sum `v + 1` rounds to exactly 1.0, which is outside
// the interval. That one case is folded back to 0.0, the same point on the torus.
// NaN inputs propagate unchanged.
fn wrap01(v: f32) -> f32 {
    let frac = v - floor(v);
    // select(f, t, cond) yields `t` when `cond` is true.
    return select(frac, 0.0, frac >= 1.0);
}
// Advances one particle by one flocking step.
//
// Invocation `gid.x` reads every particle from `src` (O(n) work per invocation),
// accumulates separation / alignment / cohesion steering, integrates velocity and
// position with `dt`, and writes the result to `dst[gid.x]`. `src` is never
// written here.
@compute @workgroup_size(64)
fn cs_step(@builtin(global_invocation_id) gid: vec3<u32>) {
    let i = gid.x;
    // The count comes from a uniform, so clamp it to what is actually bound.
    // Otherwise an oversized count makes the loop read past the end of `src` and
    // lets surplus invocations write past the end of `dst`.
    let n = min(u32(SIM.a.y), min(arrayLength(&src), arrayLength(&dst)));
    if (i >= n) { return; }

    let dt = SIM.a.x;
    let max_speed = SIM.a.z;
    let sep_r = SIM.a.w;
    let align_r = SIM.b.x;
    let coh_r = SIM.b.y;
    let sep_w = SIM.b.z;
    let align_w = SIM.b.w;
    let coh_w = SIM.c.x;

    let me = src[i];

    var sep = vec2<f32>(0.0, 0.0);
    var align_sum = vec2<f32>(0.0, 0.0);
    var coh_sum = vec2<f32>(0.0, 0.0);
    var align_n = 0.0;
    var coh_n = 0.0;

    // NOTE(review): offsets below are plain differences, not the shortest offset
    // across the wrap applied to positions, so particles on opposite sides of the
    // seam do not see each other as neighbours. Confirm this is intended.
    for (var j = 0u; j < n; j++) {
        if (j == i) { continue; }
        let o = src[j];
        let d = o.pos - me.pos;
        let dist = length(d);
        // `dist > 0.0` skips coincident particles, which would divide by zero.
        if (dist > 0.0 && dist < sep_r) {
            sep -= d / (dist * dist); // push away, stronger when closer
        }
        if (dist < align_r) {
            align_sum += o.vel;
            align_n += 1.0;
        }
        if (dist < coh_r) {
            coh_sum += o.pos;
            coh_n += 1.0;
        }
    }

    var acc = sep * sep_w;
    if (align_n > 0.0) {
        // Steer toward the mean neighbour velocity.
        acc += (align_sum / align_n - me.vel) * align_w;
    }
    if (coh_n > 0.0) {
        // Steer toward the mean neighbour position.
        acc += (coh_sum / coh_n - me.pos) * coh_w;
    }

    var vel = me.vel + acc * dt;
    let sp = length(vel);
    if (sp > max_speed) {
        // Clamp speed while keeping direction.
        vel = vel / sp * max_speed;
    }

    var pos = me.pos + vel * dt;
    pos = vec2<f32>(wrap01(pos.x), wrap01(pos.y));

    dst[i] = Particle(pos, vel);
}
// ───────────────────────────── render ─────────────────────────────
// Draw parameters shared by `pt_vs` and `pt_fs`.
struct DrawU {
// x = point_size (px), y = intensity, z = viewport_w, w = viewport_h
// `pt_vs` scales the -1..1 quad corners by point_size, so it acts as the quad's
// half-extent in pixels. `pt_fs` multiplies the output colour by intensity.
a: vec4<f32>,
// tint rgb + speed_to_warm (mix factor for fast = warmer)
// `pt_vs` computes clamp(speed * w, 0, 1) and uses it to mix from rgb toward a
// fixed warm colour.
tint: vec4<f32>,
};
// Resources used by the render entry points `pt_vs` / `pt_fs`.
// These reuse group 0, bindings 0 and 1, which `SIM` / `src` also occupy; in the
// visible source each entry point references only one of the two sets.
// Draw parameters (see `DrawU`).
@group(0) @binding(0) var<uniform> DRAW: DrawU;
// Particle state, indexed by instance in `pt_vs` (read-only).
@group(0) @binding(1) var<storage, read> points: array<Particle>;
// Vertex-to-fragment interface: written by `pt_vs`, consumed by `pt_fs`.
struct VOut {
// Clip-space position of the quad corner (z = 0, w = 1 as written by `pt_vs`).
@builtin(position) clip: vec4<f32>,
// Quad-local corner coordinate in -1..1; `pt_fs` uses its length as the radius.
@location(0) local: vec2<f32>,
// Per-particle colour, before falloff and intensity are applied.
@location(1) color: vec3<f32>,
};
// Expands particle `inst` into a screen-aligned quad and picks its colour.
//
// `vi` selects one of six corners (two triangles).
// NOTE(review): assumes the host draws 6 vertices per instance — confirm.
@vertex
fn pt_vs(@builtin(vertex_index) vi: u32, @builtin(instance_index) inst: u32) -> VOut {
    // Unit quad as two triangles; indexed with the runtime vertex index.
    var quad = array<vec2<f32>, 6>(
        vec2<f32>(-1.0, -1.0),
        vec2<f32>(1.0, -1.0),
        vec2<f32>(-1.0, 1.0),
        vec2<f32>(-1.0, 1.0),
        vec2<f32>(1.0, -1.0),
        vec2<f32>(1.0, 1.0),
    );
    let corner = quad[vi];
    let particle = points[inst];
    let viewport = DRAW.a.zw;

    // Particle centre: 0..1 position mapped to NDC, with y flipped.
    let centre = vec2<f32>(particle.pos.x * 2.0 - 1.0, 1.0 - particle.pos.y * 2.0);

    // Corner offset in pixels, then converted to NDC (same y flip).
    let offset_px = corner * DRAW.a.x;
    let offset_ndc = vec2<f32>(
        offset_px.x / viewport.x * 2.0,
        -offset_px.y / viewport.y * 2.0,
    );

    // Faster particles shift from the tint colour toward a fixed warm colour.
    let warmth = clamp(length(particle.vel) * DRAW.tint.w, 0.0, 1.0);
    let warm_rgb = vec3<f32>(1.0, 0.6, 0.25);

    return VOut(
        vec4<f32>(centre + offset_ndc, 0.0, 1.0),
        corner,
        mix(DRAW.tint.rgb, warm_rgb, warmth),
    );
}
// Shades one fragment of a particle quad as a soft round glow.
//
// Returns colour already multiplied by the falloff (and by the intensity in
// DRAW.a.y), with the falloff itself in alpha, so an additive blend sums the glow.
@fragment
fn pt_fs(in: VOut) -> @location(0) vec4<f32> {
    let radius = length(in.local);
    // 1.0 inside the unit circle, 0.0 in the quad corners beyond it.
    let inside = step(radius, 1.0);
    // Gaussian-like radial falloff.
    let falloff = exp(-radius * radius * 3.5);
    let alpha = falloff * inside;
    let rgb = in.color * alpha * DRAW.a.y;
    return vec4<f32>(rgb, alpha);
}