roxlap-render 0.12.0

Unified CPU/GPU renderer facade for the roxlap scene-graph engine — one SceneRenderer over roxlap-core opticast (softbuffer) and roxlap-gpu (wgpu), with automatic CPU fallback.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
//! roxlap-render — unified CPU/GPU renderer facade.
//!
//! One [`SceneRenderer`] hides the choice between the CPU opticast
//! path (`roxlap-core` / `roxlap-scene`, presented via `softbuffer`)
//! and the GPU compute-shader path (`roxlap-gpu`, presented via its
//! own wgpu surface). Construction picks the GPU backend when asked
//! and able, and **falls back to CPU automatically** when WGPU init
//! fails — so a host never has to branch on GPU availability or carry
//! the `Scene`→GPU upload/refresh/transform glue itself.
//!
//! Hosts stay thin: build a `Scene`, advance it from input, then call
//! [`SceneRenderer::render`] each frame. The facade owns the window
//! surface, the framebuffer/z-buffer (CPU) or the resident scene +
//! dirty-chunk tracking (GPU), and presentation.
//!
//! The per-frame flow is `render` → *(optional overlays)* → finish.
//! Between [`SceneRenderer::render`] and the finishing
//! [`SceneRenderer::present`] / [`SceneRenderer::paint_egui`] call, a
//! host may overlay depth-tested world-space lines with
//! [`SceneRenderer::draw_lines`] (editor gizmos, debug geometry — see
//! [`Line3`]); they land in the framebuffer, occluded by the rendered
//! scene, with egui still painting panels on top.
//!
//! This is the RF.0 skeleton: backend selection + fallback + a
//! clear-to-sky frame. RF.1/RF.2 fill in the real CPU/GPU scene
//! render; RF.3 adds sprites; RF.4 adds framebuffer capture.

#![forbid(unsafe_code)]

mod cpu;
/// WebGL2 framebuffer presenter for the CPU backend on wasm (the
/// browser has no `softbuffer`).
#[cfg(target_arch = "wasm32")]
mod cpu_blit;
#[cfg(feature = "hud")]
mod cpu_egui;
mod gpu;

#[cfg(not(target_arch = "wasm32"))]
use std::sync::Arc;

use roxlap_core::opticast::OpticastSettings;
use roxlap_core::sky::Sky;
use roxlap_core::sprite::SpriteLighting;
use roxlap_core::Camera;
use roxlap_scene::Scene;

pub use roxlap_formats::kfa::KfaSprite;
pub use roxlap_formats::kv6::Kv6;
pub use roxlap_formats::sprite::Sprite;
pub use roxlap_gpu::{GpuInitError, GpuRendererSettings, PowerPreference};
// Re-exported so hosts can name the [`SceneRenderer::new`] bounds
// without adding a direct `raw-window-handle` dependency of their own.
pub use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
// Re-exported so hosts feed [`SceneRenderer::paint_egui`] from the exact
// egui version the renderer was built against (`hud` feature).
#[cfg(feature = "hud")]
pub use egui;

use crate::cpu::CpuBackend;
use crate::gpu::GpuBackend;

/// Type-erased display handle stored by the CPU backend's softbuffer
/// surface. `raw-window-handle` implements `HasDisplayHandle` for
/// `Arc<H>` (`H: ?Sized`), and the bare trait object implements its
/// own object-safe trait — so `Arc<W>` coerces to `Arc<DynDisplay>`
/// for any provider `W`.
///
/// Native targets only: the alias is compiled out on `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) type DynDisplay = dyn HasDisplayHandle + Send + Sync + 'static;
/// Type-erased window handle counterpart to [`DynDisplay`].
///
/// Native targets only: the alias is compiled out on `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) type DynWindow = dyn HasWindowHandle + Send + Sync + 'static;

/// One placed sprite instance: which [`SpriteSet::models`] entry and
/// where in the world.
pub struct SpriteInstanceDesc {
    /// Index of the model to draw, into [`SpriteSet::models`].
    pub model: usize,
    /// World-space position of this instance; it overrides the
    /// model's own position.
    pub pos: [f32; 3],
}

/// Stable handle to a registered sprite model, returned (one per
/// [`SpriteSet::models`] entry, in order) by
/// [`SceneRenderer::set_sprites`]. Pass it to
/// [`refresh_sprite_model`](SceneRenderer::refresh_sprite_model) to
/// re-register that model's geometry after a content edit — so callers
/// never track the positional `usize` index themselves. Opaque on
/// purpose: there is no arithmetic to do on it.
///
/// The wrapped `usize` is crate-private, so hosts can copy, compare and
/// hash a handle but cannot construct or inspect one.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SpriteModelId(pub(crate) usize);

/// Backend-agnostic sprite description. The facade builds the CPU
/// per-instance draw list and the GPU instanced registry from the
/// same data, so both backends show identical sprites. The host owns
/// content (which models, where, recolouring) — building a recoloured
/// variant is just a second [`Sprite`] model with edited `kv6.voxels`.
pub struct SpriteSet {
    /// Distinct voxel models (KV6 + base orientation). Instances index
    /// into this; their position overrides the model's.
    pub models: Vec<Sprite>,
    /// Placed instances. Each one names a [`models`](Self::models)
    /// entry by index and supplies its own world position (see
    /// [`SpriteInstanceDesc`]).
    pub instances: Vec<SpriteInstanceDesc>,
    /// Model the [`SceneRenderer::carve_active_sprite`] hotkey edits
    /// (GPU only, mirroring the demo's `G`-carve). `None` disables it.
    pub carve_model: Option<usize>,
}

/// Per-frame inputs both backends consume. The host builds the
/// [`OpticastSettings`] (it owns scan distance etc.); the facade does
/// everything else (pool config, sky fill, render, present).
///
/// Several fields are read by only one backend; each field's doc says
/// which backend ignores it.
pub struct FrameParams<'a> {
    /// CPU opticast settings (scan distance, mip ladder, framebuffer
    /// geometry). Ignored by the GPU backend.
    pub settings: &'a OpticastSettings,
    /// Packed engine sky colour: the CPU sky-miss fill + skycast, and
    /// the clear colour if no scene renders.
    pub sky_color: u32,
    /// Optional sky panorama for the CPU rasterizer's sky sampling.
    pub sky: Option<&'a Sky>,
    /// CPU fog: packed colour + max scan distance (voxels). `0` scan
    /// distance disables CPU fog.
    pub fog_color: u32,
    /// CPU fog max scan distance in voxels, paired with
    /// [`fog_color`](Self::fog_color). `0` disables CPU fog.
    pub fog_max_scan_dist: i32,
    /// CPU: treat z=255 as air (avoids the S1.X bedrock path for
    /// out-of-bounds cameras).
    pub treat_z_max_as_air: bool,
    /// GPU scene-grid LOD scan distance (world units); see GPU.11.1.
    /// Ignored by the CPU backend.
    pub gpu_mip_scan_dist: f32,
    /// GPU outer-DDA step budget (chunks). Ignored by the CPU backend.
    pub gpu_max_outer_steps: u32,
    /// GPU vertical field of view (radians). Ignored by the CPU
    /// backend (it derives projection from [`OpticastSettings`]).
    pub gpu_fov_y_rad: f32,
    /// CPU sprite shading (built by the host from its engine). Required
    /// for the CPU backend to draw sprites; ignored by the GPU backend
    /// (its sprite pass shades from the uploaded model colours). `None`
    /// skips CPU sprite drawing.
    pub sprite_lighting: Option<&'a SpriteLighting<'a>>,
    /// Per-face directional shading for the voxel grids — voxlap's
    /// `setsideshades(top, bot, left, right, up, down)`, the grid-scan
    /// analogue of [`sprite_lighting`](Self::sprite_lighting). Each
    /// entry darkens the faces pointing that way; the host typically
    /// passes its engine's `side_shades()`. The default `[0; 6]` keeps
    /// `sideshademode` off (no per-side shading), so existing hosts and
    /// the oracle goldens are unaffected. Applied each frame by **both**
    /// backends: the CPU rasteriser via `gcsub`, and the GPU scene-DDA
    /// pass by darkening a hit voxel's brightness by the hit face's
    /// shade (the face taken from the DDA's last-stepped axis).
    pub side_shades: [i8; 6],
}

/// Result of [`SceneRenderer::pick`] — a resolved screen→world voxel
/// hit. `world` is the surface point (`cam.pos + t · normalize(ray)`);
/// `grid` + `voxel` are the owning grid and its **grid-local** voxel
/// (transform-correct for rotated / translated grids).
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct PickHit {
    /// Surface point of the hit, in world space.
    pub world: [f32; 3],
    /// The grid that owns the hit voxel.
    pub grid: roxlap_scene::GridId,
    /// The hit voxel in that grid's **local** coordinates.
    pub voxel: glam::IVec3,
}

/// A world-space view ray: the canonical unproject output of
/// [`SceneRenderer::view_ray`]. `dir` is unit-length. Feed it straight
/// to [`roxlap_scene::Scene::raycast`] for depth-free, backend-agnostic
/// voxel picking (`scene.raycast(ray.origin, ray.dir, max_dist)`), or
/// intersect it with a plane for tile selection.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Ray {
    /// Start point of the ray, in world space.
    pub origin: glam::DVec3,
    /// Unit-length direction of the ray, in world space.
    pub dir: glam::DVec3,
}

/// A world-space line segment to draw over a rendered frame via
/// [`SceneRenderer::draw_lines`] — editor gizmos (bounding boxes, floor
/// grids, axes, hover wireframes), debug paths, etc.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Line3 {
    /// World-space endpoints (voxel units), in the same frame the
    /// rendered scene + `camera` use.
    pub a: [f64; 3],
    /// Second endpoint; same space and units as [`a`](Self::a).
    pub b: [f64; 3],
    /// `0xAARRGGBB` — the high byte is an alpha blend factor (`0xFF`
    /// opaque, `0x00` invisible), the low 24 bits the RGB colour.
    pub color: u32,
    /// Screen-space thickness in pixels (`<= 1.0` draws a 1px line).
    pub width_px: f32,
    /// `true`: the segment is occluded by nearer rendered geometry
    /// (depth-tested against the frame's z-buffer). `false`: always on
    /// top (e.g. a hover highlight that should show through the model).
    pub depth_test: bool,
}

/// A handle to an uploaded image-sprite texture, returned by
/// [`SceneRenderer::upload_image`]. Positional (like [`SpriteModelId`]):
/// it indexes the backend's texture store. Pass it in an [`ImageSprite`]
/// for [`SceneRenderer::draw_images`], or to
/// [`drop_image`](SceneRenderer::drop_image) to release it. Opaque on
/// purpose — there's no arithmetic to do on it.
///
/// The wrapped `usize` is crate-private, so hosts can copy, compare and
/// hash a handle but cannot construct or inspect one.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct ImageId(pub(crate) usize);

/// How an [`ImageSprite`]'s quad is oriented in the world.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum ImageFacing {
    /// Fixed in world space: the quad lies in the plane spanned by `u`
    /// (the image's +column / width direction) and `v` (its +row /
    /// height direction). Both are world-space directions; their length
    /// is ignored (the quad is sized by [`ImageSprite::size`]), so pass
    /// the plane's axes directly. Row 0 of the image is the `origin`
    /// edge and rows grow along `v`.
    ///
    /// A zero-length axis cannot be normalised and is used as-is, which
    /// collapses the quad to zero area.
    World { u: [f32; 3], v: [f32; 3] },
    /// Always faces the camera (billboard); `up` is the world direction
    /// the image's top edge points toward (e.g. world `-Z` for the
    /// scene-demo's z-down world, or any "up" the host prefers).
    ///
    /// When `up` is parallel to the view direction, the camera's right
    /// vector is used as the quad's horizontal axis instead.
    Billboard { up: [f32; 3] },
}

/// One placed 2D image sprite for the current frame: a flat textured
/// quad in world space, composited over the rendered scene with the
/// frame's depth buffer (so the voxel model can occlude it). Built per
/// frame and passed to [`SceneRenderer::draw_images`], mirroring
/// [`Line3`] / [`SceneRenderer::draw_lines`]. The texture is uploaded
/// once via [`SceneRenderer::upload_image`] and referenced by [`image`].
///
/// The facade resolves each sprite into four world-space corners once,
/// so both backends draw from the same geometry.
///
/// [`image`]: ImageSprite::image
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct ImageSprite {
    /// The uploaded texture to draw (from [`SceneRenderer::upload_image`]).
    pub image: ImageId,
    /// World position of the quad's **top-left** corner — the image's
    /// `(column 0, row 0)` texel. The quad extends `size[0]` along the
    /// facing's `u` and `size[1]` along its `v`.
    pub origin: [f32; 3],
    /// World orientation of the quad — fixed in world or camera-facing.
    pub facing: ImageFacing,
    /// World size of the quad along `u` and `v`. For pixel-art traced at
    /// 1 texel = 1 voxel, pass `[width as f32, height as f32]`.
    pub size: [f32; 2],
    /// Multiplied into every sampled texel (tint + opacity), `0xAARRGGBB`.
    /// `0xFFFFFFFF` draws the texture unchanged; the high byte scales
    /// the texel alpha (e.g. `0x80FFFFFF` = 50 % opacity).
    pub tint: u32,
    /// `true`: occluded by nearer rendered geometry (depth-tested against
    /// the frame's depth buffer, with a bias so a quad resting on a
    /// coincident voxel face doesn't z-fight). `false`: always on top.
    pub depth_test: bool,
    /// `true`: draw regardless of which way the quad faces (no backface
    /// cull) — what reference images usually want. `false`: cull when the
    /// quad faces away from the camera. Ignored for
    /// [`ImageFacing::Billboard`] (it always faces the camera).
    pub double_sided: bool,
}

/// Backend-agnostic resolved quad: four world corners (`TL, TR, BL, BR`,
/// with UVs `(0,0) (1,0) (0,1) (1,1)`) + the texture to map. The facade
/// resolves [`ImageSprite::facing`] into corners and culls back-facing
/// quads once, so both backends draw from the same geometry.
#[derive(Clone, Copy, Debug)]
pub(crate) struct QuadDraw {
    /// World-space corners in the order `TL, TR, BL, BR`.
    pub corners: [[f32; 3]; 4],
    /// Texture to map onto the quad (copied from [`ImageSprite::image`]).
    pub image: ImageId,
    /// `0xAARRGGBB` tint (copied from [`ImageSprite::tint`]).
    pub tint: u32,
    /// Depth-test flag (copied from [`ImageSprite::depth_test`]).
    pub depth_test: bool,
}

/// Which renderer a [`SceneRenderer`] resolved to at construction.
///
/// A host that asked for the GPU still gets [`Backend::Cpu`] when GPU
/// initialisation fails, because construction falls back to the CPU
/// path.
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Backend {
    /// `roxlap-core` opticast, presented via `softbuffer`.
    Cpu,
    /// `roxlap-gpu` compute marcher, presented via wgpu.
    Gpu,
}

/// Construction-time options for [`SceneRenderer::new`].
///
/// The `Default` impl selects the CPU backend; the per-field defaults
/// are noted below.
pub struct RenderOptions {
    /// Try the GPU backend first. When `false`, or when GPU init
    /// fails, the renderer uses the CPU backend. Default: `false`.
    pub want_gpu: bool,
    /// Settings forwarded to [`roxlap_gpu::GpuRenderer`] when the GPU
    /// backend is selected. Default: `GpuRendererSettings::default()`.
    pub gpu: GpuRendererSettings,
    /// Packed `0x00RRGGBB` (alpha ignored) the empty/clear frame fills
    /// with until a scene render lands. Also the CPU sky-miss colour
    /// default if a frame supplies none. Default: `0x0099_b3d9`.
    pub clear_sky: u32,
    /// CPU [`ScratchPool`](roxlap_core::rasterizer::ScratchPool) `lastx`
    /// sizing — the largest combined grid `vsid` the CPU rasterizer
    /// will see. Pre-sizing keeps later frames allocation-free.
    /// Default: `32 * roxlap_scene::CHUNK_SIZE_XY`.
    pub cpu_max_grid_vsid: u32,
    /// CPU strip-parallel render thread count (capped to the rayon
    /// pool). One [`ScratchPool`](roxlap_core::rasterizer::ScratchPool)
    /// slot per thread. Default: `4`.
    pub cpu_render_threads: usize,
}

impl Default for RenderOptions {
    /// CPU-first defaults: no GPU attempt, stock GPU settings, a pale
    /// blue clear colour, four render threads, and a scratch pool sized
    /// for the scene-demo's widest ground grid.
    fn default() -> Self {
        // 32 chunks × CHUNK_SIZE_XY — the scene-demo's widest combined
        // ground grid.
        let widest_ground_vsid = 32 * roxlap_scene::CHUNK_SIZE_XY;
        let gpu_settings = GpuRendererSettings::default();

        Self {
            cpu_render_threads: 4,
            cpu_max_grid_vsid: widest_ground_vsid,
            clear_sky: 0x0099_b3d9,
            gpu: gpu_settings,
            want_gpu: false,
        }
    }
}

// --- image-sprite geometry helpers (shared by both backends) ---

/// Component-wise difference `a - b` of two 3-vectors.
fn v_sub(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    std::array::from_fn(|axis| a[axis] - b[axis])
}
/// Component-wise sum `a + b` of two 3-vectors.
fn v_add(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    [ax + bx, ay + by, az + bz]
}
/// Multiply every component of `a` by the scalar `s`.
fn v_scale(a: [f32; 3], s: f32) -> [f32; 3] {
    a.map(|component| component * s)
}
/// Dot product of two 3-vectors, summed in x, y, z order.
fn v_dot(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
/// Cross product `a × b` of two 3-vectors.
fn v_cross(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;

    let x = ay * bz - az * by;
    let y = az * bx - ax * bz;
    let z = ax * by - ay * bx;
    [x, y, z]
}
/// Scale `a` to unit length. A vector shorter than `1e-12` is returned
/// unchanged instead of being divided by a near-zero length.
fn v_norm(a: [f32; 3]) -> [f32; 3] {
    match v_dot(a, a).sqrt() {
        // Too short to normalise meaningfully: hand it back as-is.
        len if len < 1e-12 => a,
        len => v_scale(a, len.recip()),
    }
}

/// Resolve an [`ImageSprite`] into its four world corners (`TL, TR, BL,
/// BR`).
///
/// Returns `None` when the quad should not be drawn:
///
/// - its plane is degenerate — a zero-length axis, (near-)parallel
///   axes, or a zero `size` component — whatever the facing or
///   `double_sided` setting; or
/// - it is a `double_sided == false` [`ImageFacing::World`] quad that
///   faces away from the camera (back-face cull).
///
/// The camera basis is used only for [`ImageFacing::Billboard`] and the
/// cull test.
fn resolve_quad(sprite: &ImageSprite, camera: &Camera) -> Option<QuadDraw> {
    // Squared-length floor below which the unit axes are treated as not
    // spanning a plane; the same tolerance as the axis tests below.
    const MIN_SPAN_SQ: f32 = 1e-12;

    let cam_pos = [
        camera.pos[0] as f32,
        camera.pos[1] as f32,
        camera.pos[2] as f32,
    ];
    let cam_fwd = v_norm([
        camera.forward[0] as f32,
        camera.forward[1] as f32,
        camera.forward[2] as f32,
    ]);

    let (u_hat, v_hat) = match sprite.facing {
        ImageFacing::World { u, v } => (v_norm(u), v_norm(v)),
        ImageFacing::Billboard { up } => {
            // Horizontal axis ⟂ both the view direction and `up`; fall
            // back to the camera right when `up` is parallel to the view.
            let mut u_hat = v_norm(v_cross(up, cam_fwd));
            if v_dot(u_hat, u_hat) < 1e-12 {
                u_hat = v_norm([
                    camera.right[0] as f32,
                    camera.right[1] as f32,
                    camera.right[2] as f32,
                ]);
            }
            // Vertical axis ⟂ both, pointing *down* (rows grow downward)
            // so the top edge ends up toward `up`.
            let mut v_hat = v_norm(v_cross(cam_fwd, u_hat));
            if v_dot(v_hat, up) > 0.0 {
                v_hat = v_scale(v_hat, -1.0);
            }
            (u_hat, v_hat)
        }
    };

    // Degenerate plane: the axes do not span a plane (one is zero-length
    // or they are parallel), or the quad has no extent along one of them.
    // Tested on the unit axes so a small but valid `size` is not
    // mistaken for a collapsed quad.
    let unit_normal = v_cross(u_hat, v_hat);
    if v_dot(unit_normal, unit_normal) < MIN_SPAN_SQ
        || sprite.size[0] == 0.0
        || sprite.size[1] == 0.0
    {
        return None;
    }

    let du = v_scale(u_hat, sprite.size[0]);
    let dv = v_scale(v_hat, sprite.size[1]);
    let tl = sprite.origin;
    let tr = v_add(tl, du);
    let bl = v_add(tl, dv);
    let br = v_add(tr, dv);

    // Back-face cull for fixed world quads (billboards always face us).
    if !sprite.double_sided {
        if let ImageFacing::World { .. } = sprite.facing {
            let normal = v_cross(du, dv);
            // Front-facing when the quad normal points toward the camera.
            if v_dot(normal, v_sub(cam_pos, tl)) <= 0.0 {
                return None;
            }
        }
    }

    Some(QuadDraw {
        corners: [tl, tr, bl, br],
        image: sprite.image,
        tint: sprite.tint,
        depth_test: sprite.depth_test,
    })
}

/// Renderer-internal backend; never exposes wgpu or softbuffer types.
/// Each variant owns its backend's entire state (the GPU one holds the
/// whole wgpu device/queue/pipelines), and both are boxed to keep the
/// enum small.
enum BackendImpl {
    // Both variants boxed so the enum stays small regardless of which
    // backend's state is larger (clippy::large_enum_variant).
    Cpu(Box<CpuBackend>),
    Gpu(Box<GpuBackend>),
}

/// Unified renderer over the CPU and GPU paths. See the crate docs.
pub struct SceneRenderer {
    /// The backend chosen at construction; private so no backend type
    /// appears in the public API.
    inner: BackendImpl,
}

impl SceneRenderer {
    /// Construct a renderer presenting into `window`, which may be any
    /// [`raw-window-handle`] provider (winit, SDL, GLFW, …) wrapped in an
    /// `Arc`. `size` is the window's initial physical framebuffer size in
    /// pixels; later changes are reported by the host through
    /// [`Self::resize`]. Taking the size as an argument keeps this facade
    /// independent of any one windowing library's size API.
    ///
    /// The GPU backend is attempted only when `opts.want_gpu` is set. If
    /// it is not set, or GPU initialisation returns an error, the CPU
    /// backend is used. **Never fails** — a GPU init error is written to
    /// stderr and the CPU path is returned instead.
    ///
    /// [`raw-window-handle`]: raw_window_handle
    #[cfg(not(target_arch = "wasm32"))]
    #[must_use]
    pub fn new<W>(window: Arc<W>, size: (u32, u32), opts: &RenderOptions) -> Self
    where
        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
    {
        // Try the GPU first when asked. The attempt gets its own `Arc`
        // handle so the original is still available for the CPU path.
        let gpu = if opts.want_gpu {
            match GpuBackend::new(Arc::clone(&window), size, opts) {
                Ok(backend) => Some(backend),
                Err(e) => {
                    eprintln!(
                        "roxlap-render: GPU init failed ({e}); falling back to the CPU renderer",
                    );
                    None
                }
            }
        } else {
            None
        };

        let inner = match gpu {
            Some(backend) => BackendImpl::Gpu(Box::new(backend)),
            None => BackendImpl::Cpu(Box::new(CpuBackend::new(window, size, opts))),
        };
        Self { inner }
    }

    /// wasm/WebGPU entry point: construct a renderer over an HTML
    /// `canvas`. `size` is the canvas's initial framebuffer size in
    /// pixels; the host reports later changes via [`Self::resize`].
    ///
    /// This is `async` because the browser drives wgpu's adapter/device
    /// requests through its event loop — `await` it from inside a
    /// `wasm_bindgen_futures::spawn_local` task. The GPU (WebGPU) backend
    /// is attempted only when `opts.want_gpu` is set; if it is not set or
    /// WebGPU init fails, the CPU opticast path is used, presented
    /// through a WebGL2 blit on the same canvas. **Never fails** — an
    /// init error is logged to the browser console as a warning.
    #[cfg(target_arch = "wasm32")]
    pub async fn new_from_canvas_async(
        canvas: web_sys::HtmlCanvasElement,
        size: (u32, u32),
        opts: &RenderOptions,
    ) -> Self {
        // `SurfaceTarget::Canvas` moves the canvas into wgpu, so the GPU
        // attempt is handed a clone and the original stays available for
        // the CPU fallback.
        let gpu = if opts.want_gpu {
            match GpuBackend::new_async(canvas.clone(), size, opts).await {
                Ok(backend) => Some(backend),
                Err(e) => {
                    web_sys::console::warn_1(
                        &format!("roxlap-render: WebGPU init failed ({e}); using the CPU renderer")
                            .into(),
                    );
                    None
                }
            }
        } else {
            None
        };

        let inner = match gpu {
            Some(backend) => BackendImpl::Gpu(Box::new(backend)),
            None => BackendImpl::Cpu(Box::new(CpuBackend::new_from_canvas(canvas, size, opts))),
        };
        Self { inner }
    }

    /// Report which backend this renderer is running on.
    #[must_use]
    pub fn backend(&self) -> Backend {
        match &self.inner {
            BackendImpl::Gpu(_) => Backend::Gpu,
            BackendImpl::Cpu(_) => Backend::Cpu,
        }
    }

    /// Description of the GPU adapter in use, or `None` when the
    /// renderer is on the CPU backend.
    #[must_use]
    pub fn adapter_info(&self) -> Option<&str> {
        if let BackendImpl::Gpu(gpu) = &self.inner {
            Some(gpu.adapter_info())
        } else {
            None
        }
    }

    /// Hand the GPU marcher an equirectangular sky panorama (RGBA8,
    /// `w×h`) to sample. The CPU backend ignores this call: it samples
    /// the [`Sky`] supplied in each [`FrameParams`] instead.
    pub fn set_sky_panorama(&mut self, rgba: &[u8], w: u32, h: u32) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => {
                gpu.set_sky_panorama(rgba, w, h);
            }
            // Nothing to upload on the CPU path.
            BackendImpl::Cpu(_) => {}
        }
    }

    /// Tell the renderer the window is now `width × height` pixels. The
    /// CPU backend resizes its framebuffer lazily, so the call mainly
    /// matters for the GPU swapchain — it is safe to call on either
    /// backend.
    pub fn resize(&mut self, width: u32, height: u32) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.resize(width, height),
            BackendImpl::Cpu(cpu) => cpu.resize(width, height),
        }
    }

    /// Composite `scene` as seen from `camera`, using the `frame`
    /// parameters, into the backend's frame buffer — **without
    /// presenting** it. On the CPU backend this fills the sky and runs
    /// the opticast compositor into an owned buffer; on the GPU backend
    /// it uploads/refreshes the scene, runs the compute marcher and the
    /// sprite pass, and acquires (but does not present) the swapchain
    /// frame.
    ///
    /// Each rendered frame is finished by exactly one of
    /// [`present`](Self::present) (no overlay) or
    /// [`paint_egui`](Self::paint_egui) (UI overlay). Calling `render`
    /// again before finishing drops the pending frame.
    pub fn render(&mut self, scene: &mut Scene, camera: &Camera, frame: &FrameParams) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.render(scene, camera, frame),
            BackendImpl::Cpu(cpu) => cpu.render(scene, camera, frame),
        }
    }

    /// Overlay world-space [`Line3`] segments on the frame that
    /// [`render`](Self::render) composited, reusing that frame's camera,
    /// projection and depth buffer. The call belongs **after**
    /// [`render`](Self::render) and **before**
    /// [`present`](Self::present) / [`paint_egui`](Self::paint_egui):
    /// the lines are written into the framebuffer, so a later
    /// `paint_egui` still draws its panels on top of them.
    ///
    /// `camera` must be the camera the last frame was rendered with (the
    /// projection comes from that frame). Segments with
    /// `Line3::depth_test` set are hidden by nearer rendered geometry;
    /// always-on-top segments ignore depth. Colour / width / blend
    /// semantics are described on [`Line3`].
    pub fn draw_lines(&mut self, camera: &Camera, lines: &[Line3]) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.draw_lines(camera, lines),
            BackendImpl::Cpu(cpu) => cpu.draw_lines(camera, lines),
        }
    }

    /// Upload (or replace) an RGBA8 image, returning a stable
    /// [`ImageId`] for use with [`draw_images`](Self::draw_images).
    /// `rgba` is row-major and `width * height * 4` bytes long, with
    /// **straight** (un-premultiplied) alpha. The texture stays resident
    /// until [`drop_image`](Self::drop_image), which keeps the per-frame
    /// draw call cheap. Sampling is nearest-neighbour (pixel-art
    /// friendly — no blurring).
    ///
    /// Malformed input (wrong byte count or a zero dimension) yields
    /// `ImageId(0)`; drawing with that id draws nothing.
    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> ImageId {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.upload_image(rgba, width, height),
            BackendImpl::Cpu(cpu) => cpu.upload_image(rgba, width, height),
        }
    }

    /// Free a texture previously created by
    /// [`upload_image`](Self::upload_image). Do not use `id` again after
    /// this call: a later `upload_image` may hand the same slot back out
    /// under a fresh id.
    pub fn drop_image(&mut self, id: ImageId) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.drop_image(id),
            BackendImpl::Cpu(cpu) => cpu.drop_image(id),
        }
    }

    /// Overlay 2D [`ImageSprite`]s — flat textured quads placed in world
    /// space — on the frame [`render`](Self::render) composited, reusing
    /// that frame's camera, projection and depth buffer. The calling
    /// contract matches [`draw_lines`](Self::draw_lines): **after**
    /// [`render`](Self::render), **before** [`present`](Self::present) /
    /// [`paint_egui`](Self::paint_egui).
    ///
    /// UVs are perspective-correct (no affine warp when a quad is viewed
    /// obliquely). Depth-tested sprites are hidden by nearer rendered
    /// geometry (with a bias against z-fighting on a coincident face);
    /// the texture's straight alpha and the [`ImageSprite::tint`] are
    /// composited over the scene. `camera` must be the one the last
    /// frame rendered.
    ///
    /// Sprites are first resolved to world-space quads; if none remain
    /// (empty input, or every sprite was rejected while resolving) the
    /// backend is not called at all.
    pub fn draw_images(&mut self, camera: &Camera, images: &[ImageSprite]) {
        if images.is_empty() {
            return;
        }
        let quads = images
            .iter()
            .filter_map(|sprite| resolve_quad(sprite, camera))
            .collect::<Vec<QuadDraw>>();
        if !quads.is_empty() {
            match &mut self.inner {
                BackendImpl::Gpu(gpu) => gpu.draw_images(camera, &quads),
                BackendImpl::Cpu(cpu) => cpu.draw_images(camera, &quads),
            }
        }
    }

    /// Map a world-space point to window pixel coordinates `(x, y)`
    /// using the projection of the **last rendered frame** — the
    /// backend-correct `world → screen` inverse of
    /// [`view_ray`](Self::view_ray). Yields `None` before the first
    /// frame, or when the point lies at/behind the camera near plane.
    ///
    /// Each backend applies its own projection (CPU `setcamera`
    /// `hx/hy/hz`, GPU vertical-FOV pinhole), so hosts never have to
    /// reconstruct it. For points in front of the camera but off-screen,
    /// the returned `(x, y)` may lie outside `[0, w) × [0, h)`.
    #[must_use]
    pub fn project_point(&self, camera: &Camera, world: [f32; 3]) -> Option<(f32, f32)> {
        match &self.inner {
            BackendImpl::Gpu(gpu) => gpu.project_point(camera, world),
            BackendImpl::Cpu(cpu) => cpu.project_point(camera, world),
        }
    }

    /// Show the frame that [`render`](Self::render) composited, without
    /// any UI overlay. This is the counterpart of `render`; to overlay
    /// an egui UI before presenting, call
    /// [`paint_egui`](Self::paint_egui) instead.
    pub fn present(&mut self) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.present(),
            BackendImpl::Cpu(cpu) => cpu.present(),
        }
    }

    /// Paint an egui UI over the frame [`render`](Self::render)
    /// composited and then present it (`hud` feature). egui itself is
    /// run by the host (e.g. `egui` + `egui-winit`), which passes in the
    /// tessellated `jobs` ([`egui::Context::tessellate`]), the per-frame
    /// `textures` delta taken from [`egui::FullOutput`], and
    /// `pixels_per_point`, the UI scale (`ctx.pixels_per_point()`).
    ///
    /// On the GPU backend the UI is painted via `egui-wgpu`; on the CPU
    /// backend the tessellation is software-rasterised into the
    /// framebuffer. Call this **instead of** [`present`](Self::present)
    /// — either one finishes the frame.
    #[cfg(feature = "hud")]
    pub fn paint_egui(
        &mut self,
        jobs: &[egui::ClippedPrimitive],
        textures: &egui::TexturesDelta,
        pixels_per_point: f32,
    ) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.paint_egui(jobs, textures, pixels_per_point),
            BackendImpl::Cpu(cpu) => cpu.paint_egui(jobs, textures, pixels_per_point),
        }
    }

    /// Register the sprite models and instances in `set`. The CPU
    /// backend turns them into a per-instance draw list; the GPU backend
    /// into an instanced model registry. Call once at setup, or again to
    /// replace the previous set.
    ///
    /// Returns one [`SpriteModelId`] per entry of `set.models`, in the
    /// same order.
    pub fn set_sprites(&mut self, set: &SpriteSet) -> Vec<SpriteModelId> {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.set_sprites(set),
            BackendImpl::Cpu(cpu) => cpu.set_sprites(set),
        }
        // Handles are positional by construction (model index = chain id
        // on both backends), so the facade mints them itself and callers
        // keep the handle rather than re-deriving an index.
        let model_count = set.models.len();
        (0..model_count).map(SpriteModelId).collect()
    }

    /// Re-register one sprite model's geometry after you've edited its
    /// content (a carve or recolour of its `kv6`). `model` is the
    /// [`SpriteModelId`] handed back by [`set_sprites`](Self::set_sprites);
    /// `kv6` is the model's **new** geometry — the caller owns the source
    /// of truth (e.g. a dense carve grid the surface-only `kv6` can't
    /// represent) and supplies the refreshed mesh here.
    ///
    /// This is a **backend-agnostic content refresh**, not a GPU upload:
    /// the renderer brings its stored model up to date however its active
    /// backend needs to. The instance set is left untouched (an edit never
    /// moves or adds an instance), so on the GPU backend only that one
    /// model's voxel data is re-uploaded — through a slack-backed
    /// suballocator, one model's bytes rather than the whole registry —
    /// while the CPU backend swaps the cached `kv6` into each instance of
    /// the model. Use [`set_sprites`](Self::set_sprites) to add/remove
    /// models or change the instance set.
    pub fn refresh_sprite_model(&mut self, model: SpriteModelId, kv6: &Kv6) {
        match &mut self.inner {
            BackendImpl::Cpu(c) => c.update_sprite_model(model.0, kv6),
            BackendImpl::Gpu(g) => g.update_sprite_model(model.0, kv6),
        }
    }

    /// Register animated KFA sprites (one or more bone hierarchies). On
    /// the GPU backend each limb's kv6 is uploaded as an instanced model
    /// **once** (appended to the sprite registry) and the limb instances
    /// are seeded at their current pose; the CPU backend caches the
    /// posed limbs for drawing. Call once at setup, after
    /// [`set_sprites`](Self::set_sprites); per-frame motion is then
    /// driven with [`update_kfa_poses`](Self::update_kfa_poses).
    ///
    /// Posing reads each sprite's current
    /// [`kfaval`](roxlap_formats::kfa::KfaSprite::kfaval) (advance
    /// [`animsprite`](roxlap_formats::kfa::KfaSprite::animsprite) first
    /// when using a baked curve), which is why `kfas` is taken `&mut`.
    pub fn set_kfa_sprites(&mut self, kfas: &mut [KfaSprite]) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.set_kfa_sprites(kfas),
            BackendImpl::Cpu(cpu) => cpu.set_kfa_sprites(kfas),
        }
    }

    /// Re-pose the registered KFA sprites from their current `kfaval[]`.
    /// Call once per frame, after advancing the animation
    /// (`kfa.animsprite(dt_ms)` or writing `kfaval[]` directly). The GPU
    /// backend performs the cheap transform-only update (no model-volume
    /// re-upload); the CPU backend re-solves the limb transforms for the
    /// next [`render`](Self::render). The same sprites must have been
    /// registered beforehand with
    /// [`set_kfa_sprites`](Self::set_kfa_sprites).
    pub fn update_kfa_poses(&mut self, kfas: &mut [KfaSprite]) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => gpu.update_kfa_poses(kfas),
            BackendImpl::Cpu(cpu) => cpu.update_kfa_poses(kfas),
        }
    }

    /// Carve the next z-layer off the [`SpriteSet::carve_model`] and
    /// re-upload it (the demo's `G` hotkey + GPU.12 copy-on-modify).
    /// Only the GPU backend implements this; on the CPU backend nothing
    /// happens and `0` is returned. Returns the number of voxels
    /// removed.
    pub fn carve_active_sprite(&mut self) -> u32 {
        if let BackendImpl::Gpu(gpu) = &mut self.inner {
            gpu.carve_active_sprite()
        } else {
            0
        }
    }

    /// Ask the next [`render`](Self::render) to capture its framebuffer
    /// so it can be collected with [`take_capture`](Self::take_capture).
    /// CPU backend only (the GPU swapchain isn't read back); on the GPU
    /// backend the call does nothing.
    pub fn request_capture(&mut self) {
        match &mut self.inner {
            BackendImpl::Cpu(cpu) => {
                cpu.request_capture();
            }
            // No readback path on the GPU backend.
            BackendImpl::Gpu(_) => {}
        }
    }

    /// Collect the most recently captured frame as packed `0x00RRGGBB`
    /// pixels together with its dimensions. `None` when no capture is
    /// ready, and always `None` on the GPU backend.
    pub fn take_capture(&mut self) -> Option<(Vec<u32>, u32, u32)> {
        if let BackendImpl::Cpu(cpu) = &mut self.inner {
            cpu.take_capture()
        } else {
            None
        }
    }

    /// Screen→world picking input: the world-space hit distance `t` at
    /// window pixel `(x, y)` in the **last rendered frame**. `None` for
    /// out-of-bounds pixels and for sky / no-hit. The host rebuilds the
    /// world hit point as `cam.pos + t * normalize(ray_dir)`, with
    /// `ray_dir` being the same per-pixel ray the frame was rendered
    /// with (see the backend's projection).
    ///
    /// `t` measures the distance to the nearest **scene-grid** surface
    /// (terrain + grids). Sprites do not occlude it (the sprite pass
    /// reads depth read-only), so a cursor sprite under the pointer is
    /// transparent to the pick.
    ///
    /// Cost: on the CPU backend this reads the in-memory z-buffer
    /// (free); on the GPU backend it stages the depth buffer and blocks
    /// on a device poll (cheap at click time — do not call every frame).
    /// The GPU path only has depth when the last frame drew sprites
    /// (`write_depth`).
    #[must_use]
    pub fn pick_depth(&self, x: u32, y: u32) -> Option<f32> {
        match &self.inner {
            BackendImpl::Gpu(gpu) => gpu.pick_depth(x, y),
            BackendImpl::Cpu(cpu) => cpu.pick_depth(x, y),
        }
    }

    /// Un-normalised world-space view-ray direction through window pixel
    /// `(x, y)`, using the projection of the **last rendered frame**.
    /// The two backends project differently (CPU `setcamera` vs GPU
    /// vertical-FOV pinhole); this method hides which one is active.
    /// `None` before the first frame. Intersect the ray with a plane for
    /// tile picking, or use [`Self::pick`] for a voxel pick at the same
    /// pixel.
    #[must_use]
    pub fn pixel_ray(&self, camera: &Camera, x: f64, y: f64) -> Option<[f64; 3]> {
        match &self.inner {
            BackendImpl::Gpu(gpu) => gpu.pixel_ray(camera, x, y),
            BackendImpl::Cpu(cpu) => cpu.pixel_ray(camera, x, y),
        }
    }

    /// Canonical screen→world unproject: the full view [`Ray`]
    /// (`camera.pos` origin + unit direction) for window pixel
    /// `(x, y)`, under whichever projection the last frame used. The
    /// one entry point both backends honour — hosts never reconstruct
    /// the projection.
    ///
    /// Returns `None` before the first frame, or for a degenerate ray:
    /// one whose length is (near) zero **or not finite** (a NaN or
    /// infinite component, e.g. from a NaN pixel coordinate). A returned
    /// ray therefore always carries a finite unit direction.
    ///
    /// Compose with [`roxlap_scene::Scene::raycast`] for depth-free
    /// picking that's identical on CPU and GPU:
    /// `renderer.view_ray(cam, x, y).and_then(|r| scene.raycast(r.origin, r.dir, max))`.
    #[must_use]
    pub fn view_ray(&self, camera: &Camera, x: f64, y: f64) -> Option<Ray> {
        let d = self.pixel_ray(camera, x, y)?;
        let len = (d[0] * d[0] + d[1] * d[1] + d[2] * d[2]).sqrt();
        // `len < 1e-12` alone lets NaN through (every comparison with
        // NaN is false), and an infinite length would divide to NaN
        // below — so non-finite lengths are rejected explicitly.
        if !len.is_finite() || len < 1e-12 {
            return None;
        }
        Some(Ray {
            origin: glam::DVec3::from_array([camera.pos[0], camera.pos[1], camera.pos[2]]),
            dir: glam::DVec3::new(d[0] / len, d[1] / len, d[2] / len),
        })
    }

    /// One-call screen→world voxel pick. Unprojects pixel `(x, y)` with
    /// the active backend's projection, reads the last frame's depth at
    /// that pixel, reconstructs the world-space hit point, and resolves
    /// it to the owning grid plus grid-local voxel through
    /// [`Scene::resolve_voxel`]. `None` on sky / no-hit, or when no grid
    /// claims the surface.
    ///
    /// `scene` and `camera` must be the ones the last frame rendered;
    /// the projection (size + FOV / `hx,hy,hz`) comes from that frame.
    /// Cheap on CPU (in-memory z-buffer); on GPU it stages the depth
    /// buffer (a click-time device poll — not per frame).
    #[must_use]
    pub fn pick(&self, scene: &Scene, camera: &Camera, x: u32, y: u32) -> Option<PickHit> {
        let ray_dir = self.pixel_ray(camera, f64::from(x), f64::from(y))?;
        let depth = f64::from(self.pick_depth(x, y)?);

        let [dx, dy, dz] = ray_dir;
        let norm = (dx * dx + dy * dy + dz * dz).sqrt();
        if norm < 1e-9 {
            return None;
        }

        // hit = cam.pos + depth · (ray_dir / |ray_dir|)
        let scale = depth / norm;
        let hit = glam::DVec3::new(
            camera.pos[0] + dx * scale,
            camera.pos[1] + dy * scale,
            camera.pos[2] + dz * scale,
        );

        // The un-normalised direction is passed through unchanged.
        let (grid, voxel) = scene.resolve_voxel(hit, glam::DVec3::from_array(ray_dir))?;
        #[allow(clippy::cast_possible_truncation)]
        let world = [hit.x as f32, hit.y as f32, hit.z as f32];
        Some(PickHit { world, grid, voxel })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn options_default_is_cpu_intent() {
        // Defaults must not ask for the GPU, and the clear colour must
        // leave the top byte empty.
        let defaults = RenderOptions::default();
        assert!(!defaults.want_gpu);
        let top_byte = defaults.clear_sky & 0xFF00_0000;
        assert_eq!(top_byte, 0, "clear_sky is 0x00RRGGBB");
    }

    /// Test camera at the origin looking down +Y (voxlap z-down world):
    /// right = +X, down = +Z, forward = +Y. Under the standard
    /// right-handed cross product these axes satisfy
    /// `down × right == forward` (`right × down` is −Y).
    fn cam_looking_y() -> Camera {
        let right = [1.0, 0.0, 0.0];
        let down = [0.0, 0.0, 1.0];
        let forward = [0.0, 1.0, 0.0];
        Camera {
            pos: [0.0, 0.0, 0.0],
            right,
            down,
            forward,
        }
    }

    #[test]
    fn world_quad_corner_layout() {
        // A 10×10 world quad in the y = 10 plane with its top-left at
        // (-5, 10, -5); u = +X spans the width, v = +Z runs down the
        // rows. Its `u × v` normal is −Y, i.e. it points back at the
        // camera at the origin. The sprite is double-sided, so only the
        // corner layout is under test here, not culling.
        let sprite = ImageSprite {
            image: ImageId(0),
            origin: [-5.0, 10.0, -5.0],
            facing: ImageFacing::World {
                u: [1.0, 0.0, 0.0],
                v: [0.0, 0.0, 1.0],
            },
            size: [10.0, 10.0],
            tint: 0xFFFF_FFFF,
            depth_test: true,
            double_sided: true,
        };
        let camera = cam_looking_y();
        let quad = resolve_quad(&sprite, &camera).expect("front-facing");
        let [tl, tr, bl, br] = quad.corners;
        assert_eq!(tl, [-5.0, 10.0, -5.0], "TL = origin");
        assert_eq!(tr, [5.0, 10.0, -5.0], "TR = origin + u·size");
        assert_eq!(bl, [-5.0, 10.0, 5.0], "BL = origin + v·size");
        assert_eq!(br, [5.0, 10.0, 5.0], "BR = origin + u + v");
    }

    #[test]
    fn world_quad_backface_culls_when_single_sided() {
        // The camera sits at the origin and the quad lies in the y = 10
        // plane, so the camera is on the quad's −Y side. A single-sided
        // world quad is kept only when its `u × v` normal points toward
        // the camera. Asserting each winding separately (rather than
        // "exactly one resolves") also catches an inverted cull test.
        //
        // Swapped axes (u = +Z, v = +X): u × v = +Y, away from the
        // camera, so this winding must be culled.
        let sprite = ImageSprite {
            image: ImageId(0),
            origin: [-5.0, 10.0, -5.0],
            facing: ImageFacing::World {
                u: [0.0, 0.0, 1.0],
                v: [1.0, 0.0, 0.0],
            },
            size: [10.0, 10.0],
            tint: 0xFFFF_FFFF,
            depth_test: true,
            double_sided: false,
        };
        let swapped_drawn = resolve_quad(&sprite, &cam_looking_y()).is_some();

        // Canonical axes (u = +X, v = +Z): u × v = −Y, toward the
        // camera, so this winding must draw.
        let mut flipped = sprite;
        flipped.facing = ImageFacing::World {
            u: [1.0, 0.0, 0.0],
            v: [0.0, 0.0, 1.0],
        };
        let canonical_drawn = resolve_quad(&flipped, &cam_looking_y()).is_some();

        assert!(
            !swapped_drawn,
            "winding whose u × v points away from the camera is culled"
        );
        assert!(
            canonical_drawn,
            "winding whose u × v points toward the camera is drawn"
        );
    }

    #[test]
    fn double_sided_never_culls() {
        // Both windings of the same double-sided quad must resolve: the
        // swapped axes first, then the canonical ones.
        let windings = [
            ([0.0, 0.0, 1.0], [1.0, 0.0, 0.0]),
            ([1.0, 0.0, 0.0], [0.0, 0.0, 1.0]),
        ];
        for (u, v) in windings {
            let sprite = ImageSprite {
                image: ImageId(0),
                origin: [-5.0, 10.0, -5.0],
                facing: ImageFacing::World { u, v },
                size: [10.0, 10.0],
                tint: 0xFFFF_FFFF,
                depth_test: true,
                double_sided: true,
            };
            assert!(resolve_quad(&sprite, &cam_looking_y()).is_some());
        }
    }

    #[test]
    fn billboard_axes_orthogonal_and_top_toward_up() {
        // World up is -Z (z-down world). The resolved billboard edges
        // must both be ⟂ the view direction and ⟂ each other, and the
        // row axis (top→bottom) must point away from `up`.
        let up = [0.0, 0.0, -1.0];
        let view_dir = [0.0, 1.0, 0.0];
        let sprite = ImageSprite {
            image: ImageId(0),
            origin: [0.0, 50.0, 0.0],
            facing: ImageFacing::Billboard { up },
            size: [4.0, 4.0],
            tint: 0xFFFF_FFFF,
            depth_test: false,
            // Single-sided on purpose: billboards must NEVER cull.
            double_sided: false,
        };
        let quad =
            resolve_quad(&sprite, &cam_looking_y()).expect("billboard always faces camera");
        let [tl, tr, bl, _br] = quad.corners;
        let u_edge = v_sub(tr, tl); // TR - TL = u·size
        let v_edge = v_sub(bl, tl); // BL - TL = v·size
        assert!(v_dot(u_edge, view_dir).abs() < 1e-5, "u ⟂ view");
        assert!(v_dot(v_edge, view_dir).abs() < 1e-5, "v ⟂ view");
        assert!(v_dot(u_edge, v_edge).abs() < 1e-5, "u ⟂ v");
        assert!(
            v_dot(v_edge, up) < 0.0,
            "rows grow away from `up` (top edge toward up)"
        );
    }
}