roxlap-render 0.13.0

Unified CPU/GPU renderer facade for the roxlap scene-graph engine — one SceneRenderer over roxlap-core opticast (softbuffer) and roxlap-gpu (wgpu), with automatic CPU fallback.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
//! roxlap-render — unified CPU/GPU renderer facade.
//!
//! One [`SceneRenderer`] hides the choice between the CPU opticast
//! path (`roxlap-core` / `roxlap-scene`, presented via `softbuffer`)
//! and the GPU compute-shader path (`roxlap-gpu`, presented via its
//! own wgpu surface). Construction picks the GPU backend when asked
//! and able, and **falls back to CPU automatically** when WGPU init
//! fails — so a host never has to branch on GPU availability or carry
//! the `Scene`→GPU upload/refresh/transform glue itself.
//!
//! Hosts stay thin: build a `Scene`, advance it from input, then call
//! [`SceneRenderer::render`] each frame. The facade owns the window
//! surface, the framebuffer/z-buffer (CPU) or the resident scene +
//! dirty-chunk tracking (GPU), and presentation.
//!
//! The per-frame flow is `render` → *(optional overlays)* → finish.
//! Between [`SceneRenderer::render`] and the finishing
//! [`SceneRenderer::present`] / [`SceneRenderer::paint_egui`] call, a
//! host may overlay depth-tested world-space lines with
//! [`SceneRenderer::draw_lines`] (editor gizmos, debug geometry — see
//! [`Line3`]); they land in the framebuffer, occluded by the rendered
//! scene, with egui still painting panels on top.
//!
//! This is the RF.0 skeleton: backend selection + fallback + a
//! clear-to-sky frame. RF.1/RF.2 fill in the real CPU/GPU scene
//! render; RF.3 adds sprites; RF.4 adds framebuffer capture.

#![forbid(unsafe_code)]

mod cpu;
/// WebGL2 framebuffer presenter for the CPU backend on wasm (the
/// browser has no `softbuffer`).
#[cfg(target_arch = "wasm32")]
mod cpu_blit;
#[cfg(feature = "hud")]
mod cpu_egui;
mod gpu;

#[cfg(not(target_arch = "wasm32"))]
use std::sync::Arc;

use roxlap_core::opticast::OpticastSettings;
use roxlap_core::sky::Sky;
use roxlap_core::sprite::SpriteLighting;
use roxlap_core::Camera;
use roxlap_scene::Scene;

pub use roxlap_formats::kfa::KfaSprite;
pub use roxlap_formats::kv6::Kv6;
pub use roxlap_formats::sprite::Sprite;
pub use roxlap_gpu::{GpuInitError, GpuRendererSettings, PowerPreference};
// Re-exported so hosts can name the [`SceneRenderer::new`] bounds
// without adding a direct `raw-window-handle` dependency of their own.
pub use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
// Re-exported so hosts feed [`SceneRenderer::paint_egui`] from the exact
// egui version the renderer was built against (`hud` feature).
#[cfg(feature = "hud")]
pub use egui;

use crate::cpu::CpuBackend;
use crate::gpu::GpuBackend;

/// Type-erased display handle stored by the CPU backend's softbuffer
/// surface. `raw-window-handle` implements `HasDisplayHandle` for
/// `Arc<H>` (`H: ?Sized`), and the bare trait object implements its
/// own object-safe trait — so `Arc<W>` coerces to `Arc<DynDisplay>`
/// for any provider `W`.
///
/// The `Send + Sync + 'static` bounds are the same ones
/// [`SceneRenderer::new`] places on its window provider. Native only:
/// the alias is compiled out on `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) type DynDisplay = dyn HasDisplayHandle + Send + Sync + 'static;
/// Type-erased window handle counterpart to [`DynDisplay`]: the same
/// `Send + Sync + 'static` bounds, but over `HasWindowHandle`. Native
/// only — compiled out on `wasm32`.
#[cfg(not(target_arch = "wasm32"))]
pub(crate) type DynWindow = dyn HasWindowHandle + Send + Sync + 'static;

/// One placed sprite instance: which [`SpriteSet::models`] entry and
/// where in the world.
pub struct SpriteInstanceDesc {
    /// Index into [`SpriteSet::models`] selecting the voxel model this
    /// instance draws.
    pub model: usize,
    /// World-space position of the instance. Per the
    /// [`SpriteSet::models`] docs, this overrides the position carried
    /// by the model itself.
    pub pos: [f32; 3],
}

/// Stable handle to a registered sprite model, returned (one per
/// [`SpriteSet::models`] entry, in order) by
/// [`SceneRenderer::set_sprites`]. Pass it to
/// [`refresh_sprite_model`](SceneRenderer::refresh_sprite_model) to
/// re-register that model's geometry after a content edit — so callers
/// never track the positional `usize` index themselves. Opaque on
/// purpose: there is no arithmetic to do on it.
///
/// The wrapped index is `pub(crate)`, so code outside this crate can
/// neither construct a handle nor read the index back; it can only copy
/// and compare handles it was given.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SpriteModelId(pub(crate) usize);

/// Stable handle to a **dynamically added** sprite instance — the result
/// of [`SceneRenderer::add_sprite_instance`], passed to
/// [`remove_sprite_instance`](SceneRenderer::remove_sprite_instance).
///
/// Backends remove instances by swap (O(1)), which moves another instance
/// into the freed slot; this handle survives that because the facade keeps
/// the id↔slot mapping up to date. The generation guards against a stale
/// handle aliasing a recycled slot.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct SpriteInstanceId {
    /// Index into the facade's slot table (`DynInstanceMap::slots`).
    slot: u32,
    /// Generation the slot carried when this handle was issued; the
    /// handle is live only while the slot's generation still matches.
    gen: u32,
}

/// Facade-side slotmap that turns the backends' swap-remove indexing into
/// stable [`SpriteInstanceId`] handles. Both backends keep their dynamic
/// instances as a tail sublist indexed `0..n`; `order[dyn_index]` is the
/// owning slot, and a removal fixes up the one slot whose instance was
/// swapped into the hole.
#[derive(Default)]
struct DynInstanceMap {
    /// Per slot: `(generation, Some(dyn_index) while live)`.
    slots: Vec<(u32, Option<u32>)>,
    /// Per live `dyn_index`: the owning slot. Parallel to the backends'
    /// dynamic sublist (so `order.len()` == the dynamic instance count).
    order: Vec<u32>,
    /// Slots whose instance was removed, available for reuse by `alloc`
    /// (most recently freed first).
    free: Vec<u32>,
}

impl DynInstanceMap {
    /// Register a freshly appended instance (always at `dyn_index ==
    /// order.len()`); returns its stable handle.
    ///
    /// A freed slot is recycled when one exists, otherwise a new slot
    /// (generation `0`) is appended. The handle carries the slot's
    /// current generation, so handles issued for an earlier occupant of
    /// the same slot no longer resolve.
    fn alloc(&mut self, dyn_index: u32) -> SpriteInstanceId {
        debug_assert_eq!(self.order.len() as u32, dyn_index);
        let slot = self.free.pop().unwrap_or_else(|| {
            self.slots.push((0, None));
            // Slot ids are stored as `u32`; this assumes the table never
            // grows past `u32::MAX` entries.
            (self.slots.len() - 1) as u32
        });
        let entry = &mut self.slots[slot as usize];
        entry.1 = Some(dyn_index);
        let gen = entry.0;
        self.order.push(slot);
        SpriteInstanceId { slot, gen }
    }

    /// Resolve a handle to its current backend `dyn_index`, or `None` if
    /// it's stale / already removed (unknown slot, generation mismatch,
    /// or the slot is currently empty).
    fn dyn_index(&self, id: SpriteInstanceId) -> Option<u32> {
        let (gen, idx) = *self.slots.get(id.slot as usize)?;
        (gen == id.gen).then_some(idx).flatten()
    }

    /// Apply a removal: the backend swap-removed `removed` and reported
    /// `moved` (the old-last `dyn_index` that slid into `removed`, or
    /// `None` if `removed` was itself the last).
    ///
    /// `id` must be the live handle that resolved to `removed`; this
    /// method does not re-validate it. The slot's generation is bumped
    /// with wrapping arithmetic, so a slot recycled `u32::MAX` times rolls
    /// over to generation `0` rather than panicking in overflow-checked
    /// builds (and behaving differently in release builds).
    fn remove(&mut self, id: SpriteInstanceId, removed: u32, moved: Option<u32>) {
        let entry = &mut self.slots[id.slot as usize];
        entry.1 = None;
        entry.0 = entry.0.wrapping_add(1); // bump generation; invalidates `id`
        self.free.push(id.slot);
        if let Some(last) = moved {
            // The instance that used to be last now lives at `removed`:
            // repoint its slot and the reverse (`order`) entry.
            let moved_slot = self.order[last as usize];
            self.slots[moved_slot as usize].1 = Some(removed);
            self.order[removed as usize] = moved_slot;
        }
        // The dynamic sublist shrank by one; drop the now-stale tail entry.
        self.order.pop();
    }
}

/// Backend-agnostic sprite description. The facade builds the CPU
/// per-instance draw list and the GPU instanced registry from the
/// same data, so both backends show identical sprites. The host owns
/// content (which models, where, recolouring) — building a recoloured
/// variant is just a second [`Sprite`] model with edited `kv6.voxels`.
pub struct SpriteSet {
    /// Distinct voxel models (KV6 + base orientation). Instances index
    /// into this; their position overrides the model's.
    pub models: Vec<Sprite>,
    /// Placed instances. Each one names its model by index into
    /// [`models`](Self::models) and supplies its own world position
    /// (see [`SpriteInstanceDesc`]).
    pub instances: Vec<SpriteInstanceDesc>,
    /// Model the [`SceneRenderer::carve_active_sprite`] hotkey edits
    /// (GPU only, mirroring the demo's `G`-carve). `None` disables it.
    /// NOTE(review): presumably an index into [`models`](Self::models),
    /// like [`SpriteInstanceDesc::model`] — confirm against the carve
    /// implementation.
    pub carve_model: Option<usize>,
}

/// Per-frame inputs both backends consume. The host builds the
/// [`OpticastSettings`] (it owns scan distance etc.); the facade does
/// everything else (pool config, sky fill, render, present).
///
/// Several fields apply to only one backend; each field's doc says
/// which backend reads it and which ignores it.
pub struct FrameParams<'a> {
    /// CPU opticast settings (scan distance, mip ladder, framebuffer
    /// geometry). Ignored by the GPU backend.
    pub settings: &'a OpticastSettings,
    /// Packed engine sky colour: the CPU sky-miss fill + skycast, and
    /// the clear colour if no scene renders.
    pub sky_color: u32,
    /// Optional sky panorama for the CPU rasterizer's sky sampling.
    pub sky: Option<&'a Sky>,
    /// CPU fog: packed colour. Paired with
    /// [`fog_max_scan_dist`](Self::fog_max_scan_dist), which enables or
    /// disables the fog.
    pub fog_color: u32,
    /// CPU fog: max scan distance (voxels). `0` scan distance disables
    /// CPU fog.
    pub fog_max_scan_dist: i32,
    /// CPU: treat z=255 as air (avoids the S1.X bedrock path for
    /// out-of-bounds cameras).
    pub treat_z_max_as_air: bool,
    /// GPU scene-grid LOD scan distance (world units); see GPU.11.1.
    /// Ignored by the CPU backend.
    pub gpu_mip_scan_dist: f32,
    /// GPU outer-DDA step budget (chunks). Ignored by the CPU backend.
    pub gpu_max_outer_steps: u32,
    /// GPU vertical field of view (radians). Ignored by the CPU
    /// backend (it derives projection from [`OpticastSettings`]).
    pub gpu_fov_y_rad: f32,
    /// CPU sprite shading (built by the host from its engine). Required
    /// for the CPU backend to draw sprites; ignored by the GPU backend
    /// (its sprite pass shades from the uploaded model colours). `None`
    /// skips CPU sprite drawing.
    pub sprite_lighting: Option<&'a SpriteLighting<'a>>,
    /// Per-face directional shading for the voxel grids — voxlap's
    /// `setsideshades(top, bot, left, right, up, down)`, the grid-scan
    /// analogue of [`sprite_lighting`](Self::sprite_lighting). Each
    /// entry darkens the faces pointing that way; the host typically
    /// passes its engine's `side_shades()`. The default `[0; 6]` keeps
    /// `sideshademode` off (no per-side shading), so existing hosts and
    /// the oracle goldens are unaffected. Applied each frame by **both**
    /// backends: the CPU rasteriser via `gcsub`, and the GPU scene-DDA
    /// pass by darkening a hit voxel's brightness by the hit face's
    /// shade (the face taken from the DDA's last-stepped axis).
    pub side_shades: [i8; 6],
}

/// Result of [`SceneRenderer::pick`] — a resolved screen→world voxel
/// hit. `world` is the surface point (`cam.pos + t · normalize(ray)`);
/// `grid` + `voxel` are the owning grid and its **grid-local** voxel
/// (transform-correct for rotated / translated grids).
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct PickHit {
    /// World-space surface point of the hit.
    pub world: [f32; 3],
    /// The grid that owns the hit voxel.
    pub grid: roxlap_scene::GridId,
    /// The hit voxel in that grid's **local** coordinates (not world
    /// coordinates).
    pub voxel: glam::IVec3,
}

/// A world-space view ray: the canonical unproject output of
/// [`SceneRenderer::view_ray`]. `dir` is unit-length. Feed it straight
/// to [`roxlap_scene::Scene::raycast`] for depth-free, backend-agnostic
/// voxel picking (`scene.raycast(ray.origin, ray.dir, max_dist)`), or
/// intersect it with a plane for tile selection.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Ray {
    /// World-space start point of the ray.
    /// NOTE(review): presumably the camera position — confirm against
    /// `view_ray`.
    pub origin: glam::DVec3,
    /// World-space direction of the ray, unit-length.
    pub dir: glam::DVec3,
}

/// A world-space line segment to draw over a rendered frame via
/// [`SceneRenderer::draw_lines`] — editor gizmos (bounding boxes, floor
/// grids, axes, hover wireframes), debug paths, etc.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Line3 {
    /// First world-space endpoint (voxel units), in the same frame the
    /// rendered scene + `camera` use.
    pub a: [f64; 3],
    /// Second world-space endpoint, in the same units and frame as
    /// [`a`](Self::a).
    pub b: [f64; 3],
    /// `0xAARRGGBB` — the high byte is an alpha blend factor (`0xFF`
    /// opaque, `0x00` invisible), the low 24 bits the RGB colour.
    pub color: u32,
    /// Screen-space thickness in pixels (`<= 1.0` draws a 1px line).
    pub width_px: f32,
    /// `true`: the segment is occluded by nearer rendered geometry
    /// (depth-tested against the frame's z-buffer). `false`: always on
    /// top (e.g. a hover highlight that should show through the model).
    pub depth_test: bool,
}

/// A handle to an uploaded image-sprite texture, returned by
/// [`SceneRenderer::upload_image`]. Positional (like [`SpriteModelId`]):
/// it indexes the backend's texture store. Pass it in an [`ImageSprite`]
/// for [`SceneRenderer::draw_images`], or to
/// [`drop_image`](SceneRenderer::drop_image) to release it. Opaque on
/// purpose — there's no arithmetic to do on it.
///
/// The wrapped index is `pub(crate)`, so code outside this crate cannot
/// construct a handle itself; it can only copy and compare handles the
/// renderer handed out.
#[derive(Clone, Copy, PartialEq, Eq, Hash, Debug)]
pub struct ImageId(pub(crate) usize);

/// How an [`ImageSprite`]'s quad is oriented in the world.
#[derive(Clone, Copy, PartialEq, Debug)]
pub enum ImageFacing {
    /// Fixed in world space: the quad lies in the plane spanned by `u`
    /// (the image's +column / width direction) and `v` (its +row /
    /// height direction). Both are world-space directions; their length
    /// is ignored (the quad is sized by [`ImageSprite::size`]), so pass
    /// the plane's axes directly. Row 0 of the image is the `origin`
    /// edge and rows grow along `v`.
    ///
    /// Each axis is normalised independently when the quad is resolved;
    /// the two are not re-orthogonalised, so non-perpendicular axes give
    /// a skewed (parallelogram) quad. An axis of (near-)zero length is
    /// left as-is, which collapses the quad along that axis.
    World { u: [f32; 3], v: [f32; 3] },
    /// Always faces the camera (billboard); `up` is the world direction
    /// the image's top edge points toward (e.g. world `-Z` for the
    /// scene-demo's z-down world, or any "up" the host prefers).
    ///
    /// When `up` is exactly parallel to the camera's view direction the
    /// horizontal axis cannot be derived from it; the camera's right
    /// vector is used instead.
    Billboard { up: [f32; 3] },
}

/// One placed 2D image sprite for the current frame: a flat textured
/// quad in world space, composited over the rendered scene with the
/// frame's depth buffer (so the voxel model can occlude it). Built per
/// frame and passed to [`SceneRenderer::draw_images`], mirroring
/// [`Line3`] / [`SceneRenderer::draw_lines`]. The texture is uploaded
/// once via [`SceneRenderer::upload_image`] and referenced by [`image`].
///
/// [`image`]: ImageSprite::image
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct ImageSprite {
    /// The uploaded texture to draw (from [`SceneRenderer::upload_image`]).
    pub image: ImageId,
    /// World position of the quad's **top-left** corner — the image's
    /// `(column 0, row 0)` texel. The quad extends `size[0]` along the
    /// facing's `u` and `size[1]` along its `v`.
    pub origin: [f32; 3],
    /// World orientation of the quad — fixed in world or camera-facing.
    pub facing: ImageFacing,
    /// World size of the quad along `u` and `v`. For pixel-art traced at
    /// 1 texel = 1 voxel, pass `[width as f32, height as f32]`.
    pub size: [f32; 2],
    /// Multiplied into every sampled texel (tint + opacity), `0xAARRGGBB`.
    /// `0xFFFFFFFF` draws the texture unchanged; the high byte scales
    /// the texel alpha (e.g. `0x80FFFFFF` = 50 % opacity).
    pub tint: u32,
    /// Alpha cutoff in `0.0..=1.0`. Texels whose **own** alpha is below
    /// this are discarded outright (not blended) — crisp pixel-art edges
    /// instead of a semi-transparent haze, and the same threshold decides
    /// what [`SceneRenderer::pick_image`] treats as solid. `0.0` keeps the
    /// plain straight-alpha over-blend (every non-zero texel draws).
    pub alpha_cutoff: f32,
    /// `true`: occluded by nearer rendered geometry (depth-tested against
    /// the frame's depth buffer, with a bias so a quad resting on a
    /// coincident voxel face doesn't z-fight). `false`: always on top.
    pub depth_test: bool,
    /// `true`: draw regardless of which way the quad faces (no backface
    /// cull) — what reference images usually want. `false`: cull when the
    /// quad faces away from the camera. Ignored for
    /// [`ImageFacing::Billboard`] (it always faces the camera).
    ///
    /// The front face is the side the `u × v` normal points toward: a
    /// single-sided world quad is drawn only when that normal has a
    /// positive component toward the camera position, so an edge-on or
    /// zero-area quad is culled too.
    pub double_sided: bool,
}

/// Backend-agnostic resolved quad: four world corners (`TL, TR, BL, BR`,
/// with UVs `(0,0) (1,0) (0,1) (1,1)`) + the texture to map. The facade
/// resolves [`ImageSprite::facing`] into corners and culls back-facing
/// quads once, so both backends draw from the same geometry.
#[derive(Clone, Copy, Debug)]
pub(crate) struct QuadDraw {
    /// World-space corners in the order `TL, TR, BL, BR`.
    pub corners: [[f32; 3]; 4],
    /// Texture to map onto the quad (copied from [`ImageSprite::image`]).
    pub image: ImageId,
    /// `0xAARRGGBB` tint, copied from [`ImageSprite::tint`].
    pub tint: u32,
    /// Whether the quad is depth-tested, copied from
    /// [`ImageSprite::depth_test`].
    pub depth_test: bool,
    /// Alpha cutoff, copied from [`ImageSprite::alpha_cutoff`].
    pub alpha_cutoff: f32,
}

/// Result of [`SceneRenderer::pick_image`] — a resolved screen→sprite hit.
/// `uv` is the normalised position within the quad (`(0,0)` = top-left
/// corner); `texel` is the matching source-image pixel; `world` is the
/// hit point; `t` is its euclidean distance from the camera.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct ImagePickHit {
    /// The texture of the sprite that was hit.
    pub image: ImageId,
    /// Normalised position of the hit within the quad; `(0,0)` is the
    /// top-left corner.
    pub uv: [f32; 2],
    /// Source-image pixel under the hit.
    /// NOTE(review): presumably `(column, row)` — confirm against
    /// `pick_image`.
    pub texel: (u32, u32),
    /// World-space hit point.
    pub world: [f32; 3],
    /// Euclidean distance from the camera to the hit point.
    pub t: f32,
}

/// Which renderer a [`SceneRenderer`] resolved to at construction.
///
/// Because [`SceneRenderer::new`] falls back to the CPU path when GPU
/// initialisation fails, a host that asked for the GPU can still end up
/// with [`Backend::Cpu`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum Backend {
    /// `roxlap-core` opticast, presented via `softbuffer`.
    Cpu,
    /// `roxlap-gpu` compute marcher, presented via wgpu.
    Gpu,
}

/// Construction-time options for [`SceneRenderer::new`].
///
/// The [`Default`] impl selects the CPU backend; the per-field defaults
/// are listed on each field.
pub struct RenderOptions {
    /// Try the GPU backend first. When `false`, or when GPU init
    /// fails, the renderer uses the CPU backend. Defaults to `false`.
    pub want_gpu: bool,
    /// Settings forwarded to [`roxlap_gpu::GpuRenderer`] when the GPU
    /// backend is selected. Defaults to `GpuRendererSettings::default()`.
    pub gpu: GpuRendererSettings,
    /// Packed `0x00RRGGBB` (alpha ignored) the empty/clear frame fills
    /// with until a scene render lands. Also the CPU sky-miss colour
    /// default if a frame supplies none. Defaults to `0x0099_b3d9`.
    pub clear_sky: u32,
    /// CPU [`ScratchPool`](roxlap_core::rasterizer::ScratchPool) `lastx`
    /// sizing — the largest combined grid `vsid` the CPU rasterizer
    /// will see. Pre-sizing keeps later frames allocation-free.
    /// Defaults to `32 * roxlap_scene::CHUNK_SIZE_XY`.
    pub cpu_max_grid_vsid: u32,
    /// CPU strip-parallel render thread count (capped to the rayon
    /// pool). One [`ScratchPool`](roxlap_core::rasterizer::ScratchPool)
    /// slot per thread. Defaults to `4`.
    pub cpu_render_threads: usize,
}

impl Default for RenderOptions {
    /// CPU-first defaults: no GPU attempt, default GPU settings (unused
    /// unless `want_gpu` is later switched on), a `0x0099_b3d9` clear
    /// colour, scratch sizing for a 32-chunk-wide grid and four render
    /// threads.
    fn default() -> Self {
        // 32 chunks × CHUNK_SIZE_XY — the scene-demo's widest combined
        // ground grid.
        let widest_grid_vsid = 32 * roxlap_scene::CHUNK_SIZE_XY;
        let gpu = GpuRendererSettings::default();
        Self {
            want_gpu: false,
            gpu,
            clear_sky: 0x0099_b3d9,
            cpu_max_grid_vsid: widest_grid_vsid,
            cpu_render_threads: 4,
        }
    }
}

/// Depth-test slack (same spirit as the backends' `DEPTH_BIAS`) so a
/// [`SceneRenderer::pick_image`] hit on a sprite resting on a coincident
/// voxel face isn't rejected as "occluded".
///
/// NOTE(review): the unit is presumably the same distance measure as
/// [`ImagePickHit::t`] (world/voxel units along the view ray) — confirm
/// against the comparison in `pick_image`.
const PICK_DEPTH_BIAS: f32 = 0.5;

// --- image-sprite geometry helpers (shared by both backends) ---

/// Component-wise `a - b`.
fn v_sub(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    [ax - bx, ay - by, az - bz]
}

/// Component-wise `a + b`.
fn v_add(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    [ax + bx, ay + by, az + bz]
}

/// `a` with every component multiplied by `s`.
fn v_scale(a: [f32; 3], s: f32) -> [f32; 3] {
    let [x, y, z] = a;
    [x * s, y * s, z * s]
}

/// Dot product `a · b`.
fn v_dot(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}

/// Cross product `a × b`.
fn v_cross(a: [f32; 3], b: [f32; 3]) -> [f32; 3] {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    [ay * bz - az * by, az * bx - ax * bz, ax * by - ay * bx]
}

/// `a` scaled to unit length. A vector shorter than `1e-12` is returned
/// unchanged rather than divided by a near-zero length.
fn v_norm(a: [f32; 3]) -> [f32; 3] {
    let len = v_dot(a, a).sqrt();
    if len < 1e-12 {
        return a;
    }
    v_scale(a, len.recip())
}

/// Ray/quad intersection in the quad's own `(u, v)` coordinates.
///
/// The ray is `origin + dir·t` (`dir` need not be normalised) and the
/// quad is given as `[TL, TR, BL, BR]`; only `TL`, `TR` and `BL` are read,
/// so the quad is treated as the parallelogram they span (skew allowed).
/// Either side of the quad can be hit.
///
/// Returns `Some(([u, v], t))` with `u`, `v` in `0..=1` (`(0,0)` = `TL`)
/// and `t` the ray parameter of the hit. Returns `None` when the ray is
/// parallel to the quad's plane, the hit lies at or behind the origin,
/// the quad is degenerate, or the hit falls outside the `u`/`v` span.
/// Kept free of renderer state so the geometry can be unit-tested.
fn ray_quad_uv(
    origin: [f32; 3],
    dir: [f32; 3],
    corners: &[[f32; 3]; 4],
) -> Option<([f32; 2], f32)> {
    let top_left = corners[0];
    let edge_u = v_sub(corners[1], top_left); // TL → TR, the width edge
    let edge_v = v_sub(corners[2], top_left); // TL → BL, the height edge
    let plane_normal = v_cross(edge_u, edge_v);

    // Ray vs. plane: reject a ray running parallel to the plane.
    let approach = v_dot(dir, plane_normal);
    if approach.abs() < 1e-12 {
        return None;
    }
    let t = v_dot(v_sub(top_left, origin), plane_normal) / approach;
    // Hits at or behind the ray origin don't count.
    if t <= 1e-6 {
        return None;
    }

    // Express the hit point relative to TL in the (edge_u, edge_v) basis
    // by solving the 2×2 Gram system, which stays exact for skewed edges.
    let hit = v_add(origin, v_scale(dir, t));
    let local = v_sub(hit, top_left);
    let gram_uu = v_dot(edge_u, edge_u);
    let gram_uv = v_dot(edge_u, edge_v);
    let gram_vv = v_dot(edge_v, edge_v);
    let gram_det = gram_uu * gram_vv - gram_uv * gram_uv;
    if gram_det.abs() < 1e-12 {
        return None; // edges are (near-)collinear or zero-length
    }
    let along_u = v_dot(local, edge_u);
    let along_v = v_dot(local, edge_v);
    let u = (gram_vv * along_u - gram_uv * along_v) / gram_det;
    let v = (gram_uu * along_v - gram_uv * along_u) / gram_det;

    let unit = 0.0..=1.0;
    (unit.contains(&u) && unit.contains(&v)).then_some(([u, v], t))
}

/// Turn an [`ImageSprite`] into the backend-agnostic [`QuadDraw`]: four
/// world corners (`TL, TR, BL, BR`) plus the draw parameters copied from
/// the sprite.
///
/// Returns `None` only for a single-sided (`double_sided == false`)
/// [`ImageFacing::World`] quad whose `u × v` normal does not point toward
/// the camera position — which also covers an edge-on or zero-area quad.
/// Billboards and double-sided quads always resolve. The camera is
/// consulted only for the billboard axes and that cull test.
fn resolve_quad(sprite: &ImageSprite, camera: &Camera) -> Option<QuadDraw> {
    let eye = [
        camera.pos[0] as f32,
        camera.pos[1] as f32,
        camera.pos[2] as f32,
    ];
    let view = v_norm([
        camera.forward[0] as f32,
        camera.forward[1] as f32,
        camera.forward[2] as f32,
    ]);

    // Unit axes of the quad: `axis_u` runs along image columns, `axis_v`
    // along image rows.
    let (axis_u, axis_v) = match sprite.facing {
        ImageFacing::World { u, v } => (v_norm(u), v_norm(v)),
        ImageFacing::Billboard { up } => {
            // Horizontal axis is perpendicular to both `up` and the view
            // direction. If the two are parallel the cross product
            // vanishes, so use the camera's right vector instead.
            let sideways = v_norm(v_cross(up, view));
            let across = if v_dot(sideways, sideways) < 1e-12 {
                v_norm([
                    camera.right[0] as f32,
                    camera.right[1] as f32,
                    camera.right[2] as f32,
                ])
            } else {
                sideways
            };
            // Row axis is perpendicular to view and `across`; flip it if
            // needed so rows grow away from `up` (top edge toward `up`).
            let rows = v_norm(v_cross(view, across));
            let rows = if v_dot(rows, up) > 0.0 {
                v_scale(rows, -1.0)
            } else {
                rows
            };
            (across, rows)
        }
    };

    let span_u = v_scale(axis_u, sprite.size[0]);
    let span_v = v_scale(axis_v, sprite.size[1]);
    let top_left = sprite.origin;
    let top_right = v_add(top_left, span_u);
    let bottom_left = v_add(top_left, span_v);
    let bottom_right = v_add(top_right, span_v);

    // Only fixed world quads can face away; billboards always face us.
    let cullable = !sprite.double_sided && matches!(sprite.facing, ImageFacing::World { .. });
    // Front-facing means the quad normal points toward the camera.
    if cullable && v_dot(v_cross(span_u, span_v), v_sub(eye, top_left)) <= 0.0 {
        return None;
    }

    Some(QuadDraw {
        corners: [top_left, top_right, bottom_left, bottom_right],
        image: sprite.image,
        tint: sprite.tint,
        depth_test: sprite.depth_test,
        alpha_cutoff: sprite.alpha_cutoff,
    })
}

/// Renderer-internal backend; never exposes wgpu or softbuffer types.
/// Each variant holds its backend's full state behind a `Box` (the GPU
/// one owns the whole wgpu device/queue/pipelines), so the enum itself
/// stays pointer-sized whichever backend is active.
enum BackendImpl {
    // Both variants boxed so the enum stays small regardless of which
    // backend's state is larger (clippy::large_enum_variant).
    /// CPU path state.
    Cpu(Box<CpuBackend>),
    /// GPU path state.
    Gpu(Box<GpuBackend>),
}

/// Unified renderer over the CPU and GPU paths. See the crate docs.
pub struct SceneRenderer {
    /// The backend chosen at construction (CPU or GPU state, boxed).
    inner: BackendImpl,
    /// Handles for dynamically added sprite instances (see
    /// [`Self::add_sprite_instance`]). Reset by [`Self::set_sprites`].
    dyn_map: DynInstanceMap,
}

impl SceneRenderer {
    /// Create a renderer for `window`, which may be any
    /// [`raw-window-handle`] provider (winit, SDL, GLFW, …) wrapped in an
    /// `Arc`. `size` is the window's initial physical framebuffer size in
    /// pixels; later changes are reported by the host through
    /// [`Self::resize`]. Taking the size as a plain argument keeps the
    /// facade independent of any one windowing library's size API.
    ///
    /// The GPU backend is used when `opts.want_gpu` is set and WGPU
    /// initialises; in every other case the CPU backend is built.
    /// **Never fails** — a missing/incompatible GPU is reported on
    /// stderr and the CPU path is returned instead.
    ///
    /// [`raw-window-handle`]: raw_window_handle
    #[cfg(not(target_arch = "wasm32"))]
    #[must_use]
    pub fn new<W>(window: Arc<W>, size: (u32, u32), opts: &RenderOptions) -> Self
    where
        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
    {
        // Try the GPU first (only when asked); `None` means "use the CPU".
        let gpu = if opts.want_gpu {
            match GpuBackend::new(Arc::clone(&window), size, opts) {
                Ok(backend) => Some(backend),
                Err(e) => {
                    eprintln!(
                        "roxlap-render: GPU init failed ({e}); falling back to the CPU renderer",
                    );
                    None
                }
            }
        } else {
            None
        };

        let inner = match gpu {
            Some(backend) => BackendImpl::Gpu(Box::new(backend)),
            None => BackendImpl::Cpu(Box::new(CpuBackend::new(window, size, opts))),
        };
        Self {
            inner,
            dyn_map: DynInstanceMap::default(),
        }
    }

    /// wasm/WebGPU entry point: create a renderer over an HTML `canvas`.
    /// `size` is the canvas's initial framebuffer size in pixels; the
    /// host reports later changes via [`Self::resize`].
    ///
    /// Async because the browser drives wgpu's adapter/device requests
    /// through its event loop — `await` it inside a
    /// `wasm_bindgen_futures::spawn_local` task. The GPU (WebGPU)
    /// backend is selected when `opts.want_gpu` and WebGPU is available;
    /// otherwise (no WebGPU, or init failed) the CPU opticast path is
    /// used, presented through a WebGL2 blit on the same canvas.
    /// **Never fails** — the failure message is logged to the browser
    /// console.
    #[cfg(target_arch = "wasm32")]
    pub async fn new_from_canvas_async(
        canvas: web_sys::HtmlCanvasElement,
        size: (u32, u32),
        opts: &RenderOptions,
    ) -> Self {
        let gpu = if opts.want_gpu {
            // `SurfaceTarget::Canvas` moves the canvas into wgpu, so the
            // GPU attempt works on a clone and the original stays
            // available for the CPU fallback.
            match GpuBackend::new_async(canvas.clone(), size, opts).await {
                Ok(backend) => Some(backend),
                Err(e) => {
                    let message =
                        format!("roxlap-render: WebGPU init failed ({e}); using the CPU renderer");
                    web_sys::console::warn_1(&message.into());
                    None
                }
            }
        } else {
            None
        };

        let inner = match gpu {
            Some(backend) => BackendImpl::Gpu(Box::new(backend)),
            None => BackendImpl::Cpu(Box::new(CpuBackend::new_from_canvas(canvas, size, opts))),
        };
        Self {
            inner,
            dyn_map: DynInstanceMap::default(),
        }
    }

    /// Report which backend this renderer ended up with.
    #[must_use]
    pub fn backend(&self) -> Backend {
        match &self.inner {
            BackendImpl::Gpu(_) => Backend::Gpu,
            BackendImpl::Cpu(_) => Backend::Cpu,
        }
    }

    /// Description of the GPU adapter in use, or `None` when running on
    /// the CPU backend.
    #[must_use]
    pub fn adapter_info(&self) -> Option<&str> {
        match self.inner {
            BackendImpl::Cpu(_) => None,
            BackendImpl::Gpu(ref gpu) => Some(gpu.adapter_info()),
        }
    }

    /// Hand the GPU marcher an equirectangular sky panorama (RGBA8,
    /// `w×h`) to sample. Does nothing on the CPU backend, which samples
    /// the [`Sky`] carried by each [`FrameParams`] instead.
    pub fn set_sky_panorama(&mut self, rgba: &[u8], w: u32, h: u32) {
        match &mut self.inner {
            BackendImpl::Gpu(gpu) => {
                gpu.set_sky_panorama(rgba, w, h);
            }
            BackendImpl::Cpu(_) => {}
        }
    }

    /// Track a window resize. The CPU backend resizes its framebuffer
    /// lazily, so this mainly matters for the GPU swapchain — but it is
    /// safe to call on either backend.
    pub fn resize(&mut self, width: u32, height: u32) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.resize(width, height),
            BackendImpl::Gpu(ref mut gpu) => gpu.resize(width, height),
        }
    }

    /// Composite `scene` as seen from `camera`, using `frame` params,
    /// into the backend's frame buffer — **without presenting**. On the
    /// CPU backend this fills the sky and runs the opticast compositor
    /// into an owned buffer; on the GPU backend it uploads/refreshes the
    /// scene, runs the compute marcher + sprite pass, and acquires (but
    /// does not present) the swapchain frame.
    ///
    /// Finish the frame with exactly one of [`present`](Self::present)
    /// (no overlay) or [`paint_egui`](Self::paint_egui) (UI overlay).
    /// Calling `render` again without finishing drops the pending frame.
    pub fn render(&mut self, scene: &mut Scene, camera: &Camera, frame: &FrameParams) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.render(scene, camera, frame),
            BackendImpl::Gpu(ref mut gpu) => gpu.render(scene, camera, frame),
        }
    }

    /// Overlay world-space [`Line3`] segments on the frame that
    /// [`render`](Self::render) composited, reusing that frame's camera,
    /// projection and depth buffer. Call it **after**
    /// [`render`](Self::render) and **before** [`present`](Self::present)
    /// / [`paint_egui`](Self::paint_egui): the lines are written into the
    /// framebuffer, so a later `paint_egui` still draws its panels on
    /// top of them.
    ///
    /// `camera` must be the one the last frame rendered with (the
    /// projection is taken from that frame). Segments with
    /// `Line3::depth_test` set are occluded by nearer rendered geometry;
    /// always-on-top segments ignore depth. See [`Line3`] for colour /
    /// width / blend semantics.
    pub fn draw_lines(&mut self, camera: &Camera, lines: &[Line3]) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.draw_lines(camera, lines),
            BackendImpl::Gpu(ref mut gpu) => gpu.draw_lines(camera, lines),
        }
    }

    /// Upload (or replace) an RGBA8 image and get back a stable
    /// [`ImageId`] for use with [`draw_images`](Self::draw_images).
    /// `rgba` is row-major, `width * height * 4` bytes, with **straight**
    /// (un-premultiplied) alpha. The texture is kept until
    /// [`drop_image`](Self::drop_image), which keeps the per-frame draw
    /// call cheap. Sampling is nearest-neighbour (pixel-art friendly —
    /// no blurring).
    ///
    /// Malformed input (wrong byte count or a zero dimension) yields
    /// `ImageId(0)`; such an id draws nothing.
    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> ImageId {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.upload_image(rgba, width, height),
            BackendImpl::Gpu(ref mut gpu) => gpu.upload_image(rgba, width, height),
        }
    }

    /// Free a texture previously uploaded with
    /// [`upload_image`](Self::upload_image). Do not reuse `id` afterwards
    /// (a later `upload_image` may hand the slot back out under a fresh
    /// id).
    pub fn drop_image(&mut self, id: ImageId) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.drop_image(id),
            BackendImpl::Gpu(ref mut gpu) => gpu.drop_image(id),
        }
    }

    /// Overlay 2D [`ImageSprite`]s on the frame [`render`](Self::render)
    /// composited — flat textured quads placed in world space, drawn
    /// with that frame's camera + projection + depth buffer. The
    /// contract matches [`draw_lines`](Self::draw_lines): call **after**
    /// [`render`](Self::render) and **before** [`present`](Self::present)
    /// / [`paint_egui`](Self::paint_egui).
    ///
    /// UVs are perspective-correct (no affine warp on an obliquely-viewed
    /// quad). Depth-tested sprites are occluded by nearer rendered
    /// geometry (with a bias to avoid z-fighting on a coincident face);
    /// the texture's straight alpha + the [`ImageSprite::tint`] composite
    /// over the scene. `camera` must be the one the last frame rendered.
    ///
    /// Sprites that `resolve_quad` rejects are dropped here; if none
    /// remain the backend is not called at all.
    pub fn draw_images(&mut self, camera: &Camera, images: &[ImageSprite]) {
        // An empty `images` slice falls out naturally: nothing is
        // resolved, so the emptiness check below returns.
        let mut quads: Vec<QuadDraw> = Vec::new();
        quads.extend(
            images
                .iter()
                .filter_map(|sprite| resolve_quad(sprite, camera)),
        );
        if quads.is_empty() {
            return;
        }
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.draw_images(camera, &quads),
            BackendImpl::Gpu(ref mut gpu) => gpu.draw_images(camera, &quads),
        }
    }

    /// Map a world point to window pixel coordinates `(x, y)` using the
    /// projection of the **last rendered frame** — the backend-correct
    /// `world → screen` inverse of [`view_ray`](Self::view_ray). Yields
    /// `None` before the first frame, or when the point lies at/behind
    /// the camera near plane.
    ///
    /// Each backend applies its own projection (CPU `setcamera`
    /// `hx/hy/hz`, GPU vertical-FOV pinhole), so hosts never have to
    /// reconstruct it. For points that are in front of the camera but
    /// off-screen, the returned `(x, y)` may lie outside `[0, w) ×
    /// [0, h)`.
    #[must_use]
    pub fn project_point(&self, camera: &Camera, world: [f32; 3]) -> Option<(f32, f32)> {
        match self.inner {
            BackendImpl::Cpu(ref cpu) => cpu.project_point(camera, world),
            BackendImpl::Gpu(ref gpu) => gpu.project_point(camera, world),
        }
    }

    /// Screen→sprite pick: the nearest [`ImageSprite`] hit under window
    /// pixel `(x, y)`, resolving which texel was clicked. `sprites` is the
    /// same list passed to [`draw_images`](Self::draw_images) (image
    /// sprites are immediate-mode, so the caller owns the set). `None` for
    /// a miss, for an empty `sprites`, or when no view ray is available.
    ///
    /// The ray is intersected with each quad's plane and mapped to its
    /// `uv` / source texel. A texel whose alpha is below the sprite's
    /// [`ImageSprite::alpha_cutoff`] (and any fully-transparent texel) is
    /// **see-through** — the pick passes through it to a sprite behind.
    /// For [`depth_test`](ImageSprite::depth_test) sprites the hit is
    /// rejected when nearer scene geometry occludes that pixel (shares the
    /// depth convention + bias of [`pick`](Self::pick)).
    ///
    /// The scene depth is read through [`pick_depth`](Self::pick_depth)
    /// **at most once, and only when a depth-tested sprite actually
    /// reaches the occlusion test** — so on the GPU backend a pick over
    /// purely always-on-top sprites (or one that hits nothing) does not
    /// pay the click-time depth readback. A pixel with a negative or NaN
    /// coordinate is outside the window and is treated as having no scene
    /// depth (nothing occludes there).
    #[must_use]
    pub fn pick_image(
        &self,
        camera: &Camera,
        x: f64,
        y: f64,
        sprites: &[ImageSprite],
    ) -> Option<ImagePickHit> {
        if sprites.is_empty() {
            return None;
        }
        let dir = self.pixel_ray(camera, x, y)?;
        let dir = [dir[0] as f32, dir[1] as f32, dir[2] as f32];
        let dir_len = v_dot(dir, dir).sqrt();
        if dir_len < 1e-9 {
            return None;
        }
        let origin = [
            camera.pos[0] as f32,
            camera.pos[1] as f32,
            camera.pos[2] as f32,
        ];

        // Scene surface distance under this pixel, fetched lazily. The
        // outer `Option` is "not queried yet"; the inner one is the
        // `pick_depth` answer (sky / no-hit / out-of-bounds → `None`).
        let mut scene_t: Option<Option<f32>> = None;

        let mut best: Option<ImagePickHit> = None;
        for sprite in sprites {
            // Reuse the render-path resolve (back-face cull included), so
            // a single-sided quad that isn't drawn also can't be picked.
            let Some(q) = resolve_quad(sprite, camera) else {
                continue;
            };
            let Some(([a, b], t)) = ray_quad_uv(origin, dir, &q.corners) else {
                continue; // miss / parallel / behind
            };
            // `t` is in units of the un-normalised `dir`; convert to a
            // Euclidean distance so it is comparable with the scene depth.
            let d_eucl = t * dir_len;
            if best.as_ref().is_some_and(|cur| d_eucl >= cur.t) {
                continue; // a nearer sprite already won
            }

            let Some((iw, ih)) = self.image_dims(sprite.image) else {
                continue; // dropped / unknown image
            };
            let tx = ((a * iw as f32) as i32).clamp(0, iw as i32 - 1) as u32;
            let ty = ((b * ih as f32) as i32).clamp(0, ih as i32 - 1) as u32;

            // See-through test: a texel is solid when its alpha clears the
            // cutoff (and a fully-transparent texel is never solid).
            let cutoff_u8 = (sprite.alpha_cutoff.clamp(0.0, 1.0) * 255.0) as u32;
            let solid_thresh = cutoff_u8.max(1);
            if u32::from(self.image_alpha_at(sprite.image, tx, ty)) < solid_thresh {
                continue;
            }

            // Occlusion: a depth-tested sprite behind nearer geometry loses.
            if sprite.depth_test {
                let scene_depth = *scene_t.get_or_insert_with(|| {
                    // `as u32` saturates negative / NaN input to 0, which
                    // would silently sample column/row 0; such a pixel is
                    // out of bounds, so report "no depth" instead.
                    if x >= 0.0 && y >= 0.0 {
                        self.pick_depth(x as u32, y as u32)
                    } else {
                        None
                    }
                });
                if scene_depth.is_some_and(|st| d_eucl > st + PICK_DEPTH_BIAS) {
                    continue;
                }
            }

            best = Some(ImagePickHit {
                image: sprite.image,
                uv: [a, b],
                texel: (tx, ty),
                world: v_add(origin, v_scale(dir, t)),
                t: d_eucl,
            });
        }
        best
    }

    /// Width and height of an uploaded image, or `None` when `id` was
    /// dropped / never uploaded. Internal helper for
    /// [`Self::pick_image`].
    fn image_dims(&self, id: ImageId) -> Option<(u32, u32)> {
        match self.inner {
            BackendImpl::Cpu(ref cpu) => cpu.image_dims(id),
            BackendImpl::Gpu(ref gpu) => gpu.image_dims(id),
        }
    }

    /// Alpha channel of texel `(tx, ty)` of an uploaded image (`0` for
    /// an unknown id / out-of-range texel). Internal helper for
    /// [`Self::pick_image`].
    fn image_alpha_at(&self, id: ImageId, tx: u32, ty: u32) -> u8 {
        match self.inner {
            BackendImpl::Cpu(ref cpu) => cpu.image_alpha_at(id, tx, ty),
            BackendImpl::Gpu(ref gpu) => gpu.image_alpha_at(id, tx, ty),
        }
    }

    /// Enable or disable horizontal mirroring of the rendered 3D scene
    /// before display. The flip happens *before* any egui overlay, so
    /// the UI stays upright while the viewport un-mirrors — a fix for
    /// the engine's left-handed render. Both backends support it (CPU
    /// reverses the framebuffer rows; GPU mirrors the scene blit +
    /// line/image overlays). Picking/projection are unchanged, so a host
    /// that flips must mirror its cursor X (`width - x`) for ray casts.
    pub fn set_flip_x(&mut self, flip: bool) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.set_flip_x(flip),
            BackendImpl::Gpu(ref mut gpu) => gpu.set_flip_x(flip),
        }
    }

    /// Show the frame that [`render`](Self::render) composited, with no
    /// UI overlay. This is the counterpart of `render`; call
    /// [`paint_egui`](Self::paint_egui) instead when an egui UI should be
    /// drawn on top before presenting.
    pub fn present(&mut self) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.present(),
            BackendImpl::Gpu(ref mut gpu) => gpu.present(),
        }
    }

    /// Draw an egui UI over the frame [`render`](Self::render)
    /// composited and then present it (`hud` feature). The host drives
    /// egui itself (e.g. `egui` + `egui-winit`) and supplies the
    /// tessellated `jobs` ([`egui::Context::tessellate`]) together with
    /// the per-frame `textures` delta from [`egui::FullOutput`];
    /// `pixels_per_point` is the UI scale (`ctx.pixels_per_point()`).
    ///
    /// On the GPU backend painting goes through `egui-wgpu`; the CPU
    /// backend software-rasterises the tessellation into its
    /// framebuffer. Call this **instead of** [`present`](Self::present)
    /// — both finish the frame.
    #[cfg(feature = "hud")]
    pub fn paint_egui(
        &mut self,
        jobs: &[egui::ClippedPrimitive],
        textures: &egui::TexturesDelta,
        pixels_per_point: f32,
    ) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.paint_egui(jobs, textures, pixels_per_point),
            BackendImpl::Gpu(ref mut gpu) => gpu.paint_egui(jobs, textures, pixels_per_point),
        }
    }

    /// Register sprite models and their instances. The CPU backend
    /// turns them into a per-instance draw list; the GPU backend builds
    /// an instanced model registry. Call once at setup (or again to
    /// replace the set).
    ///
    /// Returns one [`SpriteModelId`] per entry of `set.models`, in order.
    /// Any handles from [`add_sprite_instance`](Self::add_sprite_instance)
    /// issued before this call become stale.
    pub fn set_sprites(&mut self, set: &SpriteSet) -> Vec<SpriteModelId> {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.set_sprites(set),
            BackendImpl::Gpu(ref mut gpu) => gpu.set_sprites(set),
        }
        // The new set replaces the whole instance world, so dynamic
        // instances added earlier no longer exist — forget their handles.
        self.dyn_map = DynInstanceMap::default();
        // Model handles are positional by construction (model index =
        // chain id on both backends), so the facade can mint them itself
        // and callers keep the handle rather than re-deriving the index.
        let model_count = set.models.len();
        let mut ids = Vec::with_capacity(model_count);
        ids.extend((0..model_count).map(SpriteModelId));
        ids
    }

    /// Bring one sprite model's geometry up to date after its content
    /// was edited (a carve or recolour of its `kv6`). `model` is the
    /// [`SpriteModelId`] returned by [`set_sprites`](Self::set_sprites);
    /// `kv6` is the model's **new** geometry — the caller owns the source
    /// of truth (e.g. a dense carve grid the surface-only `kv6` can't
    /// represent) and passes the refreshed mesh in here.
    ///
    /// This is a **backend-agnostic content refresh**, not a GPU upload:
    /// the renderer updates its stored model in whatever way the active
    /// backend requires. The instance set is not touched (an edit never
    /// moves or adds an instance), so on the GPU backend only that one
    /// model's voxel data is re-uploaded — through a slack-backed
    /// suballocator, one model's bytes rather than the whole registry —
    /// while the CPU backend swaps the cached `kv6` into each instance of
    /// the model. To add/remove models or change the instance set, use
    /// [`set_sprites`](Self::set_sprites).
    pub fn refresh_sprite_model(&mut self, model: SpriteModelId, kv6: &Kv6) {
        let model_index = model.0;
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.update_sprite_model(model_index, kv6),
            BackendImpl::Gpu(ref mut gpu) => gpu.update_sprite_model(model_index, kv6),
        }
    }

    /// Spawn one instance of an already-registered `model` at world
    /// `pos`, **incrementally** — the cheap streaming-spawn path shared
    /// by both backends (GPU: append to the instance buffer, growing by
    /// powers of two; CPU: push one pre-posed [`Sprite`]). The returned
    /// [`SpriteInstanceId`] is stable and is what
    /// [`remove_sprite_instance`](Self::remove_sprite_instance) takes.
    ///
    /// `model` must come from the current
    /// [`set_sprites`](Self::set_sprites) (a model registered there, even
    /// with zero initial instances). Dynamic instances live *after* the
    /// static set + any KFA limbs, so register those first.
    pub fn add_sprite_instance(&mut self, model: SpriteModelId, pos: [f32; 3]) -> SpriteInstanceId {
        // The backend reports the dynamic-list index it stored the
        // instance at; the handle map wraps it in a stable id.
        let slot = match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.add_dyn_instance(model.0, pos),
            BackendImpl::Gpu(ref mut gpu) => gpu.add_dyn_instance(model.0, pos),
        };
        self.dyn_map.alloc(slot as u32)
    }

    /// Despawn a dynamic sprite instance created by
    /// [`add_sprite_instance`](Self::add_sprite_instance). O(1) on both
    /// backends (swap-remove); all other dynamic handles remain valid.
    /// Returns `false` when the handle is stale / already removed, and
    /// `true` once the instance has been removed.
    pub fn remove_sprite_instance(&mut self, id: SpriteInstanceId) -> bool {
        let resolved = self.dyn_map.dyn_index(id);
        if let Some(slot) = resolved {
            // The backend reports which index (if any) was moved into
            // the freed slot so the handle map can follow it.
            let relocated = match self.inner {
                BackendImpl::Cpu(ref mut cpu) => cpu.remove_dyn_instance(slot as usize),
                BackendImpl::Gpu(ref mut gpu) => gpu.remove_dyn_instance(slot as usize),
            };
            self.dyn_map
                .remove(id, slot, relocated.map(|from| from as u32));
            true
        } else {
            false
        }
    }

    /// How many dynamic sprite instances (those created through
    /// [`add_sprite_instance`](Self::add_sprite_instance)) are currently
    /// alive.
    #[must_use]
    pub fn dynamic_sprite_count(&self) -> usize {
        let live = &self.dyn_map.order;
        live.len()
    }

    /// Register animated KFA sprites (one or more bone hierarchies).
    /// On the GPU backend each limb's kv6 is uploaded as an instanced
    /// model **once** (appended to the sprite registry) and the limb
    /// instances are seeded at their current pose; the CPU backend
    /// caches the posed limbs for drawing. Call once at setup, after
    /// [`set_sprites`](Self::set_sprites), and then drive motion every
    /// frame with [`update_kfa_poses`](Self::update_kfa_poses).
    ///
    /// Limbs are posed from the sprites' current
    /// [`kfaval`](roxlap_formats::kfa::KfaSprite::kfaval) (advance
    /// [`animsprite`](roxlap_formats::kfa::KfaSprite::animsprite) first
    /// if using a baked curve), which is why `kfas` is taken `&mut`.
    pub fn set_kfa_sprites(&mut self, kfas: &mut [KfaSprite]) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.set_kfa_sprites(kfas),
            BackendImpl::Gpu(ref mut gpu) => gpu.set_kfa_sprites(kfas),
        }
    }

    /// Re-pose the registered KFA sprites from their current
    /// `kfaval[]`. Call it every frame after advancing the animation
    /// (`kfa.animsprite(dt_ms)` or poking `kfaval[]`). The GPU backend
    /// takes the cheap transform-only update (no model-volume
    /// re-upload); the CPU backend re-solves limb transforms for the
    /// next [`render`](Self::render). Must follow a
    /// [`set_kfa_sprites`](Self::set_kfa_sprites) call made with the
    /// same sprites.
    pub fn update_kfa_poses(&mut self, kfas: &mut [KfaSprite]) {
        match self.inner {
            BackendImpl::Cpu(ref mut cpu) => cpu.update_kfa_poses(kfas),
            BackendImpl::Gpu(ref mut gpu) => gpu.update_kfa_poses(kfas),
        }
    }

    /// Carve the next z-layer off the [`SpriteSet::carve_model`] and
    /// re-upload it (the demo's `G` hotkey + GPU.12 copy-on-modify).
    /// GPU only: the CPU backend does nothing and reports `0`. Returns
    /// the number of voxels removed.
    pub fn carve_active_sprite(&mut self) -> u32 {
        if let BackendImpl::Gpu(gpu) = &mut self.inner {
            gpu.carve_active_sprite()
        } else {
            0
        }
    }

    /// Ask the next [`render`](Self::render) to capture its framebuffer
    /// so it can be fetched with [`take_capture`](Self::take_capture).
    /// CPU only (the GPU swapchain isn't read back) — does nothing on
    /// the GPU backend.
    pub fn request_capture(&mut self) {
        match &mut self.inner {
            BackendImpl::Cpu(cpu) => {
                cpu.request_capture();
            }
            BackendImpl::Gpu(_) => {}
        }
    }

    /// Hand over the most recently captured frame as packed
    /// `0x00RRGGBB` pixels plus its dimensions. `None` when no capture is
    /// ready, and always `None` on the GPU backend.
    pub fn take_capture(&mut self) -> Option<(Vec<u32>, u32, u32)> {
        if let BackendImpl::Cpu(cpu) = &mut self.inner {
            cpu.take_capture()
        } else {
            None
        }
    }

    /// Screen→world picking input: the world-space hit distance `t` at
    /// window pixel `(x, y)` in the **last rendered frame**. `None` for
    /// out-of-bounds pixels and for sky / no-hit. The host rebuilds the
    /// world hit point as `cam.pos + t * normalize(ray_dir)`, with
    /// `ray_dir` being the same per-pixel ray the frame was rendered
    /// with (see the backend's projection).
    ///
    /// `t` measures the distance to the nearest **scene-grid** surface
    /// (terrain + grids). Sprites do not occlude it (the sprite pass
    /// reads depth read-only), so a cursor sprite under the pointer is
    /// transparent to the pick.
    ///
    /// Cost: the CPU backend reads its in-memory z-buffer (free); the
    /// GPU backend stages the depth buffer and blocks on a device poll
    /// (cheap at click time — do not call every frame). The GPU path
    /// only has depth when the last frame drew sprites (`write_depth`).
    #[must_use]
    pub fn pick_depth(&self, x: u32, y: u32) -> Option<f32> {
        match self.inner {
            BackendImpl::Cpu(ref cpu) => cpu.pick_depth(x, y),
            BackendImpl::Gpu(ref gpu) => gpu.pick_depth(x, y),
        }
    }

    /// Un-normalised world-space view-ray direction through window pixel
    /// `(x, y)`, using the projection of the **last rendered frame**.
    /// The two backends project differently (CPU `setcamera` vs GPU
    /// vertical-FOV pinhole); this method hides which one is active.
    /// `None` before the first frame. Intersect the ray with a plane for
    /// tile picking, or use [`Self::pick`] to resolve a voxel.
    #[must_use]
    pub fn pixel_ray(&self, camera: &Camera, x: f64, y: f64) -> Option<[f64; 3]> {
        match self.inner {
            BackendImpl::Cpu(ref cpu) => cpu.pixel_ray(camera, x, y),
            BackendImpl::Gpu(ref gpu) => gpu.pixel_ray(camera, x, y),
        }
    }

    /// Canonical screen→world unproject: the complete view [`Ray`]
    /// (origin at `camera.pos`, unit-length direction) through window
    /// pixel `(x, y)`, under whichever projection the last frame used.
    /// This is the single entry point both backends honour — hosts never
    /// reconstruct the projection. `None` before the first frame or when
    /// the ray is degenerate (direction length below `1e-12`).
    ///
    /// Compose with [`roxlap_scene::Scene::raycast`] for depth-free
    /// picking that's identical on CPU and GPU:
    /// `renderer.view_ray(cam, x, y).and_then(|r| scene.raycast(r.origin, r.dir, max))`.
    #[must_use]
    pub fn view_ray(&self, camera: &Camera, x: f64, y: f64) -> Option<Ray> {
        let [dx, dy, dz] = self.pixel_ray(camera, x, y)?;
        let norm = (dx * dx + dy * dy + dz * dz).sqrt();
        if norm < 1e-12 {
            return None;
        }
        let origin = glam::DVec3::from_array([camera.pos[0], camera.pos[1], camera.pos[2]]);
        let dir = glam::DVec3::new(dx / norm, dy / norm, dz / norm);
        Some(Ray { origin, dir })
    }

    /// One-call screen→world voxel pick: unproject pixel `(x, y)` with
    /// the active backend's projection, read the last frame's depth at
    /// that pixel, rebuild the world hit point, and resolve it to the
    /// owning grid + grid-local voxel through [`Scene::resolve_voxel`].
    /// `None` on sky / no-hit, for a degenerate ray, or when no grid
    /// claims the surface.
    ///
    /// `scene` and `camera` must be the ones the last frame rendered;
    /// the projection (size + FOV / `hx,hy,hz`) is taken from that
    /// frame. Cheap on CPU (in-memory z-buffer); on GPU it stages the
    /// depth buffer (a click-time device poll — not per frame).
    #[must_use]
    pub fn pick(&self, scene: &Scene, camera: &Camera, x: u32, y: u32) -> Option<PickHit> {
        let ray = self.pixel_ray(camera, f64::from(x), f64::from(y))?;
        let depth = f64::from(self.pick_depth(x, y)?);
        let [rx, ry, rz] = ray;
        let ray_len = (rx * rx + ry * ry + rz * rz).sqrt();
        if ray_len < 1e-9 {
            return None;
        }
        // world = cam.pos + depth · (ray / |ray|)
        let scale = depth / ray_len;
        let hit = glam::DVec3::new(
            camera.pos[0] + rx * scale,
            camera.pos[1] + ry * scale,
            camera.pos[2] + rz * scale,
        );
        scene
            .resolve_voxel(hit, glam::DVec3::from_array(ray))
            .map(|(grid, voxel)| {
                #[allow(clippy::cast_possible_truncation)]
                let world = [hit.x as f32, hit.y as f32, hit.z as f32];
                PickHit { world, grid, voxel }
            })
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// The handle map must survive the backends' swap-remove indexing:
    /// drive a model `DynInstanceMap` against a `Vec` "backend" that
    /// swap-removes, and check every live handle keeps resolving to its
    /// own payload through a sequence of adds + removes.
    #[test]
    fn dyn_instance_map_survives_swap_removes() {
        let mut map = DynInstanceMap::default();
        // The "backend": payload per dynamic index; swap_remove mirrors
        // both backends' remove_dyn_instance.
        let mut backend: Vec<u32> = Vec::new();
        // Our bookkeeping: handle -> the payload we expect it to address.
        let mut expect: Vec<(SpriteInstanceId, u32)> = Vec::new();

        // Append one payload to the backend, allocate a handle for the
        // index it landed on, and record the (handle, payload) pair.
        let add = |map: &mut DynInstanceMap,
                   backend: &mut Vec<u32>,
                   expect: &mut Vec<(SpriteInstanceId, u32)>,
                   payload: u32| {
            let dyn_index = backend.len() as u32;
            backend.push(payload);
            let id = map.alloc(dyn_index);
            expect.push((id, payload));
        };

        // Six instances with payloads 0..=5.
        for p in 0..6 {
            add(&mut map, &mut backend, &mut expect, p);
        }

        // Remove a middle handle (payload 2) and a later one (payload 4),
        // plus the current last — covering swap and no-swap paths.
        for victim_payload in [2u32, 4, 5] {
            let pos = expect
                .iter()
                .position(|&(_, p)| p == victim_payload)
                .unwrap();
            let (id, _) = expect.remove(pos);
            let dyn_index = map.dyn_index(id).expect("live handle resolves");
            // Backend swap-remove + report moved index (old last), exactly
            // like remove_dyn_instance on both backends.
            let last = backend.len() - 1;
            backend.swap_remove(dyn_index as usize);
            // `None` when the victim was already the last element, i.e.
            // nothing was moved into its slot.
            let moved = (dyn_index as usize != last).then_some(last as u32);
            map.remove(id, dyn_index, moved);
            // The removed handle is now stale.
            assert!(map.dyn_index(id).is_none(), "removed handle is stale");
        }

        // Every surviving handle still resolves to its own payload.
        for &(id, payload) in &expect {
            let idx = map.dyn_index(id).expect("survivor resolves");
            assert_eq!(
                backend[idx as usize], payload,
                "handle addresses its payload"
            );
        }
        // The map, the backend and our bookkeeping agree on the live count.
        assert_eq!(map.order.len(), backend.len());
        assert_eq!(backend.len(), expect.len());
    }

    /// `RenderOptions::default()` must not request the GPU, and its
    /// `clear_sky` colour must leave the top byte clear.
    #[test]
    fn options_default_is_cpu_intent() {
        let o = RenderOptions::default();
        assert!(!o.want_gpu);
        assert_eq!(o.clear_sky & 0xFF00_0000, 0, "clear_sky is 0x00RRGGBB");
    }

    /// A camera at the origin looking down +Y (voxlap z-down world): right
    /// = +X, down = +Z, forward = +Y. Under the standard (right-handed)
    /// cross product this basis is left-handed: `right × down = -Y`, so
    /// `down × right == forward` (equivalently `right × down == -forward`).
    fn cam_looking_y() -> Camera {
        Camera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 0.0, 1.0],
            forward: [0.0, 1.0, 0.0],
        }
    }

    /// `resolve_quad` lays the corners out as `[TL, TR, BL, BR]` with
    /// `TL = origin`, `TR = TL + u·w`, `BL = TL + v·h`, `BR = TR + v·h`.
    #[test]
    fn world_quad_corner_layout() {
        // Top-left at (-5, 10, -5); u = +X (width), v = +Z (down). A
        // 10×10 quad on the y = 10 plane in front of the camera. Assuming
        // `v_cross` is the standard cross product, its normal u × v is
        // -Y, i.e. it points back at the camera; `double_sided` is set
        // anyway, so the back-face cull cannot reject it.
        let sprite = ImageSprite {
            image: ImageId(0),
            origin: [-5.0, 10.0, -5.0],
            facing: ImageFacing::World {
                u: [1.0, 0.0, 0.0],
                v: [0.0, 0.0, 1.0],
            },
            size: [10.0, 10.0],
            tint: 0xFFFF_FFFF,
            alpha_cutoff: 0.0,
            depth_test: true,
            double_sided: true,
        };
        let q = resolve_quad(&sprite, &cam_looking_y()).expect("front-facing");
        assert_eq!(q.corners[0], [-5.0, 10.0, -5.0], "TL = origin");
        assert_eq!(q.corners[1], [5.0, 10.0, -5.0], "TR = origin + u·size");
        assert_eq!(q.corners[2], [-5.0, 10.0, 5.0], "BL = origin + v·size");
        assert_eq!(q.corners[3], [5.0, 10.0, 5.0], "BR = origin + u + v");
    }

    /// With `double_sided = false`, exactly one of the two windings of the
    /// same world-space square may survive `resolve_quad`.
    #[test]
    fn world_quad_backface_culls_when_single_sided() {
        let x_axis = [1.0, 0.0, 0.0];
        let z_axis = [0.0, 0.0, 1.0];

        // Swapped winding: u = +Z, v = +X. The component-wise u × v is +Y,
        // which points from the quad (at y = 10) away from the camera at the
        // origin.
        let swapped = ImageSprite {
            image: ImageId(0),
            origin: [-5.0, 10.0, -5.0],
            facing: ImageFacing::World {
                u: z_axis,
                v: x_axis,
            },
            size: [10.0, 10.0],
            tint: 0xFFFF_FFFF,
            alpha_cutoff: 0.0,
            depth_test: true,
            double_sided: false,
        };
        let swapped_drawn = resolve_quad(&swapped, &cam_looking_y()).is_some();

        // Natural winding on the same plane: u = +X, v = +Z, so u × v is -Y.
        // Everything except `facing` is carried over unchanged.
        let natural = ImageSprite {
            facing: ImageFacing::World {
                u: x_axis,
                v: z_axis,
            },
            ..swapped
        };
        let natural_drawn = resolve_quad(&natural, &cam_looking_y()).is_some();

        // Which winding counts as front-facing is not pinned here; only that
        // one is culled and the other is drawn.
        assert!(
            swapped_drawn ^ natural_drawn,
            "exactly one winding is front-facing"
        );
    }

    /// A double-sided world quad must resolve for both windings of its
    /// spanning axes.
    #[test]
    fn double_sided_never_culls() {
        // (u, v) pairs: first the swapped winding, then the natural one.
        let windings = [
            ([0.0, 0.0, 1.0], [1.0, 0.0, 0.0]),
            ([1.0, 0.0, 0.0], [0.0, 0.0, 1.0]),
        ];
        for (u, v) in windings {
            let sprite = ImageSprite {
                image: ImageId(0),
                origin: [-5.0, 10.0, -5.0],
                facing: ImageFacing::World { u, v },
                size: [10.0, 10.0],
                tint: 0xFFFF_FFFF,
                alpha_cutoff: 0.0,
                depth_test: true,
                double_sided: true,
            };
            assert!(resolve_quad(&sprite, &cam_looking_y()).is_some());
        }
    }

    /// `ray_quad_uv` maps a ray through the quad centre to uv = (.5, .5) at
    /// the plane distance, and a ray aimed near the top-left corner to a
    /// small uv.
    #[test]
    fn ray_quad_uv_center_and_corners() {
        // 10×10 quad on the y = 10 plane, corners listed TL, TR, BL, BR
        // (u = +X, v = +Z). All rays start at the origin.
        let eye = [0.0, 0.0, 0.0];
        let quad = [
            [-5.0, 10.0, -5.0], // TL
            [5.0, 10.0, -5.0],  // TR
            [-5.0, 10.0, 5.0],  // BL
            [5.0, 10.0, 5.0],   // BR
        ];

        // Straight down +Y: hits the middle of the quad.
        let (centre_uv, dist) = ray_quad_uv(eye, [0.0, 1.0, 0.0], &quad).expect("center hit");
        let u_err = (centre_uv[0] - 0.5).abs();
        let v_err = (centre_uv[1] - 0.5).abs();
        assert!(u_err < 1e-5 && v_err < 1e-5, "centre → (.5,.5)");
        let dist_err = (dist - 10.0).abs();
        assert!(dist_err < 1e-4, "t = plane distance");

        // Aimed at (-4, 10, -4), close to the TL corner → uv near (0, 0).
        let (corner_uv, _) = ray_quad_uv(eye, [-4.0, 10.0, -4.0], &quad).unwrap();
        assert!(
            corner_uv[0] < 0.2 && corner_uv[1] < 0.2,
            "toward TL → small uv"
        );
    }

    /// `ray_quad_uv` returns `None` for rays that point away from the quad,
    /// run parallel to its plane, or hit the plane outside the quad.
    #[test]
    fn ray_quad_uv_misses_outside_and_behind() {
        // Same 10×10 quad on the y = 10 plane (TL, TR, BL, BR).
        let quad = [
            [-5.0, 10.0, -5.0],
            [5.0, 10.0, -5.0],
            [-5.0, 10.0, 5.0],
            [5.0, 10.0, 5.0],
        ];

        // Heading along -Y from the origin: the plane lies behind the ray.
        let behind = ray_quad_uv([0.0, 0.0, 0.0], [0.0, -1.0, 0.0], &quad);
        assert!(behind.is_none());

        // Heading along +X: parallel to the quad plane, never intersects.
        let parallel = ray_quad_uv([0.0, 0.0, 0.0], [1.0, 0.0, 0.0], &quad);
        assert!(parallel.is_none());

        // Starting at x = 100 and heading along +Y: reaches the plane far
        // outside the quad's extent.
        let outside = ray_quad_uv([100.0, 0.0, 0.0], [0.0, 1.0, 0.0], &quad);
        assert!(outside.is_none());
    }

    /// A billboard sprite must resolve even when single-sided, with edge
    /// vectors perpendicular to the view direction and to each other, and
    /// with its rows running away from the supplied `up` vector.
    #[test]
    fn billboard_axes_orthogonal_and_top_toward_up() {
        // World up is -Z here (z-down world).
        let up = [0.0, 0.0, -1.0];
        let billboard = ImageSprite {
            image: ImageId(0),
            origin: [0.0, 50.0, 0.0],
            facing: ImageFacing::Billboard { up },
            size: [4.0, 4.0],
            tint: 0xFFFF_FFFF,
            alpha_cutoff: 0.0,
            depth_test: false,
            double_sided: false, // billboards must NEVER cull
        };
        let quad =
            resolve_quad(&billboard, &cam_looking_y()).expect("billboard always faces camera");

        // Edge vectors measured from the top-left corner.
        let top_left = quad.corners[0];
        let along_u = v_sub(quad.corners[1], top_left); // TR - TL = u·size
        let along_v = v_sub(quad.corners[2], top_left); // BL - TL = v·size
        let view = [0.0, 1.0, 0.0];

        // Each listed pair must be perpendicular (dot product ≈ 0).
        let perpendicular_pairs = [
            (along_u, view, "u ⟂ view"),
            (along_v, view, "v ⟂ view"),
            (along_u, along_v, "u ⟂ v"),
        ];
        for (a, b, label) in perpendicular_pairs {
            assert!(v_dot(a, b).abs() < 1e-5, "{}", label);
        }

        // v runs top→bottom, so it must point against `up`.
        assert!(
            v_dot(along_v, up) < 0.0,
            "rows grow away from `up` (top edge toward up)"
        );
    }
}