Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::{LoadState, RenderAssetUsages};
42use bevy::camera::RenderTarget;
43use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
44use bevy::core_pipeline::tonemapping::Tonemapping;
45use bevy::ecs::query::QueryItem;
46use bevy::light::GlobalAmbientLight;
47use bevy::log::LogPlugin;
48use bevy::prelude::*;
49use bevy::render::camera::ExtractedCamera;
50use bevy::render::render_asset::RenderAssets;
51use bevy::render::render_graph::{
52    Node, NodeRunError, RenderGraphContext, RenderGraphExt, RenderLabel, ViewNode, ViewNodeRunner,
53};
54use bevy::render::render_resource::{
55    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d,
56    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
57    TextureDimension, TextureFormat, TextureUsages,
58};
59use bevy::render::renderer::RenderQueue;
60use bevy::render::renderer::{RenderContext, RenderDevice};
61use bevy::render::texture::GpuImage;
62use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
63use bevy::render::view::{ExtractedView, Hdr, ViewDepthTexture};
64use bevy::render::{Extract, Render, RenderApp, RenderSystems};
65use bevy::window::{ExitCondition, WindowPlugin};
66use bevy_obj::ObjPlugin;
67use std::fs::File;
68use std::io::Read as IoRead;
69use std::path::{Path, PathBuf};
70#[cfg(test)]
71use std::sync::atomic::{AtomicUsize, Ordering};
72use std::sync::{Arc, Mutex, OnceLock};
73use std::time::Duration;
74
75use crate::{
76    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
77    TargetingPolicy,
78};
79use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
80
/// Watchdog timeout for a single render, in seconds.
///
/// Bounds how long any single render path waits before declaring failure.
/// 180s accommodates first-run wgpu shader compilation on Windows, which
/// can take well over 60s on a cold GPU cache (see commit 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 180;

/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. Historically set to 3 as a conservative cushion; reducing directly
/// shortens per-viewpoint wall-clock since `app.update()` in the batch path
/// is not rate-limited. Validated against the pixel-exact hardware test
/// `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;

/// Warmup frames at the start of each `PersistentRenderer::render()` call.
///
/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
/// batch because `extract_and_continue_headless_batch` writes the next
/// camera transform *and* clears the shared GPU readback buffers in the
/// same tick — so the in-flight copy from the previous viewpoint has
/// already drained by the time the next capture is gated.
///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
///
/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
/// ticks of warmup gives Windows/DX12 enough room to drain the previous
/// readback and capture the post-propagation color target:
///   - tick 0: transforms propagate, render runs (no copy enabled)
///   - tick 1: previous in-flight readback drains (no copy enabled)
///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
///   - tick 3: shared buffers populated → captured → batch finalized
const PERSISTENT_WARMUP_FRAMES: u32 = 3;

/// Margin, in meters, added to the near plane when classifying depth samples.
///
/// A sample counts as foreground only if it is strictly greater than
/// `near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS`; samples at or below that
/// threshold are tallied as "near or zero" by `DepthReadbackSummary`.
const DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS: f64 = 1e-5;

/// Fraction of the far plane at which a depth sample is treated as background.
///
/// A sample counts as foreground only if it is strictly less than
/// `far * DEPTH_CAPTURE_FAR_PLANE_FRACTION`; samples at or beyond that
/// threshold are treated as far-plane / background readings.
const DEPTH_CAPTURE_FAR_PLANE_FRACTION: f64 = 0.999;
123
124fn persistent_warmup_camera_transform() -> Transform {
125    crate::generate_viewpoints(&crate::ViewpointConfig::default())
126        .into_iter()
127        .next()
128        .unwrap_or_else(|| Transform::from_xyz(0.0, 0.0, 0.5).looking_at(Vec3::ZERO, Vec3::Y))
129}
130
/// Report whether render tracing is enabled via `BEVY_SENSOR_RENDER_TRACE`.
///
/// Tracing is on whenever the variable is present in the process environment,
/// whatever its value (empty and non-Unicode values included). Nothing is
/// cached: each call performs a fresh environment lookup, so toggling the
/// variable at runtime takes effect on the next call. Gate all tracing output
/// behind this.
#[inline]
fn render_trace_enabled() -> bool {
    // `var_os` skips the UTF-8 validation done by `var`, so a variable that is
    // set to a non-Unicode value still counts as "enabled".
    std::env::var_os("BEVY_SENSOR_RENDER_TRACE").is_some()
}
137
138fn is_capture_foreground_depth(depth: f64, near: f64, far: f64) -> bool {
139    depth.is_finite()
140        && near.is_finite()
141        && far.is_finite()
142        && depth > near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS
143        && depth < far * DEPTH_CAPTURE_FAR_PLANE_FRACTION
144}
145
146fn is_all_background_depth(depth: &[f64], far: f64) -> bool {
147    !depth.is_empty()
148        && far.is_finite()
149        && depth
150            .iter()
151            .all(|value| value.is_finite() && *value >= far * DEPTH_CAPTURE_FAR_PLANE_FRACTION)
152}
153
/// Report whether every complete RGBA pixel in `rgba` is the same colour.
///
/// The buffer is read as consecutive 4-byte pixels; trailing bytes that do not
/// form a whole pixel are ignored. A buffer without a single complete pixel is
/// not considered uniform.
fn is_uniform_rgba_frame(rgba: &[u8]) -> bool {
    let mut pixels = rgba.chunks_exact(4);
    match pixels.next() {
        // Compare every remaining pixel against the first one.
        Some(reference) => pixels.all(|candidate| candidate == reference),
        None => false,
    }
}
160
/// Aggregate statistics over one linear-depth readback, built by
/// `DepthReadbackSummary::from_depth` and printed through its `Display` impl.
#[derive(Debug, Clone, PartialEq)]
struct DepthReadbackSummary {
    /// Total number of depth values inspected (length of the input slice).
    samples: usize,
    /// Number of values that are finite.
    finite: usize,
    /// Number of values that are not finite (NaN or infinite).
    invalid: usize,
    /// Number of values accepted by `is_capture_foreground_depth`.
    foreground: usize,
    /// Number of finite values at or below
    /// `near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS`.
    near_or_zero: usize,
    /// Number of finite values above the near threshold that are at or beyond
    /// `far * DEPTH_CAPTURE_FAR_PLANE_FRACTION`; when `far` itself is not
    /// finite, every finite value above the near threshold is counted here.
    far_or_background: usize,
    /// Smallest finite value seen, or `None` if there were no finite values.
    min: Option<f64>,
    /// Largest finite value seen, or `None` if there were no finite values.
    max: Option<f64>,
    /// Smallest foreground value seen, or `None` if there was no foreground.
    foreground_min: Option<f64>,
    /// Largest foreground value seen, or `None` if there was no foreground.
    foreground_max: Option<f64>,
}
174
175impl DepthReadbackSummary {
176    fn from_depth(depth: &[f64], near: f64, far: f64) -> Self {
177        let mut summary = Self {
178            samples: depth.len(),
179            finite: 0,
180            invalid: 0,
181            foreground: 0,
182            near_or_zero: 0,
183            far_or_background: 0,
184            min: None,
185            max: None,
186            foreground_min: None,
187            foreground_max: None,
188        };
189        let near_threshold = near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS;
190        let far_threshold = far * DEPTH_CAPTURE_FAR_PLANE_FRACTION;
191
192        for &value in depth {
193            if !value.is_finite() {
194                summary.invalid += 1;
195                continue;
196            }
197
198            summary.finite += 1;
199            summary.min = Some(summary.min.map_or(value, |min| min.min(value)));
200            summary.max = Some(summary.max.map_or(value, |max| max.max(value)));
201
202            if value <= near_threshold {
203                summary.near_or_zero += 1;
204            } else if !far.is_finite() || value >= far_threshold {
205                summary.far_or_background += 1;
206            }
207
208            if is_capture_foreground_depth(value, near, far) {
209                summary.foreground += 1;
210                summary.foreground_min =
211                    Some(summary.foreground_min.map_or(value, |min| min.min(value)));
212                summary.foreground_max =
213                    Some(summary.foreground_max.map_or(value, |max| max.max(value)));
214            }
215        }
216
217        summary
218    }
219}
220
/// Render an optional depth for log output: four decimal places when present,
/// the literal `none` when absent.
fn format_depth_value(value: Option<f64>) -> String {
    match value {
        Some(depth) => format!("{depth:.4}"),
        None => String::from("none"),
    }
}
226
227impl std::fmt::Display for DepthReadbackSummary {
228    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229        write!(
230            f,
231            "samples={} finite={} invalid={} foreground={} near_or_zero={} far_or_background={} min={} max={} fg_min={} fg_max={}",
232            self.samples,
233            self.finite,
234            self.invalid,
235            self.foreground,
236            self.near_or_zero,
237            self.far_or_background,
238            format_depth_value(self.min),
239            format_depth_value(self.max),
240            format_depth_value(self.foreground_min),
241            format_depth_value(self.foreground_max)
242        )
243    }
244}
245
/// Turn a filesystem path into a string usable as a Bevy asset path.
///
/// On Windows, `std::fs::canonicalize` yields verbatim paths of the form
/// `\\?\C:\...`. Bevy's `AssetPath` parser does not understand that prefix, so
/// such an asset would silently never load. This drops a leading `\\?\` (if
/// present) and rewrites every `\` separator as `/`, so the absolute path
/// resolves through the default file asset source on every platform.
fn fs_path_to_asset_string(path: &std::path::Path) -> String {
    const VERBATIM_PREFIX: &str = r"\\?\";

    let rendered = path.display().to_string();
    let without_prefix = match rendered.strip_prefix(VERBATIM_PREFIX) {
        Some(rest) => rest,
        None => rendered.as_str(),
    };
    without_prefix.replace('\\', "/")
}
258
/// Report whether a display server appears to be reachable for windowed
/// rendering.
///
/// Returns `true` when either the `DISPLAY` or the `WAYLAND_DISPLAY`
/// environment variable is set (to a valid Unicode value).
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|variable| std::env::var(variable).is_ok())
}
266
/// Detect WSL2, which doesn't support Vulkan window surfaces.
///
/// Reads `/proc/version` and looks, case-insensitively, for `microsoft` or
/// `wsl`. Returns `false` when the file cannot be read.
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(kernel_version) => {
            let lowered = kernel_version.to_lowercase();
            lowered.contains("microsoft") || lowered.contains("wsl")
        }
        Err(_) => false,
    }
}
276
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`; every field starts at its `Default` value
/// (zero, `false` or `None`).
#[derive(Resource, Default)]
struct RenderState {
    /// Running frame counter; the `*_frame` fields below are snapshots of it.
    frame_count: u32,
    /// NOTE(review): presumably set once the scene/mesh asset has finished
    /// loading — confirm against the system that writes it.
    scene_loaded: bool,
    /// NOTE(review): presumably set once the texture has finished loading
    /// (see `texture_ready_frame`) — confirm.
    texture_loaded: bool,
    /// NOTE(review): presumably set once materials have been applied
    /// (see `materials_applied_frame`) — confirm.
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// `frame_count` when the texture finished loading. Capture waits a small
    /// margin past this for GPU image preparation. The material (and therefore
    /// the main-pass pipeline) is applied earlier, so by the time the texture is
    /// ready the pipeline has already compiled.
    texture_ready_frame: u32,
    /// Gate for starting a capture; per the note on `materials_applied_frame`
    /// it depends on frames elapsed since materials were applied.
    capture_ready: bool,
    /// NOTE(review): presumably marks that a capture has already been
    /// requested so it is not requested twice — confirm.
    screenshot_requested: bool,
    /// Number of frames spent waiting for a *valid* (non-blank / valid-depth)
    /// readback. The one-shot GPU capture is nondeterministic and occasionally
    /// reads a uniform clear-color frame; we reject those and keep capturing
    /// until a real frame lands, bounded by this counter.
    capture_retries: u32,
    /// Previous frame's RGBA readback. The capture is accepted only once two
    /// consecutive readbacks are identical (the render has settled), so partial
    /// in-progress frames aren't captured and every render path yields the same
    /// fully-drawn image (required for byte-exact cross-path parity).
    prev_rgba: Option<Vec<u8>>,
    /// Previous frame's depth readback, for the same settle-detection as
    /// `prev_rgba` (depth parity is asserted to ~1e-9, i.e. bit-exact).
    prev_depth: Option<Vec<f64>>,
    /// NOTE(review): presumably set once a readback has been accepted into
    /// `rgba_data` / `depth_data` — confirm.
    captured: bool,
    /// NOTE(review): presumably set once app exit has been requested — confirm.
    exit_requested: bool,
    /// Currently unused (hence the `dead_code` allowance).
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// Captured colour bytes, if any. NOTE(review): presumably tightly packed
    /// RGBA8 of `image_width` x `image_height` — confirm.
    rgba_data: Option<Vec<u8>>,
    /// Captured depth values, if any. NOTE(review): presumably linear depth in
    /// meters, one value per pixel — confirm.
    depth_data: Option<Vec<f64>>,
    /// Width of the captured image. NOTE(review): presumably in pixels.
    image_width: u32,
    /// Height of the captured image. NOTE(review): presumably in pixels.
    image_height: u32,
}
317
/// Test-only process-wide counter. NOTE(review): by its name it counts
/// headless scene setups; the increment site is outside this section — confirm.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Reset the test-only setup counter to zero.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Read the current value of the test-only setup counter.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
330
/// Shared buffer for screenshot callback to write into.
///
/// Wraps an `Arc<Mutex<Option<..>>>` so clones of the resource share the same
/// slot; `None` means nothing has been written yet.
/// NOTE(review): the tuple is presumably `(rgba_bytes, width, height)`, by
/// analogy with `SharedDepthBuffer` — confirm against the writer.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
336
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`.
/// Uses f64 for TBP numerical precision compatibility.
///
/// Wraps an `Arc<Mutex<Option<..>>>` so clones of the resource share the same
/// slot; the `Default` value holds `None`.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
343
344// ============================================================================
345// Depth Readback Infrastructure
346// ============================================================================
347
/// Request to capture depth - extracted from main world to render world.
///
/// The `Default` value has `requested == false` and both planes at `0.0`.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, the depth readback node returns without copying anything.
    requested: bool,
    /// Near plane for the capture. NOTE(review): presumably meters, mirroring
    /// `RenderConfig::near_plane` — confirm.
    near: f32,
    /// Far plane for the capture. NOTE(review): presumably meters, mirroring
    /// `RenderConfig::far_plane` — confirm.
    far: f32,
}
355
/// Pending depth capture info for async processing.
///
/// `m22`/`m32` are the relevant entries of the view's reverse-Z projection
/// matrix (`clip_from_view`), captured at copy time so the CPU-side
/// linearization matches the exact projection the GPU rendered with. This keeps
/// depth output robust if projection construction or backend behavior changes.
struct PendingDepthCapture {
    /// Buffer associated with this capture. NOTE(review): presumably the
    /// staging buffer the depth texture was copied into — confirm.
    buffer: Buffer,
    /// Width of the captured depth image. NOTE(review): presumably in texels.
    width: u32,
    /// Height of the captured depth image. NOTE(review): presumably in texels.
    height: u32,
    /// `clip_from_view[col=2][row=2]`, as consumed by
    /// `depth_helpers::ndc_to_linear_with_matrix`.
    m22: f32,
    /// `clip_from_view[col=3][row=2]`, as consumed by
    /// `depth_helpers::ndc_to_linear_with_matrix`.
    m32: f32,
    /// Far-plane distance. NOTE(review): presumably passed as the `far`
    /// argument of the matrix-based linearization — confirm.
    far: f32,
}
370
371fn render_projection(config: &RenderConfig) -> Projection {
372    let near = config.near_plane;
373    Projection::Perspective(PerspectiveProjection {
374        fov: config.fov_radians(),
375        near,
376        far: config.far_plane,
377        near_clip_plane: Vec4::new(0.0, 0.0, -1.0, -near),
378        ..default()
379    })
380}
381
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// The queue lives behind an `Arc<Mutex<..>>`; the `Default` value is an
/// empty queue.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
385
#[cfg(test)]
mod projection_tests {
    use super::*;

    /// The configured near/far planes must reach both the
    /// `PerspectiveProjection` fields and the resulting reverse-Z clip matrix.
    #[test]
    fn render_projection_uses_configured_near_plane_for_effective_clip_matrix() {
        let config = {
            let mut cfg = RenderConfig::tbp_default();
            cfg.near_plane = 0.025;
            cfg.far_plane = 12.0;
            cfg
        };

        let projection = render_projection(&config);

        match &projection {
            Projection::Perspective(perspective) => {
                assert_eq!(perspective.near, config.near_plane);
                assert_eq!(
                    perspective.near_clip_plane,
                    Vec4::new(0.0, 0.0, -1.0, -config.near_plane)
                );
                assert_eq!(perspective.far, config.far_plane);
            }
            _ => panic!("render_projection should create a perspective projection"),
        }

        // The reverse-Z matrix stores the near distance in column 3, row 2.
        let encoded_near = projection.get_clip_from_view().w_axis.z;
        assert!(
            (encoded_near - config.near_plane).abs() < 1e-6,
            "reverse-Z projection matrix should encode configured near plane; got {}",
            encoded_near
        );
    }
}
416
417// ============================================================================
418// Depth Buffer Helpers
419// ============================================================================
420
mod depth_helpers {
    /// wgpu requires buffer row alignment of 256 bytes
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of
    /// [`COPY_BYTES_PER_ROW_ALIGNMENT`]. Values that are already aligned
    /// (including 0) are returned unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        value.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT)
    }

    /// Calculate aligned buffer size for an image: `height` rows, each padded
    /// up to [`COPY_BYTES_PER_ROW_ALIGNMENT`].
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        height * align_byte_size(width * pixel_size)
    }

    /// Convert reverse-Z NDC depth to linear depth in meters.
    ///
    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
    /// This provides better precision for distant objects.
    ///
    /// Formula derivation:
    /// - At near plane (z = near): ndc = 1
    /// - At far plane (z = far): ndc = 0
    /// - linear = far / (1 + ndc * (far/near - 1))
    ///
    /// Superseded in the render path by [`ndc_to_linear_with_matrix`], which
    /// reads the actual projection near from the view matrix instead of trusting
    /// a passed-in near (the source of the #92 10x depth error). Retained for its
    /// tests and as a reference formula.
    #[allow(dead_code)]
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        // Handle edge cases
        if ndc_depth <= 0.0 {
            return far; // Background (infinite distance in reverse-Z)
        }
        if ndc_depth >= 1.0 {
            return near; // At or beyond near plane
        }
        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
        far / (1.0 + ndc_depth * (far / near - 1.0))
    }

    /// Extract depth values from aligned buffer, handling row padding.
    ///
    /// `data` holds `height` rows of little-endian `f32` texels. Each row
    /// starts on a [`COPY_BYTES_PER_ROW_ALIGNMENT`] boundary; the bytes between
    /// the end of a row's `width * 4` payload and the next boundary are padding
    /// and are skipped. The final row does not need its trailing padding.
    ///
    /// Returns the texels in row-major order. A zero `width` or `height`
    /// yields an empty vector.
    ///
    /// # Panics
    ///
    /// Panics if `data` is too short to hold `height` padded rows.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const BYTES_PER_TEXEL: usize = 4; // f32 = 4 bytes

        // All size math is done in `usize`: `width * height` can overflow `u32`.
        let columns = width as usize;
        let rows = height as usize;
        if columns == 0 || rows == 0 {
            return Vec::new();
        }

        let row_bytes = columns * BYTES_PER_TEXEL;
        let row_stride = row_bytes.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT as usize);

        // The last row only needs its payload, not its padding.
        let required_len = (rows - 1) * row_stride + row_bytes;
        assert!(
            data.len() >= required_len,
            "depth readback buffer too small: {} bytes for a {}x{} image (need at least {})",
            data.len(),
            width,
            height,
            required_len
        );

        let mut depth_values = Vec::with_capacity(columns * rows);
        for padded_row in data.chunks(row_stride).take(rows) {
            let texels = padded_row[..row_bytes].chunks_exact(BYTES_PER_TEXEL);
            depth_values.extend(
                texels.map(|texel| f32::from_le_bytes([texel[0], texel[1], texel[2], texel[3]])),
            );
        }

        depth_values
    }

    /// Convert all NDC depth values to linear meters (as f64 for TBP precision).
    /// Superseded by [`convert_depth_to_linear_with_matrix`]; retained for tests.
    #[allow(dead_code)]
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| reverse_z_to_linear_depth(ndc, near, far) as f64)
            .collect()
    }

    /// Linearize a reverse-Z NDC depth using the view's actual projection matrix,
    /// rather than a hand-supplied near/far.
    ///
    /// For a perspective right-handed projection, the relevant clip-space rows are
    /// `clip_z = m22 * z + m32` and `clip_w = -z` (camera looks down -Z), so
    /// `ndc = clip_z / clip_w = (m22*z + m32) / (-z)`. Solving for the positive
    /// view-space distance `d = -z` gives **`d = m32 / (ndc + m22)`**. This holds
    /// for both finite and infinite reverse-Z and is correct regardless of which
    /// near plane the renderer actually used — the previous fixed-near formula
    /// produced depths 10x too small when the effective projection near plane
    /// drifted from `RenderConfig::near_plane` (issue #86/#92/#95).
    ///
    /// `m22 = clip_from_view[col=2][row=2]`, `m32 = clip_from_view[col=3][row=2]`.
    /// `ndc <= 0` is the reverse-Z far plane (background) and maps to `far`.
    /// Degenerate results (near-zero denominator, non-finite or non-positive
    /// distance) also map to `far`, and valid distances are clamped to `far`.
    pub fn ndc_to_linear_with_matrix(ndc: f32, m22: f32, m32: f32, far: f32) -> f32 {
        if ndc <= 0.0 {
            return far; // background / at-or-beyond far plane in reverse-Z
        }
        let denom = ndc + m22;
        if denom.abs() <= f32::EPSILON {
            return far;
        }
        let linear = m32 / denom;
        if !linear.is_finite() || linear <= 0.0 {
            far
        } else {
            linear.min(far)
        }
    }

    /// Convert all NDC depth values to linear meters using the view projection
    /// matrix (f64 for TBP precision). See [`ndc_to_linear_with_matrix`].
    pub fn convert_depth_to_linear_with_matrix(
        raw_depth: &[f32],
        m22: f32,
        m32: f32,
        far: f32,
    ) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| ndc_to_linear_with_matrix(ndc, m22, m32, far) as f64)
            .collect()
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(257), 512);
            assert_eq!(align_byte_size(1), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(0), 0);
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // Near plane (ndc=1 in reverse-Z)
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // Mid-range NDC (reverse-Z is strongly non-linear, so ndc=0.5 is
            // still very close to the near plane)
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
            assert!(linear_mid > near && linear_mid < far);

            // Very close to far plane (ndc very small)
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
            assert!(linear_almost_far > 9.0);

            // Background (ndc=0)
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image, 4 bytes per pixel
            // Aligned row = 256 bytes, but actual = 8 bytes
            let width = 2u32;
            let height = 2u32;

            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows

            // Write test depth values
            // Row 0: [0.5, 0.6]
            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
            // Row 1: [0.7, 0.8]
            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            assert!((depth[0] - 0.5).abs() < 0.001);
            assert!((depth[1] - 0.6).abs() < 0.001);
            assert!((depth[2] - 0.7).abs() < 0.001);
            assert!((depth[3] - 0.8).abs() < 0.001);
        }

        #[test]
        fn test_extract_depth_zero_dimensions() {
            // Degenerate images produce no samples and never touch the buffer.
            assert!(extract_depth_with_alignment(&[], 0, 4).is_empty());
            assert!(extract_depth_with_alignment(&[], 4, 0).is_empty());
            assert!(extract_depth_with_alignment(&[], 0, 0).is_empty());
        }

        #[test]
        #[should_panic(expected = "depth readback buffer too small")]
        fn test_extract_depth_truncated_buffer_panics() {
            // Two rows are requested but only the first row's payload is present.
            let data = vec![0u8; 8];
            let _ = extract_depth_with_alignment(&data, 2, 2);
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // Near plane should give near value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // Far plane (ndc=0) should give far value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Depth should increase as NDC decreases (reverse-Z)
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_infinite_reverse_z() {
            // Infinite reverse-Z (Bevy `perspective_infinite_reverse_rh`):
            // m22 = 0, m32 = near. d = near / ndc.
            let (m22, m32, far) = (0.0f32, 0.1f32, 10.0f32);

            // The exact regression from #92: ndc 0.366504 must linearize to
            // ~0.273 m (near 0.1), NOT ~0.027 m (the old fixed near = 0.01).
            let d = ndc_to_linear_with_matrix(0.366504, m22, m32, far);
            assert!((d as f64 - 0.272849).abs() < 1e-4, "got {d}");

            // Background (reverse-Z far plane) and clamping.
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
            assert_eq!(ndc_to_linear_with_matrix(-0.5, m22, m32, far), far);
            // Very small ndc -> very far -> clamped to far.
            assert_eq!(ndc_to_linear_with_matrix(1e-9, m22, m32, far), far);
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_finite_reverse_z() {
            // Finite reverse-Z maps near->ndc 1, far->ndc 0. Construct the matrix
            // entries for near=0.5, far=20: m22 = near/(far-near), m32 = far*m22.
            let (near, far) = (0.5f32, 20.0f32);
            let m22 = near / (far - near);
            let m32 = far * m22;
            // ndc = 1 -> near; ndc = 0 -> far (background sentinel also returns far).
            assert!((ndc_to_linear_with_matrix(1.0, m22, m32, far) - near).abs() < 1e-4);
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // Near plane
            assert!((linear[0] - near as f64).abs() < 0.001);
            // Far plane
            assert!((linear[3] - far as f64).abs() < 0.001);
            // All should be in range [near, far]
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Powers of two should stay the same if multiple of 256
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(1024), 1024);

            // Just under 256 should round up to 256
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just over 256 should round up to 512
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // Test with TBP default resolution
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            // Create aligned buffer
            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with incrementing values
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Verify first and last values
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
740
741// ============================================================================
742// Depth Readback Render Node
743// ============================================================================
744
/// Label for the depth readback render graph node.
///
/// A unit struct: it carries no data and exists only to name the node via the
/// derived `RenderLabel` implementation.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
748
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node itself is a stateless unit struct; its behaviour lives in its
/// `ViewNode` implementation.
#[derive(Default)]
struct DepthReadbackNode;
753
754impl ViewNode for DepthReadbackNode {
755    type ViewQuery = (
756        &'static ViewDepthTexture,
757        &'static ExtractedCamera,
758        &'static ExtractedView,
759    );
760
761    fn run<'w>(
762        &self,
763        _graph: &mut RenderGraphContext,
764        render_context: &mut RenderContext<'w>,
765        (view_depth_texture, camera, view): QueryItem<'w, '_, Self::ViewQuery>,
766        world: &'w World,
767    ) -> Result<(), NodeRunError> {
768        let trace = render_trace_enabled();
769        let t0 = trace.then(std::time::Instant::now);
770
771        // Check if depth capture is requested
772        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
773            return Ok(());
774        };
775        if !request.requested {
776            return Ok(());
777        }
778
779        // Get the pending queue
780        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
781            return Ok(());
782        };
783
784        // Get texture size from camera viewport or physical size
785        let Some(physical_size) = camera.physical_target_size else {
786            return Ok(());
787        };
788        let width = physical_size.x;
789        let height = physical_size.y;
790
791        let render_device = world.resource::<RenderDevice>();
792
793        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
794        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
795        let unpadded_bytes_per_row = width * bytes_per_pixel;
796        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
797        let buffer_size = (padded_bytes_per_row * height) as u64;
798
799        // Create staging buffer for CPU readback
800        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
801            label: Some("depth_staging_buffer"),
802            size: buffer_size,
803            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
804            mapped_at_creation: false,
805        });
806
807        // Copy depth texture to staging buffer
808        let encoder = render_context.command_encoder();
809        encoder.copy_texture_to_buffer(
810            TexelCopyTextureInfo {
811                texture: &view_depth_texture.texture,
812                mip_level: 0,
813                origin: Origin3d::ZERO,
814                aspect: TextureAspect::DepthOnly,
815            },
816            TexelCopyBufferInfo {
817                buffer: &staging_buffer,
818                layout: TexelCopyBufferLayout {
819                    offset: 0,
820                    bytes_per_row: Some(padded_bytes_per_row),
821                    rows_per_image: Some(height),
822                },
823            },
824            Extent3d {
825                width,
826                height,
827                depth_or_array_layers: 1,
828            },
829        );
830
831        // Push to queue for async processing (queue is Arc<Mutex<Vec>>).
832        // Capture the projection-matrix entries used for linearization: for a
833        // perspective RH matrix, clip_z = m22*z + m32 and clip_w = -z, so the
834        // positive view-space distance is d = m32 / (ndc + m22).
835        let clip_from_view = view.clip_from_view;
836        if let Ok(mut pending) = queue.0.lock() {
837            pending.push(PendingDepthCapture {
838                buffer: staging_buffer,
839                width,
840                height,
841                m22: clip_from_view.z_axis.z,
842                m32: clip_from_view.w_axis.z,
843                far: request.far,
844            });
845        }
846
847        if let Some(t0) = t0 {
848            eprintln!(
849                "[render_trace][node] DepthReadbackNode ms={:.3}",
850                t0.elapsed().as_secs_f64() * 1000.0
851            );
852        }
853
854        Ok(())
855    }
856}
857
858// ============================================================================
859// Depth Readback Plugin
860// ============================================================================
861
862/// Plugin that sets up depth buffer readback from the GPU.
863struct DepthReadbackPlugin {
864    shared_depth: SharedDepthBuffer,
865    near: f32,
866    far: f32,
867}
868
869impl Plugin for DepthReadbackPlugin {
870    fn build(&self, app: &mut App) {
871        use bevy::core_pipeline::core_3d::graph::Core3d;
872        use bevy::core_pipeline::core_3d::graph::Node3d;
873
874        // Insert shared depth buffer in main app
875        app.insert_resource(self.shared_depth.clone());
876        app.insert_resource(DepthCaptureRequest {
877            requested: false,
878            near: self.near,
879            far: self.far,
880        });
881
882        // Get render app
883        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
884            eprintln!("Failed to get RenderApp for depth readback");
885            return;
886        };
887
888        // Insert resources in render world
889        render_app.insert_resource(self.shared_depth.clone());
890        render_app.init_resource::<PendingDepthCaptureQueue>();
891
892        // Add extraction system to copy request from main world
893        render_app.add_systems(ExtractSchedule, extract_depth_request);
894
895        // Add system to process completed depth captures
896        render_app.add_systems(
897            Render,
898            collect_depth_captures.in_set(RenderSystems::Cleanup),
899        );
900
901        // Register the depth readback node in the render graph
902        // Run after main pass completes (depth buffer is ready) but before tonemapping
903        render_app
904            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
905            .add_render_graph_edges(
906                Core3d,
907                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
908            );
909    }
910}
911
912/// Extract depth capture request from main world to render world
913fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
914    commands.insert_resource(DepthCaptureRequest {
915        requested: request.requested,
916        near: request.near,
917        far: request.far,
918    });
919}
920
921/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
922fn collect_depth_captures(
923    queue: Res<PendingDepthCaptureQueue>,
924    shared_depth: Res<SharedDepthBuffer>,
925    render_device: Res<RenderDevice>,
926) {
927    let trace = render_trace_enabled();
928    let t_sys = trace.then(std::time::Instant::now);
929
930    // Take all pending captures from the queue
931    let pending_captures = {
932        let Ok(mut pending) = queue.0.lock() else {
933            return;
934        };
935        std::mem::take(&mut *pending)
936    };
937
938    if pending_captures.is_empty() {
939        if let Some(t0) = t_sys {
940            eprintln!(
941                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
942                t0.elapsed().as_secs_f64() * 1000.0
943            );
944        }
945        return;
946    }
947
948    let pending_count = pending_captures.len();
949
950    // Process each pending capture synchronously with device polling
951    for pending in pending_captures {
952        let width = pending.width;
953        let height = pending.height;
954        let m22 = pending.m22;
955        let m32 = pending.m32;
956        let far = pending.far;
957        let buffer = pending.buffer;
958        let shared = shared_depth.0.clone();
959
960        // Use blocking sync approach with device polling (same as RGBA capture)
961        let buffer_slice = buffer.slice(..);
962
963        // Request mapping
964        let (tx, rx) = std::sync::mpsc::channel();
965        buffer_slice.map_async(MapMode::Read, move |result| {
966            let _ = tx.send(result);
967        });
968
969        let t_wait = trace.then(std::time::Instant::now);
970        let mut poll_iters: u32 = 0;
971
972        // Poll the device until mapping completes
973        loop {
974            let _ =
975                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
976            poll_iters += 1;
977            match rx.try_recv() {
978                Ok(Ok(())) => {
979                    let data = buffer_slice.get_mapped_range();
980
981                    // Extract depth values with alignment handling
982                    let ndc_depth =
983                        depth_helpers::extract_depth_with_alignment(&data, width, height);
984
985                    drop(data);
986                    buffer.unmap();
987
988                    // Convert reverse-Z NDC to linear depth (meters) using the
989                    // view's actual projection matrix entries. See
990                    // `convert_depth_to_linear_with_matrix`.
991                    let linear_depth = depth_helpers::convert_depth_to_linear_with_matrix(
992                        &ndc_depth, m22, m32, far,
993                    );
994
995                    // Store in shared buffer
996                    if let Ok(mut guard) = shared.lock() {
997                        *guard = Some((linear_depth, width, height));
998                    }
999                    break;
1000                }
1001                Ok(Err(e)) => {
1002                    eprintln!("Failed to map depth buffer: {:?}", e);
1003                    break;
1004                }
1005                Err(std::sync::mpsc::TryRecvError::Empty) => {
1006                    // Keep polling
1007                    std::thread::sleep(std::time::Duration::from_millis(1));
1008                }
1009                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1010                    eprintln!("Depth buffer mapping channel disconnected");
1011                    break;
1012                }
1013            }
1014        }
1015
1016        if let Some(t_wait) = t_wait {
1017            eprintln!(
1018                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
1019                poll_iters,
1020                t_wait.elapsed().as_secs_f64() * 1000.0
1021            );
1022        }
1023    }
1024
1025    if let Some(t0) = t_sys {
1026        eprintln!(
1027            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
1028            pending_count,
1029            t0.elapsed().as_secs_f64() * 1000.0
1030        );
1031    }
1032}
1033
1034// ============================================================================
1035// Image Copy Infrastructure (for headless rendering)
1036// ============================================================================
1037
1038/// Label for the image copy render graph node
1039#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
1040struct ImageCopyLabel;
1041
1042/// Component that marks an image for GPU-to-CPU copying
1043#[derive(Component, Clone)]
1044struct ImageCopier {
1045    /// Handle to the source image (render target)
1046    src_image: Handle<Image>,
1047    /// Whether to capture on this frame
1048    enabled: bool,
1049}
1050
1051/// Resource containing all ImageCopiers for the render world
1052#[derive(Resource, Default)]
1053struct ImageCopiers(Vec<ImageCopier>);
1054
1055/// Pending image capture for async processing
1056struct PendingImageCapture {
1057    buffer: Buffer,
1058    width: u32,
1059    height: u32,
1060    padded_bytes_per_row: u32,
1061}
1062
1063/// Queue for pending image captures
1064#[derive(Resource, Default)]
1065struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
1066
1067/// Shared buffer for captured RGBA data
1068#[derive(Resource, Clone, Default)]
1069#[allow(clippy::type_complexity)]
1070struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
1071
/// Render-graph node that copies each enabled `ImageCopier` source texture
/// into a staging buffer and queues that buffer for CPU readback.
struct ImageCopyDriver;
1074
1075impl Node for ImageCopyDriver {
1076    fn run(
1077        &self,
1078        _graph: &mut RenderGraphContext,
1079        _render_context: &mut RenderContext,
1080        world: &World,
1081    ) -> Result<(), NodeRunError> {
1082        let trace = render_trace_enabled();
1083        let t0 = trace.then(std::time::Instant::now);
1084
1085        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
1086            return Ok(());
1087        };
1088
1089        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
1090            return Ok(());
1091        };
1092
1093        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
1094            return Ok(());
1095        };
1096
1097        let render_device = world.resource::<RenderDevice>();
1098
1099        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
1100            return Ok(());
1101        };
1102
1103        for image_copier in image_copiers.0.iter() {
1104            if !image_copier.enabled {
1105                continue;
1106            }
1107
1108            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
1109                continue;
1110            };
1111
1112            let width = gpu_image.size.width;
1113            let height = gpu_image.size.height;
1114
1115            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
1116            let block_dimensions = gpu_image.texture_format.block_dimensions();
1117            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
1118
1119            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
1120                (width as usize / block_dimensions.0 as usize) * block_size as usize,
1121            );
1122
1123            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
1124
1125            // Create staging buffer for CPU readback
1126            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
1127                label: Some("image_copy_staging_buffer"),
1128                size: buffer_size,
1129                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
1130                mapped_at_creation: false,
1131            });
1132
1133            // Create command encoder for the copy operation
1134            let mut encoder =
1135                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
1136
1137            let texture_extent = Extent3d {
1138                width,
1139                height,
1140                depth_or_array_layers: 1,
1141            };
1142
1143            // Copy texture to buffer
1144            encoder.copy_texture_to_buffer(
1145                gpu_image.texture.as_image_copy(),
1146                TexelCopyBufferInfo {
1147                    buffer: &staging_buffer,
1148                    layout: TexelCopyBufferLayout {
1149                        offset: 0,
1150                        bytes_per_row: Some(padded_bytes_per_row as u32),
1151                        rows_per_image: None,
1152                    },
1153                },
1154                texture_extent,
1155            );
1156
1157            // Submit the copy command
1158            render_queue.submit(std::iter::once(encoder.finish()));
1159
1160            // Queue for async processing
1161            if let Ok(mut pending) = queue.0.lock() {
1162                pending.push(PendingImageCapture {
1163                    buffer: staging_buffer,
1164                    width,
1165                    height,
1166                    padded_bytes_per_row: padded_bytes_per_row as u32,
1167                });
1168            }
1169        }
1170
1171        if let Some(t0) = t0 {
1172            eprintln!(
1173                "[render_trace][node] ImageCopyDriver ms={:.3}",
1174                t0.elapsed().as_secs_f64() * 1000.0
1175            );
1176        }
1177
1178        Ok(())
1179    }
1180}
1181
1182/// Extract ImageCopier components to render world
1183fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
1184    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
1185}
1186
1187/// Process completed image captures
1188fn collect_image_captures(
1189    queue: Res<PendingImageCaptureQueue>,
1190    shared_rgba: Res<SharedRgbaBuffer>,
1191    render_device: Res<RenderDevice>,
1192) {
1193    let trace = render_trace_enabled();
1194    let t_sys = trace.then(std::time::Instant::now);
1195
1196    let pending_captures = {
1197        let Ok(mut pending) = queue.0.lock() else {
1198            return;
1199        };
1200        std::mem::take(&mut *pending)
1201    };
1202
1203    if pending_captures.is_empty() {
1204        if let Some(t0) = t_sys {
1205            eprintln!(
1206                "[render_trace][sys] collect_image_captures empty ms={:.3}",
1207                t0.elapsed().as_secs_f64() * 1000.0
1208            );
1209        }
1210        return;
1211    }
1212
1213    let pending_count = pending_captures.len();
1214
1215    for pending in pending_captures {
1216        let width = pending.width;
1217        let height = pending.height;
1218        let padded_bytes_per_row = pending.padded_bytes_per_row;
1219        let buffer = pending.buffer;
1220        let shared = shared_rgba.0.clone();
1221
1222        // Use blocking sync approach with device polling
1223        let buffer_slice = buffer.slice(..);
1224
1225        // Request mapping
1226        let (tx, rx) = std::sync::mpsc::channel();
1227        buffer_slice.map_async(MapMode::Read, move |result| {
1228            let _ = tx.send(result);
1229        });
1230
1231        // Poll the device until mapping completes (with timeout)
1232        let start = std::time::Instant::now();
1233        let timeout = std::time::Duration::from_secs(10);
1234        let mut poll_iters: u32 = 0;
1235        loop {
1236            let _ =
1237                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
1238            poll_iters += 1;
1239
1240            if start.elapsed() > timeout {
1241                eprintln!(
1242                    "Warning: Buffer mapping timeout after {:?}",
1243                    start.elapsed()
1244                );
1245                break;
1246            }
1247
1248            match rx.try_recv() {
1249                Ok(Ok(())) => {
1250                    let data = buffer_slice.get_mapped_range();
1251
1252                    // Extract pixels with alignment handling
1253                    let bytes_per_pixel = 4u32;
1254                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
1255                    let padded_row_bytes = padded_bytes_per_row as usize;
1256
1257                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
1258                    for y in 0..height as usize {
1259                        let row_start = y * padded_row_bytes;
1260                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
1261                    }
1262
1263                    drop(data);
1264                    buffer.unmap();
1265
1266                    if let Ok(mut guard) = shared.lock() {
1267                        *guard = Some((rgba, width, height));
1268                    }
1269                    break;
1270                }
1271                Ok(Err(e)) => {
1272                    eprintln!("Failed to map image buffer: {:?}", e);
1273                    break;
1274                }
1275                Err(std::sync::mpsc::TryRecvError::Empty) => {
1276                    // Keep polling
1277                    std::thread::sleep(std::time::Duration::from_millis(1));
1278                }
1279                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1280                    eprintln!("Image buffer mapping channel disconnected");
1281                    break;
1282                }
1283            }
1284        }
1285
1286        if trace {
1287            eprintln!(
1288                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
1289                poll_iters,
1290                start.elapsed().as_secs_f64() * 1000.0
1291            );
1292        }
1293    }
1294
1295    if let Some(t0) = t_sys {
1296        eprintln!(
1297            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1298            pending_count,
1299            t0.elapsed().as_secs_f64() * 1000.0
1300        );
1301    }
1302}
1303
1304/// Plugin for headless image copy
1305struct ImageCopyPlugin {
1306    shared_rgba: SharedRgbaBuffer,
1307}
1308
1309impl Plugin for ImageCopyPlugin {
1310    fn build(&self, app: &mut App) {
1311        use bevy::render::render_graph::RenderGraph;
1312
1313        app.insert_resource(self.shared_rgba.clone());
1314
1315        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1316            return;
1317        };
1318
1319        render_app.insert_resource(self.shared_rgba.clone());
1320        render_app.init_resource::<ImageCopiers>();
1321        render_app.init_resource::<PendingImageCaptureQueue>();
1322
1323        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1324        render_app.add_systems(
1325            Render,
1326            collect_image_captures.in_set(RenderSystems::Cleanup),
1327        );
1328
1329        // Add image copy node to render graph (runs after camera driver)
1330        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1331        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1332        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1333    }
1334}
1335
1336// ============================================================================
1337// Render Request and Components
1338// ============================================================================
1339
1340/// Configuration passed to the Bevy app
1341#[derive(Resource, Clone)]
1342struct RenderRequest {
1343    mesh_path: String,
1344    texture_path: String,
1345    camera_transform: Transform,
1346    object_rotation: ObjectRotation,
1347    object_translation: Vec3,
1348    object_scale: Vec3,
1349    config: RenderConfig,
1350}
1351
1352/// Marker for the rendered object
1353#[derive(Component)]
1354struct RenderedObject;
1355
1356/// Marker for the render camera
1357#[derive(Component)]
1358struct RenderCamera;
1359
1360/// Handle for the loaded texture
1361#[derive(Resource)]
1362struct LoadedTexture(Handle<Image>);
1363
1364/// Handle for the loaded scene
1365#[derive(Resource)]
1366struct LoadedScene(Handle<Scene>);
1367
1368/// Shared output for extracting render results
1369#[derive(Resource, Clone)]
1370struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1371
1372/// Handle for the render target image
1373#[derive(Resource)]
1374#[allow(dead_code)]
1375struct RenderTargetImage(Handle<Image>);
1376
1377/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1378#[derive(Resource)]
1379struct HeadlessBatchSequence {
1380    viewpoints: Vec<Transform>,
1381    current_index: usize,
1382    outputs: Vec<RenderOutput>,
1383    warmup_frames_remaining: u32,
1384    done: bool,
1385}
1386
1387impl HeadlessBatchSequence {
1388    fn new(viewpoints: Vec<Transform>) -> Self {
1389        let capacity = viewpoints.len();
1390        Self {
1391            viewpoints,
1392            current_index: 0,
1393            outputs: Vec::with_capacity(capacity),
1394            warmup_frames_remaining: 0,
1395            done: capacity == 0,
1396        }
1397    }
1398
1399    fn current_viewpoint(&self) -> Option<Transform> {
1400        self.viewpoints.get(self.current_index).cloned()
1401    }
1402}
1403
1404/// Perform headless rendering of a YCB object.
1405///
1406/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1407/// require any window surfaces. This should work on WSL2 and other environments
1408/// without display servers.
1409///
1410/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1411/// for results and terminates the process once the render is complete.
1412#[allow(dead_code)]
1413pub fn render_headless(
1414    object_dir: &Path,
1415    camera_transform: &Transform,
1416    object_rotation: &ObjectRotation,
1417    object_translation: Vec3,
1418    object_scale: Vec3,
1419    config: &RenderConfig,
1420) -> Result<RenderOutput, RenderError> {
1421    // Canonicalize paths so Bevy's asset server can find them regardless of
1422    // caller working directory. Relative paths like "../../ycb" pass the
1423    // exists() check but Bevy resolves assets against its own root.
1424    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1425        RenderError::RenderFailed(format!(
1426            "Cannot canonicalize object directory {}: {}",
1427            object_dir.display(),
1428            e
1429        ))
1430    })?;
1431    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1432    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1433
1434    if !mesh_path.exists() {
1435        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1436            &mesh_path,
1437        )));
1438    }
1439    if !texture_path.exists() {
1440        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1441            &texture_path,
1442        )));
1443    }
1444
1445    let request = RenderRequest {
1446        mesh_path: fs_path_to_asset_string(&mesh_path),
1447        texture_path: fs_path_to_asset_string(&texture_path),
1448        camera_transform: *camera_transform,
1449        object_rotation: object_rotation.clone(),
1450        object_translation,
1451        object_scale,
1452        config: config.clone(),
1453    };
1454
1455    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1456    let output_clone = shared_output.clone();
1457
1458    // Shared buffer for RGBA data from headless render target
1459    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1460
1461    // Shared buffer for depth readback
1462    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1463
1464    // Create a temp file path for fallback output serialization
1465    let temp_path =
1466        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1467
1468    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1469    let output_poll_for_timeout = shared_output.clone();
1470    std::thread::spawn(move || {
1471        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1472        let start = std::time::Instant::now();
1473        let poll_interval = std::time::Duration::from_millis(100);
1474
1475        loop {
1476            // Check if we have a result
1477            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1478                if guard.is_some() {
1479                    // Output is ready, Bevy will exit via AppExit event
1480                    return; // Exit watchdog thread, Bevy will handle exit
1481                }
1482            }
1483
1484            if start.elapsed() > timeout {
1485                eprintln!(
1486                    "Error: Render timeout after {} seconds",
1487                    RENDER_TIMEOUT_SECS
1488                );
1489                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1490                // Force exit on timeout (this is a failure case)
1491                std::process::exit(1);
1492            }
1493
1494            std::thread::sleep(poll_interval);
1495        }
1496    });
1497
1498    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1499    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1500    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1501
1502    // App::run() returned - check shared_output for result
1503    if let Ok(guard) = shared_output.0.lock() {
1504        if let Some(output) = guard.as_ref() {
1505            return Ok(output.clone());
1506        }
1507    }
1508
1509    // Fallback: try to read from temp file (for legacy compatibility)
1510    if temp_path.exists() {
1511        if let Ok(output) = read_output_from_file(&temp_path) {
1512            let _ = std::fs::remove_file(&temp_path);
1513            return Ok(output);
1514        }
1515    }
1516
1517    Err(RenderError::RenderFailed(
1518        "Render did not complete".to_string(),
1519    ))
1520}
1521
1522/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1523///
1524/// All captures share the same object, object rotation, and render configuration.
1525/// This is the fast path used by the batch API for episode-style workloads.
1526pub fn render_headless_sequence(
1527    object_dir: &Path,
1528    viewpoints: &[Transform],
1529    object_rotation: &ObjectRotation,
1530    object_translation: Vec3,
1531    object_scale: Vec3,
1532    config: &RenderConfig,
1533) -> Result<Vec<RenderOutput>, RenderError> {
1534    if viewpoints.is_empty() {
1535        return Ok(Vec::new());
1536    }
1537
1538    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1539        RenderError::RenderFailed(format!(
1540            "Cannot canonicalize object directory {}: {}",
1541            object_dir.display(),
1542            e
1543        ))
1544    })?;
1545    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1546    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1547
1548    if !mesh_path.exists() {
1549        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1550            &mesh_path,
1551        )));
1552    }
1553    if !texture_path.exists() {
1554        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1555            &texture_path,
1556        )));
1557    }
1558
1559    let request = RenderRequest {
1560        mesh_path: fs_path_to_asset_string(&mesh_path),
1561        texture_path: fs_path_to_asset_string(&texture_path),
1562        camera_transform: viewpoints[0],
1563        object_rotation: object_rotation.clone(),
1564        object_translation,
1565        object_scale,
1566        config: config.clone(),
1567    };
1568
1569    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1570    let rgba_clone = shared_rgba.clone();
1571
1572    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1573    let depth_clone = shared_depth.clone();
1574
1575    let mut app = App::new();
1576    app.add_plugins(
1577        DefaultPlugins
1578            .set(bevy::asset::AssetPlugin {
1579                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1580                // default (UnapprovedPathMode::Forbid → load() silently returns a
1581                // default handle). YCB meshes load from absolute paths, so allow them.
1582                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1583                ..default()
1584            })
1585            .set(WindowPlugin {
1586                primary_window: None,
1587                exit_condition: ExitCondition::DontExit,
1588                ..default()
1589            })
1590            .disable::<bevy::winit::WinitPlugin>()
1591            .disable::<LogPlugin>()
1592            .disable::<TerminalCtrlCHandlerPlugin>(),
1593    )
1594    .add_plugins(ObjPlugin)
1595    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1596    // Scene spawning panics unless those component types are registered. The
1597    // minimal headless plugin set doesn't register them, so do it explicitly.
1598    .register_type::<Mesh3d>()
1599    .register_type::<MeshMaterial3d<StandardMaterial>>()
1600    .register_type::<bevy::prelude::Transform>()
1601    .register_type::<bevy::prelude::GlobalTransform>()
1602    .register_type::<bevy::transform::components::TransformTreeChanged>()
1603    .register_type::<bevy::prelude::Visibility>()
1604    .register_type::<bevy::prelude::InheritedVisibility>()
1605    .register_type::<bevy::prelude::ViewVisibility>()
1606    .add_plugins(ImageCopyPlugin {
1607        shared_rgba: rgba_clone,
1608    })
1609    .add_plugins(DepthReadbackPlugin {
1610        shared_depth: depth_clone,
1611        near: config.near_plane,
1612        far: config.far_plane,
1613    })
1614    .insert_resource(request)
1615    .insert_resource(shared_rgba)
1616    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1617    .init_resource::<RenderState>()
1618    .add_systems(Startup, setup_headless_scene)
1619    .add_systems(
1620        Update,
1621        (
1622            check_assets_loaded,
1623            apply_materials,
1624            tick_headless_batch_warmup,
1625            request_headless_capture,
1626            check_headless_capture_ready,
1627            extract_and_continue_headless_batch,
1628        )
1629            .chain(),
1630    );
1631
1632    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1633    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1634    // normal startup phases before driving the headless batch loop ourselves.
1635    let trace_outer = render_trace_enabled();
1636    let t_finish = std::time::Instant::now();
1637    app.finish();
1638    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1639    let t_cleanup = std::time::Instant::now();
1640    app.cleanup();
1641    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1642    if trace_outer {
1643        eprintln!(
1644            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1645            finish_ms, cleanup_ms
1646        );
1647    }
1648
1649    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1650    let start = std::time::Instant::now();
1651
1652    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1653    let mut update_idx: u32 = 0;
1654    let mut last_completed_outputs: usize = 0;
1655    let mut viewpoint_start = std::time::Instant::now();
1656
1657    loop {
1658        if start.elapsed() > timeout {
1659            return Err(RenderError::RenderTimeout {
1660                duration_secs: RENDER_TIMEOUT_SECS,
1661            });
1662        }
1663
1664        let update_start = std::time::Instant::now();
1665        app.update();
1666        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1667
1668        if trace {
1669            let batch = app.world().resource::<HeadlessBatchSequence>();
1670            let warmup = batch.warmup_frames_remaining;
1671            let current = batch.current_index;
1672            let completed = batch.outputs.len();
1673            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1674            eprintln!(
1675                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1676                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1677            );
1678            if completed > last_completed_outputs {
1679                eprintln!(
1680                    "[render_trace] viewpoint {} finished in {:.2} ms",
1681                    completed - 1,
1682                    vp_ms
1683                );
1684                last_completed_outputs = completed;
1685                viewpoint_start = std::time::Instant::now();
1686            }
1687        }
1688
1689        update_idx += 1;
1690
1691        if app.world().resource::<HeadlessBatchSequence>().done {
1692            break;
1693        }
1694    }
1695
1696    if trace {
1697        eprintln!(
1698            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1699            start.elapsed().as_secs_f64() * 1000.0,
1700            viewpoints.len()
1701        );
1702    }
1703
1704    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1705    if batch.outputs.len() != viewpoints.len() {
1706        return Err(RenderError::RenderFailed(format!(
1707            "Batch render produced {} outputs for {} viewpoints",
1708            batch.outputs.len(),
1709            viewpoints.len()
1710        )));
1711    }
1712
1713    Ok(std::mem::take(&mut batch.outputs))
1714}
1715
1716/// Assemble the shared single-render headless Bevy app.
1717fn build_headless_app(
1718    request: RenderRequest,
1719    shared_output: SharedOutput,
1720    shared_rgba: SharedRgbaBuffer,
1721    shared_depth: SharedDepthBuffer,
1722) -> App {
1723    let near = request.config.near_plane;
1724    let far = request.config.far_plane;
1725
1726    let mut app = App::new();
1727    app.add_plugins(
1728        DefaultPlugins
1729            .set(bevy::asset::AssetPlugin {
1730                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1731                // default (UnapprovedPathMode::Forbid → load() silently returns a
1732                // default handle). YCB meshes load from absolute paths, so allow them.
1733                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1734                ..default()
1735            })
1736            .set(WindowPlugin {
1737                primary_window: None,
1738                exit_condition: ExitCondition::DontExit,
1739                ..default()
1740            })
1741            .disable::<bevy::winit::WinitPlugin>()
1742            .disable::<LogPlugin>()
1743            .disable::<TerminalCtrlCHandlerPlugin>(),
1744    )
1745    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1746        1.0 / 60.0,
1747    )))
1748    .add_plugins(ObjPlugin)
1749    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1750    // Scene spawning panics unless those component types are registered. The
1751    // minimal headless plugin set doesn't register them, so do it explicitly.
1752    .register_type::<Mesh3d>()
1753    .register_type::<MeshMaterial3d<StandardMaterial>>()
1754    .register_type::<bevy::prelude::Transform>()
1755    .register_type::<bevy::prelude::GlobalTransform>()
1756    .register_type::<bevy::transform::components::TransformTreeChanged>()
1757    .register_type::<bevy::prelude::Visibility>()
1758    .register_type::<bevy::prelude::InheritedVisibility>()
1759    .register_type::<bevy::prelude::ViewVisibility>()
1760    .add_plugins(ImageCopyPlugin {
1761        shared_rgba: shared_rgba.clone(),
1762    })
1763    .add_plugins(DepthReadbackPlugin {
1764        shared_depth,
1765        near,
1766        far,
1767    })
1768    .insert_resource(request)
1769    .insert_resource(shared_output)
1770    .insert_resource(shared_rgba)
1771    .init_resource::<RenderState>()
1772    .add_systems(Startup, setup_headless_scene)
1773    .add_systems(
1774        Update,
1775        (
1776            check_assets_loaded,
1777            apply_materials,
1778            request_headless_capture,
1779            check_headless_capture_ready,
1780            extract_and_exit_headless,
1781        )
1782            .chain(),
1783    );
1784    app
1785}
1786
1787/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1788#[allow(dead_code)]
1789fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1790    let mut data = Vec::new();
1791
1792    // Header: width, height, rgba_len, depth_len
1793    data.extend_from_slice(&output.width.to_le_bytes());
1794    data.extend_from_slice(&output.height.to_le_bytes());
1795    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1796    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1797
1798    // RGBA data
1799    data.extend_from_slice(&output.rgba);
1800
1801    // Depth data (as f64 bytes for TBP precision)
1802    for d in &output.depth {
1803        data.extend_from_slice(&d.to_le_bytes());
1804    }
1805
1806    // Intrinsics (f64 for TBP precision)
1807    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1808    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1809    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1810    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1811    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1812    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1813
1814    // Camera transform (translation + rotation quaternion)
1815    let t = output.camera_transform.translation;
1816    let r = output.camera_transform.rotation;
1817    data.extend_from_slice(&t.x.to_le_bytes());
1818    data.extend_from_slice(&t.y.to_le_bytes());
1819    data.extend_from_slice(&t.z.to_le_bytes());
1820    data.extend_from_slice(&r.x.to_le_bytes());
1821    data.extend_from_slice(&r.y.to_le_bytes());
1822    data.extend_from_slice(&r.z.to_le_bytes());
1823    data.extend_from_slice(&r.w.to_le_bytes());
1824
1825    // Object rotation (f64)
1826    let or = &output.object_rotation;
1827    data.extend_from_slice(&or.pitch.to_le_bytes());
1828    data.extend_from_slice(&or.yaw.to_le_bytes());
1829    data.extend_from_slice(&or.roll.to_le_bytes());
1830
1831    // Object translation + scale (f32 for Bevy compatibility)
1832    let ot = output.object_translation;
1833    let os = output.object_scale;
1834    data.extend_from_slice(&ot.x.to_le_bytes());
1835    data.extend_from_slice(&ot.y.to_le_bytes());
1836    data.extend_from_slice(&ot.z.to_le_bytes());
1837    data.extend_from_slice(&os.x.to_le_bytes());
1838    data.extend_from_slice(&os.y.to_le_bytes());
1839    data.extend_from_slice(&os.z.to_le_bytes());
1840
1841    data
1842}
1843
1844/// Read RenderOutput from serialized file
1845fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1846    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1847    let mut data = Vec::new();
1848    file.read_to_end(&mut data)
1849        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1850
1851    let mut cursor = 0;
1852
1853    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1854        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1855        *cursor += 4;
1856        val
1857    };
1858
1859    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1860        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1861        *cursor += 4;
1862        val
1863    };
1864
1865    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1866        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1867        *cursor += 8;
1868        val
1869    };
1870
1871    let width = read_u32(&data, &mut cursor);
1872    let height = read_u32(&data, &mut cursor);
1873    let rgba_len = read_u32(&data, &mut cursor) as usize;
1874    let depth_len = read_u32(&data, &mut cursor) as usize;
1875
1876    let rgba = data[cursor..cursor + rgba_len].to_vec();
1877    cursor += rgba_len;
1878
1879    // Depth data (f64 for TBP precision)
1880    let mut depth = Vec::with_capacity(depth_len);
1881    for _ in 0..depth_len {
1882        depth.push(read_f64(&data, &mut cursor));
1883    }
1884
1885    // Intrinsics (f64 for TBP precision)
1886    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1887    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1888    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1889
1890    // Camera transform (f32 for Bevy compatibility)
1891    let tx = read_f32(&data, &mut cursor);
1892    let ty = read_f32(&data, &mut cursor);
1893    let tz = read_f32(&data, &mut cursor);
1894    let rx = read_f32(&data, &mut cursor);
1895    let ry = read_f32(&data, &mut cursor);
1896    let rz = read_f32(&data, &mut cursor);
1897    let rw = read_f32(&data, &mut cursor);
1898
1899    // Object rotation (f64)
1900    let pitch = read_f64(&data, &mut cursor);
1901    let yaw = read_f64(&data, &mut cursor);
1902    let roll = read_f64(&data, &mut cursor);
1903
1904    let (object_translation, object_scale) = if cursor + 24 <= data.len() {
1905        let tx = read_f32(&data, &mut cursor);
1906        let ty = read_f32(&data, &mut cursor);
1907        let tz = read_f32(&data, &mut cursor);
1908        let sx = read_f32(&data, &mut cursor);
1909        let sy = read_f32(&data, &mut cursor);
1910        let sz = read_f32(&data, &mut cursor);
1911        (Vec3::new(tx, ty, tz), Vec3::new(sx, sy, sz))
1912    } else {
1913        (Vec3::ZERO, Vec3::ONE)
1914    };
1915
1916    Ok(RenderOutput {
1917        rgba,
1918        depth,
1919        width,
1920        height,
1921        intrinsics: crate::CameraIntrinsics {
1922            focal_length,
1923            principal_point,
1924            image_size,
1925        },
1926        camera_transform: Transform {
1927            translation: Vec3::new(tx, ty, tz),
1928            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1929            scale: Vec3::ONE,
1930        },
1931        object_rotation: ObjectRotation { pitch, yaw, roll },
1932        object_translation,
1933        object_scale,
1934        target_point: Vec3::ZERO,
1935        targeting_policy: TargetingPolicy::Origin,
1936    })
1937}
1938
1939/// Setup the scene with camera, lighting, and object
1940#[allow(dead_code)]
1941fn setup_scene(
1942    mut commands: Commands,
1943    asset_server: Res<AssetServer>,
1944    request: Res<RenderRequest>,
1945    mut _materials: ResMut<Assets<StandardMaterial>>,
1946) {
1947    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1948    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1949    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1950    commands.spawn((
1951        Camera3d::default(),
1952        Camera::default(),
1953        Hdr,
1954        render_projection(&request.config),
1955        Msaa::Off,
1956        request.camera_transform,
1957        Tonemapping::None, // Accurate colors for software rendering
1958        DepthPrepass,
1959        NormalPrepass,
1960        RenderCamera,
1961    ));
1962
1963    // Ambient light (from config). In Bevy 0.18 the global ambient light is the
1964    // `GlobalAmbientLight` resource (the `AmbientLight` type became a per-camera component).
1965    let lighting = &request.config.lighting;
1966    commands.insert_resource(GlobalAmbientLight {
1967        color: Color::WHITE,
1968        brightness: lighting.ambient_brightness,
1969        ..default()
1970    });
1971
1972    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1973    if lighting.key_light_intensity > 0.0 {
1974        commands.spawn((
1975            PointLight {
1976                intensity: lighting.key_light_intensity,
1977                shadows_enabled: lighting.shadows_enabled,
1978                ..default()
1979            },
1980            Transform::from_xyz(
1981                lighting.key_light_position[0],
1982                lighting.key_light_position[1],
1983                lighting.key_light_position[2],
1984            ),
1985        ));
1986    }
1987
1988    // Fill light (from config)
1989    if lighting.fill_light_intensity > 0.0 {
1990        commands.spawn((
1991            PointLight {
1992                intensity: lighting.fill_light_intensity,
1993                shadows_enabled: lighting.shadows_enabled,
1994                ..default()
1995            },
1996            Transform::from_xyz(
1997                lighting.fill_light_position[0],
1998                lighting.fill_light_position[1],
1999                lighting.fill_light_position[2],
2000            ),
2001        ));
2002    }
2003
2004    // Load the scene
2005    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2006    commands.insert_resource(LoadedScene(scene_handle.clone()));
2007
2008    // Load the texture
2009    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2010    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2011
2012    // Create material with texture (will be applied later)
2013    let _material = _materials.add(StandardMaterial {
2014        base_color_texture: Some(texture_handle),
2015        unlit: true,
2016        ..default()
2017    });
2018
2019    // Spawn the scene with the requested object transform (Bevy 0.15+ uses SceneRoot)
2020    commands.spawn((
2021        SceneRoot(scene_handle),
2022        request
2023            .object_rotation
2024            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
2025        RenderedObject,
2026    ));
2027
2028    println!("Scene setup complete");
2029}
2030
2031/// Check if assets are loaded
2032fn check_assets_loaded(
2033    mut state: ResMut<RenderState>,
2034    asset_server: Res<AssetServer>,
2035    scene: Option<Res<LoadedScene>>,
2036    texture: Option<Res<LoadedTexture>>,
2037) {
2038    let trace = render_trace_enabled();
2039    let was_scene_loaded = state.scene_loaded;
2040    let was_texture_loaded = state.texture_loaded;
2041
2042    state.frame_count += 1;
2043
2044    if state.scene_loaded && state.texture_loaded {
2045        return;
2046    }
2047
2048    if let Some(scene) = scene {
2049        match asset_server.get_load_state(&scene.0) {
2050            Some(LoadState::Loaded) => {
2051                state.scene_loaded = true;
2052            }
2053            Some(LoadState::Failed(_)) => {}
2054            _ => {}
2055        }
2056    }
2057
2058    if let Some(texture) = texture {
2059        match asset_server.get_load_state(&texture.0) {
2060            Some(LoadState::Loaded) => {
2061                state.texture_loaded = true;
2062            }
2063            Some(LoadState::Failed(_)) => {}
2064            _ => {}
2065        }
2066    }
2067
2068    if trace {
2069        if !was_scene_loaded && state.scene_loaded {
2070            eprintln!(
2071                "[render_trace][coldinit] scene_loaded frame_count={}",
2072                state.frame_count
2073            );
2074        }
2075        if !was_texture_loaded && state.texture_loaded {
2076            eprintln!(
2077                "[render_trace][coldinit] texture_loaded frame_count={}",
2078                state.frame_count
2079            );
2080        }
2081    }
2082}
2083
2084/// Apply materials to loaded meshes
2085fn apply_materials(
2086    mut state: ResMut<RenderState>,
2087    texture: Option<Res<LoadedTexture>>,
2088    mut materials: ResMut<Assets<StandardMaterial>>,
2089    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
2090    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
2091) {
2092    // NOTE: we intentionally do NOT wait for `texture_loaded` before applying the
2093    // material. The texture *handle* is valid immediately, so applying the material
2094    // as soon as the mesh entities exist lets the main-pass `StandardMaterial`
2095    // pipeline start compiling during the long async texture load. A late material
2096    // swap (after texture load) would reset the pipeline and capture a blank color
2097    // frame before it recompiled — the root cause of the 0.18 blank renders.
2098    if !state.scene_loaded || state.capture_ready {
2099        return;
2100    }
2101
2102    state.frame_count += 1;
2103
2104    let Some(tex) = texture else { return };
2105
2106    if !state.materials_applied {
2107        // The scene hierarchy is instantiated asynchronously after the asset
2108        // load event fires; wait until mesh entities exist before applying.
2109        if mesh_query.is_empty() {
2110            return;
2111        }
2112
2113        let textured_material = materials.add(StandardMaterial {
2114            base_color_texture: Some(tex.0.clone()),
2115            unlit: true,
2116            ..default()
2117        });
2118
2119        for mut mat in mesh_query.iter_mut() {
2120            mat.0 = textured_material.clone();
2121        }
2122
2123        state.materials_applied = true;
2124        state.materials_applied_frame = state.frame_count;
2125    }
2126
2127    // Record the frame the texture finished loading (once).
2128    if state.texture_loaded && state.texture_ready_frame == 0 {
2129        state.texture_ready_frame = state.frame_count;
2130    }
2131
2132    // Capture once the texture pixels are loaded (+ a small margin for GPU image
2133    // preparation) AND the main-pass pipeline has had time to compile since the
2134    // material was applied. Because the material is applied early, the pipeline is
2135    // almost always ready well before the texture, so this resolves to a few frames
2136    // after the texture loads — deterministic and fast (no 60/120-frame cushion).
2137    let texture_ready =
2138        state.texture_ready_frame != 0 && state.frame_count >= state.texture_ready_frame + 6;
2139    let pipeline_ready = state.frame_count >= state.materials_applied_frame + 6;
2140    if texture_ready && pipeline_ready {
2141        let was_ready = state.capture_ready;
2142        state.capture_ready = true;
2143        if render_trace_enabled() && !was_ready {
2144            eprintln!(
2145                "[render_trace][coldinit] capture_ready frame_count={}",
2146                state.frame_count
2147            );
2148        }
2149    }
2150}
2151
2152/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
2153#[allow(dead_code)]
2154fn request_screenshot(
2155    mut commands: Commands,
2156    mut state: ResMut<RenderState>,
2157    shared_image: Res<SharedImageBuffer>,
2158    mut depth_request: ResMut<DepthCaptureRequest>,
2159) {
2160    if !state.capture_ready || state.screenshot_requested {
2161        return;
2162    }
2163
2164    // Clone the Arc for the observer closure
2165    let image_buffer = shared_image.0.clone();
2166
2167    // Also request depth capture
2168    depth_request.requested = true;
2169    println!("Depth capture requested");
2170
2171    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
2172    println!("Requesting screenshot via Screenshot entity");
2173    commands
2174        .spawn(Screenshot::primary_window())
2175        .observe(move |trigger: On<ScreenshotCaptured>| {
2176            // ScreenshotCaptured derefs to Image
2177            let image: &Image = trigger.event();
2178
2179            // Get dimensions
2180            let width = image.texture_descriptor.size.width;
2181            let height = image.texture_descriptor.size.height;
2182
2183            // Bevy 0.18: Image.data is now Option<Vec<u8>>; skip if absent.
2184            let Some(rgba_data) = image.data.clone() else {
2185                return;
2186            };
2187
2188            // Store in shared buffer
2189            if let Ok(mut guard) = image_buffer.lock() {
2190                *guard = Some((rgba_data, width, height));
2191            }
2192        });
2193
2194    state.screenshot_requested = true;
2195    println!("Screenshot requested");
2196}
2197
2198/// Check if screenshot callback has completed
2199#[allow(dead_code)]
2200fn check_screenshot_ready(
2201    mut state: ResMut<RenderState>,
2202    shared_image: Res<SharedImageBuffer>,
2203    shared_depth: Res<SharedDepthBuffer>,
2204    request: Res<RenderRequest>,
2205) {
2206    if !state.screenshot_requested || state.captured {
2207        return;
2208    }
2209
2210    // Increment frame count while waiting for capture
2211    state.frame_count += 1;
2212
2213    // Check if RGBA callback has written data
2214    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
2215        if let Some((rgba_data, width, height)) = guard.as_ref() {
2216            if state.rgba_data.is_none() {
2217                state.rgba_data = Some(rgba_data.clone());
2218                state.image_width = *width;
2219                state.image_height = *height;
2220            }
2221            true
2222        } else {
2223            false
2224        }
2225    } else {
2226        false
2227    };
2228
2229    // Check if depth readback has completed
2230    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2231        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2232            if state.depth_data.is_none() {
2233                state.depth_data = Some(depth_data.clone());
2234            }
2235            true
2236        } else {
2237            false
2238        }
2239    } else {
2240        false
2241    };
2242
2243    // If depth readback failed or is taking too long, fall back to placeholder.
2244    // As in check_headless_capture_ready, this uniform plane is a DEGRADED render
2245    // (flat depth, no real geometry) that must be loud — it silently masked the
2246    // #92 depth regression. (This fn is currently dead code; kept loud in case it
2247    // is ever revived.)
2248    if rgba_ready && !depth_ready && state.frame_count > 60 {
2249        let camera_dist = request.camera_transform.translation.length() as f64;
2250        let pixel_count = (state.image_width * state.image_height) as usize;
2251        eprintln!(
2252            "[bevy-sensor][WARN] depth readback produced no valid frame; falling back to a \
2253             UNIFORM {:.4} m camera-distance plane (degraded render, no real 3D geometry). \
2254             Indicates a depth-readback regression.",
2255            camera_dist
2256        );
2257        state.depth_data = Some(vec![camera_dist; pixel_count]);
2258    }
2259
2260    // Mark as captured when both RGBA and depth are ready
2261    if state.rgba_data.is_some() && state.depth_data.is_some() {
2262        state.captured = true;
2263    }
2264}
2265
2266/// Extract results and exit
2267#[allow(dead_code)]
2268fn extract_and_exit(
2269    mut state: ResMut<RenderState>,
2270    request: Res<RenderRequest>,
2271    shared_output: Res<SharedOutput>,
2272    mut commands: Commands,
2273    windows: Query<Entity, With<bevy::window::Window>>,
2274) {
2275    // Handle delayed exit after closing window
2276    if state.exit_requested {
2277        state.exit_frame_count += 1;
2278        // After a few frames with no window, Bevy should exit
2279        return;
2280    }
2281
2282    if !state.captured {
2283        return;
2284    }
2285
2286    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2287        // Use actual captured dimensions (may differ from config if window was resized)
2288        let width = state.image_width;
2289        let height = state.image_height;
2290
2291        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2292        let intrinsics = request.config.intrinsics_for_size(width, height);
2293
2294        let output = RenderOutput {
2295            rgba: rgba.clone(),
2296            depth: depth.clone(),
2297            width,
2298            height,
2299            intrinsics,
2300            camera_transform: request.camera_transform,
2301            object_rotation: request.object_rotation.clone(),
2302            object_translation: request.object_translation,
2303            object_scale: request.object_scale,
2304            target_point: Vec3::ZERO,
2305            targeting_policy: TargetingPolicy::Origin,
2306        };
2307
2308        if let Ok(mut guard) = shared_output.0.lock() {
2309            *guard = Some(output);
2310            drop(guard); // Release lock immediately
2311
2312            // Small delay to allow watchdog to detect output before window close
2313            std::thread::sleep(std::time::Duration::from_millis(200));
2314        }
2315
2316        // Close all windows to trigger app exit
2317        // eprintln!("Closing windows to trigger exit...");
2318        for window_entity in windows.iter() {
2319            commands.entity(window_entity).despawn();
2320        }
2321        state.exit_requested = true;
2322    }
2323}
2324
2325// ============================================================================
2326// Headless Rendering Systems (no window surfaces)
2327// ============================================================================
2328
2329/// Setup the scene for headless rendering with RenderTarget::Image
2330fn setup_headless_scene(
2331    mut commands: Commands,
2332    mut images: ResMut<Assets<Image>>,
2333    asset_server: Res<AssetServer>,
2334    request: Res<RenderRequest>,
2335    mut _materials: ResMut<Assets<StandardMaterial>>,
2336) {
2337    let trace = render_trace_enabled();
2338    let t0 = trace.then(std::time::Instant::now);
2339
2340    #[cfg(test)]
2341    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2342
2343    let width = request.config.width;
2344    let height = request.config.height;
2345
2346    // Create render target image with proper texture usages
2347    let size = Extent3d {
2348        width,
2349        height,
2350        depth_or_array_layers: 1,
2351    };
2352
2353    let mut render_target_image = Image::new_fill(
2354        size,
2355        TextureDimension::D2,
2356        &[0, 0, 0, 255], // Initialize with opaque black
2357        TextureFormat::Rgba8UnormSrgb,
2358        RenderAssetUsages::default(),
2359    );
2360
2361    // Add required texture usages for headless rendering
2362    render_target_image.texture_descriptor.usage =
2363        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2364
2365    let render_target_handle = images.add(render_target_image);
2366
2367    // Store handle for later access
2368    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2369
2370    // Camera rendering to the image texture (NO window!)
2371    commands.spawn((
2372        Camera3d::default(),
2373        Camera::default(),
2374        Hdr,
2375        // In Bevy 0.18 the render target is a separate `RenderTarget` component,
2376        // and `RenderTarget::Image` wraps an `ImageRenderTarget` (via `From<Handle<Image>>`).
2377        RenderTarget::Image(render_target_handle.clone().into()),
2378        render_projection(&request.config),
2379        Msaa::Off,
2380        request.camera_transform,
2381        Tonemapping::None,
2382        DepthPrepass,
2383        NormalPrepass,
2384        RenderCamera,
2385        // Add ImageCopier to trigger RGBA extraction
2386        ImageCopier {
2387            src_image: render_target_handle,
2388            enabled: false, // Will enable when ready to capture
2389        },
2390    ));
2391
2392    // Ambient light (global resource in Bevy 0.18).
2393    let lighting = &request.config.lighting;
2394    commands.insert_resource(GlobalAmbientLight {
2395        color: Color::WHITE,
2396        brightness: lighting.ambient_brightness,
2397        ..default()
2398    });
2399
2400    // Key light
2401    if lighting.key_light_intensity > 0.0 {
2402        commands.spawn((
2403            PointLight {
2404                intensity: lighting.key_light_intensity,
2405                shadows_enabled: lighting.shadows_enabled,
2406                ..default()
2407            },
2408            Transform::from_xyz(
2409                lighting.key_light_position[0],
2410                lighting.key_light_position[1],
2411                lighting.key_light_position[2],
2412            ),
2413        ));
2414    }
2415
2416    // Fill light
2417    if lighting.fill_light_intensity > 0.0 {
2418        commands.spawn((
2419            PointLight {
2420                intensity: lighting.fill_light_intensity,
2421                shadows_enabled: lighting.shadows_enabled,
2422                ..default()
2423            },
2424            Transform::from_xyz(
2425                lighting.fill_light_position[0],
2426                lighting.fill_light_position[1],
2427                lighting.fill_light_position[2],
2428            ),
2429        ));
2430    }
2431
2432    // Load the scene
2433    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2434    commands.insert_resource(LoadedScene(scene_handle.clone()));
2435
2436    // Load the texture
2437    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2438    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2439
2440    // Create material with texture
2441    let _material = _materials.add(StandardMaterial {
2442        base_color_texture: Some(texture_handle),
2443        unlit: true,
2444        ..default()
2445    });
2446
2447    // Spawn the scene with the requested object transform
2448    commands.spawn((
2449        SceneRoot(scene_handle),
2450        request
2451            .object_rotation
2452            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
2453        RenderedObject,
2454    ));
2455
2456    if let Some(t0) = t0 {
2457        eprintln!(
2458            "[render_trace][startup] setup_headless_scene ms={:.3}",
2459            t0.elapsed().as_secs_f64() * 1000.0
2460        );
2461    }
2462}
2463
2464/// Request capture for headless rendering (enable ImageCopier)
2465fn request_headless_capture(
2466    mut state: ResMut<RenderState>,
2467    mut depth_request: ResMut<DepthCaptureRequest>,
2468    mut query: Query<&mut ImageCopier>,
2469    batch: Option<Res<HeadlessBatchSequence>>,
2470) {
2471    let trace = render_trace_enabled();
2472    let t0 = trace.then(std::time::Instant::now);
2473
2474    if !state.capture_ready || state.screenshot_requested {
2475        if let Some(t0) = t0 {
2476            eprintln!(
2477                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2478                t0.elapsed().as_secs_f64() * 1000.0
2479            );
2480        }
2481        return;
2482    }
2483
2484    if batch
2485        .as_ref()
2486        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2487    {
2488        if let Some(t0) = t0 {
2489            eprintln!(
2490                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2491                t0.elapsed().as_secs_f64() * 1000.0
2492            );
2493        }
2494        return;
2495    }
2496
2497    // Enable the ImageCopier to trigger RGBA extraction
2498    for mut copier in query.iter_mut() {
2499        copier.enabled = true;
2500    }
2501
2502    // Request depth capture
2503    depth_request.requested = true;
2504
2505    state.screenshot_requested = true;
2506
2507    if let Some(t0) = t0 {
2508        eprintln!(
2509            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2510            t0.elapsed().as_secs_f64() * 1000.0
2511        );
2512    }
2513}
2514
2515/// Check if headless capture has completed
2516fn check_headless_capture_ready(
2517    mut state: ResMut<RenderState>,
2518    shared_rgba: Res<SharedRgbaBuffer>,
2519    shared_depth: Res<SharedDepthBuffer>,
2520    request: Res<RenderRequest>,
2521    mut query: Query<&mut ImageCopier>,
2522) {
2523    let trace = render_trace_enabled();
2524    let t0 = trace.then(std::time::Instant::now);
2525
2526    if !state.screenshot_requested || state.captured {
2527        if let Some(t0) = t0 {
2528            eprintln!(
2529                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2530                t0.elapsed().as_secs_f64() * 1000.0
2531            );
2532        }
2533        return;
2534    }
2535
2536    state.frame_count += 1;
2537    state.capture_retries += 1;
2538    // Bounded fallback so a genuinely-uniform scene (or persistent invalid
2539    // readback) still terminates instead of hanging to the watchdog.
2540    // Generous bound: slow paths (e.g. RenderSession's retained-render-world
2541    // settle after a scene swap) can take ~150 frames to produce a stable frame,
2542    // so force-accepting at 150 would grab a partial frame and break parity. Only
2543    // force as a true last resort to avoid hanging the watchdog.
2544    let force_accept = state.capture_retries > 150;
2545
2546    let near = request.config.near_plane as f64;
2547    let far = request.config.far_plane as f64;
2548
2549    // RGBA: accept the first stable non-blank frame. Uniform clear-color frames
2550    // can be pre-geometry reads from the nondeterministic one-shot capture, but
2551    // they are also legitimate off-target renders. Accept a stable blank frame
2552    // only when the depth side has already shown a stable all-background buffer.
2553    // The copier stays enabled until BOTH RGBA and depth are valid so a late/odd
2554    // depth frame can still be captured.
2555    if state.rgba_data.is_none() {
2556        let captured_rgba = shared_rgba.0.lock().ok().and_then(|mut g| g.take());
2557        if let Some((rgba_data, width, height)) = captured_rgba {
2558            let blank = is_uniform_rgba_frame(&rgba_data);
2559            let non_blank = !blank;
2560            // Stable == identical to the previous readback (render has settled).
2561            let stable = state.prev_rgba.as_deref() == Some(rgba_data.as_slice());
2562            let stable_empty_view = blank
2563                && stable
2564                && state
2565                    .prev_depth
2566                    .as_deref()
2567                    .is_some_and(|depth| is_all_background_depth(depth, far));
2568            if (non_blank && stable) || stable_empty_view || force_accept {
2569                state.image_width = width;
2570                state.image_height = height;
2571                state.rgba_data = Some(rgba_data);
2572                state.prev_rgba = None;
2573            } else {
2574                // Not settled yet: remember this frame and re-read fresh next one.
2575                state.prev_rgba = Some(rgba_data);
2576            }
2577        }
2578    }
2579
2580    // Depth: accept the first readback that contains real foreground (the depth
2581    // readback can also miss the geometry, leaving an all-far-plane buffer).
2582    if state.depth_data.is_none() {
2583        let captured_depth = shared_depth.0.lock().ok().and_then(|mut g| g.take());
2584        if let Some((depth_data, _w, _h)) = captured_depth {
2585            // Require a real object-surface depth, not just any non-far value:
2586            // near-plane garbage (~configured near plane) is not a valid surface,
2587            // but TBP surface policies legitimately work close to the object
2588            // (~0.025m with the default 0.01m near plane), so do not use a broad
2589            // absolute floor like 0.1m here.
2590            let has_foreground = depth_data
2591                .iter()
2592                .any(|&depth| is_capture_foreground_depth(depth, near, far));
2593            // Settled == identical to the previous depth readback.
2594            let stable = state.prev_depth.as_deref() == Some(depth_data.as_slice());
2595            let stable_empty_view = stable
2596                && is_all_background_depth(&depth_data, far)
2597                && state
2598                    .rgba_data
2599                    .as_deref()
2600                    .is_some_and(is_uniform_rgba_frame);
2601            if (has_foreground && stable) || stable_empty_view {
2602                state.depth_data = Some(depth_data);
2603                state.prev_depth = None;
2604            } else {
2605                state.prev_depth = Some(depth_data);
2606            }
2607        }
2608    }
2609
2610    // Last-resort fallback so we never hang the watchdog: once RGBA is in hand
2611    // and we've retried a lot, fill a uniform far-plane/background placeholder.
2612    //
2613    // This is NOT a valid object render; it is an all-background depth buffer
2614    // that downstream callers should treat as no surface. It must therefore be
2615    // LOUD: a future depth-readback regression has to surface in logs/CI instead
2616    // of looking like a successful render. `tests/spatial_parity.rs` is the
2617    // geometric guard for the old fake-surface failure.
2618    if state.rgba_data.is_some() && state.depth_data.is_none() && force_accept {
2619        let pixel_count = (state.image_width * state.image_height) as usize;
2620        let depth_summary = state
2621            .prev_depth
2622            .as_deref()
2623            .map(|depth| {
2624                DepthReadbackSummary::from_depth(
2625                    depth,
2626                    request.config.near_plane as f64,
2627                    request.config.far_plane as f64,
2628                )
2629            })
2630            .map(|summary| summary.to_string())
2631            .unwrap_or_else(|| "none".to_string());
2632        let camera_translation = request.camera_transform.translation;
2633        let camera_rotation = request.camera_transform.rotation;
2634        let object_rotation = &request.object_rotation;
2635        eprintln!(
2636            "[bevy-sensor][WARN] depth readback produced no valid frame after {} retries; \
2637             falling back to a UNIFORM {:.4} m far-plane background. This is a degraded \
2638             render (no real 3D geometry) and indicates a depth-readback regression. \
2639             request mesh={} image={}x{} camera_t=[{:.4},{:.4},{:.4}] \
2640             camera_q_xyzw=[{:.6},{:.6},{:.6},{:.6}] object_rot_deg=[{:.3},{:.3},{:.3}] \
2641             object_t=[{:.4},{:.4},{:.4}] object_scale=[{:.4},{:.4},{:.4}] \
2642             last_rejected_depth=({}). See render.rs DepthReadbackNode and \
2643             tests/spatial_parity.rs.",
2644            state.capture_retries,
2645            far,
2646            request.mesh_path,
2647            state.image_width,
2648            state.image_height,
2649            camera_translation.x,
2650            camera_translation.y,
2651            camera_translation.z,
2652            camera_rotation.x,
2653            camera_rotation.y,
2654            camera_rotation.z,
2655            camera_rotation.w,
2656            object_rotation.pitch,
2657            object_rotation.yaw,
2658            object_rotation.roll,
2659            request.object_translation.x,
2660            request.object_translation.y,
2661            request.object_translation.z,
2662            request.object_scale.x,
2663            request.object_scale.y,
2664            request.object_scale.z,
2665            depth_summary
2666        );
2667        state.depth_data = Some(vec![far; pixel_count]);
2668    }
2669
2670    let rgba_ready = state.rgba_data.is_some();
2671    let depth_ready = state.depth_data.is_some();
2672
2673    // Both valid → capture complete; stop the copier.
2674    if rgba_ready && depth_ready {
2675        state.captured = true;
2676        for mut copier in query.iter_mut() {
2677            copier.enabled = false;
2678        }
2679    }
2680
2681    if let Some(t0) = t0 {
2682        eprintln!(
2683            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2684            rgba_ready,
2685            depth_ready,
2686            state.captured,
2687            state.frame_count,
2688            t0.elapsed().as_secs_f64() * 1000.0
2689        );
2690    }
2691}
2692
2693/// Extract results and exit for headless rendering
2694fn extract_and_exit_headless(
2695    mut state: ResMut<RenderState>,
2696    request: Res<RenderRequest>,
2697    shared_output: Res<SharedOutput>,
2698    mut app_exit: MessageWriter<bevy::app::AppExit>,
2699    batch: Option<Res<HeadlessBatchSequence>>,
2700) {
2701    if batch.is_some() {
2702        return;
2703    }
2704
2705    if state.exit_requested {
2706        return;
2707    }
2708
2709    if !state.captured {
2710        return;
2711    }
2712
2713    if state.rgba_data.is_some() && state.depth_data.is_some() {
2714        let width = state.image_width;
2715        let height = state.image_height;
2716        let rgba = state.rgba_data.take().expect("checked rgba_data");
2717        let depth = state.depth_data.take().expect("checked depth_data");
2718
2719        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2720        let intrinsics = request.config.intrinsics_for_size(width, height);
2721
2722        let output = RenderOutput {
2723            rgba,
2724            depth,
2725            width,
2726            height,
2727            intrinsics,
2728            camera_transform: request.camera_transform,
2729            object_rotation: request.object_rotation.clone(),
2730            object_translation: request.object_translation,
2731            object_scale: request.object_scale,
2732            target_point: Vec3::ZERO,
2733            targeting_policy: TargetingPolicy::Origin,
2734        };
2735
2736        if let Ok(mut guard) = shared_output.0.lock() {
2737            *guard = Some(output);
2738            drop(guard);
2739            std::thread::sleep(std::time::Duration::from_millis(200));
2740        }
2741
2742        // Send AppExit event (headless apps use this instead of closing windows)
2743        app_exit.write(bevy::app::AppExit::Success);
2744        state.exit_requested = true;
2745    }
2746}
2747
2748/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2749fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2750    let Some(mut batch) = batch else {
2751        return;
2752    };
2753
2754    if batch.warmup_frames_remaining > 0 {
2755        batch.warmup_frames_remaining -= 1;
2756    }
2757}
2758
2759/// Extract one batch output and continue rendering the next viewpoint in the same app.
2760fn extract_and_continue_headless_batch(
2761    mut state: ResMut<RenderState>,
2762    request: Res<RenderRequest>,
2763    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2764    batch: Option<ResMut<HeadlessBatchSequence>>,
2765    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2766    mut depth_request: ResMut<DepthCaptureRequest>,
2767    mut image_copiers: Query<&mut ImageCopier>,
2768) {
2769    let trace = render_trace_enabled();
2770    let t0 = trace.then(std::time::Instant::now);
2771
2772    let (shared_rgba, shared_depth) = buffers;
2773    let Some(mut batch) = batch else {
2774        if let Some(t0) = t0 {
2775            eprintln!(
2776                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2777                t0.elapsed().as_secs_f64() * 1000.0
2778            );
2779        }
2780        return;
2781    };
2782
2783    if state.exit_requested || !state.captured || batch.done {
2784        if let Some(t0) = t0 {
2785            eprintln!(
2786                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2787                state.captured,
2788                batch.done,
2789                t0.elapsed().as_secs_f64() * 1000.0
2790            );
2791        }
2792        return;
2793    }
2794
2795    if state.rgba_data.is_some() && state.depth_data.is_some() {
2796        let width = state.image_width;
2797        let height = state.image_height;
2798        let rgba = state.rgba_data.take().expect("checked rgba_data");
2799        let depth = state.depth_data.take().expect("checked depth_data");
2800
2801        let intrinsics = request.config.intrinsics_for_size(width, height);
2802
2803        let output = RenderOutput {
2804            rgba,
2805            depth,
2806            width,
2807            height,
2808            intrinsics,
2809            camera_transform: batch
2810                .current_viewpoint()
2811                .unwrap_or(request.camera_transform),
2812            object_rotation: request.object_rotation.clone(),
2813            object_translation: request.object_translation,
2814            object_scale: request.object_scale,
2815            target_point: Vec3::ZERO,
2816            targeting_policy: TargetingPolicy::Origin,
2817        };
2818        batch.outputs.push(output);
2819
2820        let next_index = batch.current_index + 1;
2821        if next_index >= batch.viewpoints.len() {
2822            batch.done = true;
2823            state.exit_requested = true;
2824            return;
2825        }
2826
2827        batch.current_index = next_index;
2828        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2829
2830        if let Some(next_viewpoint) = batch.current_viewpoint() {
2831            for mut camera_transform in camera_query.iter_mut() {
2832                *camera_transform = next_viewpoint;
2833            }
2834        }
2835
2836        if let Ok(mut guard) = shared_rgba.0.lock() {
2837            *guard = None;
2838        }
2839        if let Ok(mut guard) = shared_depth.0.lock() {
2840            *guard = None;
2841        }
2842
2843        for mut copier in image_copiers.iter_mut() {
2844            copier.enabled = false;
2845        }
2846
2847        depth_request.requested = false;
2848        state.frame_count = 0;
2849        state.capture_ready = true;
2850        state.screenshot_requested = false;
2851        state.captured = false;
2852        state.rgba_data = None;
2853        state.depth_data = None;
2854        state.image_width = 0;
2855        state.image_height = 0;
2856        // Reset the per-capture settle/retry tracking too, otherwise it
2857        // accumulates across viewpoints and force-accepts an unsettled frame for
2858        // later viewpoints (breaking parity).
2859        state.capture_retries = 0;
2860        state.prev_rgba = None;
2861        state.prev_depth = None;
2862
2863        if let Some(t0) = t0 {
2864            eprintln!(
2865                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2866                batch.current_index.saturating_sub(1),
2867                batch.current_index,
2868                batch.done,
2869                t0.elapsed().as_secs_f64() * 1000.0
2870            );
2871        }
2872    } else if let Some(t0) = t0 {
2873        eprintln!(
2874            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2875            t0.elapsed().as_secs_f64() * 1000.0
2876        );
2877    }
2878}
2879
2880// ============================================================================
2881// Persistent batch session (RenderSession)
2882//
2883// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2884// object (PSO) compilation across multiple `render()` calls. Profile data (see
2885// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2886// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2887// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2888// episodes recovers the bulk of that cost.
2889// ============================================================================
2890
2891/// Marker for the per-group scene entity so we can despawn it cleanly when the
2892/// next `RenderSession::render()` call swaps in a different object or rotation.
2893#[derive(Component)]
2894struct SessionScene;
2895
2896/// Session-persistent setup: render target image, camera (with prepass +
2897/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2898/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2899/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2900fn setup_session_persistent_scene(
2901    mut commands: Commands,
2902    mut images: ResMut<Assets<Image>>,
2903    config: Res<SessionRenderConfig>,
2904) {
2905    let width = config.0.width;
2906    let height = config.0.height;
2907
2908    let size = Extent3d {
2909        width,
2910        height,
2911        depth_or_array_layers: 1,
2912    };
2913
2914    let mut render_target_image = Image::new_fill(
2915        size,
2916        TextureDimension::D2,
2917        &[0, 0, 0, 255],
2918        TextureFormat::Rgba8UnormSrgb,
2919        RenderAssetUsages::default(),
2920    );
2921    render_target_image.texture_descriptor.usage =
2922        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2923
2924    let render_target_handle = images.add(render_target_image);
2925    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2926
2927    commands.spawn((
2928        Camera3d::default(),
2929        Camera::default(),
2930        Hdr,
2931        RenderTarget::Image(render_target_handle.clone().into()),
2932        render_projection(&config.0),
2933        Msaa::Off,
2934        Transform::default(),
2935        Tonemapping::None,
2936        DepthPrepass,
2937        NormalPrepass,
2938        RenderCamera,
2939        ImageCopier {
2940            src_image: render_target_handle,
2941            enabled: false,
2942        },
2943    ));
2944
2945    let lighting = &config.0.lighting;
2946    commands.insert_resource(GlobalAmbientLight {
2947        color: Color::WHITE,
2948        brightness: lighting.ambient_brightness,
2949        ..default()
2950    });
2951
2952    if lighting.key_light_intensity > 0.0 {
2953        commands.spawn((
2954            PointLight {
2955                intensity: lighting.key_light_intensity,
2956                shadows_enabled: lighting.shadows_enabled,
2957                ..default()
2958            },
2959            Transform::from_xyz(
2960                lighting.key_light_position[0],
2961                lighting.key_light_position[1],
2962                lighting.key_light_position[2],
2963            ),
2964        ));
2965    }
2966
2967    if lighting.fill_light_intensity > 0.0 {
2968        commands.spawn((
2969            PointLight {
2970                intensity: lighting.fill_light_intensity,
2971                shadows_enabled: lighting.shadows_enabled,
2972                ..default()
2973            },
2974            Transform::from_xyz(
2975                lighting.fill_light_position[0],
2976                lighting.fill_light_position[1],
2977                lighting.fill_light_position[2],
2978            ),
2979        ));
2980    }
2981}
2982
2983/// Resource carrying the `RenderConfig` that was fixed at session construction.
2984/// Used by `setup_session_persistent_scene` to size the render target.
2985#[derive(Resource)]
2986struct SessionRenderConfig(RenderConfig);
2987
2988/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
2989/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
2990/// cold-init cost.
2991///
2992/// # Thread affinity
2993///
2994/// `RenderSession` must be created, used, and dropped on the same thread. It
2995/// holds a `bevy::App` which owns GPU resources that are not safe to move
2996/// across threads. The `!Send + !Sync` marker is enforced via
2997/// `PhantomData<*const ()>`.
2998///
2999/// # Config invariant
3000///
3001/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
3002/// `new()`. All `render()` calls must use requests whose `render_config`
3003/// matches; heterogeneous configs are rejected.
3004///
3005/// # Phase 1 limitation
3006///
3007/// Each `render()` call must contain homogeneous requests (same `object_dir`
3008/// and `object_rotation`). Heterogeneous calls return
3009/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
3010/// `render()` once per episode to amortize setup across episodes.
3011pub struct RenderSession {
3012    app: App,
3013    render_config: RenderConfig,
3014    shared_rgba: SharedRgbaBuffer,
3015    shared_depth: SharedDepthBuffer,
3016    _not_send_sync: std::marker::PhantomData<*const ()>,
3017}
3018
3019impl RenderSession {
3020    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
3021    /// `update()` so Startup systems run and the wgpu device + adapter are
3022    /// initialized. The first `render()` call still pays PSO compilation for
3023    /// the specific mesh/material combination; subsequent calls reuse the cache.
3024    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
3025        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3026        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3027
3028        let mut app = App::new();
3029        app.add_plugins(
3030            DefaultPlugins
3031                .set(bevy::asset::AssetPlugin {
3032                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3033                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3034                    // default handle). YCB meshes load from absolute paths, so allow them.
3035                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3036                    ..default()
3037                })
3038                .set(WindowPlugin {
3039                    primary_window: None,
3040                    exit_condition: ExitCondition::DontExit,
3041                    ..default()
3042                })
3043                .disable::<bevy::winit::WinitPlugin>()
3044                .disable::<LogPlugin>()
3045                .disable::<TerminalCtrlCHandlerPlugin>(),
3046        )
3047        .add_plugins(ObjPlugin)
3048        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3049        // Scene spawning panics unless those component types are registered. The
3050        // minimal headless plugin set doesn't register them, so do it explicitly.
3051        .register_type::<Mesh3d>()
3052        .register_type::<MeshMaterial3d<StandardMaterial>>()
3053        .register_type::<bevy::prelude::Transform>()
3054        .register_type::<bevy::prelude::GlobalTransform>()
3055        .register_type::<bevy::transform::components::TransformTreeChanged>()
3056        .register_type::<bevy::prelude::Visibility>()
3057        .register_type::<bevy::prelude::InheritedVisibility>()
3058        .register_type::<bevy::prelude::ViewVisibility>()
3059        .add_plugins(ImageCopyPlugin {
3060            shared_rgba: shared_rgba.clone(),
3061        })
3062        .add_plugins(DepthReadbackPlugin {
3063            shared_depth: shared_depth.clone(),
3064            near: render_config.near_plane,
3065            far: render_config.far_plane,
3066        })
3067        .insert_resource(SessionRenderConfig(render_config.clone()))
3068        .insert_resource(shared_rgba.clone())
3069        .init_resource::<RenderState>()
3070        .add_systems(Startup, setup_session_persistent_scene)
3071        .add_systems(
3072            Update,
3073            (
3074                check_assets_loaded,
3075                apply_materials,
3076                tick_headless_batch_warmup,
3077                request_headless_capture,
3078                check_headless_capture_ready,
3079                extract_and_continue_headless_batch,
3080            )
3081                .chain()
3082                // Gate the capture chain on `RenderRequest` existing. `new()`
3083                // runs a warmup `app.update()` to execute Startup (which spawns
3084                // the camera/lights/render target) before the first `render()`
3085                // call, but does not yet insert `RenderRequest`. Several systems
3086                // in this chain take `Res<RenderRequest>` (not `Option`) and
3087                // would panic on SystemState init if the resource were absent.
3088                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3089        );
3090
3091        app.finish();
3092        app.cleanup();
3093
3094        // One warmup update runs Startup systems (render target, camera, lights)
3095        // so they exist before the first `render()` call seeds the camera
3096        // transform. The Update chain is gated by `RenderRequest` existence and
3097        // is a no-op this tick. PSO compilation for specific mesh/material
3098        // combinations still happens lazily on the first real render.
3099        app.update();
3100
3101        Ok(Self {
3102            app,
3103            render_config: render_config.clone(),
3104            shared_rgba,
3105            shared_depth,
3106            _not_send_sync: std::marker::PhantomData,
3107        })
3108    }
3109
3110    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
3111    /// Returns outputs in request order.
3112    ///
3113    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
3114    /// wgpu device was lost mid-render. This call produced no output; any
3115    /// outputs from earlier `render()` calls on this session are still valid.
3116    /// Recovery: drop this `RenderSession` and construct a new one.
3117    pub fn render(
3118        &mut self,
3119        requests: &[crate::BatchRenderRequest],
3120    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
3121        use crate::{BatchRenderError, BatchRenderOutput};
3122
3123        if requests.is_empty() {
3124            return Ok(Vec::new());
3125        }
3126
3127        // Enforce homogeneity and config invariance.
3128        let first = &requests[0];
3129        if first.render_config != self.render_config {
3130            return Err(BatchRenderError::InvalidConfig(
3131                "RenderSession render_config mismatch: session was constructed with a different \
3132                 RenderConfig than the first request carries. Session config cannot change after \
3133                 `new()`; construct a new session if you need a different resolution/camera."
3134                    .to_string(),
3135            ));
3136        }
3137        for r in &requests[1..] {
3138            if r.object_dir != first.object_dir
3139                || r.object_rotation != first.object_rotation
3140                || r.object_translation != first.object_translation
3141                || r.object_scale != first.object_scale
3142                || r.render_config != first.render_config
3143            {
3144                return Err(BatchRenderError::InvalidConfig(
3145                    "Phase 1 RenderSession::render requires homogeneous requests \
3146                     (same object_dir, object transform, and render_config across the batch). \
3147                     Call render() once per group instead."
3148                        .to_string(),
3149                ));
3150            }
3151        }
3152
3153        // Canonicalize paths and validate mesh/texture presence. This matches
3154        // `render_headless_sequence`'s preconditions so the error surface stays
3155        // consistent.
3156        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
3157            BatchRenderError::InvalidConfig(format!(
3158                "Cannot canonicalize object directory {}: {}",
3159                first.object_dir.display(),
3160                e
3161            ))
3162        })?;
3163        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3164        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3165        if !mesh_path.exists() {
3166            return Err(BatchRenderError::InvalidConfig(format!(
3167                "Mesh not found: {}",
3168                mesh_path.display()
3169            )));
3170        }
3171        if !texture_path.exists() {
3172            return Err(BatchRenderError::InvalidConfig(format!(
3173                "Texture not found: {}",
3174                texture_path.display()
3175            )));
3176        }
3177
3178        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
3179
3180        // --- per-group scene swap (direct world manipulation) ---
3181        {
3182            let world = self.app.world_mut();
3183
3184            // Despawn any SessionScene entity from the previous group.
3185            let stale: Vec<Entity> = world
3186                .query_filtered::<Entity, With<SessionScene>>()
3187                .iter(world)
3188                .collect();
3189            for entity in stale {
3190                world.entity_mut(entity).despawn();
3191            }
3192
3193            // Clear shared RGBA/depth buffers so a stale payload can't leak
3194            // into the first viewpoint of this call.
3195            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3196                *guard = None;
3197            }
3198            if let Ok(mut guard) = self.shared_depth.0.lock() {
3199                *guard = None;
3200            }
3201
3202            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
3203            // frame_count, materials_applied, etc.). Default() gives all false/0.
3204            *world.resource_mut::<RenderState>() = RenderState::default();
3205
3206            // Update RenderRequest so the existing capture systems see the new
3207            // object paths, rotation, and camera transform (seeded from first vp).
3208            let new_request = RenderRequest {
3209                mesh_path: fs_path_to_asset_string(&mesh_path),
3210                texture_path: fs_path_to_asset_string(&texture_path),
3211                camera_transform: viewpoints[0],
3212                object_rotation: first.object_rotation.clone(),
3213                object_translation: first.object_translation,
3214                object_scale: first.object_scale,
3215                config: self.render_config.clone(),
3216            };
3217            world.insert_resource(new_request);
3218
3219            // Kick off asset loads and install the handles under the names the
3220            // existing `check_assets_loaded` system expects.
3221            let asset_server = world.resource::<AssetServer>().clone();
3222            let scene_handle: Handle<Scene> =
3223                asset_server.load(fs_path_to_asset_string(&mesh_path));
3224            let texture_handle: Handle<Image> =
3225                asset_server.load(fs_path_to_asset_string(&texture_path));
3226            world.insert_resource(LoadedScene(scene_handle.clone()));
3227            world.insert_resource(LoadedTexture(texture_handle));
3228
3229            // Spawn the new scene entity tagged so we can find + despawn it next
3230            // render() call.
3231            world.spawn((
3232                SceneRoot(scene_handle),
3233                first.object_rotation.to_transform_with_translation_scale(
3234                    first.object_translation,
3235                    first.object_scale,
3236                ),
3237                RenderedObject,
3238                SessionScene,
3239            ));
3240
3241            // Seed the camera transform to the first viewpoint now so the first
3242            // capture lines up; subsequent viewpoints are advanced by
3243            // `extract_and_continue_headless_batch`.
3244            let camera_entity = world
3245                .query_filtered::<Entity, With<RenderCamera>>()
3246                .iter(world)
3247                .next();
3248            if let Some(cam) = camera_entity {
3249                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3250                    *transform = viewpoints[0];
3251                }
3252            }
3253
3254            // Install the viewpoint sequence for this render() call. The robust
3255            // settled-frame capture (reject blank/partial readbacks, retry until
3256            // two consecutive readbacks match) absorbs the despawn/respawn
3257            // render-world settle, so a separate discarded warmup pass is not
3258            // needed and the per-object cost stays low.
3259            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
3260        }
3261
3262        // --- drive the real capture loop ---
3263        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3264        let start = std::time::Instant::now();
3265        loop {
3266            if start.elapsed() > timeout {
3267                return Err(BatchRenderError::TotalFailure(format!(
3268                    "RenderSession::render timed out after {}s",
3269                    RENDER_TIMEOUT_SECS
3270                )));
3271            }
3272
3273            self.app.update();
3274
3275            if self.app.world().resource::<HeadlessBatchSequence>().done {
3276                break;
3277            }
3278        }
3279
3280        // Collect outputs and zip with requests to produce BatchRenderOutput in
3281        // request order.
3282        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3283        if sequence.outputs.len() != requests.len() {
3284            return Err(BatchRenderError::TotalFailure(format!(
3285                "RenderSession produced {} outputs for {} requests",
3286                sequence.outputs.len(),
3287                requests.len()
3288            )));
3289        }
3290        let outputs = std::mem::take(&mut sequence.outputs);
3291
3292        Ok(requests
3293            .iter()
3294            .cloned()
3295            .zip(outputs)
3296            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
3297            .collect())
3298    }
3299}
3300
3301// ============================================================================
3302// Per-step persistent renderer (PersistentRenderer)
3303//
3304// `RenderSession` reuses the App across calls but rebuilds the scene on every
3305// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
3306// fine for the parity-gate path (one scene per episode of N viewpoints) but
3307// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
3308// the object stays loaded for the whole episode.
3309//
3310// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
3311// construction. `new()` loads mesh + texture + spawns the scene root + drives
3312// one warmup render (output discarded) so PSO compilation and material setup
// are paid up front. `render(camera, rotation)` then only rewrites the camera
// `Transform` and the scene root `Transform` (unconditionally, with no change
// check), drives the capture chain for one viewpoint, and returns. See issue #65.
3316// ============================================================================
3317
/// Marker for the `PersistentRenderer`'s scene root entity.
///
/// The entity is spawned once in `PersistentRenderer::new()` and is looked up
/// through this marker on every render call, which overwrites its `Transform`
/// with the caller-supplied object rotation, translation, and scale. Nothing
/// in `PersistentRenderer` despawns it.
#[derive(Component)]
struct PersistentScene;
3323
/// Persistent per-step renderer. Loads the scene once at `new()` and renders
/// one viewpoint per `render()` call by overwriting the camera transform and
/// the scene root transform in-place. Built for surface-policy feedback loops
/// where the object stays fixed for the duration of an episode and the camera
/// moves every step. See issue #65.
///
/// # Thread affinity
///
/// `PersistentRenderer` must be created, used, and dropped on the same thread.
/// Holds a `bevy::App` that owns GPU resources not safe to move across
/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
///
/// # Object + config invariants
///
/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
/// object or change resolution/lighting, drop and rebuild. The object rotation
/// (and, through `render_with_object_transform`, its translation and scale)
/// may change freely between render calls.
pub struct PersistentRenderer {
    /// Bevy application holding the spawned scene root, the render camera,
    /// and the capture systems installed by `new()`.
    app: App,
    /// Canonicalized form of the object directory passed to `new()`.
    object_dir: PathBuf,
    /// Clone of the configuration passed to `new()`.
    render_config: RenderConfig,
    /// RGBA readback buffer, also handed to `ImageCopyPlugin`; cleared at the
    /// start of every render call.
    shared_rgba: SharedRgbaBuffer,
    /// Depth readback buffer, also handed to `DepthReadbackPlugin`; cleared at
    /// the start of every render call.
    shared_depth: SharedDepthBuffer,
    /// Raw-pointer marker that makes the type `!Send + !Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
3349
3350impl PersistentRenderer {
3351    /// Build the App, load the scene + texture, spawn the scene root, and drive
3352    /// one warmup render whose output is discarded. After `new()` returns, the
3353    /// first user-facing `render()` call benefits from a warm PSO cache and
3354    /// applied materials.
3355    pub fn new(
3356        object_dir: &Path,
3357        render_config: &RenderConfig,
3358    ) -> Result<Self, crate::RenderError> {
3359        let object_dir =
3360            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
3361                path: object_dir.display().to_string(),
3362                reason: e.to_string(),
3363            })?;
3364        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3365        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3366        if !mesh_path.exists() {
3367            return Err(crate::RenderError::MeshNotFound(fs_path_to_asset_string(
3368                &mesh_path,
3369            )));
3370        }
3371        if !texture_path.exists() {
3372            return Err(crate::RenderError::TextureNotFound(
3373                fs_path_to_asset_string(&texture_path),
3374            ));
3375        }
3376
3377        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3378        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3379
3380        let mut app = App::new();
3381        app.add_plugins(
3382            DefaultPlugins
3383                .set(bevy::asset::AssetPlugin {
3384                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3385                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3386                    // default handle). YCB meshes load from absolute paths, so allow them.
3387                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3388                    ..default()
3389                })
3390                .set(WindowPlugin {
3391                    primary_window: None,
3392                    exit_condition: ExitCondition::DontExit,
3393                    ..default()
3394                })
3395                .disable::<bevy::winit::WinitPlugin>()
3396                .disable::<LogPlugin>()
3397                .disable::<TerminalCtrlCHandlerPlugin>(),
3398        )
3399        .add_plugins(ObjPlugin)
3400        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3401        // Scene spawning panics unless those component types are registered. The
3402        // minimal headless plugin set doesn't register them, so do it explicitly.
3403        .register_type::<Mesh3d>()
3404        .register_type::<MeshMaterial3d<StandardMaterial>>()
3405        .register_type::<bevy::prelude::Transform>()
3406        .register_type::<bevy::prelude::GlobalTransform>()
3407        .register_type::<bevy::transform::components::TransformTreeChanged>()
3408        .register_type::<bevy::prelude::Visibility>()
3409        .register_type::<bevy::prelude::InheritedVisibility>()
3410        .register_type::<bevy::prelude::ViewVisibility>()
3411        .add_plugins(ImageCopyPlugin {
3412            shared_rgba: shared_rgba.clone(),
3413        })
3414        .add_plugins(DepthReadbackPlugin {
3415            shared_depth: shared_depth.clone(),
3416            near: render_config.near_plane,
3417            far: render_config.far_plane,
3418        })
3419        .insert_resource(SessionRenderConfig(render_config.clone()))
3420        .insert_resource(shared_rgba.clone())
3421        .init_resource::<RenderState>()
3422        .add_systems(Startup, setup_session_persistent_scene)
3423        .add_systems(
3424            Update,
3425            (
3426                check_assets_loaded,
3427                apply_materials,
3428                tick_headless_batch_warmup,
3429                request_headless_capture,
3430                check_headless_capture_ready,
3431                extract_and_continue_headless_batch,
3432            )
3433                .chain()
3434                // Same gate as RenderSession: capture chain only runs once
3435                // RenderRequest is installed. Startup runs first via the
3436                // warmup `app.update()` below.
3437                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3438        );
3439
3440        app.finish();
3441        app.cleanup();
3442        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
3443        app.update();
3444
3445        // Install scene + warmup render request. The warmup output is discarded
3446        // — its purpose is to pay PSO compilation and material application
3447        // upfront so the first user-facing render() is fast. Use a real TBP
3448        // viewpoint rather than Transform::default(), which places the camera
3449        // at the object origin and forces a flat-depth fallback before any
3450        // caller-requested surface-policy render runs.
3451        let warmup_camera = persistent_warmup_camera_transform();
3452        let initial_request = RenderRequest {
3453            mesh_path: fs_path_to_asset_string(&mesh_path),
3454            texture_path: fs_path_to_asset_string(&texture_path),
3455            camera_transform: warmup_camera,
3456            object_rotation: ObjectRotation::identity(),
3457            object_translation: Vec3::ZERO,
3458            object_scale: Vec3::ONE,
3459            config: render_config.clone(),
3460        };
3461
3462        {
3463            let world = app.world_mut();
3464            let asset_server = world.resource::<AssetServer>().clone();
3465            let scene_handle: Handle<Scene> =
3466                asset_server.load(fs_path_to_asset_string(&mesh_path));
3467            let texture_handle: Handle<Image> =
3468                asset_server.load(fs_path_to_asset_string(&texture_path));
3469            world.insert_resource(LoadedScene(scene_handle.clone()));
3470            world.insert_resource(LoadedTexture(texture_handle));
3471            world.insert_resource(initial_request);
3472            world.spawn((
3473                SceneRoot(scene_handle),
3474                ObjectRotation::identity()
3475                    .to_transform_with_translation_scale(Vec3::ZERO, Vec3::ONE),
3476                RenderedObject,
3477                PersistentScene,
3478            ));
3479            if let Some(cam) = world
3480                .query_filtered::<Entity, With<RenderCamera>>()
3481                .iter(world)
3482                .next()
3483            {
3484                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3485                    *transform = warmup_camera;
3486                }
3487            }
3488            world.insert_resource(HeadlessBatchSequence::new(vec![warmup_camera]));
3489        }
3490
3491        // Drive the warmup render to completion.
3492        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3493        let start = std::time::Instant::now();
3494        loop {
3495            if start.elapsed() > timeout {
3496                return Err(crate::RenderError::RenderFailed(format!(
3497                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
3498                )));
3499            }
3500            app.update();
3501            if app.world().resource::<HeadlessBatchSequence>().done {
3502                break;
3503            }
3504        }
3505        // Discard the warmup output so it doesn't leak into the first real
3506        // render() call's output buffer.
3507        app.world_mut()
3508            .resource_mut::<HeadlessBatchSequence>()
3509            .outputs
3510            .clear();
3511
3512        Ok(Self {
3513            app,
3514            object_dir,
3515            render_config: render_config.clone(),
3516            shared_rgba,
3517            shared_depth,
3518            _not_send_sync: std::marker::PhantomData,
3519        })
3520    }
3521
3522    /// Render one frame from the given camera transform and object rotation.
3523    /// Reuses the loaded scene + warm PSO cache from `new()`.
3524    pub fn render(
3525        &mut self,
3526        camera_transform: &Transform,
3527        object_rotation: &ObjectRotation,
3528    ) -> Result<RenderOutput, crate::RenderError> {
3529        self.render_with_object_transform(camera_transform, object_rotation, Vec3::ZERO, Vec3::ONE)
3530    }
3531
3532    /// Render one frame with explicit object translation and scale.
3533    pub fn render_with_object_transform(
3534        &mut self,
3535        camera_transform: &Transform,
3536        object_rotation: &ObjectRotation,
3537        object_translation: Vec3,
3538        object_scale: Vec3,
3539    ) -> Result<RenderOutput, crate::RenderError> {
3540        let camera_transform = *camera_transform;
3541        let object_rotation_owned = object_rotation.clone();
3542
3543        {
3544            let world = self.app.world_mut();
3545
3546            // Update the persistent scene root rotation. Always-write avoids
3547            // the cost of an extra ObjectRotation comparison per call; the
3548            // mutation itself is a single Transform write.
3549            let scene_entity = world
3550                .query_filtered::<Entity, With<PersistentScene>>()
3551                .iter(world)
3552                .next();
3553            if let Some(entity) = scene_entity {
3554                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
3555                    *transform = object_rotation_owned
3556                        .to_transform_with_translation_scale(object_translation, object_scale);
3557                }
3558            }
3559
3560            // Update the camera transform.
3561            let cam_entity = world
3562                .query_filtered::<Entity, With<RenderCamera>>()
3563                .iter(world)
3564                .next();
3565            if let Some(cam) = cam_entity {
3566                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3567                    *transform = camera_transform;
3568                }
3569            }
3570
3571            // Reset per-frame state, preserving scene_loaded / texture_loaded
3572            // / materials_applied / materials_applied_frame. The asset-load
3573            // and material-apply work was paid in `new()`'s warmup; we only
3574            // need to clear the per-capture state.
3575            //
3576            // `capture_ready = true` short-circuits `apply_materials` on
3577            // every tick of the render loop (no need to re-check material
3578            // application — it stays applied for the renderer's lifetime).
3579            // It does NOT short-circuit `request_headless_capture`, which
3580            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3581            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3582            // without that warmup gate, request_headless_capture fires same-
3583            // tick as the transform writes, capturing the previous render's
3584            // target before the new transforms have propagated.
3585            {
3586                let mut state = world.resource_mut::<RenderState>();
3587                state.exit_requested = false;
3588                state.screenshot_requested = false;
3589                state.captured = false;
3590                state.rgba_data = None;
3591                state.depth_data = None;
3592                state.frame_count = 0;
3593                state.image_width = 0;
3594                state.image_height = 0;
3595                state.capture_ready = true;
3596                state.capture_retries = 0;
3597                state.prev_rgba = None;
3598                state.prev_depth = None;
3599            }
3600
3601            // Clear shared GPU readback buffers so a stale payload from the
3602            // previous render() can't leak into this call's output.
3603            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3604                *guard = None;
3605            }
3606            if let Ok(mut guard) = self.shared_depth.0.lock() {
3607                *guard = None;
3608            }
3609
3610            // Update RenderRequest (used by extract_and_continue_headless_batch
3611            // to stamp the output with the right intrinsics + rotation).
3612            {
3613                let mut req = world.resource_mut::<RenderRequest>();
3614                req.camera_transform = camera_transform;
3615                req.object_rotation = object_rotation_owned.clone();
3616                req.object_translation = object_translation;
3617                req.object_scale = object_scale;
3618            }
3619
3620            // Install fresh single-element batch with warmup frames so
3621            // `request_headless_capture` is gated until the new transforms
3622            // have propagated through the render pipeline.
3623            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3624            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3625            world.insert_resource(batch);
3626        }
3627
3628        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3629        let start = std::time::Instant::now();
3630        loop {
3631            if start.elapsed() > timeout {
3632                return Err(crate::RenderError::RenderFailed(format!(
3633                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3634                )));
3635            }
3636            self.app.update();
3637            if self.app.world().resource::<HeadlessBatchSequence>().done {
3638                break;
3639            }
3640        }
3641
3642        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3643        let mut outputs = std::mem::take(&mut sequence.outputs);
3644        if outputs.len() != 1 {
3645            return Err(crate::RenderError::RenderFailed(format!(
3646                "PersistentRenderer::render expected 1 output, got {}",
3647                outputs.len()
3648            )));
3649        }
3650
3651        Ok(outputs.remove(0))
3652    }
3653
3654    /// Path to the YCB object directory this renderer was bound to.
3655    pub fn object_dir(&self) -> &Path {
3656        &self.object_dir
3657    }
3658
3659    /// The `RenderConfig` this renderer was constructed with.
3660    pub fn render_config(&self) -> &RenderConfig {
3661        &self.render_config
3662    }
3663
3664    /// Explicit close. Equivalent to dropping; provided to match the API
3665    /// proposal in #65 for callers that want lifetime-explicit teardown.
3666    pub fn close(self) {
3667        // Drop runs on return.
3668    }
3669}
3670
3671/// Render directly to files (for subprocess mode).
3672///
3673/// This function saves RGBA and depth data directly to files before exiting.
3674/// Designed for subprocess rendering where the process will exit after rendering.
3675#[allow(clippy::too_many_arguments)]
3676pub fn render_to_files(
3677    object_dir: &Path,
3678    camera_transform: &Transform,
3679    object_rotation: &ObjectRotation,
3680    object_translation: Vec3,
3681    object_scale: Vec3,
3682    config: &RenderConfig,
3683    rgba_path: &Path,
3684    depth_path: &Path,
3685) -> Result<(), RenderError> {
3686    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3687    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3688
3689    if !mesh_path.exists() {
3690        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
3691            &mesh_path,
3692        )));
3693    }
3694    if !texture_path.exists() {
3695        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
3696            &texture_path,
3697        )));
3698    }
3699
3700    let request = RenderRequest {
3701        mesh_path: fs_path_to_asset_string(&mesh_path),
3702        texture_path: fs_path_to_asset_string(&texture_path),
3703        camera_transform: *camera_transform,
3704        object_rotation: object_rotation.clone(),
3705        object_translation,
3706        object_scale,
3707        config: config.clone(),
3708    };
3709
3710    // Shared state for output
3711    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
3712    let output_poll = shared_output.clone();
3713
3714    // Clone paths for watchdog thread
3715    let rgba_path = rgba_path.to_path_buf();
3716    let depth_path = depth_path.to_path_buf();
3717
3718    // Shared buffer for RGBA data from headless render target
3719    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3720
3721    // Shared buffer for depth readback
3722    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3723
3724    // Spawn watchdog thread that saves files and exits
3725    std::thread::spawn(move || {
3726        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3727        let start = std::time::Instant::now();
3728        let poll_interval = std::time::Duration::from_millis(100);
3729
3730        loop {
3731            if let Ok(guard) = output_poll.0.lock() {
3732                if let Some(output) = guard.as_ref() {
3733                    // Save RGBA as PNG
3734                    if let Err(e) =
3735                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
3736                    {
3737                        eprintln!("Failed to save RGBA: {:?}", e);
3738                        std::process::exit(1);
3739                    }
3740
3741                    // Save depth as binary f32
3742                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
3743                        eprintln!("Failed to save depth: {:?}", e);
3744                        std::process::exit(1);
3745                    }
3746
3747                    std::process::exit(0);
3748                }
3749            }
3750
3751            if start.elapsed() > timeout {
3752                eprintln!(
3753                    "Error: Render timeout after {} seconds",
3754                    RENDER_TIMEOUT_SECS
3755                );
3756                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
3757                std::process::exit(1);
3758            }
3759
3760            std::thread::sleep(poll_interval);
3761        }
3762    });
3763
3764    // Configure rendering backend for this environment.
3765    // Use OnceLock so env vars are only set once per process — repeated calls
3766    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
3767    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
3768    // requires a persistent renderer (tracked in issue #14).
3769    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
3770    BACKEND_INIT.get_or_init(|| {
3771        let backend_config = BackendConfig::headless();
3772        backend_config.apply_env();
3773    });
3774
3775    // Run Bevy app with HEADLESS configuration
3776    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
3777
3778    // Unreachable - watchdog thread exits the process
3779    Err(RenderError::RenderFailed(
3780        "Render did not complete".to_string(),
3781    ))
3782}
3783
3784/// Save RGBA data to PNG file
3785fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3786    use image::{ImageBuffer, Rgba};
3787
3788    // Create parent directories if needed
3789    if let Some(parent) = path.parent() {
3790        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3791    }
3792
3793    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3794        ImageBuffer::from_raw(width, height, rgba.to_vec())
3795            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3796
3797    img.save(path).map_err(|e| e.to_string())
3798}
3799
/// Write depth samples to `path` as a headerless binary blob: each `f64` is
/// emitted as its 8 little-endian bytes, in slice order (f64 is kept for TBP
/// precision).
///
/// Missing parent directories of `path` are created first. Any I/O failure is
/// returned as the error's display text.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // A path without a parent component needs no directory setup.
    path.parent()
        .map_or(Ok(()), std::fs::create_dir_all)
        .map_err(|e| e.to_string())?;

    let mut payload: Vec<u8> = Vec::with_capacity(std::mem::size_of_val(depth));
    for sample in depth {
        payload.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &payload).map_err(|e| e.to_string())
}
3810
3811#[cfg(test)]
3812mod depth_readback_summary_tests {
3813    use super::{
3814        is_all_background_depth, is_capture_foreground_depth, is_uniform_rgba_frame,
3815        DepthReadbackSummary,
3816    };
3817
3818    #[test]
3819    fn capture_foreground_depth_matches_persistent_capture_gate() {
3820        let near = 0.01;
3821        let far = 10.0;
3822
3823        assert!(!is_capture_foreground_depth(near, near, far));
3824        assert!(!is_capture_foreground_depth(0.010005, near, far));
3825        assert!(is_capture_foreground_depth(0.01002, near, far));
3826        assert!(is_capture_foreground_depth(0.025, near, far));
3827        assert!(is_capture_foreground_depth(9.98, near, far));
3828        assert!(!is_capture_foreground_depth(9.99, near, far));
3829        assert!(!is_capture_foreground_depth(f64::NAN, near, far));
3830    }
3831
3832    #[test]
3833    fn depth_readback_summary_classifies_all_far_frames() {
3834        let summary = DepthReadbackSummary::from_depth(&[10.0, 10.0, 9.99], 0.01, 10.0);
3835
3836        assert_eq!(summary.samples, 3);
3837        assert_eq!(summary.finite, 3);
3838        assert_eq!(summary.invalid, 0);
3839        assert_eq!(summary.foreground, 0);
3840        assert_eq!(summary.near_or_zero, 0);
3841        assert_eq!(summary.far_or_background, 3);
3842        assert_eq!(summary.min, Some(9.99));
3843        assert_eq!(summary.max, Some(10.0));
3844        assert_eq!(summary.foreground_min, None);
3845        assert_eq!(summary.foreground_max, None);
3846    }
3847
3848    #[test]
3849    fn depth_readback_summary_keeps_foreground_range_when_depth_exists() {
3850        let summary =
3851            DepthReadbackSummary::from_depth(&[0.0, 0.010005, 0.025, 1.5, 10.0], 0.01, 10.0);
3852
3853        assert_eq!(summary.samples, 5);
3854        assert_eq!(summary.finite, 5);
3855        assert_eq!(summary.foreground, 2);
3856        assert_eq!(summary.near_or_zero, 2);
3857        assert_eq!(summary.far_or_background, 1);
3858        assert_eq!(summary.min, Some(0.0));
3859        assert_eq!(summary.max, Some(10.0));
3860        assert_eq!(summary.foreground_min, Some(0.025));
3861        assert_eq!(summary.foreground_max, Some(1.5));
3862    }
3863
3864    #[test]
3865    fn depth_readback_summary_counts_invalid_samples() {
3866        let summary = DepthReadbackSummary::from_depth(&[f64::NAN, f64::INFINITY, 0.2], 0.01, 10.0);
3867
3868        assert_eq!(summary.samples, 3);
3869        assert_eq!(summary.finite, 1);
3870        assert_eq!(summary.invalid, 2);
3871        assert_eq!(summary.foreground, 1);
3872        assert_eq!(summary.min, Some(0.2));
3873        assert_eq!(summary.max, Some(0.2));
3874    }
3875
/// `is_all_background_depth` must accept only non-empty buffers whose samples all sit at
/// (or within the accepted margin of) the far plane.
#[test]
fn all_background_depth_accepts_far_plane_only() {
    let far = 10.0;

    // (depth samples, expected verdict)
    let cases: [(&[f64], bool); 5] = [
        // Far plane plus a sample just inside it is still background.
        (&[10.0, 9.99], true),
        // A sample slightly further inside the far plane is rejected.
        (&[10.0, 9.98], false),
        // A near-plane sample is rejected.
        (&[10.0, 0.010005], false),
        // A non-finite sample is rejected.
        (&[10.0, f64::NAN], false),
        // An empty buffer is rejected.
        (&[], false),
    ];

    for (depth, expected) in cases {
        assert_eq!(
            is_all_background_depth(depth, far),
            expected,
            "unexpected all-background verdict for {depth:?}"
        );
    }
}
3884
/// `is_uniform_rgba_frame` must flag frames whose pixels are all identical, and must not
/// flag frames with differing pixels or frames with no pixels at all.
#[test]
fn uniform_rgba_frame_detects_blank_readbacks() {
    // Two identical RGBA pixels: a blank readback.
    let blank_frame = [1, 2, 3, 255, 1, 2, 3, 255];
    // The second pixel differs in its red channel.
    let varied_frame = [1, 2, 3, 255, 4, 2, 3, 255];

    assert!(is_uniform_rgba_frame(&blank_frame));
    assert!(!is_uniform_rgba_frame(&varied_frame));
    // An empty frame is not reported as uniform.
    assert!(!is_uniform_rgba_frame(&[]));
}
3891}
3892
#[cfg(test)]
mod smoke_tests {
    //! Smoke tests that drive the headless renderer against a tiny synthetic object
    //! written to a temporary directory.

    use super::{
        headless_scene_setup_count, persistent_warmup_camera_transform,
        reset_headless_scene_setup_count,
    };
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use std::time::{Duration, Instant};
    use tempfile::TempDir;

    /// Wavefront OBJ text for a textured cube with corners at +/-0.10 around the origin.
    ///
    /// A small centered cube stays visible from all default TBP viewpoints and does not
    /// need any YCB downloads.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// RGBA texels of the 2x2 synthetic texture, stored row by row (index = `y * 2 + x`).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],  // (0, 0)
        [48, 255, 48, 255],  // (1, 0)
        [48, 48, 255, 255],  // (0, 1)
        [255, 255, 64, 255], // (1, 1)
    ];

    /// Writes `synthetic_cube/google_16k/{textured.obj, texture_map.png}` into a fresh
    /// temporary directory and returns the directory guard.
    ///
    /// The returned `TempDir` must be kept alive for as long as the files are needed.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let mesh_dir = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&mesh_dir).expect("create synthetic google_16k dir");

        std::fs::write(mesh_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        let atlas = ImageBuffer::from_fn(2, 2, |x, y| Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize]));
        atlas
            .save(mesh_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    /// The warmup camera must sit away from the origin and face it.
    #[test]
    fn persistent_warmup_camera_is_a_real_viewpoint() {
        let camera = persistent_warmup_camera_transform();

        let distance_from_origin = camera.translation.length();
        assert!(
            distance_from_origin > 0.1,
            "persistent warmup must not place the camera at the object origin"
        );

        // Compare the camera's -Z axis with the unit vector pointing from the camera to the
        // origin; a dot product near 1 means the two directions coincide.
        let view_dir = camera.rotation * Vec3::NEG_Z;
        let toward_origin = -camera.translation.normalize();
        let alignment = view_dir.dot(toward_origin);
        assert!(
            alignment > 0.99,
            "persistent warmup camera should look at the object origin"
        );
    }

    /// Renders a small homogeneous batch of the synthetic cube and checks output shape,
    /// visible color, headless setup reuse, and total wall-clock time.
    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        // One request per viewpoint; every other field is identical across the batch.
        let mut requests = Vec::with_capacity(request_count);
        for viewpoint in viewpoints.iter().copied().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                object_translation: Vec3::ZERO,
                object_scale: Vec3::ONE,
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            });
        }

        let started = Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = started.elapsed();

        assert_eq!(outputs.len(), request_count);
        // The setup counter is the deterministic churn signal for this smoke check: adapter
        // log lines vary by backend and logging config, but a homogeneous batch should still
        // set up headless scene state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        // Expected buffer lengths: four RGBA bytes per pixel, one depth value per pixel.
        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;

        for (idx, output) in outputs.iter().enumerate() {
            assert_eq!(output.width, config.width, "output {idx} width mismatch");
            assert_eq!(output.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                output.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                output.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero color channel (alpha is ignored).
            let has_visible_color = output
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_visible_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean per-capture app churn or another
        // headless startup regression has been reintroduced.
        let budget = Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}