Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::{LoadState, RenderAssetUsages};
42use bevy::camera::RenderTarget;
43use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
44use bevy::core_pipeline::tonemapping::Tonemapping;
45use bevy::ecs::query::QueryItem;
46use bevy::light::GlobalAmbientLight;
47use bevy::log::LogPlugin;
48use bevy::prelude::*;
49use bevy::render::camera::ExtractedCamera;
50use bevy::render::render_asset::RenderAssets;
51use bevy::render::render_graph::{
52    Node, NodeRunError, RenderGraphContext, RenderGraphExt, RenderLabel, ViewNode, ViewNodeRunner,
53};
54use bevy::render::render_resource::{
55    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d,
56    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
57    TextureDimension, TextureFormat, TextureUsages,
58};
59use bevy::render::renderer::RenderQueue;
60use bevy::render::renderer::{RenderContext, RenderDevice};
61use bevy::render::texture::GpuImage;
62use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
63use bevy::render::view::{ExtractedView, Hdr, ViewDepthTexture};
64use bevy::render::{Extract, Render, RenderApp, RenderSystems};
65use bevy::window::{ExitCondition, WindowPlugin};
66use bevy_obj::ObjPlugin;
67use std::fs::File;
68use std::io::Read as IoRead;
69use std::path::{Path, PathBuf};
70#[cfg(test)]
71use std::sync::atomic::{AtomicUsize, Ordering};
72use std::sync::{Arc, Mutex, OnceLock};
73use std::time::Duration;
74
75use crate::{
76    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
77    TargetingPolicy,
78};
79use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
80
/// Watchdog timeout for a single render, in seconds (180 s = 3 minutes).
///
/// Bounds how long any single render path waits before declaring failure.
/// 180s accommodates first-run wgpu shader compilation on Windows, which
/// can take well over 60s on a cold GPU cache (see commit 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 180;
87
/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. Historically set to 3 as a conservative cushion; reducing it
/// directly shortens per-viewpoint wall-clock time, since `app.update()` in
/// the batch path is not rate-limited. Validated against the pixel-exact
/// hardware test `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;
97
/// Warmup frames at the start of each `PersistentRenderer::render()` call.
///
/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
/// batch because `extract_and_continue_headless_batch` writes the next
/// camera transform *and* clears the shared GPU readback buffers in the
/// same tick — so the in-flight copy from the previous viewpoint has
/// already drained by the time the next capture is gated.
///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
///
/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
/// ticks of warmup gives Windows/DX12 enough room to drain the previous
/// readback and capture the post-propagation color target:
///   - tick 0: transforms propagate, render runs (no copy enabled)
///   - tick 1: previous in-flight readback drains (no copy enabled)
///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
///   - tick 3: shared buffers populated → captured → batch finalized
const PERSISTENT_WARMUP_FRAMES: u32 = 3;
/// Margin, in meters, added to the near plane when classifying depth samples.
///
/// A sample only counts as foreground when it is strictly greater than
/// `near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS`; samples at or below that
/// threshold are tallied as "near or zero".
const DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS: f64 = 1e-5;
/// Fraction of the far plane at or beyond which a depth sample is treated as
/// background (`value >= far * DEPTH_CAPTURE_FAR_PLANE_FRACTION`).
const DEPTH_CAPTURE_FAR_PLANE_FRACTION: f64 = 0.999;
123
124fn persistent_warmup_camera_transform() -> Transform {
125    crate::generate_viewpoints(&crate::ViewpointConfig::default())
126        .into_iter()
127        .next()
128        .unwrap_or_else(|| Transform::from_xyz(0.0, 0.0, 0.5).looking_at(Vec3::ZERO, Vec3::Y))
129}
130
/// Report whether render tracing is switched on via `BEVY_SENSOR_RENDER_TRACE`.
///
/// Tracing is enabled whenever the variable is present in the process
/// environment, whatever its value (an empty or non-UTF-8 value still counts).
/// The environment is queried on every call, so the flag can be toggled at
/// runtime; gate all tracing output behind this.
#[inline]
fn render_trace_enabled() -> bool {
    // `var_os` only checks for presence: unlike `var`, it does not validate the
    // value as UTF-8, so a set-but-non-Unicode value is not misreported as
    // "tracing disabled" and the per-frame call does less work.
    std::env::var_os("BEVY_SENSOR_RENDER_TRACE").is_some()
}
137
138fn is_capture_foreground_depth(depth: f64, near: f64, far: f64) -> bool {
139    depth.is_finite()
140        && near.is_finite()
141        && far.is_finite()
142        && depth > near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS
143        && depth < far * DEPTH_CAPTURE_FAR_PLANE_FRACTION
144}
145
146fn is_all_background_depth(depth: &[f64], far: f64) -> bool {
147    !depth.is_empty()
148        && far.is_finite()
149        && depth
150            .iter()
151            .all(|value| value.is_finite() && *value >= far * DEPTH_CAPTURE_FAR_PLANE_FRACTION)
152}
153
/// Decide whether every complete 4-byte pixel in `rgba` is identical.
///
/// Returns `false` when the buffer holds no complete pixel (fewer than four
/// bytes). Trailing bytes that do not form a whole pixel are ignored.
fn is_uniform_rgba_frame(rgba: &[u8]) -> bool {
    let mut pixels = rgba.chunks_exact(4);
    match pixels.next() {
        // Compare every remaining pixel against the first one.
        Some(reference) => pixels.all(|pixel| pixel == reference),
        None => false,
    }
}
160
/// Aggregate statistics over one linear-depth readback, built by
/// `DepthReadbackSummary::from_depth` and rendered via its `Display` impl.
#[derive(Debug, Clone, PartialEq)]
struct DepthReadbackSummary {
    /// Total number of depth values examined (the input slice length).
    samples: usize,
    /// Number of values that are finite.
    finite: usize,
    /// Number of values that are not finite (NaN or infinite).
    invalid: usize,
    /// Number of values accepted by `is_capture_foreground_depth`.
    foreground: usize,
    /// Number of finite values at or below
    /// `near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS`.
    near_or_zero: usize,
    /// Number of finite values above the near threshold that are at or beyond
    /// `far * DEPTH_CAPTURE_FAR_PLANE_FRACTION`; when `far` itself is not
    /// finite, every finite value above the near threshold is counted here.
    far_or_background: usize,
    /// Smallest finite value, or `None` if there were no finite values.
    min: Option<f64>,
    /// Largest finite value, or `None` if there were no finite values.
    max: Option<f64>,
    /// Smallest foreground value, or `None` if there was no foreground.
    foreground_min: Option<f64>,
    /// Largest foreground value, or `None` if there was no foreground.
    foreground_max: Option<f64>,
}
174
175impl DepthReadbackSummary {
176    fn from_depth(depth: &[f64], near: f64, far: f64) -> Self {
177        let mut summary = Self {
178            samples: depth.len(),
179            finite: 0,
180            invalid: 0,
181            foreground: 0,
182            near_or_zero: 0,
183            far_or_background: 0,
184            min: None,
185            max: None,
186            foreground_min: None,
187            foreground_max: None,
188        };
189        let near_threshold = near + DEPTH_CAPTURE_NEAR_PLANE_EPSILON_METERS;
190        let far_threshold = far * DEPTH_CAPTURE_FAR_PLANE_FRACTION;
191
192        for &value in depth {
193            if !value.is_finite() {
194                summary.invalid += 1;
195                continue;
196            }
197
198            summary.finite += 1;
199            summary.min = Some(summary.min.map_or(value, |min| min.min(value)));
200            summary.max = Some(summary.max.map_or(value, |max| max.max(value)));
201
202            if value <= near_threshold {
203                summary.near_or_zero += 1;
204            } else if !far.is_finite() || value >= far_threshold {
205                summary.far_or_background += 1;
206            }
207
208            if is_capture_foreground_depth(value, near, far) {
209                summary.foreground += 1;
210                summary.foreground_min =
211                    Some(summary.foreground_min.map_or(value, |min| min.min(value)));
212                summary.foreground_max =
213                    Some(summary.foreground_max.map_or(value, |max| max.max(value)));
214            }
215        }
216
217        summary
218    }
219}
220
/// Render an optional depth value for log output.
///
/// `Some(v)` is formatted with four decimal places; `None` becomes `"none"`.
fn format_depth_value(value: Option<f64>) -> String {
    match value {
        Some(depth) => format!("{depth:.4}"),
        None => String::from("none"),
    }
}
226
227impl std::fmt::Display for DepthReadbackSummary {
228    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
229        write!(
230            f,
231            "samples={} finite={} invalid={} foreground={} near_or_zero={} far_or_background={} min={} max={} fg_min={} fg_max={}",
232            self.samples,
233            self.finite,
234            self.invalid,
235            self.foreground,
236            self.near_or_zero,
237            self.far_or_background,
238            format_depth_value(self.min),
239            format_depth_value(self.max),
240            format_depth_value(self.foreground_min),
241            format_depth_value(self.foreground_max)
242        )
243    }
244}
245
/// Turn a filesystem path into a string usable as a Bevy asset path.
///
/// On Windows, `std::fs::canonicalize` yields verbatim paths of the form
/// `\\?\C:\...`, which Bevy's `AssetPath` parser cannot handle, so the asset
/// would silently never load. This drops a leading `\\?\` (if present) and
/// rewrites every backslash as `/`, so the absolute path resolves through the
/// default file asset source on every platform. Non-UTF-8 path components
/// are converted lossily.
fn fs_path_to_asset_string(path: &std::path::Path) -> String {
    let lossy = path.to_string_lossy();
    let without_verbatim = lossy.strip_prefix(r"\\?\").unwrap_or(&*lossy);
    without_verbatim.replace('\\', "/")
}
258
/// Report whether a display server appears to be reachable for windowed
/// rendering.
///
/// Returns `true` when either the `DISPLAY` or the `WAYLAND_DISPLAY`
/// environment variable is set to a valid Unicode value.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
266
/// Check if we're running on WSL2 (which doesn't support Vulkan window surfaces).
///
/// Reads `/proc/version` and looks for "microsoft" or "wsl",
/// case-insensitively. Returns `false` when the file cannot be read.
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            lowered.contains("microsoft") || lowered.contains("wsl")
        }
        Err(_) => false,
    }
}
276
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`. Because it derives `Default`, a fresh value
/// starts with every flag `false`, every counter `0`, and every optional
/// buffer `None`.
#[derive(Resource, Default)]
struct RenderState {
    // NOTE(review): presumably the number of frames ticked so far; the code
    // that advances it is outside this excerpt. The `*_frame` fields below
    // are documented as snapshots of this counter.
    frame_count: u32,
    // NOTE(review): the three flags below presumably track asset-loading and
    // material-assignment milestones — confirm against the systems that set them.
    scene_loaded: bool,
    texture_loaded: bool,
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// `frame_count` when the texture finished loading. Capture waits a small
    /// margin past this for GPU image preparation. The material (and therefore
    /// the main-pass pipeline) is applied earlier, so by the time the texture is
    /// ready the pipeline has already compiled.
    texture_ready_frame: u32,
    capture_ready: bool,
    screenshot_requested: bool,
    /// Number of frames spent waiting for a *valid* (non-blank / valid-depth)
    /// readback. The one-shot GPU capture is nondeterministic and occasionally
    /// reads a uniform clear-color frame; we reject those and keep capturing
    /// until a real frame lands, bounded by this counter.
    capture_retries: u32,
    /// Previous frame's RGBA readback. The capture is accepted only once two
    /// consecutive readbacks are identical (the render has settled), so partial
    /// in-progress frames aren't captured and every render path yields the same
    /// fully-drawn image (required for byte-exact cross-path parity).
    prev_rgba: Option<Vec<u8>>,
    /// Previous frame's depth readback, for the same settle-detection as
    /// `prev_rgba` (depth parity is asserted to ~1e-9, i.e. bit-exact).
    prev_depth: Option<Vec<f64>>,
    captured: bool,
    exit_requested: bool,
    // Exempted from the dead-code lint, so it is presumably not read anywhere
    // at present — TODO confirm before removing.
    #[allow(dead_code)]
    exit_frame_count: u32,
    // NOTE(review): presumably the accepted capture (RGBA bytes and linear
    // depth values) together with its dimensions — verify against the
    // capture systems.
    rgba_data: Option<Vec<u8>>,
    depth_data: Option<Vec<f64>>,
    image_width: u32,
    image_height: u32,
}
317
/// Test-only global counter, starting at zero.
///
/// NOTE(review): presumably bumped each time the headless scene setup runs;
/// the increment site is outside this excerpt.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Test-only: reset `HEADLESS_SCENE_SETUP_COUNT` to zero.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Test-only: read the current value of `HEADLESS_SCENE_SETUP_COUNT`.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
330
/// Shared buffer for screenshot callback to write into.
///
/// Wraps an `Arc<Mutex<Option<..>>>`, so clones of this resource share the
/// same slot. The slot is `None` until something stores into it.
/// NOTE(review): the tuple is presumably `(rgba_bytes, width, height)`, by
/// analogy with `SharedDepthBuffer` — confirm against the writer.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
336
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`.
/// Uses f64 for TBP numerical precision compatibility.
///
/// Wraps an `Arc<Mutex<Option<..>>>`, so clones of this resource share the
/// same slot; the derived `Default` starts it out as `None`.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
343
344// ============================================================================
345// Depth Readback Infrastructure
346// ============================================================================
347
/// Request to capture depth - extracted from main world to render world.
///
/// The derived `Default` is "no capture requested" with `near`/`far` of `0.0`.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, `DepthReadbackNode::run` returns early without copying
    /// the depth texture.
    requested: bool,
    // NOTE(review): presumably the camera near/far plane distances for the
    // requested capture — confirm units and usage at the extraction site.
    near: f32,
    far: f32,
}
355
/// Pending depth capture info for async processing.
///
/// `m22`/`m32` are the relevant entries of the view's reverse-Z projection
/// matrix (`clip_from_view`), captured at copy time so the CPU-side
/// linearization matches the exact projection the GPU rendered with. This keeps
/// depth output robust if projection construction or backend behavior changes.
struct PendingDepthCapture {
    // NOTE(review): presumably the staging buffer the depth texture was copied
    // into, awaiting mapping and readback — confirm against the render node.
    buffer: Buffer,
    // NOTE(review): presumably the capture dimensions in pixels.
    width: u32,
    height: u32,
    /// `clip_from_view[col=2][row=2]` at copy time.
    m22: f32,
    /// `clip_from_view[col=3][row=2]` at copy time.
    m32: f32,
    // NOTE(review): presumably the far-plane distance passed as `far` to
    // `depth_helpers::convert_depth_to_linear_with_matrix` — confirm at the
    // consumer.
    far: f32,
}
370
371fn render_projection(config: &RenderConfig) -> Projection {
372    let near = config.near_plane;
373    Projection::Perspective(PerspectiveProjection {
374        fov: config.fov_radians(),
375        near,
376        far: config.far_plane,
377        near_clip_plane: Vec4::new(0.0, 0.0, -1.0, -near),
378        ..default()
379    })
380}
381
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// Holds the captures behind an `Arc<Mutex<..>>`; the derived `Default`
/// starts with an empty queue.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
385
#[cfg(test)]
mod projection_tests {
    use super::*;

    /// The projection built by `render_projection` must carry the configured
    /// near/far planes, and its clip matrix must encode that same near plane.
    #[test]
    fn render_projection_uses_configured_near_plane_for_effective_clip_matrix() {
        let config = {
            let mut cfg = RenderConfig::tbp_default();
            cfg.near_plane = 0.025;
            cfg.far_plane = 12.0;
            cfg
        };
        let expected_clip_plane = Vec4::new(0.0, 0.0, -1.0, -config.near_plane);

        let projection = render_projection(&config);
        let perspective = match &projection {
            Projection::Perspective(perspective) => perspective,
            _ => panic!("render_projection should create a perspective projection"),
        };

        assert_eq!(perspective.near, config.near_plane);
        assert_eq!(perspective.near_clip_plane, expected_clip_plane);
        assert_eq!(perspective.far, config.far_plane);

        // The near plane sits in the matrix's fourth column, third row.
        let encoded_near = projection.get_clip_from_view().w_axis.z;
        assert!(
            (encoded_near - config.near_plane).abs() < 1e-6,
            "reverse-Z projection matrix should encode configured near plane; got {}",
            encoded_near
        );
    }
}
416
417// ============================================================================
418// Depth Buffer Helpers
419// ============================================================================
420
mod depth_helpers {
    /// Row pitch, in bytes, that wgpu requires buffer rows to be a multiple of.
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of [`COPY_BYTES_PER_ROW_ALIGNMENT`].
    ///
    /// Values that are already aligned (including `0`) come back unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        match value % COPY_BYTES_PER_ROW_ALIGNMENT {
            0 => value,
            excess => value + (COPY_BYTES_PER_ROW_ALIGNMENT - excess),
        }
    }

    /// Byte size of an image buffer whose rows are each padded to the copy
    /// alignment: `height` rows of `align_byte_size(width * pixel_size)` bytes.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        let padded_row_bytes = align_byte_size(width * pixel_size);
        height * padded_row_bytes
    }

    /// Convert a reverse-Z NDC depth into a linear depth in meters.
    ///
    /// Bevy renders with a reverse-Z depth buffer: the near plane maps to
    /// depth 1 and the far plane to depth 0, which gives better precision for
    /// distant objects.
    ///
    /// With those two anchors the conversion is
    /// `linear = far / (1 + ndc * (far / near - 1))`:
    /// - `ndc = 1` gives `near`
    /// - `ndc = 0` gives `far`
    ///
    /// Inputs at or below 0 return `far` (background); inputs at or above 1
    /// return `near`.
    ///
    /// Superseded in the render path by [`ndc_to_linear_with_matrix`], which
    /// reads the actual projection near from the view matrix instead of trusting
    /// a passed-in near (the source of the #92 10x depth error). Retained for its
    /// tests and as a reference formula.
    #[allow(dead_code)]
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        if ndc_depth <= 0.0 {
            // Background (infinite distance in reverse-Z).
            far
        } else if ndc_depth >= 1.0 {
            // At or in front of the near plane.
            near
        } else {
            let range_ratio = far / near - 1.0;
            far / (1.0 + ndc_depth * range_ratio)
        }
    }

    /// Unpack little-endian `f32` depth samples from a row-padded buffer.
    ///
    /// `data` is expected to hold `height` rows, each starting at a multiple of
    /// `align_byte_size(width * 4)` bytes; only the first `width * 4` bytes of
    /// each row carry samples and the padding is skipped. The result is
    /// row-major with `width * height` entries.
    ///
    /// Panics if `data` is too short for the requested dimensions.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const BYTES_PER_SAMPLE: u32 = 4; // one f32 per pixel

        let row_stride = align_byte_size(width * BYTES_PER_SAMPLE) as usize;
        let row_payload = (width * BYTES_PER_SAMPLE) as usize;

        let mut samples = Vec::with_capacity((width * height) as usize);
        for row in 0..height as usize {
            let begin = row * row_stride;
            let payload = &data[begin..begin + row_payload];
            samples.extend(
                payload
                    .chunks_exact(BYTES_PER_SAMPLE as usize)
                    .map(|px| f32::from_le_bytes([px[0], px[1], px[2], px[3]])),
            );
        }

        samples
    }

    /// Linearize a whole slice of reverse-Z NDC depths with
    /// [`reverse_z_to_linear_depth`], widening to f64 for TBP precision.
    /// Superseded by [`convert_depth_to_linear_with_matrix`]; retained for tests.
    #[allow(dead_code)]
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        let mut linear = Vec::with_capacity(raw_depth.len());
        for &ndc in raw_depth {
            linear.push(f64::from(reverse_z_to_linear_depth(ndc, near, far)));
        }
        linear
    }

    /// Linearize a reverse-Z NDC depth using the view's actual projection matrix,
    /// rather than a hand-supplied near/far.
    ///
    /// For a perspective right-handed projection, the relevant clip-space rows are
    /// `clip_z = m22 * z + m32` and `clip_w = -z` (camera looks down -Z), so
    /// `ndc = clip_z / clip_w = (m22*z + m32) / (-z)`. Solving for the positive
    /// view-space distance `d = -z` gives **`d = m32 / (ndc + m22)`**. This holds
    /// for both finite and infinite reverse-Z and is correct regardless of which
    /// near plane the renderer actually used — the previous fixed-near formula
    /// produced depths 10x too small when the effective projection near plane
    /// drifted from `RenderConfig::near_plane` (issue #86/#92/#95).
    ///
    /// `m22 = clip_from_view[col=2][row=2]`, `m32 = clip_from_view[col=3][row=2]`.
    /// `ndc <= 0` is the reverse-Z far plane (background) and maps to `far`.
    /// A near-zero denominator, or a non-finite or non-positive result, also
    /// maps to `far`; valid results are clamped to at most `far`.
    pub fn ndc_to_linear_with_matrix(ndc: f32, m22: f32, m32: f32, far: f32) -> f32 {
        let denominator = ndc + m22;
        // Background sample, or a division that would blow up.
        if ndc <= 0.0 || denominator.abs() <= f32::EPSILON {
            return far;
        }

        let distance = m32 / denominator;
        if distance.is_finite() && distance > 0.0 {
            distance.min(far)
        } else {
            far
        }
    }

    /// Linearize a whole slice of NDC depths using the view projection matrix
    /// entries, widening to f64 for TBP precision. See
    /// [`ndc_to_linear_with_matrix`].
    pub fn convert_depth_to_linear_with_matrix(
        raw_depth: &[f32],
        m22: f32,
        m32: f32,
        far: f32,
    ) -> Vec<f64> {
        let mut linear = Vec::with_capacity(raw_depth.len());
        for &ndc in raw_depth {
            linear.push(f64::from(ndc_to_linear_with_matrix(ndc, m22, m32, far)));
        }
        linear
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            let cases: [(u32, u32); 5] = [(256, 256), (257, 512), (1, 256), (512, 512), (0, 0)];
            for &(input, expected) in &cases {
                assert_eq!(align_byte_size(input), expected);
            }
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let (near, far) = (0.01f32, 10.0f32);

            // ndc = 1 is the near plane in reverse-Z.
            let at_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((at_near - near).abs() < 0.001);

            // ndc = 0.5: 10 / (1 + 0.5 * (1000 - 1)) = 10 / 500.5 ≈ 0.02,
            // which must land strictly between the planes.
            let at_half = reverse_z_to_linear_depth(0.5, near, far);
            assert!(at_half > near && at_half < far);

            // ndc = 0.0001: 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09.
            let near_far_plane = reverse_z_to_linear_depth(0.0001, near, far);
            assert!(near_far_plane > 9.0);

            // ndc = 0 is the background.
            assert_eq!(reverse_z_to_linear_depth(0.0, near, far), far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image of f32 samples: each row carries 8 payload bytes but
            // occupies a full 256-byte aligned stride.
            let (width, height) = (2u32, 2u32);
            let mut data = vec![0u8; 256 * 2];

            // (byte offset, value): row 0 = [0.5, 0.6], row 1 = [0.7, 0.8].
            let written: [(usize, f32); 4] = [(0, 0.5), (4, 0.6), (256, 0.7), (260, 0.8)];
            for &(offset, value) in &written {
                data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            for (actual, &(_, expected)) in depth.iter().zip(&written) {
                assert!((actual - expected).abs() < 0.001);
            }
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // ndc = 1 must yield the near distance.
            let (near, far) = (0.01f32, 100.0f32);
            let linear = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // ndc = 0 must yield the far distance.
            let (near, far) = (0.01f32, 100.0f32);
            let linear = reverse_z_to_linear_depth(0.0, near, far);
            assert!((linear - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Walking ndc from 1 down to 0 must never decrease linear depth.
            let (near, far) = (0.01f32, 10.0f32);

            let mut previous = 0.0f32;
            for step in (0..=100).rev() {
                let ndc = step as f32 / 100.0;
                let current = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    current >= previous,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    current,
                    previous
                );
                previous = current;
            }
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_infinite_reverse_z() {
            // Infinite reverse-Z (Bevy `perspective_infinite_reverse_rh`):
            // m22 = 0, m32 = near, so d = near / ndc.
            let (m22, m32, far) = (0.0f32, 0.1f32, 10.0f32);

            // The exact regression from #92: ndc 0.366504 must linearize to
            // ~0.273 m (near 0.1), NOT ~0.027 m (the old fixed near = 0.01).
            let d = ndc_to_linear_with_matrix(0.366504, m22, m32, far);
            assert!((d as f64 - 0.272849).abs() < 1e-4, "got {d}");

            // Background (reverse-Z far plane), negative ndc, and a tiny ndc
            // (very far away) all collapse to `far`.
            for &ndc in &[0.0f32, -0.5, 1e-9] {
                assert_eq!(ndc_to_linear_with_matrix(ndc, m22, m32, far), far);
            }
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_finite_reverse_z() {
            // Finite reverse-Z maps near->ndc 1, far->ndc 0. Construct the matrix
            // entries for near=0.5, far=20: m22 = near/(far-near), m32 = far*m22.
            let (near, far) = (0.5f32, 20.0f32);
            let m22 = near / (far - near);
            let m32 = far * m22;

            // ndc = 1 -> near; ndc = 0 -> far (background sentinel also returns far).
            let at_near = ndc_to_linear_with_matrix(1.0, m22, m32, far);
            assert!((at_near - near).abs() < 1e-4);
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let (near, far) = (0.01f32, 10.0f32);
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);
            let (near64, far64) = (near as f64, far as f64);

            assert_eq!(linear.len(), 4);
            // First sample is the near plane, last is the far plane.
            assert!((linear[0] - near64).abs() < 0.001);
            assert!((linear[3] - far64).abs() < 0.001);
            // Every sample stays within [near, far].
            assert!(linear.iter().all(|d| *d >= near64 && *d <= far64));
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Exact multiples of 256 are left alone.
            for &aligned in &[256u32, 512, 1024] {
                assert_eq!(align_byte_size(aligned), aligned);
            }

            // Anything in 1..=255 rounds up to 256.
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just past 256 rounds up to 512.
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // TBP default resolution.
            let (width, height) = (64u32, 64u32);
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);
            let pixel_count = width * height;

            // Fill an aligned buffer with values that grow in row-major order.
            let mut data = vec![0u8; (padded_row * height) as usize];
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / pixel_count as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), pixel_count as usize);

            // Spot-check the first and last samples.
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (pixel_count - 1) as f32 / pixel_count as f32;
            assert!((depth[(pixel_count - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
740
741// ============================================================================
742// Depth Readback Render Node
743// ============================================================================
744
/// Label for the depth readback render graph node.
///
/// A unit struct; the derives (`Hash`, `Eq`, `Clone`, `RenderLabel`, …) are
/// what allow it to be used as a render-graph label.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
748
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// Stateless unit struct: everything it needs is looked up from the view
/// query and world resources when its `ViewNode::run` is invoked.
#[derive(Default)]
struct DepthReadbackNode;
753
754impl ViewNode for DepthReadbackNode {
755    type ViewQuery = (
756        &'static ViewDepthTexture,
757        &'static ExtractedCamera,
758        &'static ExtractedView,
759    );
760
761    fn run<'w>(
762        &self,
763        _graph: &mut RenderGraphContext,
764        render_context: &mut RenderContext<'w>,
765        (view_depth_texture, camera, view): QueryItem<'w, '_, Self::ViewQuery>,
766        world: &'w World,
767    ) -> Result<(), NodeRunError> {
768        let trace = render_trace_enabled();
769        let t0 = trace.then(std::time::Instant::now);
770
771        // Check if depth capture is requested
772        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
773            return Ok(());
774        };
775        if !request.requested {
776            return Ok(());
777        }
778
779        // Get the pending queue
780        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
781            return Ok(());
782        };
783
784        // Get texture size from camera viewport or physical size
785        let Some(physical_size) = camera.physical_target_size else {
786            return Ok(());
787        };
788        let width = physical_size.x;
789        let height = physical_size.y;
790
791        let render_device = world.resource::<RenderDevice>();
792
793        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
794        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
795        let unpadded_bytes_per_row = width * bytes_per_pixel;
796        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
797        let buffer_size = (padded_bytes_per_row * height) as u64;
798
799        // Create staging buffer for CPU readback
800        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
801            label: Some("depth_staging_buffer"),
802            size: buffer_size,
803            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
804            mapped_at_creation: false,
805        });
806
807        // Copy depth texture to staging buffer
808        let encoder = render_context.command_encoder();
809        encoder.copy_texture_to_buffer(
810            TexelCopyTextureInfo {
811                texture: &view_depth_texture.texture,
812                mip_level: 0,
813                origin: Origin3d::ZERO,
814                aspect: TextureAspect::DepthOnly,
815            },
816            TexelCopyBufferInfo {
817                buffer: &staging_buffer,
818                layout: TexelCopyBufferLayout {
819                    offset: 0,
820                    bytes_per_row: Some(padded_bytes_per_row),
821                    rows_per_image: Some(height),
822                },
823            },
824            Extent3d {
825                width,
826                height,
827                depth_or_array_layers: 1,
828            },
829        );
830
831        // Push to queue for async processing (queue is Arc<Mutex<Vec>>).
832        // Capture the projection-matrix entries used for linearization: for a
833        // perspective RH matrix, clip_z = m22*z + m32 and clip_w = -z, so the
834        // positive view-space distance is d = m32 / (ndc + m22).
835        let clip_from_view = view.clip_from_view;
836        if let Ok(mut pending) = queue.0.lock() {
837            pending.push(PendingDepthCapture {
838                buffer: staging_buffer,
839                width,
840                height,
841                m22: clip_from_view.z_axis.z,
842                m32: clip_from_view.w_axis.z,
843                far: request.far,
844            });
845        }
846
847        if let Some(t0) = t0 {
848            eprintln!(
849                "[render_trace][node] DepthReadbackNode ms={:.3}",
850                t0.elapsed().as_secs_f64() * 1000.0
851            );
852        }
853
854        Ok(())
855    }
856}
857
858// ============================================================================
859// Depth Readback Plugin
860// ============================================================================
861
862/// Plugin that sets up depth buffer readback from the GPU.
863struct DepthReadbackPlugin {
864    shared_depth: SharedDepthBuffer,
865    near: f32,
866    far: f32,
867}
868
869impl Plugin for DepthReadbackPlugin {
870    fn build(&self, app: &mut App) {
871        use bevy::core_pipeline::core_3d::graph::Core3d;
872        use bevy::core_pipeline::core_3d::graph::Node3d;
873
874        // Insert shared depth buffer in main app
875        app.insert_resource(self.shared_depth.clone());
876        app.insert_resource(DepthCaptureRequest {
877            requested: false,
878            near: self.near,
879            far: self.far,
880        });
881
882        // Get render app
883        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
884            eprintln!("Failed to get RenderApp for depth readback");
885            return;
886        };
887
888        // Insert resources in render world
889        render_app.insert_resource(self.shared_depth.clone());
890        render_app.init_resource::<PendingDepthCaptureQueue>();
891
892        // Add extraction system to copy request from main world
893        render_app.add_systems(ExtractSchedule, extract_depth_request);
894
895        // Add system to process completed depth captures
896        render_app.add_systems(
897            Render,
898            collect_depth_captures.in_set(RenderSystems::Cleanup),
899        );
900
901        // Register the depth readback node in the render graph
902        // Run after main pass completes (depth buffer is ready) but before tonemapping
903        render_app
904            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
905            .add_render_graph_edges(
906                Core3d,
907                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
908            );
909    }
910}
911
912/// Extract depth capture request from main world to render world
913fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
914    commands.insert_resource(DepthCaptureRequest {
915        requested: request.requested,
916        near: request.near,
917        far: request.far,
918    });
919}
920
921/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
922fn collect_depth_captures(
923    queue: Res<PendingDepthCaptureQueue>,
924    shared_depth: Res<SharedDepthBuffer>,
925    render_device: Res<RenderDevice>,
926) {
927    let trace = render_trace_enabled();
928    let t_sys = trace.then(std::time::Instant::now);
929
930    // Take all pending captures from the queue
931    let pending_captures = {
932        let Ok(mut pending) = queue.0.lock() else {
933            return;
934        };
935        std::mem::take(&mut *pending)
936    };
937
938    if pending_captures.is_empty() {
939        if let Some(t0) = t_sys {
940            eprintln!(
941                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
942                t0.elapsed().as_secs_f64() * 1000.0
943            );
944        }
945        return;
946    }
947
948    let pending_count = pending_captures.len();
949
950    // Process each pending capture synchronously with device polling
951    for pending in pending_captures {
952        let width = pending.width;
953        let height = pending.height;
954        let m22 = pending.m22;
955        let m32 = pending.m32;
956        let far = pending.far;
957        let buffer = pending.buffer;
958        let shared = shared_depth.0.clone();
959
960        // Use blocking sync approach with device polling (same as RGBA capture)
961        let buffer_slice = buffer.slice(..);
962
963        // Request mapping
964        let (tx, rx) = std::sync::mpsc::channel();
965        buffer_slice.map_async(MapMode::Read, move |result| {
966            let _ = tx.send(result);
967        });
968
969        let t_wait = trace.then(std::time::Instant::now);
970        let mut poll_iters: u32 = 0;
971
972        // Poll the device until mapping completes
973        loop {
974            let _ =
975                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
976            poll_iters += 1;
977            match rx.try_recv() {
978                Ok(Ok(())) => {
979                    let data = buffer_slice.get_mapped_range();
980
981                    // Extract depth values with alignment handling
982                    let ndc_depth =
983                        depth_helpers::extract_depth_with_alignment(&data, width, height);
984
985                    drop(data);
986                    buffer.unmap();
987
988                    // Convert reverse-Z NDC to linear depth (meters) using the
989                    // view's actual projection matrix entries. See
990                    // `convert_depth_to_linear_with_matrix`.
991                    let linear_depth = depth_helpers::convert_depth_to_linear_with_matrix(
992                        &ndc_depth, m22, m32, far,
993                    );
994
995                    // Store in shared buffer
996                    if let Ok(mut guard) = shared.lock() {
997                        *guard = Some((linear_depth, width, height));
998                    }
999                    break;
1000                }
1001                Ok(Err(e)) => {
1002                    eprintln!("Failed to map depth buffer: {:?}", e);
1003                    break;
1004                }
1005                Err(std::sync::mpsc::TryRecvError::Empty) => {
1006                    // Keep polling
1007                    std::thread::sleep(std::time::Duration::from_millis(1));
1008                }
1009                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1010                    eprintln!("Depth buffer mapping channel disconnected");
1011                    break;
1012                }
1013            }
1014        }
1015
1016        if let Some(t_wait) = t_wait {
1017            eprintln!(
1018                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
1019                poll_iters,
1020                t_wait.elapsed().as_secs_f64() * 1000.0
1021            );
1022        }
1023    }
1024
1025    if let Some(t0) = t_sys {
1026        eprintln!(
1027            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
1028            pending_count,
1029            t0.elapsed().as_secs_f64() * 1000.0
1030        );
1031    }
1032}
1033
1034// ============================================================================
1035// Image Copy Infrastructure (for headless rendering)
1036// ============================================================================
1037
1038/// Label for the image copy render graph node
1039#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
1040struct ImageCopyLabel;
1041
1042/// Component that marks an image for GPU-to-CPU copying
1043#[derive(Component, Clone)]
1044struct ImageCopier {
1045    /// Handle to the source image (render target)
1046    src_image: Handle<Image>,
1047    /// Whether to capture on this frame
1048    enabled: bool,
1049}
1050
1051/// Resource containing all ImageCopiers for the render world
1052#[derive(Resource, Default)]
1053struct ImageCopiers(Vec<ImageCopier>);
1054
1055/// Pending image capture for async processing
1056struct PendingImageCapture {
1057    buffer: Buffer,
1058    width: u32,
1059    height: u32,
1060    padded_bytes_per_row: u32,
1061}
1062
1063/// Queue for pending image captures
1064#[derive(Resource, Default)]
1065struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
1066
1067/// Shared buffer for captured RGBA data
1068#[derive(Resource, Clone, Default)]
1069#[allow(clippy::type_complexity)]
1070struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
1071
/// Render-graph node that copies each enabled copier's source image into a
/// staging buffer.
struct ImageCopyDriver;
1074
1075impl Node for ImageCopyDriver {
1076    fn run(
1077        &self,
1078        _graph: &mut RenderGraphContext,
1079        _render_context: &mut RenderContext,
1080        world: &World,
1081    ) -> Result<(), NodeRunError> {
1082        let trace = render_trace_enabled();
1083        let t0 = trace.then(std::time::Instant::now);
1084
1085        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
1086            return Ok(());
1087        };
1088
1089        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
1090            return Ok(());
1091        };
1092
1093        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
1094            return Ok(());
1095        };
1096
1097        let render_device = world.resource::<RenderDevice>();
1098
1099        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
1100            return Ok(());
1101        };
1102
1103        for image_copier in image_copiers.0.iter() {
1104            if !image_copier.enabled {
1105                continue;
1106            }
1107
1108            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
1109                continue;
1110            };
1111
1112            let width = gpu_image.size.width;
1113            let height = gpu_image.size.height;
1114
1115            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
1116            let block_dimensions = gpu_image.texture_format.block_dimensions();
1117            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
1118
1119            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
1120                (width as usize / block_dimensions.0 as usize) * block_size as usize,
1121            );
1122
1123            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
1124
1125            // Create staging buffer for CPU readback
1126            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
1127                label: Some("image_copy_staging_buffer"),
1128                size: buffer_size,
1129                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
1130                mapped_at_creation: false,
1131            });
1132
1133            // Create command encoder for the copy operation
1134            let mut encoder =
1135                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
1136
1137            let texture_extent = Extent3d {
1138                width,
1139                height,
1140                depth_or_array_layers: 1,
1141            };
1142
1143            // Copy texture to buffer
1144            encoder.copy_texture_to_buffer(
1145                gpu_image.texture.as_image_copy(),
1146                TexelCopyBufferInfo {
1147                    buffer: &staging_buffer,
1148                    layout: TexelCopyBufferLayout {
1149                        offset: 0,
1150                        bytes_per_row: Some(padded_bytes_per_row as u32),
1151                        rows_per_image: None,
1152                    },
1153                },
1154                texture_extent,
1155            );
1156
1157            // Submit the copy command
1158            render_queue.submit(std::iter::once(encoder.finish()));
1159
1160            // Queue for async processing
1161            if let Ok(mut pending) = queue.0.lock() {
1162                pending.push(PendingImageCapture {
1163                    buffer: staging_buffer,
1164                    width,
1165                    height,
1166                    padded_bytes_per_row: padded_bytes_per_row as u32,
1167                });
1168            }
1169        }
1170
1171        if let Some(t0) = t0 {
1172            eprintln!(
1173                "[render_trace][node] ImageCopyDriver ms={:.3}",
1174                t0.elapsed().as_secs_f64() * 1000.0
1175            );
1176        }
1177
1178        Ok(())
1179    }
1180}
1181
1182/// Extract ImageCopier components to render world
1183fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
1184    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
1185}
1186
1187/// Process completed image captures
1188fn collect_image_captures(
1189    queue: Res<PendingImageCaptureQueue>,
1190    shared_rgba: Res<SharedRgbaBuffer>,
1191    render_device: Res<RenderDevice>,
1192) {
1193    let trace = render_trace_enabled();
1194    let t_sys = trace.then(std::time::Instant::now);
1195
1196    let pending_captures = {
1197        let Ok(mut pending) = queue.0.lock() else {
1198            return;
1199        };
1200        std::mem::take(&mut *pending)
1201    };
1202
1203    if pending_captures.is_empty() {
1204        if let Some(t0) = t_sys {
1205            eprintln!(
1206                "[render_trace][sys] collect_image_captures empty ms={:.3}",
1207                t0.elapsed().as_secs_f64() * 1000.0
1208            );
1209        }
1210        return;
1211    }
1212
1213    let pending_count = pending_captures.len();
1214
1215    for pending in pending_captures {
1216        let width = pending.width;
1217        let height = pending.height;
1218        let padded_bytes_per_row = pending.padded_bytes_per_row;
1219        let buffer = pending.buffer;
1220        let shared = shared_rgba.0.clone();
1221
1222        // Use blocking sync approach with device polling
1223        let buffer_slice = buffer.slice(..);
1224
1225        // Request mapping
1226        let (tx, rx) = std::sync::mpsc::channel();
1227        buffer_slice.map_async(MapMode::Read, move |result| {
1228            let _ = tx.send(result);
1229        });
1230
1231        // Poll the device until mapping completes (with timeout)
1232        let start = std::time::Instant::now();
1233        let timeout = std::time::Duration::from_secs(10);
1234        let mut poll_iters: u32 = 0;
1235        loop {
1236            let _ =
1237                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
1238            poll_iters += 1;
1239
1240            if start.elapsed() > timeout {
1241                eprintln!(
1242                    "Warning: Buffer mapping timeout after {:?}",
1243                    start.elapsed()
1244                );
1245                break;
1246            }
1247
1248            match rx.try_recv() {
1249                Ok(Ok(())) => {
1250                    let data = buffer_slice.get_mapped_range();
1251
1252                    // Extract pixels with alignment handling
1253                    let bytes_per_pixel = 4u32;
1254                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
1255                    let padded_row_bytes = padded_bytes_per_row as usize;
1256
1257                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
1258                    for y in 0..height as usize {
1259                        let row_start = y * padded_row_bytes;
1260                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
1261                    }
1262
1263                    drop(data);
1264                    buffer.unmap();
1265
1266                    if let Ok(mut guard) = shared.lock() {
1267                        *guard = Some((rgba, width, height));
1268                    }
1269                    break;
1270                }
1271                Ok(Err(e)) => {
1272                    eprintln!("Failed to map image buffer: {:?}", e);
1273                    break;
1274                }
1275                Err(std::sync::mpsc::TryRecvError::Empty) => {
1276                    // Keep polling
1277                    std::thread::sleep(std::time::Duration::from_millis(1));
1278                }
1279                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1280                    eprintln!("Image buffer mapping channel disconnected");
1281                    break;
1282                }
1283            }
1284        }
1285
1286        if trace {
1287            eprintln!(
1288                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
1289                poll_iters,
1290                start.elapsed().as_secs_f64() * 1000.0
1291            );
1292        }
1293    }
1294
1295    if let Some(t0) = t_sys {
1296        eprintln!(
1297            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1298            pending_count,
1299            t0.elapsed().as_secs_f64() * 1000.0
1300        );
1301    }
1302}
1303
1304/// Plugin for headless image copy
1305struct ImageCopyPlugin {
1306    shared_rgba: SharedRgbaBuffer,
1307}
1308
1309impl Plugin for ImageCopyPlugin {
1310    fn build(&self, app: &mut App) {
1311        use bevy::render::render_graph::RenderGraph;
1312
1313        app.insert_resource(self.shared_rgba.clone());
1314
1315        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1316            return;
1317        };
1318
1319        render_app.insert_resource(self.shared_rgba.clone());
1320        render_app.init_resource::<ImageCopiers>();
1321        render_app.init_resource::<PendingImageCaptureQueue>();
1322
1323        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1324        render_app.add_systems(
1325            Render,
1326            collect_image_captures.in_set(RenderSystems::Cleanup),
1327        );
1328
1329        // Add image copy node to render graph (runs after camera driver)
1330        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1331        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1332        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1333    }
1334}
1335
1336// ============================================================================
1337// Render Request and Components
1338// ============================================================================
1339
1340/// Configuration passed to the Bevy app
1341#[derive(Resource, Clone)]
1342struct RenderRequest {
1343    mesh_path: String,
1344    texture_path: String,
1345    camera_transform: Transform,
1346    object_rotation: ObjectRotation,
1347    object_translation: Vec3,
1348    object_scale: Vec3,
1349    config: RenderConfig,
1350    target_point: Vec3,
1351    targeting_policy: TargetingPolicy,
1352}
1353
1354impl RenderRequest {
1355    fn target_projects_in_frame(&self) -> bool {
1356        if matches!(self.targeting_policy, TargetingPolicy::Origin) {
1357            return false;
1358        }
1359
1360        target_projects_in_frame(self.target_point, &self.camera_transform, &self.config)
1361    }
1362
1363    fn accepts_stable_empty_view(&self) -> bool {
1364        !self.target_projects_in_frame()
1365    }
1366}
1367
1368fn target_projects_in_frame(
1369    world_point: Vec3,
1370    camera_transform: &Transform,
1371    config: &RenderConfig,
1372) -> bool {
1373    let local = camera_transform.rotation.inverse() * (world_point - camera_transform.translation);
1374    let Some([x, y]) = project_camera_local(local, config) else {
1375        return false;
1376    };
1377
1378    x >= 0.0 && x < config.width as f64 && y >= 0.0 && y < config.height as f64
1379}
1380
1381fn project_camera_local(local: Vec3, config: &RenderConfig) -> Option<[f64; 2]> {
1382    if local.z >= 0.0 {
1383        return None;
1384    }
1385
1386    let depth = -local.z as f64;
1387    let intrinsics = config.intrinsics();
1388    let x = (local.x as f64 / depth) * intrinsics.focal_length[0] + intrinsics.principal_point[0];
1389    let y = (-local.y as f64 / depth) * intrinsics.focal_length[1] + intrinsics.principal_point[1];
1390    Some([x, y])
1391}
1392
1393fn reject_all_background_target_in_frame(
1394    output: &RenderOutput,
1395    config: &RenderConfig,
1396) -> Result<(), RenderError> {
1397    if matches!(output.targeting_policy, TargetingPolicy::Origin) {
1398        return Ok(());
1399    }
1400
1401    if !target_projects_in_frame(output.target_point, &output.camera_transform, config) {
1402        return Ok(());
1403    }
1404
1405    if is_all_background_depth(&output.depth, config.far_plane as f64) {
1406        return Err(RenderError::RenderFailed(format!(
1407            "all-background target-in-frame render for target_policy={} target=[{:.6},{:.6},{:.6}] image={}x{}",
1408            output.targeting_policy.label(),
1409            output.target_point.x,
1410            output.target_point.y,
1411            output.target_point.z,
1412            output.width,
1413            output.height
1414        )));
1415    }
1416
1417    Ok(())
1418}
1419
1420/// Marker for the rendered object
1421#[derive(Component)]
1422struct RenderedObject;
1423
1424/// Marker for the render camera
1425#[derive(Component)]
1426struct RenderCamera;
1427
1428/// Handle for the loaded texture
1429#[derive(Resource)]
1430struct LoadedTexture(Handle<Image>);
1431
1432/// Handle for the loaded scene
1433#[derive(Resource)]
1434struct LoadedScene(Handle<Scene>);
1435
1436/// Shared output for extracting render results
1437#[derive(Resource, Clone)]
1438struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1439
1440/// Handle for the render target image
1441#[derive(Resource)]
1442#[allow(dead_code)]
1443struct RenderTargetImage(Handle<Image>);
1444
1445/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1446#[derive(Resource)]
1447struct HeadlessBatchSequence {
1448    viewpoints: Vec<Transform>,
1449    current_index: usize,
1450    outputs: Vec<RenderOutput>,
1451    warmup_frames_remaining: u32,
1452    done: bool,
1453}
1454
1455impl HeadlessBatchSequence {
1456    fn new(viewpoints: Vec<Transform>) -> Self {
1457        let capacity = viewpoints.len();
1458        Self {
1459            viewpoints,
1460            current_index: 0,
1461            outputs: Vec::with_capacity(capacity),
1462            warmup_frames_remaining: 0,
1463            done: capacity == 0,
1464        }
1465    }
1466
1467    fn current_viewpoint(&self) -> Option<Transform> {
1468        self.viewpoints.get(self.current_index).cloned()
1469    }
1470}
1471
1472/// Perform headless rendering of a YCB object.
1473///
1474/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1475/// require any window surfaces. This should work on WSL2 and other environments
1476/// without display servers.
1477///
1478/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1479/// for results and terminates the process once the render is complete.
1480#[allow(dead_code)]
1481pub fn render_headless(
1482    object_dir: &Path,
1483    camera_transform: &Transform,
1484    object_rotation: &ObjectRotation,
1485    object_translation: Vec3,
1486    object_scale: Vec3,
1487    config: &RenderConfig,
1488) -> Result<RenderOutput, RenderError> {
1489    render_headless_with_target(
1490        object_dir,
1491        camera_transform,
1492        object_rotation,
1493        object_translation,
1494        object_scale,
1495        config,
1496        Vec3::ZERO,
1497        TargetingPolicy::Origin,
1498    )
1499}
1500
1501#[allow(clippy::too_many_arguments)]
1502pub(crate) fn render_headless_with_target(
1503    object_dir: &Path,
1504    camera_transform: &Transform,
1505    object_rotation: &ObjectRotation,
1506    object_translation: Vec3,
1507    object_scale: Vec3,
1508    config: &RenderConfig,
1509    target_point: Vec3,
1510    targeting_policy: TargetingPolicy,
1511) -> Result<RenderOutput, RenderError> {
1512    // Canonicalize paths so Bevy's asset server can find them regardless of
1513    // caller working directory. Relative paths like "../../ycb" pass the
1514    // exists() check but Bevy resolves assets against its own root.
1515    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1516        RenderError::RenderFailed(format!(
1517            "Cannot canonicalize object directory {}: {}",
1518            object_dir.display(),
1519            e
1520        ))
1521    })?;
1522    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1523    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1524
1525    if !mesh_path.exists() {
1526        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1527            &mesh_path,
1528        )));
1529    }
1530    if !texture_path.exists() {
1531        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1532            &texture_path,
1533        )));
1534    }
1535
1536    let request = RenderRequest {
1537        mesh_path: fs_path_to_asset_string(&mesh_path),
1538        texture_path: fs_path_to_asset_string(&texture_path),
1539        camera_transform: *camera_transform,
1540        object_rotation: object_rotation.clone(),
1541        object_translation,
1542        object_scale,
1543        config: config.clone(),
1544        target_point,
1545        targeting_policy: targeting_policy.clone(),
1546    };
1547
1548    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1549    let output_clone = shared_output.clone();
1550
1551    // Shared buffer for RGBA data from headless render target
1552    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1553
1554    // Shared buffer for depth readback
1555    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1556
1557    // Create a temp file path for fallback output serialization
1558    let temp_path =
1559        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1560
1561    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1562    let output_poll_for_timeout = shared_output.clone();
1563    std::thread::spawn(move || {
1564        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1565        let start = std::time::Instant::now();
1566        let poll_interval = std::time::Duration::from_millis(100);
1567
1568        loop {
1569            // Check if we have a result
1570            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1571                if guard.is_some() {
1572                    // Output is ready, Bevy will exit via AppExit event
1573                    return; // Exit watchdog thread, Bevy will handle exit
1574                }
1575            }
1576
1577            if start.elapsed() > timeout {
1578                eprintln!(
1579                    "Error: Render timeout after {} seconds",
1580                    RENDER_TIMEOUT_SECS
1581                );
1582                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1583                // Force exit on timeout (this is a failure case)
1584                std::process::exit(1);
1585            }
1586
1587            std::thread::sleep(poll_interval);
1588        }
1589    });
1590
1591    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1592    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1593    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1594
1595    // App::run() returned - check shared_output for result
1596    if let Ok(guard) = shared_output.0.lock() {
1597        if let Some(output) = guard.as_ref() {
1598            reject_all_background_target_in_frame(output, config)?;
1599            return Ok(output.clone());
1600        }
1601    }
1602
1603    // Fallback: try to read from temp file (for legacy compatibility)
1604    if temp_path.exists() {
1605        if let Ok(output) = read_output_from_file(&temp_path) {
1606            let _ = std::fs::remove_file(&temp_path);
1607            let output = output.with_targeting(target_point, targeting_policy);
1608            reject_all_background_target_in_frame(&output, config)?;
1609            return Ok(output);
1610        }
1611    }
1612
1613    Err(RenderError::RenderFailed(
1614        "Render did not complete".to_string(),
1615    ))
1616}
1617
1618/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1619///
1620/// All captures share the same object, object rotation, and render configuration.
1621/// This is the fast path used by the batch API for episode-style workloads.
1622#[allow(clippy::too_many_arguments)]
1623pub fn render_headless_sequence(
1624    object_dir: &Path,
1625    viewpoints: &[Transform],
1626    object_rotation: &ObjectRotation,
1627    object_translation: Vec3,
1628    object_scale: Vec3,
1629    config: &RenderConfig,
1630    target_point: Vec3,
1631    targeting_policy: TargetingPolicy,
1632) -> Result<Vec<RenderOutput>, RenderError> {
1633    if viewpoints.is_empty() {
1634        return Ok(Vec::new());
1635    }
1636
1637    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1638        RenderError::RenderFailed(format!(
1639            "Cannot canonicalize object directory {}: {}",
1640            object_dir.display(),
1641            e
1642        ))
1643    })?;
1644    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1645    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1646
1647    if !mesh_path.exists() {
1648        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1649            &mesh_path,
1650        )));
1651    }
1652    if !texture_path.exists() {
1653        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1654            &texture_path,
1655        )));
1656    }
1657
1658    let request = RenderRequest {
1659        mesh_path: fs_path_to_asset_string(&mesh_path),
1660        texture_path: fs_path_to_asset_string(&texture_path),
1661        camera_transform: viewpoints[0],
1662        object_rotation: object_rotation.clone(),
1663        object_translation,
1664        object_scale,
1665        config: config.clone(),
1666        target_point,
1667        targeting_policy,
1668    };
1669
1670    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1671    let rgba_clone = shared_rgba.clone();
1672
1673    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1674    let depth_clone = shared_depth.clone();
1675
1676    let mut app = App::new();
1677    app.add_plugins(
1678        DefaultPlugins
1679            .set(bevy::asset::AssetPlugin {
1680                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1681                // default (UnapprovedPathMode::Forbid → load() silently returns a
1682                // default handle). YCB meshes load from absolute paths, so allow them.
1683                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1684                ..default()
1685            })
1686            .set(WindowPlugin {
1687                primary_window: None,
1688                exit_condition: ExitCondition::DontExit,
1689                ..default()
1690            })
1691            .disable::<bevy::winit::WinitPlugin>()
1692            .disable::<LogPlugin>()
1693            .disable::<TerminalCtrlCHandlerPlugin>(),
1694    )
1695    .add_plugins(ObjPlugin)
1696    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1697    // Scene spawning panics unless those component types are registered. The
1698    // minimal headless plugin set doesn't register them, so do it explicitly.
1699    .register_type::<Mesh3d>()
1700    .register_type::<MeshMaterial3d<StandardMaterial>>()
1701    .register_type::<bevy::prelude::Transform>()
1702    .register_type::<bevy::prelude::GlobalTransform>()
1703    .register_type::<bevy::transform::components::TransformTreeChanged>()
1704    .register_type::<bevy::prelude::Visibility>()
1705    .register_type::<bevy::prelude::InheritedVisibility>()
1706    .register_type::<bevy::prelude::ViewVisibility>()
1707    .add_plugins(ImageCopyPlugin {
1708        shared_rgba: rgba_clone,
1709    })
1710    .add_plugins(DepthReadbackPlugin {
1711        shared_depth: depth_clone,
1712        near: config.near_plane,
1713        far: config.far_plane,
1714    })
1715    .insert_resource(request)
1716    .insert_resource(shared_rgba)
1717    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1718    .init_resource::<RenderState>()
1719    .add_systems(Startup, setup_headless_scene)
1720    .add_systems(
1721        Update,
1722        (
1723            check_assets_loaded,
1724            apply_materials,
1725            tick_headless_batch_warmup,
1726            request_headless_capture,
1727            check_headless_capture_ready,
1728            extract_and_continue_headless_batch,
1729        )
1730            .chain(),
1731    );
1732
1733    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1734    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1735    // normal startup phases before driving the headless batch loop ourselves.
1736    let trace_outer = render_trace_enabled();
1737    let t_finish = std::time::Instant::now();
1738    app.finish();
1739    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1740    let t_cleanup = std::time::Instant::now();
1741    app.cleanup();
1742    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1743    if trace_outer {
1744        eprintln!(
1745            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1746            finish_ms, cleanup_ms
1747        );
1748    }
1749
1750    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1751    let start = std::time::Instant::now();
1752
1753    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1754    let mut update_idx: u32 = 0;
1755    let mut last_completed_outputs: usize = 0;
1756    let mut viewpoint_start = std::time::Instant::now();
1757
1758    loop {
1759        if start.elapsed() > timeout {
1760            return Err(RenderError::RenderTimeout {
1761                duration_secs: RENDER_TIMEOUT_SECS,
1762            });
1763        }
1764
1765        let update_start = std::time::Instant::now();
1766        app.update();
1767        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1768
1769        if trace {
1770            let batch = app.world().resource::<HeadlessBatchSequence>();
1771            let warmup = batch.warmup_frames_remaining;
1772            let current = batch.current_index;
1773            let completed = batch.outputs.len();
1774            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1775            eprintln!(
1776                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1777                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1778            );
1779            if completed > last_completed_outputs {
1780                eprintln!(
1781                    "[render_trace] viewpoint {} finished in {:.2} ms",
1782                    completed - 1,
1783                    vp_ms
1784                );
1785                last_completed_outputs = completed;
1786                viewpoint_start = std::time::Instant::now();
1787            }
1788        }
1789
1790        update_idx += 1;
1791
1792        if app.world().resource::<HeadlessBatchSequence>().done {
1793            break;
1794        }
1795    }
1796
1797    if trace {
1798        eprintln!(
1799            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1800            start.elapsed().as_secs_f64() * 1000.0,
1801            viewpoints.len()
1802        );
1803    }
1804
1805    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1806    if batch.outputs.len() != viewpoints.len() {
1807        return Err(RenderError::RenderFailed(format!(
1808            "Batch render produced {} outputs for {} viewpoints",
1809            batch.outputs.len(),
1810            viewpoints.len()
1811        )));
1812    }
1813
1814    for output in &batch.outputs {
1815        reject_all_background_target_in_frame(output, config)?;
1816    }
1817
1818    Ok(std::mem::take(&mut batch.outputs))
1819}
1820
1821/// Assemble the shared single-render headless Bevy app.
1822fn build_headless_app(
1823    request: RenderRequest,
1824    shared_output: SharedOutput,
1825    shared_rgba: SharedRgbaBuffer,
1826    shared_depth: SharedDepthBuffer,
1827) -> App {
1828    let near = request.config.near_plane;
1829    let far = request.config.far_plane;
1830
1831    let mut app = App::new();
1832    app.add_plugins(
1833        DefaultPlugins
1834            .set(bevy::asset::AssetPlugin {
1835                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1836                // default (UnapprovedPathMode::Forbid → load() silently returns a
1837                // default handle). YCB meshes load from absolute paths, so allow them.
1838                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1839                ..default()
1840            })
1841            .set(WindowPlugin {
1842                primary_window: None,
1843                exit_condition: ExitCondition::DontExit,
1844                ..default()
1845            })
1846            .disable::<bevy::winit::WinitPlugin>()
1847            .disable::<LogPlugin>()
1848            .disable::<TerminalCtrlCHandlerPlugin>(),
1849    )
1850    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1851        1.0 / 60.0,
1852    )))
1853    .add_plugins(ObjPlugin)
1854    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1855    // Scene spawning panics unless those component types are registered. The
1856    // minimal headless plugin set doesn't register them, so do it explicitly.
1857    .register_type::<Mesh3d>()
1858    .register_type::<MeshMaterial3d<StandardMaterial>>()
1859    .register_type::<bevy::prelude::Transform>()
1860    .register_type::<bevy::prelude::GlobalTransform>()
1861    .register_type::<bevy::transform::components::TransformTreeChanged>()
1862    .register_type::<bevy::prelude::Visibility>()
1863    .register_type::<bevy::prelude::InheritedVisibility>()
1864    .register_type::<bevy::prelude::ViewVisibility>()
1865    .add_plugins(ImageCopyPlugin {
1866        shared_rgba: shared_rgba.clone(),
1867    })
1868    .add_plugins(DepthReadbackPlugin {
1869        shared_depth,
1870        near,
1871        far,
1872    })
1873    .insert_resource(request)
1874    .insert_resource(shared_output)
1875    .insert_resource(shared_rgba)
1876    .init_resource::<RenderState>()
1877    .add_systems(Startup, setup_headless_scene)
1878    .add_systems(
1879        Update,
1880        (
1881            check_assets_loaded,
1882            apply_materials,
1883            request_headless_capture,
1884            check_headless_capture_ready,
1885            extract_and_exit_headless,
1886        )
1887            .chain(),
1888    );
1889    app
1890}
1891
1892/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1893#[allow(dead_code)]
1894fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1895    let mut data = Vec::new();
1896
1897    // Header: width, height, rgba_len, depth_len
1898    data.extend_from_slice(&output.width.to_le_bytes());
1899    data.extend_from_slice(&output.height.to_le_bytes());
1900    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1901    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1902
1903    // RGBA data
1904    data.extend_from_slice(&output.rgba);
1905
1906    // Depth data (as f64 bytes for TBP precision)
1907    for d in &output.depth {
1908        data.extend_from_slice(&d.to_le_bytes());
1909    }
1910
1911    // Intrinsics (f64 for TBP precision)
1912    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1913    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1914    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1915    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1916    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1917    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1918
1919    // Camera transform (translation + rotation quaternion)
1920    let t = output.camera_transform.translation;
1921    let r = output.camera_transform.rotation;
1922    data.extend_from_slice(&t.x.to_le_bytes());
1923    data.extend_from_slice(&t.y.to_le_bytes());
1924    data.extend_from_slice(&t.z.to_le_bytes());
1925    data.extend_from_slice(&r.x.to_le_bytes());
1926    data.extend_from_slice(&r.y.to_le_bytes());
1927    data.extend_from_slice(&r.z.to_le_bytes());
1928    data.extend_from_slice(&r.w.to_le_bytes());
1929
1930    // Object rotation (f64)
1931    let or = &output.object_rotation;
1932    data.extend_from_slice(&or.pitch.to_le_bytes());
1933    data.extend_from_slice(&or.yaw.to_le_bytes());
1934    data.extend_from_slice(&or.roll.to_le_bytes());
1935
1936    // Object translation + scale (f32 for Bevy compatibility)
1937    let ot = output.object_translation;
1938    let os = output.object_scale;
1939    data.extend_from_slice(&ot.x.to_le_bytes());
1940    data.extend_from_slice(&ot.y.to_le_bytes());
1941    data.extend_from_slice(&ot.z.to_le_bytes());
1942    data.extend_from_slice(&os.x.to_le_bytes());
1943    data.extend_from_slice(&os.y.to_le_bytes());
1944    data.extend_from_slice(&os.z.to_le_bytes());
1945
1946    data
1947}
1948
1949/// Read RenderOutput from serialized file
1950fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1951    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1952    let mut data = Vec::new();
1953    file.read_to_end(&mut data)
1954        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1955
1956    let mut cursor = 0;
1957
1958    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1959        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1960        *cursor += 4;
1961        val
1962    };
1963
1964    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1965        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1966        *cursor += 4;
1967        val
1968    };
1969
1970    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1971        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1972        *cursor += 8;
1973        val
1974    };
1975
1976    let width = read_u32(&data, &mut cursor);
1977    let height = read_u32(&data, &mut cursor);
1978    let rgba_len = read_u32(&data, &mut cursor) as usize;
1979    let depth_len = read_u32(&data, &mut cursor) as usize;
1980
1981    let rgba = data[cursor..cursor + rgba_len].to_vec();
1982    cursor += rgba_len;
1983
1984    // Depth data (f64 for TBP precision)
1985    let mut depth = Vec::with_capacity(depth_len);
1986    for _ in 0..depth_len {
1987        depth.push(read_f64(&data, &mut cursor));
1988    }
1989
1990    // Intrinsics (f64 for TBP precision)
1991    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1992    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1993    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1994
1995    // Camera transform (f32 for Bevy compatibility)
1996    let tx = read_f32(&data, &mut cursor);
1997    let ty = read_f32(&data, &mut cursor);
1998    let tz = read_f32(&data, &mut cursor);
1999    let rx = read_f32(&data, &mut cursor);
2000    let ry = read_f32(&data, &mut cursor);
2001    let rz = read_f32(&data, &mut cursor);
2002    let rw = read_f32(&data, &mut cursor);
2003
2004    // Object rotation (f64)
2005    let pitch = read_f64(&data, &mut cursor);
2006    let yaw = read_f64(&data, &mut cursor);
2007    let roll = read_f64(&data, &mut cursor);
2008
2009    let (object_translation, object_scale) = if cursor + 24 <= data.len() {
2010        let tx = read_f32(&data, &mut cursor);
2011        let ty = read_f32(&data, &mut cursor);
2012        let tz = read_f32(&data, &mut cursor);
2013        let sx = read_f32(&data, &mut cursor);
2014        let sy = read_f32(&data, &mut cursor);
2015        let sz = read_f32(&data, &mut cursor);
2016        (Vec3::new(tx, ty, tz), Vec3::new(sx, sy, sz))
2017    } else {
2018        (Vec3::ZERO, Vec3::ONE)
2019    };
2020
2021    Ok(RenderOutput {
2022        rgba,
2023        depth,
2024        width,
2025        height,
2026        intrinsics: crate::CameraIntrinsics {
2027            focal_length,
2028            principal_point,
2029            image_size,
2030        },
2031        camera_transform: Transform {
2032            translation: Vec3::new(tx, ty, tz),
2033            rotation: Quat::from_xyzw(rx, ry, rz, rw),
2034            scale: Vec3::ONE,
2035        },
2036        object_rotation: ObjectRotation { pitch, yaw, roll },
2037        object_translation,
2038        object_scale,
2039        target_point: Vec3::ZERO,
2040        targeting_policy: TargetingPolicy::Origin,
2041    })
2042}
2043
2044/// Setup the scene with camera, lighting, and object
2045#[allow(dead_code)]
2046fn setup_scene(
2047    mut commands: Commands,
2048    asset_server: Res<AssetServer>,
2049    request: Res<RenderRequest>,
2050    mut _materials: ResMut<Assets<StandardMaterial>>,
2051) {
2052    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
2053    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
2054    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
2055    commands.spawn((
2056        Camera3d::default(),
2057        Camera::default(),
2058        Hdr,
2059        render_projection(&request.config),
2060        Msaa::Off,
2061        request.camera_transform,
2062        Tonemapping::None, // Accurate colors for software rendering
2063        DepthPrepass,
2064        NormalPrepass,
2065        RenderCamera,
2066    ));
2067
2068    // Ambient light (from config). In Bevy 0.18 the global ambient light is the
2069    // `GlobalAmbientLight` resource (the `AmbientLight` type became a per-camera component).
2070    let lighting = &request.config.lighting;
2071    commands.insert_resource(GlobalAmbientLight {
2072        color: Color::WHITE,
2073        brightness: lighting.ambient_brightness,
2074        ..default()
2075    });
2076
2077    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
2078    if lighting.key_light_intensity > 0.0 {
2079        commands.spawn((
2080            PointLight {
2081                intensity: lighting.key_light_intensity,
2082                shadows_enabled: lighting.shadows_enabled,
2083                ..default()
2084            },
2085            Transform::from_xyz(
2086                lighting.key_light_position[0],
2087                lighting.key_light_position[1],
2088                lighting.key_light_position[2],
2089            ),
2090        ));
2091    }
2092
2093    // Fill light (from config)
2094    if lighting.fill_light_intensity > 0.0 {
2095        commands.spawn((
2096            PointLight {
2097                intensity: lighting.fill_light_intensity,
2098                shadows_enabled: lighting.shadows_enabled,
2099                ..default()
2100            },
2101            Transform::from_xyz(
2102                lighting.fill_light_position[0],
2103                lighting.fill_light_position[1],
2104                lighting.fill_light_position[2],
2105            ),
2106        ));
2107    }
2108
2109    // Load the scene
2110    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2111    commands.insert_resource(LoadedScene(scene_handle.clone()));
2112
2113    // Load the texture
2114    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2115    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2116
2117    // Create material with texture (will be applied later)
2118    let _material = _materials.add(StandardMaterial {
2119        base_color_texture: Some(texture_handle),
2120        unlit: true,
2121        ..default()
2122    });
2123
2124    // Spawn the scene with the requested object transform (Bevy 0.15+ uses SceneRoot)
2125    commands.spawn((
2126        SceneRoot(scene_handle),
2127        request
2128            .object_rotation
2129            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
2130        RenderedObject,
2131    ));
2132
2133    println!("Scene setup complete");
2134}
2135
2136/// Check if assets are loaded
2137fn check_assets_loaded(
2138    mut state: ResMut<RenderState>,
2139    asset_server: Res<AssetServer>,
2140    scene: Option<Res<LoadedScene>>,
2141    texture: Option<Res<LoadedTexture>>,
2142) {
2143    let trace = render_trace_enabled();
2144    let was_scene_loaded = state.scene_loaded;
2145    let was_texture_loaded = state.texture_loaded;
2146
2147    state.frame_count += 1;
2148
2149    if state.scene_loaded && state.texture_loaded {
2150        return;
2151    }
2152
2153    if let Some(scene) = scene {
2154        match asset_server.get_load_state(&scene.0) {
2155            Some(LoadState::Loaded) => {
2156                state.scene_loaded = true;
2157            }
2158            Some(LoadState::Failed(_)) => {}
2159            _ => {}
2160        }
2161    }
2162
2163    if let Some(texture) = texture {
2164        match asset_server.get_load_state(&texture.0) {
2165            Some(LoadState::Loaded) => {
2166                state.texture_loaded = true;
2167            }
2168            Some(LoadState::Failed(_)) => {}
2169            _ => {}
2170        }
2171    }
2172
2173    if trace {
2174        if !was_scene_loaded && state.scene_loaded {
2175            eprintln!(
2176                "[render_trace][coldinit] scene_loaded frame_count={}",
2177                state.frame_count
2178            );
2179        }
2180        if !was_texture_loaded && state.texture_loaded {
2181            eprintln!(
2182                "[render_trace][coldinit] texture_loaded frame_count={}",
2183                state.frame_count
2184            );
2185        }
2186    }
2187}
2188
2189/// Apply materials to loaded meshes
2190fn apply_materials(
2191    mut state: ResMut<RenderState>,
2192    texture: Option<Res<LoadedTexture>>,
2193    mut materials: ResMut<Assets<StandardMaterial>>,
2194    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
2195    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
2196) {
2197    // NOTE: we intentionally do NOT wait for `texture_loaded` before applying the
2198    // material. The texture *handle* is valid immediately, so applying the material
2199    // as soon as the mesh entities exist lets the main-pass `StandardMaterial`
2200    // pipeline start compiling during the long async texture load. A late material
2201    // swap (after texture load) would reset the pipeline and capture a blank color
2202    // frame before it recompiled — the root cause of the 0.18 blank renders.
2203    if !state.scene_loaded || state.capture_ready {
2204        return;
2205    }
2206
2207    state.frame_count += 1;
2208
2209    let Some(tex) = texture else { return };
2210
2211    if !state.materials_applied {
2212        // The scene hierarchy is instantiated asynchronously after the asset
2213        // load event fires; wait until mesh entities exist before applying.
2214        if mesh_query.is_empty() {
2215            return;
2216        }
2217
2218        let textured_material = materials.add(StandardMaterial {
2219            base_color_texture: Some(tex.0.clone()),
2220            unlit: true,
2221            ..default()
2222        });
2223
2224        for mut mat in mesh_query.iter_mut() {
2225            mat.0 = textured_material.clone();
2226        }
2227
2228        state.materials_applied = true;
2229        state.materials_applied_frame = state.frame_count;
2230    }
2231
2232    // Record the frame the texture finished loading (once).
2233    if state.texture_loaded && state.texture_ready_frame == 0 {
2234        state.texture_ready_frame = state.frame_count;
2235    }
2236
2237    // Capture once the texture pixels are loaded (+ a small margin for GPU image
2238    // preparation) AND the main-pass pipeline has had time to compile since the
2239    // material was applied. Because the material is applied early, the pipeline is
2240    // almost always ready well before the texture, so this resolves to a few frames
2241    // after the texture loads — deterministic and fast (no 60/120-frame cushion).
2242    let texture_ready =
2243        state.texture_ready_frame != 0 && state.frame_count >= state.texture_ready_frame + 6;
2244    let pipeline_ready = state.frame_count >= state.materials_applied_frame + 6;
2245    if texture_ready && pipeline_ready {
2246        let was_ready = state.capture_ready;
2247        state.capture_ready = true;
2248        if render_trace_enabled() && !was_ready {
2249            eprintln!(
2250                "[render_trace][coldinit] capture_ready frame_count={}",
2251                state.frame_count
2252            );
2253        }
2254    }
2255}
2256
2257/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
2258#[allow(dead_code)]
2259fn request_screenshot(
2260    mut commands: Commands,
2261    mut state: ResMut<RenderState>,
2262    shared_image: Res<SharedImageBuffer>,
2263    mut depth_request: ResMut<DepthCaptureRequest>,
2264) {
2265    if !state.capture_ready || state.screenshot_requested {
2266        return;
2267    }
2268
2269    // Clone the Arc for the observer closure
2270    let image_buffer = shared_image.0.clone();
2271
2272    // Also request depth capture
2273    depth_request.requested = true;
2274    println!("Depth capture requested");
2275
2276    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
2277    println!("Requesting screenshot via Screenshot entity");
2278    commands
2279        .spawn(Screenshot::primary_window())
2280        .observe(move |trigger: On<ScreenshotCaptured>| {
2281            // ScreenshotCaptured derefs to Image
2282            let image: &Image = trigger.event();
2283
2284            // Get dimensions
2285            let width = image.texture_descriptor.size.width;
2286            let height = image.texture_descriptor.size.height;
2287
2288            // Bevy 0.18: Image.data is now Option<Vec<u8>>; skip if absent.
2289            let Some(rgba_data) = image.data.clone() else {
2290                return;
2291            };
2292
2293            // Store in shared buffer
2294            if let Ok(mut guard) = image_buffer.lock() {
2295                *guard = Some((rgba_data, width, height));
2296            }
2297        });
2298
2299    state.screenshot_requested = true;
2300    println!("Screenshot requested");
2301}
2302
2303/// Check if screenshot callback has completed
2304#[allow(dead_code)]
2305fn check_screenshot_ready(
2306    mut state: ResMut<RenderState>,
2307    shared_image: Res<SharedImageBuffer>,
2308    shared_depth: Res<SharedDepthBuffer>,
2309    request: Res<RenderRequest>,
2310) {
2311    if !state.screenshot_requested || state.captured {
2312        return;
2313    }
2314
2315    // Increment frame count while waiting for capture
2316    state.frame_count += 1;
2317
2318    // Check if RGBA callback has written data
2319    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
2320        if let Some((rgba_data, width, height)) = guard.as_ref() {
2321            if state.rgba_data.is_none() {
2322                state.rgba_data = Some(rgba_data.clone());
2323                state.image_width = *width;
2324                state.image_height = *height;
2325            }
2326            true
2327        } else {
2328            false
2329        }
2330    } else {
2331        false
2332    };
2333
2334    // Check if depth readback has completed
2335    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2336        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2337            if state.depth_data.is_none() {
2338                state.depth_data = Some(depth_data.clone());
2339            }
2340            true
2341        } else {
2342            false
2343        }
2344    } else {
2345        false
2346    };
2347
2348    // If depth readback failed or is taking too long, fall back to placeholder.
2349    // As in check_headless_capture_ready, this uniform plane is a DEGRADED render
2350    // (flat depth, no real geometry) that must be loud — it silently masked the
2351    // #92 depth regression. (This fn is currently dead code; kept loud in case it
2352    // is ever revived.)
2353    if rgba_ready && !depth_ready && state.frame_count > 60 {
2354        let camera_dist = request.camera_transform.translation.length() as f64;
2355        let pixel_count = (state.image_width * state.image_height) as usize;
2356        eprintln!(
2357            "[bevy-sensor][WARN] depth readback produced no valid frame; falling back to a \
2358             UNIFORM {:.4} m camera-distance plane (degraded render, no real 3D geometry). \
2359             Indicates a depth-readback regression.",
2360            camera_dist
2361        );
2362        state.depth_data = Some(vec![camera_dist; pixel_count]);
2363    }
2364
2365    // Mark as captured when both RGBA and depth are ready
2366    if state.rgba_data.is_some() && state.depth_data.is_some() {
2367        state.captured = true;
2368    }
2369}
2370
2371/// Extract results and exit
2372#[allow(dead_code)]
2373fn extract_and_exit(
2374    mut state: ResMut<RenderState>,
2375    request: Res<RenderRequest>,
2376    shared_output: Res<SharedOutput>,
2377    mut commands: Commands,
2378    windows: Query<Entity, With<bevy::window::Window>>,
2379) {
2380    // Handle delayed exit after closing window
2381    if state.exit_requested {
2382        state.exit_frame_count += 1;
2383        // After a few frames with no window, Bevy should exit
2384        return;
2385    }
2386
2387    if !state.captured {
2388        return;
2389    }
2390
2391    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2392        // Use actual captured dimensions (may differ from config if window was resized)
2393        let width = state.image_width;
2394        let height = state.image_height;
2395
2396        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2397        let intrinsics = request.config.intrinsics_for_size(width, height);
2398
2399        let output = RenderOutput {
2400            rgba: rgba.clone(),
2401            depth: depth.clone(),
2402            width,
2403            height,
2404            intrinsics,
2405            camera_transform: request.camera_transform,
2406            object_rotation: request.object_rotation.clone(),
2407            object_translation: request.object_translation,
2408            object_scale: request.object_scale,
2409            target_point: request.target_point,
2410            targeting_policy: request.targeting_policy.clone(),
2411        };
2412
2413        if let Ok(mut guard) = shared_output.0.lock() {
2414            *guard = Some(output);
2415            drop(guard); // Release lock immediately
2416
2417            // Small delay to allow watchdog to detect output before window close
2418            std::thread::sleep(std::time::Duration::from_millis(200));
2419        }
2420
2421        // Close all windows to trigger app exit
2422        // eprintln!("Closing windows to trigger exit...");
2423        for window_entity in windows.iter() {
2424            commands.entity(window_entity).despawn();
2425        }
2426        state.exit_requested = true;
2427    }
2428}
2429
2430// ============================================================================
2431// Headless Rendering Systems (no window surfaces)
2432// ============================================================================
2433
2434/// Setup the scene for headless rendering with RenderTarget::Image
2435fn setup_headless_scene(
2436    mut commands: Commands,
2437    mut images: ResMut<Assets<Image>>,
2438    asset_server: Res<AssetServer>,
2439    request: Res<RenderRequest>,
2440    mut _materials: ResMut<Assets<StandardMaterial>>,
2441) {
2442    let trace = render_trace_enabled();
2443    let t0 = trace.then(std::time::Instant::now);
2444
2445    #[cfg(test)]
2446    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2447
2448    let width = request.config.width;
2449    let height = request.config.height;
2450
2451    // Create render target image with proper texture usages
2452    let size = Extent3d {
2453        width,
2454        height,
2455        depth_or_array_layers: 1,
2456    };
2457
2458    let mut render_target_image = Image::new_fill(
2459        size,
2460        TextureDimension::D2,
2461        &[0, 0, 0, 255], // Initialize with opaque black
2462        TextureFormat::Rgba8UnormSrgb,
2463        RenderAssetUsages::default(),
2464    );
2465
2466    // Add required texture usages for headless rendering
2467    render_target_image.texture_descriptor.usage =
2468        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2469
2470    let render_target_handle = images.add(render_target_image);
2471
2472    // Store handle for later access
2473    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2474
2475    // Camera rendering to the image texture (NO window!)
2476    commands.spawn((
2477        Camera3d::default(),
2478        Camera::default(),
2479        Hdr,
2480        // In Bevy 0.18 the render target is a separate `RenderTarget` component,
2481        // and `RenderTarget::Image` wraps an `ImageRenderTarget` (via `From<Handle<Image>>`).
2482        RenderTarget::Image(render_target_handle.clone().into()),
2483        render_projection(&request.config),
2484        Msaa::Off,
2485        request.camera_transform,
2486        Tonemapping::None,
2487        DepthPrepass,
2488        NormalPrepass,
2489        RenderCamera,
2490        // Add ImageCopier to trigger RGBA extraction
2491        ImageCopier {
2492            src_image: render_target_handle,
2493            enabled: false, // Will enable when ready to capture
2494        },
2495    ));
2496
2497    // Ambient light (global resource in Bevy 0.18).
2498    let lighting = &request.config.lighting;
2499    commands.insert_resource(GlobalAmbientLight {
2500        color: Color::WHITE,
2501        brightness: lighting.ambient_brightness,
2502        ..default()
2503    });
2504
2505    // Key light
2506    if lighting.key_light_intensity > 0.0 {
2507        commands.spawn((
2508            PointLight {
2509                intensity: lighting.key_light_intensity,
2510                shadows_enabled: lighting.shadows_enabled,
2511                ..default()
2512            },
2513            Transform::from_xyz(
2514                lighting.key_light_position[0],
2515                lighting.key_light_position[1],
2516                lighting.key_light_position[2],
2517            ),
2518        ));
2519    }
2520
2521    // Fill light
2522    if lighting.fill_light_intensity > 0.0 {
2523        commands.spawn((
2524            PointLight {
2525                intensity: lighting.fill_light_intensity,
2526                shadows_enabled: lighting.shadows_enabled,
2527                ..default()
2528            },
2529            Transform::from_xyz(
2530                lighting.fill_light_position[0],
2531                lighting.fill_light_position[1],
2532                lighting.fill_light_position[2],
2533            ),
2534        ));
2535    }
2536
2537    // Load the scene
2538    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2539    commands.insert_resource(LoadedScene(scene_handle.clone()));
2540
2541    // Load the texture
2542    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2543    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2544
2545    // Create material with texture
2546    let _material = _materials.add(StandardMaterial {
2547        base_color_texture: Some(texture_handle),
2548        unlit: true,
2549        ..default()
2550    });
2551
2552    // Spawn the scene with the requested object transform
2553    commands.spawn((
2554        SceneRoot(scene_handle),
2555        request
2556            .object_rotation
2557            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
2558        RenderedObject,
2559    ));
2560
2561    if let Some(t0) = t0 {
2562        eprintln!(
2563            "[render_trace][startup] setup_headless_scene ms={:.3}",
2564            t0.elapsed().as_secs_f64() * 1000.0
2565        );
2566    }
2567}
2568
2569/// Request capture for headless rendering (enable ImageCopier)
2570fn request_headless_capture(
2571    mut state: ResMut<RenderState>,
2572    mut depth_request: ResMut<DepthCaptureRequest>,
2573    mut query: Query<&mut ImageCopier>,
2574    batch: Option<Res<HeadlessBatchSequence>>,
2575) {
2576    let trace = render_trace_enabled();
2577    let t0 = trace.then(std::time::Instant::now);
2578
2579    if !state.capture_ready || state.screenshot_requested {
2580        if let Some(t0) = t0 {
2581            eprintln!(
2582                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2583                t0.elapsed().as_secs_f64() * 1000.0
2584            );
2585        }
2586        return;
2587    }
2588
2589    if batch
2590        .as_ref()
2591        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2592    {
2593        if let Some(t0) = t0 {
2594            eprintln!(
2595                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2596                t0.elapsed().as_secs_f64() * 1000.0
2597            );
2598        }
2599        return;
2600    }
2601
2602    // Enable the ImageCopier to trigger RGBA extraction
2603    for mut copier in query.iter_mut() {
2604        copier.enabled = true;
2605    }
2606
2607    // Request depth capture
2608    depth_request.requested = true;
2609
2610    state.screenshot_requested = true;
2611
2612    if let Some(t0) = t0 {
2613        eprintln!(
2614            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2615            t0.elapsed().as_secs_f64() * 1000.0
2616        );
2617    }
2618}
2619
2620/// Check if headless capture has completed
2621fn check_headless_capture_ready(
2622    mut state: ResMut<RenderState>,
2623    shared_rgba: Res<SharedRgbaBuffer>,
2624    shared_depth: Res<SharedDepthBuffer>,
2625    request: Res<RenderRequest>,
2626    mut query: Query<&mut ImageCopier>,
2627) {
2628    let trace = render_trace_enabled();
2629    let t0 = trace.then(std::time::Instant::now);
2630
2631    if !state.screenshot_requested || state.captured {
2632        if let Some(t0) = t0 {
2633            eprintln!(
2634                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2635                t0.elapsed().as_secs_f64() * 1000.0
2636            );
2637        }
2638        return;
2639    }
2640
2641    state.frame_count += 1;
2642    state.capture_retries += 1;
2643    // Bounded fallback so a genuinely-uniform scene (or persistent invalid
2644    // readback) still terminates instead of hanging to the watchdog.
2645    // Generous bound: slow paths (e.g. RenderSession's retained-render-world
2646    // settle after a scene swap) can take ~150 frames to produce a stable frame,
2647    // so force-accepting at 150 would grab a partial frame and break parity. Only
2648    // force as a true last resort to avoid hanging the watchdog.
2649    let force_accept = state.capture_retries > 150;
2650
2651    let near = request.config.near_plane as f64;
2652    let far = request.config.far_plane as f64;
2653
2654    // RGBA: accept the first stable non-blank frame. Uniform clear-color frames
2655    // can be pre-geometry reads from the nondeterministic one-shot capture, but
2656    // they are also legitimate off-target renders. Accept a stable blank frame
2657    // only when the depth side has already shown a stable all-background buffer.
2658    // The copier stays enabled until BOTH RGBA and depth are valid so a late/odd
2659    // depth frame can still be captured.
2660    if state.rgba_data.is_none() {
2661        let captured_rgba = shared_rgba.0.lock().ok().and_then(|mut g| g.take());
2662        if let Some((rgba_data, width, height)) = captured_rgba {
2663            let blank = is_uniform_rgba_frame(&rgba_data);
2664            let non_blank = !blank;
2665            // Stable == identical to the previous readback (render has settled).
2666            let stable = state.prev_rgba.as_deref() == Some(rgba_data.as_slice());
2667            let stable_empty_view = blank
2668                && stable
2669                && state
2670                    .prev_depth
2671                    .as_deref()
2672                    .is_some_and(|depth| is_all_background_depth(depth, far))
2673                && request.accepts_stable_empty_view();
2674            if (non_blank && stable) || stable_empty_view || force_accept {
2675                state.image_width = width;
2676                state.image_height = height;
2677                state.rgba_data = Some(rgba_data);
2678                state.prev_rgba = None;
2679            } else {
2680                // Not settled yet: remember this frame and re-read fresh next one.
2681                state.prev_rgba = Some(rgba_data);
2682            }
2683        }
2684    }
2685
2686    // Depth: accept the first readback that contains real foreground (the depth
2687    // readback can also miss the geometry, leaving an all-far-plane buffer).
2688    if state.depth_data.is_none() {
2689        let captured_depth = shared_depth.0.lock().ok().and_then(|mut g| g.take());
2690        if let Some((depth_data, _w, _h)) = captured_depth {
2691            // Require a real object-surface depth, not just any non-far value:
2692            // near-plane garbage (~configured near plane) is not a valid surface,
2693            // but TBP surface policies legitimately work close to the object
2694            // (~0.025m with the default 0.01m near plane), so do not use a broad
2695            // absolute floor like 0.1m here.
2696            let has_foreground = depth_data
2697                .iter()
2698                .any(|&depth| is_capture_foreground_depth(depth, near, far));
2699            // Settled == identical to the previous depth readback.
2700            let stable = state.prev_depth.as_deref() == Some(depth_data.as_slice());
2701            let stable_empty_view = stable
2702                && is_all_background_depth(&depth_data, far)
2703                && state
2704                    .rgba_data
2705                    .as_deref()
2706                    .is_some_and(is_uniform_rgba_frame)
2707                && request.accepts_stable_empty_view();
2708            if (has_foreground && stable) || stable_empty_view {
2709                state.depth_data = Some(depth_data);
2710                state.prev_depth = None;
2711            } else {
2712                state.prev_depth = Some(depth_data);
2713            }
2714        }
2715    }
2716
2717    // Last-resort fallback so we never hang the watchdog: once RGBA is in hand
2718    // and we've retried a lot, fill a uniform far-plane/background placeholder.
2719    //
2720    // This is NOT a valid object render; it is an all-background depth buffer
2721    // that downstream callers should treat as no surface. It must therefore be
2722    // LOUD: a future depth-readback regression has to surface in logs/CI instead
2723    // of looking like a successful render. `tests/spatial_parity.rs` is the
2724    // geometric guard for the old fake-surface failure.
2725    if state.rgba_data.is_some() && state.depth_data.is_none() && force_accept {
2726        let pixel_count = (state.image_width * state.image_height) as usize;
2727        let depth_summary = state
2728            .prev_depth
2729            .as_deref()
2730            .map(|depth| {
2731                DepthReadbackSummary::from_depth(
2732                    depth,
2733                    request.config.near_plane as f64,
2734                    request.config.far_plane as f64,
2735                )
2736            })
2737            .map(|summary| summary.to_string())
2738            .unwrap_or_else(|| "none".to_string());
2739        let camera_translation = request.camera_transform.translation;
2740        let camera_rotation = request.camera_transform.rotation;
2741        let object_rotation = &request.object_rotation;
2742        let target_local = request.camera_transform.rotation.inverse()
2743            * (request.target_point - request.camera_transform.translation);
2744        let target_projection = project_camera_local(target_local, &request.config);
2745        let target_projection_text = target_projection
2746            .map(|[x, y]| format!("[{x:.3},{y:.3}]"))
2747            .unwrap_or_else(|| "none".to_string());
2748        let target_in_frame = request.target_projects_in_frame();
2749        eprintln!(
2750            "[bevy-sensor][WARN] depth readback produced no valid frame after {} retries; \
2751             falling back to a UNIFORM {:.4} m far-plane background. This is a degraded \
2752             render (no real 3D geometry) and indicates a depth-readback regression. \
2753             request mesh={} image={}x{} camera_t=[{:.4},{:.4},{:.4}] \
2754             camera_q_xyzw=[{:.6},{:.6},{:.6},{:.6}] object_rot_deg=[{:.3},{:.3},{:.3}] \
2755             object_t=[{:.4},{:.4},{:.4}] object_scale=[{:.4},{:.4},{:.4}] \
2756             target_policy={} target=[{:.6},{:.6},{:.6}] target_camera_local=[{:.6},{:.6},{:.6}] \
2757             target_projection_px={} target_in_frame={} classification={} \
2758             last_rejected_depth=({}). See render.rs DepthReadbackNode and \
2759             tests/spatial_parity.rs.",
2760            state.capture_retries,
2761            far,
2762            request.mesh_path,
2763            state.image_width,
2764            state.image_height,
2765            camera_translation.x,
2766            camera_translation.y,
2767            camera_translation.z,
2768            camera_rotation.x,
2769            camera_rotation.y,
2770            camera_rotation.z,
2771            camera_rotation.w,
2772            object_rotation.pitch,
2773            object_rotation.yaw,
2774            object_rotation.roll,
2775            request.object_translation.x,
2776            request.object_translation.y,
2777            request.object_translation.z,
2778            request.object_scale.x,
2779            request.object_scale.y,
2780            request.object_scale.z,
2781            request.targeting_policy.label(),
2782            request.target_point.x,
2783            request.target_point.y,
2784            request.target_point.z,
2785            target_local.x,
2786            target_local.y,
2787            target_local.z,
2788            target_projection_text,
2789            target_in_frame,
2790            if target_in_frame {
2791                "AllBackgroundTargetInFrame"
2792            } else {
2793                "AllBackgroundEmptyView"
2794            },
2795            depth_summary
2796        );
2797        state.depth_data = Some(vec![far; pixel_count]);
2798    }
2799
2800    let rgba_ready = state.rgba_data.is_some();
2801    let depth_ready = state.depth_data.is_some();
2802
2803    // Both valid → capture complete; stop the copier.
2804    if rgba_ready && depth_ready {
2805        state.captured = true;
2806        for mut copier in query.iter_mut() {
2807            copier.enabled = false;
2808        }
2809    }
2810
2811    if let Some(t0) = t0 {
2812        eprintln!(
2813            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2814            rgba_ready,
2815            depth_ready,
2816            state.captured,
2817            state.frame_count,
2818            t0.elapsed().as_secs_f64() * 1000.0
2819        );
2820    }
2821}
2822
2823/// Extract results and exit for headless rendering
2824fn extract_and_exit_headless(
2825    mut state: ResMut<RenderState>,
2826    request: Res<RenderRequest>,
2827    shared_output: Res<SharedOutput>,
2828    mut app_exit: MessageWriter<bevy::app::AppExit>,
2829    batch: Option<Res<HeadlessBatchSequence>>,
2830) {
2831    if batch.is_some() {
2832        return;
2833    }
2834
2835    if state.exit_requested {
2836        return;
2837    }
2838
2839    if !state.captured {
2840        return;
2841    }
2842
2843    if state.rgba_data.is_some() && state.depth_data.is_some() {
2844        let width = state.image_width;
2845        let height = state.image_height;
2846        let rgba = state.rgba_data.take().expect("checked rgba_data");
2847        let depth = state.depth_data.take().expect("checked depth_data");
2848
2849        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2850        let intrinsics = request.config.intrinsics_for_size(width, height);
2851
2852        let output = RenderOutput {
2853            rgba,
2854            depth,
2855            width,
2856            height,
2857            intrinsics,
2858            camera_transform: request.camera_transform,
2859            object_rotation: request.object_rotation.clone(),
2860            object_translation: request.object_translation,
2861            object_scale: request.object_scale,
2862            target_point: request.target_point,
2863            targeting_policy: request.targeting_policy.clone(),
2864        };
2865
2866        if let Ok(mut guard) = shared_output.0.lock() {
2867            *guard = Some(output);
2868            drop(guard);
2869            std::thread::sleep(std::time::Duration::from_millis(200));
2870        }
2871
2872        // Send AppExit event (headless apps use this instead of closing windows)
2873        app_exit.write(bevy::app::AppExit::Success);
2874        state.exit_requested = true;
2875    }
2876}
2877
2878/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2879fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2880    let Some(mut batch) = batch else {
2881        return;
2882    };
2883
2884    if batch.warmup_frames_remaining > 0 {
2885        batch.warmup_frames_remaining -= 1;
2886    }
2887}
2888
2889/// Extract one batch output and continue rendering the next viewpoint in the same app.
2890fn extract_and_continue_headless_batch(
2891    mut state: ResMut<RenderState>,
2892    mut request: ResMut<RenderRequest>,
2893    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2894    batch: Option<ResMut<HeadlessBatchSequence>>,
2895    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2896    mut depth_request: ResMut<DepthCaptureRequest>,
2897    mut image_copiers: Query<&mut ImageCopier>,
2898) {
2899    let trace = render_trace_enabled();
2900    let t0 = trace.then(std::time::Instant::now);
2901
2902    let (shared_rgba, shared_depth) = buffers;
2903    let Some(mut batch) = batch else {
2904        if let Some(t0) = t0 {
2905            eprintln!(
2906                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2907                t0.elapsed().as_secs_f64() * 1000.0
2908            );
2909        }
2910        return;
2911    };
2912
2913    if state.exit_requested || !state.captured || batch.done {
2914        if let Some(t0) = t0 {
2915            eprintln!(
2916                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2917                state.captured,
2918                batch.done,
2919                t0.elapsed().as_secs_f64() * 1000.0
2920            );
2921        }
2922        return;
2923    }
2924
2925    if state.rgba_data.is_some() && state.depth_data.is_some() {
2926        let width = state.image_width;
2927        let height = state.image_height;
2928        let rgba = state.rgba_data.take().expect("checked rgba_data");
2929        let depth = state.depth_data.take().expect("checked depth_data");
2930
2931        let intrinsics = request.config.intrinsics_for_size(width, height);
2932
2933        let output = RenderOutput {
2934            rgba,
2935            depth,
2936            width,
2937            height,
2938            intrinsics,
2939            camera_transform: batch
2940                .current_viewpoint()
2941                .unwrap_or(request.camera_transform),
2942            object_rotation: request.object_rotation.clone(),
2943            object_translation: request.object_translation,
2944            object_scale: request.object_scale,
2945            target_point: request.target_point,
2946            targeting_policy: request.targeting_policy.clone(),
2947        };
2948        batch.outputs.push(output);
2949
2950        let next_index = batch.current_index + 1;
2951        if next_index >= batch.viewpoints.len() {
2952            batch.done = true;
2953            state.exit_requested = true;
2954            return;
2955        }
2956
2957        batch.current_index = next_index;
2958        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2959
2960        if let Some(next_viewpoint) = batch.current_viewpoint() {
2961            request.camera_transform = next_viewpoint;
2962            for mut camera_transform in camera_query.iter_mut() {
2963                *camera_transform = next_viewpoint;
2964            }
2965        }
2966
2967        if let Ok(mut guard) = shared_rgba.0.lock() {
2968            *guard = None;
2969        }
2970        if let Ok(mut guard) = shared_depth.0.lock() {
2971            *guard = None;
2972        }
2973
2974        for mut copier in image_copiers.iter_mut() {
2975            copier.enabled = false;
2976        }
2977
2978        depth_request.requested = false;
2979        state.frame_count = 0;
2980        state.capture_ready = true;
2981        state.screenshot_requested = false;
2982        state.captured = false;
2983        state.rgba_data = None;
2984        state.depth_data = None;
2985        state.image_width = 0;
2986        state.image_height = 0;
2987        // Reset the per-capture settle/retry tracking too, otherwise it
2988        // accumulates across viewpoints and force-accepts an unsettled frame for
2989        // later viewpoints (breaking parity).
2990        state.capture_retries = 0;
2991        state.prev_rgba = None;
2992        state.prev_depth = None;
2993
2994        if let Some(t0) = t0 {
2995            eprintln!(
2996                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2997                batch.current_index.saturating_sub(1),
2998                batch.current_index,
2999                batch.done,
3000                t0.elapsed().as_secs_f64() * 1000.0
3001            );
3002        }
3003    } else if let Some(t0) = t0 {
3004        eprintln!(
3005            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
3006            t0.elapsed().as_secs_f64() * 1000.0
3007        );
3008    }
3009}
3010
3011// ============================================================================
3012// Persistent batch session (RenderSession)
3013//
3014// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
3015// object (PSO) compilation across multiple `render()` calls. Profile data (see
3016// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
3017// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
3018// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
3019// episodes recovers the bulk of that cost.
3020// ============================================================================
3021
/// Marker component for the per-group scene entity so we can despawn it cleanly
/// when the next `RenderSession::render()` call swaps in a different object or
/// rotation.
///
/// Carries no data of its own.
#[derive(Component)]
struct SessionScene;
3026
3027/// Session-persistent setup: render target image, camera (with prepass +
3028/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
3029/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
3030/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
3031fn setup_session_persistent_scene(
3032    mut commands: Commands,
3033    mut images: ResMut<Assets<Image>>,
3034    config: Res<SessionRenderConfig>,
3035) {
3036    let width = config.0.width;
3037    let height = config.0.height;
3038
3039    let size = Extent3d {
3040        width,
3041        height,
3042        depth_or_array_layers: 1,
3043    };
3044
3045    let mut render_target_image = Image::new_fill(
3046        size,
3047        TextureDimension::D2,
3048        &[0, 0, 0, 255],
3049        TextureFormat::Rgba8UnormSrgb,
3050        RenderAssetUsages::default(),
3051    );
3052    render_target_image.texture_descriptor.usage =
3053        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
3054
3055    let render_target_handle = images.add(render_target_image);
3056    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
3057
3058    commands.spawn((
3059        Camera3d::default(),
3060        Camera::default(),
3061        Hdr,
3062        RenderTarget::Image(render_target_handle.clone().into()),
3063        render_projection(&config.0),
3064        Msaa::Off,
3065        Transform::default(),
3066        Tonemapping::None,
3067        DepthPrepass,
3068        NormalPrepass,
3069        RenderCamera,
3070        ImageCopier {
3071            src_image: render_target_handle,
3072            enabled: false,
3073        },
3074    ));
3075
3076    let lighting = &config.0.lighting;
3077    commands.insert_resource(GlobalAmbientLight {
3078        color: Color::WHITE,
3079        brightness: lighting.ambient_brightness,
3080        ..default()
3081    });
3082
3083    if lighting.key_light_intensity > 0.0 {
3084        commands.spawn((
3085            PointLight {
3086                intensity: lighting.key_light_intensity,
3087                shadows_enabled: lighting.shadows_enabled,
3088                ..default()
3089            },
3090            Transform::from_xyz(
3091                lighting.key_light_position[0],
3092                lighting.key_light_position[1],
3093                lighting.key_light_position[2],
3094            ),
3095        ));
3096    }
3097
3098    if lighting.fill_light_intensity > 0.0 {
3099        commands.spawn((
3100            PointLight {
3101                intensity: lighting.fill_light_intensity,
3102                shadows_enabled: lighting.shadows_enabled,
3103                ..default()
3104            },
3105            Transform::from_xyz(
3106                lighting.fill_light_position[0],
3107                lighting.fill_light_position[1],
3108                lighting.fill_light_position[2],
3109            ),
3110        ));
3111    }
3112}
3113
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// Inserted by `RenderSession::new()` and read by
/// `setup_session_persistent_scene`, which uses it to size the render target,
/// build the camera projection, and configure the lights.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
3118
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    // The long-lived Bevy app built and warmed up once in `new()`.
    app: App,
    // Copy of the config passed to `new()`.
    render_config: RenderConfig,
    // RGBA readback buffer; a clone is handed to `ImageCopyPlugin` in `new()`.
    shared_rgba: SharedRgbaBuffer,
    // Depth readback buffer; a clone is handed to `DepthReadbackPlugin` in `new()`.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer phantom: opts the type out of the `Send`/`Sync` auto traits.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
3149
3150impl RenderSession {
3151    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
3152    /// `update()` so Startup systems run and the wgpu device + adapter are
3153    /// initialized. The first `render()` call still pays PSO compilation for
3154    /// the specific mesh/material combination; subsequent calls reuse the cache.
3155    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
3156        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3157        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3158
3159        let mut app = App::new();
3160        app.add_plugins(
3161            DefaultPlugins
3162                .set(bevy::asset::AssetPlugin {
3163                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3164                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3165                    // default handle). YCB meshes load from absolute paths, so allow them.
3166                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3167                    ..default()
3168                })
3169                .set(WindowPlugin {
3170                    primary_window: None,
3171                    exit_condition: ExitCondition::DontExit,
3172                    ..default()
3173                })
3174                .disable::<bevy::winit::WinitPlugin>()
3175                .disable::<LogPlugin>()
3176                .disable::<TerminalCtrlCHandlerPlugin>(),
3177        )
3178        .add_plugins(ObjPlugin)
3179        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3180        // Scene spawning panics unless those component types are registered. The
3181        // minimal headless plugin set doesn't register them, so do it explicitly.
3182        .register_type::<Mesh3d>()
3183        .register_type::<MeshMaterial3d<StandardMaterial>>()
3184        .register_type::<bevy::prelude::Transform>()
3185        .register_type::<bevy::prelude::GlobalTransform>()
3186        .register_type::<bevy::transform::components::TransformTreeChanged>()
3187        .register_type::<bevy::prelude::Visibility>()
3188        .register_type::<bevy::prelude::InheritedVisibility>()
3189        .register_type::<bevy::prelude::ViewVisibility>()
3190        .add_plugins(ImageCopyPlugin {
3191            shared_rgba: shared_rgba.clone(),
3192        })
3193        .add_plugins(DepthReadbackPlugin {
3194            shared_depth: shared_depth.clone(),
3195            near: render_config.near_plane,
3196            far: render_config.far_plane,
3197        })
3198        .insert_resource(SessionRenderConfig(render_config.clone()))
3199        .insert_resource(shared_rgba.clone())
3200        .init_resource::<RenderState>()
3201        .add_systems(Startup, setup_session_persistent_scene)
3202        .add_systems(
3203            Update,
3204            (
3205                check_assets_loaded,
3206                apply_materials,
3207                tick_headless_batch_warmup,
3208                request_headless_capture,
3209                check_headless_capture_ready,
3210                extract_and_continue_headless_batch,
3211            )
3212                .chain()
3213                // Gate the capture chain on `RenderRequest` existing. `new()`
3214                // runs a warmup `app.update()` to execute Startup (which spawns
3215                // the camera/lights/render target) before the first `render()`
3216                // call, but does not yet insert `RenderRequest`. Several systems
3217                // in this chain take `Res<RenderRequest>` (not `Option`) and
3218                // would panic on SystemState init if the resource were absent.
3219                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3220        );
3221
3222        app.finish();
3223        app.cleanup();
3224
3225        // One warmup update runs Startup systems (render target, camera, lights)
3226        // so they exist before the first `render()` call seeds the camera
3227        // transform. The Update chain is gated by `RenderRequest` existence and
3228        // is a no-op this tick. PSO compilation for specific mesh/material
3229        // combinations still happens lazily on the first real render.
3230        app.update();
3231
3232        Ok(Self {
3233            app,
3234            render_config: render_config.clone(),
3235            shared_rgba,
3236            shared_depth,
3237            _not_send_sync: std::marker::PhantomData,
3238        })
3239    }
3240
3241    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
3242    /// Returns outputs in request order.
3243    ///
3244    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
3245    /// wgpu device was lost mid-render. This call produced no output; any
3246    /// outputs from earlier `render()` calls on this session are still valid.
3247    /// Recovery: drop this `RenderSession` and construct a new one.
3248    pub fn render(
3249        &mut self,
3250        requests: &[crate::BatchRenderRequest],
3251    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
3252        use crate::{BatchRenderError, BatchRenderOutput};
3253
3254        if requests.is_empty() {
3255            return Ok(Vec::new());
3256        }
3257
3258        // Enforce homogeneity and config invariance.
3259        let first = &requests[0];
3260        if first.render_config != self.render_config {
3261            return Err(BatchRenderError::InvalidConfig(
3262                "RenderSession render_config mismatch: session was constructed with a different \
3263                 RenderConfig than the first request carries. Session config cannot change after \
3264                 `new()`; construct a new session if you need a different resolution/camera."
3265                    .to_string(),
3266            ));
3267        }
3268        for r in &requests[1..] {
3269            if r.object_dir != first.object_dir
3270                || r.object_rotation != first.object_rotation
3271                || r.object_translation != first.object_translation
3272                || r.object_scale != first.object_scale
3273                || r.render_config != first.render_config
3274            {
3275                return Err(BatchRenderError::InvalidConfig(
3276                    "Phase 1 RenderSession::render requires homogeneous requests \
3277                     (same object_dir, object transform, and render_config across the batch). \
3278                     Call render() once per group instead."
3279                        .to_string(),
3280                ));
3281            }
3282        }
3283
3284        // Canonicalize paths and validate mesh/texture presence. This matches
3285        // `render_headless_sequence`'s preconditions so the error surface stays
3286        // consistent.
3287        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
3288            BatchRenderError::InvalidConfig(format!(
3289                "Cannot canonicalize object directory {}: {}",
3290                first.object_dir.display(),
3291                e
3292            ))
3293        })?;
3294        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3295        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3296        if !mesh_path.exists() {
3297            return Err(BatchRenderError::InvalidConfig(format!(
3298                "Mesh not found: {}",
3299                mesh_path.display()
3300            )));
3301        }
3302        if !texture_path.exists() {
3303            return Err(BatchRenderError::InvalidConfig(format!(
3304                "Texture not found: {}",
3305                texture_path.display()
3306            )));
3307        }
3308
3309        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
3310
3311        // --- per-group scene swap (direct world manipulation) ---
3312        {
3313            let world = self.app.world_mut();
3314
3315            // Despawn any SessionScene entity from the previous group.
3316            let stale: Vec<Entity> = world
3317                .query_filtered::<Entity, With<SessionScene>>()
3318                .iter(world)
3319                .collect();
3320            for entity in stale {
3321                world.entity_mut(entity).despawn();
3322            }
3323
3324            // Clear shared RGBA/depth buffers so a stale payload can't leak
3325            // into the first viewpoint of this call.
3326            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3327                *guard = None;
3328            }
3329            if let Ok(mut guard) = self.shared_depth.0.lock() {
3330                *guard = None;
3331            }
3332
3333            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
3334            // frame_count, materials_applied, etc.). Default() gives all false/0.
3335            *world.resource_mut::<RenderState>() = RenderState::default();
3336
3337            // Update RenderRequest so the existing capture systems see the new
3338            // object paths, rotation, and camera transform (seeded from first vp).
3339            let new_request = RenderRequest {
3340                mesh_path: fs_path_to_asset_string(&mesh_path),
3341                texture_path: fs_path_to_asset_string(&texture_path),
3342                camera_transform: viewpoints[0],
3343                object_rotation: first.object_rotation.clone(),
3344                object_translation: first.object_translation,
3345                object_scale: first.object_scale,
3346                config: self.render_config.clone(),
3347                target_point: first.target_point,
3348                targeting_policy: first.targeting_policy.clone(),
3349            };
3350            world.insert_resource(new_request);
3351
3352            // Kick off asset loads and install the handles under the names the
3353            // existing `check_assets_loaded` system expects.
3354            let asset_server = world.resource::<AssetServer>().clone();
3355            let scene_handle: Handle<Scene> =
3356                asset_server.load(fs_path_to_asset_string(&mesh_path));
3357            let texture_handle: Handle<Image> =
3358                asset_server.load(fs_path_to_asset_string(&texture_path));
3359            world.insert_resource(LoadedScene(scene_handle.clone()));
3360            world.insert_resource(LoadedTexture(texture_handle));
3361
3362            // Spawn the new scene entity tagged so we can find + despawn it next
3363            // render() call.
3364            world.spawn((
3365                SceneRoot(scene_handle),
3366                first.object_rotation.to_transform_with_translation_scale(
3367                    first.object_translation,
3368                    first.object_scale,
3369                ),
3370                RenderedObject,
3371                SessionScene,
3372            ));
3373
3374            // Seed the camera transform to the first viewpoint now so the first
3375            // capture lines up; subsequent viewpoints are advanced by
3376            // `extract_and_continue_headless_batch`.
3377            let camera_entity = world
3378                .query_filtered::<Entity, With<RenderCamera>>()
3379                .iter(world)
3380                .next();
3381            if let Some(cam) = camera_entity {
3382                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3383                    *transform = viewpoints[0];
3384                }
3385            }
3386
3387            // Install the viewpoint sequence for this render() call. The robust
3388            // settled-frame capture (reject blank/partial readbacks, retry until
3389            // two consecutive readbacks match) absorbs the despawn/respawn
3390            // render-world settle, so a separate discarded warmup pass is not
3391            // needed and the per-object cost stays low.
3392            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
3393        }
3394
3395        // --- drive the real capture loop ---
3396        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3397        let start = std::time::Instant::now();
3398        loop {
3399            if start.elapsed() > timeout {
3400                return Err(BatchRenderError::TotalFailure(format!(
3401                    "RenderSession::render timed out after {}s",
3402                    RENDER_TIMEOUT_SECS
3403                )));
3404            }
3405
3406            self.app.update();
3407
3408            if self.app.world().resource::<HeadlessBatchSequence>().done {
3409                break;
3410            }
3411        }
3412
3413        // Collect outputs and zip with requests to produce BatchRenderOutput in
3414        // request order.
3415        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3416        if sequence.outputs.len() != requests.len() {
3417            return Err(BatchRenderError::TotalFailure(format!(
3418                "RenderSession produced {} outputs for {} requests",
3419                sequence.outputs.len(),
3420                requests.len()
3421            )));
3422        }
3423        let outputs = std::mem::take(&mut sequence.outputs);
3424
3425        Ok(requests
3426            .iter()
3427            .cloned()
3428            .zip(outputs)
3429            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
3430            .collect())
3431    }
3432}
3433
3434// ============================================================================
3435// Per-step persistent renderer (PersistentRenderer)
3436//
3437// `RenderSession` reuses the App across calls but rebuilds the scene on every
3438// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
3439// fine for the parity-gate path (one scene per episode of N viewpoints) but
3440// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
3441// the object stays loaded for the whole episode.
3442//
3443// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
3444// construction. `new()` loads mesh + texture + spawns the scene root + drives
3445// one warmup render (output discarded) so PSO compilation and material setup
// are paid up front. `render(camera, rotation)` then only overwrites the camera
// `Transform` and the scene root `Transform`, drives the capture chain until
// one frame has been captured, and returns. See issue #65.
3449// ============================================================================
3450
/// Marker for the `PersistentRenderer`'s scene root entity. We keep the
/// entity alive for the whole renderer lifetime and just mutate its
/// `Transform` when the caller-supplied object rotation changes.
///
/// `PersistentRenderer::new` spawns the scene root with this component, and
/// the `render*` methods find that entity again via a `With<PersistentScene>`
/// query before overwriting its `Transform`.
#[derive(Component)]
struct PersistentScene;
3456
/// Persistent per-step renderer. Loads the scene once at `new()` and renders
/// one frame per `render()` call by mutating the camera transform and scene
/// root rotation in-place. Built for surface-policy feedback loops where the
/// object stays fixed for the duration of an episode and the camera moves
/// every step. See issue #65.
///
/// # Thread affinity
///
/// `PersistentRenderer` must be created, used, and dropped on the same thread.
/// Holds a `bevy::App` that owns GPU resources not safe to move across
/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
///
/// # Object + config invariants
///
/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
/// object or change resolution/lighting, drop and rebuild. Rotation may change
/// freely between `render()` calls.
pub struct PersistentRenderer {
    // Bevy app built in `new()`; every render drives it with `app.update()`.
    app: App,
    // Canonicalized object directory (result of `std::fs::canonicalize` in `new()`).
    object_dir: PathBuf,
    // Clone of the config passed to `new()`.
    render_config: RenderConfig,
    // Handle also given to `ImageCopyPlugin`; reset to `None` at the start of
    // each render so an earlier payload is not reused.
    shared_rgba: SharedRgbaBuffer,
    // Handle also given to `DepthReadbackPlugin`; reset to `None` at the start
    // of each render.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer `PhantomData` opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
3482
3483impl PersistentRenderer {
3484    /// Build the App, load the scene + texture, spawn the scene root, and drive
3485    /// one warmup render whose output is discarded. After `new()` returns, the
3486    /// first user-facing `render()` call benefits from a warm PSO cache and
3487    /// applied materials.
3488    pub fn new(
3489        object_dir: &Path,
3490        render_config: &RenderConfig,
3491    ) -> Result<Self, crate::RenderError> {
3492        let object_dir =
3493            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
3494                path: object_dir.display().to_string(),
3495                reason: e.to_string(),
3496            })?;
3497        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3498        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3499        if !mesh_path.exists() {
3500            return Err(crate::RenderError::MeshNotFound(fs_path_to_asset_string(
3501                &mesh_path,
3502            )));
3503        }
3504        if !texture_path.exists() {
3505            return Err(crate::RenderError::TextureNotFound(
3506                fs_path_to_asset_string(&texture_path),
3507            ));
3508        }
3509
3510        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3511        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3512
3513        let mut app = App::new();
3514        app.add_plugins(
3515            DefaultPlugins
3516                .set(bevy::asset::AssetPlugin {
3517                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3518                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3519                    // default handle). YCB meshes load from absolute paths, so allow them.
3520                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3521                    ..default()
3522                })
3523                .set(WindowPlugin {
3524                    primary_window: None,
3525                    exit_condition: ExitCondition::DontExit,
3526                    ..default()
3527                })
3528                .disable::<bevy::winit::WinitPlugin>()
3529                .disable::<LogPlugin>()
3530                .disable::<TerminalCtrlCHandlerPlugin>(),
3531        )
3532        .add_plugins(ObjPlugin)
3533        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3534        // Scene spawning panics unless those component types are registered. The
3535        // minimal headless plugin set doesn't register them, so do it explicitly.
3536        .register_type::<Mesh3d>()
3537        .register_type::<MeshMaterial3d<StandardMaterial>>()
3538        .register_type::<bevy::prelude::Transform>()
3539        .register_type::<bevy::prelude::GlobalTransform>()
3540        .register_type::<bevy::transform::components::TransformTreeChanged>()
3541        .register_type::<bevy::prelude::Visibility>()
3542        .register_type::<bevy::prelude::InheritedVisibility>()
3543        .register_type::<bevy::prelude::ViewVisibility>()
3544        .add_plugins(ImageCopyPlugin {
3545            shared_rgba: shared_rgba.clone(),
3546        })
3547        .add_plugins(DepthReadbackPlugin {
3548            shared_depth: shared_depth.clone(),
3549            near: render_config.near_plane,
3550            far: render_config.far_plane,
3551        })
3552        .insert_resource(SessionRenderConfig(render_config.clone()))
3553        .insert_resource(shared_rgba.clone())
3554        .init_resource::<RenderState>()
3555        .add_systems(Startup, setup_session_persistent_scene)
3556        .add_systems(
3557            Update,
3558            (
3559                check_assets_loaded,
3560                apply_materials,
3561                tick_headless_batch_warmup,
3562                request_headless_capture,
3563                check_headless_capture_ready,
3564                extract_and_continue_headless_batch,
3565            )
3566                .chain()
3567                // Same gate as RenderSession: capture chain only runs once
3568                // RenderRequest is installed. Startup runs first via the
3569                // warmup `app.update()` below.
3570                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3571        );
3572
3573        app.finish();
3574        app.cleanup();
3575        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
3576        app.update();
3577
3578        // Install scene + warmup render request. The warmup output is discarded
3579        // — its purpose is to pay PSO compilation and material application
3580        // upfront so the first user-facing render() is fast. Use a real TBP
3581        // viewpoint rather than Transform::default(), which places the camera
3582        // at the object origin and forces a flat-depth fallback before any
3583        // caller-requested surface-policy render runs.
3584        let warmup_camera = persistent_warmup_camera_transform();
3585        let initial_request = RenderRequest {
3586            mesh_path: fs_path_to_asset_string(&mesh_path),
3587            texture_path: fs_path_to_asset_string(&texture_path),
3588            camera_transform: warmup_camera,
3589            object_rotation: ObjectRotation::identity(),
3590            object_translation: Vec3::ZERO,
3591            object_scale: Vec3::ONE,
3592            config: render_config.clone(),
3593            target_point: Vec3::ZERO,
3594            targeting_policy: TargetingPolicy::Origin,
3595        };
3596
3597        {
3598            let world = app.world_mut();
3599            let asset_server = world.resource::<AssetServer>().clone();
3600            let scene_handle: Handle<Scene> =
3601                asset_server.load(fs_path_to_asset_string(&mesh_path));
3602            let texture_handle: Handle<Image> =
3603                asset_server.load(fs_path_to_asset_string(&texture_path));
3604            world.insert_resource(LoadedScene(scene_handle.clone()));
3605            world.insert_resource(LoadedTexture(texture_handle));
3606            world.insert_resource(initial_request);
3607            world.spawn((
3608                SceneRoot(scene_handle),
3609                ObjectRotation::identity()
3610                    .to_transform_with_translation_scale(Vec3::ZERO, Vec3::ONE),
3611                RenderedObject,
3612                PersistentScene,
3613            ));
3614            if let Some(cam) = world
3615                .query_filtered::<Entity, With<RenderCamera>>()
3616                .iter(world)
3617                .next()
3618            {
3619                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3620                    *transform = warmup_camera;
3621                }
3622            }
3623            world.insert_resource(HeadlessBatchSequence::new(vec![warmup_camera]));
3624        }
3625
3626        // Drive the warmup render to completion.
3627        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3628        let start = std::time::Instant::now();
3629        loop {
3630            if start.elapsed() > timeout {
3631                return Err(crate::RenderError::RenderFailed(format!(
3632                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
3633                )));
3634            }
3635            app.update();
3636            if app.world().resource::<HeadlessBatchSequence>().done {
3637                break;
3638            }
3639        }
3640        // Discard the warmup output so it doesn't leak into the first real
3641        // render() call's output buffer.
3642        app.world_mut()
3643            .resource_mut::<HeadlessBatchSequence>()
3644            .outputs
3645            .clear();
3646
3647        Ok(Self {
3648            app,
3649            object_dir,
3650            render_config: render_config.clone(),
3651            shared_rgba,
3652            shared_depth,
3653            _not_send_sync: std::marker::PhantomData,
3654        })
3655    }
3656
3657    /// Render one frame from the given camera transform and object rotation.
3658    /// Reuses the loaded scene + warm PSO cache from `new()`.
3659    pub fn render(
3660        &mut self,
3661        camera_transform: &Transform,
3662        object_rotation: &ObjectRotation,
3663    ) -> Result<RenderOutput, crate::RenderError> {
3664        self.render_with_object_transform(camera_transform, object_rotation, Vec3::ZERO, Vec3::ONE)
3665    }
3666
3667    /// Render one frame with target metadata available during capture readiness checks.
3668    pub fn render_with_target(
3669        &mut self,
3670        camera_transform: &Transform,
3671        object_rotation: &ObjectRotation,
3672        target_point: Vec3,
3673        targeting_policy: TargetingPolicy,
3674    ) -> Result<RenderOutput, crate::RenderError> {
3675        self.render_with_target_and_object_transform(
3676            camera_transform,
3677            object_rotation,
3678            Vec3::ZERO,
3679            Vec3::ONE,
3680            target_point,
3681            targeting_policy,
3682        )
3683    }
3684
3685    /// Render one frame with explicit object translation and scale.
3686    pub fn render_with_object_transform(
3687        &mut self,
3688        camera_transform: &Transform,
3689        object_rotation: &ObjectRotation,
3690        object_translation: Vec3,
3691        object_scale: Vec3,
3692    ) -> Result<RenderOutput, crate::RenderError> {
3693        self.render_with_target_and_object_transform(
3694            camera_transform,
3695            object_rotation,
3696            object_translation,
3697            object_scale,
3698            Vec3::ZERO,
3699            TargetingPolicy::Origin,
3700        )
3701    }
3702
3703    /// Render one frame with explicit object transform and target metadata.
3704    pub fn render_with_target_and_object_transform(
3705        &mut self,
3706        camera_transform: &Transform,
3707        object_rotation: &ObjectRotation,
3708        object_translation: Vec3,
3709        object_scale: Vec3,
3710        target_point: Vec3,
3711        targeting_policy: TargetingPolicy,
3712    ) -> Result<RenderOutput, crate::RenderError> {
3713        let camera_transform = *camera_transform;
3714        let object_rotation_owned = object_rotation.clone();
3715        let target_policy_owned = targeting_policy.clone();
3716
3717        {
3718            let world = self.app.world_mut();
3719
3720            // Update the persistent scene root rotation. Always-write avoids
3721            // the cost of an extra ObjectRotation comparison per call; the
3722            // mutation itself is a single Transform write.
3723            let scene_entity = world
3724                .query_filtered::<Entity, With<PersistentScene>>()
3725                .iter(world)
3726                .next();
3727            if let Some(entity) = scene_entity {
3728                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
3729                    *transform = object_rotation_owned
3730                        .to_transform_with_translation_scale(object_translation, object_scale);
3731                }
3732            }
3733
3734            // Update the camera transform.
3735            let cam_entity = world
3736                .query_filtered::<Entity, With<RenderCamera>>()
3737                .iter(world)
3738                .next();
3739            if let Some(cam) = cam_entity {
3740                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3741                    *transform = camera_transform;
3742                }
3743            }
3744
3745            // Reset per-frame state, preserving scene_loaded / texture_loaded
3746            // / materials_applied / materials_applied_frame. The asset-load
3747            // and material-apply work was paid in `new()`'s warmup; we only
3748            // need to clear the per-capture state.
3749            //
3750            // `capture_ready = true` short-circuits `apply_materials` on
3751            // every tick of the render loop (no need to re-check material
3752            // application — it stays applied for the renderer's lifetime).
3753            // It does NOT short-circuit `request_headless_capture`, which
3754            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3755            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3756            // without that warmup gate, request_headless_capture fires same-
3757            // tick as the transform writes, capturing the previous render's
3758            // target before the new transforms have propagated.
3759            {
3760                let mut state = world.resource_mut::<RenderState>();
3761                state.exit_requested = false;
3762                state.screenshot_requested = false;
3763                state.captured = false;
3764                state.rgba_data = None;
3765                state.depth_data = None;
3766                state.frame_count = 0;
3767                state.image_width = 0;
3768                state.image_height = 0;
3769                state.capture_ready = true;
3770                state.capture_retries = 0;
3771                state.prev_rgba = None;
3772                state.prev_depth = None;
3773            }
3774
3775            // Clear shared GPU readback buffers so a stale payload from the
3776            // previous render() can't leak into this call's output.
3777            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3778                *guard = None;
3779            }
3780            if let Ok(mut guard) = self.shared_depth.0.lock() {
3781                *guard = None;
3782            }
3783
3784            // Update RenderRequest (used by extract_and_continue_headless_batch
3785            // to stamp the output with the right intrinsics + rotation).
3786            {
3787                let mut req = world.resource_mut::<RenderRequest>();
3788                req.camera_transform = camera_transform;
3789                req.object_rotation = object_rotation_owned.clone();
3790                req.object_translation = object_translation;
3791                req.object_scale = object_scale;
3792                req.target_point = target_point;
3793                req.targeting_policy = target_policy_owned.clone();
3794            }
3795
3796            // Install fresh single-element batch with warmup frames so
3797            // `request_headless_capture` is gated until the new transforms
3798            // have propagated through the render pipeline.
3799            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3800            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3801            world.insert_resource(batch);
3802        }
3803
3804        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3805        let start = std::time::Instant::now();
3806        loop {
3807            if start.elapsed() > timeout {
3808                return Err(crate::RenderError::RenderFailed(format!(
3809                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3810                )));
3811            }
3812            self.app.update();
3813            if self.app.world().resource::<HeadlessBatchSequence>().done {
3814                break;
3815            }
3816        }
3817
3818        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3819        let mut outputs = std::mem::take(&mut sequence.outputs);
3820        if outputs.len() != 1 {
3821            return Err(crate::RenderError::RenderFailed(format!(
3822                "PersistentRenderer::render expected 1 output, got {}",
3823                outputs.len()
3824            )));
3825        }
3826
3827        let output = outputs
3828            .remove(0)
3829            .with_targeting(target_point, targeting_policy);
3830        reject_all_background_target_in_frame(&output, &self.render_config)?;
3831        Ok(output)
3832    }
3833
3834    /// Path to the YCB object directory this renderer was bound to.
3835    pub fn object_dir(&self) -> &Path {
3836        &self.object_dir
3837    }
3838
3839    /// The `RenderConfig` this renderer was constructed with.
3840    pub fn render_config(&self) -> &RenderConfig {
3841        &self.render_config
3842    }
3843
3844    /// Explicit close. Equivalent to dropping; provided to match the API
3845    /// proposal in #65 for callers that want lifetime-explicit teardown.
3846    pub fn close(self) {
3847        // Drop runs on return.
3848    }
3849}
3850
3851/// Render directly to files (for subprocess mode).
3852///
3853/// This function saves RGBA and depth data directly to files before exiting.
3854/// Designed for subprocess rendering where the process will exit after rendering.
3855#[allow(clippy::too_many_arguments)]
3856pub fn render_to_files(
3857    object_dir: &Path,
3858    camera_transform: &Transform,
3859    object_rotation: &ObjectRotation,
3860    object_translation: Vec3,
3861    object_scale: Vec3,
3862    config: &RenderConfig,
3863    rgba_path: &Path,
3864    depth_path: &Path,
3865) -> Result<(), RenderError> {
3866    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3867    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3868
3869    if !mesh_path.exists() {
3870        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
3871            &mesh_path,
3872        )));
3873    }
3874    if !texture_path.exists() {
3875        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
3876            &texture_path,
3877        )));
3878    }
3879
3880    let request = RenderRequest {
3881        mesh_path: fs_path_to_asset_string(&mesh_path),
3882        texture_path: fs_path_to_asset_string(&texture_path),
3883        camera_transform: *camera_transform,
3884        object_rotation: object_rotation.clone(),
3885        object_translation,
3886        object_scale,
3887        config: config.clone(),
3888        target_point: Vec3::ZERO,
3889        targeting_policy: TargetingPolicy::Origin,
3890    };
3891
3892    // Shared state for output
3893    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
3894    let output_poll = shared_output.clone();
3895
3896    // Clone paths for watchdog thread
3897    let rgba_path = rgba_path.to_path_buf();
3898    let depth_path = depth_path.to_path_buf();
3899
3900    // Shared buffer for RGBA data from headless render target
3901    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3902
3903    // Shared buffer for depth readback
3904    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3905
3906    // Spawn watchdog thread that saves files and exits
3907    std::thread::spawn(move || {
3908        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3909        let start = std::time::Instant::now();
3910        let poll_interval = std::time::Duration::from_millis(100);
3911
3912        loop {
3913            if let Ok(guard) = output_poll.0.lock() {
3914                if let Some(output) = guard.as_ref() {
3915                    // Save RGBA as PNG
3916                    if let Err(e) =
3917                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
3918                    {
3919                        eprintln!("Failed to save RGBA: {:?}", e);
3920                        std::process::exit(1);
3921                    }
3922
3923                    // Save depth as binary f32
3924                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
3925                        eprintln!("Failed to save depth: {:?}", e);
3926                        std::process::exit(1);
3927                    }
3928
3929                    std::process::exit(0);
3930                }
3931            }
3932
3933            if start.elapsed() > timeout {
3934                eprintln!(
3935                    "Error: Render timeout after {} seconds",
3936                    RENDER_TIMEOUT_SECS
3937                );
3938                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
3939                std::process::exit(1);
3940            }
3941
3942            std::thread::sleep(poll_interval);
3943        }
3944    });
3945
3946    // Configure rendering backend for this environment.
3947    // Use OnceLock so env vars are only set once per process — repeated calls
3948    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
3949    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
3950    // requires a persistent renderer (tracked in issue #14).
3951    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
3952    BACKEND_INIT.get_or_init(|| {
3953        let backend_config = BackendConfig::headless();
3954        backend_config.apply_env();
3955    });
3956
3957    // Run Bevy app with HEADLESS configuration
3958    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
3959
3960    // Unreachable - watchdog thread exits the process
3961    Err(RenderError::RenderFailed(
3962        "Render did not complete".to_string(),
3963    ))
3964}
3965
3966/// Save RGBA data to PNG file
3967fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3968    use image::{ImageBuffer, Rgba};
3969
3970    // Create parent directories if needed
3971    if let Some(parent) = path.parent() {
3972        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3973    }
3974
3975    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3976        ImageBuffer::from_raw(width, height, rgba.to_vec())
3977            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3978
3979    img.save(path).map_err(|e| e.to_string())
3980}
3981
/// Write depth samples to `path` as a headerless binary blob (f64 for TBP precision).
///
/// Each sample is stored as 8 little-endian bytes, in slice order. Missing parent
/// directories of `path` are created first.
///
/// # Errors
///
/// Returns the underlying I/O error rendered as a `String` if the directory cannot
/// be created or the file cannot be written.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists before writing.
    if let Some(dir) = path.parent() {
        std::fs::create_dir_all(dir).map_err(|e| e.to_string())?;
    }

    // Serialize into one pre-sized buffer so the file is written in a single call.
    let mut encoded: Vec<u8> = Vec::with_capacity(depth.len() * std::mem::size_of::<f64>());
    for sample in depth {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &encoded).map_err(|e| e.to_string())
}
3992
#[cfg(test)]
mod depth_readback_summary_tests {
    use super::{
        is_all_background_depth, is_capture_foreground_depth, is_uniform_rgba_frame,
        project_camera_local, reject_all_background_target_in_frame, target_projects_in_frame,
        DepthReadbackSummary, RenderRequest,
    };
    use crate::{ObjectRotation, RenderConfig, RenderOutput, TargetingPolicy, Transform, Vec3};

    /// Camera half a unit along +Z, looking back at the origin with +Y up.
    fn camera_facing_origin() -> Transform {
        Transform::from_translation(Vec3::new(0.0, 0.0, 0.5)).looking_at(Vec3::ZERO, Vec3::Y)
    }

    #[test]
    fn capture_foreground_depth_matches_persistent_capture_gate() {
        let near = 0.01;
        let far = 10.0;

        // (depth sample, expected to count as foreground)
        let cases = [
            (near, false),
            (0.010005, false),
            (0.01002, true),
            (0.025, true),
            (9.98, true),
            (9.99, false),
            (f64::NAN, false),
        ];

        for (depth, expected) in cases {
            assert_eq!(
                is_capture_foreground_depth(depth, near, far),
                expected,
                "unexpected foreground classification for depth {depth}"
            );
        }
    }

    #[test]
    fn depth_readback_summary_classifies_all_far_frames() {
        let stats = DepthReadbackSummary::from_depth(&[10.0, 10.0, 9.99], 0.01, 10.0);

        // Sample bookkeeping.
        assert_eq!((stats.samples, stats.finite, stats.invalid), (3, 3, 0));
        // Every sample lands in the far/background bucket.
        assert_eq!(
            (stats.foreground, stats.near_or_zero, stats.far_or_background),
            (0, 0, 3)
        );
        // Overall range is tracked; the foreground range stays empty.
        assert_eq!((stats.min, stats.max), (Some(9.99), Some(10.0)));
        assert_eq!((stats.foreground_min, stats.foreground_max), (None, None));
    }

    #[test]
    fn depth_readback_summary_keeps_foreground_range_when_depth_exists() {
        let samples = [0.0, 0.010005, 0.025, 1.5, 10.0];
        let stats = DepthReadbackSummary::from_depth(&samples, 0.01, 10.0);

        assert_eq!((stats.samples, stats.finite), (5, 5));
        assert_eq!(
            (stats.foreground, stats.near_or_zero, stats.far_or_background),
            (2, 2, 1)
        );
        assert_eq!((stats.min, stats.max), (Some(0.0), Some(10.0)));
        assert_eq!(
            (stats.foreground_min, stats.foreground_max),
            (Some(0.025), Some(1.5))
        );
    }

    #[test]
    fn depth_readback_summary_counts_invalid_samples() {
        let samples = [f64::NAN, f64::INFINITY, 0.2];
        let stats = DepthReadbackSummary::from_depth(&samples, 0.01, 10.0);

        assert_eq!((stats.samples, stats.finite, stats.invalid), (3, 1, 2));
        assert_eq!(stats.foreground, 1);
        // Only the single finite sample contributes to the range.
        assert_eq!((stats.min, stats.max), (Some(0.2), Some(0.2)));
    }

    #[test]
    fn all_background_depth_accepts_far_plane_only() {
        // (depth buffer, expected "all background" verdict)
        let cases: [(&[f64], bool); 5] = [
            (&[10.0, 9.99], true),
            (&[10.0, 9.98], false),
            (&[10.0, 0.010005], false),
            (&[10.0, f64::NAN], false),
            (&[], false),
        ];

        for (depth, expected) in cases {
            assert_eq!(
                is_all_background_depth(depth, 10.0),
                expected,
                "unexpected all-background verdict for {depth:?}"
            );
        }
    }

    #[test]
    fn uniform_rgba_frame_detects_blank_readbacks() {
        // (RGBA bytes, expected "uniform frame" verdict)
        let cases: [(&[u8], bool); 3] = [
            (&[1, 2, 3, 255, 1, 2, 3, 255], true),
            (&[1, 2, 3, 255, 4, 2, 3, 255], false),
            (&[], false),
        ];

        for (rgba, expected) in cases {
            assert_eq!(
                is_uniform_rgba_frame(rgba),
                expected,
                "unexpected uniform-frame verdict for {rgba:?}"
            );
        }
    }

    #[test]
    fn target_projection_matches_center_pixel_for_targeted_camera() {
        let config = RenderConfig::tbp_default();
        let camera = camera_facing_origin();

        // A point straight ahead in camera space must land on the image centre.
        let straight_ahead = Vec3::new(0.0, 0.0, -0.5);
        let pixel =
            project_camera_local(straight_ahead, &config).expect("target in front of camera");
        assert!((pixel[0] - 32.0).abs() < 1e-9);
        assert!((pixel[1] - 32.0).abs() < 1e-9);

        assert!(target_projects_in_frame(Vec3::ZERO, &camera, &config));
    }

    #[test]
    fn target_aware_requests_reject_stable_empty_view_when_target_is_in_frame() {
        let mut request = RenderRequest {
            mesh_path: "mesh.obj".to_string(),
            texture_path: "texture.png".to_string(),
            camera_transform: camera_facing_origin(),
            object_rotation: ObjectRotation::identity(),
            object_translation: Vec3::ZERO,
            object_scale: Vec3::ONE,
            config: RenderConfig::tbp_default(),
            target_point: Vec3::ZERO,
            targeting_policy: TargetingPolicy::MeshCenter,
        };

        // Mesh-centre targeting with the target in frame: an empty view is not acceptable.
        assert!(!request.accepts_stable_empty_view());

        // Origin targeting keeps accepting empty views.
        request.targeting_policy = TargetingPolicy::Origin;
        assert!(request.accepts_stable_empty_view());

        // Mesh-centre targeting again, but with the target moved far off to the side.
        request.targeting_policy = TargetingPolicy::MeshCenter;
        request.target_point = Vec3::new(5.0, 0.0, 0.0);
        assert!(request.accepts_stable_empty_view());
    }

    #[test]
    fn target_aware_outputs_error_on_all_background_target_in_frame() {
        let config = RenderConfig {
            width: 2,
            height: 2,
            ..RenderConfig::tbp_default()
        };

        // A 2x2 frame: black pixels, every depth sample sitting on the far plane.
        let all_background = RenderOutput {
            rgba: vec![0; 2 * 2 * 4],
            depth: vec![config.far_plane as f64; 2 * 2],
            width: config.width,
            height: config.height,
            intrinsics: config.intrinsics(),
            camera_transform: camera_facing_origin(),
            object_rotation: ObjectRotation::identity(),
            object_translation: Vec3::ZERO,
            object_scale: Vec3::ONE,
            target_point: Vec3::ZERO,
            targeting_policy: TargetingPolicy::MeshCenter,
        };

        let rejection = reject_all_background_target_in_frame(&all_background, &config)
            .expect_err("target-in-frame all-background should be rejected");
        assert!(rejection
            .to_string()
            .contains("all-background target-in-frame"));

        // One foreground sample is enough to pass.
        let mut with_foreground = all_background.clone();
        with_foreground.depth[0] = 0.5;
        reject_all_background_target_in_frame(&with_foreground, &config)
            .expect("foreground target-aware output should pass");

        // The same empty frame is tolerated under origin targeting.
        let origin_targeted = RenderOutput {
            targeting_policy: TargetingPolicy::Origin,
            ..all_background
        };
        reject_all_background_target_in_frame(&origin_targeted, &config)
            .expect("origin-targeted empty views remain compatibility-accepted");
    }
}
4155
#[cfg(test)]
mod smoke_tests {
    use super::{
        headless_scene_setup_count, persistent_warmup_camera_transform,
        reset_headless_scene_setup_count,
    };
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ for a small centered cube (0.2 units per side) with UVs.
    ///
    /// It stays visible from all default TBP viewpoints and does not need any YCB
    /// downloads.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Row-major texels of the 2x2 synthetic texture: (0,0), (1,0), (0,1), (1,1).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],
        [48, 255, 48, 255],
        [48, 48, 255, 255],
        [255, 255, 64, 255],
    ];

    /// Write the synthetic cube mesh and texture into a fresh temp directory laid out
    /// as `<tmp>/synthetic_cube/google_16k/{textured.obj,texture_map.png}`.
    ///
    /// The returned `TempDir` owns the files; keep it alive while rendering.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let asset_dir = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&asset_dir).expect("create synthetic google_16k dir");

        std::fs::write(asset_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        let texture =
            ImageBuffer::from_fn(2, 2, |x, y| Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize]));
        texture
            .save(asset_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    #[test]
    fn persistent_warmup_camera_is_a_real_viewpoint() {
        let warmup = persistent_warmup_camera_transform();

        let distance_from_origin = warmup.translation.length();
        assert!(
            distance_from_origin > 0.1,
            "persistent warmup must not place the camera at the object origin"
        );

        // The camera's forward axis (-Z) should line up with the direction to the origin.
        let alignment = (warmup.rotation * Vec3::NEG_Z).dot(-warmup.translation.normalize());
        assert!(
            alignment > 0.99,
            "persistent warmup camera should look at the object origin"
        );
    }

    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let synthetic = write_synthetic_object();
        let object_dir = synthetic.path().join("synthetic_cube");
        let config = RenderConfig::tbp_default();
        let request_count = 5usize;

        // One request per viewpoint, all sharing the same object and render config.
        let requests: Vec<_> = crate::generate_viewpoints(&ViewpointConfig::default())
            .iter()
            .copied()
            .take(request_count)
            .map(|viewpoint| BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                object_translation: Vec3::ZERO,
                object_scale: Vec3::ONE,
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            })
            .collect();

        let timer = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;

        for (idx, frame) in outputs.iter().enumerate() {
            assert_eq!(frame.width, config.width, "output {idx} width mismatch");
            assert_eq!(frame.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                frame.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                frame.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero colour channel (alpha is ignored).
            let has_visible_color = frame
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_visible_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}