Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::{LoadState, RenderAssetUsages};
42use bevy::camera::RenderTarget;
43use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
44use bevy::core_pipeline::tonemapping::Tonemapping;
45use bevy::ecs::query::QueryItem;
46use bevy::light::GlobalAmbientLight;
47use bevy::log::LogPlugin;
48use bevy::prelude::*;
49use bevy::render::camera::ExtractedCamera;
50use bevy::render::render_asset::RenderAssets;
51use bevy::render::render_graph::{
52    Node, NodeRunError, RenderGraphContext, RenderGraphExt, RenderLabel, ViewNode, ViewNodeRunner,
53};
54use bevy::render::render_resource::{
55    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d,
56    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
57    TextureDimension, TextureFormat, TextureUsages,
58};
59use bevy::render::renderer::RenderQueue;
60use bevy::render::renderer::{RenderContext, RenderDevice};
61use bevy::render::texture::GpuImage;
62use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
63use bevy::render::view::{ExtractedView, Hdr, ViewDepthTexture};
64use bevy::render::{Extract, Render, RenderApp, RenderSystems};
65use bevy::window::{ExitCondition, WindowPlugin};
66use bevy_obj::ObjPlugin;
67use std::fs::File;
68use std::io::Read as IoRead;
69use std::path::{Path, PathBuf};
70#[cfg(test)]
71use std::sync::atomic::{AtomicUsize, Ordering};
72use std::sync::{Arc, Mutex, OnceLock};
73use std::time::Duration;
74
75use crate::{
76    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
77    TargetingPolicy,
78};
79use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
80
81/// Watchdog timeout for a single render, in seconds.
82///
83/// Bounds how long any single render path waits before declaring failure.
84/// 180s accommodates first-run wgpu shader compilation on Windows, which
85/// can take well over 60s on a cold GPU cache (see commit 9cd1d11).
86const RENDER_TIMEOUT_SECS: u64 = 180;
87
88/// Warmup frames after each camera move in `render_headless_sequence`.
89///
90/// After writing a new camera `Transform`, Bevy needs at least one frame for
91/// transform propagation + render-world extract before the next capture is
92/// valid. Historically set to 3 as a conservative cushion; reducing directly
93/// shortens per-viewpoint wall-clock since `app.update()` in the batch path
94/// is not rate-limited. Validated against the pixel-exact hardware test
95/// `test_batch_render_matches_sequential_episode_outputs`.
96const BATCH_WARMUP_FRAMES: u32 = 1;
97
98/// Warmup frames at the start of each `PersistentRenderer::render()` call.
99///
100/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
101/// batch because `extract_and_continue_headless_batch` writes the next
102/// camera transform *and* clears the shared GPU readback buffers in the
103/// same tick — so the in-flight copy from the previous viewpoint has
104/// already drained by the time the next capture is gated.
105///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
112///
113/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
114/// ticks of warmup gives Windows/DX12 enough room to drain the previous
115/// readback and capture the post-propagation color target:
116///   - tick 0: transforms propagate, render runs (no copy enabled)
117///   - tick 1: previous in-flight readback drains (no copy enabled)
118///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
119///   - tick 3: shared buffers populated → captured → batch finalized
120const PERSISTENT_WARMUP_FRAMES: u32 = 3;
121
122fn persistent_warmup_camera_transform() -> Transform {
123    crate::generate_viewpoints(&crate::ViewpointConfig::default())
124        .into_iter()
125        .next()
126        .unwrap_or_else(|| Transform::from_xyz(0.0, 0.0, 0.5).looking_at(Vec3::ZERO, Vec3::Y))
127}
128
/// Report whether render tracing is enabled via `BEVY_SENSOR_RENDER_TRACE`.
///
/// Tracing is on whenever the variable is present in the process environment,
/// regardless of its value (empty and non-Unicode values count as "set").
/// The check is a single environment lookup, so it is cheap enough to call
/// from per-frame systems; gate all tracing output behind this.
#[inline]
fn render_trace_enabled() -> bool {
    // `var_os` rather than `var`: `var` returns `Err` for a value that is not
    // valid Unicode, which would silently report tracing as disabled even
    // though the variable is set.
    std::env::var_os("BEVY_SENSOR_RENDER_TRACE").is_some()
}
135
/// Render a filesystem path as a string usable as a Bevy asset path.
///
/// On Windows, `std::fs::canonicalize` yields a verbatim path of the form
/// `\\?\C:\...`. Bevy's `AssetPath` parser cannot handle that prefix, so the
/// asset would silently never load. This helper drops a leading `\\?\` (when
/// present) and rewrites every `\` as `/`, so the absolute path resolves
/// through the default file asset source on every platform.
fn fs_path_to_asset_string(path: &std::path::Path) -> String {
    let rendered = path.display().to_string();
    let without_verbatim = match rendered.strip_prefix(r"\\?\") {
        Some(rest) => rest,
        None => rendered.as_str(),
    };
    without_verbatim.replace('\\', "/")
}
148
/// Report whether a display server appears to be reachable for windowed
/// rendering.
///
/// Returns `true` when either `DISPLAY` or `WAYLAND_DISPLAY` is set in the
/// environment (checked in that order), `false` otherwise.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
156
/// Heuristically detect WSL2 (which doesn't support Vulkan window surfaces).
///
/// Reads `/proc/version` and returns `true` when its lowercased contents
/// contain `microsoft` or `wsl`. Returns `false` when the file cannot be read
/// (for example on platforms without `/proc`).
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            ["microsoft", "wsl"]
                .iter()
                .any(|marker| lowered.contains(*marker))
        }
        Err(_) => false,
    }
}
166
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`. The derived `Default` starts every counter at
/// zero, every flag at `false`, and every optional buffer at `None`.
#[derive(Resource, Default)]
struct RenderState {
    /// Frame counter; the `*_frame` fields below record its value at specific
    /// events.
    frame_count: u32,
    /// NOTE(review): presumably set once the scene/mesh asset has loaded —
    /// confirm against the system that writes it.
    scene_loaded: bool,
    /// NOTE(review): presumably set once the texture has loaded (see
    /// `texture_ready_frame`) — confirm against the system that writes it.
    texture_loaded: bool,
    /// Whether materials have been applied (see `materials_applied_frame`).
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// `frame_count` when the texture finished loading. Capture waits a small
    /// margin past this for GPU image preparation. The material (and therefore
    /// the main-pass pipeline) is applied earlier, so by the time the texture is
    /// ready the pipeline has already compiled.
    texture_ready_frame: u32,
    /// Gate for starting capture; see `materials_applied_frame` for how it is
    /// delayed.
    capture_ready: bool,
    /// NOTE(review): presumably marks that a capture has already been
    /// requested so it is not requested twice — confirm.
    screenshot_requested: bool,
    /// Number of frames spent waiting for a *valid* (non-blank / valid-depth)
    /// readback. The one-shot GPU capture is nondeterministic and occasionally
    /// reads a uniform clear-color frame; we reject those and keep capturing
    /// until a real frame lands, bounded by this counter.
    capture_retries: u32,
    /// Previous frame's RGBA readback. The capture is accepted only once two
    /// consecutive readbacks are identical (the render has settled), so partial
    /// in-progress frames aren't captured and every render path yields the same
    /// fully-drawn image (required for byte-exact cross-path parity).
    prev_rgba: Option<Vec<u8>>,
    /// Previous frame's depth readback, for the same settle-detection as
    /// `prev_rgba` (depth parity is asserted to ~1e-9, i.e. bit-exact).
    prev_depth: Option<Vec<f64>>,
    /// NOTE(review): presumably set once a capture has been accepted — confirm.
    captured: bool,
    /// NOTE(review): presumably set once application exit has been requested —
    /// confirm.
    exit_requested: bool,
    /// Marked `dead_code`: not read anywhere the compiler can see.
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// Captured RGBA bytes, `None` until available.
    rgba_data: Option<Vec<u8>>,
    /// Captured depth values, `None` until available.
    depth_data: Option<Vec<f64>>,
    /// Image width — presumably in pixels; TODO confirm.
    image_width: u32,
    /// Image height — presumably in pixels; TODO confirm.
    image_height: u32,
}
207
/// Test-only global counter. Judging by the name it counts headless scene
/// setups; the code that increments it is outside this section.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Test-only: reset [`HEADLESS_SCENE_SETUP_COUNT`] to zero.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Test-only: read the current value of [`HEADLESS_SCENE_SETUP_COUNT`].
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
220
/// Shared buffer for screenshot callback to write into.
///
/// A cloneable handle (`Arc<Mutex<..>>`) around an optional
/// `(bytes, u32, u32)` tuple — presumably `(rgba_bytes, width, height)` by
/// analogy with `SharedDepthBuffer`; TODO confirm against the writer.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
226
/// Shared buffer for depth data from GPU readback.
/// Contains: (linear_depth_values, width, height)
/// Uses f64 for TBP numerical precision compatibility.
///
/// Cloning shares the same underlying `Arc<Mutex<..>>`; `DepthReadbackPlugin`
/// inserts clones of one handle into both the main app and the render app.
/// The default value holds `None`.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
233
234// ============================================================================
235// Depth Readback Infrastructure
236// ============================================================================
237
/// Request to capture depth - extracted from main world to render world.
///
/// `extract_depth_request` copies this resource into the render world, where
/// `DepthReadbackNode` reads it.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, `DepthReadbackNode::run` returns without copying anything.
    requested: bool,
    /// Near-plane distance supplied by `DepthReadbackPlugin`. The readback node
    /// does not read it; linearization uses the view's projection matrix.
    near: f32,
    /// Far-plane distance; copied into each `PendingDepthCapture`.
    far: f32,
}
245
/// Pending depth capture info for async processing.
///
/// `m22`/`m32` are the relevant entries of the view's reverse-Z projection
/// matrix (`clip_from_view`), captured at copy time so the CPU-side
/// linearization matches the exact projection the GPU rendered with. This keeps
/// depth output robust if projection construction or backend behavior changes.
struct PendingDepthCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the depth-texture
    /// copy; each row is padded to the 256-byte copy alignment.
    buffer: Buffer,
    /// Width of the copied region (the camera's physical target width).
    width: u32,
    /// Height of the copied region (the camera's physical target height).
    height: u32,
    /// `clip_from_view.z_axis.z` of the rendered view.
    m22: f32,
    /// `clip_from_view.w_axis.z` of the rendered view.
    m32: f32,
    /// Far-plane distance taken from the `DepthCaptureRequest`.
    far: f32,
}
260
261fn render_projection(config: &RenderConfig) -> Projection {
262    let near = config.near_plane;
263    Projection::Perspective(PerspectiveProjection {
264        fov: config.fov_radians(),
265        near,
266        far: config.far_plane,
267        near_clip_plane: Vec4::new(0.0, 0.0, -1.0, -near),
268        ..default()
269    })
270}
271
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// Wrapped in `Arc<Mutex<..>>` because `DepthReadbackNode::run` only has
/// shared (`&World`) access when it pushes a new entry.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
275
#[cfg(test)]
mod projection_tests {
    use super::*;

    /// The configured near/far planes must reach both the `PerspectiveProjection`
    /// fields and the resulting clip-from-view matrix.
    #[test]
    fn render_projection_uses_configured_near_plane_for_effective_clip_matrix() {
        let config = {
            let mut cfg = RenderConfig::tbp_default();
            cfg.near_plane = 0.025;
            cfg.far_plane = 12.0;
            cfg
        };

        let projection = render_projection(&config);
        let perspective = match &projection {
            Projection::Perspective(inner) => inner,
            _ => panic!("render_projection should create a perspective projection"),
        };

        let expected_clip_plane = Vec4::new(0.0, 0.0, -1.0, -config.near_plane);
        assert_eq!(perspective.near, config.near_plane);
        assert_eq!(perspective.near_clip_plane, expected_clip_plane);
        assert_eq!(perspective.far, config.far_plane);

        // The near distance appears in the matrix as `w_axis.z`.
        let encoded_near = projection.get_clip_from_view().w_axis.z;
        assert!(
            (encoded_near - config.near_plane).abs() < 1e-6,
            "reverse-Z projection matrix should encode configured near plane; got {}",
            encoded_near
        );
    }
}
306
307// ============================================================================
308// Depth Buffer Helpers
309// ============================================================================
310
311mod depth_helpers {
312    /// wgpu requires buffer row alignment of 256 bytes
313    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;
314
315    /// Align byte size to wgpu's COPY_BYTES_PER_ROW_ALIGNMENT
316    pub fn align_byte_size(value: u32) -> u32 {
317        let remainder = value % COPY_BYTES_PER_ROW_ALIGNMENT;
318        if remainder == 0 {
319            value
320        } else {
321            value + (COPY_BYTES_PER_ROW_ALIGNMENT - remainder)
322        }
323    }
324
325    /// Calculate aligned buffer size for an image
326    #[allow(dead_code)]
327    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
328        height * align_byte_size(width * pixel_size)
329    }
330
331    /// Convert reverse-Z NDC depth to linear depth in meters.
332    ///
333    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
334    /// This provides better precision for distant objects.
335    ///
336    /// Formula derivation:
337    /// - At near plane (z = near): ndc = 1
338    /// - At far plane (z = far): ndc = 0
339    /// - linear = far / (1 + ndc * (far/near - 1))
340    ///
341    /// Superseded in the render path by [`ndc_to_linear_with_matrix`], which
342    /// reads the actual projection near from the view matrix instead of trusting
343    /// a passed-in near (the source of the #92 10x depth error). Retained for its
344    /// tests and as a reference formula.
345    #[allow(dead_code)]
346    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
347        // Handle edge cases
348        if ndc_depth <= 0.0 {
349            return far; // Background (infinite distance in reverse-Z)
350        }
351        if ndc_depth >= 1.0 {
352            return near; // At or beyond near plane
353        }
354        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
355        far / (1.0 + ndc_depth * (far / near - 1.0))
356    }
357
358    /// Extract depth values from aligned buffer, handling row padding
359    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
360        let pixel_size = 4u32; // f32 = 4 bytes
361        let aligned_row_bytes = align_byte_size(width * pixel_size) as usize;
362        let actual_row_bytes = (width * pixel_size) as usize;
363
364        let mut depth_values = Vec::with_capacity((width * height) as usize);
365
366        for y in 0..height as usize {
367            let row_start = y * aligned_row_bytes;
368            let row_data = &data[row_start..row_start + actual_row_bytes];
369
370            for x in 0..width as usize {
371                let offset = x * 4;
372                let bytes: [u8; 4] = row_data[offset..offset + 4].try_into().unwrap();
373                let depth_value = f32::from_le_bytes(bytes);
374                depth_values.push(depth_value);
375            }
376        }
377
378        depth_values
379    }
380
381    /// Convert all NDC depth values to linear meters (as f64 for TBP precision).
382    /// Superseded by [`convert_depth_to_linear_with_matrix`]; retained for tests.
383    #[allow(dead_code)]
384    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
385        raw_depth
386            .iter()
387            .map(|&ndc| reverse_z_to_linear_depth(ndc, near, far) as f64)
388            .collect()
389    }
390
391    /// Linearize a reverse-Z NDC depth using the view's actual projection matrix,
392    /// rather than a hand-supplied near/far.
393    ///
394    /// For a perspective right-handed projection, the relevant clip-space rows are
395    /// `clip_z = m22 * z + m32` and `clip_w = -z` (camera looks down -Z), so
396    /// `ndc = clip_z / clip_w = (m22*z + m32) / (-z)`. Solving for the positive
397    /// view-space distance `d = -z` gives **`d = m32 / (ndc + m22)`**. This holds
398    /// for both finite and infinite reverse-Z and is correct regardless of which
399    /// near plane the renderer actually used — the previous fixed-near formula
400    /// produced depths 10x too small when the effective projection near plane
401    /// drifted from `RenderConfig::near_plane` (issue #86/#92/#95).
402    ///
403    /// `m22 = clip_from_view[col=2][row=2]`, `m32 = clip_from_view[col=3][row=2]`.
404    /// `ndc <= 0` is the reverse-Z far plane (background) and maps to `far`.
405    pub fn ndc_to_linear_with_matrix(ndc: f32, m22: f32, m32: f32, far: f32) -> f32 {
406        if ndc <= 0.0 {
407            return far; // background / at-or-beyond far plane in reverse-Z
408        }
409        let denom = ndc + m22;
410        if denom.abs() <= f32::EPSILON {
411            return far;
412        }
413        let linear = m32 / denom;
414        if !linear.is_finite() || linear <= 0.0 {
415            far
416        } else {
417            linear.min(far)
418        }
419    }
420
421    /// Convert all NDC depth values to linear meters using the view projection
422    /// matrix (f64 for TBP precision). See [`ndc_to_linear_with_matrix`].
423    pub fn convert_depth_to_linear_with_matrix(
424        raw_depth: &[f32],
425        m22: f32,
426        m32: f32,
427        far: f32,
428    ) -> Vec<f64> {
429        raw_depth
430            .iter()
431            .map(|&ndc| ndc_to_linear_with_matrix(ndc, m22, m32, far) as f64)
432            .collect()
433    }
434
    /// Unit tests for the pure depth helpers (no GPU required).
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Alignment rounds up to the next multiple of 256 and leaves aligned
        /// values (including 0) untouched.
        #[test]
        fn test_align_byte_size() {
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(257), 512);
            assert_eq!(align_byte_size(1), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(0), 0);
        }

        /// Spot-checks of the fixed near/far reverse-Z formula.
        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // Near plane (ndc=1 in reverse-Z)
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // Mid-range NDC. Reverse-Z is strongly non-linear, so ndc=0.5 lands
            // very close to the near plane rather than halfway to far.
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
            assert!(linear_mid > near && linear_mid < far);

            // Very close to far plane (ndc very small)
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
            assert!(linear_almost_far > 9.0);

            // Background (ndc=0)
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        /// Row padding is skipped: samples are read from the start of each
        /// 256-byte-aligned row only.
        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image, 4 bytes per pixel
            // Aligned row = 256 bytes, but actual = 8 bytes
            let width = 2u32;
            let height = 2u32;

            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows

            // Write test depth values
            // Row 0: [0.5, 0.6]
            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
            // Row 1: [0.7, 0.8]
            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            assert!((depth[0] - 0.5).abs() < 0.001);
            assert!((depth[1] - 0.6).abs() < 0.001);
            assert!((depth[2] - 0.7).abs() < 0.001);
            assert!((depth[3] - 0.8).abs() < 0.001);
        }

        /// ndc = 1 maps to the near distance.
        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // Near plane should give near value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        /// ndc = 0 maps to the far distance.
        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // Far plane (ndc=0) should give far value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        /// Sweeping ndc from 1 down to 0 must never decrease the linear depth.
        #[test]
        fn test_reverse_z_monotonic() {
            // Depth should increase as NDC decreases (reverse-Z)
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        /// Matrix-based linearization for an infinite reverse-Z projection,
        /// including the #92 regression value.
        #[test]
        fn test_ndc_to_linear_with_matrix_infinite_reverse_z() {
            // Infinite reverse-Z (Bevy `perspective_infinite_reverse_rh`):
            // m22 = 0, m32 = near. d = near / ndc.
            let (m22, m32, far) = (0.0f32, 0.1f32, 10.0f32);

            // The exact regression from #92: ndc 0.366504 must linearize to
            // ~0.273 m (near 0.1), NOT ~0.027 m (the old fixed near = 0.01).
            let d = ndc_to_linear_with_matrix(0.366504, m22, m32, far);
            assert!((d as f64 - 0.272849).abs() < 1e-4, "got {d}");

            // Background (reverse-Z far plane) and clamping.
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
            assert_eq!(ndc_to_linear_with_matrix(-0.5, m22, m32, far), far);
            // Very small ndc -> very far -> clamped to far.
            assert_eq!(ndc_to_linear_with_matrix(1e-9, m22, m32, far), far);
        }

        /// Matrix-based linearization for a finite reverse-Z projection.
        #[test]
        fn test_ndc_to_linear_with_matrix_finite_reverse_z() {
            // Finite reverse-Z maps near->ndc 1, far->ndc 0. Construct the matrix
            // entries for near=0.5, far=20: m22 = near/(far-near), m32 = far*m22.
            let (near, far) = (0.5f32, 20.0f32);
            let m22 = near / (far - near);
            let m32 = far * m22;
            // ndc = 1 -> near; ndc = 0 -> far (background sentinel also returns far).
            assert!((ndc_to_linear_with_matrix(1.0, m22, m32, far) - near).abs() < 1e-4);
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
        }

        /// Batch conversion keeps length and stays within `[near, far]`.
        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // Near plane
            assert!((linear[0] - near as f64).abs() < 0.001);
            // Far plane
            assert!((linear[3] - far as f64).abs() < 0.001);
            // All should be in range [near, far]
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        /// Further alignment checks around the 256-byte boundary.
        #[test]
        fn test_align_byte_size_edge_cases() {
            // Powers of two should stay the same if multiple of 256
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(1024), 1024);

            // Just under 256 should round up to 256
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just over 256 should round up to 512
            assert_eq!(align_byte_size(300), 512);
        }

        /// Extraction at 64x64, where a row (256 bytes) needs no padding.
        #[test]
        fn test_extract_depth_64x64() {
            // Test with TBP default resolution
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            // Create aligned buffer
            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with incrementing values
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Verify first and last values
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
629}
630
631// ============================================================================
632// Depth Readback Render Node
633// ============================================================================
634
/// Label for the depth readback render graph node.
///
/// `DepthReadbackPlugin` registers `DepthReadbackNode` under this label in
/// the `Core3d` graph.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
638
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node itself is stateless; the request flag and the output queue are
/// read from render-world resources on every run.
#[derive(Default)]
struct DepthReadbackNode;
643
644impl ViewNode for DepthReadbackNode {
645    type ViewQuery = (
646        &'static ViewDepthTexture,
647        &'static ExtractedCamera,
648        &'static ExtractedView,
649    );
650
651    fn run<'w>(
652        &self,
653        _graph: &mut RenderGraphContext,
654        render_context: &mut RenderContext<'w>,
655        (view_depth_texture, camera, view): QueryItem<'w, '_, Self::ViewQuery>,
656        world: &'w World,
657    ) -> Result<(), NodeRunError> {
658        let trace = render_trace_enabled();
659        let t0 = trace.then(std::time::Instant::now);
660
661        // Check if depth capture is requested
662        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
663            return Ok(());
664        };
665        if !request.requested {
666            return Ok(());
667        }
668
669        // Get the pending queue
670        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
671            return Ok(());
672        };
673
674        // Get texture size from camera viewport or physical size
675        let Some(physical_size) = camera.physical_target_size else {
676            return Ok(());
677        };
678        let width = physical_size.x;
679        let height = physical_size.y;
680
681        let render_device = world.resource::<RenderDevice>();
682
683        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
684        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
685        let unpadded_bytes_per_row = width * bytes_per_pixel;
686        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
687        let buffer_size = (padded_bytes_per_row * height) as u64;
688
689        // Create staging buffer for CPU readback
690        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
691            label: Some("depth_staging_buffer"),
692            size: buffer_size,
693            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
694            mapped_at_creation: false,
695        });
696
697        // Copy depth texture to staging buffer
698        let encoder = render_context.command_encoder();
699        encoder.copy_texture_to_buffer(
700            TexelCopyTextureInfo {
701                texture: &view_depth_texture.texture,
702                mip_level: 0,
703                origin: Origin3d::ZERO,
704                aspect: TextureAspect::DepthOnly,
705            },
706            TexelCopyBufferInfo {
707                buffer: &staging_buffer,
708                layout: TexelCopyBufferLayout {
709                    offset: 0,
710                    bytes_per_row: Some(padded_bytes_per_row),
711                    rows_per_image: Some(height),
712                },
713            },
714            Extent3d {
715                width,
716                height,
717                depth_or_array_layers: 1,
718            },
719        );
720
721        // Push to queue for async processing (queue is Arc<Mutex<Vec>>).
722        // Capture the projection-matrix entries used for linearization: for a
723        // perspective RH matrix, clip_z = m22*z + m32 and clip_w = -z, so the
724        // positive view-space distance is d = m32 / (ndc + m22).
725        let clip_from_view = view.clip_from_view;
726        if let Ok(mut pending) = queue.0.lock() {
727            pending.push(PendingDepthCapture {
728                buffer: staging_buffer,
729                width,
730                height,
731                m22: clip_from_view.z_axis.z,
732                m32: clip_from_view.w_axis.z,
733                far: request.far,
734            });
735        }
736
737        if let Some(t0) = t0 {
738            eprintln!(
739                "[render_trace][node] DepthReadbackNode ms={:.3}",
740                t0.elapsed().as_secs_f64() * 1000.0
741            );
742        }
743
744        Ok(())
745    }
746}
747
748// ============================================================================
749// Depth Readback Plugin
750// ============================================================================
751
/// Plugin that sets up depth buffer readback from the GPU.
struct DepthReadbackPlugin {
    /// Handle to the shared depth output; clones are inserted into both the
    /// main app and the render app.
    shared_depth: SharedDepthBuffer,
    /// Near-plane distance stored in the initial `DepthCaptureRequest`.
    near: f32,
    /// Far-plane distance stored in the initial `DepthCaptureRequest`.
    far: f32,
}
758
759impl Plugin for DepthReadbackPlugin {
760    fn build(&self, app: &mut App) {
761        use bevy::core_pipeline::core_3d::graph::Core3d;
762        use bevy::core_pipeline::core_3d::graph::Node3d;
763
764        // Insert shared depth buffer in main app
765        app.insert_resource(self.shared_depth.clone());
766        app.insert_resource(DepthCaptureRequest {
767            requested: false,
768            near: self.near,
769            far: self.far,
770        });
771
772        // Get render app
773        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
774            eprintln!("Failed to get RenderApp for depth readback");
775            return;
776        };
777
778        // Insert resources in render world
779        render_app.insert_resource(self.shared_depth.clone());
780        render_app.init_resource::<PendingDepthCaptureQueue>();
781
782        // Add extraction system to copy request from main world
783        render_app.add_systems(ExtractSchedule, extract_depth_request);
784
785        // Add system to process completed depth captures
786        render_app.add_systems(
787            Render,
788            collect_depth_captures.in_set(RenderSystems::Cleanup),
789        );
790
791        // Register the depth readback node in the render graph
792        // Run after main pass completes (depth buffer is ready) but before tonemapping
793        render_app
794            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
795            .add_render_graph_edges(
796                Core3d,
797                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
798            );
799    }
800}
801
802/// Extract depth capture request from main world to render world
803fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
804    commands.insert_resource(DepthCaptureRequest {
805        requested: request.requested,
806        near: request.near,
807        far: request.far,
808    });
809}
810
811/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
812fn collect_depth_captures(
813    queue: Res<PendingDepthCaptureQueue>,
814    shared_depth: Res<SharedDepthBuffer>,
815    render_device: Res<RenderDevice>,
816) {
817    let trace = render_trace_enabled();
818    let t_sys = trace.then(std::time::Instant::now);
819
820    // Take all pending captures from the queue
821    let pending_captures = {
822        let Ok(mut pending) = queue.0.lock() else {
823            return;
824        };
825        std::mem::take(&mut *pending)
826    };
827
828    if pending_captures.is_empty() {
829        if let Some(t0) = t_sys {
830            eprintln!(
831                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
832                t0.elapsed().as_secs_f64() * 1000.0
833            );
834        }
835        return;
836    }
837
838    let pending_count = pending_captures.len();
839
840    // Process each pending capture synchronously with device polling
841    for pending in pending_captures {
842        let width = pending.width;
843        let height = pending.height;
844        let m22 = pending.m22;
845        let m32 = pending.m32;
846        let far = pending.far;
847        let buffer = pending.buffer;
848        let shared = shared_depth.0.clone();
849
850        // Use blocking sync approach with device polling (same as RGBA capture)
851        let buffer_slice = buffer.slice(..);
852
853        // Request mapping
854        let (tx, rx) = std::sync::mpsc::channel();
855        buffer_slice.map_async(MapMode::Read, move |result| {
856            let _ = tx.send(result);
857        });
858
859        let t_wait = trace.then(std::time::Instant::now);
860        let mut poll_iters: u32 = 0;
861
862        // Poll the device until mapping completes
863        loop {
864            let _ =
865                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
866            poll_iters += 1;
867            match rx.try_recv() {
868                Ok(Ok(())) => {
869                    let data = buffer_slice.get_mapped_range();
870
871                    // Extract depth values with alignment handling
872                    let ndc_depth =
873                        depth_helpers::extract_depth_with_alignment(&data, width, height);
874
875                    drop(data);
876                    buffer.unmap();
877
878                    // Convert reverse-Z NDC to linear depth (meters) using the
879                    // view's actual projection matrix entries. See
880                    // `convert_depth_to_linear_with_matrix`.
881                    let linear_depth = depth_helpers::convert_depth_to_linear_with_matrix(
882                        &ndc_depth, m22, m32, far,
883                    );
884
885                    // Store in shared buffer
886                    if let Ok(mut guard) = shared.lock() {
887                        *guard = Some((linear_depth, width, height));
888                    }
889                    break;
890                }
891                Ok(Err(e)) => {
892                    eprintln!("Failed to map depth buffer: {:?}", e);
893                    break;
894                }
895                Err(std::sync::mpsc::TryRecvError::Empty) => {
896                    // Keep polling
897                    std::thread::sleep(std::time::Duration::from_millis(1));
898                }
899                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
900                    eprintln!("Depth buffer mapping channel disconnected");
901                    break;
902                }
903            }
904        }
905
906        if let Some(t_wait) = t_wait {
907            eprintln!(
908                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
909                poll_iters,
910                t_wait.elapsed().as_secs_f64() * 1000.0
911            );
912        }
913    }
914
915    if let Some(t0) = t_sys {
916        eprintln!(
917            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
918            pending_count,
919            t0.elapsed().as_secs_f64() * 1000.0
920        );
921    }
922}
923
924// ============================================================================
925// Image Copy Infrastructure (for headless rendering)
926// ============================================================================
927
/// Render-graph label under which `ImageCopyDriver` is registered.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
931
/// Component that marks an image for GPU-to-CPU copying.
///
/// Instances are cloned into the render world's `ImageCopiers` resource by
/// `extract_image_copiers`.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target)
    src_image: Handle<Image>,
    /// Whether to capture on this frame; disabled copiers are skipped by
    /// `ImageCopyDriver`.
    enabled: bool,
}
940
/// Render-world resource holding a snapshot of every `ImageCopier`.
///
/// Replaced wholesale by `extract_image_copiers` and read by
/// `ImageCopyDriver`.
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
944
/// A texture-to-buffer copy submitted by `ImageCopyDriver` whose staging
/// buffer still has to be mapped and read back by `collect_image_captures`.
struct PendingImageCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the texture data.
    buffer: Buffer,
    /// Width of the source image, taken from the GPU image size.
    width: u32,
    /// Height of the source image, taken from the GPU image size.
    height: u32,
    /// Row stride inside `buffer` after `align_copy_bytes_per_row` padding.
    padded_bytes_per_row: u32,
}
952
/// Queue for pending image captures.
///
/// `ImageCopyDriver` pushes entries while the render graph runs;
/// `collect_image_captures` takes the whole list when it runs in
/// `RenderSystems::Cleanup`.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
956
/// Shared buffer for captured RGBA data.
///
/// Holds `Some((bytes, width, height))` once `collect_image_captures` has read
/// a capture back; `bytes` contains the rows with the copy padding stripped.
/// Clones share the same underlying storage, which is how the main world and
/// the render world see the same result.
#[derive(Resource, Clone, Default)]
// The nested Arc<Mutex<Option<tuple>>> trips clippy's type-complexity lint.
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
961
962/// Render graph node that copies render target images to staging buffers
963struct ImageCopyDriver;
964
965impl Node for ImageCopyDriver {
966    fn run(
967        &self,
968        _graph: &mut RenderGraphContext,
969        _render_context: &mut RenderContext,
970        world: &World,
971    ) -> Result<(), NodeRunError> {
972        let trace = render_trace_enabled();
973        let t0 = trace.then(std::time::Instant::now);
974
975        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
976            return Ok(());
977        };
978
979        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
980            return Ok(());
981        };
982
983        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
984            return Ok(());
985        };
986
987        let render_device = world.resource::<RenderDevice>();
988
989        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
990            return Ok(());
991        };
992
993        for image_copier in image_copiers.0.iter() {
994            if !image_copier.enabled {
995                continue;
996            }
997
998            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
999                continue;
1000            };
1001
1002            let width = gpu_image.size.width;
1003            let height = gpu_image.size.height;
1004
1005            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
1006            let block_dimensions = gpu_image.texture_format.block_dimensions();
1007            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
1008
1009            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
1010                (width as usize / block_dimensions.0 as usize) * block_size as usize,
1011            );
1012
1013            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
1014
1015            // Create staging buffer for CPU readback
1016            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
1017                label: Some("image_copy_staging_buffer"),
1018                size: buffer_size,
1019                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
1020                mapped_at_creation: false,
1021            });
1022
1023            // Create command encoder for the copy operation
1024            let mut encoder =
1025                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
1026
1027            let texture_extent = Extent3d {
1028                width,
1029                height,
1030                depth_or_array_layers: 1,
1031            };
1032
1033            // Copy texture to buffer
1034            encoder.copy_texture_to_buffer(
1035                gpu_image.texture.as_image_copy(),
1036                TexelCopyBufferInfo {
1037                    buffer: &staging_buffer,
1038                    layout: TexelCopyBufferLayout {
1039                        offset: 0,
1040                        bytes_per_row: Some(padded_bytes_per_row as u32),
1041                        rows_per_image: None,
1042                    },
1043                },
1044                texture_extent,
1045            );
1046
1047            // Submit the copy command
1048            render_queue.submit(std::iter::once(encoder.finish()));
1049
1050            // Queue for async processing
1051            if let Ok(mut pending) = queue.0.lock() {
1052                pending.push(PendingImageCapture {
1053                    buffer: staging_buffer,
1054                    width,
1055                    height,
1056                    padded_bytes_per_row: padded_bytes_per_row as u32,
1057                });
1058            }
1059        }
1060
1061        if let Some(t0) = t0 {
1062            eprintln!(
1063                "[render_trace][node] ImageCopyDriver ms={:.3}",
1064                t0.elapsed().as_secs_f64() * 1000.0
1065            );
1066        }
1067
1068        Ok(())
1069    }
1070}
1071
1072/// Extract ImageCopier components to render world
1073fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
1074    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
1075}
1076
1077/// Process completed image captures
1078fn collect_image_captures(
1079    queue: Res<PendingImageCaptureQueue>,
1080    shared_rgba: Res<SharedRgbaBuffer>,
1081    render_device: Res<RenderDevice>,
1082) {
1083    let trace = render_trace_enabled();
1084    let t_sys = trace.then(std::time::Instant::now);
1085
1086    let pending_captures = {
1087        let Ok(mut pending) = queue.0.lock() else {
1088            return;
1089        };
1090        std::mem::take(&mut *pending)
1091    };
1092
1093    if pending_captures.is_empty() {
1094        if let Some(t0) = t_sys {
1095            eprintln!(
1096                "[render_trace][sys] collect_image_captures empty ms={:.3}",
1097                t0.elapsed().as_secs_f64() * 1000.0
1098            );
1099        }
1100        return;
1101    }
1102
1103    let pending_count = pending_captures.len();
1104
1105    for pending in pending_captures {
1106        let width = pending.width;
1107        let height = pending.height;
1108        let padded_bytes_per_row = pending.padded_bytes_per_row;
1109        let buffer = pending.buffer;
1110        let shared = shared_rgba.0.clone();
1111
1112        // Use blocking sync approach with device polling
1113        let buffer_slice = buffer.slice(..);
1114
1115        // Request mapping
1116        let (tx, rx) = std::sync::mpsc::channel();
1117        buffer_slice.map_async(MapMode::Read, move |result| {
1118            let _ = tx.send(result);
1119        });
1120
1121        // Poll the device until mapping completes (with timeout)
1122        let start = std::time::Instant::now();
1123        let timeout = std::time::Duration::from_secs(10);
1124        let mut poll_iters: u32 = 0;
1125        loop {
1126            let _ =
1127                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
1128            poll_iters += 1;
1129
1130            if start.elapsed() > timeout {
1131                eprintln!(
1132                    "Warning: Buffer mapping timeout after {:?}",
1133                    start.elapsed()
1134                );
1135                break;
1136            }
1137
1138            match rx.try_recv() {
1139                Ok(Ok(())) => {
1140                    let data = buffer_slice.get_mapped_range();
1141
1142                    // Extract pixels with alignment handling
1143                    let bytes_per_pixel = 4u32;
1144                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
1145                    let padded_row_bytes = padded_bytes_per_row as usize;
1146
1147                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
1148                    for y in 0..height as usize {
1149                        let row_start = y * padded_row_bytes;
1150                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
1151                    }
1152
1153                    drop(data);
1154                    buffer.unmap();
1155
1156                    if let Ok(mut guard) = shared.lock() {
1157                        *guard = Some((rgba, width, height));
1158                    }
1159                    break;
1160                }
1161                Ok(Err(e)) => {
1162                    eprintln!("Failed to map image buffer: {:?}", e);
1163                    break;
1164                }
1165                Err(std::sync::mpsc::TryRecvError::Empty) => {
1166                    // Keep polling
1167                    std::thread::sleep(std::time::Duration::from_millis(1));
1168                }
1169                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1170                    eprintln!("Image buffer mapping channel disconnected");
1171                    break;
1172                }
1173            }
1174        }
1175
1176        if trace {
1177            eprintln!(
1178                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
1179                poll_iters,
1180                start.elapsed().as_secs_f64() * 1000.0
1181            );
1182        }
1183    }
1184
1185    if let Some(t0) = t_sys {
1186        eprintln!(
1187            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1188            pending_count,
1189            t0.elapsed().as_secs_f64() * 1000.0
1190        );
1191    }
1192}
1193
/// Plugin for headless image copy.
///
/// Registers the extraction system, the capture-collection system and the
/// `ImageCopyDriver` render-graph node.
struct ImageCopyPlugin {
    /// Buffer that receives the read-back RGBA image; a clone is inserted into
    /// both the main world and the render world.
    shared_rgba: SharedRgbaBuffer,
}
1198
1199impl Plugin for ImageCopyPlugin {
1200    fn build(&self, app: &mut App) {
1201        use bevy::render::render_graph::RenderGraph;
1202
1203        app.insert_resource(self.shared_rgba.clone());
1204
1205        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1206            return;
1207        };
1208
1209        render_app.insert_resource(self.shared_rgba.clone());
1210        render_app.init_resource::<ImageCopiers>();
1211        render_app.init_resource::<PendingImageCaptureQueue>();
1212
1213        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1214        render_app.add_systems(
1215            Render,
1216            collect_image_captures.in_set(RenderSystems::Cleanup),
1217        );
1218
1219        // Add image copy node to render graph (runs after camera driver)
1220        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1221        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1222        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1223    }
1224}
1225
1226// ============================================================================
1227// Render Request and Components
1228// ============================================================================
1229
/// Configuration passed to the Bevy app as a resource.
///
/// Built by the public render entry points after the object directory has been
/// canonicalized and the mesh/texture files have been checked for existence.
#[derive(Resource, Clone)]
struct RenderRequest {
    /// Asset-path string of the mesh file (from `fs_path_to_asset_string`).
    mesh_path: String,
    /// Asset-path string of the texture file (from `fs_path_to_asset_string`).
    texture_path: String,
    /// Camera pose; the batch entry point seeds this with the first viewpoint.
    camera_transform: Transform,
    /// Rotation supplied by the caller for the object.
    object_rotation: ObjectRotation,
    /// Translation supplied by the caller for the object.
    object_translation: Vec3,
    /// Scale supplied by the caller for the object.
    object_scale: Vec3,
    /// Render configuration (supplies, among others, the near/far planes).
    config: RenderConfig,
}
1241
/// Marker component for the rendered object.
#[derive(Component)]
struct RenderedObject;

/// Marker component for the render camera.
#[derive(Component)]
struct RenderCamera;

/// Resource wrapping the asset handle of the loaded texture image.
#[derive(Resource)]
struct LoadedTexture(Handle<Image>);

/// Resource wrapping the asset handle of the loaded scene.
#[derive(Resource)]
struct LoadedScene(Handle<Scene>);

/// Shared output for extracting render results.
///
/// `None` until a render output has been stored; clones share the same slot,
/// so the caller of the app can read what the app wrote.
#[derive(Resource, Clone)]
struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);

/// Resource wrapping the handle of the render target image.
// NOTE(review): the handle is not read anywhere in the visible code, hence the
// `dead_code` allowance; presumably it is stored to keep the render target
// asset alive. Confirm before removing.
#[derive(Resource)]
#[allow(dead_code)]
struct RenderTargetImage(Handle<Image>);
1266
1267/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1268#[derive(Resource)]
1269struct HeadlessBatchSequence {
1270    viewpoints: Vec<Transform>,
1271    current_index: usize,
1272    outputs: Vec<RenderOutput>,
1273    warmup_frames_remaining: u32,
1274    done: bool,
1275}
1276
1277impl HeadlessBatchSequence {
1278    fn new(viewpoints: Vec<Transform>) -> Self {
1279        let capacity = viewpoints.len();
1280        Self {
1281            viewpoints,
1282            current_index: 0,
1283            outputs: Vec::with_capacity(capacity),
1284            warmup_frames_remaining: 0,
1285            done: capacity == 0,
1286        }
1287    }
1288
1289    fn current_viewpoint(&self) -> Option<Transform> {
1290        self.viewpoints.get(self.current_index).cloned()
1291    }
1292}
1293
1294/// Perform headless rendering of a YCB object.
1295///
1296/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1297/// require any window surfaces. This should work on WSL2 and other environments
1298/// without display servers.
1299///
1300/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1301/// for results and terminates the process once the render is complete.
1302#[allow(dead_code)]
1303pub fn render_headless(
1304    object_dir: &Path,
1305    camera_transform: &Transform,
1306    object_rotation: &ObjectRotation,
1307    object_translation: Vec3,
1308    object_scale: Vec3,
1309    config: &RenderConfig,
1310) -> Result<RenderOutput, RenderError> {
1311    // Canonicalize paths so Bevy's asset server can find them regardless of
1312    // caller working directory. Relative paths like "../../ycb" pass the
1313    // exists() check but Bevy resolves assets against its own root.
1314    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1315        RenderError::RenderFailed(format!(
1316            "Cannot canonicalize object directory {}: {}",
1317            object_dir.display(),
1318            e
1319        ))
1320    })?;
1321    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1322    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1323
1324    if !mesh_path.exists() {
1325        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1326            &mesh_path,
1327        )));
1328    }
1329    if !texture_path.exists() {
1330        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1331            &texture_path,
1332        )));
1333    }
1334
1335    let request = RenderRequest {
1336        mesh_path: fs_path_to_asset_string(&mesh_path),
1337        texture_path: fs_path_to_asset_string(&texture_path),
1338        camera_transform: *camera_transform,
1339        object_rotation: object_rotation.clone(),
1340        object_translation,
1341        object_scale,
1342        config: config.clone(),
1343    };
1344
1345    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1346    let output_clone = shared_output.clone();
1347
1348    // Shared buffer for RGBA data from headless render target
1349    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1350
1351    // Shared buffer for depth readback
1352    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1353
1354    // Create a temp file path for fallback output serialization
1355    let temp_path =
1356        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1357
1358    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1359    let output_poll_for_timeout = shared_output.clone();
1360    std::thread::spawn(move || {
1361        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1362        let start = std::time::Instant::now();
1363        let poll_interval = std::time::Duration::from_millis(100);
1364
1365        loop {
1366            // Check if we have a result
1367            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1368                if guard.is_some() {
1369                    // Output is ready, Bevy will exit via AppExit event
1370                    return; // Exit watchdog thread, Bevy will handle exit
1371                }
1372            }
1373
1374            if start.elapsed() > timeout {
1375                eprintln!(
1376                    "Error: Render timeout after {} seconds",
1377                    RENDER_TIMEOUT_SECS
1378                );
1379                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1380                // Force exit on timeout (this is a failure case)
1381                std::process::exit(1);
1382            }
1383
1384            std::thread::sleep(poll_interval);
1385        }
1386    });
1387
1388    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1389    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1390    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1391
1392    // App::run() returned - check shared_output for result
1393    if let Ok(guard) = shared_output.0.lock() {
1394        if let Some(output) = guard.as_ref() {
1395            return Ok(output.clone());
1396        }
1397    }
1398
1399    // Fallback: try to read from temp file (for legacy compatibility)
1400    if temp_path.exists() {
1401        if let Ok(output) = read_output_from_file(&temp_path) {
1402            let _ = std::fs::remove_file(&temp_path);
1403            return Ok(output);
1404        }
1405    }
1406
1407    Err(RenderError::RenderFailed(
1408        "Render did not complete".to_string(),
1409    ))
1410}
1411
/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
///
/// All captures share the same object, object rotation, and render configuration.
/// This is the fast path used by the batch API for episode-style workloads.
///
/// Unlike the single-render path, the app is driven manually: after
/// `finish()`/`cleanup()` this function calls `app.update()` in a loop until the
/// `HeadlessBatchSequence` resource reports `done`.
///
/// Returns one `RenderOutput` per viewpoint; an empty `viewpoints` slice yields
/// an empty vector without creating an app.
///
/// # Errors
///
/// - `RenderError::RenderFailed` if `object_dir` cannot be canonicalized, or if
///   the number of collected outputs differs from the number of viewpoints.
/// - `RenderError::MeshNotFound` / `RenderError::TextureNotFound` if the
///   expected mesh or texture file does not exist under `object_dir`.
/// - `RenderError::RenderTimeout` if the batch has not finished within
///   `RENDER_TIMEOUT_SECS`; the deadline is checked between updates only.
pub fn render_headless_sequence(
    object_dir: &Path,
    viewpoints: &[Transform],
    object_rotation: &ObjectRotation,
    object_translation: Vec3,
    object_scale: Vec3,
    config: &RenderConfig,
) -> Result<Vec<RenderOutput>, RenderError> {
    // Nothing to render: skip app construction entirely.
    if viewpoints.is_empty() {
        return Ok(Vec::new());
    }

    // Resolve to an absolute path before building the asset paths.
    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
        RenderError::RenderFailed(format!(
            "Cannot canonicalize object directory {}: {}",
            object_dir.display(),
            e
        ))
    })?;
    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);

    if !mesh_path.exists() {
        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
            &mesh_path,
        )));
    }
    if !texture_path.exists() {
        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
            &texture_path,
        )));
    }

    // The request carries the first viewpoint as the initial camera pose; the
    // full list is handed to `HeadlessBatchSequence` below.
    let request = RenderRequest {
        mesh_path: fs_path_to_asset_string(&mesh_path),
        texture_path: fs_path_to_asset_string(&texture_path),
        camera_transform: viewpoints[0],
        object_rotation: object_rotation.clone(),
        object_translation,
        object_scale,
        config: config.clone(),
    };

    // Readback buffers shared between this function's app and its plugins.
    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
    let rgba_clone = shared_rgba.clone();

    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
    let depth_clone = shared_depth.clone();

    // No runner plugin is added here: the update loop below drives the app.
    let mut app = App::new();
    app.add_plugins(
        DefaultPlugins
            .set(bevy::asset::AssetPlugin {
                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
                // default (UnapprovedPathMode::Forbid → load() silently returns a
                // default handle). YCB meshes load from absolute paths, so allow them.
                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
                ..default()
            })
            .set(WindowPlugin {
                primary_window: None,
                exit_condition: ExitCondition::DontExit,
                ..default()
            })
            .disable::<bevy::winit::WinitPlugin>()
            .disable::<LogPlugin>()
            .disable::<TerminalCtrlCHandlerPlugin>(),
    )
    .add_plugins(ObjPlugin)
    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
    // Scene spawning panics unless those component types are registered. The
    // minimal headless plugin set doesn't register them, so do it explicitly.
    .register_type::<Mesh3d>()
    .register_type::<MeshMaterial3d<StandardMaterial>>()
    .register_type::<bevy::prelude::Transform>()
    .register_type::<bevy::prelude::GlobalTransform>()
    .register_type::<bevy::transform::components::TransformTreeChanged>()
    .register_type::<bevy::prelude::Visibility>()
    .register_type::<bevy::prelude::InheritedVisibility>()
    .register_type::<bevy::prelude::ViewVisibility>()
    .add_plugins(ImageCopyPlugin {
        shared_rgba: rgba_clone,
    })
    .add_plugins(DepthReadbackPlugin {
        shared_depth: depth_clone,
        near: config.near_plane,
        far: config.far_plane,
    })
    .insert_resource(request)
    .insert_resource(shared_rgba)
    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
    .init_resource::<RenderState>()
    .add_systems(Startup, setup_headless_scene)
    // The per-frame systems are chained, so they run in the listed order.
    .add_systems(
        Update,
        (
            check_assets_loaded,
            apply_materials,
            tick_headless_batch_warmup,
            request_headless_capture,
            check_headless_capture_ready,
            extract_and_continue_headless_batch,
        )
            .chain(),
    );

    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
    // normal startup phases before driving the headless batch loop ourselves.
    let trace_outer = render_trace_enabled();
    let t_finish = std::time::Instant::now();
    app.finish();
    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
    let t_cleanup = std::time::Instant::now();
    app.cleanup();
    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
    if trace_outer {
        eprintln!(
            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
            finish_ms, cleanup_ms
        );
    }

    // Overall deadline for the whole batch, measured from here.
    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
    let start = std::time::Instant::now();

    // NOTE(review): this reads the environment variable directly while
    // `trace_outer` above goes through `render_trace_enabled()`; presumably
    // the two are equivalent — confirm and unify.
    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
    let mut update_idx: u32 = 0;
    let mut last_completed_outputs: usize = 0;
    let mut viewpoint_start = std::time::Instant::now();

    loop {
        // Checked before each update; an update that never returns is not
        // interrupted by this.
        if start.elapsed() > timeout {
            return Err(RenderError::RenderTimeout {
                duration_secs: RENDER_TIMEOUT_SECS,
            });
        }

        let update_start = std::time::Instant::now();
        app.update();
        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;

        if trace {
            let batch = app.world().resource::<HeadlessBatchSequence>();
            let warmup = batch.warmup_frames_remaining;
            let current = batch.current_index;
            let completed = batch.outputs.len();
            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
            eprintln!(
                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
            );
            // A new output appeared during this update: report the viewpoint's
            // wall time and restart the per-viewpoint timer.
            if completed > last_completed_outputs {
                eprintln!(
                    "[render_trace] viewpoint {} finished in {:.2} ms",
                    completed - 1,
                    vp_ms
                );
                last_completed_outputs = completed;
                viewpoint_start = std::time::Instant::now();
            }
        }

        update_idx += 1;

        if app.world().resource::<HeadlessBatchSequence>().done {
            break;
        }
    }

    if trace {
        eprintln!(
            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
            start.elapsed().as_secs_f64() * 1000.0,
            viewpoints.len()
        );
    }

    // The batch must have produced exactly one output per requested viewpoint.
    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
    if batch.outputs.len() != viewpoints.len() {
        return Err(RenderError::RenderFailed(format!(
            "Batch render produced {} outputs for {} viewpoints",
            batch.outputs.len(),
            viewpoints.len()
        )));
    }

    // Move the outputs out of the resource instead of cloning them.
    Ok(std::mem::take(&mut batch.outputs))
}
1605
1606/// Assemble the shared single-render headless Bevy app.
1607fn build_headless_app(
1608    request: RenderRequest,
1609    shared_output: SharedOutput,
1610    shared_rgba: SharedRgbaBuffer,
1611    shared_depth: SharedDepthBuffer,
1612) -> App {
1613    let near = request.config.near_plane;
1614    let far = request.config.far_plane;
1615
1616    let mut app = App::new();
1617    app.add_plugins(
1618        DefaultPlugins
1619            .set(bevy::asset::AssetPlugin {
1620                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1621                // default (UnapprovedPathMode::Forbid → load() silently returns a
1622                // default handle). YCB meshes load from absolute paths, so allow them.
1623                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1624                ..default()
1625            })
1626            .set(WindowPlugin {
1627                primary_window: None,
1628                exit_condition: ExitCondition::DontExit,
1629                ..default()
1630            })
1631            .disable::<bevy::winit::WinitPlugin>()
1632            .disable::<LogPlugin>()
1633            .disable::<TerminalCtrlCHandlerPlugin>(),
1634    )
1635    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1636        1.0 / 60.0,
1637    )))
1638    .add_plugins(ObjPlugin)
1639    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1640    // Scene spawning panics unless those component types are registered. The
1641    // minimal headless plugin set doesn't register them, so do it explicitly.
1642    .register_type::<Mesh3d>()
1643    .register_type::<MeshMaterial3d<StandardMaterial>>()
1644    .register_type::<bevy::prelude::Transform>()
1645    .register_type::<bevy::prelude::GlobalTransform>()
1646    .register_type::<bevy::transform::components::TransformTreeChanged>()
1647    .register_type::<bevy::prelude::Visibility>()
1648    .register_type::<bevy::prelude::InheritedVisibility>()
1649    .register_type::<bevy::prelude::ViewVisibility>()
1650    .add_plugins(ImageCopyPlugin {
1651        shared_rgba: shared_rgba.clone(),
1652    })
1653    .add_plugins(DepthReadbackPlugin {
1654        shared_depth,
1655        near,
1656        far,
1657    })
1658    .insert_resource(request)
1659    .insert_resource(shared_output)
1660    .insert_resource(shared_rgba)
1661    .init_resource::<RenderState>()
1662    .add_systems(Startup, setup_headless_scene)
1663    .add_systems(
1664        Update,
1665        (
1666            check_assets_loaded,
1667            apply_materials,
1668            request_headless_capture,
1669            check_headless_capture_ready,
1670            extract_and_exit_headless,
1671        )
1672            .chain(),
1673    );
1674    app
1675}
1676
1677/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1678#[allow(dead_code)]
1679fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1680    let mut data = Vec::new();
1681
1682    // Header: width, height, rgba_len, depth_len
1683    data.extend_from_slice(&output.width.to_le_bytes());
1684    data.extend_from_slice(&output.height.to_le_bytes());
1685    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1686    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1687
1688    // RGBA data
1689    data.extend_from_slice(&output.rgba);
1690
1691    // Depth data (as f64 bytes for TBP precision)
1692    for d in &output.depth {
1693        data.extend_from_slice(&d.to_le_bytes());
1694    }
1695
1696    // Intrinsics (f64 for TBP precision)
1697    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1698    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1699    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1700    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1701    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1702    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1703
1704    // Camera transform (translation + rotation quaternion)
1705    let t = output.camera_transform.translation;
1706    let r = output.camera_transform.rotation;
1707    data.extend_from_slice(&t.x.to_le_bytes());
1708    data.extend_from_slice(&t.y.to_le_bytes());
1709    data.extend_from_slice(&t.z.to_le_bytes());
1710    data.extend_from_slice(&r.x.to_le_bytes());
1711    data.extend_from_slice(&r.y.to_le_bytes());
1712    data.extend_from_slice(&r.z.to_le_bytes());
1713    data.extend_from_slice(&r.w.to_le_bytes());
1714
1715    // Object rotation (f64)
1716    let or = &output.object_rotation;
1717    data.extend_from_slice(&or.pitch.to_le_bytes());
1718    data.extend_from_slice(&or.yaw.to_le_bytes());
1719    data.extend_from_slice(&or.roll.to_le_bytes());
1720
1721    // Object translation + scale (f32 for Bevy compatibility)
1722    let ot = output.object_translation;
1723    let os = output.object_scale;
1724    data.extend_from_slice(&ot.x.to_le_bytes());
1725    data.extend_from_slice(&ot.y.to_le_bytes());
1726    data.extend_from_slice(&ot.z.to_le_bytes());
1727    data.extend_from_slice(&os.x.to_le_bytes());
1728    data.extend_from_slice(&os.y.to_le_bytes());
1729    data.extend_from_slice(&os.z.to_le_bytes());
1730
1731    data
1732}
1733
1734/// Read RenderOutput from serialized file
1735fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1736    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1737    let mut data = Vec::new();
1738    file.read_to_end(&mut data)
1739        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1740
1741    let mut cursor = 0;
1742
1743    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1744        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1745        *cursor += 4;
1746        val
1747    };
1748
1749    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1750        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1751        *cursor += 4;
1752        val
1753    };
1754
1755    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1756        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1757        *cursor += 8;
1758        val
1759    };
1760
1761    let width = read_u32(&data, &mut cursor);
1762    let height = read_u32(&data, &mut cursor);
1763    let rgba_len = read_u32(&data, &mut cursor) as usize;
1764    let depth_len = read_u32(&data, &mut cursor) as usize;
1765
1766    let rgba = data[cursor..cursor + rgba_len].to_vec();
1767    cursor += rgba_len;
1768
1769    // Depth data (f64 for TBP precision)
1770    let mut depth = Vec::with_capacity(depth_len);
1771    for _ in 0..depth_len {
1772        depth.push(read_f64(&data, &mut cursor));
1773    }
1774
1775    // Intrinsics (f64 for TBP precision)
1776    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1777    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1778    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1779
1780    // Camera transform (f32 for Bevy compatibility)
1781    let tx = read_f32(&data, &mut cursor);
1782    let ty = read_f32(&data, &mut cursor);
1783    let tz = read_f32(&data, &mut cursor);
1784    let rx = read_f32(&data, &mut cursor);
1785    let ry = read_f32(&data, &mut cursor);
1786    let rz = read_f32(&data, &mut cursor);
1787    let rw = read_f32(&data, &mut cursor);
1788
1789    // Object rotation (f64)
1790    let pitch = read_f64(&data, &mut cursor);
1791    let yaw = read_f64(&data, &mut cursor);
1792    let roll = read_f64(&data, &mut cursor);
1793
1794    let (object_translation, object_scale) = if cursor + 24 <= data.len() {
1795        let tx = read_f32(&data, &mut cursor);
1796        let ty = read_f32(&data, &mut cursor);
1797        let tz = read_f32(&data, &mut cursor);
1798        let sx = read_f32(&data, &mut cursor);
1799        let sy = read_f32(&data, &mut cursor);
1800        let sz = read_f32(&data, &mut cursor);
1801        (Vec3::new(tx, ty, tz), Vec3::new(sx, sy, sz))
1802    } else {
1803        (Vec3::ZERO, Vec3::ONE)
1804    };
1805
1806    Ok(RenderOutput {
1807        rgba,
1808        depth,
1809        width,
1810        height,
1811        intrinsics: crate::CameraIntrinsics {
1812            focal_length,
1813            principal_point,
1814            image_size,
1815        },
1816        camera_transform: Transform {
1817            translation: Vec3::new(tx, ty, tz),
1818            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1819            scale: Vec3::ONE,
1820        },
1821        object_rotation: ObjectRotation { pitch, yaw, roll },
1822        object_translation,
1823        object_scale,
1824        target_point: Vec3::ZERO,
1825        targeting_policy: TargetingPolicy::Origin,
1826    })
1827}
1828
1829/// Setup the scene with camera, lighting, and object
1830#[allow(dead_code)]
1831fn setup_scene(
1832    mut commands: Commands,
1833    asset_server: Res<AssetServer>,
1834    request: Res<RenderRequest>,
1835    mut _materials: ResMut<Assets<StandardMaterial>>,
1836) {
1837    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1838    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1839    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1840    commands.spawn((
1841        Camera3d::default(),
1842        Camera::default(),
1843        Hdr,
1844        render_projection(&request.config),
1845        Msaa::Off,
1846        request.camera_transform,
1847        Tonemapping::None, // Accurate colors for software rendering
1848        DepthPrepass,
1849        NormalPrepass,
1850        RenderCamera,
1851    ));
1852
1853    // Ambient light (from config). In Bevy 0.18 the global ambient light is the
1854    // `GlobalAmbientLight` resource (the `AmbientLight` type became a per-camera component).
1855    let lighting = &request.config.lighting;
1856    commands.insert_resource(GlobalAmbientLight {
1857        color: Color::WHITE,
1858        brightness: lighting.ambient_brightness,
1859        ..default()
1860    });
1861
1862    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1863    if lighting.key_light_intensity > 0.0 {
1864        commands.spawn((
1865            PointLight {
1866                intensity: lighting.key_light_intensity,
1867                shadows_enabled: lighting.shadows_enabled,
1868                ..default()
1869            },
1870            Transform::from_xyz(
1871                lighting.key_light_position[0],
1872                lighting.key_light_position[1],
1873                lighting.key_light_position[2],
1874            ),
1875        ));
1876    }
1877
1878    // Fill light (from config)
1879    if lighting.fill_light_intensity > 0.0 {
1880        commands.spawn((
1881            PointLight {
1882                intensity: lighting.fill_light_intensity,
1883                shadows_enabled: lighting.shadows_enabled,
1884                ..default()
1885            },
1886            Transform::from_xyz(
1887                lighting.fill_light_position[0],
1888                lighting.fill_light_position[1],
1889                lighting.fill_light_position[2],
1890            ),
1891        ));
1892    }
1893
1894    // Load the scene
1895    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1896    commands.insert_resource(LoadedScene(scene_handle.clone()));
1897
1898    // Load the texture
1899    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1900    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1901
1902    // Create material with texture (will be applied later)
1903    let _material = _materials.add(StandardMaterial {
1904        base_color_texture: Some(texture_handle),
1905        unlit: true,
1906        ..default()
1907    });
1908
1909    // Spawn the scene with the requested object transform (Bevy 0.15+ uses SceneRoot)
1910    commands.spawn((
1911        SceneRoot(scene_handle),
1912        request
1913            .object_rotation
1914            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
1915        RenderedObject,
1916    ));
1917
1918    println!("Scene setup complete");
1919}
1920
1921/// Check if assets are loaded
1922fn check_assets_loaded(
1923    mut state: ResMut<RenderState>,
1924    asset_server: Res<AssetServer>,
1925    scene: Option<Res<LoadedScene>>,
1926    texture: Option<Res<LoadedTexture>>,
1927) {
1928    let trace = render_trace_enabled();
1929    let was_scene_loaded = state.scene_loaded;
1930    let was_texture_loaded = state.texture_loaded;
1931
1932    state.frame_count += 1;
1933
1934    if state.scene_loaded && state.texture_loaded {
1935        return;
1936    }
1937
1938    if let Some(scene) = scene {
1939        match asset_server.get_load_state(&scene.0) {
1940            Some(LoadState::Loaded) => {
1941                state.scene_loaded = true;
1942            }
1943            Some(LoadState::Failed(_)) => {}
1944            _ => {}
1945        }
1946    }
1947
1948    if let Some(texture) = texture {
1949        match asset_server.get_load_state(&texture.0) {
1950            Some(LoadState::Loaded) => {
1951                state.texture_loaded = true;
1952            }
1953            Some(LoadState::Failed(_)) => {}
1954            _ => {}
1955        }
1956    }
1957
1958    if trace {
1959        if !was_scene_loaded && state.scene_loaded {
1960            eprintln!(
1961                "[render_trace][coldinit] scene_loaded frame_count={}",
1962                state.frame_count
1963            );
1964        }
1965        if !was_texture_loaded && state.texture_loaded {
1966            eprintln!(
1967                "[render_trace][coldinit] texture_loaded frame_count={}",
1968                state.frame_count
1969            );
1970        }
1971    }
1972}
1973
1974/// Apply materials to loaded meshes
1975fn apply_materials(
1976    mut state: ResMut<RenderState>,
1977    texture: Option<Res<LoadedTexture>>,
1978    mut materials: ResMut<Assets<StandardMaterial>>,
1979    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1980    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1981) {
1982    // NOTE: we intentionally do NOT wait for `texture_loaded` before applying the
1983    // material. The texture *handle* is valid immediately, so applying the material
1984    // as soon as the mesh entities exist lets the main-pass `StandardMaterial`
1985    // pipeline start compiling during the long async texture load. A late material
1986    // swap (after texture load) would reset the pipeline and capture a blank color
1987    // frame before it recompiled — the root cause of the 0.18 blank renders.
1988    if !state.scene_loaded || state.capture_ready {
1989        return;
1990    }
1991
1992    state.frame_count += 1;
1993
1994    let Some(tex) = texture else { return };
1995
1996    if !state.materials_applied {
1997        // The scene hierarchy is instantiated asynchronously after the asset
1998        // load event fires; wait until mesh entities exist before applying.
1999        if mesh_query.is_empty() {
2000            return;
2001        }
2002
2003        let textured_material = materials.add(StandardMaterial {
2004            base_color_texture: Some(tex.0.clone()),
2005            unlit: true,
2006            ..default()
2007        });
2008
2009        for mut mat in mesh_query.iter_mut() {
2010            mat.0 = textured_material.clone();
2011        }
2012
2013        state.materials_applied = true;
2014        state.materials_applied_frame = state.frame_count;
2015    }
2016
2017    // Record the frame the texture finished loading (once).
2018    if state.texture_loaded && state.texture_ready_frame == 0 {
2019        state.texture_ready_frame = state.frame_count;
2020    }
2021
2022    // Capture once the texture pixels are loaded (+ a small margin for GPU image
2023    // preparation) AND the main-pass pipeline has had time to compile since the
2024    // material was applied. Because the material is applied early, the pipeline is
2025    // almost always ready well before the texture, so this resolves to a few frames
2026    // after the texture loads — deterministic and fast (no 60/120-frame cushion).
2027    let texture_ready =
2028        state.texture_ready_frame != 0 && state.frame_count >= state.texture_ready_frame + 6;
2029    let pipeline_ready = state.frame_count >= state.materials_applied_frame + 6;
2030    if texture_ready && pipeline_ready {
2031        let was_ready = state.capture_ready;
2032        state.capture_ready = true;
2033        if render_trace_enabled() && !was_ready {
2034            eprintln!(
2035                "[render_trace][coldinit] capture_ready frame_count={}",
2036                state.frame_count
2037            );
2038        }
2039    }
2040}
2041
2042/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
2043#[allow(dead_code)]
2044fn request_screenshot(
2045    mut commands: Commands,
2046    mut state: ResMut<RenderState>,
2047    shared_image: Res<SharedImageBuffer>,
2048    mut depth_request: ResMut<DepthCaptureRequest>,
2049) {
2050    if !state.capture_ready || state.screenshot_requested {
2051        return;
2052    }
2053
2054    // Clone the Arc for the observer closure
2055    let image_buffer = shared_image.0.clone();
2056
2057    // Also request depth capture
2058    depth_request.requested = true;
2059    println!("Depth capture requested");
2060
2061    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
2062    println!("Requesting screenshot via Screenshot entity");
2063    commands
2064        .spawn(Screenshot::primary_window())
2065        .observe(move |trigger: On<ScreenshotCaptured>| {
2066            // ScreenshotCaptured derefs to Image
2067            let image: &Image = trigger.event();
2068
2069            // Get dimensions
2070            let width = image.texture_descriptor.size.width;
2071            let height = image.texture_descriptor.size.height;
2072
2073            // Bevy 0.18: Image.data is now Option<Vec<u8>>; skip if absent.
2074            let Some(rgba_data) = image.data.clone() else {
2075                return;
2076            };
2077
2078            // Store in shared buffer
2079            if let Ok(mut guard) = image_buffer.lock() {
2080                *guard = Some((rgba_data, width, height));
2081            }
2082        });
2083
2084    state.screenshot_requested = true;
2085    println!("Screenshot requested");
2086}
2087
2088/// Check if screenshot callback has completed
2089#[allow(dead_code)]
2090fn check_screenshot_ready(
2091    mut state: ResMut<RenderState>,
2092    shared_image: Res<SharedImageBuffer>,
2093    shared_depth: Res<SharedDepthBuffer>,
2094    request: Res<RenderRequest>,
2095) {
2096    if !state.screenshot_requested || state.captured {
2097        return;
2098    }
2099
2100    // Increment frame count while waiting for capture
2101    state.frame_count += 1;
2102
2103    // Check if RGBA callback has written data
2104    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
2105        if let Some((rgba_data, width, height)) = guard.as_ref() {
2106            if state.rgba_data.is_none() {
2107                state.rgba_data = Some(rgba_data.clone());
2108                state.image_width = *width;
2109                state.image_height = *height;
2110            }
2111            true
2112        } else {
2113            false
2114        }
2115    } else {
2116        false
2117    };
2118
2119    // Check if depth readback has completed
2120    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2121        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2122            if state.depth_data.is_none() {
2123                state.depth_data = Some(depth_data.clone());
2124            }
2125            true
2126        } else {
2127            false
2128        }
2129    } else {
2130        false
2131    };
2132
2133    // If depth readback failed or is taking too long, fall back to placeholder.
2134    // As in check_headless_capture_ready, this uniform plane is a DEGRADED render
2135    // (flat depth, no real geometry) that must be loud — it silently masked the
2136    // #92 depth regression. (This fn is currently dead code; kept loud in case it
2137    // is ever revived.)
2138    if rgba_ready && !depth_ready && state.frame_count > 60 {
2139        let camera_dist = request.camera_transform.translation.length() as f64;
2140        let pixel_count = (state.image_width * state.image_height) as usize;
2141        eprintln!(
2142            "[bevy-sensor][WARN] depth readback produced no valid frame; falling back to a \
2143             UNIFORM {:.4} m camera-distance plane (degraded render, no real 3D geometry). \
2144             Indicates a depth-readback regression.",
2145            camera_dist
2146        );
2147        state.depth_data = Some(vec![camera_dist; pixel_count]);
2148    }
2149
2150    // Mark as captured when both RGBA and depth are ready
2151    if state.rgba_data.is_some() && state.depth_data.is_some() {
2152        state.captured = true;
2153    }
2154}
2155
2156/// Extract results and exit
2157#[allow(dead_code)]
2158fn extract_and_exit(
2159    mut state: ResMut<RenderState>,
2160    request: Res<RenderRequest>,
2161    shared_output: Res<SharedOutput>,
2162    mut commands: Commands,
2163    windows: Query<Entity, With<bevy::window::Window>>,
2164) {
2165    // Handle delayed exit after closing window
2166    if state.exit_requested {
2167        state.exit_frame_count += 1;
2168        // After a few frames with no window, Bevy should exit
2169        return;
2170    }
2171
2172    if !state.captured {
2173        return;
2174    }
2175
2176    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2177        // Use actual captured dimensions (may differ from config if window was resized)
2178        let width = state.image_width;
2179        let height = state.image_height;
2180
2181        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2182        let intrinsics = request.config.intrinsics_for_size(width, height);
2183
2184        let output = RenderOutput {
2185            rgba: rgba.clone(),
2186            depth: depth.clone(),
2187            width,
2188            height,
2189            intrinsics,
2190            camera_transform: request.camera_transform,
2191            object_rotation: request.object_rotation.clone(),
2192            object_translation: request.object_translation,
2193            object_scale: request.object_scale,
2194            target_point: Vec3::ZERO,
2195            targeting_policy: TargetingPolicy::Origin,
2196        };
2197
2198        if let Ok(mut guard) = shared_output.0.lock() {
2199            *guard = Some(output);
2200            drop(guard); // Release lock immediately
2201
2202            // Small delay to allow watchdog to detect output before window close
2203            std::thread::sleep(std::time::Duration::from_millis(200));
2204        }
2205
2206        // Close all windows to trigger app exit
2207        // eprintln!("Closing windows to trigger exit...");
2208        for window_entity in windows.iter() {
2209            commands.entity(window_entity).despawn();
2210        }
2211        state.exit_requested = true;
2212    }
2213}
2214
2215// ============================================================================
2216// Headless Rendering Systems (no window surfaces)
2217// ============================================================================
2218
2219/// Setup the scene for headless rendering with RenderTarget::Image
2220fn setup_headless_scene(
2221    mut commands: Commands,
2222    mut images: ResMut<Assets<Image>>,
2223    asset_server: Res<AssetServer>,
2224    request: Res<RenderRequest>,
2225    mut _materials: ResMut<Assets<StandardMaterial>>,
2226) {
2227    let trace = render_trace_enabled();
2228    let t0 = trace.then(std::time::Instant::now);
2229
2230    #[cfg(test)]
2231    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2232
2233    let width = request.config.width;
2234    let height = request.config.height;
2235
2236    // Create render target image with proper texture usages
2237    let size = Extent3d {
2238        width,
2239        height,
2240        depth_or_array_layers: 1,
2241    };
2242
2243    let mut render_target_image = Image::new_fill(
2244        size,
2245        TextureDimension::D2,
2246        &[0, 0, 0, 255], // Initialize with opaque black
2247        TextureFormat::Rgba8UnormSrgb,
2248        RenderAssetUsages::default(),
2249    );
2250
2251    // Add required texture usages for headless rendering
2252    render_target_image.texture_descriptor.usage =
2253        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2254
2255    let render_target_handle = images.add(render_target_image);
2256
2257    // Store handle for later access
2258    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2259
2260    // Camera rendering to the image texture (NO window!)
2261    commands.spawn((
2262        Camera3d::default(),
2263        Camera::default(),
2264        Hdr,
2265        // In Bevy 0.18 the render target is a separate `RenderTarget` component,
2266        // and `RenderTarget::Image` wraps an `ImageRenderTarget` (via `From<Handle<Image>>`).
2267        RenderTarget::Image(render_target_handle.clone().into()),
2268        render_projection(&request.config),
2269        Msaa::Off,
2270        request.camera_transform,
2271        Tonemapping::None,
2272        DepthPrepass,
2273        NormalPrepass,
2274        RenderCamera,
2275        // Add ImageCopier to trigger RGBA extraction
2276        ImageCopier {
2277            src_image: render_target_handle,
2278            enabled: false, // Will enable when ready to capture
2279        },
2280    ));
2281
2282    // Ambient light (global resource in Bevy 0.18).
2283    let lighting = &request.config.lighting;
2284    commands.insert_resource(GlobalAmbientLight {
2285        color: Color::WHITE,
2286        brightness: lighting.ambient_brightness,
2287        ..default()
2288    });
2289
2290    // Key light
2291    if lighting.key_light_intensity > 0.0 {
2292        commands.spawn((
2293            PointLight {
2294                intensity: lighting.key_light_intensity,
2295                shadows_enabled: lighting.shadows_enabled,
2296                ..default()
2297            },
2298            Transform::from_xyz(
2299                lighting.key_light_position[0],
2300                lighting.key_light_position[1],
2301                lighting.key_light_position[2],
2302            ),
2303        ));
2304    }
2305
2306    // Fill light
2307    if lighting.fill_light_intensity > 0.0 {
2308        commands.spawn((
2309            PointLight {
2310                intensity: lighting.fill_light_intensity,
2311                shadows_enabled: lighting.shadows_enabled,
2312                ..default()
2313            },
2314            Transform::from_xyz(
2315                lighting.fill_light_position[0],
2316                lighting.fill_light_position[1],
2317                lighting.fill_light_position[2],
2318            ),
2319        ));
2320    }
2321
2322    // Load the scene
2323    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2324    commands.insert_resource(LoadedScene(scene_handle.clone()));
2325
2326    // Load the texture
2327    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2328    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2329
2330    // Create material with texture
2331    let _material = _materials.add(StandardMaterial {
2332        base_color_texture: Some(texture_handle),
2333        unlit: true,
2334        ..default()
2335    });
2336
2337    // Spawn the scene with the requested object transform
2338    commands.spawn((
2339        SceneRoot(scene_handle),
2340        request
2341            .object_rotation
2342            .to_transform_with_translation_scale(request.object_translation, request.object_scale),
2343        RenderedObject,
2344    ));
2345
2346    if let Some(t0) = t0 {
2347        eprintln!(
2348            "[render_trace][startup] setup_headless_scene ms={:.3}",
2349            t0.elapsed().as_secs_f64() * 1000.0
2350        );
2351    }
2352}
2353
2354/// Request capture for headless rendering (enable ImageCopier)
2355fn request_headless_capture(
2356    mut state: ResMut<RenderState>,
2357    mut depth_request: ResMut<DepthCaptureRequest>,
2358    mut query: Query<&mut ImageCopier>,
2359    batch: Option<Res<HeadlessBatchSequence>>,
2360) {
2361    let trace = render_trace_enabled();
2362    let t0 = trace.then(std::time::Instant::now);
2363
2364    if !state.capture_ready || state.screenshot_requested {
2365        if let Some(t0) = t0 {
2366            eprintln!(
2367                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2368                t0.elapsed().as_secs_f64() * 1000.0
2369            );
2370        }
2371        return;
2372    }
2373
2374    if batch
2375        .as_ref()
2376        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2377    {
2378        if let Some(t0) = t0 {
2379            eprintln!(
2380                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2381                t0.elapsed().as_secs_f64() * 1000.0
2382            );
2383        }
2384        return;
2385    }
2386
2387    // Enable the ImageCopier to trigger RGBA extraction
2388    for mut copier in query.iter_mut() {
2389        copier.enabled = true;
2390    }
2391
2392    // Request depth capture
2393    depth_request.requested = true;
2394
2395    state.screenshot_requested = true;
2396
2397    if let Some(t0) = t0 {
2398        eprintln!(
2399            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2400            t0.elapsed().as_secs_f64() * 1000.0
2401        );
2402    }
2403}
2404
2405/// Check if headless capture has completed
2406fn check_headless_capture_ready(
2407    mut state: ResMut<RenderState>,
2408    shared_rgba: Res<SharedRgbaBuffer>,
2409    shared_depth: Res<SharedDepthBuffer>,
2410    request: Res<RenderRequest>,
2411    mut query: Query<&mut ImageCopier>,
2412) {
2413    let trace = render_trace_enabled();
2414    let t0 = trace.then(std::time::Instant::now);
2415
2416    if !state.screenshot_requested || state.captured {
2417        if let Some(t0) = t0 {
2418            eprintln!(
2419                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2420                t0.elapsed().as_secs_f64() * 1000.0
2421            );
2422        }
2423        return;
2424    }
2425
2426    state.frame_count += 1;
2427    state.capture_retries += 1;
2428    // Bounded fallback so a genuinely-uniform scene (or persistent invalid
2429    // readback) still terminates instead of hanging to the watchdog.
2430    // Generous bound: slow paths (e.g. RenderSession's retained-render-world
2431    // settle after a scene swap) can take ~150 frames to produce a stable frame,
2432    // so force-accepting at 150 would grab a partial frame and break parity. Only
2433    // force as a true last resort to avoid hanging the watchdog.
2434    let force_accept = state.capture_retries > 150;
2435
2436    // RGBA: accept the first non-blank frame. Uniform clear-color frames are
2437    // pre-geometry reads from the nondeterministic one-shot capture — reject and
2438    // retry. The copier stays enabled until BOTH RGBA and depth are valid so a
2439    // late/odd depth frame can still be captured.
2440    if state.rgba_data.is_none() {
2441        let captured_rgba = shared_rgba.0.lock().ok().and_then(|mut g| g.take());
2442        if let Some((rgba_data, width, height)) = captured_rgba {
2443            let non_blank = rgba_data
2444                .chunks_exact(4)
2445                .any(|px| px[0..3] != rgba_data[0..3]);
2446            // Stable == identical to the previous readback (render has settled).
2447            let stable = state.prev_rgba.as_deref() == Some(rgba_data.as_slice());
2448            if (non_blank && stable) || force_accept {
2449                state.image_width = width;
2450                state.image_height = height;
2451                state.rgba_data = Some(rgba_data);
2452                state.prev_rgba = None;
2453            } else {
2454                // Not settled yet: remember this frame and re-read fresh next one.
2455                state.prev_rgba = Some(rgba_data);
2456            }
2457        }
2458    }
2459
2460    // Depth: accept the first readback that contains real foreground (the depth
2461    // readback can also miss the geometry, leaving an all-far-plane buffer).
2462    if state.depth_data.is_none() {
2463        let captured_depth = shared_depth.0.lock().ok().and_then(|mut g| g.take());
2464        if let Some((depth_data, _w, _h)) = captured_depth {
2465            let far = request.config.far_plane as f64;
2466            // Require a real object-surface depth, not just any non-far value:
2467            // near-plane garbage (~0.01) would otherwise be accepted but is not a
2468            // valid surface, and downstream depth-validity checks require > 0.1m.
2469            let has_foreground = depth_data.iter().any(|&d| d > 0.1 && d < far * 0.999);
2470            // Settled == identical to the previous depth readback.
2471            let stable = state.prev_depth.as_deref() == Some(depth_data.as_slice());
2472            if has_foreground && stable {
2473                state.depth_data = Some(depth_data);
2474                state.prev_depth = None;
2475            } else {
2476                state.prev_depth = Some(depth_data);
2477            }
2478        }
2479    }
2480
2481    // Last-resort fallback so we never hang the watchdog: once RGBA is in hand
2482    // and we've retried a lot, fill a uniform camera-distance depth placeholder.
2483    //
2484    // This is NOT a valid render — it is a flat depth plane that extracts
2485    // features and passes buffer-equality parity tests yet unprojects every
2486    // pixel onto one sheet, silently cratering downstream spatial matching
2487    // (this exact fallback masked the Bevy 0.18 depth regression in #92). It
2488    // must therefore be LOUD: a future depth-readback regression has to surface
2489    // in logs/CI instead of looking like a successful render. `tests/
2490    // spatial_parity.rs` is the geometric guard for the same failure.
2491    if state.rgba_data.is_some() && state.depth_data.is_none() && force_accept {
2492        let camera_dist = request.camera_transform.translation.length() as f64;
2493        let pixel_count = (state.image_width * state.image_height) as usize;
2494        eprintln!(
2495            "[bevy-sensor][WARN] depth readback produced no valid frame after {} retries; \
2496             falling back to a UNIFORM {:.4} m camera-distance plane. This is a degraded \
2497             render (flat depth -> no real 3D geometry) and indicates a depth-readback \
2498             regression. See render.rs DepthReadbackNode and tests/spatial_parity.rs.",
2499            state.capture_retries, camera_dist
2500        );
2501        state.depth_data = Some(vec![camera_dist; pixel_count]);
2502    }
2503
2504    let rgba_ready = state.rgba_data.is_some();
2505    let depth_ready = state.depth_data.is_some();
2506
2507    // Both valid → capture complete; stop the copier.
2508    if rgba_ready && depth_ready {
2509        state.captured = true;
2510        for mut copier in query.iter_mut() {
2511            copier.enabled = false;
2512        }
2513    }
2514
2515    if let Some(t0) = t0 {
2516        eprintln!(
2517            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2518            rgba_ready,
2519            depth_ready,
2520            state.captured,
2521            state.frame_count,
2522            t0.elapsed().as_secs_f64() * 1000.0
2523        );
2524    }
2525}
2526
2527/// Extract results and exit for headless rendering
2528fn extract_and_exit_headless(
2529    mut state: ResMut<RenderState>,
2530    request: Res<RenderRequest>,
2531    shared_output: Res<SharedOutput>,
2532    mut app_exit: MessageWriter<bevy::app::AppExit>,
2533    batch: Option<Res<HeadlessBatchSequence>>,
2534) {
2535    if batch.is_some() {
2536        return;
2537    }
2538
2539    if state.exit_requested {
2540        return;
2541    }
2542
2543    if !state.captured {
2544        return;
2545    }
2546
2547    if state.rgba_data.is_some() && state.depth_data.is_some() {
2548        let width = state.image_width;
2549        let height = state.image_height;
2550        let rgba = state.rgba_data.take().expect("checked rgba_data");
2551        let depth = state.depth_data.take().expect("checked depth_data");
2552
2553        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2554        let intrinsics = request.config.intrinsics_for_size(width, height);
2555
2556        let output = RenderOutput {
2557            rgba,
2558            depth,
2559            width,
2560            height,
2561            intrinsics,
2562            camera_transform: request.camera_transform,
2563            object_rotation: request.object_rotation.clone(),
2564            object_translation: request.object_translation,
2565            object_scale: request.object_scale,
2566            target_point: Vec3::ZERO,
2567            targeting_policy: TargetingPolicy::Origin,
2568        };
2569
2570        if let Ok(mut guard) = shared_output.0.lock() {
2571            *guard = Some(output);
2572            drop(guard);
2573            std::thread::sleep(std::time::Duration::from_millis(200));
2574        }
2575
2576        // Send AppExit event (headless apps use this instead of closing windows)
2577        app_exit.write(bevy::app::AppExit::Success);
2578        state.exit_requested = true;
2579    }
2580}
2581
2582/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2583fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2584    let Some(mut batch) = batch else {
2585        return;
2586    };
2587
2588    if batch.warmup_frames_remaining > 0 {
2589        batch.warmup_frames_remaining -= 1;
2590    }
2591}
2592
2593/// Extract one batch output and continue rendering the next viewpoint in the same app.
2594fn extract_and_continue_headless_batch(
2595    mut state: ResMut<RenderState>,
2596    request: Res<RenderRequest>,
2597    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2598    batch: Option<ResMut<HeadlessBatchSequence>>,
2599    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2600    mut depth_request: ResMut<DepthCaptureRequest>,
2601    mut image_copiers: Query<&mut ImageCopier>,
2602) {
2603    let trace = render_trace_enabled();
2604    let t0 = trace.then(std::time::Instant::now);
2605
2606    let (shared_rgba, shared_depth) = buffers;
2607    let Some(mut batch) = batch else {
2608        if let Some(t0) = t0 {
2609            eprintln!(
2610                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2611                t0.elapsed().as_secs_f64() * 1000.0
2612            );
2613        }
2614        return;
2615    };
2616
2617    if state.exit_requested || !state.captured || batch.done {
2618        if let Some(t0) = t0 {
2619            eprintln!(
2620                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2621                state.captured,
2622                batch.done,
2623                t0.elapsed().as_secs_f64() * 1000.0
2624            );
2625        }
2626        return;
2627    }
2628
2629    if state.rgba_data.is_some() && state.depth_data.is_some() {
2630        let width = state.image_width;
2631        let height = state.image_height;
2632        let rgba = state.rgba_data.take().expect("checked rgba_data");
2633        let depth = state.depth_data.take().expect("checked depth_data");
2634
2635        let intrinsics = request.config.intrinsics_for_size(width, height);
2636
2637        let output = RenderOutput {
2638            rgba,
2639            depth,
2640            width,
2641            height,
2642            intrinsics,
2643            camera_transform: batch
2644                .current_viewpoint()
2645                .unwrap_or(request.camera_transform),
2646            object_rotation: request.object_rotation.clone(),
2647            object_translation: request.object_translation,
2648            object_scale: request.object_scale,
2649            target_point: Vec3::ZERO,
2650            targeting_policy: TargetingPolicy::Origin,
2651        };
2652        batch.outputs.push(output);
2653
2654        let next_index = batch.current_index + 1;
2655        if next_index >= batch.viewpoints.len() {
2656            batch.done = true;
2657            state.exit_requested = true;
2658            return;
2659        }
2660
2661        batch.current_index = next_index;
2662        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2663
2664        if let Some(next_viewpoint) = batch.current_viewpoint() {
2665            for mut camera_transform in camera_query.iter_mut() {
2666                *camera_transform = next_viewpoint;
2667            }
2668        }
2669
2670        if let Ok(mut guard) = shared_rgba.0.lock() {
2671            *guard = None;
2672        }
2673        if let Ok(mut guard) = shared_depth.0.lock() {
2674            *guard = None;
2675        }
2676
2677        for mut copier in image_copiers.iter_mut() {
2678            copier.enabled = false;
2679        }
2680
2681        depth_request.requested = false;
2682        state.frame_count = 0;
2683        state.capture_ready = true;
2684        state.screenshot_requested = false;
2685        state.captured = false;
2686        state.rgba_data = None;
2687        state.depth_data = None;
2688        state.image_width = 0;
2689        state.image_height = 0;
2690        // Reset the per-capture settle/retry tracking too, otherwise it
2691        // accumulates across viewpoints and force-accepts an unsettled frame for
2692        // later viewpoints (breaking parity).
2693        state.capture_retries = 0;
2694        state.prev_rgba = None;
2695        state.prev_depth = None;
2696
2697        if let Some(t0) = t0 {
2698            eprintln!(
2699                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2700                batch.current_index.saturating_sub(1),
2701                batch.current_index,
2702                batch.done,
2703                t0.elapsed().as_secs_f64() * 1000.0
2704            );
2705        }
2706    } else if let Some(t0) = t0 {
2707        eprintln!(
2708            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2709            t0.elapsed().as_secs_f64() * 1000.0
2710        );
2711    }
2712}
2713
2714// ============================================================================
2715// Persistent batch session (RenderSession)
2716//
2717// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2718// object (PSO) compilation across multiple `render()` calls. Profile data (see
2719// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2720// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2721// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2722// episodes recovers the bulk of that cost.
2723// ============================================================================
2724
/// Marker for the per-group scene entity so we can despawn it cleanly when the
/// next `RenderSession::render()` call swaps in a different object or rotation.
///
/// `RenderSession::render()` despawns every entity carrying this marker before
/// it spawns the scene root for the new request group.
#[derive(Component)]
struct SessionScene;
2729
2730/// Session-persistent setup: render target image, camera (with prepass +
2731/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2732/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2733/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2734fn setup_session_persistent_scene(
2735    mut commands: Commands,
2736    mut images: ResMut<Assets<Image>>,
2737    config: Res<SessionRenderConfig>,
2738) {
2739    let width = config.0.width;
2740    let height = config.0.height;
2741
2742    let size = Extent3d {
2743        width,
2744        height,
2745        depth_or_array_layers: 1,
2746    };
2747
2748    let mut render_target_image = Image::new_fill(
2749        size,
2750        TextureDimension::D2,
2751        &[0, 0, 0, 255],
2752        TextureFormat::Rgba8UnormSrgb,
2753        RenderAssetUsages::default(),
2754    );
2755    render_target_image.texture_descriptor.usage =
2756        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2757
2758    let render_target_handle = images.add(render_target_image);
2759    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2760
2761    commands.spawn((
2762        Camera3d::default(),
2763        Camera::default(),
2764        Hdr,
2765        RenderTarget::Image(render_target_handle.clone().into()),
2766        render_projection(&config.0),
2767        Msaa::Off,
2768        Transform::default(),
2769        Tonemapping::None,
2770        DepthPrepass,
2771        NormalPrepass,
2772        RenderCamera,
2773        ImageCopier {
2774            src_image: render_target_handle,
2775            enabled: false,
2776        },
2777    ));
2778
2779    let lighting = &config.0.lighting;
2780    commands.insert_resource(GlobalAmbientLight {
2781        color: Color::WHITE,
2782        brightness: lighting.ambient_brightness,
2783        ..default()
2784    });
2785
2786    if lighting.key_light_intensity > 0.0 {
2787        commands.spawn((
2788            PointLight {
2789                intensity: lighting.key_light_intensity,
2790                shadows_enabled: lighting.shadows_enabled,
2791                ..default()
2792            },
2793            Transform::from_xyz(
2794                lighting.key_light_position[0],
2795                lighting.key_light_position[1],
2796                lighting.key_light_position[2],
2797            ),
2798        ));
2799    }
2800
2801    if lighting.fill_light_intensity > 0.0 {
2802        commands.spawn((
2803            PointLight {
2804                intensity: lighting.fill_light_intensity,
2805                shadows_enabled: lighting.shadows_enabled,
2806                ..default()
2807            },
2808            Transform::from_xyz(
2809                lighting.fill_light_position[0],
2810                lighting.fill_light_position[1],
2811                lighting.fill_light_position[2],
2812            ),
2813        ));
2814    }
2815}
2816
/// Resource carrying the `RenderConfig` that was fixed at session construction.
/// Read by `setup_session_persistent_scene` to size the render target, build
/// the camera projection, and configure the ambient, key, and fill lights.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2821
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests: the same
/// `object_dir`, `object_rotation`, `object_translation`, `object_scale`, and
/// `render_config` across the batch. Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    // The long-lived Bevy app that `render()` drives via `update()`.
    app: App,
    // Config captured at `new()`; `render()` compares requests against it.
    render_config: RenderConfig,
    // Handle to the shared RGBA readback slot; cleared at the start of each `render()`.
    shared_rgba: SharedRgbaBuffer,
    // Handle to the shared depth readback slot; cleared at the start of each `render()`.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer phantom: makes the type `!Send + !Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2852
2853impl RenderSession {
2854    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2855    /// `update()` so Startup systems run and the wgpu device + adapter are
2856    /// initialized. The first `render()` call still pays PSO compilation for
2857    /// the specific mesh/material combination; subsequent calls reuse the cache.
2858    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2859        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2860        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2861
2862        let mut app = App::new();
2863        app.add_plugins(
2864            DefaultPlugins
2865                .set(bevy::asset::AssetPlugin {
2866                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
2867                    // default (UnapprovedPathMode::Forbid → load() silently returns a
2868                    // default handle). YCB meshes load from absolute paths, so allow them.
2869                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
2870                    ..default()
2871                })
2872                .set(WindowPlugin {
2873                    primary_window: None,
2874                    exit_condition: ExitCondition::DontExit,
2875                    ..default()
2876                })
2877                .disable::<bevy::winit::WinitPlugin>()
2878                .disable::<LogPlugin>()
2879                .disable::<TerminalCtrlCHandlerPlugin>(),
2880        )
2881        .add_plugins(ObjPlugin)
2882        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
2883        // Scene spawning panics unless those component types are registered. The
2884        // minimal headless plugin set doesn't register them, so do it explicitly.
2885        .register_type::<Mesh3d>()
2886        .register_type::<MeshMaterial3d<StandardMaterial>>()
2887        .register_type::<bevy::prelude::Transform>()
2888        .register_type::<bevy::prelude::GlobalTransform>()
2889        .register_type::<bevy::transform::components::TransformTreeChanged>()
2890        .register_type::<bevy::prelude::Visibility>()
2891        .register_type::<bevy::prelude::InheritedVisibility>()
2892        .register_type::<bevy::prelude::ViewVisibility>()
2893        .add_plugins(ImageCopyPlugin {
2894            shared_rgba: shared_rgba.clone(),
2895        })
2896        .add_plugins(DepthReadbackPlugin {
2897            shared_depth: shared_depth.clone(),
2898            near: render_config.near_plane,
2899            far: render_config.far_plane,
2900        })
2901        .insert_resource(SessionRenderConfig(render_config.clone()))
2902        .insert_resource(shared_rgba.clone())
2903        .init_resource::<RenderState>()
2904        .add_systems(Startup, setup_session_persistent_scene)
2905        .add_systems(
2906            Update,
2907            (
2908                check_assets_loaded,
2909                apply_materials,
2910                tick_headless_batch_warmup,
2911                request_headless_capture,
2912                check_headless_capture_ready,
2913                extract_and_continue_headless_batch,
2914            )
2915                .chain()
2916                // Gate the capture chain on `RenderRequest` existing. `new()`
2917                // runs a warmup `app.update()` to execute Startup (which spawns
2918                // the camera/lights/render target) before the first `render()`
2919                // call, but does not yet insert `RenderRequest`. Several systems
2920                // in this chain take `Res<RenderRequest>` (not `Option`) and
2921                // would panic on SystemState init if the resource were absent.
2922                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2923        );
2924
2925        app.finish();
2926        app.cleanup();
2927
2928        // One warmup update runs Startup systems (render target, camera, lights)
2929        // so they exist before the first `render()` call seeds the camera
2930        // transform. The Update chain is gated by `RenderRequest` existence and
2931        // is a no-op this tick. PSO compilation for specific mesh/material
2932        // combinations still happens lazily on the first real render.
2933        app.update();
2934
2935        Ok(Self {
2936            app,
2937            render_config: render_config.clone(),
2938            shared_rgba,
2939            shared_depth,
2940            _not_send_sync: std::marker::PhantomData,
2941        })
2942    }
2943
2944    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2945    /// Returns outputs in request order.
2946    ///
2947    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2948    /// wgpu device was lost mid-render. This call produced no output; any
2949    /// outputs from earlier `render()` calls on this session are still valid.
2950    /// Recovery: drop this `RenderSession` and construct a new one.
2951    pub fn render(
2952        &mut self,
2953        requests: &[crate::BatchRenderRequest],
2954    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2955        use crate::{BatchRenderError, BatchRenderOutput};
2956
2957        if requests.is_empty() {
2958            return Ok(Vec::new());
2959        }
2960
2961        // Enforce homogeneity and config invariance.
2962        let first = &requests[0];
2963        if first.render_config != self.render_config {
2964            return Err(BatchRenderError::InvalidConfig(
2965                "RenderSession render_config mismatch: session was constructed with a different \
2966                 RenderConfig than the first request carries. Session config cannot change after \
2967                 `new()`; construct a new session if you need a different resolution/camera."
2968                    .to_string(),
2969            ));
2970        }
2971        for r in &requests[1..] {
2972            if r.object_dir != first.object_dir
2973                || r.object_rotation != first.object_rotation
2974                || r.object_translation != first.object_translation
2975                || r.object_scale != first.object_scale
2976                || r.render_config != first.render_config
2977            {
2978                return Err(BatchRenderError::InvalidConfig(
2979                    "Phase 1 RenderSession::render requires homogeneous requests \
2980                     (same object_dir, object transform, and render_config across the batch). \
2981                     Call render() once per group instead."
2982                        .to_string(),
2983                ));
2984            }
2985        }
2986
2987        // Canonicalize paths and validate mesh/texture presence. This matches
2988        // `render_headless_sequence`'s preconditions so the error surface stays
2989        // consistent.
2990        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2991            BatchRenderError::InvalidConfig(format!(
2992                "Cannot canonicalize object directory {}: {}",
2993                first.object_dir.display(),
2994                e
2995            ))
2996        })?;
2997        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2998        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2999        if !mesh_path.exists() {
3000            return Err(BatchRenderError::InvalidConfig(format!(
3001                "Mesh not found: {}",
3002                mesh_path.display()
3003            )));
3004        }
3005        if !texture_path.exists() {
3006            return Err(BatchRenderError::InvalidConfig(format!(
3007                "Texture not found: {}",
3008                texture_path.display()
3009            )));
3010        }
3011
3012        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
3013
3014        // --- per-group scene swap (direct world manipulation) ---
3015        {
3016            let world = self.app.world_mut();
3017
3018            // Despawn any SessionScene entity from the previous group.
3019            let stale: Vec<Entity> = world
3020                .query_filtered::<Entity, With<SessionScene>>()
3021                .iter(world)
3022                .collect();
3023            for entity in stale {
3024                world.entity_mut(entity).despawn();
3025            }
3026
3027            // Clear shared RGBA/depth buffers so a stale payload can't leak
3028            // into the first viewpoint of this call.
3029            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3030                *guard = None;
3031            }
3032            if let Ok(mut guard) = self.shared_depth.0.lock() {
3033                *guard = None;
3034            }
3035
3036            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
3037            // frame_count, materials_applied, etc.). Default() gives all false/0.
3038            *world.resource_mut::<RenderState>() = RenderState::default();
3039
3040            // Update RenderRequest so the existing capture systems see the new
3041            // object paths, rotation, and camera transform (seeded from first vp).
3042            let new_request = RenderRequest {
3043                mesh_path: fs_path_to_asset_string(&mesh_path),
3044                texture_path: fs_path_to_asset_string(&texture_path),
3045                camera_transform: viewpoints[0],
3046                object_rotation: first.object_rotation.clone(),
3047                object_translation: first.object_translation,
3048                object_scale: first.object_scale,
3049                config: self.render_config.clone(),
3050            };
3051            world.insert_resource(new_request);
3052
3053            // Kick off asset loads and install the handles under the names the
3054            // existing `check_assets_loaded` system expects.
3055            let asset_server = world.resource::<AssetServer>().clone();
3056            let scene_handle: Handle<Scene> =
3057                asset_server.load(fs_path_to_asset_string(&mesh_path));
3058            let texture_handle: Handle<Image> =
3059                asset_server.load(fs_path_to_asset_string(&texture_path));
3060            world.insert_resource(LoadedScene(scene_handle.clone()));
3061            world.insert_resource(LoadedTexture(texture_handle));
3062
3063            // Spawn the new scene entity tagged so we can find + despawn it next
3064            // render() call.
3065            world.spawn((
3066                SceneRoot(scene_handle),
3067                first.object_rotation.to_transform_with_translation_scale(
3068                    first.object_translation,
3069                    first.object_scale,
3070                ),
3071                RenderedObject,
3072                SessionScene,
3073            ));
3074
3075            // Seed the camera transform to the first viewpoint now so the first
3076            // capture lines up; subsequent viewpoints are advanced by
3077            // `extract_and_continue_headless_batch`.
3078            let camera_entity = world
3079                .query_filtered::<Entity, With<RenderCamera>>()
3080                .iter(world)
3081                .next();
3082            if let Some(cam) = camera_entity {
3083                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3084                    *transform = viewpoints[0];
3085                }
3086            }
3087
3088            // Install the viewpoint sequence for this render() call. The robust
3089            // settled-frame capture (reject blank/partial readbacks, retry until
3090            // two consecutive readbacks match) absorbs the despawn/respawn
3091            // render-world settle, so a separate discarded warmup pass is not
3092            // needed and the per-object cost stays low.
3093            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
3094        }
3095
3096        // --- drive the real capture loop ---
3097        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3098        let start = std::time::Instant::now();
3099        loop {
3100            if start.elapsed() > timeout {
3101                return Err(BatchRenderError::TotalFailure(format!(
3102                    "RenderSession::render timed out after {}s",
3103                    RENDER_TIMEOUT_SECS
3104                )));
3105            }
3106
3107            self.app.update();
3108
3109            if self.app.world().resource::<HeadlessBatchSequence>().done {
3110                break;
3111            }
3112        }
3113
3114        // Collect outputs and zip with requests to produce BatchRenderOutput in
3115        // request order.
3116        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3117        if sequence.outputs.len() != requests.len() {
3118            return Err(BatchRenderError::TotalFailure(format!(
3119                "RenderSession produced {} outputs for {} requests",
3120                sequence.outputs.len(),
3121                requests.len()
3122            )));
3123        }
3124        let outputs = std::mem::take(&mut sequence.outputs);
3125
3126        Ok(requests
3127            .iter()
3128            .cloned()
3129            .zip(outputs)
3130            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
3131            .collect())
3132    }
3133}
3134
3135// ============================================================================
3136// Per-step persistent renderer (PersistentRenderer)
3137//
3138// `RenderSession` reuses the App across calls but rebuilds the scene on every
3139// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
3140// fine for the parity-gate path (one scene per episode of N viewpoints) but
3141// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
3142// the object stays loaded for the whole episode.
3143//
3144// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
3145// construction. `new()` loads mesh + texture + spawns the scene root + drives
3146// one warmup render (output discarded) so PSO compilation and material setup
3147// are paid up front. `render(camera, rotation)` then only mutates the camera
3148// `Transform` and (if changed) the scene root rotation, drives the capture
3149// chain for one frame, and returns. See issue #65.
3150// ============================================================================
3151
3152/// Marker for the `PersistentRenderer`'s scene root entity. We keep the
3153/// entity alive for the whole renderer lifetime and just mutate its
3154/// `Transform` when the caller-supplied object rotation changes.
3155#[derive(Component)]
3156struct PersistentScene;
3157
3158/// Persistent per-step renderer. Loads the scene once at `new()` and renders
3159/// one frame per `render()` call by mutating the camera transform and scene
3160/// root rotation in-place. Built for surface-policy feedback loops where the
3161/// object stays fixed for the duration of an episode and the camera moves
3162/// every step. See issue #65.
3163///
3164/// # Thread affinity
3165///
3166/// `PersistentRenderer` must be created, used, and dropped on the same thread.
3167/// Holds a `bevy::App` that owns GPU resources not safe to move across
3168/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
3169///
3170/// # Object + config invariants
3171///
3172/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
3173/// object or change resolution/lighting, drop and rebuild. Rotation may change
3174/// freely between `render()` calls.
3175pub struct PersistentRenderer {
3176    app: App,
3177    object_dir: PathBuf,
3178    render_config: RenderConfig,
3179    shared_rgba: SharedRgbaBuffer,
3180    shared_depth: SharedDepthBuffer,
3181    _not_send_sync: std::marker::PhantomData<*const ()>,
3182}
3183
3184impl PersistentRenderer {
3185    /// Build the App, load the scene + texture, spawn the scene root, and drive
3186    /// one warmup render whose output is discarded. After `new()` returns, the
3187    /// first user-facing `render()` call benefits from a warm PSO cache and
3188    /// applied materials.
3189    pub fn new(
3190        object_dir: &Path,
3191        render_config: &RenderConfig,
3192    ) -> Result<Self, crate::RenderError> {
3193        let object_dir =
3194            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
3195                path: object_dir.display().to_string(),
3196                reason: e.to_string(),
3197            })?;
3198        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3199        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3200        if !mesh_path.exists() {
3201            return Err(crate::RenderError::MeshNotFound(fs_path_to_asset_string(
3202                &mesh_path,
3203            )));
3204        }
3205        if !texture_path.exists() {
3206            return Err(crate::RenderError::TextureNotFound(
3207                fs_path_to_asset_string(&texture_path),
3208            ));
3209        }
3210
3211        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3212        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3213
3214        let mut app = App::new();
3215        app.add_plugins(
3216            DefaultPlugins
3217                .set(bevy::asset::AssetPlugin {
3218                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3219                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3220                    // default handle). YCB meshes load from absolute paths, so allow them.
3221                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3222                    ..default()
3223                })
3224                .set(WindowPlugin {
3225                    primary_window: None,
3226                    exit_condition: ExitCondition::DontExit,
3227                    ..default()
3228                })
3229                .disable::<bevy::winit::WinitPlugin>()
3230                .disable::<LogPlugin>()
3231                .disable::<TerminalCtrlCHandlerPlugin>(),
3232        )
3233        .add_plugins(ObjPlugin)
3234        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3235        // Scene spawning panics unless those component types are registered. The
3236        // minimal headless plugin set doesn't register them, so do it explicitly.
3237        .register_type::<Mesh3d>()
3238        .register_type::<MeshMaterial3d<StandardMaterial>>()
3239        .register_type::<bevy::prelude::Transform>()
3240        .register_type::<bevy::prelude::GlobalTransform>()
3241        .register_type::<bevy::transform::components::TransformTreeChanged>()
3242        .register_type::<bevy::prelude::Visibility>()
3243        .register_type::<bevy::prelude::InheritedVisibility>()
3244        .register_type::<bevy::prelude::ViewVisibility>()
3245        .add_plugins(ImageCopyPlugin {
3246            shared_rgba: shared_rgba.clone(),
3247        })
3248        .add_plugins(DepthReadbackPlugin {
3249            shared_depth: shared_depth.clone(),
3250            near: render_config.near_plane,
3251            far: render_config.far_plane,
3252        })
3253        .insert_resource(SessionRenderConfig(render_config.clone()))
3254        .insert_resource(shared_rgba.clone())
3255        .init_resource::<RenderState>()
3256        .add_systems(Startup, setup_session_persistent_scene)
3257        .add_systems(
3258            Update,
3259            (
3260                check_assets_loaded,
3261                apply_materials,
3262                tick_headless_batch_warmup,
3263                request_headless_capture,
3264                check_headless_capture_ready,
3265                extract_and_continue_headless_batch,
3266            )
3267                .chain()
3268                // Same gate as RenderSession: capture chain only runs once
3269                // RenderRequest is installed. Startup runs first via the
3270                // warmup `app.update()` below.
3271                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3272        );
3273
3274        app.finish();
3275        app.cleanup();
3276        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
3277        app.update();
3278
3279        // Install scene + warmup render request. The warmup output is discarded
3280        // — its purpose is to pay PSO compilation and material application
3281        // upfront so the first user-facing render() is fast. Use a real TBP
3282        // viewpoint rather than Transform::default(), which places the camera
3283        // at the object origin and forces a flat-depth fallback before any
3284        // caller-requested surface-policy render runs.
3285        let warmup_camera = persistent_warmup_camera_transform();
3286        let initial_request = RenderRequest {
3287            mesh_path: fs_path_to_asset_string(&mesh_path),
3288            texture_path: fs_path_to_asset_string(&texture_path),
3289            camera_transform: warmup_camera,
3290            object_rotation: ObjectRotation::identity(),
3291            object_translation: Vec3::ZERO,
3292            object_scale: Vec3::ONE,
3293            config: render_config.clone(),
3294        };
3295
3296        {
3297            let world = app.world_mut();
3298            let asset_server = world.resource::<AssetServer>().clone();
3299            let scene_handle: Handle<Scene> =
3300                asset_server.load(fs_path_to_asset_string(&mesh_path));
3301            let texture_handle: Handle<Image> =
3302                asset_server.load(fs_path_to_asset_string(&texture_path));
3303            world.insert_resource(LoadedScene(scene_handle.clone()));
3304            world.insert_resource(LoadedTexture(texture_handle));
3305            world.insert_resource(initial_request);
3306            world.spawn((
3307                SceneRoot(scene_handle),
3308                ObjectRotation::identity()
3309                    .to_transform_with_translation_scale(Vec3::ZERO, Vec3::ONE),
3310                RenderedObject,
3311                PersistentScene,
3312            ));
3313            if let Some(cam) = world
3314                .query_filtered::<Entity, With<RenderCamera>>()
3315                .iter(world)
3316                .next()
3317            {
3318                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3319                    *transform = warmup_camera;
3320                }
3321            }
3322            world.insert_resource(HeadlessBatchSequence::new(vec![warmup_camera]));
3323        }
3324
3325        // Drive the warmup render to completion.
3326        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3327        let start = std::time::Instant::now();
3328        loop {
3329            if start.elapsed() > timeout {
3330                return Err(crate::RenderError::RenderFailed(format!(
3331                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
3332                )));
3333            }
3334            app.update();
3335            if app.world().resource::<HeadlessBatchSequence>().done {
3336                break;
3337            }
3338        }
3339        // Discard the warmup output so it doesn't leak into the first real
3340        // render() call's output buffer.
3341        app.world_mut()
3342            .resource_mut::<HeadlessBatchSequence>()
3343            .outputs
3344            .clear();
3345
3346        Ok(Self {
3347            app,
3348            object_dir,
3349            render_config: render_config.clone(),
3350            shared_rgba,
3351            shared_depth,
3352            _not_send_sync: std::marker::PhantomData,
3353        })
3354    }
3355
3356    /// Render one frame from the given camera transform and object rotation.
3357    /// Reuses the loaded scene + warm PSO cache from `new()`.
3358    pub fn render(
3359        &mut self,
3360        camera_transform: &Transform,
3361        object_rotation: &ObjectRotation,
3362    ) -> Result<RenderOutput, crate::RenderError> {
3363        self.render_with_object_transform(camera_transform, object_rotation, Vec3::ZERO, Vec3::ONE)
3364    }
3365
3366    /// Render one frame with explicit object translation and scale.
3367    pub fn render_with_object_transform(
3368        &mut self,
3369        camera_transform: &Transform,
3370        object_rotation: &ObjectRotation,
3371        object_translation: Vec3,
3372        object_scale: Vec3,
3373    ) -> Result<RenderOutput, crate::RenderError> {
3374        let camera_transform = *camera_transform;
3375        let object_rotation_owned = object_rotation.clone();
3376
3377        {
3378            let world = self.app.world_mut();
3379
3380            // Update the persistent scene root rotation. Always-write avoids
3381            // the cost of an extra ObjectRotation comparison per call; the
3382            // mutation itself is a single Transform write.
3383            let scene_entity = world
3384                .query_filtered::<Entity, With<PersistentScene>>()
3385                .iter(world)
3386                .next();
3387            if let Some(entity) = scene_entity {
3388                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
3389                    *transform = object_rotation_owned
3390                        .to_transform_with_translation_scale(object_translation, object_scale);
3391                }
3392            }
3393
3394            // Update the camera transform.
3395            let cam_entity = world
3396                .query_filtered::<Entity, With<RenderCamera>>()
3397                .iter(world)
3398                .next();
3399            if let Some(cam) = cam_entity {
3400                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3401                    *transform = camera_transform;
3402                }
3403            }
3404
3405            // Reset per-frame state, preserving scene_loaded / texture_loaded
3406            // / materials_applied / materials_applied_frame. The asset-load
3407            // and material-apply work was paid in `new()`'s warmup; we only
3408            // need to clear the per-capture state.
3409            //
3410            // `capture_ready = true` short-circuits `apply_materials` on
3411            // every tick of the render loop (no need to re-check material
3412            // application — it stays applied for the renderer's lifetime).
3413            // It does NOT short-circuit `request_headless_capture`, which
3414            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3415            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3416            // without that warmup gate, request_headless_capture fires same-
3417            // tick as the transform writes, capturing the previous render's
3418            // target before the new transforms have propagated.
3419            {
3420                let mut state = world.resource_mut::<RenderState>();
3421                state.exit_requested = false;
3422                state.screenshot_requested = false;
3423                state.captured = false;
3424                state.rgba_data = None;
3425                state.depth_data = None;
3426                state.frame_count = 0;
3427                state.image_width = 0;
3428                state.image_height = 0;
3429                state.capture_ready = true;
3430                state.capture_retries = 0;
3431                state.prev_rgba = None;
3432                state.prev_depth = None;
3433            }
3434
3435            // Clear shared GPU readback buffers so a stale payload from the
3436            // previous render() can't leak into this call's output.
3437            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3438                *guard = None;
3439            }
3440            if let Ok(mut guard) = self.shared_depth.0.lock() {
3441                *guard = None;
3442            }
3443
3444            // Update RenderRequest (used by extract_and_continue_headless_batch
3445            // to stamp the output with the right intrinsics + rotation).
3446            {
3447                let mut req = world.resource_mut::<RenderRequest>();
3448                req.camera_transform = camera_transform;
3449                req.object_rotation = object_rotation_owned.clone();
3450                req.object_translation = object_translation;
3451                req.object_scale = object_scale;
3452            }
3453
3454            // Install fresh single-element batch with warmup frames so
3455            // `request_headless_capture` is gated until the new transforms
3456            // have propagated through the render pipeline.
3457            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3458            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3459            world.insert_resource(batch);
3460        }
3461
3462        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3463        let start = std::time::Instant::now();
3464        loop {
3465            if start.elapsed() > timeout {
3466                return Err(crate::RenderError::RenderFailed(format!(
3467                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3468                )));
3469            }
3470            self.app.update();
3471            if self.app.world().resource::<HeadlessBatchSequence>().done {
3472                break;
3473            }
3474        }
3475
3476        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3477        let mut outputs = std::mem::take(&mut sequence.outputs);
3478        if outputs.len() != 1 {
3479            return Err(crate::RenderError::RenderFailed(format!(
3480                "PersistentRenderer::render expected 1 output, got {}",
3481                outputs.len()
3482            )));
3483        }
3484
3485        Ok(outputs.remove(0))
3486    }
3487
3488    /// Path to the YCB object directory this renderer was bound to.
3489    pub fn object_dir(&self) -> &Path {
3490        &self.object_dir
3491    }
3492
3493    /// The `RenderConfig` this renderer was constructed with.
3494    pub fn render_config(&self) -> &RenderConfig {
3495        &self.render_config
3496    }
3497
3498    /// Explicit close. Equivalent to dropping; provided to match the API
3499    /// proposal in #65 for callers that want lifetime-explicit teardown.
3500    pub fn close(self) {
3501        // Drop runs on return.
3502    }
3503}
3504
/// Render directly to files (for subprocess mode).
///
/// This function saves RGBA and depth data directly to files before exiting.
/// Designed for subprocess rendering where the process will exit after rendering.
///
/// A watchdog thread polls the shared output slot every 100 ms. Once an output
/// is present it writes the RGBA image to `rgba_path` and the depth buffer to
/// `depth_path`, then terminates the whole process:
///
/// * exit code `0` — both files were written,
/// * exit code `1` — saving either file failed, or no output appeared within
///   `RENDER_TIMEOUT_SECS`.
///
/// # Errors
///
/// * `RenderError::MeshNotFound` / `RenderError::TextureNotFound` if the
///   expected mesh or texture file does not exist under `object_dir`. These
///   are returned before the watchdog thread or the app is started.
/// * `RenderError::RenderFailed` if the Bevy app's `run()` returns before the
///   watchdog has terminated the process.
#[allow(clippy::too_many_arguments)]
pub fn render_to_files(
    object_dir: &Path,
    camera_transform: &Transform,
    object_rotation: &ObjectRotation,
    object_translation: Vec3,
    object_scale: Vec3,
    config: &RenderConfig,
    rgba_path: &Path,
    depth_path: &Path,
) -> Result<(), RenderError> {
    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);

    // Fail early, before any thread or app is created, when assets are missing.
    if !mesh_path.exists() {
        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
            &mesh_path,
        )));
    }
    if !texture_path.exists() {
        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
            &texture_path,
        )));
    }

    let request = RenderRequest {
        mesh_path: fs_path_to_asset_string(&mesh_path),
        texture_path: fs_path_to_asset_string(&texture_path),
        camera_transform: *camera_transform,
        object_rotation: object_rotation.clone(),
        object_translation,
        object_scale,
        config: config.clone(),
    };

    // Shared state for output: one handle goes to the app, the clone is polled
    // by the watchdog thread.
    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
    let output_poll = shared_output.clone();

    // Owned copies of the paths so they can be moved into the watchdog thread
    let rgba_path = rgba_path.to_path_buf();
    let depth_path = depth_path.to_path_buf();

    // Shared buffer for RGBA data from headless render target
    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();

    // Shared buffer for depth readback
    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();

    // Spawn watchdog thread that saves files and exits
    std::thread::spawn(move || {
        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
        let start = std::time::Instant::now();
        let poll_interval = std::time::Duration::from_millis(100);

        loop {
            // A failed lock is treated like "no output yet"; the timeout
            // below still bounds how long we keep polling.
            if let Ok(guard) = output_poll.0.lock() {
                if let Some(output) = guard.as_ref() {
                    // Save RGBA as PNG
                    if let Err(e) =
                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
                    {
                        eprintln!("Failed to save RGBA: {:?}", e);
                        std::process::exit(1);
                    }

                    // Save depth as raw little-endian f64 samples
                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
                        eprintln!("Failed to save depth: {:?}", e);
                        std::process::exit(1);
                    }

                    // Both files are on disk: terminate the process successfully.
                    std::process::exit(0);
                }
            }

            if start.elapsed() > timeout {
                eprintln!(
                    "Error: Render timeout after {} seconds",
                    RENDER_TIMEOUT_SECS
                );
                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
                std::process::exit(1);
            }

            std::thread::sleep(poll_interval);
        }
    });

    // Configure rendering backend for this environment.
    // Use OnceLock so env vars are only set once per process — repeated calls
    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
    // requires a persistent renderer (tracked in issue #14).
    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
    BACKEND_INIT.get_or_init(|| {
        let backend_config = BackendConfig::headless();
        backend_config.apply_env();
    });

    // Run Bevy app with HEADLESS configuration
    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();

    // Normally not reached: the watchdog thread exits the process. This error
    // is only returned if `run()` comes back first.
    Err(RenderError::RenderFailed(
        "Render did not complete".to_string(),
    ))
}
3617
3618/// Save RGBA data to PNG file
3619fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3620    use image::{ImageBuffer, Rgba};
3621
3622    // Create parent directories if needed
3623    if let Some(parent) = path.parent() {
3624        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3625    }
3626
3627    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3628        ImageBuffer::from_raw(width, height, rgba.to_vec())
3629            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3630
3631    img.save(path).map_err(|e| e.to_string())
3632}
3633
/// Write depth samples to `path` as raw little-endian `f64` values
/// (f64 for TBP precision).
///
/// The file contains the samples back to back with no header, eight bytes per
/// sample. Missing parent directories of `path` are created first.
///
/// # Errors
///
/// Returns the stringified I/O error when the parent directory cannot be
/// created or the file cannot be written.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    if let Some(dir) = path.parent() {
        std::fs::create_dir_all(dir).map_err(|err| err.to_string())?;
    }

    // One allocation sized for the whole payload, then append each sample.
    let mut payload: Vec<u8> = Vec::with_capacity(std::mem::size_of_val(depth));
    for sample in depth {
        payload.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &payload).map_err(|err| err.to_string())
}
3644
#[cfg(test)]
mod smoke_tests {
    use super::{
        headless_scene_setup_count, persistent_warmup_camera_transform,
        reset_headless_scene_setup_count,
    };
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ source for a small centered cube. It stays visible from
    /// all default TBP viewpoints and does not need any YCB downloads.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Create a temporary `synthetic_cube/google_16k` directory holding a
    /// cube mesh (`textured.obj`) and a 2x2 texture (`texture_map.png`).
    ///
    /// The returned `TempDir` owns the files; keep it alive for as long as
    /// the object is being rendered.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let google_16k = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&google_16k).expect("create synthetic google_16k dir");

        std::fs::write(google_16k.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        // Four distinct texel colours, stored row-major for the 2x2 image.
        let palette: [[u8; 4]; 4] = [
            [255, 48, 48, 255],
            [48, 255, 48, 255],
            [48, 48, 255, 255],
            [255, 255, 64, 255],
        ];
        let texture = ImageBuffer::from_fn(2, 2, |x, y| Rgba(palette[(y * 2 + x) as usize]));
        texture
            .save(google_16k.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    /// The warmup camera must sit away from the origin and face it.
    #[test]
    fn persistent_warmup_camera_is_a_real_viewpoint() {
        let camera = persistent_warmup_camera_transform();
        let distance = camera.translation.length();
        assert!(
            distance > 0.1,
            "persistent warmup must not place the camera at the object origin"
        );

        let forward = camera.rotation * Vec3::NEG_Z;
        let to_origin = -camera.translation.normalize();
        let alignment = forward.dot(to_origin);
        assert!(
            alignment > 0.99,
            "persistent warmup camera should look at the object origin"
        );
    }

    /// Renders a small homogeneous batch against the synthetic cube and
    /// checks output shape, visible colour, single scene setup, and wall
    /// time. Opt-in because it needs a working render backend.
    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        let mut requests = Vec::with_capacity(request_count);
        for viewpoint in viewpoints.iter().copied().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                object_translation: Vec3::ZERO,
                object_scale: Vec3::ONE,
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            });
        }

        let timer = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;
        for (idx, output) in outputs.iter().enumerate() {
            assert_eq!(output.width, config.width, "output {idx} width mismatch");
            assert_eq!(output.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                output.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                output.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero colour channel.
            let has_color = output
                .rgba
                .chunks_exact(4)
                .any(|px| px[0] != 0 || px[1] != 0 || px[2] != 0);
            assert!(has_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}