Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::{LoadState, RenderAssetUsages};
42use bevy::camera::RenderTarget;
43use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
44use bevy::core_pipeline::tonemapping::Tonemapping;
45use bevy::ecs::query::QueryItem;
46use bevy::light::GlobalAmbientLight;
47use bevy::log::LogPlugin;
48use bevy::prelude::*;
49use bevy::render::camera::ExtractedCamera;
50use bevy::render::render_asset::RenderAssets;
51use bevy::render::render_graph::{
52    Node, NodeRunError, RenderGraphContext, RenderGraphExt, RenderLabel, ViewNode, ViewNodeRunner,
53};
54use bevy::render::render_resource::{
55    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d,
56    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
57    TextureDimension, TextureFormat, TextureUsages,
58};
59use bevy::render::renderer::RenderQueue;
60use bevy::render::renderer::{RenderContext, RenderDevice};
61use bevy::render::texture::GpuImage;
62use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
63use bevy::render::view::{ExtractedView, Hdr, ViewDepthTexture};
64use bevy::render::{Extract, Render, RenderApp, RenderSystems};
65use bevy::window::{ExitCondition, WindowPlugin};
66use bevy_obj::ObjPlugin;
67use std::fs::File;
68use std::io::Read as IoRead;
69use std::path::{Path, PathBuf};
70#[cfg(test)]
71use std::sync::atomic::{AtomicUsize, Ordering};
72use std::sync::{Arc, Mutex, OnceLock};
73use std::time::Duration;
74
75use crate::{
76    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
77    TargetingPolicy,
78};
79use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
80
/// Watchdog timeout for a single render, in seconds (180 s = 3 minutes).
///
/// Bounds how long any single render path waits before declaring failure.
/// The value is deliberately generous: it accommodates first-run wgpu shader
/// compilation on Windows, which can take well over 60 s on a cold GPU cache
/// (see commit 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 180;
87
/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. Historically this was set to 3 as a conservative cushion; lowering
/// it directly shortens per-viewpoint wall-clock time, because `app.update()`
/// in the batch path is not rate-limited. The current value is validated
/// against the pixel-exact hardware test
/// `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;
97
/// Warmup frames at the start of each `PersistentRenderer::render()` call.
///
/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
/// batch because `extract_and_continue_headless_batch` writes the next
/// camera transform *and* clears the shared GPU readback buffers in the
/// same tick — so the in-flight copy from the previous viewpoint has
/// already drained by the time the next capture is gated.
///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
///
/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
/// ticks of warmup gives Windows/DX12 enough room to drain the previous
/// readback and capture the post-propagation color target:
///   - tick 0: transforms propagate, render runs (no copy enabled)
///   - tick 1: previous in-flight readback drains (no copy enabled)
///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
///   - tick 3: shared buffers populated → captured → batch finalized
const PERSISTENT_WARMUP_FRAMES: u32 = 3;
121
/// Report whether render tracing is switched on via `BEVY_SENSOR_RENDER_TRACE`.
///
/// Tracing is enabled whenever the variable is present in the process
/// environment, whatever its value (an empty or non-UTF-8 value still counts
/// as "set"). The environment is re-read on every call, so the flag can be
/// toggled while the process is running. Intended to be called from per-frame
/// systems; gate all tracing output behind this.
#[inline]
fn render_trace_enabled() -> bool {
    // `var_os` only tests for presence. `var(..).is_ok()` would additionally
    // demand a valid-Unicode value and silently report "disabled" otherwise.
    std::env::var_os("BEVY_SENSOR_RENDER_TRACE").is_some()
}
128
/// Convert a filesystem path into a Bevy asset-path string.
///
/// `std::fs::canonicalize` on Windows returns a `\\?\C:\...` verbatim-prefixed
/// path. Bevy's `AssetPath` parser cannot handle that prefix, so the asset
/// would silently never load. This strips the verbatim prefix and normalizes
/// separators to `/` so the absolute path resolves through the default file
/// asset source on every platform.
///
/// Verbatim UNC paths (`\\?\UNC\server\share\...`) are mapped back to their
/// ordinary network form (`//server/share/...`) rather than being left as a
/// relative-looking `UNC/server/share/...` string.
/// NOTE(review): confirm Bevy's file asset source accepts `//server/share`
/// paths on Windows.
///
/// The path is rendered with `Path::display`, so any non-Unicode component is
/// converted lossily.
fn fs_path_to_asset_string(path: &std::path::Path) -> String {
    let shown = path.display().to_string();

    // Split into a replacement lead-in and the remainder that still needs its
    // separators normalized.
    let (lead, rest): (&str, &str) = match shown.strip_prefix(r"\\?\") {
        Some(verbatim) => match verbatim.strip_prefix(r"UNC\") {
            Some(share) => ("//", share),
            None => ("", verbatim),
        },
        None => ("", shown.as_str()),
    };

    let mut asset_path = String::with_capacity(lead.len() + rest.len());
    asset_path.push_str(lead);
    asset_path.extend(rest.chars().map(|c| if c == '\\' { '/' } else { c }));
    asset_path
}
141
/// Check if a display is available for windowed rendering.
///
/// Returns `true` if the `DISPLAY` or `WAYLAND_DISPLAY` environment variable
/// is set. Only presence is tested: the value is neither parsed nor required
/// to be valid Unicode, and no connection to the display is attempted.
#[allow(dead_code)]
fn display_available() -> bool {
    // `var_os` answers "is it set?" directly; `var(..).is_ok()` would also
    // reject values that are not valid Unicode.
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var_os(name).is_some())
}
149
/// Detect whether the process is running under WSL2 (which doesn't support
/// Vulkan window surfaces).
///
/// Reads `/proc/version` and looks, case-insensitively, for the substring
/// `microsoft` or `wsl`. Returns `false` when the file cannot be read.
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(version) => {
            let lowered = version.to_lowercase();
            ["microsoft", "wsl"]
                .iter()
                .any(|marker| lowered.contains(*marker))
        }
        Err(_) => false,
    }
}
159
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`. Because it derives `Default`, every flag
/// starts `false`, every counter starts at 0 and every buffer starts `None`.
/// NOTE(review): the systems that advance these fields live outside this
/// section; the per-field notes marked "presumably" are inferred from the
/// names and should be confirmed against them.
#[derive(Resource, Default)]
struct RenderState {
    /// Running frame counter; the `*_frame` fields below are snapshots of it.
    frame_count: u32,
    /// Presumably set once the scene/mesh asset has finished loading.
    scene_loaded: bool,
    /// Presumably set once the texture has finished loading (see
    /// `texture_ready_frame`).
    texture_loaded: bool,
    /// Presumably set once materials have been applied (see
    /// `materials_applied_frame`).
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// `frame_count` when the texture finished loading. Capture waits a small
    /// margin past this for GPU image preparation. The material (and therefore
    /// the main-pass pipeline) is applied earlier, so by the time the texture is
    /// ready the pipeline has already compiled.
    texture_ready_frame: u32,
    /// Gate for starting a capture; see `materials_applied_frame` for the
    /// propagation wait it depends on.
    capture_ready: bool,
    /// Presumably set once a capture/screenshot has been requested.
    screenshot_requested: bool,
    /// Number of frames spent waiting for a *valid* (non-blank / valid-depth)
    /// readback. The one-shot GPU capture is nondeterministic and occasionally
    /// reads a uniform clear-color frame; we reject those and keep capturing
    /// until a real frame lands, bounded by this counter.
    capture_retries: u32,
    /// Previous frame's RGBA readback. The capture is accepted only once two
    /// consecutive readbacks are identical (the render has settled), so partial
    /// in-progress frames aren't captured and every render path yields the same
    /// fully-drawn image (required for byte-exact cross-path parity).
    prev_rgba: Option<Vec<u8>>,
    /// Previous frame's depth readback, for the same settle-detection as
    /// `prev_rgba` (depth parity is asserted to ~1e-9, i.e. bit-exact).
    prev_depth: Option<Vec<f64>>,
    /// Presumably set once a readback has been accepted as the final capture.
    captured: bool,
    /// Presumably set once app exit has been requested.
    exit_requested: bool,
    /// Not read anywhere (hence `dead_code`); presumably the `frame_count`
    /// at which exit was requested.
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// RGBA bytes of the capture, `None` until one is stored.
    /// NOTE(review): pixel layout/stride is not visible here.
    rgba_data: Option<Vec<u8>>,
    /// Depth values of the capture, `None` until one is stored. Presumably
    /// linear depth in meters, matching `SharedDepthBuffer`.
    depth_data: Option<Vec<f64>>,
    /// Width of the captured image, presumably in pixels.
    image_width: u32,
    /// Height of the captured image, presumably in pixels.
    image_height: u32,
}
200
/// Test-only counter, starting at 0.
///
/// Presumably incremented each time the headless scene setup runs so tests can
/// assert how often setup happened — the increment site is outside this
/// section; verify there.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);
203
/// Test-only: reset `HEADLESS_SCENE_SETUP_COUNT` back to 0.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    // Sequentially-consistent store, matching the load in
    // `headless_scene_setup_count`.
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}
208
/// Test-only: read the current value of `HEADLESS_SCENE_SETUP_COUNT`.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
213
/// Shared buffer for the screenshot callback to write into.
///
/// Cloning clones the `Arc`, so every clone refers to the same
/// mutex-protected slot. The slot is `None` until something is stored.
/// Presumably the tuple is `(rgba_bytes, width, height)`, mirroring
/// `SharedDepthBuffer` — TODO confirm against the writer.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
219
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`.
/// Uses f64 for TBP numerical precision compatibility.
///
/// Cloning clones the `Arc`, so every clone refers to the same
/// mutex-protected slot; `DepthReadbackPlugin` inserts one clone into the main
/// app and another into the render app. The default value is an empty (`None`)
/// slot.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
226
227// ============================================================================
228// Depth Readback Infrastructure
229// ============================================================================
230
/// Request to capture depth - extracted from main world to render world.
///
/// `extract_depth_request` copies this resource into the render world, where
/// `DepthReadbackNode` reads it.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, `DepthReadbackNode` returns without copying anything.
    requested: bool,
    /// Near plane distance supplied by `DepthReadbackPlugin`. The readback
    /// node does not read it; linearization uses the view's projection matrix.
    near: f32,
    /// Far plane distance; forwarded into each `PendingDepthCapture` as the
    /// value used for background pixels and as the upper clamp.
    far: f32,
}
238
/// Pending depth capture info for async processing.
///
/// `m22`/`m32` are the relevant entries of the view's reverse-Z projection
/// matrix (`clip_from_view`), captured at copy time so the CPU-side
/// linearization matches the exact projection the GPU rendered with. This keeps
/// depth output robust if projection construction or backend behavior changes.
struct PendingDepthCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the depth texture
    /// copy; rows are padded to the 256-byte alignment.
    buffer: Buffer,
    /// Width of the copied texture, taken from the camera's physical target
    /// size.
    width: u32,
    /// Height of the copied texture, taken from the camera's physical target
    /// size.
    height: u32,
    /// `clip_from_view.z_axis.z` of the view that was rendered.
    m22: f32,
    /// `clip_from_view.w_axis.z` of the view that was rendered.
    m32: f32,
    /// Far plane distance copied from the `DepthCaptureRequest`.
    far: f32,
}
253
254fn render_projection(config: &RenderConfig) -> Projection {
255    let near = config.near_plane;
256    Projection::Perspective(PerspectiveProjection {
257        fov: config.fov_radians(),
258        near,
259        far: config.far_plane,
260        near_clip_plane: Vec4::new(0.0, 0.0, -1.0, -near),
261        ..default()
262    })
263}
264
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// `DepthReadbackNode` pushes one `PendingDepthCapture` per depth copy it
/// encodes. The `Arc<Mutex<..>>` allows pushing through the shared `&World`
/// the node receives. The default value is an empty queue.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
268
#[cfg(test)]
mod projection_tests {
    use super::*;

    /// The projection built from a `RenderConfig` must carry the configured
    /// near/far planes, both in the `PerspectiveProjection` fields and in the
    /// resulting `clip_from_view` matrix.
    #[test]
    fn render_projection_uses_configured_near_plane_for_effective_clip_matrix() {
        let config = {
            let mut cfg = RenderConfig::tbp_default();
            cfg.near_plane = 0.025;
            cfg.far_plane = 12.0;
            cfg
        };
        let expected_clip_plane = Vec4::new(0.0, 0.0, -1.0, -config.near_plane);

        let projection = render_projection(&config);
        let perspective = match &projection {
            Projection::Perspective(perspective) => perspective,
            _ => panic!("render_projection should create a perspective projection"),
        };

        // Field-level checks on the perspective parameters.
        assert_eq!(perspective.near, config.near_plane);
        assert_eq!(perspective.near_clip_plane, expected_clip_plane);
        assert_eq!(perspective.far, config.far_plane);

        // Matrix-level check: the near plane must show up in `w_axis.z`.
        let encoded_near = projection.get_clip_from_view().w_axis.z;
        assert!(
            (encoded_near - config.near_plane).abs() < 1e-6,
            "reverse-Z projection matrix should encode configured near plane; got {}",
            encoded_near
        );
    }
}
299
300// ============================================================================
301// Depth Buffer Helpers
302// ============================================================================
303
mod depth_helpers {
    /// wgpu requires buffer row alignment of 256 bytes
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of [`COPY_BYTES_PER_ROW_ALIGNMENT`].
    ///
    /// Values that are already aligned (including 0) are returned unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        value.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT)
    }

    /// Calculate aligned buffer size for an image: `height` rows, each padded
    /// to the 256-byte row alignment.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        height * align_byte_size(width * pixel_size)
    }

    /// Convert reverse-Z NDC depth to linear depth in meters.
    ///
    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
    /// This provides better precision for distant objects.
    ///
    /// Formula derivation:
    /// - At near plane (z = near): ndc = 1
    /// - At far plane (z = far): ndc = 0
    /// - linear = far / (1 + ndc * (far/near - 1))
    ///
    /// Superseded in the render path by [`ndc_to_linear_with_matrix`], which
    /// reads the actual projection near from the view matrix instead of trusting
    /// a passed-in near (the source of the #92 10x depth error). Retained for its
    /// tests and as a reference formula.
    #[allow(dead_code)]
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        // Handle edge cases
        if ndc_depth <= 0.0 {
            return far; // Background (infinite distance in reverse-Z)
        }
        if ndc_depth >= 1.0 {
            return near; // At or beyond near plane
        }
        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
        far / (1.0 + ndc_depth * (far / near - 1.0))
    }

    /// Extract depth values from aligned buffer, handling row padding.
    ///
    /// `data` holds `height` rows of little-endian `f32` texels. Each row starts
    /// at a multiple of the 256-byte-aligned row stride; only the first
    /// `width * 4` bytes of a row are read and the padding is skipped. The
    /// result is `width * height` values in row-major order.
    ///
    /// # Panics
    ///
    /// Panics if `data` is too short to contain every row (the final row needs
    /// only its `width * 4` payload bytes, not the padding).
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const PIXEL_BYTES: usize = 4; // f32 = 4 bytes

        // Do the size arithmetic in usize so large dimensions cannot overflow
        // the u32 intermediate products.
        let (width, height) = (width as usize, height as usize);
        let actual_row_bytes = width * PIXEL_BYTES;
        let aligned_row_bytes =
            actual_row_bytes.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT as usize);

        let mut depth_values = Vec::with_capacity(width * height);

        for y in 0..height {
            let row_start = y * aligned_row_bytes;
            let row_data = &data[row_start..row_start + actual_row_bytes];

            // `row_data` is exactly `width * 4` bytes, so every chunk is one texel.
            depth_values.extend(
                row_data
                    .chunks_exact(PIXEL_BYTES)
                    .map(|texel| f32::from_le_bytes([texel[0], texel[1], texel[2], texel[3]])),
            );
        }

        depth_values
    }

    /// Convert all NDC depth values to linear meters (as f64 for TBP precision).
    /// Superseded by [`convert_depth_to_linear_with_matrix`]; retained for tests.
    #[allow(dead_code)]
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| reverse_z_to_linear_depth(ndc, near, far) as f64)
            .collect()
    }

    /// Linearize a reverse-Z NDC depth using the view's actual projection matrix,
    /// rather than a hand-supplied near/far.
    ///
    /// For a perspective right-handed projection, the relevant clip-space rows are
    /// `clip_z = m22 * z + m32` and `clip_w = -z` (camera looks down -Z), so
    /// `ndc = clip_z / clip_w = (m22*z + m32) / (-z)`. Solving for the positive
    /// view-space distance `d = -z` gives **`d = m32 / (ndc + m22)`**. This holds
    /// for both finite and infinite reverse-Z and is correct regardless of which
    /// near plane the renderer actually used — the previous fixed-near formula
    /// produced depths 10x too small when the effective projection near plane
    /// drifted from `RenderConfig::near_plane` (issue #86/#92/#95).
    ///
    /// `m22 = clip_from_view[col=2][row=2]`, `m32 = clip_from_view[col=3][row=2]`.
    /// `ndc <= 0` is the reverse-Z far plane (background) and maps to `far`.
    ///
    /// Degenerate inputs (a NaN `ndc`, a near-zero denominator, or a non-finite
    /// or non-positive result) also map to `far`; valid results are clamped to
    /// at most `far`.
    pub fn ndc_to_linear_with_matrix(ndc: f32, m22: f32, m32: f32, far: f32) -> f32 {
        if ndc <= 0.0 {
            return far; // background / at-or-beyond far plane in reverse-Z
        }
        let denom = ndc + m22;
        if denom.abs() <= f32::EPSILON {
            return far;
        }
        let linear = m32 / denom;
        if !linear.is_finite() || linear <= 0.0 {
            far
        } else {
            linear.min(far)
        }
    }

    /// Convert all NDC depth values to linear meters using the view projection
    /// matrix (f64 for TBP precision). See [`ndc_to_linear_with_matrix`].
    pub fn convert_depth_to_linear_with_matrix(
        raw_depth: &[f32],
        m22: f32,
        m32: f32,
        far: f32,
    ) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| ndc_to_linear_with_matrix(ndc, m22, m32, far) as f64)
            .collect()
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(257), 512);
            assert_eq!(align_byte_size(1), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(0), 0);
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // Near plane (ndc=1 in reverse-Z)
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // Mid-range depth (ndc=0.5 should give geometric mean area)
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
            assert!(linear_mid > near && linear_mid < far);

            // Very close to far plane (ndc very small)
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
            assert!(linear_almost_far > 9.0);

            // Background (ndc=0)
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image, 4 bytes per pixel
            // Aligned row = 256 bytes, but actual = 8 bytes
            let width = 2u32;
            let height = 2u32;

            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows

            // Write test depth values
            // Row 0: [0.5, 0.6]
            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
            // Row 1: [0.7, 0.8]
            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            assert!((depth[0] - 0.5).abs() < 0.001);
            assert!((depth[1] - 0.6).abs() < 0.001);
            assert!((depth[2] - 0.7).abs() < 0.001);
            assert!((depth[3] - 0.8).abs() < 0.001);
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // Near plane should give near value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // Far plane (ndc=0) should give far value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Depth should increase as NDC decreases (reverse-Z)
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_infinite_reverse_z() {
            // Infinite reverse-Z (Bevy `perspective_infinite_reverse_rh`):
            // m22 = 0, m32 = near. d = near / ndc.
            let (m22, m32, far) = (0.0f32, 0.1f32, 10.0f32);

            // The exact regression from #92: ndc 0.366504 must linearize to
            // ~0.273 m (near 0.1), NOT ~0.027 m (the old fixed near = 0.01).
            let d = ndc_to_linear_with_matrix(0.366504, m22, m32, far);
            assert!((d as f64 - 0.272849).abs() < 1e-4, "got {d}");

            // Background (reverse-Z far plane) and clamping.
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
            assert_eq!(ndc_to_linear_with_matrix(-0.5, m22, m32, far), far);
            // Very small ndc -> very far -> clamped to far.
            assert_eq!(ndc_to_linear_with_matrix(1e-9, m22, m32, far), far);
        }

        #[test]
        fn test_ndc_to_linear_with_matrix_finite_reverse_z() {
            // Finite reverse-Z maps near->ndc 1, far->ndc 0. Construct the matrix
            // entries for near=0.5, far=20: m22 = near/(far-near), m32 = far*m22.
            let (near, far) = (0.5f32, 20.0f32);
            let m22 = near / (far - near);
            let m32 = far * m22;
            // ndc = 1 -> near; ndc = 0 -> far (background sentinel also returns far).
            assert!((ndc_to_linear_with_matrix(1.0, m22, m32, far) - near).abs() < 1e-4);
            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // Near plane
            assert!((linear[0] - near as f64).abs() < 0.001);
            // Far plane
            assert!((linear[3] - far as f64).abs() < 0.001);
            // All should be in range [near, far]
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Powers of two should stay the same if multiple of 256
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(1024), 1024);

            // Just under 256 should round up to 256
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just over 256 should round up to 512
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // Test with TBP default resolution
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            // Create aligned buffer
            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with incrementing values
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Verify first and last values
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
623
624// ============================================================================
625// Depth Readback Render Node
626// ============================================================================
627
/// Label for the depth readback render graph node.
///
/// `DepthReadbackPlugin` registers `DepthReadbackNode` under this label in the
/// `Core3d` graph and uses it to order the node between `EndMainPass` and
/// `Tonemapping`.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
631
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node itself is stateless: the capture request and the queue of pending
/// captures are read from render-world resources on each run.
#[derive(Default)]
struct DepthReadbackNode;
636
637impl ViewNode for DepthReadbackNode {
638    type ViewQuery = (
639        &'static ViewDepthTexture,
640        &'static ExtractedCamera,
641        &'static ExtractedView,
642    );
643
644    fn run<'w>(
645        &self,
646        _graph: &mut RenderGraphContext,
647        render_context: &mut RenderContext<'w>,
648        (view_depth_texture, camera, view): QueryItem<'w, '_, Self::ViewQuery>,
649        world: &'w World,
650    ) -> Result<(), NodeRunError> {
651        let trace = render_trace_enabled();
652        let t0 = trace.then(std::time::Instant::now);
653
654        // Check if depth capture is requested
655        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
656            return Ok(());
657        };
658        if !request.requested {
659            return Ok(());
660        }
661
662        // Get the pending queue
663        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
664            return Ok(());
665        };
666
667        // Get texture size from camera viewport or physical size
668        let Some(physical_size) = camera.physical_target_size else {
669            return Ok(());
670        };
671        let width = physical_size.x;
672        let height = physical_size.y;
673
674        let render_device = world.resource::<RenderDevice>();
675
676        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
677        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
678        let unpadded_bytes_per_row = width * bytes_per_pixel;
679        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
680        let buffer_size = (padded_bytes_per_row * height) as u64;
681
682        // Create staging buffer for CPU readback
683        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
684            label: Some("depth_staging_buffer"),
685            size: buffer_size,
686            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
687            mapped_at_creation: false,
688        });
689
690        // Copy depth texture to staging buffer
691        let encoder = render_context.command_encoder();
692        encoder.copy_texture_to_buffer(
693            TexelCopyTextureInfo {
694                texture: &view_depth_texture.texture,
695                mip_level: 0,
696                origin: Origin3d::ZERO,
697                aspect: TextureAspect::DepthOnly,
698            },
699            TexelCopyBufferInfo {
700                buffer: &staging_buffer,
701                layout: TexelCopyBufferLayout {
702                    offset: 0,
703                    bytes_per_row: Some(padded_bytes_per_row),
704                    rows_per_image: Some(height),
705                },
706            },
707            Extent3d {
708                width,
709                height,
710                depth_or_array_layers: 1,
711            },
712        );
713
714        // Push to queue for async processing (queue is Arc<Mutex<Vec>>).
715        // Capture the projection-matrix entries used for linearization: for a
716        // perspective RH matrix, clip_z = m22*z + m32 and clip_w = -z, so the
717        // positive view-space distance is d = m32 / (ndc + m22).
718        let clip_from_view = view.clip_from_view;
719        if let Ok(mut pending) = queue.0.lock() {
720            pending.push(PendingDepthCapture {
721                buffer: staging_buffer,
722                width,
723                height,
724                m22: clip_from_view.z_axis.z,
725                m32: clip_from_view.w_axis.z,
726                far: request.far,
727            });
728        }
729
730        if let Some(t0) = t0 {
731            eprintln!(
732                "[render_trace][node] DepthReadbackNode ms={:.3}",
733                t0.elapsed().as_secs_f64() * 1000.0
734            );
735        }
736
737        Ok(())
738    }
739}
740
741// ============================================================================
742// Depth Readback Plugin
743// ============================================================================
744
/// Plugin that sets up depth buffer readback from the GPU.
struct DepthReadbackPlugin {
    /// Shared slot for depth results; clones are inserted as a resource into
    /// both the main app and the render app.
    shared_depth: SharedDepthBuffer,
    /// Near plane distance stored in the initial `DepthCaptureRequest`.
    near: f32,
    /// Far plane distance stored in the initial `DepthCaptureRequest`.
    far: f32,
}
751
752impl Plugin for DepthReadbackPlugin {
753    fn build(&self, app: &mut App) {
754        use bevy::core_pipeline::core_3d::graph::Core3d;
755        use bevy::core_pipeline::core_3d::graph::Node3d;
756
757        // Insert shared depth buffer in main app
758        app.insert_resource(self.shared_depth.clone());
759        app.insert_resource(DepthCaptureRequest {
760            requested: false,
761            near: self.near,
762            far: self.far,
763        });
764
765        // Get render app
766        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
767            eprintln!("Failed to get RenderApp for depth readback");
768            return;
769        };
770
771        // Insert resources in render world
772        render_app.insert_resource(self.shared_depth.clone());
773        render_app.init_resource::<PendingDepthCaptureQueue>();
774
775        // Add extraction system to copy request from main world
776        render_app.add_systems(ExtractSchedule, extract_depth_request);
777
778        // Add system to process completed depth captures
779        render_app.add_systems(
780            Render,
781            collect_depth_captures.in_set(RenderSystems::Cleanup),
782        );
783
784        // Register the depth readback node in the render graph
785        // Run after main pass completes (depth buffer is ready) but before tonemapping
786        render_app
787            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
788            .add_render_graph_edges(
789                Core3d,
790                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
791            );
792    }
793}
794
795/// Extract depth capture request from main world to render world
796fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
797    commands.insert_resource(DepthCaptureRequest {
798        requested: request.requested,
799        near: request.near,
800        far: request.far,
801    });
802}
803
804/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
805fn collect_depth_captures(
806    queue: Res<PendingDepthCaptureQueue>,
807    shared_depth: Res<SharedDepthBuffer>,
808    render_device: Res<RenderDevice>,
809) {
810    let trace = render_trace_enabled();
811    let t_sys = trace.then(std::time::Instant::now);
812
813    // Take all pending captures from the queue
814    let pending_captures = {
815        let Ok(mut pending) = queue.0.lock() else {
816            return;
817        };
818        std::mem::take(&mut *pending)
819    };
820
821    if pending_captures.is_empty() {
822        if let Some(t0) = t_sys {
823            eprintln!(
824                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
825                t0.elapsed().as_secs_f64() * 1000.0
826            );
827        }
828        return;
829    }
830
831    let pending_count = pending_captures.len();
832
833    // Process each pending capture synchronously with device polling
834    for pending in pending_captures {
835        let width = pending.width;
836        let height = pending.height;
837        let m22 = pending.m22;
838        let m32 = pending.m32;
839        let far = pending.far;
840        let buffer = pending.buffer;
841        let shared = shared_depth.0.clone();
842
843        // Use blocking sync approach with device polling (same as RGBA capture)
844        let buffer_slice = buffer.slice(..);
845
846        // Request mapping
847        let (tx, rx) = std::sync::mpsc::channel();
848        buffer_slice.map_async(MapMode::Read, move |result| {
849            let _ = tx.send(result);
850        });
851
852        let t_wait = trace.then(std::time::Instant::now);
853        let mut poll_iters: u32 = 0;
854
855        // Poll the device until mapping completes
856        loop {
857            let _ =
858                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
859            poll_iters += 1;
860            match rx.try_recv() {
861                Ok(Ok(())) => {
862                    let data = buffer_slice.get_mapped_range();
863
864                    // Extract depth values with alignment handling
865                    let ndc_depth =
866                        depth_helpers::extract_depth_with_alignment(&data, width, height);
867
868                    drop(data);
869                    buffer.unmap();
870
871                    // Convert reverse-Z NDC to linear depth (meters) using the
872                    // view's actual projection matrix entries. See
873                    // `convert_depth_to_linear_with_matrix`.
874                    let linear_depth = depth_helpers::convert_depth_to_linear_with_matrix(
875                        &ndc_depth, m22, m32, far,
876                    );
877
878                    // Store in shared buffer
879                    if let Ok(mut guard) = shared.lock() {
880                        *guard = Some((linear_depth, width, height));
881                    }
882                    break;
883                }
884                Ok(Err(e)) => {
885                    eprintln!("Failed to map depth buffer: {:?}", e);
886                    break;
887                }
888                Err(std::sync::mpsc::TryRecvError::Empty) => {
889                    // Keep polling
890                    std::thread::sleep(std::time::Duration::from_millis(1));
891                }
892                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
893                    eprintln!("Depth buffer mapping channel disconnected");
894                    break;
895                }
896            }
897        }
898
899        if let Some(t_wait) = t_wait {
900            eprintln!(
901                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
902                poll_iters,
903                t_wait.elapsed().as_secs_f64() * 1000.0
904            );
905        }
906    }
907
908    if let Some(t0) = t_sys {
909        eprintln!(
910            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
911            pending_count,
912            t0.elapsed().as_secs_f64() * 1000.0
913        );
914    }
915}
916
917// ============================================================================
918// Image Copy Infrastructure (for headless rendering)
919// ============================================================================
920
/// Label for the image copy render graph node.
///
/// `ImageCopyPlugin` registers `ImageCopyDriver` under this label and orders
/// it after the camera driver node.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
924
/// Component that marks an image for GPU-to-CPU copying.
///
/// During extraction every `ImageCopier` is cloned into the render world's
/// `ImageCopiers` resource.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target)
    src_image: Handle<Image>,
    /// Whether to capture on this frame; `ImageCopyDriver` skips copiers where
    /// this is `false`.
    enabled: bool,
}
933
/// Resource containing all ImageCopiers for the render world.
///
/// Replaced wholesale by `extract_image_copiers` and read by
/// `ImageCopyDriver`.
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
937
/// A submitted texture-to-buffer copy that is waiting to be read back.
///
/// Produced by `ImageCopyDriver` and consumed by `collect_image_captures`.
struct PendingImageCapture {
    /// Staging buffer that receives the texture contents.
    buffer: Buffer,
    /// Width of the source image, taken from the GPU image size.
    width: u32,
    /// Height of the source image, taken from the GPU image size.
    height: u32,
    /// Row stride of `buffer` in bytes, after wgpu's copy-alignment padding.
    padded_bytes_per_row: u32,
}
945
/// Queue for pending image captures.
///
/// `ImageCopyDriver` pushes onto it; `collect_image_captures` drains it.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
949
/// Shared buffer for captured RGBA data.
///
/// Holds `None` until a capture completes, then `(pixels, width, height)` with
/// the row padding already removed from `pixels`.
#[derive(Resource, Clone, Default)]
// The nested `Arc<Mutex<Option<(..)>>>` trips clippy's type-complexity lint.
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
954
/// Render graph node that copies render target images to staging buffers.
///
/// Stateless: everything it needs is looked up from the world when it runs.
struct ImageCopyDriver;
957
958impl Node for ImageCopyDriver {
959    fn run(
960        &self,
961        _graph: &mut RenderGraphContext,
962        _render_context: &mut RenderContext,
963        world: &World,
964    ) -> Result<(), NodeRunError> {
965        let trace = render_trace_enabled();
966        let t0 = trace.then(std::time::Instant::now);
967
968        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
969            return Ok(());
970        };
971
972        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
973            return Ok(());
974        };
975
976        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
977            return Ok(());
978        };
979
980        let render_device = world.resource::<RenderDevice>();
981
982        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
983            return Ok(());
984        };
985
986        for image_copier in image_copiers.0.iter() {
987            if !image_copier.enabled {
988                continue;
989            }
990
991            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
992                continue;
993            };
994
995            let width = gpu_image.size.width;
996            let height = gpu_image.size.height;
997
998            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
999            let block_dimensions = gpu_image.texture_format.block_dimensions();
1000            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
1001
1002            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
1003                (width as usize / block_dimensions.0 as usize) * block_size as usize,
1004            );
1005
1006            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
1007
1008            // Create staging buffer for CPU readback
1009            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
1010                label: Some("image_copy_staging_buffer"),
1011                size: buffer_size,
1012                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
1013                mapped_at_creation: false,
1014            });
1015
1016            // Create command encoder for the copy operation
1017            let mut encoder =
1018                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
1019
1020            let texture_extent = Extent3d {
1021                width,
1022                height,
1023                depth_or_array_layers: 1,
1024            };
1025
1026            // Copy texture to buffer
1027            encoder.copy_texture_to_buffer(
1028                gpu_image.texture.as_image_copy(),
1029                TexelCopyBufferInfo {
1030                    buffer: &staging_buffer,
1031                    layout: TexelCopyBufferLayout {
1032                        offset: 0,
1033                        bytes_per_row: Some(padded_bytes_per_row as u32),
1034                        rows_per_image: None,
1035                    },
1036                },
1037                texture_extent,
1038            );
1039
1040            // Submit the copy command
1041            render_queue.submit(std::iter::once(encoder.finish()));
1042
1043            // Queue for async processing
1044            if let Ok(mut pending) = queue.0.lock() {
1045                pending.push(PendingImageCapture {
1046                    buffer: staging_buffer,
1047                    width,
1048                    height,
1049                    padded_bytes_per_row: padded_bytes_per_row as u32,
1050                });
1051            }
1052        }
1053
1054        if let Some(t0) = t0 {
1055            eprintln!(
1056                "[render_trace][node] ImageCopyDriver ms={:.3}",
1057                t0.elapsed().as_secs_f64() * 1000.0
1058            );
1059        }
1060
1061        Ok(())
1062    }
1063}
1064
1065/// Extract ImageCopier components to render world
1066fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
1067    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
1068}
1069
1070/// Process completed image captures
1071fn collect_image_captures(
1072    queue: Res<PendingImageCaptureQueue>,
1073    shared_rgba: Res<SharedRgbaBuffer>,
1074    render_device: Res<RenderDevice>,
1075) {
1076    let trace = render_trace_enabled();
1077    let t_sys = trace.then(std::time::Instant::now);
1078
1079    let pending_captures = {
1080        let Ok(mut pending) = queue.0.lock() else {
1081            return;
1082        };
1083        std::mem::take(&mut *pending)
1084    };
1085
1086    if pending_captures.is_empty() {
1087        if let Some(t0) = t_sys {
1088            eprintln!(
1089                "[render_trace][sys] collect_image_captures empty ms={:.3}",
1090                t0.elapsed().as_secs_f64() * 1000.0
1091            );
1092        }
1093        return;
1094    }
1095
1096    let pending_count = pending_captures.len();
1097
1098    for pending in pending_captures {
1099        let width = pending.width;
1100        let height = pending.height;
1101        let padded_bytes_per_row = pending.padded_bytes_per_row;
1102        let buffer = pending.buffer;
1103        let shared = shared_rgba.0.clone();
1104
1105        // Use blocking sync approach with device polling
1106        let buffer_slice = buffer.slice(..);
1107
1108        // Request mapping
1109        let (tx, rx) = std::sync::mpsc::channel();
1110        buffer_slice.map_async(MapMode::Read, move |result| {
1111            let _ = tx.send(result);
1112        });
1113
1114        // Poll the device until mapping completes (with timeout)
1115        let start = std::time::Instant::now();
1116        let timeout = std::time::Duration::from_secs(10);
1117        let mut poll_iters: u32 = 0;
1118        loop {
1119            let _ =
1120                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
1121            poll_iters += 1;
1122
1123            if start.elapsed() > timeout {
1124                eprintln!(
1125                    "Warning: Buffer mapping timeout after {:?}",
1126                    start.elapsed()
1127                );
1128                break;
1129            }
1130
1131            match rx.try_recv() {
1132                Ok(Ok(())) => {
1133                    let data = buffer_slice.get_mapped_range();
1134
1135                    // Extract pixels with alignment handling
1136                    let bytes_per_pixel = 4u32;
1137                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
1138                    let padded_row_bytes = padded_bytes_per_row as usize;
1139
1140                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
1141                    for y in 0..height as usize {
1142                        let row_start = y * padded_row_bytes;
1143                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
1144                    }
1145
1146                    drop(data);
1147                    buffer.unmap();
1148
1149                    if let Ok(mut guard) = shared.lock() {
1150                        *guard = Some((rgba, width, height));
1151                    }
1152                    break;
1153                }
1154                Ok(Err(e)) => {
1155                    eprintln!("Failed to map image buffer: {:?}", e);
1156                    break;
1157                }
1158                Err(std::sync::mpsc::TryRecvError::Empty) => {
1159                    // Keep polling
1160                    std::thread::sleep(std::time::Duration::from_millis(1));
1161                }
1162                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1163                    eprintln!("Image buffer mapping channel disconnected");
1164                    break;
1165                }
1166            }
1167        }
1168
1169        if trace {
1170            eprintln!(
1171                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
1172                poll_iters,
1173                start.elapsed().as_secs_f64() * 1000.0
1174            );
1175        }
1176    }
1177
1178    if let Some(t0) = t_sys {
1179        eprintln!(
1180            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1181            pending_count,
1182            t0.elapsed().as_secs_f64() * 1000.0
1183        );
1184    }
1185}
1186
/// Plugin for headless image copy.
///
/// Registers the image copy node, its extraction and readback systems, and
/// shares `shared_rgba` with both the main app and the render sub-app.
struct ImageCopyPlugin {
    /// Shared buffer handle; a clone is inserted as a resource in each world.
    shared_rgba: SharedRgbaBuffer,
}
1191
1192impl Plugin for ImageCopyPlugin {
1193    fn build(&self, app: &mut App) {
1194        use bevy::render::render_graph::RenderGraph;
1195
1196        app.insert_resource(self.shared_rgba.clone());
1197
1198        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1199            return;
1200        };
1201
1202        render_app.insert_resource(self.shared_rgba.clone());
1203        render_app.init_resource::<ImageCopiers>();
1204        render_app.init_resource::<PendingImageCaptureQueue>();
1205
1206        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1207        render_app.add_systems(
1208            Render,
1209            collect_image_captures.in_set(RenderSystems::Cleanup),
1210        );
1211
1212        // Add image copy node to render graph (runs after camera driver)
1213        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1214        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1215        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1216    }
1217}
1218
1219// ============================================================================
1220// Render Request and Components
1221// ============================================================================
1222
/// Configuration passed to the Bevy app.
///
/// Built by the `render_headless*` entry points and inserted as a resource.
#[derive(Resource, Clone)]
struct RenderRequest {
    /// Mesh location as an asset-path string (from `fs_path_to_asset_string`).
    mesh_path: String,
    /// Texture location as an asset-path string (from `fs_path_to_asset_string`).
    texture_path: String,
    /// Camera pose; for batch renders this is the first viewpoint.
    camera_transform: Transform,
    /// Rotation to apply to the object, as supplied by the caller.
    object_rotation: ObjectRotation,
    /// Render settings; `near_plane` / `far_plane` also feed depth readback.
    config: RenderConfig,
}
1232
/// Marker for the rendered object.
// NOTE(review): carries no data; where it is attached and queried lies outside
// this part of the file.
#[derive(Component)]
struct RenderedObject;
1236
/// Marker for the render camera.
// NOTE(review): carries no data; where it is attached and queried lies outside
// this part of the file.
#[derive(Component)]
struct RenderCamera;
1240
/// Handle for the loaded texture, stored as a resource.
// NOTE(review): presumably inserted by the scene setup system once the texture
// load is requested — confirm against the setup code.
#[derive(Resource)]
struct LoadedTexture(Handle<Image>);
1244
/// Handle for the loaded scene, stored as a resource.
// NOTE(review): presumably inserted by the scene setup system once the mesh
// load is requested — confirm against the setup code.
#[derive(Resource)]
struct LoadedScene(Handle<Scene>);
1248
/// Shared output for extracting render results.
///
/// Starts as `None`; `render_headless` reads it after the app has run, and its
/// watchdog thread polls it to detect that a result is available.
#[derive(Resource, Clone)]
struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1252
/// Handle for the render target image.
#[derive(Resource)]
// NOTE(review): the wrapped handle is not read in this part of the file;
// presumably the resource exists to keep the image alive — confirm before
// removing the allow.
#[allow(dead_code)]
struct RenderTargetImage(Handle<Image>);
1257
/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
#[derive(Resource)]
struct HeadlessBatchSequence {
    /// Camera poses to render, in order.
    viewpoints: Vec<Transform>,
    /// Index into `viewpoints` of the pose currently being rendered.
    current_index: usize,
    /// Finished outputs; the batch succeeds only when there is exactly one per
    /// viewpoint.
    outputs: Vec<RenderOutput>,
    /// Starts at 0. NOTE(review): presumably counted down by the warmup system
    /// before each capture — confirm in `tick_headless_batch_warmup`.
    warmup_frames_remaining: u32,
    /// Set once the batch is finished; the driving loop stops updating the app
    /// when this is `true`. Starts `true` for an empty viewpoint list.
    done: bool,
}
1267
1268impl HeadlessBatchSequence {
1269    fn new(viewpoints: Vec<Transform>) -> Self {
1270        let capacity = viewpoints.len();
1271        Self {
1272            viewpoints,
1273            current_index: 0,
1274            outputs: Vec::with_capacity(capacity),
1275            warmup_frames_remaining: 0,
1276            done: capacity == 0,
1277        }
1278    }
1279
1280    fn current_viewpoint(&self) -> Option<Transform> {
1281        self.viewpoints.get(self.current_index).cloned()
1282    }
1283}
1284
1285/// Perform headless rendering of a YCB object.
1286///
1287/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1288/// require any window surfaces. This should work on WSL2 and other environments
1289/// without display servers.
1290///
1291/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1292/// for results and terminates the process once the render is complete.
1293#[allow(dead_code)]
1294pub fn render_headless(
1295    object_dir: &Path,
1296    camera_transform: &Transform,
1297    object_rotation: &ObjectRotation,
1298    config: &RenderConfig,
1299) -> Result<RenderOutput, RenderError> {
1300    // Canonicalize paths so Bevy's asset server can find them regardless of
1301    // caller working directory. Relative paths like "../../ycb" pass the
1302    // exists() check but Bevy resolves assets against its own root.
1303    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1304        RenderError::RenderFailed(format!(
1305            "Cannot canonicalize object directory {}: {}",
1306            object_dir.display(),
1307            e
1308        ))
1309    })?;
1310    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1311    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1312
1313    if !mesh_path.exists() {
1314        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1315            &mesh_path,
1316        )));
1317    }
1318    if !texture_path.exists() {
1319        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1320            &texture_path,
1321        )));
1322    }
1323
1324    let request = RenderRequest {
1325        mesh_path: fs_path_to_asset_string(&mesh_path),
1326        texture_path: fs_path_to_asset_string(&texture_path),
1327        camera_transform: *camera_transform,
1328        object_rotation: object_rotation.clone(),
1329        config: config.clone(),
1330    };
1331
1332    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1333    let output_clone = shared_output.clone();
1334
1335    // Shared buffer for RGBA data from headless render target
1336    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1337
1338    // Shared buffer for depth readback
1339    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1340
1341    // Create a temp file path for fallback output serialization
1342    let temp_path =
1343        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1344
1345    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1346    let output_poll_for_timeout = shared_output.clone();
1347    std::thread::spawn(move || {
1348        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1349        let start = std::time::Instant::now();
1350        let poll_interval = std::time::Duration::from_millis(100);
1351
1352        loop {
1353            // Check if we have a result
1354            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1355                if guard.is_some() {
1356                    // Output is ready, Bevy will exit via AppExit event
1357                    return; // Exit watchdog thread, Bevy will handle exit
1358                }
1359            }
1360
1361            if start.elapsed() > timeout {
1362                eprintln!(
1363                    "Error: Render timeout after {} seconds",
1364                    RENDER_TIMEOUT_SECS
1365                );
1366                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1367                // Force exit on timeout (this is a failure case)
1368                std::process::exit(1);
1369            }
1370
1371            std::thread::sleep(poll_interval);
1372        }
1373    });
1374
1375    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1376    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1377    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1378
1379    // App::run() returned - check shared_output for result
1380    if let Ok(guard) = shared_output.0.lock() {
1381        if let Some(output) = guard.as_ref() {
1382            return Ok(output.clone());
1383        }
1384    }
1385
1386    // Fallback: try to read from temp file (for legacy compatibility)
1387    if temp_path.exists() {
1388        if let Ok(output) = read_output_from_file(&temp_path) {
1389            let _ = std::fs::remove_file(&temp_path);
1390            return Ok(output);
1391        }
1392    }
1393
1394    Err(RenderError::RenderFailed(
1395        "Render did not complete".to_string(),
1396    ))
1397}
1398
/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
///
/// All captures share the same object, object rotation, and render configuration.
/// This is the fast path used by the batch API for episode-style workloads.
///
/// The app is driven manually with `app.update()` until the
/// `HeadlessBatchSequence` resource reports `done`. An empty `viewpoints`
/// slice returns an empty vector without building an app.
///
/// # Errors
///
/// - `RenderError::RenderFailed` if `object_dir` cannot be canonicalized, or
///   if the number of outputs differs from the number of viewpoints.
/// - `RenderError::MeshNotFound` if the mesh file does not exist.
/// - `RenderError::TextureNotFound` if the texture file does not exist.
/// - `RenderError::RenderTimeout` if the batch is still running after
///   `RENDER_TIMEOUT_SECS`; the check happens between updates.
pub fn render_headless_sequence(
    object_dir: &Path,
    viewpoints: &[Transform],
    object_rotation: &ObjectRotation,
    config: &RenderConfig,
) -> Result<Vec<RenderOutput>, RenderError> {
    // Nothing to render: skip app construction entirely.
    if viewpoints.is_empty() {
        return Ok(Vec::new());
    }

    // Resolve to an absolute path before deriving the asset paths from it.
    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
        RenderError::RenderFailed(format!(
            "Cannot canonicalize object directory {}: {}",
            object_dir.display(),
            e
        ))
    })?;
    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);

    if !mesh_path.exists() {
        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
            &mesh_path,
        )));
    }
    if !texture_path.exists() {
        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
            &texture_path,
        )));
    }

    // The request carries the first viewpoint; the remaining ones live in the
    // `HeadlessBatchSequence` resource inserted below.
    let request = RenderRequest {
        mesh_path: fs_path_to_asset_string(&mesh_path),
        texture_path: fs_path_to_asset_string(&texture_path),
        camera_transform: viewpoints[0],
        object_rotation: object_rotation.clone(),
        config: config.clone(),
    };

    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
    let rgba_clone = shared_rgba.clone();

    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
    let depth_clone = shared_depth.clone();

    // No ScheduleRunnerPlugin here: the loop further down calls `update()`
    // itself instead of handing control to `App::run()`.
    let mut app = App::new();
    app.add_plugins(
        DefaultPlugins
            .set(bevy::asset::AssetPlugin {
                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
                // default (UnapprovedPathMode::Forbid → load() silently returns a
                // default handle). YCB meshes load from absolute paths, so allow them.
                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
                ..default()
            })
            .set(WindowPlugin {
                primary_window: None,
                exit_condition: ExitCondition::DontExit,
                ..default()
            })
            .disable::<bevy::winit::WinitPlugin>()
            .disable::<LogPlugin>()
            .disable::<TerminalCtrlCHandlerPlugin>(),
    )
    .add_plugins(ObjPlugin)
    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
    // Scene spawning panics unless those component types are registered. The
    // minimal headless plugin set doesn't register them, so do it explicitly.
    .register_type::<Mesh3d>()
    .register_type::<MeshMaterial3d<StandardMaterial>>()
    .register_type::<bevy::prelude::Transform>()
    .register_type::<bevy::prelude::GlobalTransform>()
    .register_type::<bevy::transform::components::TransformTreeChanged>()
    .register_type::<bevy::prelude::Visibility>()
    .register_type::<bevy::prelude::InheritedVisibility>()
    .register_type::<bevy::prelude::ViewVisibility>()
    .add_plugins(ImageCopyPlugin {
        shared_rgba: rgba_clone,
    })
    .add_plugins(DepthReadbackPlugin {
        shared_depth: depth_clone,
        near: config.near_plane,
        far: config.far_plane,
    })
    .insert_resource(request)
    .insert_resource(shared_rgba)
    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
    .init_resource::<RenderState>()
    .add_systems(Startup, setup_headless_scene)
    // `.chain()` makes the systems in this tuple run in the listed order.
    .add_systems(
        Update,
        (
            check_assets_loaded,
            apply_materials,
            tick_headless_batch_warmup,
            request_headless_capture,
            check_headless_capture_ready,
            extract_and_continue_headless_batch,
        )
            .chain(),
    );

    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
    // normal startup phases before driving the headless batch loop ourselves.
    let trace_outer = render_trace_enabled();
    let t_finish = std::time::Instant::now();
    app.finish();
    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
    let t_cleanup = std::time::Instant::now();
    app.cleanup();
    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
    if trace_outer {
        eprintln!(
            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
            finish_ms, cleanup_ms
        );
    }

    // The timeout clock starts after finish/cleanup, so it covers only the
    // update loop below.
    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
    let start = std::time::Instant::now();

    // NOTE(review): this flag is read straight from the environment, while
    // `trace_outer` above comes from `render_trace_enabled()` — confirm the two
    // always agree, or use a single source.
    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
    let mut update_idx: u32 = 0;
    let mut last_completed_outputs: usize = 0;
    let mut viewpoint_start = std::time::Instant::now();

    loop {
        // Checked once per iteration, so a single long `update()` can overrun
        // the limit before it is noticed.
        if start.elapsed() > timeout {
            return Err(RenderError::RenderTimeout {
                duration_secs: RENDER_TIMEOUT_SECS,
            });
        }

        let update_start = std::time::Instant::now();
        app.update();
        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;

        if trace {
            let batch = app.world().resource::<HeadlessBatchSequence>();
            let warmup = batch.warmup_frames_remaining;
            let current = batch.current_index;
            let completed = batch.outputs.len();
            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
            eprintln!(
                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
            );
            // A grown output count means a viewpoint finished during this
            // update; restart the per-viewpoint timer.
            if completed > last_completed_outputs {
                eprintln!(
                    "[render_trace] viewpoint {} finished in {:.2} ms",
                    completed - 1,
                    vp_ms
                );
                last_completed_outputs = completed;
                viewpoint_start = std::time::Instant::now();
            }
        }

        update_idx += 1;

        if app.world().resource::<HeadlessBatchSequence>().done {
            break;
        }
    }

    if trace {
        eprintln!(
            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
            start.elapsed().as_secs_f64() * 1000.0,
            viewpoints.len()
        );
    }

    // `done` alone is not enough: every viewpoint must have produced an output.
    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
    if batch.outputs.len() != viewpoints.len() {
        return Err(RenderError::RenderFailed(format!(
            "Batch render produced {} outputs for {} viewpoints",
            batch.outputs.len(),
            viewpoints.len()
        )));
    }

    // Move the outputs out of the resource instead of cloning them.
    Ok(std::mem::take(&mut batch.outputs))
}
1588
1589/// Assemble the shared single-render headless Bevy app.
1590fn build_headless_app(
1591    request: RenderRequest,
1592    shared_output: SharedOutput,
1593    shared_rgba: SharedRgbaBuffer,
1594    shared_depth: SharedDepthBuffer,
1595) -> App {
1596    let near = request.config.near_plane;
1597    let far = request.config.far_plane;
1598
1599    let mut app = App::new();
1600    app.add_plugins(
1601        DefaultPlugins
1602            .set(bevy::asset::AssetPlugin {
1603                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1604                // default (UnapprovedPathMode::Forbid → load() silently returns a
1605                // default handle). YCB meshes load from absolute paths, so allow them.
1606                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1607                ..default()
1608            })
1609            .set(WindowPlugin {
1610                primary_window: None,
1611                exit_condition: ExitCondition::DontExit,
1612                ..default()
1613            })
1614            .disable::<bevy::winit::WinitPlugin>()
1615            .disable::<LogPlugin>()
1616            .disable::<TerminalCtrlCHandlerPlugin>(),
1617    )
1618    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1619        1.0 / 60.0,
1620    )))
1621    .add_plugins(ObjPlugin)
1622    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1623    // Scene spawning panics unless those component types are registered. The
1624    // minimal headless plugin set doesn't register them, so do it explicitly.
1625    .register_type::<Mesh3d>()
1626    .register_type::<MeshMaterial3d<StandardMaterial>>()
1627    .register_type::<bevy::prelude::Transform>()
1628    .register_type::<bevy::prelude::GlobalTransform>()
1629    .register_type::<bevy::transform::components::TransformTreeChanged>()
1630    .register_type::<bevy::prelude::Visibility>()
1631    .register_type::<bevy::prelude::InheritedVisibility>()
1632    .register_type::<bevy::prelude::ViewVisibility>()
1633    .add_plugins(ImageCopyPlugin {
1634        shared_rgba: shared_rgba.clone(),
1635    })
1636    .add_plugins(DepthReadbackPlugin {
1637        shared_depth,
1638        near,
1639        far,
1640    })
1641    .insert_resource(request)
1642    .insert_resource(shared_output)
1643    .insert_resource(shared_rgba)
1644    .init_resource::<RenderState>()
1645    .add_systems(Startup, setup_headless_scene)
1646    .add_systems(
1647        Update,
1648        (
1649            check_assets_loaded,
1650            apply_materials,
1651            request_headless_capture,
1652            check_headless_capture_ready,
1653            extract_and_exit_headless,
1654        )
1655            .chain(),
1656    );
1657    app
1658}
1659
1660/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1661#[allow(dead_code)]
1662fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1663    let mut data = Vec::new();
1664
1665    // Header: width, height, rgba_len, depth_len
1666    data.extend_from_slice(&output.width.to_le_bytes());
1667    data.extend_from_slice(&output.height.to_le_bytes());
1668    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1669    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1670
1671    // RGBA data
1672    data.extend_from_slice(&output.rgba);
1673
1674    // Depth data (as f64 bytes for TBP precision)
1675    for d in &output.depth {
1676        data.extend_from_slice(&d.to_le_bytes());
1677    }
1678
1679    // Intrinsics (f64 for TBP precision)
1680    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1681    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1682    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1683    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1684    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1685    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1686
1687    // Camera transform (translation + rotation quaternion)
1688    let t = output.camera_transform.translation;
1689    let r = output.camera_transform.rotation;
1690    data.extend_from_slice(&t.x.to_le_bytes());
1691    data.extend_from_slice(&t.y.to_le_bytes());
1692    data.extend_from_slice(&t.z.to_le_bytes());
1693    data.extend_from_slice(&r.x.to_le_bytes());
1694    data.extend_from_slice(&r.y.to_le_bytes());
1695    data.extend_from_slice(&r.z.to_le_bytes());
1696    data.extend_from_slice(&r.w.to_le_bytes());
1697
1698    // Object rotation (f64)
1699    let or = &output.object_rotation;
1700    data.extend_from_slice(&or.pitch.to_le_bytes());
1701    data.extend_from_slice(&or.yaw.to_le_bytes());
1702    data.extend_from_slice(&or.roll.to_le_bytes());
1703
1704    data
1705}
1706
1707/// Read RenderOutput from serialized file
1708fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1709    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1710    let mut data = Vec::new();
1711    file.read_to_end(&mut data)
1712        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1713
1714    let mut cursor = 0;
1715
1716    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1717        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1718        *cursor += 4;
1719        val
1720    };
1721
1722    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1723        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1724        *cursor += 4;
1725        val
1726    };
1727
1728    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1729        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1730        *cursor += 8;
1731        val
1732    };
1733
1734    let width = read_u32(&data, &mut cursor);
1735    let height = read_u32(&data, &mut cursor);
1736    let rgba_len = read_u32(&data, &mut cursor) as usize;
1737    let depth_len = read_u32(&data, &mut cursor) as usize;
1738
1739    let rgba = data[cursor..cursor + rgba_len].to_vec();
1740    cursor += rgba_len;
1741
1742    // Depth data (f64 for TBP precision)
1743    let mut depth = Vec::with_capacity(depth_len);
1744    for _ in 0..depth_len {
1745        depth.push(read_f64(&data, &mut cursor));
1746    }
1747
1748    // Intrinsics (f64 for TBP precision)
1749    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1750    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1751    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1752
1753    // Camera transform (f32 for Bevy compatibility)
1754    let tx = read_f32(&data, &mut cursor);
1755    let ty = read_f32(&data, &mut cursor);
1756    let tz = read_f32(&data, &mut cursor);
1757    let rx = read_f32(&data, &mut cursor);
1758    let ry = read_f32(&data, &mut cursor);
1759    let rz = read_f32(&data, &mut cursor);
1760    let rw = read_f32(&data, &mut cursor);
1761
1762    // Object rotation (f64)
1763    let pitch = read_f64(&data, &mut cursor);
1764    let yaw = read_f64(&data, &mut cursor);
1765    let roll = read_f64(&data, &mut cursor);
1766
1767    Ok(RenderOutput {
1768        rgba,
1769        depth,
1770        width,
1771        height,
1772        intrinsics: crate::CameraIntrinsics {
1773            focal_length,
1774            principal_point,
1775            image_size,
1776        },
1777        camera_transform: Transform {
1778            translation: Vec3::new(tx, ty, tz),
1779            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1780            scale: Vec3::ONE,
1781        },
1782        object_rotation: ObjectRotation { pitch, yaw, roll },
1783        target_point: Vec3::ZERO,
1784        targeting_policy: TargetingPolicy::Origin,
1785    })
1786}
1787
1788/// Setup the scene with camera, lighting, and object
1789#[allow(dead_code)]
1790fn setup_scene(
1791    mut commands: Commands,
1792    asset_server: Res<AssetServer>,
1793    request: Res<RenderRequest>,
1794    mut _materials: ResMut<Assets<StandardMaterial>>,
1795) {
1796    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1797    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1798    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1799    commands.spawn((
1800        Camera3d::default(),
1801        Camera::default(),
1802        Hdr,
1803        render_projection(&request.config),
1804        Msaa::Off,
1805        request.camera_transform,
1806        Tonemapping::None, // Accurate colors for software rendering
1807        DepthPrepass,
1808        NormalPrepass,
1809        RenderCamera,
1810    ));
1811
1812    // Ambient light (from config). In Bevy 0.18 the global ambient light is the
1813    // `GlobalAmbientLight` resource (the `AmbientLight` type became a per-camera component).
1814    let lighting = &request.config.lighting;
1815    commands.insert_resource(GlobalAmbientLight {
1816        color: Color::WHITE,
1817        brightness: lighting.ambient_brightness,
1818        ..default()
1819    });
1820
1821    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1822    if lighting.key_light_intensity > 0.0 {
1823        commands.spawn((
1824            PointLight {
1825                intensity: lighting.key_light_intensity,
1826                shadows_enabled: lighting.shadows_enabled,
1827                ..default()
1828            },
1829            Transform::from_xyz(
1830                lighting.key_light_position[0],
1831                lighting.key_light_position[1],
1832                lighting.key_light_position[2],
1833            ),
1834        ));
1835    }
1836
1837    // Fill light (from config)
1838    if lighting.fill_light_intensity > 0.0 {
1839        commands.spawn((
1840            PointLight {
1841                intensity: lighting.fill_light_intensity,
1842                shadows_enabled: lighting.shadows_enabled,
1843                ..default()
1844            },
1845            Transform::from_xyz(
1846                lighting.fill_light_position[0],
1847                lighting.fill_light_position[1],
1848                lighting.fill_light_position[2],
1849            ),
1850        ));
1851    }
1852
1853    // Load the scene
1854    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1855    commands.insert_resource(LoadedScene(scene_handle.clone()));
1856
1857    // Load the texture
1858    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1859    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1860
1861    // Create material with texture (will be applied later)
1862    let _material = _materials.add(StandardMaterial {
1863        base_color_texture: Some(texture_handle),
1864        unlit: true,
1865        ..default()
1866    });
1867
1868    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
1869    commands.spawn((
1870        SceneRoot(scene_handle),
1871        Transform::from_rotation(request.object_rotation.to_quat()),
1872        RenderedObject,
1873    ));
1874
1875    println!("Scene setup complete");
1876}
1877
1878/// Check if assets are loaded
1879fn check_assets_loaded(
1880    mut state: ResMut<RenderState>,
1881    asset_server: Res<AssetServer>,
1882    scene: Option<Res<LoadedScene>>,
1883    texture: Option<Res<LoadedTexture>>,
1884) {
1885    let trace = render_trace_enabled();
1886    let was_scene_loaded = state.scene_loaded;
1887    let was_texture_loaded = state.texture_loaded;
1888
1889    state.frame_count += 1;
1890
1891    if state.scene_loaded && state.texture_loaded {
1892        return;
1893    }
1894
1895    if let Some(scene) = scene {
1896        match asset_server.get_load_state(&scene.0) {
1897            Some(LoadState::Loaded) => {
1898                state.scene_loaded = true;
1899            }
1900            Some(LoadState::Failed(_)) => {}
1901            _ => {}
1902        }
1903    }
1904
1905    if let Some(texture) = texture {
1906        match asset_server.get_load_state(&texture.0) {
1907            Some(LoadState::Loaded) => {
1908                state.texture_loaded = true;
1909            }
1910            Some(LoadState::Failed(_)) => {}
1911            _ => {}
1912        }
1913    }
1914
1915    if trace {
1916        if !was_scene_loaded && state.scene_loaded {
1917            eprintln!(
1918                "[render_trace][coldinit] scene_loaded frame_count={}",
1919                state.frame_count
1920            );
1921        }
1922        if !was_texture_loaded && state.texture_loaded {
1923            eprintln!(
1924                "[render_trace][coldinit] texture_loaded frame_count={}",
1925                state.frame_count
1926            );
1927        }
1928    }
1929}
1930
1931/// Apply materials to loaded meshes
1932fn apply_materials(
1933    mut state: ResMut<RenderState>,
1934    texture: Option<Res<LoadedTexture>>,
1935    mut materials: ResMut<Assets<StandardMaterial>>,
1936    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1937    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1938) {
1939    // NOTE: we intentionally do NOT wait for `texture_loaded` before applying the
1940    // material. The texture *handle* is valid immediately, so applying the material
1941    // as soon as the mesh entities exist lets the main-pass `StandardMaterial`
1942    // pipeline start compiling during the long async texture load. A late material
1943    // swap (after texture load) would reset the pipeline and capture a blank color
1944    // frame before it recompiled — the root cause of the 0.18 blank renders.
1945    if !state.scene_loaded || state.capture_ready {
1946        return;
1947    }
1948
1949    state.frame_count += 1;
1950
1951    let Some(tex) = texture else { return };
1952
1953    if !state.materials_applied {
1954        // The scene hierarchy is instantiated asynchronously after the asset
1955        // load event fires; wait until mesh entities exist before applying.
1956        if mesh_query.is_empty() {
1957            return;
1958        }
1959
1960        let textured_material = materials.add(StandardMaterial {
1961            base_color_texture: Some(tex.0.clone()),
1962            unlit: true,
1963            ..default()
1964        });
1965
1966        for mut mat in mesh_query.iter_mut() {
1967            mat.0 = textured_material.clone();
1968        }
1969
1970        state.materials_applied = true;
1971        state.materials_applied_frame = state.frame_count;
1972    }
1973
1974    // Record the frame the texture finished loading (once).
1975    if state.texture_loaded && state.texture_ready_frame == 0 {
1976        state.texture_ready_frame = state.frame_count;
1977    }
1978
1979    // Capture once the texture pixels are loaded (+ a small margin for GPU image
1980    // preparation) AND the main-pass pipeline has had time to compile since the
1981    // material was applied. Because the material is applied early, the pipeline is
1982    // almost always ready well before the texture, so this resolves to a few frames
1983    // after the texture loads — deterministic and fast (no 60/120-frame cushion).
1984    let texture_ready =
1985        state.texture_ready_frame != 0 && state.frame_count >= state.texture_ready_frame + 6;
1986    let pipeline_ready = state.frame_count >= state.materials_applied_frame + 6;
1987    if texture_ready && pipeline_ready {
1988        let was_ready = state.capture_ready;
1989        state.capture_ready = true;
1990        if render_trace_enabled() && !was_ready {
1991            eprintln!(
1992                "[render_trace][coldinit] capture_ready frame_count={}",
1993                state.frame_count
1994            );
1995        }
1996    }
1997}
1998
1999/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
2000#[allow(dead_code)]
2001fn request_screenshot(
2002    mut commands: Commands,
2003    mut state: ResMut<RenderState>,
2004    shared_image: Res<SharedImageBuffer>,
2005    mut depth_request: ResMut<DepthCaptureRequest>,
2006) {
2007    if !state.capture_ready || state.screenshot_requested {
2008        return;
2009    }
2010
2011    // Clone the Arc for the observer closure
2012    let image_buffer = shared_image.0.clone();
2013
2014    // Also request depth capture
2015    depth_request.requested = true;
2016    println!("Depth capture requested");
2017
2018    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
2019    println!("Requesting screenshot via Screenshot entity");
2020    commands
2021        .spawn(Screenshot::primary_window())
2022        .observe(move |trigger: On<ScreenshotCaptured>| {
2023            // ScreenshotCaptured derefs to Image
2024            let image: &Image = trigger.event();
2025
2026            // Get dimensions
2027            let width = image.texture_descriptor.size.width;
2028            let height = image.texture_descriptor.size.height;
2029
2030            // Bevy 0.18: Image.data is now Option<Vec<u8>>; skip if absent.
2031            let Some(rgba_data) = image.data.clone() else {
2032                return;
2033            };
2034
2035            // Store in shared buffer
2036            if let Ok(mut guard) = image_buffer.lock() {
2037                *guard = Some((rgba_data, width, height));
2038            }
2039        });
2040
2041    state.screenshot_requested = true;
2042    println!("Screenshot requested");
2043}
2044
2045/// Check if screenshot callback has completed
2046#[allow(dead_code)]
2047fn check_screenshot_ready(
2048    mut state: ResMut<RenderState>,
2049    shared_image: Res<SharedImageBuffer>,
2050    shared_depth: Res<SharedDepthBuffer>,
2051    request: Res<RenderRequest>,
2052) {
2053    if !state.screenshot_requested || state.captured {
2054        return;
2055    }
2056
2057    // Increment frame count while waiting for capture
2058    state.frame_count += 1;
2059
2060    // Check if RGBA callback has written data
2061    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
2062        if let Some((rgba_data, width, height)) = guard.as_ref() {
2063            if state.rgba_data.is_none() {
2064                state.rgba_data = Some(rgba_data.clone());
2065                state.image_width = *width;
2066                state.image_height = *height;
2067            }
2068            true
2069        } else {
2070            false
2071        }
2072    } else {
2073        false
2074    };
2075
2076    // Check if depth readback has completed
2077    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2078        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2079            if state.depth_data.is_none() {
2080                state.depth_data = Some(depth_data.clone());
2081            }
2082            true
2083        } else {
2084            false
2085        }
2086    } else {
2087        false
2088    };
2089
2090    // If depth readback failed or is taking too long, fall back to placeholder.
2091    // As in check_headless_capture_ready, this uniform plane is a DEGRADED render
2092    // (flat depth, no real geometry) that must be loud — it silently masked the
2093    // #92 depth regression. (This fn is currently dead code; kept loud in case it
2094    // is ever revived.)
2095    if rgba_ready && !depth_ready && state.frame_count > 60 {
2096        let camera_dist = request.camera_transform.translation.length() as f64;
2097        let pixel_count = (state.image_width * state.image_height) as usize;
2098        eprintln!(
2099            "[bevy-sensor][WARN] depth readback produced no valid frame; falling back to a \
2100             UNIFORM {:.4} m camera-distance plane (degraded render, no real 3D geometry). \
2101             Indicates a depth-readback regression.",
2102            camera_dist
2103        );
2104        state.depth_data = Some(vec![camera_dist; pixel_count]);
2105    }
2106
2107    // Mark as captured when both RGBA and depth are ready
2108    if state.rgba_data.is_some() && state.depth_data.is_some() {
2109        state.captured = true;
2110    }
2111}
2112
2113/// Extract results and exit
2114#[allow(dead_code)]
2115fn extract_and_exit(
2116    mut state: ResMut<RenderState>,
2117    request: Res<RenderRequest>,
2118    shared_output: Res<SharedOutput>,
2119    mut commands: Commands,
2120    windows: Query<Entity, With<bevy::window::Window>>,
2121) {
2122    // Handle delayed exit after closing window
2123    if state.exit_requested {
2124        state.exit_frame_count += 1;
2125        // After a few frames with no window, Bevy should exit
2126        return;
2127    }
2128
2129    if !state.captured {
2130        return;
2131    }
2132
2133    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2134        // Use actual captured dimensions (may differ from config if window was resized)
2135        let width = state.image_width;
2136        let height = state.image_height;
2137
2138        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2139        let intrinsics = request.config.intrinsics_for_size(width, height);
2140
2141        let output = RenderOutput {
2142            rgba: rgba.clone(),
2143            depth: depth.clone(),
2144            width,
2145            height,
2146            intrinsics,
2147            camera_transform: request.camera_transform,
2148            object_rotation: request.object_rotation.clone(),
2149            target_point: Vec3::ZERO,
2150            targeting_policy: TargetingPolicy::Origin,
2151        };
2152
2153        if let Ok(mut guard) = shared_output.0.lock() {
2154            *guard = Some(output);
2155            drop(guard); // Release lock immediately
2156
2157            // Small delay to allow watchdog to detect output before window close
2158            std::thread::sleep(std::time::Duration::from_millis(200));
2159        }
2160
2161        // Close all windows to trigger app exit
2162        // eprintln!("Closing windows to trigger exit...");
2163        for window_entity in windows.iter() {
2164            commands.entity(window_entity).despawn();
2165        }
2166        state.exit_requested = true;
2167    }
2168}
2169
2170// ============================================================================
2171// Headless Rendering Systems (no window surfaces)
2172// ============================================================================
2173
2174/// Setup the scene for headless rendering with RenderTarget::Image
2175fn setup_headless_scene(
2176    mut commands: Commands,
2177    mut images: ResMut<Assets<Image>>,
2178    asset_server: Res<AssetServer>,
2179    request: Res<RenderRequest>,
2180    mut _materials: ResMut<Assets<StandardMaterial>>,
2181) {
2182    let trace = render_trace_enabled();
2183    let t0 = trace.then(std::time::Instant::now);
2184
2185    #[cfg(test)]
2186    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2187
2188    let width = request.config.width;
2189    let height = request.config.height;
2190
2191    // Create render target image with proper texture usages
2192    let size = Extent3d {
2193        width,
2194        height,
2195        depth_or_array_layers: 1,
2196    };
2197
2198    let mut render_target_image = Image::new_fill(
2199        size,
2200        TextureDimension::D2,
2201        &[0, 0, 0, 255], // Initialize with opaque black
2202        TextureFormat::Rgba8UnormSrgb,
2203        RenderAssetUsages::default(),
2204    );
2205
2206    // Add required texture usages for headless rendering
2207    render_target_image.texture_descriptor.usage =
2208        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2209
2210    let render_target_handle = images.add(render_target_image);
2211
2212    // Store handle for later access
2213    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2214
2215    // Camera rendering to the image texture (NO window!)
2216    commands.spawn((
2217        Camera3d::default(),
2218        Camera::default(),
2219        Hdr,
2220        // In Bevy 0.18 the render target is a separate `RenderTarget` component,
2221        // and `RenderTarget::Image` wraps an `ImageRenderTarget` (via `From<Handle<Image>>`).
2222        RenderTarget::Image(render_target_handle.clone().into()),
2223        render_projection(&request.config),
2224        Msaa::Off,
2225        request.camera_transform,
2226        Tonemapping::None,
2227        DepthPrepass,
2228        NormalPrepass,
2229        RenderCamera,
2230        // Add ImageCopier to trigger RGBA extraction
2231        ImageCopier {
2232            src_image: render_target_handle,
2233            enabled: false, // Will enable when ready to capture
2234        },
2235    ));
2236
2237    // Ambient light (global resource in Bevy 0.18).
2238    let lighting = &request.config.lighting;
2239    commands.insert_resource(GlobalAmbientLight {
2240        color: Color::WHITE,
2241        brightness: lighting.ambient_brightness,
2242        ..default()
2243    });
2244
2245    // Key light
2246    if lighting.key_light_intensity > 0.0 {
2247        commands.spawn((
2248            PointLight {
2249                intensity: lighting.key_light_intensity,
2250                shadows_enabled: lighting.shadows_enabled,
2251                ..default()
2252            },
2253            Transform::from_xyz(
2254                lighting.key_light_position[0],
2255                lighting.key_light_position[1],
2256                lighting.key_light_position[2],
2257            ),
2258        ));
2259    }
2260
2261    // Fill light
2262    if lighting.fill_light_intensity > 0.0 {
2263        commands.spawn((
2264            PointLight {
2265                intensity: lighting.fill_light_intensity,
2266                shadows_enabled: lighting.shadows_enabled,
2267                ..default()
2268            },
2269            Transform::from_xyz(
2270                lighting.fill_light_position[0],
2271                lighting.fill_light_position[1],
2272                lighting.fill_light_position[2],
2273            ),
2274        ));
2275    }
2276
2277    // Load the scene
2278    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2279    commands.insert_resource(LoadedScene(scene_handle.clone()));
2280
2281    // Load the texture
2282    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2283    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2284
2285    // Create material with texture
2286    let _material = _materials.add(StandardMaterial {
2287        base_color_texture: Some(texture_handle),
2288        unlit: true,
2289        ..default()
2290    });
2291
2292    // Spawn the scene with rotation
2293    commands.spawn((
2294        SceneRoot(scene_handle),
2295        Transform::from_rotation(request.object_rotation.to_quat()),
2296        RenderedObject,
2297    ));
2298
2299    if let Some(t0) = t0 {
2300        eprintln!(
2301            "[render_trace][startup] setup_headless_scene ms={:.3}",
2302            t0.elapsed().as_secs_f64() * 1000.0
2303        );
2304    }
2305}
2306
2307/// Request capture for headless rendering (enable ImageCopier)
2308fn request_headless_capture(
2309    mut state: ResMut<RenderState>,
2310    mut depth_request: ResMut<DepthCaptureRequest>,
2311    mut query: Query<&mut ImageCopier>,
2312    batch: Option<Res<HeadlessBatchSequence>>,
2313) {
2314    let trace = render_trace_enabled();
2315    let t0 = trace.then(std::time::Instant::now);
2316
2317    if !state.capture_ready || state.screenshot_requested {
2318        if let Some(t0) = t0 {
2319            eprintln!(
2320                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2321                t0.elapsed().as_secs_f64() * 1000.0
2322            );
2323        }
2324        return;
2325    }
2326
2327    if batch
2328        .as_ref()
2329        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2330    {
2331        if let Some(t0) = t0 {
2332            eprintln!(
2333                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2334                t0.elapsed().as_secs_f64() * 1000.0
2335            );
2336        }
2337        return;
2338    }
2339
2340    // Enable the ImageCopier to trigger RGBA extraction
2341    for mut copier in query.iter_mut() {
2342        copier.enabled = true;
2343    }
2344
2345    // Request depth capture
2346    depth_request.requested = true;
2347
2348    state.screenshot_requested = true;
2349
2350    if let Some(t0) = t0 {
2351        eprintln!(
2352            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2353            t0.elapsed().as_secs_f64() * 1000.0
2354        );
2355    }
2356}
2357
2358/// Check if headless capture has completed
2359fn check_headless_capture_ready(
2360    mut state: ResMut<RenderState>,
2361    shared_rgba: Res<SharedRgbaBuffer>,
2362    shared_depth: Res<SharedDepthBuffer>,
2363    request: Res<RenderRequest>,
2364    mut query: Query<&mut ImageCopier>,
2365) {
2366    let trace = render_trace_enabled();
2367    let t0 = trace.then(std::time::Instant::now);
2368
2369    if !state.screenshot_requested || state.captured {
2370        if let Some(t0) = t0 {
2371            eprintln!(
2372                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2373                t0.elapsed().as_secs_f64() * 1000.0
2374            );
2375        }
2376        return;
2377    }
2378
2379    state.frame_count += 1;
2380    state.capture_retries += 1;
2381    // Bounded fallback so a genuinely-uniform scene (or persistent invalid
2382    // readback) still terminates instead of hanging to the watchdog.
2383    // Generous bound: slow paths (e.g. RenderSession's retained-render-world
2384    // settle after a scene swap) can take ~150 frames to produce a stable frame,
2385    // so force-accepting at 150 would grab a partial frame and break parity. Only
2386    // force as a true last resort to avoid hanging the watchdog.
2387    let force_accept = state.capture_retries > 150;
2388
2389    // RGBA: accept the first non-blank frame. Uniform clear-color frames are
2390    // pre-geometry reads from the nondeterministic one-shot capture — reject and
2391    // retry. The copier stays enabled until BOTH RGBA and depth are valid so a
2392    // late/odd depth frame can still be captured.
2393    if state.rgba_data.is_none() {
2394        let captured_rgba = shared_rgba.0.lock().ok().and_then(|g| g.clone());
2395        if let Some((rgba_data, width, height)) = captured_rgba {
2396            let non_blank = rgba_data
2397                .chunks_exact(4)
2398                .any(|px| px[0..3] != rgba_data[0..3]);
2399            // Stable == identical to the previous readback (render has settled).
2400            let stable = state.prev_rgba.as_deref() == Some(rgba_data.as_slice());
2401            if (non_blank && stable) || force_accept {
2402                state.image_width = width;
2403                state.image_height = height;
2404                state.rgba_data = Some(rgba_data);
2405                state.prev_rgba = None;
2406            } else {
2407                // Not settled yet: remember this frame and re-read fresh next one.
2408                state.prev_rgba = Some(rgba_data);
2409                if let Ok(mut g) = shared_rgba.0.lock() {
2410                    *g = None;
2411                }
2412            }
2413        }
2414    }
2415
2416    // Depth: accept the first readback that contains real foreground (the depth
2417    // readback can also miss the geometry, leaving an all-far-plane buffer).
2418    if state.depth_data.is_none() {
2419        let captured_depth = shared_depth.0.lock().ok().and_then(|g| g.clone());
2420        if let Some((depth_data, _w, _h)) = captured_depth {
2421            let far = request.config.far_plane as f64;
2422            // Require a real object-surface depth, not just any non-far value:
2423            // near-plane garbage (~0.01) would otherwise be accepted but is not a
2424            // valid surface, and downstream depth-validity checks require > 0.1m.
2425            let has_foreground = depth_data.iter().any(|&d| d > 0.1 && d < far * 0.999);
2426            // Settled == identical to the previous depth readback.
2427            let stable = state.prev_depth.as_deref() == Some(depth_data.as_slice());
2428            if has_foreground && stable {
2429                state.depth_data = Some(depth_data);
2430                state.prev_depth = None;
2431            } else {
2432                state.prev_depth = Some(depth_data);
2433                if let Ok(mut g) = shared_depth.0.lock() {
2434                    *g = None; // discard; retry next frame
2435                }
2436            }
2437        }
2438    }
2439
2440    // Last-resort fallback so we never hang the watchdog: once RGBA is in hand
2441    // and we've retried a lot, fill a uniform camera-distance depth placeholder.
2442    //
2443    // This is NOT a valid render — it is a flat depth plane that extracts
2444    // features and passes buffer-equality parity tests yet unprojects every
2445    // pixel onto one sheet, silently cratering downstream spatial matching
2446    // (this exact fallback masked the Bevy 0.18 depth regression in #92). It
2447    // must therefore be LOUD: a future depth-readback regression has to surface
2448    // in logs/CI instead of looking like a successful render. `tests/
2449    // spatial_parity.rs` is the geometric guard for the same failure.
2450    if state.rgba_data.is_some() && state.depth_data.is_none() && force_accept {
2451        let camera_dist = request.camera_transform.translation.length() as f64;
2452        let pixel_count = (state.image_width * state.image_height) as usize;
2453        eprintln!(
2454            "[bevy-sensor][WARN] depth readback produced no valid frame after {} retries; \
2455             falling back to a UNIFORM {:.4} m camera-distance plane. This is a degraded \
2456             render (flat depth -> no real 3D geometry) and indicates a depth-readback \
2457             regression. See render.rs DepthReadbackNode and tests/spatial_parity.rs.",
2458            state.capture_retries, camera_dist
2459        );
2460        state.depth_data = Some(vec![camera_dist; pixel_count]);
2461    }
2462
2463    let rgba_ready = state.rgba_data.is_some();
2464    let depth_ready = state.depth_data.is_some();
2465
2466    // Both valid → capture complete; stop the copier.
2467    if rgba_ready && depth_ready {
2468        state.captured = true;
2469        for mut copier in query.iter_mut() {
2470            copier.enabled = false;
2471        }
2472    }
2473
2474    if let Some(t0) = t0 {
2475        eprintln!(
2476            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2477            rgba_ready,
2478            depth_ready,
2479            state.captured,
2480            state.frame_count,
2481            t0.elapsed().as_secs_f64() * 1000.0
2482        );
2483    }
2484}
2485
2486/// Extract results and exit for headless rendering
2487fn extract_and_exit_headless(
2488    mut state: ResMut<RenderState>,
2489    request: Res<RenderRequest>,
2490    shared_output: Res<SharedOutput>,
2491    mut app_exit: MessageWriter<bevy::app::AppExit>,
2492    batch: Option<Res<HeadlessBatchSequence>>,
2493) {
2494    if batch.is_some() {
2495        return;
2496    }
2497
2498    if state.exit_requested {
2499        return;
2500    }
2501
2502    if !state.captured {
2503        return;
2504    }
2505
2506    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2507        let width = state.image_width;
2508        let height = state.image_height;
2509
2510        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2511        let intrinsics = request.config.intrinsics_for_size(width, height);
2512
2513        let output = RenderOutput {
2514            rgba: rgba.clone(),
2515            depth: depth.clone(),
2516            width,
2517            height,
2518            intrinsics,
2519            camera_transform: request.camera_transform,
2520            object_rotation: request.object_rotation.clone(),
2521            target_point: Vec3::ZERO,
2522            targeting_policy: TargetingPolicy::Origin,
2523        };
2524
2525        if let Ok(mut guard) = shared_output.0.lock() {
2526            *guard = Some(output);
2527            drop(guard);
2528            std::thread::sleep(std::time::Duration::from_millis(200));
2529        }
2530
2531        // Send AppExit event (headless apps use this instead of closing windows)
2532        app_exit.write(bevy::app::AppExit::Success);
2533        state.exit_requested = true;
2534    }
2535}
2536
2537/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2538fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2539    let Some(mut batch) = batch else {
2540        return;
2541    };
2542
2543    if batch.warmup_frames_remaining > 0 {
2544        batch.warmup_frames_remaining -= 1;
2545    }
2546}
2547
2548/// Extract one batch output and continue rendering the next viewpoint in the same app.
2549fn extract_and_continue_headless_batch(
2550    mut state: ResMut<RenderState>,
2551    request: Res<RenderRequest>,
2552    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2553    batch: Option<ResMut<HeadlessBatchSequence>>,
2554    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2555    mut depth_request: ResMut<DepthCaptureRequest>,
2556    mut image_copiers: Query<&mut ImageCopier>,
2557) {
2558    let trace = render_trace_enabled();
2559    let t0 = trace.then(std::time::Instant::now);
2560
2561    let (shared_rgba, shared_depth) = buffers;
2562    let Some(mut batch) = batch else {
2563        if let Some(t0) = t0 {
2564            eprintln!(
2565                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2566                t0.elapsed().as_secs_f64() * 1000.0
2567            );
2568        }
2569        return;
2570    };
2571
2572    if state.exit_requested || !state.captured || batch.done {
2573        if let Some(t0) = t0 {
2574            eprintln!(
2575                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2576                state.captured,
2577                batch.done,
2578                t0.elapsed().as_secs_f64() * 1000.0
2579            );
2580        }
2581        return;
2582    }
2583
2584    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2585        let width = state.image_width;
2586        let height = state.image_height;
2587
2588        let intrinsics = request.config.intrinsics_for_size(width, height);
2589
2590        let output = RenderOutput {
2591            rgba: rgba.clone(),
2592            depth: depth.clone(),
2593            width,
2594            height,
2595            intrinsics,
2596            camera_transform: batch
2597                .current_viewpoint()
2598                .unwrap_or(request.camera_transform),
2599            object_rotation: request.object_rotation.clone(),
2600            target_point: Vec3::ZERO,
2601            targeting_policy: TargetingPolicy::Origin,
2602        };
2603        batch.outputs.push(output);
2604
2605        let next_index = batch.current_index + 1;
2606        if next_index >= batch.viewpoints.len() {
2607            batch.done = true;
2608            state.exit_requested = true;
2609            return;
2610        }
2611
2612        batch.current_index = next_index;
2613        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2614
2615        if let Some(next_viewpoint) = batch.current_viewpoint() {
2616            for mut camera_transform in camera_query.iter_mut() {
2617                *camera_transform = next_viewpoint;
2618            }
2619        }
2620
2621        if let Ok(mut guard) = shared_rgba.0.lock() {
2622            *guard = None;
2623        }
2624        if let Ok(mut guard) = shared_depth.0.lock() {
2625            *guard = None;
2626        }
2627
2628        for mut copier in image_copiers.iter_mut() {
2629            copier.enabled = false;
2630        }
2631
2632        depth_request.requested = false;
2633        state.frame_count = 0;
2634        state.capture_ready = true;
2635        state.screenshot_requested = false;
2636        state.captured = false;
2637        state.rgba_data = None;
2638        state.depth_data = None;
2639        state.image_width = 0;
2640        state.image_height = 0;
2641        // Reset the per-capture settle/retry tracking too, otherwise it
2642        // accumulates across viewpoints and force-accepts an unsettled frame for
2643        // later viewpoints (breaking parity).
2644        state.capture_retries = 0;
2645        state.prev_rgba = None;
2646        state.prev_depth = None;
2647
2648        if let Some(t0) = t0 {
2649            eprintln!(
2650                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2651                batch.current_index.saturating_sub(1),
2652                batch.current_index,
2653                batch.done,
2654                t0.elapsed().as_secs_f64() * 1000.0
2655            );
2656        }
2657    } else if let Some(t0) = t0 {
2658        eprintln!(
2659            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2660            t0.elapsed().as_secs_f64() * 1000.0
2661        );
2662    }
2663}
2664
2665// ============================================================================
2666// Persistent batch session (RenderSession)
2667//
2668// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2669// object (PSO) compilation across multiple `render()` calls. Profile data (see
2670// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2671// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2672// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2673// episodes recovers the bulk of that cost.
2674// ============================================================================
2675
/// Marker component tagging the scene entity spawned for the current
/// `RenderSession::render()` call. The next `render()` call queries for this
/// marker and despawns every match before it loads a different object or
/// rotation.
#[derive(Component)]
struct SessionScene;
2680
2681/// Session-persistent setup: render target image, camera (with prepass +
2682/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2683/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2684/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2685fn setup_session_persistent_scene(
2686    mut commands: Commands,
2687    mut images: ResMut<Assets<Image>>,
2688    config: Res<SessionRenderConfig>,
2689) {
2690    let width = config.0.width;
2691    let height = config.0.height;
2692
2693    let size = Extent3d {
2694        width,
2695        height,
2696        depth_or_array_layers: 1,
2697    };
2698
2699    let mut render_target_image = Image::new_fill(
2700        size,
2701        TextureDimension::D2,
2702        &[0, 0, 0, 255],
2703        TextureFormat::Rgba8UnormSrgb,
2704        RenderAssetUsages::default(),
2705    );
2706    render_target_image.texture_descriptor.usage =
2707        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2708
2709    let render_target_handle = images.add(render_target_image);
2710    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2711
2712    commands.spawn((
2713        Camera3d::default(),
2714        Camera::default(),
2715        Hdr,
2716        RenderTarget::Image(render_target_handle.clone().into()),
2717        render_projection(&config.0),
2718        Msaa::Off,
2719        Transform::default(),
2720        Tonemapping::None,
2721        DepthPrepass,
2722        NormalPrepass,
2723        RenderCamera,
2724        ImageCopier {
2725            src_image: render_target_handle,
2726            enabled: false,
2727        },
2728    ));
2729
2730    let lighting = &config.0.lighting;
2731    commands.insert_resource(GlobalAmbientLight {
2732        color: Color::WHITE,
2733        brightness: lighting.ambient_brightness,
2734        ..default()
2735    });
2736
2737    if lighting.key_light_intensity > 0.0 {
2738        commands.spawn((
2739            PointLight {
2740                intensity: lighting.key_light_intensity,
2741                shadows_enabled: lighting.shadows_enabled,
2742                ..default()
2743            },
2744            Transform::from_xyz(
2745                lighting.key_light_position[0],
2746                lighting.key_light_position[1],
2747                lighting.key_light_position[2],
2748            ),
2749        ));
2750    }
2751
2752    if lighting.fill_light_intensity > 0.0 {
2753        commands.spawn((
2754            PointLight {
2755                intensity: lighting.fill_light_intensity,
2756                shadows_enabled: lighting.shadows_enabled,
2757                ..default()
2758            },
2759            Transform::from_xyz(
2760                lighting.fill_light_position[0],
2761                lighting.fill_light_position[1],
2762                lighting.fill_light_position[2],
2763            ),
2764        ));
2765    }
2766}
2767
/// Resource carrying the `RenderConfig` that was fixed at session construction.
/// `setup_session_persistent_scene` reads it to size the render target, build
/// the camera projection, and configure the ambient, key, and fill lights.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2772
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    /// The long-lived Bevy application driven manually via `App::update()`.
    app: App,
    /// Config captured at `new()`; `render()` rejects requests that differ.
    render_config: RenderConfig,
    /// RGBA readback slot shared with `ImageCopyPlugin`; cleared at the start
    /// of every `render()` call.
    shared_rgba: SharedRgbaBuffer,
    /// Depth readback slot shared with `DepthReadbackPlugin`; cleared at the
    /// start of every `render()` call.
    shared_depth: SharedDepthBuffer,
    /// Raw-pointer `PhantomData` that opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2803
2804impl RenderSession {
2805    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2806    /// `update()` so Startup systems run and the wgpu device + adapter are
2807    /// initialized. The first `render()` call still pays PSO compilation for
2808    /// the specific mesh/material combination; subsequent calls reuse the cache.
2809    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2810        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2811        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2812
2813        let mut app = App::new();
2814        app.add_plugins(
2815            DefaultPlugins
2816                .set(bevy::asset::AssetPlugin {
2817                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
2818                    // default (UnapprovedPathMode::Forbid → load() silently returns a
2819                    // default handle). YCB meshes load from absolute paths, so allow them.
2820                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
2821                    ..default()
2822                })
2823                .set(WindowPlugin {
2824                    primary_window: None,
2825                    exit_condition: ExitCondition::DontExit,
2826                    ..default()
2827                })
2828                .disable::<bevy::winit::WinitPlugin>()
2829                .disable::<LogPlugin>()
2830                .disable::<TerminalCtrlCHandlerPlugin>(),
2831        )
2832        .add_plugins(ObjPlugin)
2833        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
2834        // Scene spawning panics unless those component types are registered. The
2835        // minimal headless plugin set doesn't register them, so do it explicitly.
2836        .register_type::<Mesh3d>()
2837        .register_type::<MeshMaterial3d<StandardMaterial>>()
2838        .register_type::<bevy::prelude::Transform>()
2839        .register_type::<bevy::prelude::GlobalTransform>()
2840        .register_type::<bevy::transform::components::TransformTreeChanged>()
2841        .register_type::<bevy::prelude::Visibility>()
2842        .register_type::<bevy::prelude::InheritedVisibility>()
2843        .register_type::<bevy::prelude::ViewVisibility>()
2844        .add_plugins(ImageCopyPlugin {
2845            shared_rgba: shared_rgba.clone(),
2846        })
2847        .add_plugins(DepthReadbackPlugin {
2848            shared_depth: shared_depth.clone(),
2849            near: render_config.near_plane,
2850            far: render_config.far_plane,
2851        })
2852        .insert_resource(SessionRenderConfig(render_config.clone()))
2853        .insert_resource(shared_rgba.clone())
2854        .init_resource::<RenderState>()
2855        .add_systems(Startup, setup_session_persistent_scene)
2856        .add_systems(
2857            Update,
2858            (
2859                check_assets_loaded,
2860                apply_materials,
2861                tick_headless_batch_warmup,
2862                request_headless_capture,
2863                check_headless_capture_ready,
2864                extract_and_continue_headless_batch,
2865            )
2866                .chain()
2867                // Gate the capture chain on `RenderRequest` existing. `new()`
2868                // runs a warmup `app.update()` to execute Startup (which spawns
2869                // the camera/lights/render target) before the first `render()`
2870                // call, but does not yet insert `RenderRequest`. Several systems
2871                // in this chain take `Res<RenderRequest>` (not `Option`) and
2872                // would panic on SystemState init if the resource were absent.
2873                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2874        );
2875
2876        app.finish();
2877        app.cleanup();
2878
2879        // One warmup update runs Startup systems (render target, camera, lights)
2880        // so they exist before the first `render()` call seeds the camera
2881        // transform. The Update chain is gated by `RenderRequest` existence and
2882        // is a no-op this tick. PSO compilation for specific mesh/material
2883        // combinations still happens lazily on the first real render.
2884        app.update();
2885
2886        Ok(Self {
2887            app,
2888            render_config: render_config.clone(),
2889            shared_rgba,
2890            shared_depth,
2891            _not_send_sync: std::marker::PhantomData,
2892        })
2893    }
2894
2895    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2896    /// Returns outputs in request order.
2897    ///
2898    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2899    /// wgpu device was lost mid-render. This call produced no output; any
2900    /// outputs from earlier `render()` calls on this session are still valid.
2901    /// Recovery: drop this `RenderSession` and construct a new one.
2902    pub fn render(
2903        &mut self,
2904        requests: &[crate::BatchRenderRequest],
2905    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2906        use crate::{BatchRenderError, BatchRenderOutput};
2907
2908        if requests.is_empty() {
2909            return Ok(Vec::new());
2910        }
2911
2912        // Enforce homogeneity and config invariance.
2913        let first = &requests[0];
2914        if first.render_config != self.render_config {
2915            return Err(BatchRenderError::InvalidConfig(
2916                "RenderSession render_config mismatch: session was constructed with a different \
2917                 RenderConfig than the first request carries. Session config cannot change after \
2918                 `new()`; construct a new session if you need a different resolution/camera."
2919                    .to_string(),
2920            ));
2921        }
2922        for r in &requests[1..] {
2923            if r.object_dir != first.object_dir
2924                || r.object_rotation != first.object_rotation
2925                || r.render_config != first.render_config
2926            {
2927                return Err(BatchRenderError::InvalidConfig(
2928                    "Phase 1 RenderSession::render requires homogeneous requests \
2929                     (same object_dir, object_rotation, and render_config across the batch). \
2930                     Call render() once per group instead."
2931                        .to_string(),
2932                ));
2933            }
2934        }
2935
2936        // Canonicalize paths and validate mesh/texture presence. This matches
2937        // `render_headless_sequence`'s preconditions so the error surface stays
2938        // consistent.
2939        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2940            BatchRenderError::InvalidConfig(format!(
2941                "Cannot canonicalize object directory {}: {}",
2942                first.object_dir.display(),
2943                e
2944            ))
2945        })?;
2946        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2947        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2948        if !mesh_path.exists() {
2949            return Err(BatchRenderError::InvalidConfig(format!(
2950                "Mesh not found: {}",
2951                mesh_path.display()
2952            )));
2953        }
2954        if !texture_path.exists() {
2955            return Err(BatchRenderError::InvalidConfig(format!(
2956                "Texture not found: {}",
2957                texture_path.display()
2958            )));
2959        }
2960
2961        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2962
2963        // --- per-group scene swap (direct world manipulation) ---
2964        {
2965            let world = self.app.world_mut();
2966
2967            // Despawn any SessionScene entity from the previous group.
2968            let stale: Vec<Entity> = world
2969                .query_filtered::<Entity, With<SessionScene>>()
2970                .iter(world)
2971                .collect();
2972            for entity in stale {
2973                world.entity_mut(entity).despawn();
2974            }
2975
2976            // Clear shared RGBA/depth buffers so a stale payload can't leak
2977            // into the first viewpoint of this call.
2978            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2979                *guard = None;
2980            }
2981            if let Ok(mut guard) = self.shared_depth.0.lock() {
2982                *guard = None;
2983            }
2984
2985            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2986            // frame_count, materials_applied, etc.). Default() gives all false/0.
2987            *world.resource_mut::<RenderState>() = RenderState::default();
2988
2989            // Update RenderRequest so the existing capture systems see the new
2990            // object paths, rotation, and camera transform (seeded from first vp).
2991            let new_request = RenderRequest {
2992                mesh_path: fs_path_to_asset_string(&mesh_path),
2993                texture_path: fs_path_to_asset_string(&texture_path),
2994                camera_transform: viewpoints[0],
2995                object_rotation: first.object_rotation.clone(),
2996                config: self.render_config.clone(),
2997            };
2998            world.insert_resource(new_request);
2999
3000            // Kick off asset loads and install the handles under the names the
3001            // existing `check_assets_loaded` system expects.
3002            let asset_server = world.resource::<AssetServer>().clone();
3003            let scene_handle: Handle<Scene> =
3004                asset_server.load(fs_path_to_asset_string(&mesh_path));
3005            let texture_handle: Handle<Image> =
3006                asset_server.load(fs_path_to_asset_string(&texture_path));
3007            world.insert_resource(LoadedScene(scene_handle.clone()));
3008            world.insert_resource(LoadedTexture(texture_handle));
3009
3010            // Spawn the new scene entity tagged so we can find + despawn it next
3011            // render() call.
3012            world.spawn((
3013                SceneRoot(scene_handle),
3014                Transform::from_rotation(first.object_rotation.to_quat()),
3015                RenderedObject,
3016                SessionScene,
3017            ));
3018
3019            // Seed the camera transform to the first viewpoint now so the first
3020            // capture lines up; subsequent viewpoints are advanced by
3021            // `extract_and_continue_headless_batch`.
3022            let camera_entity = world
3023                .query_filtered::<Entity, With<RenderCamera>>()
3024                .iter(world)
3025                .next();
3026            if let Some(cam) = camera_entity {
3027                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3028                    *transform = viewpoints[0];
3029                }
3030            }
3031
3032            // Install the viewpoint sequence for this render() call. The robust
3033            // settled-frame capture (reject blank/partial readbacks, retry until
3034            // two consecutive readbacks match) absorbs the despawn/respawn
3035            // render-world settle, so a separate discarded warmup pass is not
3036            // needed and the per-object cost stays low.
3037            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
3038        }
3039
3040        // --- drive the real capture loop ---
3041        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3042        let start = std::time::Instant::now();
3043        loop {
3044            if start.elapsed() > timeout {
3045                return Err(BatchRenderError::TotalFailure(format!(
3046                    "RenderSession::render timed out after {}s",
3047                    RENDER_TIMEOUT_SECS
3048                )));
3049            }
3050
3051            self.app.update();
3052
3053            if self.app.world().resource::<HeadlessBatchSequence>().done {
3054                break;
3055            }
3056        }
3057
3058        // Collect outputs and zip with requests to produce BatchRenderOutput in
3059        // request order.
3060        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3061        if sequence.outputs.len() != requests.len() {
3062            return Err(BatchRenderError::TotalFailure(format!(
3063                "RenderSession produced {} outputs for {} requests",
3064                sequence.outputs.len(),
3065                requests.len()
3066            )));
3067        }
3068        let outputs = std::mem::take(&mut sequence.outputs);
3069
3070        Ok(requests
3071            .iter()
3072            .cloned()
3073            .zip(outputs)
3074            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
3075            .collect())
3076    }
3077}
3078
3079// ============================================================================
3080// Per-step persistent renderer (PersistentRenderer)
3081//
3082// `RenderSession` reuses the App across calls but rebuilds the scene on every
3083// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
3084// fine for the parity-gate path (one scene per episode of N viewpoints) but
3085// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
3086// the object stays loaded for the whole episode.
3087//
3088// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
3089// construction. `new()` loads mesh + texture + spawns the scene root + drives
3090// one warmup render (output discarded) so PSO compilation and material setup
3091// are paid up front. `render(camera, rotation)` then only mutates the camera
3092// `Transform` and (if changed) the scene root rotation, drives the capture
3093// chain for one frame, and returns. See issue #65.
3094// ============================================================================
3095
/// Marker component for the `PersistentRenderer`'s scene root entity. That
/// entity is kept alive for the whole renderer lifetime; only its `Transform`
/// is mutated when the caller-supplied object rotation changes.
#[derive(Component)]
struct PersistentScene;
3101
/// Persistent per-step renderer. Loads the scene once at `new()` and renders
/// one frame per `render()` call by mutating the camera transform and scene
/// root rotation in-place. Built for surface-policy feedback loops where the
/// object stays fixed for the duration of an episode and the camera moves
/// every step. See issue #65.
///
/// # Thread affinity
///
/// `PersistentRenderer` must be created, used, and dropped on the same thread.
/// Holds a `bevy::App` that owns GPU resources not safe to move across
/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
///
/// # Object + config invariants
///
/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
/// object or change resolution/lighting, drop and rebuild. Rotation may change
/// freely between `render()` calls.
pub struct PersistentRenderer {
    /// The long-lived Bevy application owned by this renderer.
    app: App,
    /// Object directory this renderer was built for; fixed at `new()`.
    object_dir: PathBuf,
    /// Render configuration this renderer was built with; fixed at `new()`.
    render_config: RenderConfig,
    /// RGBA readback slot — presumably shared with the capture plugins as in
    /// `RenderSession`; its use lies outside this excerpt, confirm there.
    shared_rgba: SharedRgbaBuffer,
    /// Depth readback slot — presumably shared with the capture plugins as in
    /// `RenderSession`; its use lies outside this excerpt, confirm there.
    shared_depth: SharedDepthBuffer,
    /// Raw-pointer `PhantomData` that opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
3127
3128impl PersistentRenderer {
3129    /// Build the App, load the scene + texture, spawn the scene root, and drive
3130    /// one warmup render whose output is discarded. After `new()` returns, the
3131    /// first user-facing `render()` call benefits from a warm PSO cache and
3132    /// applied materials.
3133    pub fn new(
3134        object_dir: &Path,
3135        render_config: &RenderConfig,
3136    ) -> Result<Self, crate::RenderError> {
3137        let object_dir =
3138            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
3139                path: object_dir.display().to_string(),
3140                reason: e.to_string(),
3141            })?;
3142        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3143        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3144        if !mesh_path.exists() {
3145            return Err(crate::RenderError::MeshNotFound(fs_path_to_asset_string(
3146                &mesh_path,
3147            )));
3148        }
3149        if !texture_path.exists() {
3150            return Err(crate::RenderError::TextureNotFound(
3151                fs_path_to_asset_string(&texture_path),
3152            ));
3153        }
3154
3155        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3156        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3157
3158        let mut app = App::new();
3159        app.add_plugins(
3160            DefaultPlugins
3161                .set(bevy::asset::AssetPlugin {
3162                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3163                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3164                    // default handle). YCB meshes load from absolute paths, so allow them.
3165                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3166                    ..default()
3167                })
3168                .set(WindowPlugin {
3169                    primary_window: None,
3170                    exit_condition: ExitCondition::DontExit,
3171                    ..default()
3172                })
3173                .disable::<bevy::winit::WinitPlugin>()
3174                .disable::<LogPlugin>()
3175                .disable::<TerminalCtrlCHandlerPlugin>(),
3176        )
3177        .add_plugins(ObjPlugin)
3178        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3179        // Scene spawning panics unless those component types are registered. The
3180        // minimal headless plugin set doesn't register them, so do it explicitly.
3181        .register_type::<Mesh3d>()
3182        .register_type::<MeshMaterial3d<StandardMaterial>>()
3183        .register_type::<bevy::prelude::Transform>()
3184        .register_type::<bevy::prelude::GlobalTransform>()
3185        .register_type::<bevy::transform::components::TransformTreeChanged>()
3186        .register_type::<bevy::prelude::Visibility>()
3187        .register_type::<bevy::prelude::InheritedVisibility>()
3188        .register_type::<bevy::prelude::ViewVisibility>()
3189        .add_plugins(ImageCopyPlugin {
3190            shared_rgba: shared_rgba.clone(),
3191        })
3192        .add_plugins(DepthReadbackPlugin {
3193            shared_depth: shared_depth.clone(),
3194            near: render_config.near_plane,
3195            far: render_config.far_plane,
3196        })
3197        .insert_resource(SessionRenderConfig(render_config.clone()))
3198        .insert_resource(shared_rgba.clone())
3199        .init_resource::<RenderState>()
3200        .add_systems(Startup, setup_session_persistent_scene)
3201        .add_systems(
3202            Update,
3203            (
3204                check_assets_loaded,
3205                apply_materials,
3206                tick_headless_batch_warmup,
3207                request_headless_capture,
3208                check_headless_capture_ready,
3209                extract_and_continue_headless_batch,
3210            )
3211                .chain()
3212                // Same gate as RenderSession: capture chain only runs once
3213                // RenderRequest is installed. Startup runs first via the
3214                // warmup `app.update()` below.
3215                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3216        );
3217
3218        app.finish();
3219        app.cleanup();
3220        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
3221        app.update();
3222
3223        // Install scene + warmup render request. The warmup output is discarded
3224        // — its purpose is to pay PSO compilation and material application
3225        // upfront so the first user-facing render() is fast.
3226        let initial_request = RenderRequest {
3227            mesh_path: fs_path_to_asset_string(&mesh_path),
3228            texture_path: fs_path_to_asset_string(&texture_path),
3229            camera_transform: Transform::default(),
3230            object_rotation: ObjectRotation::identity(),
3231            config: render_config.clone(),
3232        };
3233
3234        {
3235            let world = app.world_mut();
3236            let asset_server = world.resource::<AssetServer>().clone();
3237            let scene_handle: Handle<Scene> =
3238                asset_server.load(fs_path_to_asset_string(&mesh_path));
3239            let texture_handle: Handle<Image> =
3240                asset_server.load(fs_path_to_asset_string(&texture_path));
3241            world.insert_resource(LoadedScene(scene_handle.clone()));
3242            world.insert_resource(LoadedTexture(texture_handle));
3243            world.insert_resource(initial_request);
3244            world.spawn((
3245                SceneRoot(scene_handle),
3246                Transform::from_rotation(ObjectRotation::identity().to_quat()),
3247                RenderedObject,
3248                PersistentScene,
3249            ));
3250            world.insert_resource(HeadlessBatchSequence::new(vec![Transform::default()]));
3251        }
3252
3253        // Drive the warmup render to completion.
3254        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3255        let start = std::time::Instant::now();
3256        loop {
3257            if start.elapsed() > timeout {
3258                return Err(crate::RenderError::RenderFailed(format!(
3259                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
3260                )));
3261            }
3262            app.update();
3263            if app.world().resource::<HeadlessBatchSequence>().done {
3264                break;
3265            }
3266        }
3267        // Discard the warmup output so it doesn't leak into the first real
3268        // render() call's output buffer.
3269        app.world_mut()
3270            .resource_mut::<HeadlessBatchSequence>()
3271            .outputs
3272            .clear();
3273
3274        Ok(Self {
3275            app,
3276            object_dir,
3277            render_config: render_config.clone(),
3278            shared_rgba,
3279            shared_depth,
3280            _not_send_sync: std::marker::PhantomData,
3281        })
3282    }
3283
3284    /// Render one frame from the given camera transform and object rotation.
3285    /// Reuses the loaded scene + warm PSO cache from `new()`.
3286    pub fn render(
3287        &mut self,
3288        camera_transform: &Transform,
3289        object_rotation: &ObjectRotation,
3290    ) -> Result<RenderOutput, crate::RenderError> {
3291        let camera_transform = *camera_transform;
3292        let object_rotation_owned = object_rotation.clone();
3293
3294        {
3295            let world = self.app.world_mut();
3296
3297            // Update the persistent scene root rotation. Always-write avoids
3298            // the cost of an extra ObjectRotation comparison per call; the
3299            // mutation itself is a single Transform write.
3300            let scene_entity = world
3301                .query_filtered::<Entity, With<PersistentScene>>()
3302                .iter(world)
3303                .next();
3304            if let Some(entity) = scene_entity {
3305                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
3306                    *transform = Transform::from_rotation(object_rotation_owned.to_quat());
3307                }
3308            }
3309
3310            // Update the camera transform.
3311            let cam_entity = world
3312                .query_filtered::<Entity, With<RenderCamera>>()
3313                .iter(world)
3314                .next();
3315            if let Some(cam) = cam_entity {
3316                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3317                    *transform = camera_transform;
3318                }
3319            }
3320
3321            // Reset per-frame state, preserving scene_loaded / texture_loaded
3322            // / materials_applied / materials_applied_frame. The asset-load
3323            // and material-apply work was paid in `new()`'s warmup; we only
3324            // need to clear the per-capture state.
3325            //
3326            // `capture_ready = true` short-circuits `apply_materials` on
3327            // every tick of the render loop (no need to re-check material
3328            // application — it stays applied for the renderer's lifetime).
3329            // It does NOT short-circuit `request_headless_capture`, which
3330            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3331            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3332            // without that warmup gate, request_headless_capture fires same-
3333            // tick as the transform writes, capturing the previous render's
3334            // target before the new transforms have propagated.
3335            {
3336                let mut state = world.resource_mut::<RenderState>();
3337                state.exit_requested = false;
3338                state.screenshot_requested = false;
3339                state.captured = false;
3340                state.rgba_data = None;
3341                state.depth_data = None;
3342                state.frame_count = 0;
3343                state.image_width = 0;
3344                state.image_height = 0;
3345                state.capture_ready = true;
3346                state.capture_retries = 0;
3347                state.prev_rgba = None;
3348                state.prev_depth = None;
3349            }
3350
3351            // Clear shared GPU readback buffers so a stale payload from the
3352            // previous render() can't leak into this call's output.
3353            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3354                *guard = None;
3355            }
3356            if let Ok(mut guard) = self.shared_depth.0.lock() {
3357                *guard = None;
3358            }
3359
3360            // Update RenderRequest (used by extract_and_continue_headless_batch
3361            // to stamp the output with the right intrinsics + rotation).
3362            {
3363                let mut req = world.resource_mut::<RenderRequest>();
3364                req.camera_transform = camera_transform;
3365                req.object_rotation = object_rotation_owned.clone();
3366            }
3367
3368            // Install fresh single-element batch with warmup frames so
3369            // `request_headless_capture` is gated until the new transforms
3370            // have propagated through the render pipeline.
3371            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3372            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3373            world.insert_resource(batch);
3374        }
3375
3376        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3377        let start = std::time::Instant::now();
3378        loop {
3379            if start.elapsed() > timeout {
3380                return Err(crate::RenderError::RenderFailed(format!(
3381                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3382                )));
3383            }
3384            self.app.update();
3385            if self.app.world().resource::<HeadlessBatchSequence>().done {
3386                break;
3387            }
3388        }
3389
3390        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3391        let mut outputs = std::mem::take(&mut sequence.outputs);
3392        if outputs.len() != 1 {
3393            return Err(crate::RenderError::RenderFailed(format!(
3394                "PersistentRenderer::render expected 1 output, got {}",
3395                outputs.len()
3396            )));
3397        }
3398
3399        Ok(outputs.remove(0))
3400    }
3401
3402    /// Path to the YCB object directory this renderer was bound to.
3403    pub fn object_dir(&self) -> &Path {
3404        &self.object_dir
3405    }
3406
3407    /// The `RenderConfig` this renderer was constructed with.
3408    pub fn render_config(&self) -> &RenderConfig {
3409        &self.render_config
3410    }
3411
3412    /// Explicit close. Equivalent to dropping; provided to match the API
3413    /// proposal in #65 for callers that want lifetime-explicit teardown.
3414    pub fn close(self) {
3415        // Drop runs on return.
3416    }
3417}
3418
3419/// Render directly to files (for subprocess mode).
3420///
3421/// This function saves RGBA and depth data directly to files before exiting.
3422/// Designed for subprocess rendering where the process will exit after rendering.
3423pub fn render_to_files(
3424    object_dir: &Path,
3425    camera_transform: &Transform,
3426    object_rotation: &ObjectRotation,
3427    config: &RenderConfig,
3428    rgba_path: &Path,
3429    depth_path: &Path,
3430) -> Result<(), RenderError> {
3431    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3432    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3433
3434    if !mesh_path.exists() {
3435        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
3436            &mesh_path,
3437        )));
3438    }
3439    if !texture_path.exists() {
3440        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
3441            &texture_path,
3442        )));
3443    }
3444
3445    let request = RenderRequest {
3446        mesh_path: fs_path_to_asset_string(&mesh_path),
3447        texture_path: fs_path_to_asset_string(&texture_path),
3448        camera_transform: *camera_transform,
3449        object_rotation: object_rotation.clone(),
3450        config: config.clone(),
3451    };
3452
3453    // Shared state for output
3454    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
3455    let output_poll = shared_output.clone();
3456
3457    // Clone paths for watchdog thread
3458    let rgba_path = rgba_path.to_path_buf();
3459    let depth_path = depth_path.to_path_buf();
3460
3461    // Shared buffer for RGBA data from headless render target
3462    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3463
3464    // Shared buffer for depth readback
3465    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3466
3467    // Spawn watchdog thread that saves files and exits
3468    std::thread::spawn(move || {
3469        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3470        let start = std::time::Instant::now();
3471        let poll_interval = std::time::Duration::from_millis(100);
3472
3473        loop {
3474            if let Ok(guard) = output_poll.0.lock() {
3475                if let Some(output) = guard.as_ref() {
3476                    // Save RGBA as PNG
3477                    if let Err(e) =
3478                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
3479                    {
3480                        eprintln!("Failed to save RGBA: {:?}", e);
3481                        std::process::exit(1);
3482                    }
3483
3484                    // Save depth as binary f32
3485                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
3486                        eprintln!("Failed to save depth: {:?}", e);
3487                        std::process::exit(1);
3488                    }
3489
3490                    std::process::exit(0);
3491                }
3492            }
3493
3494            if start.elapsed() > timeout {
3495                eprintln!(
3496                    "Error: Render timeout after {} seconds",
3497                    RENDER_TIMEOUT_SECS
3498                );
3499                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
3500                std::process::exit(1);
3501            }
3502
3503            std::thread::sleep(poll_interval);
3504        }
3505    });
3506
3507    // Configure rendering backend for this environment.
3508    // Use OnceLock so env vars are only set once per process — repeated calls
3509    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
3510    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
3511    // requires a persistent renderer (tracked in issue #14).
3512    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
3513    BACKEND_INIT.get_or_init(|| {
3514        let backend_config = BackendConfig::headless();
3515        backend_config.apply_env();
3516    });
3517
3518    // Run Bevy app with HEADLESS configuration
3519    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
3520
3521    // Unreachable - watchdog thread exits the process
3522    Err(RenderError::RenderFailed(
3523        "Render did not complete".to_string(),
3524    ))
3525}
3526
3527/// Save RGBA data to PNG file
3528fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3529    use image::{ImageBuffer, Rgba};
3530
3531    // Create parent directories if needed
3532    if let Some(parent) = path.parent() {
3533        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3534    }
3535
3536    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3537        ImageBuffer::from_raw(width, height, rgba.to_vec())
3538            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3539
3540    img.save(path).map_err(|e| e.to_string())
3541}
3542
/// Write depth samples to `path` as a flat binary file of little-endian `f64`
/// values (8 bytes per sample, in slice order; f64 for TBP precision).
///
/// Missing parent directories of `path` are created first. I/O failures are
/// returned as their message string.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists before writing.
    if let Some(dir) = path.parent() {
        std::fs::create_dir_all(dir).map_err(|err| err.to_string())?;
    }

    let mut encoded: Vec<u8> = Vec::with_capacity(depth.len() * std::mem::size_of::<f64>());
    for sample in depth {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &encoded).map_err(|err| err.to_string())
}
3553
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ source for a small cube centered on the origin. It stays
    /// visible from all default TBP viewpoints and needs no YCB downloads.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Create `<tmp>/synthetic_cube/google_16k/` holding `textured.obj` and a
    /// 2x2 `texture_map.png`. The returned guard owns the temp directory, so
    /// the files live as long as the caller keeps it.
    fn write_synthetic_object() -> TempDir {
        let scratch = TempDir::new().expect("create temp dir for synthetic object");
        let asset_dir = scratch.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&asset_dir).expect("create synthetic google_16k dir");

        std::fs::write(asset_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        // Four distinct, non-black texels so any textured face shows color.
        let texels = ImageBuffer::from_fn(2, 2, |col, row| match (col, row) {
            (0, 0) => Rgba([255u8, 48, 48, 255]),
            (1, 0) => Rgba([48u8, 255, 48, 255]),
            (0, 1) => Rgba([48u8, 48, 255, 255]),
            _ => Rgba([255u8, 255, 64, 255]),
        });
        texels
            .save(asset_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        scratch
    }

    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let scratch = write_synthetic_object();
        let object_dir = scratch.path().join("synthetic_cube");
        let all_viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        // One request per viewpoint, all against the same object and config.
        let mut requests = Vec::with_capacity(request_count);
        for viewpoint in all_viewpoints.iter().copied().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            });
        }

        let stopwatch = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = stopwatch.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;
        for (idx, frame) in outputs.iter().enumerate() {
            assert_eq!(frame.width, config.width, "output {idx} width mismatch");
            assert_eq!(frame.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                frame.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                frame.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );
            // At least one pixel must have a non-zero color channel.
            let has_color = frame
                .rgba
                .chunks_exact(4)
                .any(|px| px[0] != 0 || px[1] != 0 || px[2] != 0);
            assert!(has_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}