Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::{LoadState, RenderAssetUsages};
42use bevy::camera::RenderTarget;
43use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
44use bevy::core_pipeline::tonemapping::Tonemapping;
45use bevy::ecs::query::QueryItem;
46use bevy::light::GlobalAmbientLight;
47use bevy::log::LogPlugin;
48use bevy::prelude::*;
49use bevy::render::camera::ExtractedCamera;
50use bevy::render::render_asset::RenderAssets;
51use bevy::render::render_graph::{
52    Node, NodeRunError, RenderGraphContext, RenderGraphExt, RenderLabel, ViewNode, ViewNodeRunner,
53};
54use bevy::render::render_resource::{
55    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, MapMode, Origin3d,
56    TexelCopyBufferInfo, TexelCopyBufferLayout, TexelCopyTextureInfo, TextureAspect,
57    TextureDimension, TextureFormat, TextureUsages,
58};
59use bevy::render::renderer::RenderQueue;
60use bevy::render::renderer::{RenderContext, RenderDevice};
61use bevy::render::texture::GpuImage;
62use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
63use bevy::render::view::{ExtractedView, Hdr, ViewDepthTexture};
64use bevy::render::{Extract, Render, RenderApp, RenderSystems};
65use bevy::window::{ExitCondition, WindowPlugin};
66use bevy_obj::ObjPlugin;
67use std::fs::File;
68use std::io::Read as IoRead;
69use std::path::{Path, PathBuf};
70#[cfg(test)]
71use std::sync::atomic::{AtomicUsize, Ordering};
72use std::sync::{Arc, Mutex, OnceLock};
73use std::time::Duration;
74
75use crate::{
76    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
77    TargetingPolicy,
78};
79use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
80
81/// Watchdog timeout for a single render, in seconds.
82///
83/// Bounds how long any single render path waits before declaring failure.
84/// 180s accommodates first-run wgpu shader compilation on Windows, which
85/// can take well over 60s on a cold GPU cache (see commit 9cd1d11).
86const RENDER_TIMEOUT_SECS: u64 = 180;
87
88/// Warmup frames after each camera move in `render_headless_sequence`.
89///
90/// After writing a new camera `Transform`, Bevy needs at least one frame for
91/// transform propagation + render-world extract before the next capture is
92/// valid. Historically set to 3 as a conservative cushion; reducing directly
93/// shortens per-viewpoint wall-clock since `app.update()` in the batch path
94/// is not rate-limited. Validated against the pixel-exact hardware test
95/// `test_batch_render_matches_sequential_episode_outputs`.
96const BATCH_WARMUP_FRAMES: u32 = 1;
97
98/// Warmup frames at the start of each `PersistentRenderer::render()` call.
99///
100/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
101/// batch because `extract_and_continue_headless_batch` writes the next
102/// camera transform *and* clears the shared GPU readback buffers in the
103/// same tick — so the in-flight copy from the previous viewpoint has
104/// already drained by the time the next capture is gated.
105///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
112///
113/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
114/// ticks of warmup gives Windows/DX12 enough room to drain the previous
115/// readback and capture the post-propagation color target:
116///   - tick 0: transforms propagate, render runs (no copy enabled)
117///   - tick 1: previous in-flight readback drains (no copy enabled)
118///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
119///   - tick 3: shared buffers populated → captured → batch finalized
120const PERSISTENT_WARMUP_FRAMES: u32 = 3;
121
/// Report whether render tracing is switched on via `BEVY_SENSOR_RENDER_TRACE`.
///
/// Tracing is enabled whenever the variable is present in the process
/// environment, whatever its value. The environment is consulted on every call
/// (nothing is cached), so toggling the variable at runtime takes effect on the
/// next call. Gate all tracing output behind this.
#[inline]
fn render_trace_enabled() -> bool {
    // `var_os` is a pure presence check: unlike `var`, it does not UTF-8-validate
    // the value, so a non-Unicode value cannot silently disable tracing.
    std::env::var_os("BEVY_SENSOR_RENDER_TRACE").is_some()
}
128
/// Convert a filesystem path into a Bevy asset-path string.
///
/// `std::fs::canonicalize` on Windows returns a verbatim-prefixed path:
/// `\\?\C:\...` for local drives and `\\?\UNC\server\share\...` for network
/// shares. Bevy's `AssetPath` parser cannot handle that prefix, so the asset
/// would silently never load. This strips the verbatim prefix and normalizes
/// separators to `/`:
///
/// - `\\?\C:\models\cup.obj` becomes `C:/models/cup.obj`
/// - `\\?\UNC\server\share\cup.obj` becomes `//server/share/cup.obj`
/// - paths without a verbatim prefix only have `\` replaced by `/`
///
/// Non-Unicode path components are converted lossily (U+FFFD), exactly as
/// `Path::display` would render them.
fn fs_path_to_asset_string(path: &std::path::Path) -> String {
    let raw = path.to_string_lossy();

    // The UNC form must be checked first: it also starts with `\\?\`, and
    // stripping only that would leave a bogus relative `UNC/server/...` path.
    if let Some(share) = raw.strip_prefix(r"\\?\UNC\") {
        return format!("//{}", share.replace('\\', "/"));
    }

    raw.strip_prefix(r"\\?\").unwrap_or(&*raw).replace('\\', "/")
}
141
/// Report whether a display server appears reachable for windowed rendering.
///
/// Returns `true` when either the `DISPLAY` (X11) or `WAYLAND_DISPLAY`
/// environment variable is set to a valid Unicode value.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
149
/// Detect WSL2, which doesn't support Vulkan window surfaces.
///
/// Reads `/proc/version` and looks, case-insensitively, for `microsoft` or
/// `wsl`. Returns `false` when the file cannot be read (for example on
/// non-Linux hosts).
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            lowered.contains("microsoft") || lowered.contains("wsl")
        }
        Err(_) => false,
    }
}
159
/// Internal state for tracking render progress.
///
/// NOTE(review): the systems that read and write these fields live outside
/// this excerpt. Field notes marked "presumably" are inferred from the field
/// names alone and should be confirmed against those systems.
#[derive(Resource, Default)]
struct RenderState {
    /// Frame counter; the `*_frame` fields below are documented as snapshots
    /// of this value.
    frame_count: u32,
    /// Presumably set once the scene/mesh asset has loaded — TODO confirm.
    scene_loaded: bool,
    /// Presumably set once the texture asset has loaded — TODO confirm.
    texture_loaded: bool,
    /// Presumably set once materials have been applied to the loaded scene —
    /// TODO confirm.
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// `frame_count` when the texture finished loading. Capture waits a small
    /// margin past this for GPU image preparation. The material (and therefore
    /// the main-pass pipeline) is applied earlier, so by the time the texture is
    /// ready the pipeline has already compiled.
    texture_ready_frame: u32,
    /// Gate for starting a capture (see `materials_applied_frame`).
    capture_ready: bool,
    /// Presumably set once a capture has been requested — TODO confirm.
    screenshot_requested: bool,
    /// Number of frames spent waiting for a *valid* (non-blank / valid-depth)
    /// readback. The one-shot GPU capture is nondeterministic and occasionally
    /// reads a uniform clear-color frame; we reject those and keep capturing
    /// until a real frame lands, bounded by this counter.
    capture_retries: u32,
    /// Previous frame's RGBA readback. The capture is accepted only once two
    /// consecutive readbacks are identical (the render has settled), so partial
    /// in-progress frames aren't captured and every render path yields the same
    /// fully-drawn image (required for byte-exact cross-path parity).
    prev_rgba: Option<Vec<u8>>,
    /// Previous frame's depth readback, for the same settle-detection as
    /// `prev_rgba` (depth parity is asserted to ~1e-9, i.e. bit-exact).
    prev_depth: Option<Vec<f64>>,
    /// Presumably set once a readback has been accepted — TODO confirm.
    captured: bool,
    /// Presumably set once app exit has been requested — TODO confirm.
    exit_requested: bool,
    /// Marked `dead_code`: not read anywhere the compiler can see.
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// Accepted RGBA bytes, if any.
    rgba_data: Option<Vec<u8>>,
    /// Accepted depth values, if any (f64, matching `prev_depth`).
    depth_data: Option<Vec<f64>>,
    /// Presumably the width in pixels of the captured image — TODO confirm.
    image_width: u32,
    /// Presumably the height in pixels of the captured image — TODO confirm.
    image_height: u32,
}
200
/// Test-only global counter.
///
/// NOTE(review): presumably incremented by the headless scene-setup code
/// (outside this excerpt) so tests can assert how often setup ran — confirm.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Reset `HEADLESS_SCENE_SETUP_COUNT` to zero (test builds only).
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Read the current value of `HEADLESS_SCENE_SETUP_COUNT` (test builds only).
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
213
/// Shared buffer for screenshot callback to write into.
///
/// Holds `None` until a frame is stored. The tuple is presumably
/// `(rgba_bytes, width, height)`, mirroring the layout documented on
/// `SharedDepthBuffer` — TODO confirm against the writer.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
219
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`, or `None` when no depth
/// frame is stored. Uses f64 for TBP numerical precision compatibility.
/// Cloning the resource clones the `Arc`, so every clone shares one buffer;
/// `DepthReadbackPlugin` inserts clones into both the main and render worlds.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
226
227// ============================================================================
228// Depth Readback Infrastructure
229// ============================================================================
230
/// Request to capture depth - extracted from main world to render world
/// every frame by `extract_depth_request`.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, `DepthReadbackNode` returns without copying anything.
    requested: bool,
    /// Configured near plane. Carried along with the request; the readback
    /// node does not read it (linearization uses the view's projection matrix).
    near: f32,
    /// Configured far plane. Forwarded to `PendingDepthCapture::far`, where it
    /// serves as the background distance and upper clamp during linearization.
    far: f32,
}
238
/// Pending depth capture info for async processing.
///
/// `m22`/`m32` are the relevant entries of the view's reverse-Z projection
/// matrix (`clip_from_view`), captured at copy time so the CPU-side
/// linearization matches the exact projection the GPU rendered with — including
/// whatever near plane Bevy actually used (which is not necessarily
/// `RenderConfig::near_plane`; Bevy 0.18 renders this camera with near = 0.1).
struct PendingDepthCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the depth texture;
    /// rows are padded to the 256-byte copy alignment.
    buffer: Buffer,
    /// Width in pixels, taken from the camera's physical target size.
    width: u32,
    /// Height in pixels, taken from the camera's physical target size.
    height: u32,
    /// `clip_from_view.z_axis.z`.
    m22: f32,
    /// `clip_from_view.w_axis.z`.
    m32: f32,
    /// Far distance copied from the `DepthCaptureRequest`.
    far: f32,
}
254
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// `DepthReadbackNode` pushes one entry per copy it records;
/// `collect_depth_captures` drains the whole vector with `std::mem::take`.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
258
259// ============================================================================
260// Depth Buffer Helpers
261// ============================================================================
262
263mod depth_helpers {
264    /// wgpu requires buffer row alignment of 256 bytes
265    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;
266
267    /// Align byte size to wgpu's COPY_BYTES_PER_ROW_ALIGNMENT
268    pub fn align_byte_size(value: u32) -> u32 {
269        let remainder = value % COPY_BYTES_PER_ROW_ALIGNMENT;
270        if remainder == 0 {
271            value
272        } else {
273            value + (COPY_BYTES_PER_ROW_ALIGNMENT - remainder)
274        }
275    }
276
277    /// Calculate aligned buffer size for an image
278    #[allow(dead_code)]
279    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
280        height * align_byte_size(width * pixel_size)
281    }
282
283    /// Convert reverse-Z NDC depth to linear depth in meters.
284    ///
285    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
286    /// This provides better precision for distant objects.
287    ///
288    /// Formula derivation:
289    /// - At near plane (z = near): ndc = 1
290    /// - At far plane (z = far): ndc = 0
291    /// - linear = far / (1 + ndc * (far/near - 1))
292    ///
293    /// Superseded in the render path by [`ndc_to_linear_with_matrix`], which
294    /// reads the actual projection near from the view matrix instead of trusting
295    /// a passed-in near (the source of the #92 10x depth error). Retained for its
296    /// tests and as a reference formula.
297    #[allow(dead_code)]
298    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
299        // Handle edge cases
300        if ndc_depth <= 0.0 {
301            return far; // Background (infinite distance in reverse-Z)
302        }
303        if ndc_depth >= 1.0 {
304            return near; // At or beyond near plane
305        }
306        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
307        far / (1.0 + ndc_depth * (far / near - 1.0))
308    }
309
310    /// Extract depth values from aligned buffer, handling row padding
311    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
312        let pixel_size = 4u32; // f32 = 4 bytes
313        let aligned_row_bytes = align_byte_size(width * pixel_size) as usize;
314        let actual_row_bytes = (width * pixel_size) as usize;
315
316        let mut depth_values = Vec::with_capacity((width * height) as usize);
317
318        for y in 0..height as usize {
319            let row_start = y * aligned_row_bytes;
320            let row_data = &data[row_start..row_start + actual_row_bytes];
321
322            for x in 0..width as usize {
323                let offset = x * 4;
324                let bytes: [u8; 4] = row_data[offset..offset + 4].try_into().unwrap();
325                let depth_value = f32::from_le_bytes(bytes);
326                depth_values.push(depth_value);
327            }
328        }
329
330        depth_values
331    }
332
333    /// Convert all NDC depth values to linear meters (as f64 for TBP precision).
334    /// Superseded by [`convert_depth_to_linear_with_matrix`]; retained for tests.
335    #[allow(dead_code)]
336    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
337        raw_depth
338            .iter()
339            .map(|&ndc| reverse_z_to_linear_depth(ndc, near, far) as f64)
340            .collect()
341    }
342
343    /// Linearize a reverse-Z NDC depth using the view's actual projection matrix,
344    /// rather than a hand-supplied near/far.
345    ///
346    /// For a perspective right-handed projection, the relevant clip-space rows are
347    /// `clip_z = m22 * z + m32` and `clip_w = -z` (camera looks down -Z), so
348    /// `ndc = clip_z / clip_w = (m22*z + m32) / (-z)`. Solving for the positive
349    /// view-space distance `d = -z` gives **`d = m32 / (ndc + m22)`**. This holds
350    /// for both finite and infinite reverse-Z and is correct regardless of which
351    /// near plane the renderer actually used — the previous fixed-near formula
352    /// produced depths 10x too small on Bevy 0.18, which renders this camera with
353    /// near = 0.1 even though `RenderConfig::near_plane` is 0.01 (issue #86/#92).
354    ///
355    /// `m22 = clip_from_view[col=2][row=2]`, `m32 = clip_from_view[col=3][row=2]`.
356    /// `ndc <= 0` is the reverse-Z far plane (background) and maps to `far`.
357    pub fn ndc_to_linear_with_matrix(ndc: f32, m22: f32, m32: f32, far: f32) -> f32 {
358        if ndc <= 0.0 {
359            return far; // background / at-or-beyond far plane in reverse-Z
360        }
361        let denom = ndc + m22;
362        if denom.abs() <= f32::EPSILON {
363            return far;
364        }
365        let linear = m32 / denom;
366        if !linear.is_finite() || linear <= 0.0 {
367            far
368        } else {
369            linear.min(far)
370        }
371    }
372
373    /// Convert all NDC depth values to linear meters using the view projection
374    /// matrix (f64 for TBP precision). See [`ndc_to_linear_with_matrix`].
375    pub fn convert_depth_to_linear_with_matrix(
376        raw_depth: &[f32],
377        m22: f32,
378        m32: f32,
379        far: f32,
380    ) -> Vec<f64> {
381        raw_depth
382            .iter()
383            .map(|&ndc| ndc_to_linear_with_matrix(ndc, m22, m32, far) as f64)
384            .collect()
385    }
386
387    #[cfg(test)]
388    mod tests {
389        use super::*;
390
391        #[test]
392        fn test_align_byte_size() {
393            assert_eq!(align_byte_size(256), 256);
394            assert_eq!(align_byte_size(257), 512);
395            assert_eq!(align_byte_size(1), 256);
396            assert_eq!(align_byte_size(512), 512);
397            assert_eq!(align_byte_size(0), 0);
398        }
399
400        #[test]
401        fn test_reverse_z_to_linear_depth() {
402            let near = 0.01;
403            let far = 10.0;
404
405            // Near plane (ndc=1 in reverse-Z)
406            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
407            assert!((linear_near - near).abs() < 0.001);
408
409            // Mid-range depth (ndc=0.5 should give geometric mean area)
410            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
411            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
412            assert!(linear_mid > near && linear_mid < far);
413
414            // Very close to far plane (ndc very small)
415            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
416            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
417            assert!(linear_almost_far > 9.0);
418
419            // Background (ndc=0)
420            let background = reverse_z_to_linear_depth(0.0, near, far);
421            assert_eq!(background, far);
422        }
423
424        #[test]
425        fn test_extract_depth_with_alignment() {
426            // 2x2 image, 4 bytes per pixel
427            // Aligned row = 256 bytes, but actual = 8 bytes
428            let width = 2u32;
429            let height = 2u32;
430
431            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows
432
433            // Write test depth values
434            // Row 0: [0.5, 0.6]
435            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
436            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
437            // Row 1: [0.7, 0.8]
438            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
439            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());
440
441            let depth = extract_depth_with_alignment(&data, width, height);
442            assert_eq!(depth.len(), 4);
443            assert!((depth[0] - 0.5).abs() < 0.001);
444            assert!((depth[1] - 0.6).abs() < 0.001);
445            assert!((depth[2] - 0.7).abs() < 0.001);
446            assert!((depth[3] - 0.8).abs() < 0.001);
447        }
448
449        #[test]
450        fn test_reverse_z_depth_at_near_plane() {
451            // Near plane should give near value
452            let near = 0.01;
453            let far = 100.0;
454            let depth = reverse_z_to_linear_depth(1.0, near, far);
455            assert!((depth - near).abs() < 0.0001);
456        }
457
458        #[test]
459        fn test_reverse_z_depth_at_far_plane() {
460            // Far plane (ndc=0) should give far value
461            let near = 0.01;
462            let far = 100.0;
463            let depth = reverse_z_to_linear_depth(0.0, near, far);
464            assert!((depth - far).abs() < 0.0001);
465        }
466
467        #[test]
468        fn test_reverse_z_monotonic() {
469            // Depth should increase as NDC decreases (reverse-Z)
470            let near = 0.01;
471            let far = 10.0;
472
473            let mut prev_depth = 0.0;
474            for i in (0..=100).rev() {
475                let ndc = i as f32 / 100.0;
476                let depth = reverse_z_to_linear_depth(ndc, near, far);
477                assert!(
478                    depth >= prev_depth,
479                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
480                    ndc,
481                    depth,
482                    prev_depth
483                );
484                prev_depth = depth;
485            }
486        }
487
488        #[test]
489        fn test_ndc_to_linear_with_matrix_infinite_reverse_z() {
490            // Infinite reverse-Z (Bevy `perspective_infinite_reverse_rh`):
491            // m22 = 0, m32 = near. d = near / ndc.
492            let (m22, m32, far) = (0.0f32, 0.1f32, 10.0f32);
493
494            // The exact regression from #92: ndc 0.366504 must linearize to
495            // ~0.273 m (near 0.1), NOT ~0.027 m (the old fixed near = 0.01).
496            let d = ndc_to_linear_with_matrix(0.366504, m22, m32, far);
497            assert!((d as f64 - 0.272849).abs() < 1e-4, "got {d}");
498
499            // Background (reverse-Z far plane) and clamping.
500            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
501            assert_eq!(ndc_to_linear_with_matrix(-0.5, m22, m32, far), far);
502            // Very small ndc -> very far -> clamped to far.
503            assert_eq!(ndc_to_linear_with_matrix(1e-9, m22, m32, far), far);
504        }
505
506        #[test]
507        fn test_ndc_to_linear_with_matrix_finite_reverse_z() {
508            // Finite reverse-Z maps near->ndc 1, far->ndc 0. Construct the matrix
509            // entries for near=0.5, far=20: m22 = near/(far-near), m32 = far*m22.
510            let (near, far) = (0.5f32, 20.0f32);
511            let m22 = near / (far - near);
512            let m32 = far * m22;
513            // ndc = 1 -> near; ndc = 0 -> far (background sentinel also returns far).
514            assert!((ndc_to_linear_with_matrix(1.0, m22, m32, far) - near).abs() < 1e-4);
515            assert_eq!(ndc_to_linear_with_matrix(0.0, m22, m32, far), far);
516        }
517
518        #[test]
519        fn test_convert_depth_to_linear_batch() {
520            let near = 0.01f32;
521            let far = 10.0f32;
522            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];
523
524            let linear = convert_depth_to_linear(&ndc_depths, near, far);
525
526            assert_eq!(linear.len(), 4);
527            // Near plane
528            assert!((linear[0] - near as f64).abs() < 0.001);
529            // Far plane
530            assert!((linear[3] - far as f64).abs() < 0.001);
531            // All should be in range [near, far]
532            for d in &linear {
533                assert!(*d >= near as f64 && *d <= far as f64);
534            }
535        }
536
537        #[test]
538        fn test_align_byte_size_edge_cases() {
539            // Powers of two should stay the same if multiple of 256
540            assert_eq!(align_byte_size(256), 256);
541            assert_eq!(align_byte_size(512), 512);
542            assert_eq!(align_byte_size(1024), 1024);
543
544            // Just under 256 should round up to 256
545            assert_eq!(align_byte_size(255), 256);
546            assert_eq!(align_byte_size(128), 256);
547
548            // Just over 256 should round up to 512
549            assert_eq!(align_byte_size(300), 512);
550        }
551
552        #[test]
553        fn test_extract_depth_64x64() {
554            // Test with TBP default resolution
555            let width = 64u32;
556            let height = 64u32;
557            let bytes_per_pixel = 4u32;
558            let padded_row = align_byte_size(width * bytes_per_pixel);
559
560            // Create aligned buffer
561            let mut data = vec![0u8; (padded_row * height) as usize];
562
563            // Fill with incrementing values
564            for y in 0..height {
565                for x in 0..width {
566                    let value = (y * width + x) as f32 / (width * height) as f32;
567                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
568                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
569                }
570            }
571
572            let depth = extract_depth_with_alignment(&data, width, height);
573            assert_eq!(depth.len(), (width * height) as usize);
574
575            // Verify first and last values
576            assert!((depth[0] - 0.0).abs() < 0.001);
577            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
578            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
579        }
580    }
581}
582
583// ============================================================================
584// Depth Readback Render Node
585// ============================================================================
586
/// Label for the depth readback render graph node.
///
/// `DepthReadbackPlugin` registers `DepthReadbackNode` in the `Core3d` graph
/// under this label and uses it to order the node between `EndMainPass` and
/// `Tonemapping`.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
590
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node itself is stateless; it does nothing unless the render world's
/// `DepthCaptureRequest::requested` flag is set.
#[derive(Default)]
struct DepthReadbackNode;
595
596impl ViewNode for DepthReadbackNode {
597    type ViewQuery = (
598        &'static ViewDepthTexture,
599        &'static ExtractedCamera,
600        &'static ExtractedView,
601    );
602
603    fn run<'w>(
604        &self,
605        _graph: &mut RenderGraphContext,
606        render_context: &mut RenderContext<'w>,
607        (view_depth_texture, camera, view): QueryItem<'w, '_, Self::ViewQuery>,
608        world: &'w World,
609    ) -> Result<(), NodeRunError> {
610        let trace = render_trace_enabled();
611        let t0 = trace.then(std::time::Instant::now);
612
613        // Check if depth capture is requested
614        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
615            return Ok(());
616        };
617        if !request.requested {
618            return Ok(());
619        }
620
621        // Get the pending queue
622        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
623            return Ok(());
624        };
625
626        // Get texture size from camera viewport or physical size
627        let Some(physical_size) = camera.physical_target_size else {
628            return Ok(());
629        };
630        let width = physical_size.x;
631        let height = physical_size.y;
632
633        let render_device = world.resource::<RenderDevice>();
634
635        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
636        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
637        let unpadded_bytes_per_row = width * bytes_per_pixel;
638        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
639        let buffer_size = (padded_bytes_per_row * height) as u64;
640
641        // Create staging buffer for CPU readback
642        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
643            label: Some("depth_staging_buffer"),
644            size: buffer_size,
645            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
646            mapped_at_creation: false,
647        });
648
649        // Copy depth texture to staging buffer
650        let encoder = render_context.command_encoder();
651        encoder.copy_texture_to_buffer(
652            TexelCopyTextureInfo {
653                texture: &view_depth_texture.texture,
654                mip_level: 0,
655                origin: Origin3d::ZERO,
656                aspect: TextureAspect::DepthOnly,
657            },
658            TexelCopyBufferInfo {
659                buffer: &staging_buffer,
660                layout: TexelCopyBufferLayout {
661                    offset: 0,
662                    bytes_per_row: Some(padded_bytes_per_row),
663                    rows_per_image: Some(height),
664                },
665            },
666            Extent3d {
667                width,
668                height,
669                depth_or_array_layers: 1,
670            },
671        );
672
673        // Push to queue for async processing (queue is Arc<Mutex<Vec>>).
674        // Capture the projection-matrix entries used for linearization: for a
675        // perspective RH matrix, clip_z = m22*z + m32 and clip_w = -z, so the
676        // positive view-space distance is d = m32 / (ndc + m22).
677        let clip_from_view = view.clip_from_view;
678        if let Ok(mut pending) = queue.0.lock() {
679            pending.push(PendingDepthCapture {
680                buffer: staging_buffer,
681                width,
682                height,
683                m22: clip_from_view.z_axis.z,
684                m32: clip_from_view.w_axis.z,
685                far: request.far,
686            });
687        }
688
689        if let Some(t0) = t0 {
690            eprintln!(
691                "[render_trace][node] DepthReadbackNode ms={:.3}",
692                t0.elapsed().as_secs_f64() * 1000.0
693            );
694        }
695
696        Ok(())
697    }
698}
699
700// ============================================================================
701// Depth Readback Plugin
702// ============================================================================
703
/// Plugin that sets up depth buffer readback from the GPU.
struct DepthReadbackPlugin {
    /// Destination for depth readbacks; clones (sharing one `Arc`) are
    /// inserted into both the main app and the render app.
    shared_depth: SharedDepthBuffer,
    /// Near plane seeded into the initial `DepthCaptureRequest`.
    near: f32,
    /// Far plane seeded into the initial `DepthCaptureRequest`.
    far: f32,
}
710
711impl Plugin for DepthReadbackPlugin {
712    fn build(&self, app: &mut App) {
713        use bevy::core_pipeline::core_3d::graph::Core3d;
714        use bevy::core_pipeline::core_3d::graph::Node3d;
715
716        // Insert shared depth buffer in main app
717        app.insert_resource(self.shared_depth.clone());
718        app.insert_resource(DepthCaptureRequest {
719            requested: false,
720            near: self.near,
721            far: self.far,
722        });
723
724        // Get render app
725        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
726            eprintln!("Failed to get RenderApp for depth readback");
727            return;
728        };
729
730        // Insert resources in render world
731        render_app.insert_resource(self.shared_depth.clone());
732        render_app.init_resource::<PendingDepthCaptureQueue>();
733
734        // Add extraction system to copy request from main world
735        render_app.add_systems(ExtractSchedule, extract_depth_request);
736
737        // Add system to process completed depth captures
738        render_app.add_systems(
739            Render,
740            collect_depth_captures.in_set(RenderSystems::Cleanup),
741        );
742
743        // Register the depth readback node in the render graph
744        // Run after main pass completes (depth buffer is ready) but before tonemapping
745        render_app
746            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
747            .add_render_graph_edges(
748                Core3d,
749                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
750            );
751    }
752}
753
754/// Extract depth capture request from main world to render world
755fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
756    commands.insert_resource(DepthCaptureRequest {
757        requested: request.requested,
758        near: request.near,
759        far: request.far,
760    });
761}
762
763/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
764fn collect_depth_captures(
765    queue: Res<PendingDepthCaptureQueue>,
766    shared_depth: Res<SharedDepthBuffer>,
767    render_device: Res<RenderDevice>,
768) {
769    let trace = render_trace_enabled();
770    let t_sys = trace.then(std::time::Instant::now);
771
772    // Take all pending captures from the queue
773    let pending_captures = {
774        let Ok(mut pending) = queue.0.lock() else {
775            return;
776        };
777        std::mem::take(&mut *pending)
778    };
779
780    if pending_captures.is_empty() {
781        if let Some(t0) = t_sys {
782            eprintln!(
783                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
784                t0.elapsed().as_secs_f64() * 1000.0
785            );
786        }
787        return;
788    }
789
790    let pending_count = pending_captures.len();
791
792    // Process each pending capture synchronously with device polling
793    for pending in pending_captures {
794        let width = pending.width;
795        let height = pending.height;
796        let m22 = pending.m22;
797        let m32 = pending.m32;
798        let far = pending.far;
799        let buffer = pending.buffer;
800        let shared = shared_depth.0.clone();
801
802        // Use blocking sync approach with device polling (same as RGBA capture)
803        let buffer_slice = buffer.slice(..);
804
805        // Request mapping
806        let (tx, rx) = std::sync::mpsc::channel();
807        buffer_slice.map_async(MapMode::Read, move |result| {
808            let _ = tx.send(result);
809        });
810
811        let t_wait = trace.then(std::time::Instant::now);
812        let mut poll_iters: u32 = 0;
813
814        // Poll the device until mapping completes
815        loop {
816            let _ =
817                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
818            poll_iters += 1;
819            match rx.try_recv() {
820                Ok(Ok(())) => {
821                    let data = buffer_slice.get_mapped_range();
822
823                    // Extract depth values with alignment handling
824                    let ndc_depth =
825                        depth_helpers::extract_depth_with_alignment(&data, width, height);
826
827                    drop(data);
828                    buffer.unmap();
829
830                    // Convert reverse-Z NDC to linear depth (meters) using the
831                    // view's actual projection matrix entries. See
832                    // `convert_depth_to_linear_with_matrix`.
833                    let linear_depth = depth_helpers::convert_depth_to_linear_with_matrix(
834                        &ndc_depth, m22, m32, far,
835                    );
836
837                    // Store in shared buffer
838                    if let Ok(mut guard) = shared.lock() {
839                        *guard = Some((linear_depth, width, height));
840                    }
841                    break;
842                }
843                Ok(Err(e)) => {
844                    eprintln!("Failed to map depth buffer: {:?}", e);
845                    break;
846                }
847                Err(std::sync::mpsc::TryRecvError::Empty) => {
848                    // Keep polling
849                    std::thread::sleep(std::time::Duration::from_millis(1));
850                }
851                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
852                    eprintln!("Depth buffer mapping channel disconnected");
853                    break;
854                }
855            }
856        }
857
858        if let Some(t_wait) = t_wait {
859            eprintln!(
860                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
861                poll_iters,
862                t_wait.elapsed().as_secs_f64() * 1000.0
863            );
864        }
865    }
866
867    if let Some(t0) = t_sys {
868        eprintln!(
869            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
870            pending_count,
871            t0.elapsed().as_secs_f64() * 1000.0
872        );
873    }
874}
875
876// ============================================================================
877// Image Copy Infrastructure (for headless rendering)
878// ============================================================================
879
/// Label for the image copy render graph node.
///
/// `ImageCopyPlugin::build` registers `ImageCopyDriver` under this label and adds an
/// edge from the camera driver node to it.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
883
/// Component that marks an image for GPU-to-CPU copying.
///
/// Instances are cloned into the render world by `extract_image_copiers` and
/// consumed by `ImageCopyDriver`.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target); looked up in
    /// `RenderAssets<GpuImage>` when the copy is recorded.
    src_image: Handle<Image>,
    /// Whether to capture on this frame; `ImageCopyDriver` skips copiers where this
    /// is `false`.
    enabled: bool,
}
892
/// Resource containing all ImageCopiers for the render world.
///
/// Replaced wholesale by `extract_image_copiers` each time that system runs.
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
896
/// Pending image capture: a staging buffer that already has a texture-to-buffer
/// copy submitted and still needs to be mapped and read on the CPU.
struct PendingImageCapture {
    /// Staging buffer the texture was copied into (created with
    /// `COPY_DST | MAP_READ` usage by `ImageCopyDriver`).
    buffer: Buffer,
    /// Width of the source image, taken from the GPU image size.
    width: u32,
    /// Height of the source image, taken from the GPU image size.
    height: u32,
    /// Row stride of `buffer` in bytes, including alignment padding.
    padded_bytes_per_row: u32,
}
904
/// Queue for pending image captures.
///
/// `ImageCopyDriver` pushes onto it; `collect_image_captures` takes the whole
/// contents each time it runs.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
908
/// Shared buffer for captured RGBA data.
///
/// Holds `Some((pixels, width, height))` once `collect_image_captures` has stored a
/// frame; `pixels` has the per-row alignment padding already removed. `None` until
/// the first capture is stored.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
913
914/// Render graph node that copies render target images to staging buffers
915struct ImageCopyDriver;
916
917impl Node for ImageCopyDriver {
918    fn run(
919        &self,
920        _graph: &mut RenderGraphContext,
921        _render_context: &mut RenderContext,
922        world: &World,
923    ) -> Result<(), NodeRunError> {
924        let trace = render_trace_enabled();
925        let t0 = trace.then(std::time::Instant::now);
926
927        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
928            return Ok(());
929        };
930
931        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
932            return Ok(());
933        };
934
935        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
936            return Ok(());
937        };
938
939        let render_device = world.resource::<RenderDevice>();
940
941        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
942            return Ok(());
943        };
944
945        for image_copier in image_copiers.0.iter() {
946            if !image_copier.enabled {
947                continue;
948            }
949
950            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
951                continue;
952            };
953
954            let width = gpu_image.size.width;
955            let height = gpu_image.size.height;
956
957            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
958            let block_dimensions = gpu_image.texture_format.block_dimensions();
959            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
960
961            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
962                (width as usize / block_dimensions.0 as usize) * block_size as usize,
963            );
964
965            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
966
967            // Create staging buffer for CPU readback
968            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
969                label: Some("image_copy_staging_buffer"),
970                size: buffer_size,
971                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
972                mapped_at_creation: false,
973            });
974
975            // Create command encoder for the copy operation
976            let mut encoder =
977                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
978
979            let texture_extent = Extent3d {
980                width,
981                height,
982                depth_or_array_layers: 1,
983            };
984
985            // Copy texture to buffer
986            encoder.copy_texture_to_buffer(
987                gpu_image.texture.as_image_copy(),
988                TexelCopyBufferInfo {
989                    buffer: &staging_buffer,
990                    layout: TexelCopyBufferLayout {
991                        offset: 0,
992                        bytes_per_row: Some(padded_bytes_per_row as u32),
993                        rows_per_image: None,
994                    },
995                },
996                texture_extent,
997            );
998
999            // Submit the copy command
1000            render_queue.submit(std::iter::once(encoder.finish()));
1001
1002            // Queue for async processing
1003            if let Ok(mut pending) = queue.0.lock() {
1004                pending.push(PendingImageCapture {
1005                    buffer: staging_buffer,
1006                    width,
1007                    height,
1008                    padded_bytes_per_row: padded_bytes_per_row as u32,
1009                });
1010            }
1011        }
1012
1013        if let Some(t0) = t0 {
1014            eprintln!(
1015                "[render_trace][node] ImageCopyDriver ms={:.3}",
1016                t0.elapsed().as_secs_f64() * 1000.0
1017            );
1018        }
1019
1020        Ok(())
1021    }
1022}
1023
1024/// Extract ImageCopier components to render world
1025fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
1026    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
1027}
1028
1029/// Process completed image captures
1030fn collect_image_captures(
1031    queue: Res<PendingImageCaptureQueue>,
1032    shared_rgba: Res<SharedRgbaBuffer>,
1033    render_device: Res<RenderDevice>,
1034) {
1035    let trace = render_trace_enabled();
1036    let t_sys = trace.then(std::time::Instant::now);
1037
1038    let pending_captures = {
1039        let Ok(mut pending) = queue.0.lock() else {
1040            return;
1041        };
1042        std::mem::take(&mut *pending)
1043    };
1044
1045    if pending_captures.is_empty() {
1046        if let Some(t0) = t_sys {
1047            eprintln!(
1048                "[render_trace][sys] collect_image_captures empty ms={:.3}",
1049                t0.elapsed().as_secs_f64() * 1000.0
1050            );
1051        }
1052        return;
1053    }
1054
1055    let pending_count = pending_captures.len();
1056
1057    for pending in pending_captures {
1058        let width = pending.width;
1059        let height = pending.height;
1060        let padded_bytes_per_row = pending.padded_bytes_per_row;
1061        let buffer = pending.buffer;
1062        let shared = shared_rgba.0.clone();
1063
1064        // Use blocking sync approach with device polling
1065        let buffer_slice = buffer.slice(..);
1066
1067        // Request mapping
1068        let (tx, rx) = std::sync::mpsc::channel();
1069        buffer_slice.map_async(MapMode::Read, move |result| {
1070            let _ = tx.send(result);
1071        });
1072
1073        // Poll the device until mapping completes (with timeout)
1074        let start = std::time::Instant::now();
1075        let timeout = std::time::Duration::from_secs(10);
1076        let mut poll_iters: u32 = 0;
1077        loop {
1078            let _ =
1079                render_device.poll(bevy::render::render_resource::PollType::wait_indefinitely());
1080            poll_iters += 1;
1081
1082            if start.elapsed() > timeout {
1083                eprintln!(
1084                    "Warning: Buffer mapping timeout after {:?}",
1085                    start.elapsed()
1086                );
1087                break;
1088            }
1089
1090            match rx.try_recv() {
1091                Ok(Ok(())) => {
1092                    let data = buffer_slice.get_mapped_range();
1093
1094                    // Extract pixels with alignment handling
1095                    let bytes_per_pixel = 4u32;
1096                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
1097                    let padded_row_bytes = padded_bytes_per_row as usize;
1098
1099                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
1100                    for y in 0..height as usize {
1101                        let row_start = y * padded_row_bytes;
1102                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
1103                    }
1104
1105                    drop(data);
1106                    buffer.unmap();
1107
1108                    if let Ok(mut guard) = shared.lock() {
1109                        *guard = Some((rgba, width, height));
1110                    }
1111                    break;
1112                }
1113                Ok(Err(e)) => {
1114                    eprintln!("Failed to map image buffer: {:?}", e);
1115                    break;
1116                }
1117                Err(std::sync::mpsc::TryRecvError::Empty) => {
1118                    // Keep polling
1119                    std::thread::sleep(std::time::Duration::from_millis(1));
1120                }
1121                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
1122                    eprintln!("Image buffer mapping channel disconnected");
1123                    break;
1124                }
1125            }
1126        }
1127
1128        if trace {
1129            eprintln!(
1130                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
1131                poll_iters,
1132                start.elapsed().as_secs_f64() * 1000.0
1133            );
1134        }
1135    }
1136
1137    if let Some(t0) = t_sys {
1138        eprintln!(
1139            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1140            pending_count,
1141            t0.elapsed().as_secs_f64() * 1000.0
1142        );
1143    }
1144}
1145
1146/// Plugin for headless image copy
1147struct ImageCopyPlugin {
1148    shared_rgba: SharedRgbaBuffer,
1149}
1150
1151impl Plugin for ImageCopyPlugin {
1152    fn build(&self, app: &mut App) {
1153        use bevy::render::render_graph::RenderGraph;
1154
1155        app.insert_resource(self.shared_rgba.clone());
1156
1157        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1158            return;
1159        };
1160
1161        render_app.insert_resource(self.shared_rgba.clone());
1162        render_app.init_resource::<ImageCopiers>();
1163        render_app.init_resource::<PendingImageCaptureQueue>();
1164
1165        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1166        render_app.add_systems(
1167            Render,
1168            collect_image_captures.in_set(RenderSystems::Cleanup),
1169        );
1170
1171        // Add image copy node to render graph (runs after camera driver)
1172        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1173        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1174        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1175    }
1176}
1177
1178// ============================================================================
1179// Render Request and Components
1180// ============================================================================
1181
/// Configuration passed to the Bevy app.
///
/// Built by `render_headless` / `render_headless_sequence` and inserted into the
/// app as a resource.
#[derive(Resource, Clone)]
struct RenderRequest {
    /// Mesh location as an asset string (output of `fs_path_to_asset_string`).
    mesh_path: String,
    /// Texture location as an asset string (output of `fs_path_to_asset_string`).
    texture_path: String,
    /// Camera pose; for a batch this is the first viewpoint of the sequence.
    camera_transform: Transform,
    /// Rotation to apply to the object.
    object_rotation: ObjectRotation,
    /// Render settings; `near_plane` / `far_plane` also feed the depth readback plugin.
    config: RenderConfig,
}
1191
/// Marker component for the rendered object.
// NOTE(review): the spawn and query sites are outside this section — confirm there
// which entity carries this marker.
#[derive(Component)]
struct RenderedObject;
1195
/// Marker component for the render camera.
// NOTE(review): the spawn and query sites are outside this section — confirm there
// which entity carries this marker.
#[derive(Component)]
struct RenderCamera;
1199
/// Resource wrapping the `Handle<Image>` of the loaded texture.
// NOTE(review): presumably the texture named by `RenderRequest::texture_path`;
// the loading code is outside this section — confirm.
#[derive(Resource)]
struct LoadedTexture(Handle<Image>);
1203
/// Resource wrapping the `Handle<Scene>` of the loaded scene.
// NOTE(review): presumably the scene loaded from `RenderRequest::mesh_path`;
// the loading code is outside this section — confirm.
#[derive(Resource)]
struct LoadedScene(Handle<Scene>);
1207
/// Shared output for extracting render results.
///
/// Starts as `None` in `render_headless`. That function's watchdog thread polls it
/// for a value, and the function itself reads it after `App::run()` returns.
#[derive(Resource, Clone)]
struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1211
/// Resource wrapping the handle of the render target image.
// NOTE(review): the `dead_code` allowance suggests the inner handle is stored but
// never read — presumably to keep the image asset alive; confirm.
#[derive(Resource)]
#[allow(dead_code)]
struct RenderTargetImage(Handle<Image>);
1216
1217/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1218#[derive(Resource)]
1219struct HeadlessBatchSequence {
1220    viewpoints: Vec<Transform>,
1221    current_index: usize,
1222    outputs: Vec<RenderOutput>,
1223    warmup_frames_remaining: u32,
1224    done: bool,
1225}
1226
1227impl HeadlessBatchSequence {
1228    fn new(viewpoints: Vec<Transform>) -> Self {
1229        let capacity = viewpoints.len();
1230        Self {
1231            viewpoints,
1232            current_index: 0,
1233            outputs: Vec::with_capacity(capacity),
1234            warmup_frames_remaining: 0,
1235            done: capacity == 0,
1236        }
1237    }
1238
1239    fn current_viewpoint(&self) -> Option<Transform> {
1240        self.viewpoints.get(self.current_index).cloned()
1241    }
1242}
1243
1244/// Perform headless rendering of a YCB object.
1245///
1246/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1247/// require any window surfaces. This should work on WSL2 and other environments
1248/// without display servers.
1249///
1250/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1251/// for results and terminates the process once the render is complete.
1252#[allow(dead_code)]
1253pub fn render_headless(
1254    object_dir: &Path,
1255    camera_transform: &Transform,
1256    object_rotation: &ObjectRotation,
1257    config: &RenderConfig,
1258) -> Result<RenderOutput, RenderError> {
1259    // Canonicalize paths so Bevy's asset server can find them regardless of
1260    // caller working directory. Relative paths like "../../ycb" pass the
1261    // exists() check but Bevy resolves assets against its own root.
1262    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1263        RenderError::RenderFailed(format!(
1264            "Cannot canonicalize object directory {}: {}",
1265            object_dir.display(),
1266            e
1267        ))
1268    })?;
1269    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1270    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1271
1272    if !mesh_path.exists() {
1273        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1274            &mesh_path,
1275        )));
1276    }
1277    if !texture_path.exists() {
1278        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1279            &texture_path,
1280        )));
1281    }
1282
1283    let request = RenderRequest {
1284        mesh_path: fs_path_to_asset_string(&mesh_path),
1285        texture_path: fs_path_to_asset_string(&texture_path),
1286        camera_transform: *camera_transform,
1287        object_rotation: object_rotation.clone(),
1288        config: config.clone(),
1289    };
1290
1291    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1292    let output_clone = shared_output.clone();
1293
1294    // Shared buffer for RGBA data from headless render target
1295    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1296
1297    // Shared buffer for depth readback
1298    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1299
1300    // Create a temp file path for fallback output serialization
1301    let temp_path =
1302        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1303
1304    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1305    let output_poll_for_timeout = shared_output.clone();
1306    std::thread::spawn(move || {
1307        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1308        let start = std::time::Instant::now();
1309        let poll_interval = std::time::Duration::from_millis(100);
1310
1311        loop {
1312            // Check if we have a result
1313            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1314                if guard.is_some() {
1315                    // Output is ready, Bevy will exit via AppExit event
1316                    return; // Exit watchdog thread, Bevy will handle exit
1317                }
1318            }
1319
1320            if start.elapsed() > timeout {
1321                eprintln!(
1322                    "Error: Render timeout after {} seconds",
1323                    RENDER_TIMEOUT_SECS
1324                );
1325                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1326                // Force exit on timeout (this is a failure case)
1327                std::process::exit(1);
1328            }
1329
1330            std::thread::sleep(poll_interval);
1331        }
1332    });
1333
1334    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1335    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1336    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1337
1338    // App::run() returned - check shared_output for result
1339    if let Ok(guard) = shared_output.0.lock() {
1340        if let Some(output) = guard.as_ref() {
1341            return Ok(output.clone());
1342        }
1343    }
1344
1345    // Fallback: try to read from temp file (for legacy compatibility)
1346    if temp_path.exists() {
1347        if let Ok(output) = read_output_from_file(&temp_path) {
1348            let _ = std::fs::remove_file(&temp_path);
1349            return Ok(output);
1350        }
1351    }
1352
1353    Err(RenderError::RenderFailed(
1354        "Render did not complete".to_string(),
1355    ))
1356}
1357
1358/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1359///
1360/// All captures share the same object, object rotation, and render configuration.
1361/// This is the fast path used by the batch API for episode-style workloads.
1362pub fn render_headless_sequence(
1363    object_dir: &Path,
1364    viewpoints: &[Transform],
1365    object_rotation: &ObjectRotation,
1366    config: &RenderConfig,
1367) -> Result<Vec<RenderOutput>, RenderError> {
1368    if viewpoints.is_empty() {
1369        return Ok(Vec::new());
1370    }
1371
1372    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1373        RenderError::RenderFailed(format!(
1374            "Cannot canonicalize object directory {}: {}",
1375            object_dir.display(),
1376            e
1377        ))
1378    })?;
1379    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1380    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1381
1382    if !mesh_path.exists() {
1383        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
1384            &mesh_path,
1385        )));
1386    }
1387    if !texture_path.exists() {
1388        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
1389            &texture_path,
1390        )));
1391    }
1392
1393    let request = RenderRequest {
1394        mesh_path: fs_path_to_asset_string(&mesh_path),
1395        texture_path: fs_path_to_asset_string(&texture_path),
1396        camera_transform: viewpoints[0],
1397        object_rotation: object_rotation.clone(),
1398        config: config.clone(),
1399    };
1400
1401    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1402    let rgba_clone = shared_rgba.clone();
1403
1404    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1405    let depth_clone = shared_depth.clone();
1406
1407    let mut app = App::new();
1408    app.add_plugins(
1409        DefaultPlugins
1410            .set(bevy::asset::AssetPlugin {
1411                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1412                // default (UnapprovedPathMode::Forbid → load() silently returns a
1413                // default handle). YCB meshes load from absolute paths, so allow them.
1414                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1415                ..default()
1416            })
1417            .set(WindowPlugin {
1418                primary_window: None,
1419                exit_condition: ExitCondition::DontExit,
1420                ..default()
1421            })
1422            .disable::<bevy::winit::WinitPlugin>()
1423            .disable::<LogPlugin>()
1424            .disable::<TerminalCtrlCHandlerPlugin>(),
1425    )
1426    .add_plugins(ObjPlugin)
1427    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1428    // Scene spawning panics unless those component types are registered. The
1429    // minimal headless plugin set doesn't register them, so do it explicitly.
1430    .register_type::<Mesh3d>()
1431    .register_type::<MeshMaterial3d<StandardMaterial>>()
1432    .register_type::<bevy::prelude::Transform>()
1433    .register_type::<bevy::prelude::GlobalTransform>()
1434    .register_type::<bevy::transform::components::TransformTreeChanged>()
1435    .register_type::<bevy::prelude::Visibility>()
1436    .register_type::<bevy::prelude::InheritedVisibility>()
1437    .register_type::<bevy::prelude::ViewVisibility>()
1438    .add_plugins(ImageCopyPlugin {
1439        shared_rgba: rgba_clone,
1440    })
1441    .add_plugins(DepthReadbackPlugin {
1442        shared_depth: depth_clone,
1443        near: config.near_plane,
1444        far: config.far_plane,
1445    })
1446    .insert_resource(request)
1447    .insert_resource(shared_rgba)
1448    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1449    .init_resource::<RenderState>()
1450    .add_systems(Startup, setup_headless_scene)
1451    .add_systems(
1452        Update,
1453        (
1454            check_assets_loaded,
1455            apply_materials,
1456            tick_headless_batch_warmup,
1457            request_headless_capture,
1458            check_headless_capture_ready,
1459            extract_and_continue_headless_batch,
1460        )
1461            .chain(),
1462    );
1463
1464    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1465    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1466    // normal startup phases before driving the headless batch loop ourselves.
1467    let trace_outer = render_trace_enabled();
1468    let t_finish = std::time::Instant::now();
1469    app.finish();
1470    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1471    let t_cleanup = std::time::Instant::now();
1472    app.cleanup();
1473    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1474    if trace_outer {
1475        eprintln!(
1476            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1477            finish_ms, cleanup_ms
1478        );
1479    }
1480
1481    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1482    let start = std::time::Instant::now();
1483
1484    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1485    let mut update_idx: u32 = 0;
1486    let mut last_completed_outputs: usize = 0;
1487    let mut viewpoint_start = std::time::Instant::now();
1488
1489    loop {
1490        if start.elapsed() > timeout {
1491            return Err(RenderError::RenderTimeout {
1492                duration_secs: RENDER_TIMEOUT_SECS,
1493            });
1494        }
1495
1496        let update_start = std::time::Instant::now();
1497        app.update();
1498        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1499
1500        if trace {
1501            let batch = app.world().resource::<HeadlessBatchSequence>();
1502            let warmup = batch.warmup_frames_remaining;
1503            let current = batch.current_index;
1504            let completed = batch.outputs.len();
1505            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1506            eprintln!(
1507                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1508                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1509            );
1510            if completed > last_completed_outputs {
1511                eprintln!(
1512                    "[render_trace] viewpoint {} finished in {:.2} ms",
1513                    completed - 1,
1514                    vp_ms
1515                );
1516                last_completed_outputs = completed;
1517                viewpoint_start = std::time::Instant::now();
1518            }
1519        }
1520
1521        update_idx += 1;
1522
1523        if app.world().resource::<HeadlessBatchSequence>().done {
1524            break;
1525        }
1526    }
1527
1528    if trace {
1529        eprintln!(
1530            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1531            start.elapsed().as_secs_f64() * 1000.0,
1532            viewpoints.len()
1533        );
1534    }
1535
1536    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1537    if batch.outputs.len() != viewpoints.len() {
1538        return Err(RenderError::RenderFailed(format!(
1539            "Batch render produced {} outputs for {} viewpoints",
1540            batch.outputs.len(),
1541            viewpoints.len()
1542        )));
1543    }
1544
1545    Ok(std::mem::take(&mut batch.outputs))
1546}
1547
1548/// Assemble the shared single-render headless Bevy app.
1549fn build_headless_app(
1550    request: RenderRequest,
1551    shared_output: SharedOutput,
1552    shared_rgba: SharedRgbaBuffer,
1553    shared_depth: SharedDepthBuffer,
1554) -> App {
1555    let near = request.config.near_plane;
1556    let far = request.config.far_plane;
1557
1558    let mut app = App::new();
1559    app.add_plugins(
1560        DefaultPlugins
1561            .set(bevy::asset::AssetPlugin {
1562                // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
1563                // default (UnapprovedPathMode::Forbid → load() silently returns a
1564                // default handle). YCB meshes load from absolute paths, so allow them.
1565                unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
1566                ..default()
1567            })
1568            .set(WindowPlugin {
1569                primary_window: None,
1570                exit_condition: ExitCondition::DontExit,
1571                ..default()
1572            })
1573            .disable::<bevy::winit::WinitPlugin>()
1574            .disable::<LogPlugin>()
1575            .disable::<TerminalCtrlCHandlerPlugin>(),
1576    )
1577    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1578        1.0 / 60.0,
1579    )))
1580    .add_plugins(ObjPlugin)
1581    // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
1582    // Scene spawning panics unless those component types are registered. The
1583    // minimal headless plugin set doesn't register them, so do it explicitly.
1584    .register_type::<Mesh3d>()
1585    .register_type::<MeshMaterial3d<StandardMaterial>>()
1586    .register_type::<bevy::prelude::Transform>()
1587    .register_type::<bevy::prelude::GlobalTransform>()
1588    .register_type::<bevy::transform::components::TransformTreeChanged>()
1589    .register_type::<bevy::prelude::Visibility>()
1590    .register_type::<bevy::prelude::InheritedVisibility>()
1591    .register_type::<bevy::prelude::ViewVisibility>()
1592    .add_plugins(ImageCopyPlugin {
1593        shared_rgba: shared_rgba.clone(),
1594    })
1595    .add_plugins(DepthReadbackPlugin {
1596        shared_depth,
1597        near,
1598        far,
1599    })
1600    .insert_resource(request)
1601    .insert_resource(shared_output)
1602    .insert_resource(shared_rgba)
1603    .init_resource::<RenderState>()
1604    .add_systems(Startup, setup_headless_scene)
1605    .add_systems(
1606        Update,
1607        (
1608            check_assets_loaded,
1609            apply_materials,
1610            request_headless_capture,
1611            check_headless_capture_ready,
1612            extract_and_exit_headless,
1613        )
1614            .chain(),
1615    );
1616    app
1617}
1618
1619/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1620#[allow(dead_code)]
1621fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1622    let mut data = Vec::new();
1623
1624    // Header: width, height, rgba_len, depth_len
1625    data.extend_from_slice(&output.width.to_le_bytes());
1626    data.extend_from_slice(&output.height.to_le_bytes());
1627    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1628    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1629
1630    // RGBA data
1631    data.extend_from_slice(&output.rgba);
1632
1633    // Depth data (as f64 bytes for TBP precision)
1634    for d in &output.depth {
1635        data.extend_from_slice(&d.to_le_bytes());
1636    }
1637
1638    // Intrinsics (f64 for TBP precision)
1639    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1640    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1641    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1642    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1643    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1644    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1645
1646    // Camera transform (translation + rotation quaternion)
1647    let t = output.camera_transform.translation;
1648    let r = output.camera_transform.rotation;
1649    data.extend_from_slice(&t.x.to_le_bytes());
1650    data.extend_from_slice(&t.y.to_le_bytes());
1651    data.extend_from_slice(&t.z.to_le_bytes());
1652    data.extend_from_slice(&r.x.to_le_bytes());
1653    data.extend_from_slice(&r.y.to_le_bytes());
1654    data.extend_from_slice(&r.z.to_le_bytes());
1655    data.extend_from_slice(&r.w.to_le_bytes());
1656
1657    // Object rotation (f64)
1658    let or = &output.object_rotation;
1659    data.extend_from_slice(&or.pitch.to_le_bytes());
1660    data.extend_from_slice(&or.yaw.to_le_bytes());
1661    data.extend_from_slice(&or.roll.to_le_bytes());
1662
1663    data
1664}
1665
1666/// Read RenderOutput from serialized file
1667fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1668    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1669    let mut data = Vec::new();
1670    file.read_to_end(&mut data)
1671        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1672
1673    let mut cursor = 0;
1674
1675    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1676        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1677        *cursor += 4;
1678        val
1679    };
1680
1681    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1682        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1683        *cursor += 4;
1684        val
1685    };
1686
1687    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1688        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1689        *cursor += 8;
1690        val
1691    };
1692
1693    let width = read_u32(&data, &mut cursor);
1694    let height = read_u32(&data, &mut cursor);
1695    let rgba_len = read_u32(&data, &mut cursor) as usize;
1696    let depth_len = read_u32(&data, &mut cursor) as usize;
1697
1698    let rgba = data[cursor..cursor + rgba_len].to_vec();
1699    cursor += rgba_len;
1700
1701    // Depth data (f64 for TBP precision)
1702    let mut depth = Vec::with_capacity(depth_len);
1703    for _ in 0..depth_len {
1704        depth.push(read_f64(&data, &mut cursor));
1705    }
1706
1707    // Intrinsics (f64 for TBP precision)
1708    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1709    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1710    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1711
1712    // Camera transform (f32 for Bevy compatibility)
1713    let tx = read_f32(&data, &mut cursor);
1714    let ty = read_f32(&data, &mut cursor);
1715    let tz = read_f32(&data, &mut cursor);
1716    let rx = read_f32(&data, &mut cursor);
1717    let ry = read_f32(&data, &mut cursor);
1718    let rz = read_f32(&data, &mut cursor);
1719    let rw = read_f32(&data, &mut cursor);
1720
1721    // Object rotation (f64)
1722    let pitch = read_f64(&data, &mut cursor);
1723    let yaw = read_f64(&data, &mut cursor);
1724    let roll = read_f64(&data, &mut cursor);
1725
1726    Ok(RenderOutput {
1727        rgba,
1728        depth,
1729        width,
1730        height,
1731        intrinsics: crate::CameraIntrinsics {
1732            focal_length,
1733            principal_point,
1734            image_size,
1735        },
1736        camera_transform: Transform {
1737            translation: Vec3::new(tx, ty, tz),
1738            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1739            scale: Vec3::ONE,
1740        },
1741        object_rotation: ObjectRotation { pitch, yaw, roll },
1742        target_point: Vec3::ZERO,
1743        targeting_policy: TargetingPolicy::Origin,
1744    })
1745}
1746
1747/// Setup the scene with camera, lighting, and object
1748#[allow(dead_code)]
1749fn setup_scene(
1750    mut commands: Commands,
1751    asset_server: Res<AssetServer>,
1752    request: Res<RenderRequest>,
1753    mut _materials: ResMut<Assets<StandardMaterial>>,
1754) {
1755    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1756    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1757    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1758    let fov = request.config.fov_radians();
1759    commands.spawn((
1760        Camera3d::default(),
1761        Camera::default(),
1762        Hdr,
1763        Projection::Perspective(PerspectiveProjection {
1764            fov,
1765            near: request.config.near_plane,
1766            far: request.config.far_plane,
1767            ..default()
1768        }),
1769        Msaa::Off,
1770        request.camera_transform,
1771        Tonemapping::None, // Accurate colors for software rendering
1772        DepthPrepass,
1773        NormalPrepass,
1774        RenderCamera,
1775    ));
1776
1777    // Ambient light (from config). In Bevy 0.18 the global ambient light is the
1778    // `GlobalAmbientLight` resource (the `AmbientLight` type became a per-camera component).
1779    let lighting = &request.config.lighting;
1780    commands.insert_resource(GlobalAmbientLight {
1781        color: Color::WHITE,
1782        brightness: lighting.ambient_brightness,
1783        ..default()
1784    });
1785
1786    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1787    if lighting.key_light_intensity > 0.0 {
1788        commands.spawn((
1789            PointLight {
1790                intensity: lighting.key_light_intensity,
1791                shadows_enabled: lighting.shadows_enabled,
1792                ..default()
1793            },
1794            Transform::from_xyz(
1795                lighting.key_light_position[0],
1796                lighting.key_light_position[1],
1797                lighting.key_light_position[2],
1798            ),
1799        ));
1800    }
1801
1802    // Fill light (from config)
1803    if lighting.fill_light_intensity > 0.0 {
1804        commands.spawn((
1805            PointLight {
1806                intensity: lighting.fill_light_intensity,
1807                shadows_enabled: lighting.shadows_enabled,
1808                ..default()
1809            },
1810            Transform::from_xyz(
1811                lighting.fill_light_position[0],
1812                lighting.fill_light_position[1],
1813                lighting.fill_light_position[2],
1814            ),
1815        ));
1816    }
1817
1818    // Load the scene
1819    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1820    commands.insert_resource(LoadedScene(scene_handle.clone()));
1821
1822    // Load the texture
1823    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1824    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1825
1826    // Create material with texture (will be applied later)
1827    let _material = _materials.add(StandardMaterial {
1828        base_color_texture: Some(texture_handle),
1829        unlit: true,
1830        ..default()
1831    });
1832
1833    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
1834    commands.spawn((
1835        SceneRoot(scene_handle),
1836        Transform::from_rotation(request.object_rotation.to_quat()),
1837        RenderedObject,
1838    ));
1839
1840    println!("Scene setup complete");
1841}
1842
1843/// Check if assets are loaded
1844fn check_assets_loaded(
1845    mut state: ResMut<RenderState>,
1846    asset_server: Res<AssetServer>,
1847    scene: Option<Res<LoadedScene>>,
1848    texture: Option<Res<LoadedTexture>>,
1849) {
1850    let trace = render_trace_enabled();
1851    let was_scene_loaded = state.scene_loaded;
1852    let was_texture_loaded = state.texture_loaded;
1853
1854    state.frame_count += 1;
1855
1856    if state.scene_loaded && state.texture_loaded {
1857        return;
1858    }
1859
1860    if let Some(scene) = scene {
1861        match asset_server.get_load_state(&scene.0) {
1862            Some(LoadState::Loaded) => {
1863                state.scene_loaded = true;
1864            }
1865            Some(LoadState::Failed(_)) => {}
1866            _ => {}
1867        }
1868    }
1869
1870    if let Some(texture) = texture {
1871        match asset_server.get_load_state(&texture.0) {
1872            Some(LoadState::Loaded) => {
1873                state.texture_loaded = true;
1874            }
1875            Some(LoadState::Failed(_)) => {}
1876            _ => {}
1877        }
1878    }
1879
1880    if trace {
1881        if !was_scene_loaded && state.scene_loaded {
1882            eprintln!(
1883                "[render_trace][coldinit] scene_loaded frame_count={}",
1884                state.frame_count
1885            );
1886        }
1887        if !was_texture_loaded && state.texture_loaded {
1888            eprintln!(
1889                "[render_trace][coldinit] texture_loaded frame_count={}",
1890                state.frame_count
1891            );
1892        }
1893    }
1894}
1895
1896/// Apply materials to loaded meshes
1897fn apply_materials(
1898    mut state: ResMut<RenderState>,
1899    texture: Option<Res<LoadedTexture>>,
1900    mut materials: ResMut<Assets<StandardMaterial>>,
1901    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1902    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1903) {
1904    // NOTE: we intentionally do NOT wait for `texture_loaded` before applying the
1905    // material. The texture *handle* is valid immediately, so applying the material
1906    // as soon as the mesh entities exist lets the main-pass `StandardMaterial`
1907    // pipeline start compiling during the long async texture load. A late material
1908    // swap (after texture load) would reset the pipeline and capture a blank color
1909    // frame before it recompiled — the root cause of the 0.18 blank renders.
1910    if !state.scene_loaded || state.capture_ready {
1911        return;
1912    }
1913
1914    state.frame_count += 1;
1915
1916    let Some(tex) = texture else { return };
1917
1918    if !state.materials_applied {
1919        // The scene hierarchy is instantiated asynchronously after the asset
1920        // load event fires; wait until mesh entities exist before applying.
1921        if mesh_query.is_empty() {
1922            return;
1923        }
1924
1925        let textured_material = materials.add(StandardMaterial {
1926            base_color_texture: Some(tex.0.clone()),
1927            unlit: true,
1928            ..default()
1929        });
1930
1931        for mut mat in mesh_query.iter_mut() {
1932            mat.0 = textured_material.clone();
1933        }
1934
1935        state.materials_applied = true;
1936        state.materials_applied_frame = state.frame_count;
1937    }
1938
1939    // Record the frame the texture finished loading (once).
1940    if state.texture_loaded && state.texture_ready_frame == 0 {
1941        state.texture_ready_frame = state.frame_count;
1942    }
1943
1944    // Capture once the texture pixels are loaded (+ a small margin for GPU image
1945    // preparation) AND the main-pass pipeline has had time to compile since the
1946    // material was applied. Because the material is applied early, the pipeline is
1947    // almost always ready well before the texture, so this resolves to a few frames
1948    // after the texture loads — deterministic and fast (no 60/120-frame cushion).
1949    let texture_ready =
1950        state.texture_ready_frame != 0 && state.frame_count >= state.texture_ready_frame + 6;
1951    let pipeline_ready = state.frame_count >= state.materials_applied_frame + 6;
1952    if texture_ready && pipeline_ready {
1953        let was_ready = state.capture_ready;
1954        state.capture_ready = true;
1955        if render_trace_enabled() && !was_ready {
1956            eprintln!(
1957                "[render_trace][coldinit] capture_ready frame_count={}",
1958                state.frame_count
1959            );
1960        }
1961    }
1962}
1963
1964/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
1965#[allow(dead_code)]
1966fn request_screenshot(
1967    mut commands: Commands,
1968    mut state: ResMut<RenderState>,
1969    shared_image: Res<SharedImageBuffer>,
1970    mut depth_request: ResMut<DepthCaptureRequest>,
1971) {
1972    if !state.capture_ready || state.screenshot_requested {
1973        return;
1974    }
1975
1976    // Clone the Arc for the observer closure
1977    let image_buffer = shared_image.0.clone();
1978
1979    // Also request depth capture
1980    depth_request.requested = true;
1981    println!("Depth capture requested");
1982
1983    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
1984    println!("Requesting screenshot via Screenshot entity");
1985    commands
1986        .spawn(Screenshot::primary_window())
1987        .observe(move |trigger: On<ScreenshotCaptured>| {
1988            // ScreenshotCaptured derefs to Image
1989            let image: &Image = trigger.event();
1990
1991            // Get dimensions
1992            let width = image.texture_descriptor.size.width;
1993            let height = image.texture_descriptor.size.height;
1994
1995            // Bevy 0.18: Image.data is now Option<Vec<u8>>; skip if absent.
1996            let Some(rgba_data) = image.data.clone() else {
1997                return;
1998            };
1999
2000            // Store in shared buffer
2001            if let Ok(mut guard) = image_buffer.lock() {
2002                *guard = Some((rgba_data, width, height));
2003            }
2004        });
2005
2006    state.screenshot_requested = true;
2007    println!("Screenshot requested");
2008}
2009
2010/// Check if screenshot callback has completed
2011#[allow(dead_code)]
2012fn check_screenshot_ready(
2013    mut state: ResMut<RenderState>,
2014    shared_image: Res<SharedImageBuffer>,
2015    shared_depth: Res<SharedDepthBuffer>,
2016    request: Res<RenderRequest>,
2017) {
2018    if !state.screenshot_requested || state.captured {
2019        return;
2020    }
2021
2022    // Increment frame count while waiting for capture
2023    state.frame_count += 1;
2024
2025    // Check if RGBA callback has written data
2026    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
2027        if let Some((rgba_data, width, height)) = guard.as_ref() {
2028            if state.rgba_data.is_none() {
2029                state.rgba_data = Some(rgba_data.clone());
2030                state.image_width = *width;
2031                state.image_height = *height;
2032            }
2033            true
2034        } else {
2035            false
2036        }
2037    } else {
2038        false
2039    };
2040
2041    // Check if depth readback has completed
2042    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2043        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2044            if state.depth_data.is_none() {
2045                state.depth_data = Some(depth_data.clone());
2046            }
2047            true
2048        } else {
2049            false
2050        }
2051    } else {
2052        false
2053    };
2054
2055    // If depth readback failed or is taking too long, fall back to placeholder
2056    // (This allows graceful degradation on systems where depth readback fails)
2057    if rgba_ready && !depth_ready && state.frame_count > 60 {
2058        let camera_dist = request.camera_transform.translation.length() as f64;
2059        let pixel_count = (state.image_width * state.image_height) as usize;
2060        state.depth_data = Some(vec![camera_dist; pixel_count]);
2061    }
2062
2063    // Mark as captured when both RGBA and depth are ready
2064    if state.rgba_data.is_some() && state.depth_data.is_some() {
2065        state.captured = true;
2066    }
2067}
2068
2069/// Extract results and exit
2070#[allow(dead_code)]
2071fn extract_and_exit(
2072    mut state: ResMut<RenderState>,
2073    request: Res<RenderRequest>,
2074    shared_output: Res<SharedOutput>,
2075    mut commands: Commands,
2076    windows: Query<Entity, With<bevy::window::Window>>,
2077) {
2078    // Handle delayed exit after closing window
2079    if state.exit_requested {
2080        state.exit_frame_count += 1;
2081        // After a few frames with no window, Bevy should exit
2082        return;
2083    }
2084
2085    if !state.captured {
2086        return;
2087    }
2088
2089    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2090        // Use actual captured dimensions (may differ from config if window was resized)
2091        let width = state.image_width;
2092        let height = state.image_height;
2093
2094        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2095        let intrinsics = request.config.intrinsics_for_size(width, height);
2096
2097        let output = RenderOutput {
2098            rgba: rgba.clone(),
2099            depth: depth.clone(),
2100            width,
2101            height,
2102            intrinsics,
2103            camera_transform: request.camera_transform,
2104            object_rotation: request.object_rotation.clone(),
2105            target_point: Vec3::ZERO,
2106            targeting_policy: TargetingPolicy::Origin,
2107        };
2108
2109        if let Ok(mut guard) = shared_output.0.lock() {
2110            *guard = Some(output);
2111            drop(guard); // Release lock immediately
2112
2113            // Small delay to allow watchdog to detect output before window close
2114            std::thread::sleep(std::time::Duration::from_millis(200));
2115        }
2116
2117        // Close all windows to trigger app exit
2118        // eprintln!("Closing windows to trigger exit...");
2119        for window_entity in windows.iter() {
2120            commands.entity(window_entity).despawn();
2121        }
2122        state.exit_requested = true;
2123    }
2124}
2125
2126// ============================================================================
2127// Headless Rendering Systems (no window surfaces)
2128// ============================================================================
2129
2130/// Setup the scene for headless rendering with RenderTarget::Image
2131fn setup_headless_scene(
2132    mut commands: Commands,
2133    mut images: ResMut<Assets<Image>>,
2134    asset_server: Res<AssetServer>,
2135    request: Res<RenderRequest>,
2136    mut _materials: ResMut<Assets<StandardMaterial>>,
2137) {
2138    let trace = render_trace_enabled();
2139    let t0 = trace.then(std::time::Instant::now);
2140
2141    #[cfg(test)]
2142    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
2143
2144    let width = request.config.width;
2145    let height = request.config.height;
2146
2147    // Create render target image with proper texture usages
2148    let size = Extent3d {
2149        width,
2150        height,
2151        depth_or_array_layers: 1,
2152    };
2153
2154    let mut render_target_image = Image::new_fill(
2155        size,
2156        TextureDimension::D2,
2157        &[0, 0, 0, 255], // Initialize with opaque black
2158        TextureFormat::Rgba8UnormSrgb,
2159        RenderAssetUsages::default(),
2160    );
2161
2162    // Add required texture usages for headless rendering
2163    render_target_image.texture_descriptor.usage =
2164        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2165
2166    let render_target_handle = images.add(render_target_image);
2167
2168    // Store handle for later access
2169    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2170
2171    // Camera rendering to the image texture (NO window!)
2172    let fov = request.config.fov_radians();
2173    commands.spawn((
2174        Camera3d::default(),
2175        Camera::default(),
2176        Hdr,
2177        // In Bevy 0.18 the render target is a separate `RenderTarget` component,
2178        // and `RenderTarget::Image` wraps an `ImageRenderTarget` (via `From<Handle<Image>>`).
2179        RenderTarget::Image(render_target_handle.clone().into()),
2180        Projection::Perspective(PerspectiveProjection {
2181            fov,
2182            near: request.config.near_plane,
2183            far: request.config.far_plane,
2184            ..default()
2185        }),
2186        Msaa::Off,
2187        request.camera_transform,
2188        Tonemapping::None,
2189        DepthPrepass,
2190        NormalPrepass,
2191        RenderCamera,
2192        // Add ImageCopier to trigger RGBA extraction
2193        ImageCopier {
2194            src_image: render_target_handle,
2195            enabled: false, // Will enable when ready to capture
2196        },
2197    ));
2198
2199    // Ambient light (global resource in Bevy 0.18).
2200    let lighting = &request.config.lighting;
2201    commands.insert_resource(GlobalAmbientLight {
2202        color: Color::WHITE,
2203        brightness: lighting.ambient_brightness,
2204        ..default()
2205    });
2206
2207    // Key light
2208    if lighting.key_light_intensity > 0.0 {
2209        commands.spawn((
2210            PointLight {
2211                intensity: lighting.key_light_intensity,
2212                shadows_enabled: lighting.shadows_enabled,
2213                ..default()
2214            },
2215            Transform::from_xyz(
2216                lighting.key_light_position[0],
2217                lighting.key_light_position[1],
2218                lighting.key_light_position[2],
2219            ),
2220        ));
2221    }
2222
2223    // Fill light
2224    if lighting.fill_light_intensity > 0.0 {
2225        commands.spawn((
2226            PointLight {
2227                intensity: lighting.fill_light_intensity,
2228                shadows_enabled: lighting.shadows_enabled,
2229                ..default()
2230            },
2231            Transform::from_xyz(
2232                lighting.fill_light_position[0],
2233                lighting.fill_light_position[1],
2234                lighting.fill_light_position[2],
2235            ),
2236        ));
2237    }
2238
2239    // Load the scene
2240    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2241    commands.insert_resource(LoadedScene(scene_handle.clone()));
2242
2243    // Load the texture
2244    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2245    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2246
2247    // Create material with texture
2248    let _material = _materials.add(StandardMaterial {
2249        base_color_texture: Some(texture_handle),
2250        unlit: true,
2251        ..default()
2252    });
2253
2254    // Spawn the scene with rotation
2255    commands.spawn((
2256        SceneRoot(scene_handle),
2257        Transform::from_rotation(request.object_rotation.to_quat()),
2258        RenderedObject,
2259    ));
2260
2261    if let Some(t0) = t0 {
2262        eprintln!(
2263            "[render_trace][startup] setup_headless_scene ms={:.3}",
2264            t0.elapsed().as_secs_f64() * 1000.0
2265        );
2266    }
2267}
2268
2269/// Request capture for headless rendering (enable ImageCopier)
2270fn request_headless_capture(
2271    mut state: ResMut<RenderState>,
2272    mut depth_request: ResMut<DepthCaptureRequest>,
2273    mut query: Query<&mut ImageCopier>,
2274    batch: Option<Res<HeadlessBatchSequence>>,
2275) {
2276    let trace = render_trace_enabled();
2277    let t0 = trace.then(std::time::Instant::now);
2278
2279    if !state.capture_ready || state.screenshot_requested {
2280        if let Some(t0) = t0 {
2281            eprintln!(
2282                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2283                t0.elapsed().as_secs_f64() * 1000.0
2284            );
2285        }
2286        return;
2287    }
2288
2289    if batch
2290        .as_ref()
2291        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2292    {
2293        if let Some(t0) = t0 {
2294            eprintln!(
2295                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2296                t0.elapsed().as_secs_f64() * 1000.0
2297            );
2298        }
2299        return;
2300    }
2301
2302    // Enable the ImageCopier to trigger RGBA extraction
2303    for mut copier in query.iter_mut() {
2304        copier.enabled = true;
2305    }
2306
2307    // Request depth capture
2308    depth_request.requested = true;
2309
2310    state.screenshot_requested = true;
2311
2312    if let Some(t0) = t0 {
2313        eprintln!(
2314            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2315            t0.elapsed().as_secs_f64() * 1000.0
2316        );
2317    }
2318}
2319
2320/// Check if headless capture has completed
2321fn check_headless_capture_ready(
2322    mut state: ResMut<RenderState>,
2323    shared_rgba: Res<SharedRgbaBuffer>,
2324    shared_depth: Res<SharedDepthBuffer>,
2325    request: Res<RenderRequest>,
2326    mut query: Query<&mut ImageCopier>,
2327) {
2328    let trace = render_trace_enabled();
2329    let t0 = trace.then(std::time::Instant::now);
2330
2331    if !state.screenshot_requested || state.captured {
2332        if let Some(t0) = t0 {
2333            eprintln!(
2334                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2335                t0.elapsed().as_secs_f64() * 1000.0
2336            );
2337        }
2338        return;
2339    }
2340
2341    state.frame_count += 1;
2342    state.capture_retries += 1;
2343    // Bounded fallback so a genuinely-uniform scene (or persistent invalid
2344    // readback) still terminates instead of hanging to the watchdog.
2345    // Generous bound: slow paths (e.g. RenderSession's retained-render-world
2346    // settle after a scene swap) can take ~150 frames to produce a stable frame,
2347    // so force-accepting at 150 would grab a partial frame and break parity. Only
2348    // force as a true last resort to avoid hanging the watchdog.
2349    let force_accept = state.capture_retries > 150;
2350
2351    // RGBA: accept the first non-blank frame. Uniform clear-color frames are
2352    // pre-geometry reads from the nondeterministic one-shot capture — reject and
2353    // retry. The copier stays enabled until BOTH RGBA and depth are valid so a
2354    // late/odd depth frame can still be captured.
2355    if state.rgba_data.is_none() {
2356        let captured_rgba = shared_rgba.0.lock().ok().and_then(|g| g.clone());
2357        if let Some((rgba_data, width, height)) = captured_rgba {
2358            let non_blank = rgba_data
2359                .chunks_exact(4)
2360                .any(|px| px[0..3] != rgba_data[0..3]);
2361            // Stable == identical to the previous readback (render has settled).
2362            let stable = state.prev_rgba.as_deref() == Some(rgba_data.as_slice());
2363            if (non_blank && stable) || force_accept {
2364                state.image_width = width;
2365                state.image_height = height;
2366                state.rgba_data = Some(rgba_data);
2367                state.prev_rgba = None;
2368            } else {
2369                // Not settled yet: remember this frame and re-read fresh next one.
2370                state.prev_rgba = Some(rgba_data);
2371                if let Ok(mut g) = shared_rgba.0.lock() {
2372                    *g = None;
2373                }
2374            }
2375        }
2376    }
2377
2378    // Depth: accept the first readback that contains real foreground (the depth
2379    // readback can also miss the geometry, leaving an all-far-plane buffer).
2380    if state.depth_data.is_none() {
2381        let captured_depth = shared_depth.0.lock().ok().and_then(|g| g.clone());
2382        if let Some((depth_data, _w, _h)) = captured_depth {
2383            let far = request.config.far_plane as f64;
2384            // Require a real object-surface depth, not just any non-far value:
2385            // near-plane garbage (~0.01) would otherwise be accepted but is not a
2386            // valid surface, and downstream depth-validity checks require > 0.1m.
2387            let has_foreground = depth_data.iter().any(|&d| d > 0.1 && d < far * 0.999);
2388            // Settled == identical to the previous depth readback.
2389            let stable = state.prev_depth.as_deref() == Some(depth_data.as_slice());
2390            if has_foreground && stable {
2391                state.depth_data = Some(depth_data);
2392                state.prev_depth = None;
2393            } else {
2394                state.prev_depth = Some(depth_data);
2395                if let Ok(mut g) = shared_depth.0.lock() {
2396                    *g = None; // discard; retry next frame
2397                }
2398            }
2399        }
2400    }
2401
2402    // Last-resort fallback so we never hang the watchdog: once RGBA is in hand
2403    // and we've retried a lot, fill a uniform camera-distance depth placeholder.
2404    if state.rgba_data.is_some() && state.depth_data.is_none() && force_accept {
2405        let camera_dist = request.camera_transform.translation.length() as f64;
2406        let pixel_count = (state.image_width * state.image_height) as usize;
2407        state.depth_data = Some(vec![camera_dist; pixel_count]);
2408    }
2409
2410    let rgba_ready = state.rgba_data.is_some();
2411    let depth_ready = state.depth_data.is_some();
2412
2413    // Both valid → capture complete; stop the copier.
2414    if rgba_ready && depth_ready {
2415        state.captured = true;
2416        for mut copier in query.iter_mut() {
2417            copier.enabled = false;
2418        }
2419    }
2420
2421    if let Some(t0) = t0 {
2422        eprintln!(
2423            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2424            rgba_ready,
2425            depth_ready,
2426            state.captured,
2427            state.frame_count,
2428            t0.elapsed().as_secs_f64() * 1000.0
2429        );
2430    }
2431}
2432
2433/// Extract results and exit for headless rendering
2434fn extract_and_exit_headless(
2435    mut state: ResMut<RenderState>,
2436    request: Res<RenderRequest>,
2437    shared_output: Res<SharedOutput>,
2438    mut app_exit: MessageWriter<bevy::app::AppExit>,
2439    batch: Option<Res<HeadlessBatchSequence>>,
2440) {
2441    if batch.is_some() {
2442        return;
2443    }
2444
2445    if state.exit_requested {
2446        return;
2447    }
2448
2449    if !state.captured {
2450        return;
2451    }
2452
2453    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2454        let width = state.image_width;
2455        let height = state.image_height;
2456
2457        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2458        let intrinsics = request.config.intrinsics_for_size(width, height);
2459
2460        let output = RenderOutput {
2461            rgba: rgba.clone(),
2462            depth: depth.clone(),
2463            width,
2464            height,
2465            intrinsics,
2466            camera_transform: request.camera_transform,
2467            object_rotation: request.object_rotation.clone(),
2468            target_point: Vec3::ZERO,
2469            targeting_policy: TargetingPolicy::Origin,
2470        };
2471
2472        if let Ok(mut guard) = shared_output.0.lock() {
2473            *guard = Some(output);
2474            drop(guard);
2475            std::thread::sleep(std::time::Duration::from_millis(200));
2476        }
2477
2478        // Send AppExit event (headless apps use this instead of closing windows)
2479        app_exit.write(bevy::app::AppExit::Success);
2480        state.exit_requested = true;
2481    }
2482}
2483
2484/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2485fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2486    let Some(mut batch) = batch else {
2487        return;
2488    };
2489
2490    if batch.warmup_frames_remaining > 0 {
2491        batch.warmup_frames_remaining -= 1;
2492    }
2493}
2494
2495/// Extract one batch output and continue rendering the next viewpoint in the same app.
2496fn extract_and_continue_headless_batch(
2497    mut state: ResMut<RenderState>,
2498    request: Res<RenderRequest>,
2499    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2500    batch: Option<ResMut<HeadlessBatchSequence>>,
2501    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2502    mut depth_request: ResMut<DepthCaptureRequest>,
2503    mut image_copiers: Query<&mut ImageCopier>,
2504) {
2505    let trace = render_trace_enabled();
2506    let t0 = trace.then(std::time::Instant::now);
2507
2508    let (shared_rgba, shared_depth) = buffers;
2509    let Some(mut batch) = batch else {
2510        if let Some(t0) = t0 {
2511            eprintln!(
2512                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2513                t0.elapsed().as_secs_f64() * 1000.0
2514            );
2515        }
2516        return;
2517    };
2518
2519    if state.exit_requested || !state.captured || batch.done {
2520        if let Some(t0) = t0 {
2521            eprintln!(
2522                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2523                state.captured,
2524                batch.done,
2525                t0.elapsed().as_secs_f64() * 1000.0
2526            );
2527        }
2528        return;
2529    }
2530
2531    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2532        let width = state.image_width;
2533        let height = state.image_height;
2534
2535        let intrinsics = request.config.intrinsics_for_size(width, height);
2536
2537        let output = RenderOutput {
2538            rgba: rgba.clone(),
2539            depth: depth.clone(),
2540            width,
2541            height,
2542            intrinsics,
2543            camera_transform: batch
2544                .current_viewpoint()
2545                .unwrap_or(request.camera_transform),
2546            object_rotation: request.object_rotation.clone(),
2547            target_point: Vec3::ZERO,
2548            targeting_policy: TargetingPolicy::Origin,
2549        };
2550        batch.outputs.push(output);
2551
2552        let next_index = batch.current_index + 1;
2553        if next_index >= batch.viewpoints.len() {
2554            batch.done = true;
2555            state.exit_requested = true;
2556            return;
2557        }
2558
2559        batch.current_index = next_index;
2560        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2561
2562        if let Some(next_viewpoint) = batch.current_viewpoint() {
2563            for mut camera_transform in camera_query.iter_mut() {
2564                *camera_transform = next_viewpoint;
2565            }
2566        }
2567
2568        if let Ok(mut guard) = shared_rgba.0.lock() {
2569            *guard = None;
2570        }
2571        if let Ok(mut guard) = shared_depth.0.lock() {
2572            *guard = None;
2573        }
2574
2575        for mut copier in image_copiers.iter_mut() {
2576            copier.enabled = false;
2577        }
2578
2579        depth_request.requested = false;
2580        state.frame_count = 0;
2581        state.capture_ready = true;
2582        state.screenshot_requested = false;
2583        state.captured = false;
2584        state.rgba_data = None;
2585        state.depth_data = None;
2586        state.image_width = 0;
2587        state.image_height = 0;
2588        // Reset the per-capture settle/retry tracking too, otherwise it
2589        // accumulates across viewpoints and force-accepts an unsettled frame for
2590        // later viewpoints (breaking parity).
2591        state.capture_retries = 0;
2592        state.prev_rgba = None;
2593        state.prev_depth = None;
2594
2595        if let Some(t0) = t0 {
2596            eprintln!(
2597                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2598                batch.current_index.saturating_sub(1),
2599                batch.current_index,
2600                batch.done,
2601                t0.elapsed().as_secs_f64() * 1000.0
2602            );
2603        }
2604    } else if let Some(t0) = t0 {
2605        eprintln!(
2606            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2607            t0.elapsed().as_secs_f64() * 1000.0
2608        );
2609    }
2610}
2611
2612// ============================================================================
2613// Persistent batch session (RenderSession)
2614//
2615// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2616// object (PSO) compilation across multiple `render()` calls. Profile data (see
2617// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2618// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2619// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2620// episodes recovers the bulk of that cost.
2621// ============================================================================
2622
/// Marker component for the scene entity spawned by a single
/// `RenderSession::render()` call.
///
/// Each `render()` call first queries for every entity carrying this marker
/// and despawns it, then spawns a fresh scene root tagged with it. This is
/// how the previous object (or rotation) is swapped out cleanly.
#[derive(Component)]
struct SessionScene;
2627
2628/// Session-persistent setup: render target image, camera (with prepass +
2629/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2630/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2631/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2632fn setup_session_persistent_scene(
2633    mut commands: Commands,
2634    mut images: ResMut<Assets<Image>>,
2635    config: Res<SessionRenderConfig>,
2636) {
2637    let width = config.0.width;
2638    let height = config.0.height;
2639
2640    let size = Extent3d {
2641        width,
2642        height,
2643        depth_or_array_layers: 1,
2644    };
2645
2646    let mut render_target_image = Image::new_fill(
2647        size,
2648        TextureDimension::D2,
2649        &[0, 0, 0, 255],
2650        TextureFormat::Rgba8UnormSrgb,
2651        RenderAssetUsages::default(),
2652    );
2653    render_target_image.texture_descriptor.usage =
2654        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2655
2656    let render_target_handle = images.add(render_target_image);
2657    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2658
2659    let fov = config.0.fov_radians();
2660    commands.spawn((
2661        Camera3d::default(),
2662        Camera::default(),
2663        Hdr,
2664        RenderTarget::Image(render_target_handle.clone().into()),
2665        Projection::Perspective(PerspectiveProjection {
2666            fov,
2667            near: config.0.near_plane,
2668            far: config.0.far_plane,
2669            ..default()
2670        }),
2671        Msaa::Off,
2672        Transform::default(),
2673        Tonemapping::None,
2674        DepthPrepass,
2675        NormalPrepass,
2676        RenderCamera,
2677        ImageCopier {
2678            src_image: render_target_handle,
2679            enabled: false,
2680        },
2681    ));
2682
2683    let lighting = &config.0.lighting;
2684    commands.insert_resource(GlobalAmbientLight {
2685        color: Color::WHITE,
2686        brightness: lighting.ambient_brightness,
2687        ..default()
2688    });
2689
2690    if lighting.key_light_intensity > 0.0 {
2691        commands.spawn((
2692            PointLight {
2693                intensity: lighting.key_light_intensity,
2694                shadows_enabled: lighting.shadows_enabled,
2695                ..default()
2696            },
2697            Transform::from_xyz(
2698                lighting.key_light_position[0],
2699                lighting.key_light_position[1],
2700                lighting.key_light_position[2],
2701            ),
2702        ));
2703    }
2704
2705    if lighting.fill_light_intensity > 0.0 {
2706        commands.spawn((
2707            PointLight {
2708                intensity: lighting.fill_light_intensity,
2709                shadows_enabled: lighting.shadows_enabled,
2710                ..default()
2711            },
2712            Transform::from_xyz(
2713                lighting.fill_light_position[0],
2714                lighting.fill_light_position[1],
2715                lighting.fill_light_position[2],
2716            ),
2717        ));
2718    }
2719}
2720
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// `setup_session_persistent_scene` reads it to size the render target, to
/// configure the camera projection (fov, near/far planes), and to set up the
/// ambient, key and fill lighting.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2725
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    // The long-lived Bevy app; `render()` drives it with repeated `update()` calls.
    app: App,
    // Config captured at `new()`; every request passed to `render()` is compared against it.
    render_config: RenderConfig,
    // Session-side handle to the RGBA readback slot; a clone is handed to
    // `ImageCopyPlugin` in `new()`, and `render()` clears the slot before each group.
    shared_rgba: SharedRgbaBuffer,
    // Session-side handle to the depth readback slot; a clone is handed to
    // `DepthReadbackPlugin` in `new()`, and `render()` clears the slot before each group.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer `PhantomData` opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2756
2757impl RenderSession {
2758    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2759    /// `update()` so Startup systems run and the wgpu device + adapter are
2760    /// initialized. The first `render()` call still pays PSO compilation for
2761    /// the specific mesh/material combination; subsequent calls reuse the cache.
2762    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2763        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2764        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2765
2766        let mut app = App::new();
2767        app.add_plugins(
2768            DefaultPlugins
2769                .set(bevy::asset::AssetPlugin {
2770                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
2771                    // default (UnapprovedPathMode::Forbid → load() silently returns a
2772                    // default handle). YCB meshes load from absolute paths, so allow them.
2773                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
2774                    ..default()
2775                })
2776                .set(WindowPlugin {
2777                    primary_window: None,
2778                    exit_condition: ExitCondition::DontExit,
2779                    ..default()
2780                })
2781                .disable::<bevy::winit::WinitPlugin>()
2782                .disable::<LogPlugin>()
2783                .disable::<TerminalCtrlCHandlerPlugin>(),
2784        )
2785        .add_plugins(ObjPlugin)
2786        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
2787        // Scene spawning panics unless those component types are registered. The
2788        // minimal headless plugin set doesn't register them, so do it explicitly.
2789        .register_type::<Mesh3d>()
2790        .register_type::<MeshMaterial3d<StandardMaterial>>()
2791        .register_type::<bevy::prelude::Transform>()
2792        .register_type::<bevy::prelude::GlobalTransform>()
2793        .register_type::<bevy::transform::components::TransformTreeChanged>()
2794        .register_type::<bevy::prelude::Visibility>()
2795        .register_type::<bevy::prelude::InheritedVisibility>()
2796        .register_type::<bevy::prelude::ViewVisibility>()
2797        .add_plugins(ImageCopyPlugin {
2798            shared_rgba: shared_rgba.clone(),
2799        })
2800        .add_plugins(DepthReadbackPlugin {
2801            shared_depth: shared_depth.clone(),
2802            near: render_config.near_plane,
2803            far: render_config.far_plane,
2804        })
2805        .insert_resource(SessionRenderConfig(render_config.clone()))
2806        .insert_resource(shared_rgba.clone())
2807        .init_resource::<RenderState>()
2808        .add_systems(Startup, setup_session_persistent_scene)
2809        .add_systems(
2810            Update,
2811            (
2812                check_assets_loaded,
2813                apply_materials,
2814                tick_headless_batch_warmup,
2815                request_headless_capture,
2816                check_headless_capture_ready,
2817                extract_and_continue_headless_batch,
2818            )
2819                .chain()
2820                // Gate the capture chain on `RenderRequest` existing. `new()`
2821                // runs a warmup `app.update()` to execute Startup (which spawns
2822                // the camera/lights/render target) before the first `render()`
2823                // call, but does not yet insert `RenderRequest`. Several systems
2824                // in this chain take `Res<RenderRequest>` (not `Option`) and
2825                // would panic on SystemState init if the resource were absent.
2826                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2827        );
2828
2829        app.finish();
2830        app.cleanup();
2831
2832        // One warmup update runs Startup systems (render target, camera, lights)
2833        // so they exist before the first `render()` call seeds the camera
2834        // transform. The Update chain is gated by `RenderRequest` existence and
2835        // is a no-op this tick. PSO compilation for specific mesh/material
2836        // combinations still happens lazily on the first real render.
2837        app.update();
2838
2839        Ok(Self {
2840            app,
2841            render_config: render_config.clone(),
2842            shared_rgba,
2843            shared_depth,
2844            _not_send_sync: std::marker::PhantomData,
2845        })
2846    }
2847
2848    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2849    /// Returns outputs in request order.
2850    ///
2851    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2852    /// wgpu device was lost mid-render. This call produced no output; any
2853    /// outputs from earlier `render()` calls on this session are still valid.
2854    /// Recovery: drop this `RenderSession` and construct a new one.
2855    pub fn render(
2856        &mut self,
2857        requests: &[crate::BatchRenderRequest],
2858    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2859        use crate::{BatchRenderError, BatchRenderOutput};
2860
2861        if requests.is_empty() {
2862            return Ok(Vec::new());
2863        }
2864
2865        // Enforce homogeneity and config invariance.
2866        let first = &requests[0];
2867        if first.render_config != self.render_config {
2868            return Err(BatchRenderError::InvalidConfig(
2869                "RenderSession render_config mismatch: session was constructed with a different \
2870                 RenderConfig than the first request carries. Session config cannot change after \
2871                 `new()`; construct a new session if you need a different resolution/camera."
2872                    .to_string(),
2873            ));
2874        }
2875        for r in &requests[1..] {
2876            if r.object_dir != first.object_dir
2877                || r.object_rotation != first.object_rotation
2878                || r.render_config != first.render_config
2879            {
2880                return Err(BatchRenderError::InvalidConfig(
2881                    "Phase 1 RenderSession::render requires homogeneous requests \
2882                     (same object_dir, object_rotation, and render_config across the batch). \
2883                     Call render() once per group instead."
2884                        .to_string(),
2885                ));
2886            }
2887        }
2888
2889        // Canonicalize paths and validate mesh/texture presence. This matches
2890        // `render_headless_sequence`'s preconditions so the error surface stays
2891        // consistent.
2892        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2893            BatchRenderError::InvalidConfig(format!(
2894                "Cannot canonicalize object directory {}: {}",
2895                first.object_dir.display(),
2896                e
2897            ))
2898        })?;
2899        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2900        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2901        if !mesh_path.exists() {
2902            return Err(BatchRenderError::InvalidConfig(format!(
2903                "Mesh not found: {}",
2904                mesh_path.display()
2905            )));
2906        }
2907        if !texture_path.exists() {
2908            return Err(BatchRenderError::InvalidConfig(format!(
2909                "Texture not found: {}",
2910                texture_path.display()
2911            )));
2912        }
2913
2914        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2915
2916        // --- per-group scene swap (direct world manipulation) ---
2917        {
2918            let world = self.app.world_mut();
2919
2920            // Despawn any SessionScene entity from the previous group.
2921            let stale: Vec<Entity> = world
2922                .query_filtered::<Entity, With<SessionScene>>()
2923                .iter(world)
2924                .collect();
2925            for entity in stale {
2926                world.entity_mut(entity).despawn();
2927            }
2928
2929            // Clear shared RGBA/depth buffers so a stale payload can't leak
2930            // into the first viewpoint of this call.
2931            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2932                *guard = None;
2933            }
2934            if let Ok(mut guard) = self.shared_depth.0.lock() {
2935                *guard = None;
2936            }
2937
2938            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2939            // frame_count, materials_applied, etc.). Default() gives all false/0.
2940            *world.resource_mut::<RenderState>() = RenderState::default();
2941
2942            // Update RenderRequest so the existing capture systems see the new
2943            // object paths, rotation, and camera transform (seeded from first vp).
2944            let new_request = RenderRequest {
2945                mesh_path: fs_path_to_asset_string(&mesh_path),
2946                texture_path: fs_path_to_asset_string(&texture_path),
2947                camera_transform: viewpoints[0],
2948                object_rotation: first.object_rotation.clone(),
2949                config: self.render_config.clone(),
2950            };
2951            world.insert_resource(new_request);
2952
2953            // Kick off asset loads and install the handles under the names the
2954            // existing `check_assets_loaded` system expects.
2955            let asset_server = world.resource::<AssetServer>().clone();
2956            let scene_handle: Handle<Scene> =
2957                asset_server.load(fs_path_to_asset_string(&mesh_path));
2958            let texture_handle: Handle<Image> =
2959                asset_server.load(fs_path_to_asset_string(&texture_path));
2960            world.insert_resource(LoadedScene(scene_handle.clone()));
2961            world.insert_resource(LoadedTexture(texture_handle));
2962
2963            // Spawn the new scene entity tagged so we can find + despawn it next
2964            // render() call.
2965            world.spawn((
2966                SceneRoot(scene_handle),
2967                Transform::from_rotation(first.object_rotation.to_quat()),
2968                RenderedObject,
2969                SessionScene,
2970            ));
2971
2972            // Seed the camera transform to the first viewpoint now so the first
2973            // capture lines up; subsequent viewpoints are advanced by
2974            // `extract_and_continue_headless_batch`.
2975            let camera_entity = world
2976                .query_filtered::<Entity, With<RenderCamera>>()
2977                .iter(world)
2978                .next();
2979            if let Some(cam) = camera_entity {
2980                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2981                    *transform = viewpoints[0];
2982                }
2983            }
2984
2985            // Install the viewpoint sequence for this render() call. The robust
2986            // settled-frame capture (reject blank/partial readbacks, retry until
2987            // two consecutive readbacks match) absorbs the despawn/respawn
2988            // render-world settle, so a separate discarded warmup pass is not
2989            // needed and the per-object cost stays low.
2990            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
2991        }
2992
2993        // --- drive the real capture loop ---
2994        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2995        let start = std::time::Instant::now();
2996        loop {
2997            if start.elapsed() > timeout {
2998                return Err(BatchRenderError::TotalFailure(format!(
2999                    "RenderSession::render timed out after {}s",
3000                    RENDER_TIMEOUT_SECS
3001                )));
3002            }
3003
3004            self.app.update();
3005
3006            if self.app.world().resource::<HeadlessBatchSequence>().done {
3007                break;
3008            }
3009        }
3010
3011        // Collect outputs and zip with requests to produce BatchRenderOutput in
3012        // request order.
3013        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3014        if sequence.outputs.len() != requests.len() {
3015            return Err(BatchRenderError::TotalFailure(format!(
3016                "RenderSession produced {} outputs for {} requests",
3017                sequence.outputs.len(),
3018                requests.len()
3019            )));
3020        }
3021        let outputs = std::mem::take(&mut sequence.outputs);
3022
3023        Ok(requests
3024            .iter()
3025            .cloned()
3026            .zip(outputs)
3027            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
3028            .collect())
3029    }
3030}
3031
3032// ============================================================================
3033// Per-step persistent renderer (PersistentRenderer)
3034//
3035// `RenderSession` reuses the App across calls but rebuilds the scene on every
3036// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
3037// fine for the parity-gate path (one scene per episode of N viewpoints) but
3038// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
3039// the object stays loaded for the whole episode.
3040//
3041// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
3042// construction. `new()` loads mesh + texture + spawns the scene root + drives
3043// one warmup render (output discarded) so PSO compilation and material setup
3044// are paid up front. `render(camera, rotation)` then only mutates the camera
3045// `Transform` and (if changed) the scene root rotation, drives the capture
3046// chain for one frame, and returns. See issue #65.
3047// ============================================================================
3048
/// Marker component for the `PersistentRenderer`'s scene root entity.
///
/// The entity is kept alive for the whole renderer lifetime; only its
/// `Transform` is mutated when the caller-supplied object rotation changes.
#[derive(Component)]
struct PersistentScene;
3054
/// Persistent per-step renderer. Loads the scene once at `new()` and renders
/// one frame per `render()` call by mutating the camera transform and scene
/// root rotation in-place. Built for surface-policy feedback loops where the
/// object stays fixed for the duration of an episode and the camera moves
/// every step. See issue #65.
///
/// # Thread affinity
///
/// `PersistentRenderer` must be created, used, and dropped on the same thread.
/// Holds a `bevy::App` that owns GPU resources not safe to move across
/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
///
/// # Object + config invariants
///
/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
/// object or change resolution/lighting, drop and rebuild. Rotation may change
/// freely between `render()` calls.
pub struct PersistentRenderer {
    // The long-lived Bevy app that owns the scene, camera and render pipeline.
    app: App,
    // Object directory this renderer is committed to.
    // NOTE(review): presumably the canonicalized path computed in `new()` — confirm at the construction site.
    object_dir: PathBuf,
    // Render configuration fixed at `new()`.
    render_config: RenderConfig,
    // RGBA readback slot; a clone is handed to `ImageCopyPlugin` in `new()`.
    shared_rgba: SharedRgbaBuffer,
    // Depth readback slot; a clone is handed to `DepthReadbackPlugin` in `new()`.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer `PhantomData` opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
3080
3081impl PersistentRenderer {
3082    /// Build the App, load the scene + texture, spawn the scene root, and drive
3083    /// one warmup render whose output is discarded. After `new()` returns, the
3084    /// first user-facing `render()` call benefits from a warm PSO cache and
3085    /// applied materials.
3086    pub fn new(
3087        object_dir: &Path,
3088        render_config: &RenderConfig,
3089    ) -> Result<Self, crate::RenderError> {
3090        let object_dir =
3091            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
3092                path: object_dir.display().to_string(),
3093                reason: e.to_string(),
3094            })?;
3095        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3096        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3097        if !mesh_path.exists() {
3098            return Err(crate::RenderError::MeshNotFound(fs_path_to_asset_string(
3099                &mesh_path,
3100            )));
3101        }
3102        if !texture_path.exists() {
3103            return Err(crate::RenderError::TextureNotFound(
3104                fs_path_to_asset_string(&texture_path),
3105            ));
3106        }
3107
3108        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3109        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3110
3111        let mut app = App::new();
3112        app.add_plugins(
3113            DefaultPlugins
3114                .set(bevy::asset::AssetPlugin {
3115                    // Bevy 0.17+ forbids loading from absolute / `..` asset paths by
3116                    // default (UnapprovedPathMode::Forbid → load() silently returns a
3117                    // default handle). YCB meshes load from absolute paths, so allow them.
3118                    unapproved_path_mode: bevy::asset::UnapprovedPathMode::Allow,
3119                    ..default()
3120                })
3121                .set(WindowPlugin {
3122                    primary_window: None,
3123                    exit_condition: ExitCondition::DontExit,
3124                    ..default()
3125                })
3126                .disable::<bevy::winit::WinitPlugin>()
3127                .disable::<LogPlugin>()
3128                .disable::<TerminalCtrlCHandlerPlugin>(),
3129        )
3130        .add_plugins(ObjPlugin)
3131        // bevy_obj's Scene contains Mesh3d + MeshMaterial3d entities; reflection-based
3132        // Scene spawning panics unless those component types are registered. The
3133        // minimal headless plugin set doesn't register them, so do it explicitly.
3134        .register_type::<Mesh3d>()
3135        .register_type::<MeshMaterial3d<StandardMaterial>>()
3136        .register_type::<bevy::prelude::Transform>()
3137        .register_type::<bevy::prelude::GlobalTransform>()
3138        .register_type::<bevy::transform::components::TransformTreeChanged>()
3139        .register_type::<bevy::prelude::Visibility>()
3140        .register_type::<bevy::prelude::InheritedVisibility>()
3141        .register_type::<bevy::prelude::ViewVisibility>()
3142        .add_plugins(ImageCopyPlugin {
3143            shared_rgba: shared_rgba.clone(),
3144        })
3145        .add_plugins(DepthReadbackPlugin {
3146            shared_depth: shared_depth.clone(),
3147            near: render_config.near_plane,
3148            far: render_config.far_plane,
3149        })
3150        .insert_resource(SessionRenderConfig(render_config.clone()))
3151        .insert_resource(shared_rgba.clone())
3152        .init_resource::<RenderState>()
3153        .add_systems(Startup, setup_session_persistent_scene)
3154        .add_systems(
3155            Update,
3156            (
3157                check_assets_loaded,
3158                apply_materials,
3159                tick_headless_batch_warmup,
3160                request_headless_capture,
3161                check_headless_capture_ready,
3162                extract_and_continue_headless_batch,
3163            )
3164                .chain()
3165                // Same gate as RenderSession: capture chain only runs once
3166                // RenderRequest is installed. Startup runs first via the
3167                // warmup `app.update()` below.
3168                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
3169        );
3170
3171        app.finish();
3172        app.cleanup();
3173        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
3174        app.update();
3175
3176        // Install scene + warmup render request. The warmup output is discarded
3177        // — its purpose is to pay PSO compilation and material application
3178        // upfront so the first user-facing render() is fast.
3179        let initial_request = RenderRequest {
3180            mesh_path: fs_path_to_asset_string(&mesh_path),
3181            texture_path: fs_path_to_asset_string(&texture_path),
3182            camera_transform: Transform::default(),
3183            object_rotation: ObjectRotation::identity(),
3184            config: render_config.clone(),
3185        };
3186
3187        {
3188            let world = app.world_mut();
3189            let asset_server = world.resource::<AssetServer>().clone();
3190            let scene_handle: Handle<Scene> =
3191                asset_server.load(fs_path_to_asset_string(&mesh_path));
3192            let texture_handle: Handle<Image> =
3193                asset_server.load(fs_path_to_asset_string(&texture_path));
3194            world.insert_resource(LoadedScene(scene_handle.clone()));
3195            world.insert_resource(LoadedTexture(texture_handle));
3196            world.insert_resource(initial_request);
3197            world.spawn((
3198                SceneRoot(scene_handle),
3199                Transform::from_rotation(ObjectRotation::identity().to_quat()),
3200                RenderedObject,
3201                PersistentScene,
3202            ));
3203            world.insert_resource(HeadlessBatchSequence::new(vec![Transform::default()]));
3204        }
3205
3206        // Drive the warmup render to completion.
3207        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3208        let start = std::time::Instant::now();
3209        loop {
3210            if start.elapsed() > timeout {
3211                return Err(crate::RenderError::RenderFailed(format!(
3212                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
3213                )));
3214            }
3215            app.update();
3216            if app.world().resource::<HeadlessBatchSequence>().done {
3217                break;
3218            }
3219        }
3220        // Discard the warmup output so it doesn't leak into the first real
3221        // render() call's output buffer.
3222        app.world_mut()
3223            .resource_mut::<HeadlessBatchSequence>()
3224            .outputs
3225            .clear();
3226
3227        Ok(Self {
3228            app,
3229            object_dir,
3230            render_config: render_config.clone(),
3231            shared_rgba,
3232            shared_depth,
3233            _not_send_sync: std::marker::PhantomData,
3234        })
3235    }
3236
3237    /// Render one frame from the given camera transform and object rotation.
3238    /// Reuses the loaded scene + warm PSO cache from `new()`.
3239    pub fn render(
3240        &mut self,
3241        camera_transform: &Transform,
3242        object_rotation: &ObjectRotation,
3243    ) -> Result<RenderOutput, crate::RenderError> {
3244        let camera_transform = *camera_transform;
3245        let object_rotation_owned = object_rotation.clone();
3246
3247        {
3248            let world = self.app.world_mut();
3249
3250            // Update the persistent scene root rotation. Always-write avoids
3251            // the cost of an extra ObjectRotation comparison per call; the
3252            // mutation itself is a single Transform write.
3253            let scene_entity = world
3254                .query_filtered::<Entity, With<PersistentScene>>()
3255                .iter(world)
3256                .next();
3257            if let Some(entity) = scene_entity {
3258                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
3259                    *transform = Transform::from_rotation(object_rotation_owned.to_quat());
3260                }
3261            }
3262
3263            // Update the camera transform.
3264            let cam_entity = world
3265                .query_filtered::<Entity, With<RenderCamera>>()
3266                .iter(world)
3267                .next();
3268            if let Some(cam) = cam_entity {
3269                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
3270                    *transform = camera_transform;
3271                }
3272            }
3273
3274            // Reset per-frame state, preserving scene_loaded / texture_loaded
3275            // / materials_applied / materials_applied_frame. The asset-load
3276            // and material-apply work was paid in `new()`'s warmup; we only
3277            // need to clear the per-capture state.
3278            //
3279            // `capture_ready = true` short-circuits `apply_materials` on
3280            // every tick of the render loop (no need to re-check material
3281            // application — it stays applied for the renderer's lifetime).
3282            // It does NOT short-circuit `request_headless_capture`, which
3283            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3284            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3285            // without that warmup gate, request_headless_capture fires same-
3286            // tick as the transform writes, capturing the previous render's
3287            // target before the new transforms have propagated.
3288            {
3289                let mut state = world.resource_mut::<RenderState>();
3290                state.exit_requested = false;
3291                state.screenshot_requested = false;
3292                state.captured = false;
3293                state.rgba_data = None;
3294                state.depth_data = None;
3295                state.frame_count = 0;
3296                state.image_width = 0;
3297                state.image_height = 0;
3298                state.capture_ready = true;
3299            }
3300
3301            // Clear shared GPU readback buffers so a stale payload from the
3302            // previous render() can't leak into this call's output.
3303            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3304                *guard = None;
3305            }
3306            if let Ok(mut guard) = self.shared_depth.0.lock() {
3307                *guard = None;
3308            }
3309
3310            // Update RenderRequest (used by extract_and_continue_headless_batch
3311            // to stamp the output with the right intrinsics + rotation).
3312            {
3313                let mut req = world.resource_mut::<RenderRequest>();
3314                req.camera_transform = camera_transform;
3315                req.object_rotation = object_rotation_owned.clone();
3316            }
3317
3318            // Install fresh single-element batch with warmup frames so
3319            // `request_headless_capture` is gated until the new transforms
3320            // have propagated through the render pipeline.
3321            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3322            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3323            world.insert_resource(batch);
3324        }
3325
3326        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3327        let start = std::time::Instant::now();
3328        loop {
3329            if start.elapsed() > timeout {
3330                return Err(crate::RenderError::RenderFailed(format!(
3331                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3332                )));
3333            }
3334            self.app.update();
3335            if self.app.world().resource::<HeadlessBatchSequence>().done {
3336                break;
3337            }
3338        }
3339
3340        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3341        let mut outputs = std::mem::take(&mut sequence.outputs);
3342        if outputs.len() != 1 {
3343            return Err(crate::RenderError::RenderFailed(format!(
3344                "PersistentRenderer::render expected 1 output, got {}",
3345                outputs.len()
3346            )));
3347        }
3348
3349        Ok(outputs.remove(0))
3350    }
3351
3352    /// Path to the YCB object directory this renderer was bound to.
3353    pub fn object_dir(&self) -> &Path {
3354        &self.object_dir
3355    }
3356
3357    /// The `RenderConfig` this renderer was constructed with.
3358    pub fn render_config(&self) -> &RenderConfig {
3359        &self.render_config
3360    }
3361
3362    /// Explicit close. Equivalent to dropping; provided to match the API
3363    /// proposal in #65 for callers that want lifetime-explicit teardown.
3364    pub fn close(self) {
3365        // Drop runs on return.
3366    }
3367}
3368
3369/// Render directly to files (for subprocess mode).
3370///
3371/// This function saves RGBA and depth data directly to files before exiting.
3372/// Designed for subprocess rendering where the process will exit after rendering.
3373pub fn render_to_files(
3374    object_dir: &Path,
3375    camera_transform: &Transform,
3376    object_rotation: &ObjectRotation,
3377    config: &RenderConfig,
3378    rgba_path: &Path,
3379    depth_path: &Path,
3380) -> Result<(), RenderError> {
3381    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3382    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3383
3384    if !mesh_path.exists() {
3385        return Err(RenderError::MeshNotFound(fs_path_to_asset_string(
3386            &mesh_path,
3387        )));
3388    }
3389    if !texture_path.exists() {
3390        return Err(RenderError::TextureNotFound(fs_path_to_asset_string(
3391            &texture_path,
3392        )));
3393    }
3394
3395    let request = RenderRequest {
3396        mesh_path: fs_path_to_asset_string(&mesh_path),
3397        texture_path: fs_path_to_asset_string(&texture_path),
3398        camera_transform: *camera_transform,
3399        object_rotation: object_rotation.clone(),
3400        config: config.clone(),
3401    };
3402
3403    // Shared state for output
3404    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
3405    let output_poll = shared_output.clone();
3406
3407    // Clone paths for watchdog thread
3408    let rgba_path = rgba_path.to_path_buf();
3409    let depth_path = depth_path.to_path_buf();
3410
3411    // Shared buffer for RGBA data from headless render target
3412    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3413
3414    // Shared buffer for depth readback
3415    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3416
3417    // Spawn watchdog thread that saves files and exits
3418    std::thread::spawn(move || {
3419        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3420        let start = std::time::Instant::now();
3421        let poll_interval = std::time::Duration::from_millis(100);
3422
3423        loop {
3424            if let Ok(guard) = output_poll.0.lock() {
3425                if let Some(output) = guard.as_ref() {
3426                    // Save RGBA as PNG
3427                    if let Err(e) =
3428                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
3429                    {
3430                        eprintln!("Failed to save RGBA: {:?}", e);
3431                        std::process::exit(1);
3432                    }
3433
3434                    // Save depth as binary f32
3435                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
3436                        eprintln!("Failed to save depth: {:?}", e);
3437                        std::process::exit(1);
3438                    }
3439
3440                    std::process::exit(0);
3441                }
3442            }
3443
3444            if start.elapsed() > timeout {
3445                eprintln!(
3446                    "Error: Render timeout after {} seconds",
3447                    RENDER_TIMEOUT_SECS
3448                );
3449                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
3450                std::process::exit(1);
3451            }
3452
3453            std::thread::sleep(poll_interval);
3454        }
3455    });
3456
3457    // Configure rendering backend for this environment.
3458    // Use OnceLock so env vars are only set once per process — repeated calls
3459    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
3460    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
3461    // requires a persistent renderer (tracked in issue #14).
3462    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
3463    BACKEND_INIT.get_or_init(|| {
3464        let backend_config = BackendConfig::headless();
3465        backend_config.apply_env();
3466    });
3467
3468    // Run Bevy app with HEADLESS configuration
3469    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
3470
3471    // Unreachable - watchdog thread exits the process
3472    Err(RenderError::RenderFailed(
3473        "Render did not complete".to_string(),
3474    ))
3475}
3476
3477/// Save RGBA data to PNG file
3478fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3479    use image::{ImageBuffer, Rgba};
3480
3481    // Create parent directories if needed
3482    if let Some(parent) = path.parent() {
3483        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3484    }
3485
3486    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3487        ImageBuffer::from_raw(width, height, rgba.to_vec())
3488            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3489
3490    img.save(path).map_err(|e| e.to_string())
3491}
3492
/// Write depth samples to `path` as raw little-endian `f64` values (f64 for
/// TBP precision): 8 bytes per sample, in slice order, with no header.
///
/// Missing parent directories of `path` are created first. Any I/O failure is
/// returned as its message rendered to a `String`.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists before writing.
    path.parent()
        .map_or(Ok(()), std::fs::create_dir_all)
        .map_err(|err| err.to_string())?;

    // Serialize every sample into one contiguous buffer, sized up front.
    let mut encoded: Vec<u8> = Vec::with_capacity(std::mem::size_of_val(depth));
    for sample in depth {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &encoded).map_err(|err| err.to_string())
}
3503
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ source for a cube with corners at +/-0.10 on every axis,
    /// two triangles per face, sharing four texture coordinates.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Texels of the 2x2 synthetic texture in row-major order (index `y * 2 + x`).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],
        [48, 255, 48, 255],
        [48, 48, 255, 255],
        [255, 255, 64, 255],
    ];

    /// Create `<tmp>/synthetic_cube/google_16k/` holding `textured.obj` and
    /// `texture_map.png`, and return the owning `TempDir`.
    ///
    /// A small centered cube stays visible from all default TBP viewpoints and
    /// does not need any YCB downloads.
    fn write_synthetic_object() -> TempDir {
        let temp_dir = TempDir::new().expect("create temp dir for synthetic object");
        let mesh_dir = temp_dir.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&mesh_dir).expect("create synthetic google_16k dir");

        std::fs::write(mesh_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        let texture =
            ImageBuffer::from_fn(2, 2, |x, y| Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize]));
        texture
            .save(mesh_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        temp_dir
    }

    /// Render five viewpoints of the synthetic cube through `render_batch` and
    /// check output shapes, visible color, single scene setup, and wall time.
    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        let requests: Vec<_> = viewpoints
            .iter()
            .take(request_count)
            .map(|&viewpoint| BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            })
            .collect();

        let timer = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        // Every output must match the configured resolution: 4 bytes per pixel
        // of RGBA and one depth sample per pixel.
        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;
        for (idx, output) in outputs.iter().enumerate() {
            assert_eq!(output.width, config.width, "output {idx} width mismatch");
            assert_eq!(output.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                output.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                output.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero color channel (alpha ignored).
            let has_color = output
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}