Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::LoadState;
42use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
43use bevy::core_pipeline::tonemapping::Tonemapping;
44use bevy::ecs::query::QueryItem;
45use bevy::log::LogPlugin;
46use bevy::prelude::*;
47use bevy::render::camera::{ExtractedCamera, RenderTarget};
48use bevy::render::render_asset::{RenderAssetUsages, RenderAssets};
49use bevy::render::render_graph::{
50    Node, NodeRunError, RenderGraphApp, RenderGraphContext, RenderLabel, ViewNode, ViewNodeRunner,
51};
52use bevy::render::render_resource::{
53    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, ImageCopyBuffer,
54    ImageCopyTexture, ImageDataLayout, MapMode, Origin3d, TextureAspect, TextureDimension,
55    TextureFormat, TextureUsages,
56};
57use bevy::render::renderer::RenderQueue;
58use bevy::render::renderer::{RenderContext, RenderDevice};
59use bevy::render::texture::GpuImage;
60use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
61use bevy::render::view::ViewDepthTexture;
62use bevy::render::{Extract, Render, RenderApp, RenderSet};
63use bevy::window::{ExitCondition, WindowPlugin};
64use bevy_obj::ObjPlugin;
65use std::fs::File;
66use std::io::Read as IoRead;
67use std::path::{Path, PathBuf};
68#[cfg(test)]
69use std::sync::atomic::{AtomicUsize, Ordering};
70use std::sync::{Arc, Mutex, OnceLock};
71use std::time::Duration;
72
73use crate::{backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput};
74use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
75
/// Watchdog timeout for a single render, in seconds.
///
/// Upper bound on how long any single render path waits before the render is
/// declared failed. 180s accommodates first-run wgpu shader compilation on
/// Windows, which can take well over 60s on a cold GPU cache (see commit
/// 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 180;
82
/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. This was historically set to 3 as a conservative cushion; lowering
/// it directly shortens per-viewpoint wall-clock time because `app.update()`
/// in the batch path is not rate-limited. Validated against the pixel-exact
/// hardware test `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;
92
/// Warmup frames at the start of each `PersistentRenderer::render()` call.
///
/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
/// batch because `extract_and_continue_headless_batch` writes the next
/// camera transform *and* clears the shared GPU readback buffers in the
/// same tick — so the in-flight copy from the previous viewpoint has
/// already drained by the time the next capture is gated.
///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
///
/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
/// ticks of warmup give Windows/DX12 enough room to drain the previous
/// readback and capture the post-propagation color target:
///   - tick 0: transforms propagate, render runs (no copy enabled)
///   - tick 1: previous in-flight readback drains (no copy enabled)
///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
///   - tick 3: shared buffers populated → captured → batch finalized
const PERSISTENT_WARMUP_FRAMES: u32 = 3;
116
/// Report whether render tracing is switched on.
///
/// Tracing is enabled when the `BEVY_SENSOR_RENDER_TRACE` environment variable
/// is set to a valid Unicode value. The environment is consulted on every
/// call, so callers on per-frame paths should read the flag once per
/// invocation and gate all of their tracing output on the result.
#[inline]
fn render_trace_enabled() -> bool {
    matches!(std::env::var("BEVY_SENSOR_RENDER_TRACE"), Ok(_))
}
123
/// Report whether a display server appears to be reachable for windowed rendering.
///
/// Returns `true` when either `DISPLAY` (checked first) or `WAYLAND_DISPLAY`
/// is present in the environment with a valid Unicode value.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
131
/// Check if we're running on WSL2 (which doesn't support Vulkan window surfaces).
///
/// Reads `/proc/version` and looks for `microsoft` or `wsl` in it,
/// case-insensitively. Returns `false` when the file cannot be read (for
/// example on platforms without `/proc`).
#[allow(dead_code)]
fn is_wsl2() -> bool {
    let Ok(version) = std::fs::read_to_string("/proc/version") else {
        return false;
    };
    // Lower-case once and reuse it for both substring checks.
    let version = version.to_lowercase();
    version.contains("microsoft") || version.contains("wsl")
}
141
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`. Because the struct derives `Default`, every
/// counter starts at 0, every flag at `false`, and both data slots at `None`.
#[derive(Resource, Default)]
struct RenderState {
    frame_count: u32,
    scene_loaded: bool,
    texture_loaded: bool,
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    capture_ready: bool,
    screenshot_requested: bool,
    captured: bool,
    exit_requested: bool,
    #[allow(dead_code)]
    exit_frame_count: u32,
    // NOTE(review): presumably the captured color bytes — confirm layout against the writer.
    rgba_data: Option<Vec<u8>>,
    // NOTE(review): presumably linear depth values matching `SharedDepthBuffer` — confirm.
    depth_data: Option<Vec<f64>>,
    // NOTE(review): presumably the dimensions of the captured image, in pixels — confirm.
    image_width: u32,
    image_height: u32,
}
164
/// Test-only counter, starting at 0. The code that increments it is not
/// among the helpers below.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Reset the test-only scene setup counter to 0.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Read the current value of the test-only scene setup counter.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
177
/// Shared buffer for screenshot callback to write into.
///
/// Cloning the resource clones the `Arc`, so every clone refers to the same
/// mutex-protected slot. The slot is `None` until something stores a value.
// NOTE(review): the tuple is presumably `(rgba_bytes, width, height)`, mirroring
// `SharedDepthBuffer` — confirm against the code that fills it.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
183
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`.
/// Uses f64 for TBP numerical precision compatibility.
///
/// Cloning the resource clones the `Arc`, so every clone refers to the same
/// mutex-protected slot. The default value holds `None`.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
190
191// ============================================================================
192// Depth Readback Infrastructure
193// ============================================================================
194
/// Request to capture depth - extracted from main world to render world.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `false`, the depth readback node returns without doing any work.
    requested: bool,
    /// Near-plane distance forwarded with each capture for NDC → linear conversion.
    near: f32,
    /// Far-plane distance forwarded with each capture for NDC → linear conversion.
    far: f32,
}
202
/// Pending depth capture info for async processing.
///
/// One entry is queued per depth copy encoded by the readback node and is
/// consumed later when the buffer is mapped and converted.
struct PendingDepthCapture {
    /// Staging buffer that receives the depth texture copy.
    buffer: Buffer,
    /// Width of the copied region, in pixels.
    width: u32,
    /// Height of the copied region, in pixels.
    height: u32,
    /// Near-plane distance taken from the capture request.
    near: f32,
    /// Far-plane distance taken from the capture request.
    far: f32,
}
211
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// The `Vec` sits behind an `Arc<Mutex<..>>`, so it can be pushed to through
/// a shared reference to the resource. The default value is an empty queue.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
215
216// ============================================================================
217// Depth Buffer Helpers
218// ============================================================================
219
mod depth_helpers {
    /// Row pitch granularity, in bytes, that wgpu requires for buffer copies.
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of [`COPY_BYTES_PER_ROW_ALIGNMENT`].
    ///
    /// Already-aligned inputs (including 0) come back unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        match value % COPY_BYTES_PER_ROW_ALIGNMENT {
            0 => value,
            overshoot => value + (COPY_BYTES_PER_ROW_ALIGNMENT - overshoot),
        }
    }

    /// Byte size of an image buffer whose rows are each padded to the copy alignment.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        let padded_row_bytes = align_byte_size(width * pixel_size);
        height * padded_row_bytes
    }

    /// Map a reverse-Z NDC depth sample to linear depth in meters.
    ///
    /// In the reverse-Z convention used here the near plane maps to depth 1
    /// and the far plane to depth 0, which gives better precision for distant
    /// objects.
    ///
    /// - `ndc_depth <= 0` yields `far` (background).
    /// - `ndc_depth >= 1` yields `near` (at or in front of the near plane).
    /// - Otherwise: `far / (1 + ndc * (far / near - 1))`.
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        if ndc_depth <= 0.0 {
            // Nothing was drawn here: treat as the far plane.
            far
        } else if ndc_depth >= 1.0 {
            // Clamp anything at or beyond the near plane.
            near
        } else {
            far / (1.0 + ndc_depth * (far / near - 1.0))
        }
    }

    /// Pull tightly-packed f32 depth samples out of a row-padded byte buffer.
    ///
    /// Each row in `data` starts at a multiple of the aligned row pitch; only
    /// the first `width * 4` bytes of a row carry samples (little-endian f32).
    ///
    /// # Panics
    ///
    /// Panics if `data` is too short to contain `height` padded rows of
    /// `width` samples.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        let bytes_per_sample = 4u32; // one f32
        let row_pitch = align_byte_size(width * bytes_per_sample) as usize;
        let payload_len = (width * bytes_per_sample) as usize;

        let mut samples = Vec::with_capacity((width * height) as usize);

        for row in 0..height as usize {
            let begin = row * row_pitch;
            // Skip the padding at the end of each row.
            let payload = &data[begin..begin + payload_len];
            samples.extend(
                payload
                    .chunks_exact(4)
                    .map(|texel| f32::from_le_bytes([texel[0], texel[1], texel[2], texel[3]])),
            );
        }

        samples
    }

    /// Convert a slice of reverse-Z NDC samples to linear meters (as f64 for TBP precision).
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        let mut meters = Vec::with_capacity(raw_depth.len());
        for &sample in raw_depth {
            meters.push(f64::from(reverse_z_to_linear_depth(sample, near, far)));
        }
        meters
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        /// Store `value` as little-endian f32 bytes at `offset` in `buf`.
        fn put_f32(buf: &mut [u8], offset: usize, value: f32) {
            buf[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
        }

        #[test]
        fn test_align_byte_size() {
            let cases: [(u32, u32); 5] = [(256, 256), (257, 512), (1, 256), (512, 512), (0, 0)];
            for &(input, expected) in cases.iter() {
                assert_eq!(align_byte_size(input), expected);
            }
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let (near, far) = (0.01_f32, 10.0_f32);

            // ndc = 1 is the near plane in reverse-Z.
            let at_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((at_near - near).abs() < 0.001);

            // ndc = 0.5: 10 / (1 + 0.5 * (1000 - 1)) = 10 / 500.5 ≈ 0.02,
            // which lies strictly between the two planes.
            let mid = reverse_z_to_linear_depth(0.5, near, far);
            assert!(near < mid && mid < far);

            // ndc = 0.0001: 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09.
            let almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            assert!(almost_far > 9.0);

            // ndc = 0 is background and maps exactly to the far plane.
            assert_eq!(reverse_z_to_linear_depth(0.0, near, far), far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image: 8 payload bytes per row inside a 256-byte pitch.
            let (width, height) = (2u32, 2u32);
            let mut data = vec![0u8; 256 * 2];

            // Row 0 holds [0.5, 0.6]; row 1 holds [0.7, 0.8].
            put_f32(&mut data, 0, 0.5);
            put_f32(&mut data, 4, 0.6);
            put_f32(&mut data, 256, 0.7);
            put_f32(&mut data, 260, 0.8);

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            let expected = [0.5_f32, 0.6, 0.7, 0.8];
            for (got, want) in depth.iter().zip(expected.iter()) {
                assert!((got - want).abs() < 0.001);
            }
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // ndc = 1 should give the near value.
            let (near, far) = (0.01_f32, 100.0_f32);
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // ndc = 0 should give the far value.
            let (near, far) = (0.01_f32, 100.0_f32);
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Linear depth must not decrease as NDC decreases (reverse-Z).
            let (near, far) = (0.01_f32, 10.0_f32);

            let mut prev_depth = 0.0_f32;
            for step in (0..=100).rev() {
                let ndc = step as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let (near, far) = (0.01_f32, 10.0_f32);
            let ndc_depths = [1.0_f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);
            assert_eq!(linear.len(), 4);

            let (near64, far64) = (near as f64, far as f64);
            // First sample sits on the near plane, last on the far plane.
            assert!((linear[0] - near64).abs() < 0.001);
            assert!((linear[3] - far64).abs() < 0.001);
            // Every converted value stays within [near, far].
            assert!(linear.iter().all(|&d| d >= near64 && d <= far64));
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Exact multiples of 256 are untouched.
            for &aligned in [256u32, 512, 1024].iter() {
                assert_eq!(align_byte_size(aligned), aligned);
            }

            // Anything below 256 rounds up to 256.
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just past 256 rounds up to 512.
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // TBP default resolution.
            let (width, height) = (64u32, 64u32);
            let bytes_per_pixel = 4u32;
            let row_pitch = align_byte_size(width * bytes_per_pixel);
            let total = (width * height) as f32;

            // Fill a padded buffer with a ramp of increasing values.
            let mut data = vec![0u8; (row_pitch * height) as usize];
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / total;
                    let offset = (y * row_pitch + x * bytes_per_pixel) as usize;
                    put_f32(&mut data, offset, value);
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Spot-check both ends of the ramp.
            assert!((depth[0] - 0.0).abs() < 0.001);
            let last_index = (width * height - 1) as usize;
            let expected_last = (width * height - 1) as f32 / total;
            assert!((depth[last_index] - expected_last).abs() < 0.001);
        }
    }
}
457
458// ============================================================================
459// Depth Readback Render Node
460// ============================================================================
461
/// Label for the depth readback render graph node.
///
/// Used as the key under which `DepthReadbackNode` is registered in the
/// `Core3d` render graph and referenced when its ordering edges are added.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
465
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node is stateless: it reads the capture request and the pending queue
/// from the world on every run.
#[derive(Default)]
struct DepthReadbackNode;
470
471impl ViewNode for DepthReadbackNode {
472    type ViewQuery = (&'static ViewDepthTexture, &'static ExtractedCamera);
473
474    fn run<'w>(
475        &self,
476        _graph: &mut RenderGraphContext,
477        render_context: &mut RenderContext<'w>,
478        (view_depth_texture, camera): QueryItem<'w, Self::ViewQuery>,
479        world: &'w World,
480    ) -> Result<(), NodeRunError> {
481        let trace = render_trace_enabled();
482        let t0 = trace.then(std::time::Instant::now);
483
484        // Check if depth capture is requested
485        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
486            return Ok(());
487        };
488        if !request.requested {
489            return Ok(());
490        }
491
492        // Get the pending queue
493        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
494            return Ok(());
495        };
496
497        // Get texture size from camera viewport or physical size
498        let Some(physical_size) = camera.physical_target_size else {
499            return Ok(());
500        };
501        let width = physical_size.x;
502        let height = physical_size.y;
503
504        let render_device = world.resource::<RenderDevice>();
505
506        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
507        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
508        let unpadded_bytes_per_row = width * bytes_per_pixel;
509        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
510        let buffer_size = (padded_bytes_per_row * height) as u64;
511
512        // Create staging buffer for CPU readback
513        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
514            label: Some("depth_staging_buffer"),
515            size: buffer_size,
516            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
517            mapped_at_creation: false,
518        });
519
520        // Copy depth texture to staging buffer
521        let encoder = render_context.command_encoder();
522        encoder.copy_texture_to_buffer(
523            ImageCopyTexture {
524                texture: &view_depth_texture.texture,
525                mip_level: 0,
526                origin: Origin3d::ZERO,
527                aspect: TextureAspect::DepthOnly,
528            },
529            ImageCopyBuffer {
530                buffer: &staging_buffer,
531                layout: ImageDataLayout {
532                    offset: 0,
533                    bytes_per_row: Some(padded_bytes_per_row),
534                    rows_per_image: Some(height),
535                },
536            },
537            Extent3d {
538                width,
539                height,
540                depth_or_array_layers: 1,
541            },
542        );
543
544        // Push to queue for async processing (queue is Arc<Mutex<Vec>>)
545        if let Ok(mut pending) = queue.0.lock() {
546            pending.push(PendingDepthCapture {
547                buffer: staging_buffer,
548                width,
549                height,
550                near: request.near,
551                far: request.far,
552            });
553        }
554
555        if let Some(t0) = t0 {
556            eprintln!(
557                "[render_trace][node] DepthReadbackNode ms={:.3}",
558                t0.elapsed().as_secs_f64() * 1000.0
559            );
560        }
561
562        Ok(())
563    }
564}
565
566// ============================================================================
567// Depth Readback Plugin
568// ============================================================================
569
/// Plugin that sets up depth buffer readback from the GPU.
struct DepthReadbackPlugin {
    /// Handle to the shared slot; a clone is inserted into both the main app
    /// and the render app.
    shared_depth: SharedDepthBuffer,
    /// Near-plane distance used to seed the initial `DepthCaptureRequest`.
    near: f32,
    /// Far-plane distance used to seed the initial `DepthCaptureRequest`.
    far: f32,
}
576
577impl Plugin for DepthReadbackPlugin {
578    fn build(&self, app: &mut App) {
579        use bevy::core_pipeline::core_3d::graph::Core3d;
580        use bevy::core_pipeline::core_3d::graph::Node3d;
581
582        // Insert shared depth buffer in main app
583        app.insert_resource(self.shared_depth.clone());
584        app.insert_resource(DepthCaptureRequest {
585            requested: false,
586            near: self.near,
587            far: self.far,
588        });
589
590        // Get render app
591        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
592            eprintln!("Failed to get RenderApp for depth readback");
593            return;
594        };
595
596        // Insert resources in render world
597        render_app.insert_resource(self.shared_depth.clone());
598        render_app.init_resource::<PendingDepthCaptureQueue>();
599
600        // Add extraction system to copy request from main world
601        render_app.add_systems(ExtractSchedule, extract_depth_request);
602
603        // Add system to process completed depth captures
604        render_app.add_systems(Render, collect_depth_captures.in_set(RenderSet::Cleanup));
605
606        // Register the depth readback node in the render graph
607        // Run after main pass completes (depth buffer is ready) but before tonemapping
608        render_app
609            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
610            .add_render_graph_edges(
611                Core3d,
612                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
613            );
614    }
615}
616
617/// Extract depth capture request from main world to render world
618fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
619    commands.insert_resource(DepthCaptureRequest {
620        requested: request.requested,
621        near: request.near,
622        far: request.far,
623    });
624}
625
626/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
627fn collect_depth_captures(
628    queue: Res<PendingDepthCaptureQueue>,
629    shared_depth: Res<SharedDepthBuffer>,
630    render_device: Res<RenderDevice>,
631) {
632    let trace = render_trace_enabled();
633    let t_sys = trace.then(std::time::Instant::now);
634
635    // Take all pending captures from the queue
636    let pending_captures = {
637        let Ok(mut pending) = queue.0.lock() else {
638            return;
639        };
640        std::mem::take(&mut *pending)
641    };
642
643    if pending_captures.is_empty() {
644        if let Some(t0) = t_sys {
645            eprintln!(
646                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
647                t0.elapsed().as_secs_f64() * 1000.0
648            );
649        }
650        return;
651    }
652
653    let pending_count = pending_captures.len();
654
655    // Process each pending capture synchronously with device polling
656    for pending in pending_captures {
657        let width = pending.width;
658        let height = pending.height;
659        let near = pending.near;
660        let far = pending.far;
661        let buffer = pending.buffer;
662        let shared = shared_depth.0.clone();
663
664        // Use blocking sync approach with device polling (same as RGBA capture)
665        let buffer_slice = buffer.slice(..);
666
667        // Request mapping
668        let (tx, rx) = std::sync::mpsc::channel();
669        buffer_slice.map_async(MapMode::Read, move |result| {
670            let _ = tx.send(result);
671        });
672
673        let t_wait = trace.then(std::time::Instant::now);
674        let mut poll_iters: u32 = 0;
675
676        // Poll the device until mapping completes
677        loop {
678            render_device.poll(bevy::render::render_resource::Maintain::Poll);
679            poll_iters += 1;
680            match rx.try_recv() {
681                Ok(Ok(())) => {
682                    let data = buffer_slice.get_mapped_range();
683
684                    // Extract depth values with alignment handling
685                    let ndc_depth =
686                        depth_helpers::extract_depth_with_alignment(&data, width, height);
687
688                    drop(data);
689                    buffer.unmap();
690
691                    // Convert from reverse-Z NDC to linear depth in meters
692                    let linear_depth =
693                        depth_helpers::convert_depth_to_linear(&ndc_depth, near, far);
694
695                    // Store in shared buffer
696                    if let Ok(mut guard) = shared.lock() {
697                        *guard = Some((linear_depth, width, height));
698                    }
699                    break;
700                }
701                Ok(Err(e)) => {
702                    eprintln!("Failed to map depth buffer: {:?}", e);
703                    break;
704                }
705                Err(std::sync::mpsc::TryRecvError::Empty) => {
706                    // Keep polling
707                    std::thread::sleep(std::time::Duration::from_millis(1));
708                }
709                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
710                    eprintln!("Depth buffer mapping channel disconnected");
711                    break;
712                }
713            }
714        }
715
716        if let Some(t_wait) = t_wait {
717            eprintln!(
718                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
719                poll_iters,
720                t_wait.elapsed().as_secs_f64() * 1000.0
721            );
722        }
723    }
724
725    if let Some(t0) = t_sys {
726        eprintln!(
727            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
728            pending_count,
729            t0.elapsed().as_secs_f64() * 1000.0
730        );
731    }
732}
733
734// ============================================================================
735// Image Copy Infrastructure (for headless rendering)
736// ============================================================================
737
/// Label for the image copy render graph node.
// NOTE(review): presumably the label `ImageCopyDriver` is registered under — the
// registration is not visible here; confirm.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
741
/// Component that marks an image for GPU-to-CPU copying.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target); used to look up the GPU
    /// image that is copied.
    src_image: Handle<Image>,
    /// Whether to capture on this frame; disabled copiers are skipped by the
    /// copy node.
    enabled: bool,
}
750
/// Resource containing all ImageCopiers for the render world.
///
/// The copy node iterates this list on every run. The default value is an
/// empty list.
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
754
/// Pending image capture for async processing.
// NOTE(review): the field notes below are inferred from how `ImageCopyDriver`
// computes its locals; the code that fills this struct is not visible here — confirm.
struct PendingImageCapture {
    // Presumably the staging buffer that receives the texture copy.
    buffer: Buffer,
    // Presumably the source image width in pixels.
    width: u32,
    // Presumably the source image height in pixels.
    height: u32,
    // Presumably the 256-byte-aligned row pitch used for the copy.
    padded_bytes_per_row: u32,
}
762
/// Queue for pending image captures.
///
/// The `Vec` sits behind an `Arc<Mutex<..>>`, so the copy node can lock and
/// update it through a shared reference. The default value is an empty queue.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
766
/// Shared buffer for captured RGBA data.
///
/// Cloning the resource clones the `Arc`, so every clone refers to the same
/// mutex-protected slot. The default value holds `None`.
// NOTE(review): the tuple is presumably `(rgba_bytes, width, height)`, mirroring
// `SharedDepthBuffer` — confirm against the code that fills it.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
771
/// Render graph node that copies render target images to staging buffers.
///
/// The node holds no state of its own; it reads `ImageCopiers`, the GPU image
/// assets and the pending capture queue from the world each time it runs.
struct ImageCopyDriver;
774
775impl Node for ImageCopyDriver {
776    fn run(
777        &self,
778        _graph: &mut RenderGraphContext,
779        _render_context: &mut RenderContext,
780        world: &World,
781    ) -> Result<(), NodeRunError> {
782        let trace = render_trace_enabled();
783        let t0 = trace.then(std::time::Instant::now);
784
785        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
786            return Ok(());
787        };
788
789        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
790            return Ok(());
791        };
792
793        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
794            return Ok(());
795        };
796
797        let render_device = world.resource::<RenderDevice>();
798
799        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
800            return Ok(());
801        };
802
803        for image_copier in image_copiers.0.iter() {
804            if !image_copier.enabled {
805                continue;
806            }
807
808            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
809                continue;
810            };
811
812            let width = gpu_image.size.x;
813            let height = gpu_image.size.y;
814
815            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
816            let block_dimensions = gpu_image.texture_format.block_dimensions();
817            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
818
819            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
820                (width as usize / block_dimensions.0 as usize) * block_size as usize,
821            );
822
823            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
824
825            // Create staging buffer for CPU readback
826            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
827                label: Some("image_copy_staging_buffer"),
828                size: buffer_size,
829                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
830                mapped_at_creation: false,
831            });
832
833            // Create command encoder for the copy operation
834            let mut encoder =
835                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
836
837            let texture_extent = Extent3d {
838                width,
839                height,
840                depth_or_array_layers: 1,
841            };
842
843            // Copy texture to buffer
844            encoder.copy_texture_to_buffer(
845                gpu_image.texture.as_image_copy(),
846                ImageCopyBuffer {
847                    buffer: &staging_buffer,
848                    layout: ImageDataLayout {
849                        offset: 0,
850                        bytes_per_row: Some(padded_bytes_per_row as u32),
851                        rows_per_image: None,
852                    },
853                },
854                texture_extent,
855            );
856
857            // Submit the copy command
858            render_queue.submit(std::iter::once(encoder.finish()));
859
860            // Queue for async processing
861            if let Ok(mut pending) = queue.0.lock() {
862                pending.push(PendingImageCapture {
863                    buffer: staging_buffer,
864                    width,
865                    height,
866                    padded_bytes_per_row: padded_bytes_per_row as u32,
867                });
868            }
869        }
870
871        if let Some(t0) = t0 {
872            eprintln!(
873                "[render_trace][node] ImageCopyDriver ms={:.3}",
874                t0.elapsed().as_secs_f64() * 1000.0
875            );
876        }
877
878        Ok(())
879    }
880}
881
882/// Extract ImageCopier components to render world
883fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
884    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
885}
886
887/// Process completed image captures
888fn collect_image_captures(
889    queue: Res<PendingImageCaptureQueue>,
890    shared_rgba: Res<SharedRgbaBuffer>,
891    render_device: Res<RenderDevice>,
892) {
893    let trace = render_trace_enabled();
894    let t_sys = trace.then(std::time::Instant::now);
895
896    let pending_captures = {
897        let Ok(mut pending) = queue.0.lock() else {
898            return;
899        };
900        std::mem::take(&mut *pending)
901    };
902
903    if pending_captures.is_empty() {
904        if let Some(t0) = t_sys {
905            eprintln!(
906                "[render_trace][sys] collect_image_captures empty ms={:.3}",
907                t0.elapsed().as_secs_f64() * 1000.0
908            );
909        }
910        return;
911    }
912
913    let pending_count = pending_captures.len();
914
915    for pending in pending_captures {
916        let width = pending.width;
917        let height = pending.height;
918        let padded_bytes_per_row = pending.padded_bytes_per_row;
919        let buffer = pending.buffer;
920        let shared = shared_rgba.0.clone();
921
922        // Use blocking sync approach with device polling
923        let buffer_slice = buffer.slice(..);
924
925        // Request mapping
926        let (tx, rx) = std::sync::mpsc::channel();
927        buffer_slice.map_async(MapMode::Read, move |result| {
928            let _ = tx.send(result);
929        });
930
931        // Poll the device until mapping completes (with timeout)
932        let start = std::time::Instant::now();
933        let timeout = std::time::Duration::from_secs(10);
934        let mut poll_iters: u32 = 0;
935        loop {
936            render_device.poll(bevy::render::render_resource::Maintain::Poll);
937            poll_iters += 1;
938
939            if start.elapsed() > timeout {
940                eprintln!(
941                    "Warning: Buffer mapping timeout after {:?}",
942                    start.elapsed()
943                );
944                break;
945            }
946
947            match rx.try_recv() {
948                Ok(Ok(())) => {
949                    let data = buffer_slice.get_mapped_range();
950
951                    // Extract pixels with alignment handling
952                    let bytes_per_pixel = 4u32;
953                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
954                    let padded_row_bytes = padded_bytes_per_row as usize;
955
956                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
957                    for y in 0..height as usize {
958                        let row_start = y * padded_row_bytes;
959                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
960                    }
961
962                    drop(data);
963                    buffer.unmap();
964
965                    if let Ok(mut guard) = shared.lock() {
966                        *guard = Some((rgba, width, height));
967                    }
968                    break;
969                }
970                Ok(Err(e)) => {
971                    eprintln!("Failed to map image buffer: {:?}", e);
972                    break;
973                }
974                Err(std::sync::mpsc::TryRecvError::Empty) => {
975                    // Keep polling
976                    std::thread::sleep(std::time::Duration::from_millis(1));
977                }
978                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
979                    eprintln!("Image buffer mapping channel disconnected");
980                    break;
981                }
982            }
983        }
984
985        if trace {
986            eprintln!(
987                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
988                poll_iters,
989                start.elapsed().as_secs_f64() * 1000.0
990            );
991        }
992    }
993
994    if let Some(t0) = t_sys {
995        eprintln!(
996            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
997            pending_count,
998            t0.elapsed().as_secs_f64() * 1000.0
999        );
1000    }
1001}
1002
/// Plugin for headless image copy.
///
/// Wires the render-target readback into the app: registers the shared RGBA
/// slot, the extraction and collection systems, and the `ImageCopyDriver`
/// render-graph node.
struct ImageCopyPlugin {
    /// Slot that receives captured frames; cloned into both the main app and
    /// the render sub-app when the plugin is built.
    shared_rgba: SharedRgbaBuffer,
}
1007
1008impl Plugin for ImageCopyPlugin {
1009    fn build(&self, app: &mut App) {
1010        use bevy::render::render_graph::RenderGraph;
1011
1012        app.insert_resource(self.shared_rgba.clone());
1013
1014        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1015            return;
1016        };
1017
1018        render_app.insert_resource(self.shared_rgba.clone());
1019        render_app.init_resource::<ImageCopiers>();
1020        render_app.init_resource::<PendingImageCaptureQueue>();
1021
1022        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1023        render_app.add_systems(Render, collect_image_captures.in_set(RenderSet::Cleanup));
1024
1025        // Add image copy node to render graph (runs after camera driver)
1026        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1027        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1028        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1029    }
1030}
1031
1032// ============================================================================
1033// Render Request and Components
1034// ============================================================================
1035
/// Configuration passed to the Bevy app.
///
/// Built by the public render entry points after the object directory has
/// been canonicalized and the asset files have been checked for existence.
#[derive(Resource, Clone)]
struct RenderRequest {
    /// Path of the mesh asset, as a display string of the resolved path.
    mesh_path: String,
    /// Path of the texture asset, as a display string of the resolved path.
    texture_path: String,
    /// Camera pose used when the scene is set up (for batches: the first viewpoint).
    camera_transform: Transform,
    /// Rotation applied to the rendered object.
    object_rotation: ObjectRotation,
    /// Render settings (projection planes, FOV, lighting, ...).
    config: RenderConfig,
}
1045
/// Marker for the rendered object.
///
/// Attached to the entity that carries the object's `SceneRoot`.
#[derive(Component)]
struct RenderedObject;
1049
/// Marker for the render camera.
///
/// Attached to the camera entity spawned during scene setup.
#[derive(Component)]
struct RenderCamera;
1053
/// Handle for the loaded texture.
///
/// Inserted during scene setup; `check_assets_loaded` polls its load state.
#[derive(Resource)]
struct LoadedTexture(Handle<Image>);
1057
/// Handle for the loaded scene.
///
/// Inserted during scene setup; `check_assets_loaded` polls its load state.
#[derive(Resource)]
struct LoadedScene(Handle<Scene>);
1061
/// Shared output for extracting render results.
///
/// `None` until a render has produced a `RenderOutput`. `render_headless`
/// reads the slot after the app has run, and its watchdog thread polls it to
/// detect completion.
#[derive(Resource, Clone)]
struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1065
/// Handle for the render target image.
///
/// The wrapped handle is never read back in this part of the file, hence the
/// `dead_code` allowance.
#[derive(Resource)]
#[allow(dead_code)]
struct RenderTargetImage(Handle<Image>);
1070
1071/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1072#[derive(Resource)]
1073struct HeadlessBatchSequence {
1074    viewpoints: Vec<Transform>,
1075    current_index: usize,
1076    outputs: Vec<RenderOutput>,
1077    warmup_frames_remaining: u32,
1078    done: bool,
1079}
1080
1081impl HeadlessBatchSequence {
1082    fn new(viewpoints: Vec<Transform>) -> Self {
1083        let capacity = viewpoints.len();
1084        Self {
1085            viewpoints,
1086            current_index: 0,
1087            outputs: Vec::with_capacity(capacity),
1088            warmup_frames_remaining: 0,
1089            done: capacity == 0,
1090        }
1091    }
1092
1093    fn current_viewpoint(&self) -> Option<Transform> {
1094        self.viewpoints.get(self.current_index).cloned()
1095    }
1096}
1097
1098/// Perform headless rendering of a YCB object.
1099///
1100/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1101/// require any window surfaces. This should work on WSL2 and other environments
1102/// without display servers.
1103///
1104/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1105/// for results and terminates the process once the render is complete.
1106#[allow(dead_code)]
1107pub fn render_headless(
1108    object_dir: &Path,
1109    camera_transform: &Transform,
1110    object_rotation: &ObjectRotation,
1111    config: &RenderConfig,
1112) -> Result<RenderOutput, RenderError> {
1113    // Canonicalize paths so Bevy's asset server can find them regardless of
1114    // caller working directory. Relative paths like "../../ycb" pass the
1115    // exists() check but Bevy resolves assets against its own root.
1116    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1117        RenderError::RenderFailed(format!(
1118            "Cannot canonicalize object directory {}: {}",
1119            object_dir.display(),
1120            e
1121        ))
1122    })?;
1123    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1124    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1125
1126    if !mesh_path.exists() {
1127        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1128    }
1129    if !texture_path.exists() {
1130        return Err(RenderError::TextureNotFound(
1131            texture_path.display().to_string(),
1132        ));
1133    }
1134
1135    let request = RenderRequest {
1136        mesh_path: mesh_path.display().to_string(),
1137        texture_path: texture_path.display().to_string(),
1138        camera_transform: *camera_transform,
1139        object_rotation: object_rotation.clone(),
1140        config: config.clone(),
1141    };
1142
1143    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1144    let output_clone = shared_output.clone();
1145
1146    // Shared buffer for RGBA data from headless render target
1147    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1148
1149    // Shared buffer for depth readback
1150    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1151
1152    // Create a temp file path for fallback output serialization
1153    let temp_path =
1154        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1155
1156    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1157    let output_poll_for_timeout = shared_output.clone();
1158    std::thread::spawn(move || {
1159        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1160        let start = std::time::Instant::now();
1161        let poll_interval = std::time::Duration::from_millis(100);
1162
1163        loop {
1164            // Check if we have a result
1165            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1166                if guard.is_some() {
1167                    // Output is ready, Bevy will exit via AppExit event
1168                    return; // Exit watchdog thread, Bevy will handle exit
1169                }
1170            }
1171
1172            if start.elapsed() > timeout {
1173                eprintln!(
1174                    "Error: Render timeout after {} seconds",
1175                    RENDER_TIMEOUT_SECS
1176                );
1177                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1178                // Force exit on timeout (this is a failure case)
1179                std::process::exit(1);
1180            }
1181
1182            std::thread::sleep(poll_interval);
1183        }
1184    });
1185
1186    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1187    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1188    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1189
1190    // App::run() returned - check shared_output for result
1191    if let Ok(guard) = shared_output.0.lock() {
1192        if let Some(output) = guard.as_ref() {
1193            return Ok(output.clone());
1194        }
1195    }
1196
1197    // Fallback: try to read from temp file (for legacy compatibility)
1198    if temp_path.exists() {
1199        if let Ok(output) = read_output_from_file(&temp_path) {
1200            let _ = std::fs::remove_file(&temp_path);
1201            return Ok(output);
1202        }
1203    }
1204
1205    Err(RenderError::RenderFailed(
1206        "Render did not complete".to_string(),
1207    ))
1208}
1209
1210/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1211///
1212/// All captures share the same object, object rotation, and render configuration.
1213/// This is the fast path used by the batch API for episode-style workloads.
1214pub fn render_headless_sequence(
1215    object_dir: &Path,
1216    viewpoints: &[Transform],
1217    object_rotation: &ObjectRotation,
1218    config: &RenderConfig,
1219) -> Result<Vec<RenderOutput>, RenderError> {
1220    if viewpoints.is_empty() {
1221        return Ok(Vec::new());
1222    }
1223
1224    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1225        RenderError::RenderFailed(format!(
1226            "Cannot canonicalize object directory {}: {}",
1227            object_dir.display(),
1228            e
1229        ))
1230    })?;
1231    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1232    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1233
1234    if !mesh_path.exists() {
1235        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1236    }
1237    if !texture_path.exists() {
1238        return Err(RenderError::TextureNotFound(
1239            texture_path.display().to_string(),
1240        ));
1241    }
1242
1243    let request = RenderRequest {
1244        mesh_path: mesh_path.display().to_string(),
1245        texture_path: texture_path.display().to_string(),
1246        camera_transform: viewpoints[0],
1247        object_rotation: object_rotation.clone(),
1248        config: config.clone(),
1249    };
1250
1251    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1252    let rgba_clone = shared_rgba.clone();
1253
1254    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1255    let depth_clone = shared_depth.clone();
1256
1257    let mut app = App::new();
1258    app.add_plugins(
1259        DefaultPlugins
1260            .set(WindowPlugin {
1261                primary_window: None,
1262                exit_condition: ExitCondition::DontExit,
1263                ..default()
1264            })
1265            .disable::<bevy::winit::WinitPlugin>()
1266            .disable::<LogPlugin>()
1267            .disable::<TerminalCtrlCHandlerPlugin>(),
1268    )
1269    .add_plugins(ObjPlugin)
1270    .add_plugins(ImageCopyPlugin {
1271        shared_rgba: rgba_clone,
1272    })
1273    .add_plugins(DepthReadbackPlugin {
1274        shared_depth: depth_clone,
1275        near: config.near_plane,
1276        far: config.far_plane,
1277    })
1278    .insert_resource(request)
1279    .insert_resource(shared_rgba)
1280    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1281    .init_resource::<RenderState>()
1282    .add_systems(Startup, setup_headless_scene)
1283    .add_systems(
1284        Update,
1285        (
1286            check_assets_loaded,
1287            apply_materials,
1288            tick_headless_batch_warmup,
1289            request_headless_capture,
1290            check_headless_capture_ready,
1291            extract_and_continue_headless_batch,
1292        )
1293            .chain(),
1294    );
1295
1296    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1297    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1298    // normal startup phases before driving the headless batch loop ourselves.
1299    let trace_outer = render_trace_enabled();
1300    let t_finish = std::time::Instant::now();
1301    app.finish();
1302    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1303    let t_cleanup = std::time::Instant::now();
1304    app.cleanup();
1305    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1306    if trace_outer {
1307        eprintln!(
1308            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1309            finish_ms, cleanup_ms
1310        );
1311    }
1312
1313    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1314    let start = std::time::Instant::now();
1315
1316    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1317    let mut update_idx: u32 = 0;
1318    let mut last_completed_outputs: usize = 0;
1319    let mut viewpoint_start = std::time::Instant::now();
1320
1321    loop {
1322        if start.elapsed() > timeout {
1323            return Err(RenderError::RenderTimeout {
1324                duration_secs: RENDER_TIMEOUT_SECS,
1325            });
1326        }
1327
1328        let update_start = std::time::Instant::now();
1329        app.update();
1330        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1331
1332        if trace {
1333            let batch = app.world().resource::<HeadlessBatchSequence>();
1334            let warmup = batch.warmup_frames_remaining;
1335            let current = batch.current_index;
1336            let completed = batch.outputs.len();
1337            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1338            eprintln!(
1339                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1340                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1341            );
1342            if completed > last_completed_outputs {
1343                eprintln!(
1344                    "[render_trace] viewpoint {} finished in {:.2} ms",
1345                    completed - 1,
1346                    vp_ms
1347                );
1348                last_completed_outputs = completed;
1349                viewpoint_start = std::time::Instant::now();
1350            }
1351        }
1352
1353        update_idx += 1;
1354
1355        if app.world().resource::<HeadlessBatchSequence>().done {
1356            break;
1357        }
1358    }
1359
1360    if trace {
1361        eprintln!(
1362            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1363            start.elapsed().as_secs_f64() * 1000.0,
1364            viewpoints.len()
1365        );
1366    }
1367
1368    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1369    if batch.outputs.len() != viewpoints.len() {
1370        return Err(RenderError::RenderFailed(format!(
1371            "Batch render produced {} outputs for {} viewpoints",
1372            batch.outputs.len(),
1373            viewpoints.len()
1374        )));
1375    }
1376
1377    Ok(std::mem::take(&mut batch.outputs))
1378}
1379
1380/// Assemble the shared single-render headless Bevy app.
1381fn build_headless_app(
1382    request: RenderRequest,
1383    shared_output: SharedOutput,
1384    shared_rgba: SharedRgbaBuffer,
1385    shared_depth: SharedDepthBuffer,
1386) -> App {
1387    let near = request.config.near_plane;
1388    let far = request.config.far_plane;
1389
1390    let mut app = App::new();
1391    app.add_plugins(
1392        DefaultPlugins
1393            .set(WindowPlugin {
1394                primary_window: None,
1395                exit_condition: ExitCondition::DontExit,
1396                ..default()
1397            })
1398            .disable::<bevy::winit::WinitPlugin>()
1399            .disable::<LogPlugin>()
1400            .disable::<TerminalCtrlCHandlerPlugin>(),
1401    )
1402    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1403        1.0 / 60.0,
1404    )))
1405    .add_plugins(ObjPlugin)
1406    .add_plugins(ImageCopyPlugin {
1407        shared_rgba: shared_rgba.clone(),
1408    })
1409    .add_plugins(DepthReadbackPlugin {
1410        shared_depth,
1411        near,
1412        far,
1413    })
1414    .insert_resource(request)
1415    .insert_resource(shared_output)
1416    .insert_resource(shared_rgba)
1417    .init_resource::<RenderState>()
1418    .add_systems(Startup, setup_headless_scene)
1419    .add_systems(
1420        Update,
1421        (
1422            check_assets_loaded,
1423            apply_materials,
1424            request_headless_capture,
1425            check_headless_capture_ready,
1426            extract_and_exit_headless,
1427        )
1428            .chain(),
1429    );
1430    app
1431}
1432
1433/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1434#[allow(dead_code)]
1435fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1436    let mut data = Vec::new();
1437
1438    // Header: width, height, rgba_len, depth_len
1439    data.extend_from_slice(&output.width.to_le_bytes());
1440    data.extend_from_slice(&output.height.to_le_bytes());
1441    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1442    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1443
1444    // RGBA data
1445    data.extend_from_slice(&output.rgba);
1446
1447    // Depth data (as f64 bytes for TBP precision)
1448    for d in &output.depth {
1449        data.extend_from_slice(&d.to_le_bytes());
1450    }
1451
1452    // Intrinsics (f64 for TBP precision)
1453    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1454    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1455    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1456    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1457    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1458    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1459
1460    // Camera transform (translation + rotation quaternion)
1461    let t = output.camera_transform.translation;
1462    let r = output.camera_transform.rotation;
1463    data.extend_from_slice(&t.x.to_le_bytes());
1464    data.extend_from_slice(&t.y.to_le_bytes());
1465    data.extend_from_slice(&t.z.to_le_bytes());
1466    data.extend_from_slice(&r.x.to_le_bytes());
1467    data.extend_from_slice(&r.y.to_le_bytes());
1468    data.extend_from_slice(&r.z.to_le_bytes());
1469    data.extend_from_slice(&r.w.to_le_bytes());
1470
1471    // Object rotation (f64)
1472    let or = &output.object_rotation;
1473    data.extend_from_slice(&or.pitch.to_le_bytes());
1474    data.extend_from_slice(&or.yaw.to_le_bytes());
1475    data.extend_from_slice(&or.roll.to_le_bytes());
1476
1477    data
1478}
1479
1480/// Read RenderOutput from serialized file
1481fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1482    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1483    let mut data = Vec::new();
1484    file.read_to_end(&mut data)
1485        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1486
1487    let mut cursor = 0;
1488
1489    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1490        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1491        *cursor += 4;
1492        val
1493    };
1494
1495    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1496        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1497        *cursor += 4;
1498        val
1499    };
1500
1501    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1502        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1503        *cursor += 8;
1504        val
1505    };
1506
1507    let width = read_u32(&data, &mut cursor);
1508    let height = read_u32(&data, &mut cursor);
1509    let rgba_len = read_u32(&data, &mut cursor) as usize;
1510    let depth_len = read_u32(&data, &mut cursor) as usize;
1511
1512    let rgba = data[cursor..cursor + rgba_len].to_vec();
1513    cursor += rgba_len;
1514
1515    // Depth data (f64 for TBP precision)
1516    let mut depth = Vec::with_capacity(depth_len);
1517    for _ in 0..depth_len {
1518        depth.push(read_f64(&data, &mut cursor));
1519    }
1520
1521    // Intrinsics (f64 for TBP precision)
1522    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1523    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1524    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1525
1526    // Camera transform (f32 for Bevy compatibility)
1527    let tx = read_f32(&data, &mut cursor);
1528    let ty = read_f32(&data, &mut cursor);
1529    let tz = read_f32(&data, &mut cursor);
1530    let rx = read_f32(&data, &mut cursor);
1531    let ry = read_f32(&data, &mut cursor);
1532    let rz = read_f32(&data, &mut cursor);
1533    let rw = read_f32(&data, &mut cursor);
1534
1535    // Object rotation (f64)
1536    let pitch = read_f64(&data, &mut cursor);
1537    let yaw = read_f64(&data, &mut cursor);
1538    let roll = read_f64(&data, &mut cursor);
1539
1540    Ok(RenderOutput {
1541        rgba,
1542        depth,
1543        width,
1544        height,
1545        intrinsics: crate::CameraIntrinsics {
1546            focal_length,
1547            principal_point,
1548            image_size,
1549        },
1550        camera_transform: Transform {
1551            translation: Vec3::new(tx, ty, tz),
1552            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1553            scale: Vec3::ONE,
1554        },
1555        object_rotation: ObjectRotation { pitch, yaw, roll },
1556    })
1557}
1558
1559/// Setup the scene with camera, lighting, and object
1560#[allow(dead_code)]
1561fn setup_scene(
1562    mut commands: Commands,
1563    asset_server: Res<AssetServer>,
1564    request: Res<RenderRequest>,
1565    mut _materials: ResMut<Assets<StandardMaterial>>,
1566) {
1567    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1568    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1569    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1570    let fov = request.config.fov_radians();
1571    commands.spawn((
1572        Camera3d::default(),
1573        Camera {
1574            hdr: true,
1575            ..default()
1576        },
1577        Projection::Perspective(PerspectiveProjection {
1578            fov,
1579            near: request.config.near_plane,
1580            far: request.config.far_plane,
1581            ..default()
1582        }),
1583        Msaa::Off,
1584        request.camera_transform,
1585        Tonemapping::None, // Accurate colors for software rendering
1586        DepthPrepass,
1587        NormalPrepass,
1588        RenderCamera,
1589    ));
1590
1591    // Ambient light (from config)
1592    let lighting = &request.config.lighting;
1593    commands.insert_resource(AmbientLight {
1594        color: Color::WHITE,
1595        brightness: lighting.ambient_brightness,
1596    });
1597
1598    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1599    if lighting.key_light_intensity > 0.0 {
1600        commands.spawn((
1601            PointLight {
1602                intensity: lighting.key_light_intensity,
1603                shadows_enabled: lighting.shadows_enabled,
1604                ..default()
1605            },
1606            Transform::from_xyz(
1607                lighting.key_light_position[0],
1608                lighting.key_light_position[1],
1609                lighting.key_light_position[2],
1610            ),
1611        ));
1612    }
1613
1614    // Fill light (from config)
1615    if lighting.fill_light_intensity > 0.0 {
1616        commands.spawn((
1617            PointLight {
1618                intensity: lighting.fill_light_intensity,
1619                shadows_enabled: lighting.shadows_enabled,
1620                ..default()
1621            },
1622            Transform::from_xyz(
1623                lighting.fill_light_position[0],
1624                lighting.fill_light_position[1],
1625                lighting.fill_light_position[2],
1626            ),
1627        ));
1628    }
1629
1630    // Load the scene
1631    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1632    commands.insert_resource(LoadedScene(scene_handle.clone()));
1633
1634    // Load the texture
1635    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1636    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1637
1638    // Create material with texture (will be applied later)
1639    let _material = _materials.add(StandardMaterial {
1640        base_color_texture: Some(texture_handle),
1641        unlit: true,
1642        ..default()
1643    });
1644
1645    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
1646    commands.spawn((
1647        SceneRoot(scene_handle),
1648        Transform::from_rotation(request.object_rotation.to_quat()),
1649        RenderedObject,
1650    ));
1651
1652    println!("Scene setup complete");
1653}
1654
1655/// Check if assets are loaded
1656fn check_assets_loaded(
1657    mut state: ResMut<RenderState>,
1658    asset_server: Res<AssetServer>,
1659    scene: Option<Res<LoadedScene>>,
1660    texture: Option<Res<LoadedTexture>>,
1661) {
1662    let trace = render_trace_enabled();
1663    let was_scene_loaded = state.scene_loaded;
1664    let was_texture_loaded = state.texture_loaded;
1665
1666    state.frame_count += 1;
1667
1668    if state.scene_loaded && state.texture_loaded {
1669        return;
1670    }
1671
1672    if let Some(scene) = scene {
1673        match asset_server.get_load_state(&scene.0) {
1674            Some(LoadState::Loaded) => {
1675                state.scene_loaded = true;
1676            }
1677            Some(LoadState::Failed(_)) => {}
1678            _ => {}
1679        }
1680    }
1681
1682    if let Some(texture) = texture {
1683        match asset_server.get_load_state(&texture.0) {
1684            Some(LoadState::Loaded) => {
1685                state.texture_loaded = true;
1686            }
1687            Some(LoadState::Failed(_)) => {}
1688            _ => {}
1689        }
1690    }
1691
1692    if trace {
1693        if !was_scene_loaded && state.scene_loaded {
1694            eprintln!(
1695                "[render_trace][coldinit] scene_loaded frame_count={}",
1696                state.frame_count
1697            );
1698        }
1699        if !was_texture_loaded && state.texture_loaded {
1700            eprintln!(
1701                "[render_trace][coldinit] texture_loaded frame_count={}",
1702                state.frame_count
1703            );
1704        }
1705    }
1706}
1707
1708/// Apply materials to loaded meshes
1709fn apply_materials(
1710    mut state: ResMut<RenderState>,
1711    texture: Option<Res<LoadedTexture>>,
1712    mut materials: ResMut<Assets<StandardMaterial>>,
1713    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1714    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1715) {
1716    if !state.scene_loaded || !state.texture_loaded || state.capture_ready {
1717        return;
1718    }
1719
1720    state.frame_count += 1;
1721
1722    let Some(tex) = texture else { return };
1723
1724    if !state.materials_applied {
1725        // The scene hierarchy is instantiated asynchronously after the asset
1726        // load event fires; wait until mesh entities exist before applying.
1727        if mesh_query.is_empty() {
1728            return;
1729        }
1730
1731        let textured_material = materials.add(StandardMaterial {
1732            base_color_texture: Some(tex.0.clone()),
1733            unlit: true,
1734            ..default()
1735        });
1736
1737        for mut mat in mesh_query.iter_mut() {
1738            mat.0 = textured_material.clone();
1739        }
1740
1741        state.materials_applied = true;
1742        state.materials_applied_frame = state.frame_count;
1743    }
1744
1745    // Two frames after material application is enough for the render graph
1746    // to pick up the new material on native GPU. The previous 60-frame gate
1747    // was a legacy llvmpipe software-rendering cushion.
1748    if state.frame_count >= state.materials_applied_frame + 2 {
1749        let was_ready = state.capture_ready;
1750        state.capture_ready = true;
1751        if render_trace_enabled() && !was_ready {
1752            eprintln!(
1753                "[render_trace][coldinit] capture_ready frame_count={}",
1754                state.frame_count
1755            );
1756        }
1757    }
1758}
1759
1760/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
1761#[allow(dead_code)]
1762fn request_screenshot(
1763    mut commands: Commands,
1764    mut state: ResMut<RenderState>,
1765    shared_image: Res<SharedImageBuffer>,
1766    mut depth_request: ResMut<DepthCaptureRequest>,
1767) {
1768    if !state.capture_ready || state.screenshot_requested {
1769        return;
1770    }
1771
1772    // Clone the Arc for the observer closure
1773    let image_buffer = shared_image.0.clone();
1774
1775    // Also request depth capture
1776    depth_request.requested = true;
1777    println!("Depth capture requested");
1778
1779    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
1780    println!("Requesting screenshot via Screenshot entity");
1781    commands.spawn(Screenshot::primary_window()).observe(
1782        move |trigger: Trigger<ScreenshotCaptured>| {
1783            // ScreenshotCaptured derefs to Image
1784            let image: &Image = trigger.event();
1785
1786            // Get dimensions
1787            let width = image.texture_descriptor.size.width;
1788            let height = image.texture_descriptor.size.height;
1789
1790            // Get raw image data - Bevy 0.15 Image.data is Vec<u8>
1791            let rgba_data = image.data.clone();
1792
1793            // Store in shared buffer
1794            if let Ok(mut guard) = image_buffer.lock() {
1795                *guard = Some((rgba_data, width, height));
1796            }
1797        },
1798    );
1799
1800    state.screenshot_requested = true;
1801    println!("Screenshot requested");
1802}
1803
1804/// Check if screenshot callback has completed
1805#[allow(dead_code)]
1806fn check_screenshot_ready(
1807    mut state: ResMut<RenderState>,
1808    shared_image: Res<SharedImageBuffer>,
1809    shared_depth: Res<SharedDepthBuffer>,
1810    request: Res<RenderRequest>,
1811) {
1812    if !state.screenshot_requested || state.captured {
1813        return;
1814    }
1815
1816    // Increment frame count while waiting for capture
1817    state.frame_count += 1;
1818
1819    // Check if RGBA callback has written data
1820    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
1821        if let Some((rgba_data, width, height)) = guard.as_ref() {
1822            if state.rgba_data.is_none() {
1823                state.rgba_data = Some(rgba_data.clone());
1824                state.image_width = *width;
1825                state.image_height = *height;
1826            }
1827            true
1828        } else {
1829            false
1830        }
1831    } else {
1832        false
1833    };
1834
1835    // Check if depth readback has completed
1836    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
1837        if let Some((depth_data, _width, _height)) = guard.as_ref() {
1838            if state.depth_data.is_none() {
1839                state.depth_data = Some(depth_data.clone());
1840            }
1841            true
1842        } else {
1843            false
1844        }
1845    } else {
1846        false
1847    };
1848
1849    // If depth readback failed or is taking too long, fall back to placeholder
1850    // (This allows graceful degradation on systems where depth readback fails)
1851    if rgba_ready && !depth_ready && state.frame_count > 60 {
1852        let camera_dist = request.camera_transform.translation.length() as f64;
1853        let pixel_count = (state.image_width * state.image_height) as usize;
1854        state.depth_data = Some(vec![camera_dist; pixel_count]);
1855    }
1856
1857    // Mark as captured when both RGBA and depth are ready
1858    if state.rgba_data.is_some() && state.depth_data.is_some() {
1859        state.captured = true;
1860    }
1861}
1862
1863/// Extract results and exit
1864#[allow(dead_code)]
1865fn extract_and_exit(
1866    mut state: ResMut<RenderState>,
1867    request: Res<RenderRequest>,
1868    shared_output: Res<SharedOutput>,
1869    mut commands: Commands,
1870    windows: Query<Entity, With<bevy::window::Window>>,
1871) {
1872    // Handle delayed exit after closing window
1873    if state.exit_requested {
1874        state.exit_frame_count += 1;
1875        // After a few frames with no window, Bevy should exit
1876        return;
1877    }
1878
1879    if !state.captured {
1880        return;
1881    }
1882
1883    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
1884        // Use actual captured dimensions (may differ from config if window was resized)
1885        let width = state.image_width;
1886        let height = state.image_height;
1887
1888        // Compute intrinsics based on actual dimensions (f64 for TBP precision)
1889        let config = &request.config;
1890        let intrinsics = crate::CameraIntrinsics {
1891            focal_length: [
1892                width as f64 * config.zoom as f64,
1893                height as f64 * config.zoom as f64,
1894            ],
1895            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
1896            image_size: [width, height],
1897        };
1898
1899        let output = RenderOutput {
1900            rgba: rgba.clone(),
1901            depth: depth.clone(),
1902            width,
1903            height,
1904            intrinsics,
1905            camera_transform: request.camera_transform,
1906            object_rotation: request.object_rotation.clone(),
1907        };
1908
1909        if let Ok(mut guard) = shared_output.0.lock() {
1910            *guard = Some(output);
1911            drop(guard); // Release lock immediately
1912
1913            // Small delay to allow watchdog to detect output before window close
1914            std::thread::sleep(std::time::Duration::from_millis(200));
1915        }
1916
1917        // Close all windows to trigger app exit
1918        // eprintln!("Closing windows to trigger exit...");
1919        for window_entity in windows.iter() {
1920            commands.entity(window_entity).despawn();
1921        }
1922        state.exit_requested = true;
1923    }
1924}
1925
1926// ============================================================================
1927// Headless Rendering Systems (no window surfaces)
1928// ============================================================================
1929
1930/// Setup the scene for headless rendering with RenderTarget::Image
1931fn setup_headless_scene(
1932    mut commands: Commands,
1933    mut images: ResMut<Assets<Image>>,
1934    asset_server: Res<AssetServer>,
1935    request: Res<RenderRequest>,
1936    mut _materials: ResMut<Assets<StandardMaterial>>,
1937) {
1938    let trace = render_trace_enabled();
1939    let t0 = trace.then(std::time::Instant::now);
1940
1941    #[cfg(test)]
1942    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1943
1944    let width = request.config.width;
1945    let height = request.config.height;
1946
1947    // Create render target image with proper texture usages
1948    let size = Extent3d {
1949        width,
1950        height,
1951        depth_or_array_layers: 1,
1952    };
1953
1954    let mut render_target_image = Image::new_fill(
1955        size,
1956        TextureDimension::D2,
1957        &[0, 0, 0, 255], // Initialize with opaque black
1958        TextureFormat::Rgba8UnormSrgb,
1959        RenderAssetUsages::default(),
1960    );
1961
1962    // Add required texture usages for headless rendering
1963    render_target_image.texture_descriptor.usage =
1964        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
1965
1966    let render_target_handle = images.add(render_target_image);
1967
1968    // Store handle for later access
1969    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
1970
1971    // Camera rendering to the image texture (NO window!)
1972    let fov = request.config.fov_radians();
1973    commands.spawn((
1974        Camera3d::default(),
1975        Camera {
1976            hdr: true,
1977            target: RenderTarget::Image(render_target_handle.clone()),
1978            ..default()
1979        },
1980        Projection::Perspective(PerspectiveProjection {
1981            fov,
1982            near: request.config.near_plane,
1983            far: request.config.far_plane,
1984            ..default()
1985        }),
1986        Msaa::Off,
1987        request.camera_transform,
1988        Tonemapping::None,
1989        DepthPrepass,
1990        NormalPrepass,
1991        RenderCamera,
1992        // Add ImageCopier to trigger RGBA extraction
1993        ImageCopier {
1994            src_image: render_target_handle,
1995            enabled: false, // Will enable when ready to capture
1996        },
1997    ));
1998
1999    // Ambient light
2000    let lighting = &request.config.lighting;
2001    commands.insert_resource(AmbientLight {
2002        color: Color::WHITE,
2003        brightness: lighting.ambient_brightness,
2004    });
2005
2006    // Key light
2007    if lighting.key_light_intensity > 0.0 {
2008        commands.spawn((
2009            PointLight {
2010                intensity: lighting.key_light_intensity,
2011                shadows_enabled: lighting.shadows_enabled,
2012                ..default()
2013            },
2014            Transform::from_xyz(
2015                lighting.key_light_position[0],
2016                lighting.key_light_position[1],
2017                lighting.key_light_position[2],
2018            ),
2019        ));
2020    }
2021
2022    // Fill light
2023    if lighting.fill_light_intensity > 0.0 {
2024        commands.spawn((
2025            PointLight {
2026                intensity: lighting.fill_light_intensity,
2027                shadows_enabled: lighting.shadows_enabled,
2028                ..default()
2029            },
2030            Transform::from_xyz(
2031                lighting.fill_light_position[0],
2032                lighting.fill_light_position[1],
2033                lighting.fill_light_position[2],
2034            ),
2035        ));
2036    }
2037
2038    // Load the scene
2039    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2040    commands.insert_resource(LoadedScene(scene_handle.clone()));
2041
2042    // Load the texture
2043    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2044    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2045
2046    // Create material with texture
2047    let _material = _materials.add(StandardMaterial {
2048        base_color_texture: Some(texture_handle),
2049        unlit: true,
2050        ..default()
2051    });
2052
2053    // Spawn the scene with rotation
2054    commands.spawn((
2055        SceneRoot(scene_handle),
2056        Transform::from_rotation(request.object_rotation.to_quat()),
2057        RenderedObject,
2058    ));
2059
2060    if let Some(t0) = t0 {
2061        eprintln!(
2062            "[render_trace][startup] setup_headless_scene ms={:.3}",
2063            t0.elapsed().as_secs_f64() * 1000.0
2064        );
2065    }
2066}
2067
2068/// Request capture for headless rendering (enable ImageCopier)
2069fn request_headless_capture(
2070    mut state: ResMut<RenderState>,
2071    mut depth_request: ResMut<DepthCaptureRequest>,
2072    mut query: Query<&mut ImageCopier>,
2073    batch: Option<Res<HeadlessBatchSequence>>,
2074) {
2075    let trace = render_trace_enabled();
2076    let t0 = trace.then(std::time::Instant::now);
2077
2078    if !state.capture_ready || state.screenshot_requested {
2079        if let Some(t0) = t0 {
2080            eprintln!(
2081                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2082                t0.elapsed().as_secs_f64() * 1000.0
2083            );
2084        }
2085        return;
2086    }
2087
2088    if batch
2089        .as_ref()
2090        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2091    {
2092        if let Some(t0) = t0 {
2093            eprintln!(
2094                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2095                t0.elapsed().as_secs_f64() * 1000.0
2096            );
2097        }
2098        return;
2099    }
2100
2101    // Enable the ImageCopier to trigger RGBA extraction
2102    for mut copier in query.iter_mut() {
2103        copier.enabled = true;
2104    }
2105
2106    // Request depth capture
2107    depth_request.requested = true;
2108
2109    state.screenshot_requested = true;
2110
2111    if let Some(t0) = t0 {
2112        eprintln!(
2113            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2114            t0.elapsed().as_secs_f64() * 1000.0
2115        );
2116    }
2117}
2118
2119/// Check if headless capture has completed
2120fn check_headless_capture_ready(
2121    mut state: ResMut<RenderState>,
2122    shared_rgba: Res<SharedRgbaBuffer>,
2123    shared_depth: Res<SharedDepthBuffer>,
2124    request: Res<RenderRequest>,
2125    mut query: Query<&mut ImageCopier>,
2126) {
2127    let trace = render_trace_enabled();
2128    let t0 = trace.then(std::time::Instant::now);
2129
2130    if !state.screenshot_requested || state.captured {
2131        if let Some(t0) = t0 {
2132            eprintln!(
2133                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2134                t0.elapsed().as_secs_f64() * 1000.0
2135            );
2136        }
2137        return;
2138    }
2139
2140    state.frame_count += 1;
2141
2142    // Check if RGBA data is ready
2143    let rgba_ready = if let Ok(guard) = shared_rgba.0.lock() {
2144        if let Some((rgba_data, width, height)) = guard.as_ref() {
2145            if state.rgba_data.is_none() {
2146                state.rgba_data = Some(rgba_data.clone());
2147                state.image_width = *width;
2148                state.image_height = *height;
2149                // Disable further captures
2150                for mut copier in query.iter_mut() {
2151                    copier.enabled = false;
2152                }
2153            }
2154            true
2155        } else {
2156            false
2157        }
2158    } else {
2159        false
2160    };
2161
2162    // Check if depth data is ready
2163    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2164        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2165            if state.depth_data.is_none() {
2166                state.depth_data = Some(depth_data.clone());
2167            }
2168            true
2169        } else {
2170            false
2171        }
2172    } else {
2173        false
2174    };
2175
2176    // Fallback to placeholder depth after 10 extra frames if depth readback fails
2177    if rgba_ready && !depth_ready && state.frame_count > 70 {
2178        let camera_dist = request.camera_transform.translation.length() as f64;
2179        let pixel_count = (state.image_width * state.image_height) as usize;
2180        state.depth_data = Some(vec![camera_dist; pixel_count]);
2181    }
2182
2183    if state.rgba_data.is_some() && state.depth_data.is_some() {
2184        state.captured = true;
2185    }
2186
2187    if let Some(t0) = t0 {
2188        eprintln!(
2189            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2190            rgba_ready,
2191            depth_ready,
2192            state.captured,
2193            state.frame_count,
2194            t0.elapsed().as_secs_f64() * 1000.0
2195        );
2196    }
2197}
2198
2199/// Extract results and exit for headless rendering
2200fn extract_and_exit_headless(
2201    mut state: ResMut<RenderState>,
2202    request: Res<RenderRequest>,
2203    shared_output: Res<SharedOutput>,
2204    mut app_exit: EventWriter<bevy::app::AppExit>,
2205    batch: Option<Res<HeadlessBatchSequence>>,
2206) {
2207    if batch.is_some() {
2208        return;
2209    }
2210
2211    if state.exit_requested {
2212        return;
2213    }
2214
2215    if !state.captured {
2216        return;
2217    }
2218
2219    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2220        let width = state.image_width;
2221        let height = state.image_height;
2222
2223        // Compute intrinsics (f64 for TBP precision)
2224        let config = &request.config;
2225        let intrinsics = crate::CameraIntrinsics {
2226            focal_length: [
2227                width as f64 * config.zoom as f64,
2228                height as f64 * config.zoom as f64,
2229            ],
2230            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2231            image_size: [width, height],
2232        };
2233
2234        let output = RenderOutput {
2235            rgba: rgba.clone(),
2236            depth: depth.clone(),
2237            width,
2238            height,
2239            intrinsics,
2240            camera_transform: request.camera_transform,
2241            object_rotation: request.object_rotation.clone(),
2242        };
2243
2244        if let Ok(mut guard) = shared_output.0.lock() {
2245            *guard = Some(output);
2246            drop(guard);
2247            std::thread::sleep(std::time::Duration::from_millis(200));
2248        }
2249
2250        // Send AppExit event (headless apps use this instead of closing windows)
2251        app_exit.send(bevy::app::AppExit::Success);
2252        state.exit_requested = true;
2253    }
2254}
2255
2256/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2257fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2258    let Some(mut batch) = batch else {
2259        return;
2260    };
2261
2262    if batch.warmup_frames_remaining > 0 {
2263        batch.warmup_frames_remaining -= 1;
2264    }
2265}
2266
2267/// Extract one batch output and continue rendering the next viewpoint in the same app.
2268fn extract_and_continue_headless_batch(
2269    mut state: ResMut<RenderState>,
2270    request: Res<RenderRequest>,
2271    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2272    batch: Option<ResMut<HeadlessBatchSequence>>,
2273    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2274    mut depth_request: ResMut<DepthCaptureRequest>,
2275    mut image_copiers: Query<&mut ImageCopier>,
2276) {
2277    let trace = render_trace_enabled();
2278    let t0 = trace.then(std::time::Instant::now);
2279
2280    let (shared_rgba, shared_depth) = buffers;
2281    let Some(mut batch) = batch else {
2282        if let Some(t0) = t0 {
2283            eprintln!(
2284                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2285                t0.elapsed().as_secs_f64() * 1000.0
2286            );
2287        }
2288        return;
2289    };
2290
2291    if state.exit_requested || !state.captured || batch.done {
2292        if let Some(t0) = t0 {
2293            eprintln!(
2294                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2295                state.captured,
2296                batch.done,
2297                t0.elapsed().as_secs_f64() * 1000.0
2298            );
2299        }
2300        return;
2301    }
2302
2303    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2304        let width = state.image_width;
2305        let height = state.image_height;
2306
2307        let config = &request.config;
2308        let intrinsics = crate::CameraIntrinsics {
2309            focal_length: [
2310                width as f64 * config.zoom as f64,
2311                height as f64 * config.zoom as f64,
2312            ],
2313            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2314            image_size: [width, height],
2315        };
2316
2317        let output = RenderOutput {
2318            rgba: rgba.clone(),
2319            depth: depth.clone(),
2320            width,
2321            height,
2322            intrinsics,
2323            camera_transform: batch
2324                .current_viewpoint()
2325                .unwrap_or(request.camera_transform),
2326            object_rotation: request.object_rotation.clone(),
2327        };
2328        batch.outputs.push(output);
2329
2330        let next_index = batch.current_index + 1;
2331        if next_index >= batch.viewpoints.len() {
2332            batch.done = true;
2333            state.exit_requested = true;
2334            return;
2335        }
2336
2337        batch.current_index = next_index;
2338        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2339
2340        if let Some(next_viewpoint) = batch.current_viewpoint() {
2341            for mut camera_transform in camera_query.iter_mut() {
2342                *camera_transform = next_viewpoint;
2343            }
2344        }
2345
2346        if let Ok(mut guard) = shared_rgba.0.lock() {
2347            *guard = None;
2348        }
2349        if let Ok(mut guard) = shared_depth.0.lock() {
2350            *guard = None;
2351        }
2352
2353        for mut copier in image_copiers.iter_mut() {
2354            copier.enabled = false;
2355        }
2356
2357        depth_request.requested = false;
2358        state.frame_count = 0;
2359        state.capture_ready = true;
2360        state.screenshot_requested = false;
2361        state.captured = false;
2362        state.rgba_data = None;
2363        state.depth_data = None;
2364        state.image_width = 0;
2365        state.image_height = 0;
2366
2367        if let Some(t0) = t0 {
2368            eprintln!(
2369                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2370                batch.current_index.saturating_sub(1),
2371                batch.current_index,
2372                batch.done,
2373                t0.elapsed().as_secs_f64() * 1000.0
2374            );
2375        }
2376    } else if let Some(t0) = t0 {
2377        eprintln!(
2378            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2379            t0.elapsed().as_secs_f64() * 1000.0
2380        );
2381    }
2382}
2383
2384// ============================================================================
2385// Persistent batch session (RenderSession)
2386//
2387// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2388// object (PSO) compilation across multiple `render()` calls. Profile data (see
2389// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2390// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2391// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2392// episodes recovers the bulk of that cost.
2393// ============================================================================
2394
/// Marker component for the per-group scene entity, so it can be found and
/// despawned cleanly when the next `RenderSession::render()` call swaps in a
/// different object or rotation.
///
/// Carries no data; it exists only to be queried for.
#[derive(Component)]
struct SessionScene;
2399
2400/// Session-persistent setup: render target image, camera (with prepass +
2401/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2402/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2403/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2404fn setup_session_persistent_scene(
2405    mut commands: Commands,
2406    mut images: ResMut<Assets<Image>>,
2407    config: Res<SessionRenderConfig>,
2408) {
2409    let width = config.0.width;
2410    let height = config.0.height;
2411
2412    let size = Extent3d {
2413        width,
2414        height,
2415        depth_or_array_layers: 1,
2416    };
2417
2418    let mut render_target_image = Image::new_fill(
2419        size,
2420        TextureDimension::D2,
2421        &[0, 0, 0, 255],
2422        TextureFormat::Rgba8UnormSrgb,
2423        RenderAssetUsages::default(),
2424    );
2425    render_target_image.texture_descriptor.usage =
2426        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2427
2428    let render_target_handle = images.add(render_target_image);
2429    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2430
2431    let fov = config.0.fov_radians();
2432    commands.spawn((
2433        Camera3d::default(),
2434        Camera {
2435            hdr: true,
2436            target: RenderTarget::Image(render_target_handle.clone()),
2437            ..default()
2438        },
2439        Projection::Perspective(PerspectiveProjection {
2440            fov,
2441            near: config.0.near_plane,
2442            far: config.0.far_plane,
2443            ..default()
2444        }),
2445        Msaa::Off,
2446        Transform::default(),
2447        Tonemapping::None,
2448        DepthPrepass,
2449        NormalPrepass,
2450        RenderCamera,
2451        ImageCopier {
2452            src_image: render_target_handle,
2453            enabled: false,
2454        },
2455    ));
2456
2457    let lighting = &config.0.lighting;
2458    commands.insert_resource(AmbientLight {
2459        color: Color::WHITE,
2460        brightness: lighting.ambient_brightness,
2461    });
2462
2463    if lighting.key_light_intensity > 0.0 {
2464        commands.spawn((
2465            PointLight {
2466                intensity: lighting.key_light_intensity,
2467                shadows_enabled: lighting.shadows_enabled,
2468                ..default()
2469            },
2470            Transform::from_xyz(
2471                lighting.key_light_position[0],
2472                lighting.key_light_position[1],
2473                lighting.key_light_position[2],
2474            ),
2475        ));
2476    }
2477
2478    if lighting.fill_light_intensity > 0.0 {
2479        commands.spawn((
2480            PointLight {
2481                intensity: lighting.fill_light_intensity,
2482                shadows_enabled: lighting.shadows_enabled,
2483                ..default()
2484            },
2485            Transform::from_xyz(
2486                lighting.fill_light_position[0],
2487                lighting.fill_light_position[1],
2488                lighting.fill_light_position[2],
2489            ),
2490        ));
2491    }
2492}
2493
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// Read by `setup_session_persistent_scene` for the render target size, the
/// camera projection (fov, near/far planes), and the lighting setup.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2498
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    /// The long-lived Bevy application that is reused across `render()` calls.
    app: App,
    /// Configuration fixed at construction time (see "Config invariant").
    render_config: RenderConfig,
    /// RGBA readback buffer; presumably the same handle that `new()` clones
    /// into `ImageCopyPlugin` — confirm against the constructor.
    shared_rgba: SharedRgbaBuffer,
    /// Depth readback buffer; presumably the same handle that `new()` clones
    /// into `DepthReadbackPlugin` — confirm against the constructor.
    shared_depth: SharedDepthBuffer,
    /// Raw-pointer `PhantomData` that makes the type `!Send + !Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2529
2530impl RenderSession {
2531    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2532    /// `update()` so Startup systems run and the wgpu device + adapter are
2533    /// initialized. The first `render()` call still pays PSO compilation for
2534    /// the specific mesh/material combination; subsequent calls reuse the cache.
2535    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2536        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2537        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2538
2539        let mut app = App::new();
2540        app.add_plugins(
2541            DefaultPlugins
2542                .set(WindowPlugin {
2543                    primary_window: None,
2544                    exit_condition: ExitCondition::DontExit,
2545                    ..default()
2546                })
2547                .disable::<bevy::winit::WinitPlugin>()
2548                .disable::<LogPlugin>()
2549                .disable::<TerminalCtrlCHandlerPlugin>(),
2550        )
2551        .add_plugins(ObjPlugin)
2552        .add_plugins(ImageCopyPlugin {
2553            shared_rgba: shared_rgba.clone(),
2554        })
2555        .add_plugins(DepthReadbackPlugin {
2556            shared_depth: shared_depth.clone(),
2557            near: render_config.near_plane,
2558            far: render_config.far_plane,
2559        })
2560        .insert_resource(SessionRenderConfig(render_config.clone()))
2561        .insert_resource(shared_rgba.clone())
2562        .init_resource::<RenderState>()
2563        .add_systems(Startup, setup_session_persistent_scene)
2564        .add_systems(
2565            Update,
2566            (
2567                check_assets_loaded,
2568                apply_materials,
2569                tick_headless_batch_warmup,
2570                request_headless_capture,
2571                check_headless_capture_ready,
2572                extract_and_continue_headless_batch,
2573            )
2574                .chain()
2575                // Gate the capture chain on `RenderRequest` existing. `new()`
2576                // runs a warmup `app.update()` to execute Startup (which spawns
2577                // the camera/lights/render target) before the first `render()`
2578                // call, but does not yet insert `RenderRequest`. Several systems
2579                // in this chain take `Res<RenderRequest>` (not `Option`) and
2580                // would panic on SystemState init if the resource were absent.
2581                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2582        );
2583
2584        app.finish();
2585        app.cleanup();
2586
2587        // One warmup update runs Startup systems (render target, camera, lights)
2588        // so they exist before the first `render()` call seeds the camera
2589        // transform. The Update chain is gated by `RenderRequest` existence and
2590        // is a no-op this tick. PSO compilation for specific mesh/material
2591        // combinations still happens lazily on the first real render.
2592        app.update();
2593
2594        Ok(Self {
2595            app,
2596            render_config: render_config.clone(),
2597            shared_rgba,
2598            shared_depth,
2599            _not_send_sync: std::marker::PhantomData,
2600        })
2601    }
2602
2603    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2604    /// Returns outputs in request order.
2605    ///
2606    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2607    /// wgpu device was lost mid-render. This call produced no output; any
2608    /// outputs from earlier `render()` calls on this session are still valid.
2609    /// Recovery: drop this `RenderSession` and construct a new one.
2610    pub fn render(
2611        &mut self,
2612        requests: &[crate::BatchRenderRequest],
2613    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2614        use crate::{BatchRenderError, BatchRenderOutput};
2615
2616        if requests.is_empty() {
2617            return Ok(Vec::new());
2618        }
2619
2620        // Enforce homogeneity and config invariance.
2621        let first = &requests[0];
2622        if first.render_config != self.render_config {
2623            return Err(BatchRenderError::InvalidConfig(
2624                "RenderSession render_config mismatch: session was constructed with a different \
2625                 RenderConfig than the first request carries. Session config cannot change after \
2626                 `new()`; construct a new session if you need a different resolution/camera."
2627                    .to_string(),
2628            ));
2629        }
2630        for r in &requests[1..] {
2631            if r.object_dir != first.object_dir
2632                || r.object_rotation != first.object_rotation
2633                || r.render_config != first.render_config
2634            {
2635                return Err(BatchRenderError::InvalidConfig(
2636                    "Phase 1 RenderSession::render requires homogeneous requests \
2637                     (same object_dir, object_rotation, and render_config across the batch). \
2638                     Call render() once per group instead."
2639                        .to_string(),
2640                ));
2641            }
2642        }
2643
2644        // Canonicalize paths and validate mesh/texture presence. This matches
2645        // `render_headless_sequence`'s preconditions so the error surface stays
2646        // consistent.
2647        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2648            BatchRenderError::InvalidConfig(format!(
2649                "Cannot canonicalize object directory {}: {}",
2650                first.object_dir.display(),
2651                e
2652            ))
2653        })?;
2654        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2655        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2656        if !mesh_path.exists() {
2657            return Err(BatchRenderError::InvalidConfig(format!(
2658                "Mesh not found: {}",
2659                mesh_path.display()
2660            )));
2661        }
2662        if !texture_path.exists() {
2663            return Err(BatchRenderError::InvalidConfig(format!(
2664                "Texture not found: {}",
2665                texture_path.display()
2666            )));
2667        }
2668
2669        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2670
2671        // --- per-group scene swap (direct world manipulation) ---
2672        {
2673            let world = self.app.world_mut();
2674
2675            // Despawn any SessionScene entity from the previous group.
2676            let stale: Vec<Entity> = world
2677                .query_filtered::<Entity, With<SessionScene>>()
2678                .iter(world)
2679                .collect();
2680            for entity in stale {
2681                world.entity_mut(entity).despawn_recursive();
2682            }
2683
2684            // Clear shared RGBA/depth buffers so a stale payload can't leak
2685            // into the first viewpoint of this call.
2686            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2687                *guard = None;
2688            }
2689            if let Ok(mut guard) = self.shared_depth.0.lock() {
2690                *guard = None;
2691            }
2692
2693            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2694            // frame_count, materials_applied, etc.). Default() gives all false/0.
2695            *world.resource_mut::<RenderState>() = RenderState::default();
2696
2697            // Update RenderRequest so the existing capture systems see the new
2698            // object paths, rotation, and camera transform (seeded from first vp).
2699            let new_request = RenderRequest {
2700                mesh_path: mesh_path.display().to_string(),
2701                texture_path: texture_path.display().to_string(),
2702                camera_transform: viewpoints[0],
2703                object_rotation: first.object_rotation.clone(),
2704                config: self.render_config.clone(),
2705            };
2706            world.insert_resource(new_request);
2707
2708            // Kick off asset loads and install the handles under the names the
2709            // existing `check_assets_loaded` system expects.
2710            let asset_server = world.resource::<AssetServer>().clone();
2711            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2712            let texture_handle: Handle<Image> =
2713                asset_server.load(texture_path.display().to_string());
2714            world.insert_resource(LoadedScene(scene_handle.clone()));
2715            world.insert_resource(LoadedTexture(texture_handle));
2716
2717            // Spawn the new scene entity tagged so we can find + despawn it next
2718            // render() call.
2719            world.spawn((
2720                SceneRoot(scene_handle),
2721                Transform::from_rotation(first.object_rotation.to_quat()),
2722                RenderedObject,
2723                SessionScene,
2724            ));
2725
2726            // Seed the camera transform to the first viewpoint now so the first
2727            // capture lines up; subsequent viewpoints are advanced by
2728            // `extract_and_continue_headless_batch`.
2729            let camera_entity = world
2730                .query_filtered::<Entity, With<RenderCamera>>()
2731                .iter(world)
2732                .next();
2733            if let Some(cam) = camera_entity {
2734                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2735                    *transform = viewpoints[0];
2736                }
2737            }
2738
2739            // Install the viewpoint sequence for this render() call.
2740            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
2741        }
2742
2743        // --- drive the capture loop ---
2744        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2745        let start = std::time::Instant::now();
2746        loop {
2747            if start.elapsed() > timeout {
2748                return Err(BatchRenderError::TotalFailure(format!(
2749                    "RenderSession::render timed out after {}s",
2750                    RENDER_TIMEOUT_SECS
2751                )));
2752            }
2753
2754            self.app.update();
2755
2756            if self.app.world().resource::<HeadlessBatchSequence>().done {
2757                break;
2758            }
2759        }
2760
2761        // Collect outputs and zip with requests to produce BatchRenderOutput in
2762        // request order.
2763        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
2764        if sequence.outputs.len() != requests.len() {
2765            return Err(BatchRenderError::TotalFailure(format!(
2766                "RenderSession produced {} outputs for {} requests",
2767                sequence.outputs.len(),
2768                requests.len()
2769            )));
2770        }
2771        let outputs = std::mem::take(&mut sequence.outputs);
2772
2773        Ok(requests
2774            .iter()
2775            .cloned()
2776            .zip(outputs)
2777            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
2778            .collect())
2779    }
2780}
2781
2782// ============================================================================
2783// Per-step persistent renderer (PersistentRenderer)
2784//
2785// `RenderSession` reuses the App across calls but rebuilds the scene on every
2786// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
2787// fine for the parity-gate path (one scene per episode of N viewpoints) but
2788// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
2789// the object stays loaded for the whole episode.
2790//
2791// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
2792// construction. `new()` loads mesh + texture + spawns the scene root + drives
2793// one warmup render (output discarded) so PSO compilation and material setup
2794// are paid up front. `render(camera, rotation)` then only mutates the camera
2795// `Transform` and (if changed) the scene root rotation, drives the capture
2796// chain for one frame, and returns. See issue #65.
2797// ============================================================================
2798
/// Marker for the `PersistentRenderer`'s scene root entity. We keep the
/// entity alive for the whole renderer lifetime and just mutate its
/// `Transform` when the caller-supplied object rotation changes.
///
/// The marker is attached when `PersistentRenderer::new` spawns the scene
/// root, and `PersistentRenderer::render` uses it as a query filter to find
/// that entity again.
#[derive(Component)]
struct PersistentScene;
2804
/// Persistent per-step renderer. Loads the scene once at `new()` and renders
/// one frame per `render()` call by mutating the camera transform and scene
/// root rotation in-place. Built for surface-policy feedback loops where the
/// object stays fixed for the duration of an episode and the camera moves
/// every step. See issue #65.
///
/// # Thread affinity
///
/// `PersistentRenderer` must be created, used, and dropped on the same thread.
/// Holds a `bevy::App` that owns GPU resources not safe to move across
/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
///
/// # Object + config invariants
///
/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
/// object or change resolution/lighting, drop and rebuild. Rotation may change
/// freely between `render()` calls.
pub struct PersistentRenderer {
    /// The Bevy app assembled in `new()`. `render()` drives it by calling
    /// `update()` until the installed `HeadlessBatchSequence` reports `done`.
    app: App,
    /// Canonicalized object directory resolved in `new()`; exposed through
    /// `object_dir()`.
    object_dir: PathBuf,
    /// Copy of the config passed to `new()`; exposed through
    /// `render_config()`.
    render_config: RenderConfig,
    /// Shared RGBA readback slot. A clone is handed to `ImageCopyPlugin` in
    /// `new()`; `render()` resets the slot to `None` before each frame.
    shared_rgba: SharedRgbaBuffer,
    /// Shared depth readback slot. A clone is handed to `DepthReadbackPlugin`
    /// in `new()`; `render()` resets the slot to `None` before each frame.
    shared_depth: SharedDepthBuffer,
    /// Zero-sized raw-pointer marker; raw pointers are neither `Send` nor
    /// `Sync`, so this field opts the whole struct out of both auto traits.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2830
2831impl PersistentRenderer {
2832    /// Build the App, load the scene + texture, spawn the scene root, and drive
2833    /// one warmup render whose output is discarded. After `new()` returns, the
2834    /// first user-facing `render()` call benefits from a warm PSO cache and
2835    /// applied materials.
2836    pub fn new(
2837        object_dir: &Path,
2838        render_config: &RenderConfig,
2839    ) -> Result<Self, crate::RenderError> {
2840        let object_dir =
2841            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
2842                path: object_dir.display().to_string(),
2843                reason: e.to_string(),
2844            })?;
2845        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2846        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2847        if !mesh_path.exists() {
2848            return Err(crate::RenderError::MeshNotFound(
2849                mesh_path.display().to_string(),
2850            ));
2851        }
2852        if !texture_path.exists() {
2853            return Err(crate::RenderError::TextureNotFound(
2854                texture_path.display().to_string(),
2855            ));
2856        }
2857
2858        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2859        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2860
2861        let mut app = App::new();
2862        app.add_plugins(
2863            DefaultPlugins
2864                .set(WindowPlugin {
2865                    primary_window: None,
2866                    exit_condition: ExitCondition::DontExit,
2867                    ..default()
2868                })
2869                .disable::<bevy::winit::WinitPlugin>()
2870                .disable::<LogPlugin>()
2871                .disable::<TerminalCtrlCHandlerPlugin>(),
2872        )
2873        .add_plugins(ObjPlugin)
2874        .add_plugins(ImageCopyPlugin {
2875            shared_rgba: shared_rgba.clone(),
2876        })
2877        .add_plugins(DepthReadbackPlugin {
2878            shared_depth: shared_depth.clone(),
2879            near: render_config.near_plane,
2880            far: render_config.far_plane,
2881        })
2882        .insert_resource(SessionRenderConfig(render_config.clone()))
2883        .insert_resource(shared_rgba.clone())
2884        .init_resource::<RenderState>()
2885        .add_systems(Startup, setup_session_persistent_scene)
2886        .add_systems(
2887            Update,
2888            (
2889                check_assets_loaded,
2890                apply_materials,
2891                tick_headless_batch_warmup,
2892                request_headless_capture,
2893                check_headless_capture_ready,
2894                extract_and_continue_headless_batch,
2895            )
2896                .chain()
2897                // Same gate as RenderSession: capture chain only runs once
2898                // RenderRequest is installed. Startup runs first via the
2899                // warmup `app.update()` below.
2900                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2901        );
2902
2903        app.finish();
2904        app.cleanup();
2905        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
2906        app.update();
2907
2908        // Install scene + warmup render request. The warmup output is discarded
2909        // — its purpose is to pay PSO compilation and material application
2910        // upfront so the first user-facing render() is fast.
2911        let initial_request = RenderRequest {
2912            mesh_path: mesh_path.display().to_string(),
2913            texture_path: texture_path.display().to_string(),
2914            camera_transform: Transform::default(),
2915            object_rotation: ObjectRotation::identity(),
2916            config: render_config.clone(),
2917        };
2918
2919        {
2920            let world = app.world_mut();
2921            let asset_server = world.resource::<AssetServer>().clone();
2922            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2923            let texture_handle: Handle<Image> =
2924                asset_server.load(texture_path.display().to_string());
2925            world.insert_resource(LoadedScene(scene_handle.clone()));
2926            world.insert_resource(LoadedTexture(texture_handle));
2927            world.insert_resource(initial_request);
2928            world.spawn((
2929                SceneRoot(scene_handle),
2930                Transform::from_rotation(ObjectRotation::identity().to_quat()),
2931                RenderedObject,
2932                PersistentScene,
2933            ));
2934            world.insert_resource(HeadlessBatchSequence::new(vec![Transform::default()]));
2935        }
2936
2937        // Drive the warmup render to completion.
2938        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2939        let start = std::time::Instant::now();
2940        loop {
2941            if start.elapsed() > timeout {
2942                return Err(crate::RenderError::RenderFailed(format!(
2943                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
2944                )));
2945            }
2946            app.update();
2947            if app.world().resource::<HeadlessBatchSequence>().done {
2948                break;
2949            }
2950        }
2951        // Discard the warmup output so it doesn't leak into the first real
2952        // render() call's output buffer.
2953        app.world_mut()
2954            .resource_mut::<HeadlessBatchSequence>()
2955            .outputs
2956            .clear();
2957
2958        Ok(Self {
2959            app,
2960            object_dir,
2961            render_config: render_config.clone(),
2962            shared_rgba,
2963            shared_depth,
2964            _not_send_sync: std::marker::PhantomData,
2965        })
2966    }
2967
    /// Render one frame from the given camera transform and object rotation.
    /// Reuses the loaded scene + warm PSO cache from `new()`.
    ///
    /// The call writes the new rotation and camera pose into the world, resets
    /// the per-capture fields of `RenderState`, clears the shared readback
    /// slots, installs a fresh one-viewpoint `HeadlessBatchSequence`, and then
    /// ticks the app until that sequence reports `done`.
    ///
    /// # Errors
    ///
    /// Returns `RenderError::RenderFailed` when the capture loop exceeds
    /// `RENDER_TIMEOUT_SECS`, or when the finished sequence does not hold
    /// exactly one output.
    pub fn render(
        &mut self,
        camera_transform: &Transform,
        object_rotation: &ObjectRotation,
    ) -> Result<RenderOutput, crate::RenderError> {
        // Take owned copies up front so the world borrow below does not have to
        // coexist with the caller's references.
        let camera_transform = *camera_transform;
        let object_rotation_owned = object_rotation.clone();

        {
            let world = self.app.world_mut();

            // Update the persistent scene root rotation. Always-write avoids
            // the cost of an extra ObjectRotation comparison per call; the
            // mutation itself is a single Transform write.
            // If no `PersistentScene` entity is found the rotation is silently
            // left unchanged.
            let scene_entity = world
                .query_filtered::<Entity, With<PersistentScene>>()
                .iter(world)
                .next();
            if let Some(entity) = scene_entity {
                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
                    *transform = Transform::from_rotation(object_rotation_owned.to_quat());
                }
            }

            // Update the camera transform.
            // As above, a missing `RenderCamera` entity is not reported.
            let cam_entity = world
                .query_filtered::<Entity, With<RenderCamera>>()
                .iter(world)
                .next();
            if let Some(cam) = cam_entity {
                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
                    *transform = camera_transform;
                }
            }

            // Reset per-frame state, preserving scene_loaded / texture_loaded
            // / materials_applied / materials_applied_frame. The asset-load
            // and material-apply work was paid in `new()`'s warmup; we only
            // need to clear the per-capture state.
            //
            // `capture_ready = true` short-circuits `apply_materials` on
            // every tick of the render loop (no need to re-check material
            // application — it stays applied for the renderer's lifetime).
            // It does NOT short-circuit `request_headless_capture`, which
            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
            // without that warmup gate, request_headless_capture fires same-
            // tick as the transform writes, capturing the previous render's
            // target before the new transforms have propagated.
            {
                let mut state = world.resource_mut::<RenderState>();
                state.exit_requested = false;
                state.screenshot_requested = false;
                state.captured = false;
                state.rgba_data = None;
                state.depth_data = None;
                state.frame_count = 0;
                state.image_width = 0;
                state.image_height = 0;
                state.capture_ready = true;
            }

            // Clear shared GPU readback buffers so a stale payload from the
            // previous render() can't leak into this call's output.
            // A lock that fails to acquire is skipped rather than reported.
            if let Ok(mut guard) = self.shared_rgba.0.lock() {
                *guard = None;
            }
            if let Ok(mut guard) = self.shared_depth.0.lock() {
                *guard = None;
            }

            // Update RenderRequest (used by extract_and_continue_headless_batch
            // to stamp the output with the right intrinsics + rotation).
            {
                let mut req = world.resource_mut::<RenderRequest>();
                req.camera_transform = camera_transform;
                req.object_rotation = object_rotation_owned.clone();
            }

            // Install fresh single-element batch with warmup frames so
            // `request_headless_capture` is gated until the new transforms
            // have propagated through the render pipeline.
            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
            world.insert_resource(batch);
        }

        // Tick the app until the batch completes; the timeout is checked
        // before every tick.
        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
        let start = std::time::Instant::now();
        loop {
            if start.elapsed() > timeout {
                return Err(crate::RenderError::RenderFailed(format!(
                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
                )));
            }
            self.app.update();
            if self.app.world().resource::<HeadlessBatchSequence>().done {
                break;
            }
        }

        // Move the outputs out of the resource; a single-viewpoint batch must
        // yield exactly one of them.
        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
        let mut outputs = std::mem::take(&mut sequence.outputs);
        if outputs.len() != 1 {
            return Err(crate::RenderError::RenderFailed(format!(
                "PersistentRenderer::render expected 1 output, got {}",
                outputs.len()
            )));
        }

        Ok(outputs.remove(0))
    }
3082
3083    /// Path to the YCB object directory this renderer was bound to.
3084    pub fn object_dir(&self) -> &Path {
3085        &self.object_dir
3086    }
3087
3088    /// The `RenderConfig` this renderer was constructed with.
3089    pub fn render_config(&self) -> &RenderConfig {
3090        &self.render_config
3091    }
3092
3093    /// Explicit close. Equivalent to dropping; provided to match the API
3094    /// proposal in #65 for callers that want lifetime-explicit teardown.
3095    pub fn close(self) {
3096        // Drop runs on return.
3097    }
3098}
3099
/// Render directly to files (for subprocess mode).
///
/// This function saves RGBA and depth data directly to files before exiting.
/// Designed for subprocess rendering where the process will exit after rendering.
///
/// A watchdog thread polls the shared output slot every 100 ms. Once an output
/// appears it writes the RGBA image to `rgba_path` and the depth buffer to
/// `depth_path`, then terminates the whole process: exit code 0 on success,
/// exit code 1 if either write fails or `RENDER_TIMEOUT_SECS` elapses first.
/// Because of that, this function has no path that returns `Ok(())`.
///
/// # Errors
///
/// * `RenderError::MeshNotFound` / `RenderError::TextureNotFound` — the
///   expected asset file under `object_dir` does not exist (checked before the
///   watchdog thread is spawned).
/// * `RenderError::RenderFailed` — the Bevy app's `run()` returned before the
///   watchdog terminated the process.
pub fn render_to_files(
    object_dir: &Path,
    camera_transform: &Transform,
    object_rotation: &ObjectRotation,
    config: &RenderConfig,
    rgba_path: &Path,
    depth_path: &Path,
) -> Result<(), RenderError> {
    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);

    if !mesh_path.exists() {
        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
    }
    if !texture_path.exists() {
        return Err(RenderError::TextureNotFound(
            texture_path.display().to_string(),
        ));
    }

    let request = RenderRequest {
        mesh_path: mesh_path.display().to_string(),
        texture_path: texture_path.display().to_string(),
        camera_transform: *camera_transform,
        object_rotation: object_rotation.clone(),
        config: config.clone(),
    };

    // Shared state for output
    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
    let output_poll = shared_output.clone();

    // Clone paths for watchdog thread
    let rgba_path = rgba_path.to_path_buf();
    let depth_path = depth_path.to_path_buf();

    // Shared buffer for RGBA data from headless render target
    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();

    // Shared buffer for depth readback
    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();

    // Spawn watchdog thread that saves files and exits
    std::thread::spawn(move || {
        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
        let start = std::time::Instant::now();
        let poll_interval = std::time::Duration::from_millis(100);

        loop {
            // The lock stays held while the files are written; every branch
            // taken once an output is present ends in `process::exit`.
            if let Ok(guard) = output_poll.0.lock() {
                if let Some(output) = guard.as_ref() {
                    // Save RGBA as PNG
                    if let Err(e) =
                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
                    {
                        eprintln!("Failed to save RGBA: {:?}", e);
                        std::process::exit(1);
                    }

                    // Save depth as binary f64 (little-endian)
                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
                        eprintln!("Failed to save depth: {:?}", e);
                        std::process::exit(1);
                    }

                    std::process::exit(0);
                }
            }

            // No output yet: give up once the timeout has elapsed.
            if start.elapsed() > timeout {
                eprintln!(
                    "Error: Render timeout after {} seconds",
                    RENDER_TIMEOUT_SECS
                );
                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
                std::process::exit(1);
            }

            std::thread::sleep(poll_interval);
        }
    });

    // Configure rendering backend for this environment.
    // Use OnceLock so env vars are only set once per process — repeated calls
    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
    // requires a persistent renderer (tracked in issue #14).
    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
    BACKEND_INIT.get_or_init(|| {
        let backend_config = BackendConfig::headless();
        backend_config.apply_env();
    });

    // Run Bevy app with HEADLESS configuration
    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();

    // Not expected to be reached: the watchdog thread normally exits the
    // process first. If `run()` does return, report it as a failed render.
    Err(RenderError::RenderFailed(
        "Render did not complete".to_string(),
    ))
}
3205
3206/// Save RGBA data to PNG file
3207fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3208    use image::{ImageBuffer, Rgba};
3209
3210    // Create parent directories if needed
3211    if let Some(parent) = path.parent() {
3212        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3213    }
3214
3215    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3216        ImageBuffer::from_raw(width, height, rgba.to_vec())
3217            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3218
3219    img.save(path).map_err(|e| e.to_string())
3220}
3221
/// Write `depth` to `path` as a flat binary file of little-endian `f64`
/// values (8 bytes per sample, no header), keeping full f64 precision for TBP.
///
/// Missing parent directories of `path` are created first. Any I/O failure is
/// returned as its stringified error message.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists before writing.
    if let Some(dir) = path.parent() {
        if let Err(e) = std::fs::create_dir_all(dir) {
            return Err(e.to_string());
        }
    }

    // Serialize every sample back to back in little-endian byte order.
    let mut encoded: Vec<u8> = Vec::with_capacity(depth.len() * std::mem::size_of::<f64>());
    for sample in depth {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }

    match std::fs::write(path, &encoded) {
        Ok(()) => Ok(()),
        Err(e) => Err(e.to_string()),
    }
}
3232
/// Opt-in smoke checks for the headless batch renderer, driven by a synthetic object so no
/// YCB downloads are required.
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use std::time::{Duration, Instant};
    use tempfile::TempDir;

    /// Wavefront OBJ source for the synthetic cube: eight vertices at +/-0.10 on each axis,
    /// four texture coordinates, and twelve triangles.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Texels of the 2x2 synthetic texture in row-major order: (0,0), (1,0), (0,1), (1,1).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],
        [48, 255, 48, 255],
        [48, 48, 255, 255],
        [255, 255, 64, 255],
    ];

    /// Materialize `synthetic_cube/google_16k/{textured.obj,texture_map.png}` inside a fresh
    /// temporary directory and return the directory guard.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let mesh_dir = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&mesh_dir).expect("create synthetic google_16k dir");

        // A small centered cube stays visible from all default TBP viewpoints and does not
        // need any YCB downloads.
        std::fs::write(mesh_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        let texture = ImageBuffer::from_fn(2, 2, |x, y| {
            Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize])
        });
        texture
            .save(mesh_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    /// Renders a small homogeneous batch of the synthetic cube and checks output shape,
    /// visible color, single scene setup, and a wall-clock budget.
    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        // One request per viewpoint, all sharing the same object, rotation and render config.
        let mut requests = Vec::with_capacity(request_count);
        for &viewpoint in viewpoints.iter().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
            });
        }

        let timer = Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        for (index, frame) in outputs.iter().enumerate() {
            assert_eq!(frame.width, config.width, "output {index} width mismatch");
            assert_eq!(frame.height, config.height, "output {index} height mismatch");

            let expected_rgba_len = (config.width * config.height * 4) as usize;
            assert_eq!(
                frame.rgba.len(),
                expected_rgba_len,
                "output {index} rgba size mismatch"
            );

            let expected_depth_len = (config.width * config.height) as usize;
            assert_eq!(
                frame.depth.len(),
                expected_depth_len,
                "output {index} depth size mismatch"
            );

            // At least one pixel must have a non-zero color channel (alpha is ignored).
            let has_visible_color = frame
                .rgba
                .chunks_exact(4)
                .any(|px| px.iter().take(3).any(|&channel| channel != 0));
            assert!(has_visible_color, "output {index} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}