Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::LoadState;
42use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
43use bevy::core_pipeline::tonemapping::Tonemapping;
44use bevy::ecs::query::QueryItem;
45use bevy::log::LogPlugin;
46use bevy::prelude::*;
47use bevy::render::camera::{ExtractedCamera, RenderTarget};
48use bevy::render::render_asset::{RenderAssetUsages, RenderAssets};
49use bevy::render::render_graph::{
50    Node, NodeRunError, RenderGraphApp, RenderGraphContext, RenderLabel, ViewNode, ViewNodeRunner,
51};
52use bevy::render::render_resource::{
53    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, ImageCopyBuffer,
54    ImageCopyTexture, ImageDataLayout, MapMode, Origin3d, TextureAspect, TextureDimension,
55    TextureFormat, TextureUsages,
56};
57use bevy::render::renderer::RenderQueue;
58use bevy::render::renderer::{RenderContext, RenderDevice};
59use bevy::render::texture::GpuImage;
60use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
61use bevy::render::view::ViewDepthTexture;
62use bevy::render::{Extract, Render, RenderApp, RenderSet};
63use bevy::window::{ExitCondition, WindowPlugin};
64use bevy_obj::ObjPlugin;
65use std::fs::File;
66use std::io::Read as IoRead;
67use std::path::{Path, PathBuf};
68#[cfg(test)]
69use std::sync::atomic::{AtomicUsize, Ordering};
70use std::sync::{Arc, Mutex, OnceLock};
71use std::time::Duration;
72
73use crate::{
74    backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput,
75    TargetingPolicy,
76};
77use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
78
/// Watchdog timeout for a single render, in seconds.
///
/// Bounds how long any single render path waits before declaring failure.
/// 180 s accommodates first-run wgpu shader compilation on Windows, which
/// can take well over 60 s on a cold GPU cache (see commit 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 180;

/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. Historically set to 3 as a conservative cushion; reducing it
/// directly shortens per-viewpoint wall-clock time, since `app.update()` in
/// the batch path is not rate-limited. Validated against the pixel-exact
/// hardware test `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;

/// Warmup frames at the start of each `PersistentRenderer::render()` call.
///
/// `BATCH_WARMUP_FRAMES = 1` works for inter-viewpoint advancement inside a
/// batch because `extract_and_continue_headless_batch` writes the next
/// camera transform *and* clears the shared GPU readback buffers in the
/// same tick — so the in-flight copy from the previous viewpoint has
/// already drained by the time the next capture is gated.
///
/// In the persistent per-call path, the previous render's output may still
/// be sitting in `shared_rgba`/`shared_depth`. We clear them before the
/// loop, but the pipeline still needs ticks to propagate the new camera/
/// scene-rotation `Transform` writes through `PostUpdate` →
/// `transform_propagate` → `Extract` → render graph → `ImageCopyDriver`
/// before the capture we request actually reflects the new transforms.
///
/// Validated by `test_persistent_renderer_matches_render_to_buffer`. Three
/// ticks of warmup give Windows/DX12 enough room to drain the previous
/// readback and capture the post-propagation color target:
///   - tick 0: transforms propagate, render runs (no copy enabled)
///   - tick 1: previous in-flight readback drains (no copy enabled)
///   - tick 2: warmup hits 0, capture fires, render runs with copy enabled
///   - tick 3: shared buffers populated → captured → batch finalized
const PERSISTENT_WARMUP_FRAMES: u32 = 3;
119
/// Report whether render tracing is switched on via the
/// `BEVY_SENSOR_RENDER_TRACE` environment variable.
///
/// The value itself is ignored: tracing is on whenever the variable is set
/// to valid Unicode. The environment is queried on every call, so per-frame
/// systems should call this once and gate all tracing output on the result.
#[inline]
fn render_trace_enabled() -> bool {
    const TRACE_VAR: &str = "BEVY_SENSOR_RENDER_TRACE";
    let lookup = std::env::var(TRACE_VAR);
    lookup.is_ok()
}
126
/// Report whether a display server appears to be reachable for windowed
/// rendering.
///
/// Returns `true` as soon as either `DISPLAY` (X11) or `WAYLAND_DISPLAY`
/// is set in the environment; the values themselves are not inspected.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
134
/// Detect WSL2 (which doesn't support Vulkan window surfaces).
///
/// Reads `/proc/version` and looks, case-insensitively, for `microsoft` or
/// `wsl`. Returns `false` when the file cannot be read.
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            lowered.contains("microsoft") || lowered.contains("wsl")
        }
        Err(_) => false,
    }
}
144
/// Internal state for tracking render progress.
///
/// Stored as a Bevy `Resource`. Because `Default` is derived, a fresh
/// instance starts with every flag `false`, every counter `0`, and no
/// captured data. The systems that advance these fields are not visible in
/// this part of the file.
#[derive(Resource, Default)]
struct RenderState {
    frame_count: u32,
    scene_loaded: bool,
    texture_loaded: bool,
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    capture_ready: bool,
    screenshot_requested: bool,
    captured: bool,
    exit_requested: bool,
    // Carries a dead-code allowance; presumably not read anywhere at present.
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// Captured color bytes, if any (presumably RGBA8 — confirm against the writer).
    rgba_data: Option<Vec<u8>>,
    /// Captured depth values, if any (presumably linear meters, like
    /// `SharedDepthBuffer` — confirm against the writer).
    depth_data: Option<Vec<f64>>,
    image_width: u32,
    image_height: u32,
}
167
/// Test-only counter, starting at zero. Presumably incremented by the
/// headless scene setup code (not visible here) so tests can check how many
/// times a scene was built — confirm against the incrementing site.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Reset the test-only setup counter to zero.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Read the current value of the test-only setup counter.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
180
/// Shared buffer for screenshot callback to write into.
///
/// Wraps an `Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>`; the tuple is
/// presumably `(pixel_bytes, width, height)` by analogy with
/// `SharedDepthBuffer` — confirm against the writer. `None` means nothing
/// has been stored.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
186
/// Shared buffer for depth data from GPU readback.
///
/// Contains: `(linear_depth_values, width, height)`, or `None` until a
/// capture has been stored. Cloning shares the same underlying buffer
/// (the wrapper holds an `Arc`).
/// Uses f64 for TBP numerical precision compatibility.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
193
194// ============================================================================
195// Depth Readback Infrastructure
196// ============================================================================
197
/// Request to capture depth - extracted from main world to render world.
///
/// `DepthReadbackNode` only copies the depth texture while `requested` is
/// `true`; `near` and `far` travel with the capture and are later used for
/// the reverse-Z to linear conversion.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// Whether the render node should capture depth.
    requested: bool,
    /// Near plane distance passed to the depth linearization.
    near: f32,
    /// Far plane distance passed to the depth linearization.
    far: f32,
}
205
/// Pending depth capture info for async processing.
///
/// Created by `DepthReadbackNode` after it records the texture-to-buffer
/// copy, and consumed by `collect_depth_captures`, which maps `buffer` and
/// decodes it.
struct PendingDepthCapture {
    /// Staging buffer that receives the depth texture copy.
    buffer: Buffer,
    /// Image width in pixels.
    width: u32,
    /// Image height in pixels.
    height: u32,
    /// Near plane distance captured from the request.
    near: f32,
    /// Far plane distance captured from the request.
    far: f32,
}
214
/// Queue for pending depth captures (written by render node, read by cleanup system).
///
/// The `Mutex` allows the render node to push through a shared `&World`
/// reference; `collect_depth_captures` drains the whole vector at once.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
218
219// ============================================================================
220// Depth Buffer Helpers
221// ============================================================================
222
/// Pure helpers for depth readback: row-pitch alignment, unpacking padded
/// rows into `f32` values, and reverse-Z to linear depth conversion.
mod depth_helpers {
    /// wgpu requires buffer row alignment of 256 bytes
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of `COPY_BYTES_PER_ROW_ALIGNMENT`.
    ///
    /// Already-aligned values (including 0) are returned unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        match value % COPY_BYTES_PER_ROW_ALIGNMENT {
            0 => value,
            remainder => value + (COPY_BYTES_PER_ROW_ALIGNMENT - remainder),
        }
    }

    /// Calculate the aligned buffer size, in bytes, for an image whose rows
    /// are each padded to `COPY_BYTES_PER_ROW_ALIGNMENT`.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        height * align_byte_size(width * pixel_size)
    }

    /// Convert reverse-Z NDC depth to linear depth in meters.
    ///
    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
    /// This provides better precision for distant objects.
    ///
    /// Formula derivation:
    /// - At near plane (z = near): ndc = 1
    /// - At far plane (z = far): ndc = 0
    /// - linear = far / (1 + ndc * (far/near - 1))
    ///
    /// Values at or below 0 are clamped to `far` (background) and values at
    /// or above 1 are clamped to `near`.
    ///
    /// NOTE(review): this is the inverse of a *finite-far* reverse-Z
    /// projection. Bevy's stock `PerspectiveProjection` is believed to build
    /// an infinite-far reverse-Z matrix (ndc = near / z), whose inverse would
    /// be `near / ndc` — confirm which projection the sensor camera uses.
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        // Handle edge cases
        if ndc_depth <= 0.0 {
            return far; // Background (infinite distance in reverse-Z)
        }
        if ndc_depth >= 1.0 {
            return near; // At or beyond near plane
        }
        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
        far / (1.0 + ndc_depth * (far / near - 1.0))
    }

    /// Extract depth values from aligned buffer, handling row padding.
    ///
    /// `data` holds `height` rows of little-endian `f32` texels. Each row
    /// starts at a multiple of the 256-byte-aligned row pitch; only the
    /// first `width * 4` bytes of a row are pixel data. The final row may
    /// omit its trailing padding.
    ///
    /// Returns `width * height` values in row-major order (empty when either
    /// dimension is 0).
    ///
    /// # Panics
    ///
    /// Panics with a descriptive message if `data` is too short for the
    /// requested dimensions.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const BYTES_PER_TEXEL: usize = std::mem::size_of::<f32>();

        let cols = width as usize;
        let rows = height as usize;
        if cols == 0 || rows == 0 {
            return Vec::new();
        }

        // Do all size arithmetic in usize with overflow checks; the u32
        // products (`width * height`, `width * 4`) can overflow for very
        // large dimensions.
        let alignment = COPY_BYTES_PER_ROW_ALIGNMENT as usize;
        let layout = cols.checked_mul(BYTES_PER_TEXEL).and_then(|actual| {
            let padding = (alignment - actual % alignment) % alignment;
            let aligned = actual.checked_add(padding)?;
            // The last row only needs its unpadded bytes to be present.
            let needed = (rows - 1).checked_mul(aligned)?.checked_add(actual)?;
            Some((actual, aligned, needed))
        });

        // Validate once up front instead of failing mid-loop with an opaque
        // slice-index panic.
        let (actual_row_bytes, aligned_row_bytes) = match layout {
            Some((actual, aligned, needed)) if data.len() >= needed => (actual, aligned),
            _ => panic!(
                "depth readback buffer too small: {} bytes for a {}x{} image",
                data.len(),
                width,
                height
            ),
        };

        let mut depth_values = Vec::with_capacity(cols * rows);
        for row in data.chunks(aligned_row_bytes).take(rows) {
            depth_values.extend(
                row[..actual_row_bytes]
                    .chunks_exact(BYTES_PER_TEXEL)
                    .map(|texel| f32::from_le_bytes([texel[0], texel[1], texel[2], texel[3]])),
            );
        }

        depth_values
    }

    /// Convert all NDC depth values to linear meters (as f64 for TBP precision)
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| f64::from(reverse_z_to_linear_depth(ndc, near, far)))
            .collect()
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(257), 512);
            assert_eq!(align_byte_size(1), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(0), 0);
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // Near plane (ndc=1 in reverse-Z)
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // Mid-range NDC value: the mapping is non-linear, so ndc=0.5
            // lands very close to the near plane.
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
            assert!(linear_mid > near && linear_mid < far);

            // Very close to far plane (ndc very small)
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
            assert!(linear_almost_far > 9.0);

            // Background (ndc=0)
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image, 4 bytes per pixel
            // Aligned row = 256 bytes, but actual = 8 bytes
            let width = 2u32;
            let height = 2u32;

            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows

            // Write test depth values
            // Row 0: [0.5, 0.6]
            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
            // Row 1: [0.7, 0.8]
            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            assert!((depth[0] - 0.5).abs() < 0.001);
            assert!((depth[1] - 0.6).abs() < 0.001);
            assert!((depth[2] - 0.7).abs() < 0.001);
            assert!((depth[3] - 0.8).abs() < 0.001);
        }

        #[test]
        fn test_extract_depth_unpadded_last_row() {
            // The final row may omit its trailing padding.
            let mut data = vec![0u8; 256 + 8];
            data[0..4].copy_from_slice(&0.1f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.2f32.to_le_bytes());
            data[256..260].copy_from_slice(&0.3f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.4f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, 2, 2);
            assert_eq!(depth, vec![0.1f32, 0.2, 0.3, 0.4]);
        }

        #[test]
        fn test_extract_depth_empty_dimensions() {
            assert!(extract_depth_with_alignment(&[], 0, 0).is_empty());
            assert!(extract_depth_with_alignment(&[0u8; 16], 0, 4).is_empty());
            assert!(extract_depth_with_alignment(&[0u8; 16], 4, 0).is_empty());
        }

        #[test]
        #[should_panic(expected = "too small")]
        fn test_extract_depth_short_buffer_panics() {
            // One row of data supplied, two rows requested.
            let data = vec![0u8; 8];
            let _ = extract_depth_with_alignment(&data, 2, 2);
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // Near plane should give near value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // Far plane (ndc=0) should give far value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Depth should increase as NDC decreases (reverse-Z)
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // Near plane
            assert!((linear[0] - near as f64).abs() < 0.001);
            // Far plane
            assert!((linear[3] - far as f64).abs() < 0.001);
            // All should be in range [near, far]
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Powers of two should stay the same if multiple of 256
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(1024), 1024);

            // Just under 256 should round up to 256
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just over 256 should round up to 512
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // Test with TBP default resolution
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            // Create aligned buffer
            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with incrementing values
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Verify first and last values
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
460
461// ============================================================================
462// Depth Readback Render Node
463// ============================================================================
464
465/// Label for the depth readback render graph node.
466#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
467struct DepthReadbackLabel;
468
/// Render node that copies the main camera's depth texture to a staging buffer.
/// This runs after the main pass completes, using ViewDepthTexture.
///
/// The node itself is stateless; everything it needs is read from the
/// render world (`DepthCaptureRequest`, `PendingDepthCaptureQueue`,
/// `RenderDevice`) each time it runs.
#[derive(Default)]
struct DepthReadbackNode;
473
474impl ViewNode for DepthReadbackNode {
475    type ViewQuery = (&'static ViewDepthTexture, &'static ExtractedCamera);
476
477    fn run<'w>(
478        &self,
479        _graph: &mut RenderGraphContext,
480        render_context: &mut RenderContext<'w>,
481        (view_depth_texture, camera): QueryItem<'w, Self::ViewQuery>,
482        world: &'w World,
483    ) -> Result<(), NodeRunError> {
484        let trace = render_trace_enabled();
485        let t0 = trace.then(std::time::Instant::now);
486
487        // Check if depth capture is requested
488        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
489            return Ok(());
490        };
491        if !request.requested {
492            return Ok(());
493        }
494
495        // Get the pending queue
496        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
497            return Ok(());
498        };
499
500        // Get texture size from camera viewport or physical size
501        let Some(physical_size) = camera.physical_target_size else {
502            return Ok(());
503        };
504        let width = physical_size.x;
505        let height = physical_size.y;
506
507        let render_device = world.resource::<RenderDevice>();
508
509        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
510        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
511        let unpadded_bytes_per_row = width * bytes_per_pixel;
512        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
513        let buffer_size = (padded_bytes_per_row * height) as u64;
514
515        // Create staging buffer for CPU readback
516        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
517            label: Some("depth_staging_buffer"),
518            size: buffer_size,
519            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
520            mapped_at_creation: false,
521        });
522
523        // Copy depth texture to staging buffer
524        let encoder = render_context.command_encoder();
525        encoder.copy_texture_to_buffer(
526            ImageCopyTexture {
527                texture: &view_depth_texture.texture,
528                mip_level: 0,
529                origin: Origin3d::ZERO,
530                aspect: TextureAspect::DepthOnly,
531            },
532            ImageCopyBuffer {
533                buffer: &staging_buffer,
534                layout: ImageDataLayout {
535                    offset: 0,
536                    bytes_per_row: Some(padded_bytes_per_row),
537                    rows_per_image: Some(height),
538                },
539            },
540            Extent3d {
541                width,
542                height,
543                depth_or_array_layers: 1,
544            },
545        );
546
547        // Push to queue for async processing (queue is Arc<Mutex<Vec>>)
548        if let Ok(mut pending) = queue.0.lock() {
549            pending.push(PendingDepthCapture {
550                buffer: staging_buffer,
551                width,
552                height,
553                near: request.near,
554                far: request.far,
555            });
556        }
557
558        if let Some(t0) = t0 {
559            eprintln!(
560                "[render_trace][node] DepthReadbackNode ms={:.3}",
561                t0.elapsed().as_secs_f64() * 1000.0
562            );
563        }
564
565        Ok(())
566    }
567}
568
569// ============================================================================
570// Depth Readback Plugin
571// ============================================================================
572
/// Plugin that sets up depth buffer readback from the GPU.
struct DepthReadbackPlugin {
    /// Destination for decoded depth; inserted into both the main app and
    /// the render app so they share one buffer.
    shared_depth: SharedDepthBuffer,
    /// Near plane distance seeded into the initial `DepthCaptureRequest`.
    near: f32,
    /// Far plane distance seeded into the initial `DepthCaptureRequest`.
    far: f32,
}
579
580impl Plugin for DepthReadbackPlugin {
581    fn build(&self, app: &mut App) {
582        use bevy::core_pipeline::core_3d::graph::Core3d;
583        use bevy::core_pipeline::core_3d::graph::Node3d;
584
585        // Insert shared depth buffer in main app
586        app.insert_resource(self.shared_depth.clone());
587        app.insert_resource(DepthCaptureRequest {
588            requested: false,
589            near: self.near,
590            far: self.far,
591        });
592
593        // Get render app
594        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
595            eprintln!("Failed to get RenderApp for depth readback");
596            return;
597        };
598
599        // Insert resources in render world
600        render_app.insert_resource(self.shared_depth.clone());
601        render_app.init_resource::<PendingDepthCaptureQueue>();
602
603        // Add extraction system to copy request from main world
604        render_app.add_systems(ExtractSchedule, extract_depth_request);
605
606        // Add system to process completed depth captures
607        render_app.add_systems(Render, collect_depth_captures.in_set(RenderSet::Cleanup));
608
609        // Register the depth readback node in the render graph
610        // Run after main pass completes (depth buffer is ready) but before tonemapping
611        render_app
612            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
613            .add_render_graph_edges(
614                Core3d,
615                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
616            );
617    }
618}
619
620/// Extract depth capture request from main world to render world
621fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
622    commands.insert_resource(DepthCaptureRequest {
623        requested: request.requested,
624        near: request.near,
625        far: request.far,
626    });
627}
628
/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
///
/// Drains `PendingDepthCaptureQueue`, then for each capture maps its staging
/// buffer for reading, polls the device until the map callback fires,
/// decodes the padded rows into NDC depth, converts to linear depth, and
/// stores the result in `SharedDepthBuffer`. If several captures are
/// pending, each overwrites the previous result, so the last one wins.
///
/// Returns without doing anything if the queue mutex cannot be locked.
///
/// NOTE(review): the polling loop has no timeout of its own; if the map
/// callback never fires this system blocks indefinitely. Confirm that an
/// outer watchdog covers that case.
fn collect_depth_captures(
    queue: Res<PendingDepthCaptureQueue>,
    shared_depth: Res<SharedDepthBuffer>,
    render_device: Res<RenderDevice>,
) {
    let trace = render_trace_enabled();
    let t_sys = trace.then(std::time::Instant::now);

    // Take all pending captures from the queue
    // (the lock is released at the end of this block, before any GPU waits)
    let pending_captures = {
        let Ok(mut pending) = queue.0.lock() else {
            return;
        };
        std::mem::take(&mut *pending)
    };

    if pending_captures.is_empty() {
        if let Some(t0) = t_sys {
            eprintln!(
                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
                t0.elapsed().as_secs_f64() * 1000.0
            );
        }
        return;
    }

    let pending_count = pending_captures.len();

    // Process each pending capture synchronously with device polling
    for pending in pending_captures {
        let width = pending.width;
        let height = pending.height;
        let near = pending.near;
        let far = pending.far;
        let buffer = pending.buffer;
        let shared = shared_depth.0.clone();

        // Use blocking sync approach with device polling (same as RGBA capture)
        let buffer_slice = buffer.slice(..);

        // Request mapping
        // (the callback only forwards the map result over the channel)
        let (tx, rx) = std::sync::mpsc::channel();
        buffer_slice.map_async(MapMode::Read, move |result| {
            let _ = tx.send(result);
        });

        let t_wait = trace.then(std::time::Instant::now);
        let mut poll_iters: u32 = 0;

        // Poll the device until mapping completes
        loop {
            render_device.poll(bevy::render::render_resource::Maintain::Poll);
            poll_iters += 1;
            match rx.try_recv() {
                Ok(Ok(())) => {
                    let data = buffer_slice.get_mapped_range();

                    // Extract depth values with alignment handling
                    let ndc_depth =
                        depth_helpers::extract_depth_with_alignment(&data, width, height);

                    // Release the mapped view before unmapping the buffer
                    drop(data);
                    buffer.unmap();

                    // Convert from reverse-Z NDC to linear depth in meters
                    let linear_depth =
                        depth_helpers::convert_depth_to_linear(&ndc_depth, near, far);

                    // Store in shared buffer
                    // (skipped if the mutex cannot be locked)
                    if let Ok(mut guard) = shared.lock() {
                        *guard = Some((linear_depth, width, height));
                    }
                    break;
                }
                Ok(Err(e)) => {
                    // Mapping failed: report it and move on to the next capture
                    eprintln!("Failed to map depth buffer: {:?}", e);
                    break;
                }
                Err(std::sync::mpsc::TryRecvError::Empty) => {
                    // Keep polling
                    std::thread::sleep(std::time::Duration::from_millis(1));
                }
                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
                    // The callback was dropped without sending a result
                    eprintln!("Depth buffer mapping channel disconnected");
                    break;
                }
            }
        }

        if let Some(t_wait) = t_wait {
            eprintln!(
                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
                poll_iters,
                t_wait.elapsed().as_secs_f64() * 1000.0
            );
        }
    }

    if let Some(t0) = t_sys {
        eprintln!(
            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
            pending_count,
            t0.elapsed().as_secs_f64() * 1000.0
        );
    }
}
736
737// ============================================================================
738// Image Copy Infrastructure (for headless rendering)
739// ============================================================================
740
/// Label for the image copy render graph node.
///
/// Presumably used to register `ImageCopyDriver` in the render graph; the
/// registration is not visible in this part of the file.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
744
/// Component that marks an image for GPU-to-CPU copying.
///
/// `ImageCopyDriver` skips copiers whose `enabled` flag is `false` and
/// copiers whose `src_image` has no GPU image available yet.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target)
    src_image: Handle<Image>,
    /// Whether to capture on this frame
    enabled: bool,
}
753
/// Resource containing all ImageCopiers for the render world.
///
/// `ImageCopyDriver` iterates this list on every run. How the list is
/// populated is not visible here (presumably by an extract system).
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
757
/// Pending image capture for async processing.
struct PendingImageCapture {
    /// Staging buffer that receives the texture copy.
    buffer: Buffer,
    /// Image width in pixels.
    width: u32,
    /// Image height in pixels.
    height: u32,
    /// Row pitch of `buffer` in bytes, including alignment padding.
    padded_bytes_per_row: u32,
}
765
/// Queue for pending image captures.
///
/// `ImageCopyDriver` locks the mutex to append to this queue; the consumer
/// is not visible in this part of the file.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
769
/// Shared buffer for captured RGBA data.
///
/// Holds `None` until a capture is stored. The tuple is presumably
/// `(rgba_bytes, width, height)`, mirroring `SharedDepthBuffer` — confirm
/// against the code that fills it. Cloning shares the same underlying
/// buffer (the wrapper holds an `Arc`).
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
774
/// Render graph node that copies render target images to staging buffers.
///
/// Stateless; its `Node::run` reads the copiers, GPU images and pending
/// queue from the render world each time it runs.
struct ImageCopyDriver;
777
778impl Node for ImageCopyDriver {
779    fn run(
780        &self,
781        _graph: &mut RenderGraphContext,
782        _render_context: &mut RenderContext,
783        world: &World,
784    ) -> Result<(), NodeRunError> {
785        let trace = render_trace_enabled();
786        let t0 = trace.then(std::time::Instant::now);
787
788        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
789            return Ok(());
790        };
791
792        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
793            return Ok(());
794        };
795
796        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
797            return Ok(());
798        };
799
800        let render_device = world.resource::<RenderDevice>();
801
802        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
803            return Ok(());
804        };
805
806        for image_copier in image_copiers.0.iter() {
807            if !image_copier.enabled {
808                continue;
809            }
810
811            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
812                continue;
813            };
814
815            let width = gpu_image.size.x;
816            let height = gpu_image.size.y;
817
818            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
819            let block_dimensions = gpu_image.texture_format.block_dimensions();
820            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
821
822            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
823                (width as usize / block_dimensions.0 as usize) * block_size as usize,
824            );
825
826            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
827
828            // Create staging buffer for CPU readback
829            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
830                label: Some("image_copy_staging_buffer"),
831                size: buffer_size,
832                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
833                mapped_at_creation: false,
834            });
835
836            // Create command encoder for the copy operation
837            let mut encoder =
838                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
839
840            let texture_extent = Extent3d {
841                width,
842                height,
843                depth_or_array_layers: 1,
844            };
845
846            // Copy texture to buffer
847            encoder.copy_texture_to_buffer(
848                gpu_image.texture.as_image_copy(),
849                ImageCopyBuffer {
850                    buffer: &staging_buffer,
851                    layout: ImageDataLayout {
852                        offset: 0,
853                        bytes_per_row: Some(padded_bytes_per_row as u32),
854                        rows_per_image: None,
855                    },
856                },
857                texture_extent,
858            );
859
860            // Submit the copy command
861            render_queue.submit(std::iter::once(encoder.finish()));
862
863            // Queue for async processing
864            if let Ok(mut pending) = queue.0.lock() {
865                pending.push(PendingImageCapture {
866                    buffer: staging_buffer,
867                    width,
868                    height,
869                    padded_bytes_per_row: padded_bytes_per_row as u32,
870                });
871            }
872        }
873
874        if let Some(t0) = t0 {
875            eprintln!(
876                "[render_trace][node] ImageCopyDriver ms={:.3}",
877                t0.elapsed().as_secs_f64() * 1000.0
878            );
879        }
880
881        Ok(())
882    }
883}
884
885/// Extract ImageCopier components to render world
886fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
887    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
888}
889
890/// Process completed image captures
891fn collect_image_captures(
892    queue: Res<PendingImageCaptureQueue>,
893    shared_rgba: Res<SharedRgbaBuffer>,
894    render_device: Res<RenderDevice>,
895) {
896    let trace = render_trace_enabled();
897    let t_sys = trace.then(std::time::Instant::now);
898
899    let pending_captures = {
900        let Ok(mut pending) = queue.0.lock() else {
901            return;
902        };
903        std::mem::take(&mut *pending)
904    };
905
906    if pending_captures.is_empty() {
907        if let Some(t0) = t_sys {
908            eprintln!(
909                "[render_trace][sys] collect_image_captures empty ms={:.3}",
910                t0.elapsed().as_secs_f64() * 1000.0
911            );
912        }
913        return;
914    }
915
916    let pending_count = pending_captures.len();
917
918    for pending in pending_captures {
919        let width = pending.width;
920        let height = pending.height;
921        let padded_bytes_per_row = pending.padded_bytes_per_row;
922        let buffer = pending.buffer;
923        let shared = shared_rgba.0.clone();
924
925        // Use blocking sync approach with device polling
926        let buffer_slice = buffer.slice(..);
927
928        // Request mapping
929        let (tx, rx) = std::sync::mpsc::channel();
930        buffer_slice.map_async(MapMode::Read, move |result| {
931            let _ = tx.send(result);
932        });
933
934        // Poll the device until mapping completes (with timeout)
935        let start = std::time::Instant::now();
936        let timeout = std::time::Duration::from_secs(10);
937        let mut poll_iters: u32 = 0;
938        loop {
939            render_device.poll(bevy::render::render_resource::Maintain::Poll);
940            poll_iters += 1;
941
942            if start.elapsed() > timeout {
943                eprintln!(
944                    "Warning: Buffer mapping timeout after {:?}",
945                    start.elapsed()
946                );
947                break;
948            }
949
950            match rx.try_recv() {
951                Ok(Ok(())) => {
952                    let data = buffer_slice.get_mapped_range();
953
954                    // Extract pixels with alignment handling
955                    let bytes_per_pixel = 4u32;
956                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
957                    let padded_row_bytes = padded_bytes_per_row as usize;
958
959                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
960                    for y in 0..height as usize {
961                        let row_start = y * padded_row_bytes;
962                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
963                    }
964
965                    drop(data);
966                    buffer.unmap();
967
968                    if let Ok(mut guard) = shared.lock() {
969                        *guard = Some((rgba, width, height));
970                    }
971                    break;
972                }
973                Ok(Err(e)) => {
974                    eprintln!("Failed to map image buffer: {:?}", e);
975                    break;
976                }
977                Err(std::sync::mpsc::TryRecvError::Empty) => {
978                    // Keep polling
979                    std::thread::sleep(std::time::Duration::from_millis(1));
980                }
981                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
982                    eprintln!("Image buffer mapping channel disconnected");
983                    break;
984                }
985            }
986        }
987
988        if trace {
989            eprintln!(
990                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
991                poll_iters,
992                start.elapsed().as_secs_f64() * 1000.0
993            );
994        }
995    }
996
997    if let Some(t0) = t_sys {
998        eprintln!(
999            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
1000            pending_count,
1001            t0.elapsed().as_secs_f64() * 1000.0
1002        );
1003    }
1004}
1005
1006/// Plugin for headless image copy
1007struct ImageCopyPlugin {
1008    shared_rgba: SharedRgbaBuffer,
1009}
1010
1011impl Plugin for ImageCopyPlugin {
1012    fn build(&self, app: &mut App) {
1013        use bevy::render::render_graph::RenderGraph;
1014
1015        app.insert_resource(self.shared_rgba.clone());
1016
1017        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
1018            return;
1019        };
1020
1021        render_app.insert_resource(self.shared_rgba.clone());
1022        render_app.init_resource::<ImageCopiers>();
1023        render_app.init_resource::<PendingImageCaptureQueue>();
1024
1025        render_app.add_systems(ExtractSchedule, extract_image_copiers);
1026        render_app.add_systems(Render, collect_image_captures.in_set(RenderSet::Cleanup));
1027
1028        // Add image copy node to render graph (runs after camera driver)
1029        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1030        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1031        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1032    }
1033}
1034
// ============================================================================
// Render Request and Components
// ============================================================================
1038
1039/// Configuration passed to the Bevy app
1040#[derive(Resource, Clone)]
1041struct RenderRequest {
1042    mesh_path: String,
1043    texture_path: String,
1044    camera_transform: Transform,
1045    object_rotation: ObjectRotation,
1046    config: RenderConfig,
1047}
1048
1049/// Marker for the rendered object
1050#[derive(Component)]
1051struct RenderedObject;
1052
1053/// Marker for the render camera
1054#[derive(Component)]
1055struct RenderCamera;
1056
1057/// Handle for the loaded texture
1058#[derive(Resource)]
1059struct LoadedTexture(Handle<Image>);
1060
1061/// Handle for the loaded scene
1062#[derive(Resource)]
1063struct LoadedScene(Handle<Scene>);
1064
1065/// Shared output for extracting render results
1066#[derive(Resource, Clone)]
1067struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1068
1069/// Handle for the render target image
1070#[derive(Resource)]
1071#[allow(dead_code)]
1072struct RenderTargetImage(Handle<Image>);
1073
1074/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1075#[derive(Resource)]
1076struct HeadlessBatchSequence {
1077    viewpoints: Vec<Transform>,
1078    current_index: usize,
1079    outputs: Vec<RenderOutput>,
1080    warmup_frames_remaining: u32,
1081    done: bool,
1082}
1083
1084impl HeadlessBatchSequence {
1085    fn new(viewpoints: Vec<Transform>) -> Self {
1086        let capacity = viewpoints.len();
1087        Self {
1088            viewpoints,
1089            current_index: 0,
1090            outputs: Vec::with_capacity(capacity),
1091            warmup_frames_remaining: 0,
1092            done: capacity == 0,
1093        }
1094    }
1095
1096    fn current_viewpoint(&self) -> Option<Transform> {
1097        self.viewpoints.get(self.current_index).cloned()
1098    }
1099}
1100
1101/// Perform headless rendering of a YCB object.
1102///
1103/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1104/// require any window surfaces. This should work on WSL2 and other environments
1105/// without display servers.
1106///
1107/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1108/// for results and terminates the process once the render is complete.
1109#[allow(dead_code)]
1110pub fn render_headless(
1111    object_dir: &Path,
1112    camera_transform: &Transform,
1113    object_rotation: &ObjectRotation,
1114    config: &RenderConfig,
1115) -> Result<RenderOutput, RenderError> {
1116    // Canonicalize paths so Bevy's asset server can find them regardless of
1117    // caller working directory. Relative paths like "../../ycb" pass the
1118    // exists() check but Bevy resolves assets against its own root.
1119    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1120        RenderError::RenderFailed(format!(
1121            "Cannot canonicalize object directory {}: {}",
1122            object_dir.display(),
1123            e
1124        ))
1125    })?;
1126    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1127    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1128
1129    if !mesh_path.exists() {
1130        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1131    }
1132    if !texture_path.exists() {
1133        return Err(RenderError::TextureNotFound(
1134            texture_path.display().to_string(),
1135        ));
1136    }
1137
1138    let request = RenderRequest {
1139        mesh_path: mesh_path.display().to_string(),
1140        texture_path: texture_path.display().to_string(),
1141        camera_transform: *camera_transform,
1142        object_rotation: object_rotation.clone(),
1143        config: config.clone(),
1144    };
1145
1146    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1147    let output_clone = shared_output.clone();
1148
1149    // Shared buffer for RGBA data from headless render target
1150    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1151
1152    // Shared buffer for depth readback
1153    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1154
1155    // Create a temp file path for fallback output serialization
1156    let temp_path =
1157        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1158
1159    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1160    let output_poll_for_timeout = shared_output.clone();
1161    std::thread::spawn(move || {
1162        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1163        let start = std::time::Instant::now();
1164        let poll_interval = std::time::Duration::from_millis(100);
1165
1166        loop {
1167            // Check if we have a result
1168            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1169                if guard.is_some() {
1170                    // Output is ready, Bevy will exit via AppExit event
1171                    return; // Exit watchdog thread, Bevy will handle exit
1172                }
1173            }
1174
1175            if start.elapsed() > timeout {
1176                eprintln!(
1177                    "Error: Render timeout after {} seconds",
1178                    RENDER_TIMEOUT_SECS
1179                );
1180                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1181                // Force exit on timeout (this is a failure case)
1182                std::process::exit(1);
1183            }
1184
1185            std::thread::sleep(poll_interval);
1186        }
1187    });
1188
1189    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1190    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1191    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1192
1193    // App::run() returned - check shared_output for result
1194    if let Ok(guard) = shared_output.0.lock() {
1195        if let Some(output) = guard.as_ref() {
1196            return Ok(output.clone());
1197        }
1198    }
1199
1200    // Fallback: try to read from temp file (for legacy compatibility)
1201    if temp_path.exists() {
1202        if let Ok(output) = read_output_from_file(&temp_path) {
1203            let _ = std::fs::remove_file(&temp_path);
1204            return Ok(output);
1205        }
1206    }
1207
1208    Err(RenderError::RenderFailed(
1209        "Render did not complete".to_string(),
1210    ))
1211}
1212
1213/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1214///
1215/// All captures share the same object, object rotation, and render configuration.
1216/// This is the fast path used by the batch API for episode-style workloads.
1217pub fn render_headless_sequence(
1218    object_dir: &Path,
1219    viewpoints: &[Transform],
1220    object_rotation: &ObjectRotation,
1221    config: &RenderConfig,
1222) -> Result<Vec<RenderOutput>, RenderError> {
1223    if viewpoints.is_empty() {
1224        return Ok(Vec::new());
1225    }
1226
1227    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1228        RenderError::RenderFailed(format!(
1229            "Cannot canonicalize object directory {}: {}",
1230            object_dir.display(),
1231            e
1232        ))
1233    })?;
1234    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1235    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1236
1237    if !mesh_path.exists() {
1238        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1239    }
1240    if !texture_path.exists() {
1241        return Err(RenderError::TextureNotFound(
1242            texture_path.display().to_string(),
1243        ));
1244    }
1245
1246    let request = RenderRequest {
1247        mesh_path: mesh_path.display().to_string(),
1248        texture_path: texture_path.display().to_string(),
1249        camera_transform: viewpoints[0],
1250        object_rotation: object_rotation.clone(),
1251        config: config.clone(),
1252    };
1253
1254    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1255    let rgba_clone = shared_rgba.clone();
1256
1257    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1258    let depth_clone = shared_depth.clone();
1259
1260    let mut app = App::new();
1261    app.add_plugins(
1262        DefaultPlugins
1263            .set(WindowPlugin {
1264                primary_window: None,
1265                exit_condition: ExitCondition::DontExit,
1266                ..default()
1267            })
1268            .disable::<bevy::winit::WinitPlugin>()
1269            .disable::<LogPlugin>()
1270            .disable::<TerminalCtrlCHandlerPlugin>(),
1271    )
1272    .add_plugins(ObjPlugin)
1273    .add_plugins(ImageCopyPlugin {
1274        shared_rgba: rgba_clone,
1275    })
1276    .add_plugins(DepthReadbackPlugin {
1277        shared_depth: depth_clone,
1278        near: config.near_plane,
1279        far: config.far_plane,
1280    })
1281    .insert_resource(request)
1282    .insert_resource(shared_rgba)
1283    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1284    .init_resource::<RenderState>()
1285    .add_systems(Startup, setup_headless_scene)
1286    .add_systems(
1287        Update,
1288        (
1289            check_assets_loaded,
1290            apply_materials,
1291            tick_headless_batch_warmup,
1292            request_headless_capture,
1293            check_headless_capture_ready,
1294            extract_and_continue_headless_batch,
1295        )
1296            .chain(),
1297    );
1298
1299    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1300    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1301    // normal startup phases before driving the headless batch loop ourselves.
1302    let trace_outer = render_trace_enabled();
1303    let t_finish = std::time::Instant::now();
1304    app.finish();
1305    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1306    let t_cleanup = std::time::Instant::now();
1307    app.cleanup();
1308    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1309    if trace_outer {
1310        eprintln!(
1311            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1312            finish_ms, cleanup_ms
1313        );
1314    }
1315
1316    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1317    let start = std::time::Instant::now();
1318
1319    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1320    let mut update_idx: u32 = 0;
1321    let mut last_completed_outputs: usize = 0;
1322    let mut viewpoint_start = std::time::Instant::now();
1323
1324    loop {
1325        if start.elapsed() > timeout {
1326            return Err(RenderError::RenderTimeout {
1327                duration_secs: RENDER_TIMEOUT_SECS,
1328            });
1329        }
1330
1331        let update_start = std::time::Instant::now();
1332        app.update();
1333        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1334
1335        if trace {
1336            let batch = app.world().resource::<HeadlessBatchSequence>();
1337            let warmup = batch.warmup_frames_remaining;
1338            let current = batch.current_index;
1339            let completed = batch.outputs.len();
1340            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1341            eprintln!(
1342                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1343                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1344            );
1345            if completed > last_completed_outputs {
1346                eprintln!(
1347                    "[render_trace] viewpoint {} finished in {:.2} ms",
1348                    completed - 1,
1349                    vp_ms
1350                );
1351                last_completed_outputs = completed;
1352                viewpoint_start = std::time::Instant::now();
1353            }
1354        }
1355
1356        update_idx += 1;
1357
1358        if app.world().resource::<HeadlessBatchSequence>().done {
1359            break;
1360        }
1361    }
1362
1363    if trace {
1364        eprintln!(
1365            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1366            start.elapsed().as_secs_f64() * 1000.0,
1367            viewpoints.len()
1368        );
1369    }
1370
1371    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1372    if batch.outputs.len() != viewpoints.len() {
1373        return Err(RenderError::RenderFailed(format!(
1374            "Batch render produced {} outputs for {} viewpoints",
1375            batch.outputs.len(),
1376            viewpoints.len()
1377        )));
1378    }
1379
1380    Ok(std::mem::take(&mut batch.outputs))
1381}
1382
1383/// Assemble the shared single-render headless Bevy app.
1384fn build_headless_app(
1385    request: RenderRequest,
1386    shared_output: SharedOutput,
1387    shared_rgba: SharedRgbaBuffer,
1388    shared_depth: SharedDepthBuffer,
1389) -> App {
1390    let near = request.config.near_plane;
1391    let far = request.config.far_plane;
1392
1393    let mut app = App::new();
1394    app.add_plugins(
1395        DefaultPlugins
1396            .set(WindowPlugin {
1397                primary_window: None,
1398                exit_condition: ExitCondition::DontExit,
1399                ..default()
1400            })
1401            .disable::<bevy::winit::WinitPlugin>()
1402            .disable::<LogPlugin>()
1403            .disable::<TerminalCtrlCHandlerPlugin>(),
1404    )
1405    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1406        1.0 / 60.0,
1407    )))
1408    .add_plugins(ObjPlugin)
1409    .add_plugins(ImageCopyPlugin {
1410        shared_rgba: shared_rgba.clone(),
1411    })
1412    .add_plugins(DepthReadbackPlugin {
1413        shared_depth,
1414        near,
1415        far,
1416    })
1417    .insert_resource(request)
1418    .insert_resource(shared_output)
1419    .insert_resource(shared_rgba)
1420    .init_resource::<RenderState>()
1421    .add_systems(Startup, setup_headless_scene)
1422    .add_systems(
1423        Update,
1424        (
1425            check_assets_loaded,
1426            apply_materials,
1427            request_headless_capture,
1428            check_headless_capture_ready,
1429            extract_and_exit_headless,
1430        )
1431            .chain(),
1432    );
1433    app
1434}
1435
1436/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1437#[allow(dead_code)]
1438fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1439    let mut data = Vec::new();
1440
1441    // Header: width, height, rgba_len, depth_len
1442    data.extend_from_slice(&output.width.to_le_bytes());
1443    data.extend_from_slice(&output.height.to_le_bytes());
1444    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1445    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1446
1447    // RGBA data
1448    data.extend_from_slice(&output.rgba);
1449
1450    // Depth data (as f64 bytes for TBP precision)
1451    for d in &output.depth {
1452        data.extend_from_slice(&d.to_le_bytes());
1453    }
1454
1455    // Intrinsics (f64 for TBP precision)
1456    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1457    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1458    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1459    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1460    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1461    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1462
1463    // Camera transform (translation + rotation quaternion)
1464    let t = output.camera_transform.translation;
1465    let r = output.camera_transform.rotation;
1466    data.extend_from_slice(&t.x.to_le_bytes());
1467    data.extend_from_slice(&t.y.to_le_bytes());
1468    data.extend_from_slice(&t.z.to_le_bytes());
1469    data.extend_from_slice(&r.x.to_le_bytes());
1470    data.extend_from_slice(&r.y.to_le_bytes());
1471    data.extend_from_slice(&r.z.to_le_bytes());
1472    data.extend_from_slice(&r.w.to_le_bytes());
1473
1474    // Object rotation (f64)
1475    let or = &output.object_rotation;
1476    data.extend_from_slice(&or.pitch.to_le_bytes());
1477    data.extend_from_slice(&or.yaw.to_le_bytes());
1478    data.extend_from_slice(&or.roll.to_le_bytes());
1479
1480    data
1481}
1482
1483/// Read RenderOutput from serialized file
1484fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1485    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1486    let mut data = Vec::new();
1487    file.read_to_end(&mut data)
1488        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1489
1490    let mut cursor = 0;
1491
1492    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1493        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1494        *cursor += 4;
1495        val
1496    };
1497
1498    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1499        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1500        *cursor += 4;
1501        val
1502    };
1503
1504    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1505        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1506        *cursor += 8;
1507        val
1508    };
1509
1510    let width = read_u32(&data, &mut cursor);
1511    let height = read_u32(&data, &mut cursor);
1512    let rgba_len = read_u32(&data, &mut cursor) as usize;
1513    let depth_len = read_u32(&data, &mut cursor) as usize;
1514
1515    let rgba = data[cursor..cursor + rgba_len].to_vec();
1516    cursor += rgba_len;
1517
1518    // Depth data (f64 for TBP precision)
1519    let mut depth = Vec::with_capacity(depth_len);
1520    for _ in 0..depth_len {
1521        depth.push(read_f64(&data, &mut cursor));
1522    }
1523
1524    // Intrinsics (f64 for TBP precision)
1525    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1526    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1527    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1528
1529    // Camera transform (f32 for Bevy compatibility)
1530    let tx = read_f32(&data, &mut cursor);
1531    let ty = read_f32(&data, &mut cursor);
1532    let tz = read_f32(&data, &mut cursor);
1533    let rx = read_f32(&data, &mut cursor);
1534    let ry = read_f32(&data, &mut cursor);
1535    let rz = read_f32(&data, &mut cursor);
1536    let rw = read_f32(&data, &mut cursor);
1537
1538    // Object rotation (f64)
1539    let pitch = read_f64(&data, &mut cursor);
1540    let yaw = read_f64(&data, &mut cursor);
1541    let roll = read_f64(&data, &mut cursor);
1542
1543    Ok(RenderOutput {
1544        rgba,
1545        depth,
1546        width,
1547        height,
1548        intrinsics: crate::CameraIntrinsics {
1549            focal_length,
1550            principal_point,
1551            image_size,
1552        },
1553        camera_transform: Transform {
1554            translation: Vec3::new(tx, ty, tz),
1555            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1556            scale: Vec3::ONE,
1557        },
1558        object_rotation: ObjectRotation { pitch, yaw, roll },
1559        target_point: Vec3::ZERO,
1560        targeting_policy: TargetingPolicy::Origin,
1561    })
1562}
1563
1564/// Setup the scene with camera, lighting, and object
1565#[allow(dead_code)]
1566fn setup_scene(
1567    mut commands: Commands,
1568    asset_server: Res<AssetServer>,
1569    request: Res<RenderRequest>,
1570    mut _materials: ResMut<Assets<StandardMaterial>>,
1571) {
1572    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1573    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1574    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1575    let fov = request.config.fov_radians();
1576    commands.spawn((
1577        Camera3d::default(),
1578        Camera {
1579            hdr: true,
1580            ..default()
1581        },
1582        Projection::Perspective(PerspectiveProjection {
1583            fov,
1584            near: request.config.near_plane,
1585            far: request.config.far_plane,
1586            ..default()
1587        }),
1588        Msaa::Off,
1589        request.camera_transform,
1590        Tonemapping::None, // Accurate colors for software rendering
1591        DepthPrepass,
1592        NormalPrepass,
1593        RenderCamera,
1594    ));
1595
1596    // Ambient light (from config)
1597    let lighting = &request.config.lighting;
1598    commands.insert_resource(AmbientLight {
1599        color: Color::WHITE,
1600        brightness: lighting.ambient_brightness,
1601    });
1602
1603    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1604    if lighting.key_light_intensity > 0.0 {
1605        commands.spawn((
1606            PointLight {
1607                intensity: lighting.key_light_intensity,
1608                shadows_enabled: lighting.shadows_enabled,
1609                ..default()
1610            },
1611            Transform::from_xyz(
1612                lighting.key_light_position[0],
1613                lighting.key_light_position[1],
1614                lighting.key_light_position[2],
1615            ),
1616        ));
1617    }
1618
1619    // Fill light (from config)
1620    if lighting.fill_light_intensity > 0.0 {
1621        commands.spawn((
1622            PointLight {
1623                intensity: lighting.fill_light_intensity,
1624                shadows_enabled: lighting.shadows_enabled,
1625                ..default()
1626            },
1627            Transform::from_xyz(
1628                lighting.fill_light_position[0],
1629                lighting.fill_light_position[1],
1630                lighting.fill_light_position[2],
1631            ),
1632        ));
1633    }
1634
1635    // Load the scene
1636    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1637    commands.insert_resource(LoadedScene(scene_handle.clone()));
1638
1639    // Load the texture
1640    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1641    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1642
1643    // Create material with texture (will be applied later)
1644    let _material = _materials.add(StandardMaterial {
1645        base_color_texture: Some(texture_handle),
1646        unlit: true,
1647        ..default()
1648    });
1649
1650    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
1651    commands.spawn((
1652        SceneRoot(scene_handle),
1653        Transform::from_rotation(request.object_rotation.to_quat()),
1654        RenderedObject,
1655    ));
1656
1657    println!("Scene setup complete");
1658}
1659
1660/// Check if assets are loaded
1661fn check_assets_loaded(
1662    mut state: ResMut<RenderState>,
1663    asset_server: Res<AssetServer>,
1664    scene: Option<Res<LoadedScene>>,
1665    texture: Option<Res<LoadedTexture>>,
1666) {
1667    let trace = render_trace_enabled();
1668    let was_scene_loaded = state.scene_loaded;
1669    let was_texture_loaded = state.texture_loaded;
1670
1671    state.frame_count += 1;
1672
1673    if state.scene_loaded && state.texture_loaded {
1674        return;
1675    }
1676
1677    if let Some(scene) = scene {
1678        match asset_server.get_load_state(&scene.0) {
1679            Some(LoadState::Loaded) => {
1680                state.scene_loaded = true;
1681            }
1682            Some(LoadState::Failed(_)) => {}
1683            _ => {}
1684        }
1685    }
1686
1687    if let Some(texture) = texture {
1688        match asset_server.get_load_state(&texture.0) {
1689            Some(LoadState::Loaded) => {
1690                state.texture_loaded = true;
1691            }
1692            Some(LoadState::Failed(_)) => {}
1693            _ => {}
1694        }
1695    }
1696
1697    if trace {
1698        if !was_scene_loaded && state.scene_loaded {
1699            eprintln!(
1700                "[render_trace][coldinit] scene_loaded frame_count={}",
1701                state.frame_count
1702            );
1703        }
1704        if !was_texture_loaded && state.texture_loaded {
1705            eprintln!(
1706                "[render_trace][coldinit] texture_loaded frame_count={}",
1707                state.frame_count
1708            );
1709        }
1710    }
1711}
1712
1713/// Apply materials to loaded meshes
1714fn apply_materials(
1715    mut state: ResMut<RenderState>,
1716    texture: Option<Res<LoadedTexture>>,
1717    mut materials: ResMut<Assets<StandardMaterial>>,
1718    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1719    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1720) {
1721    if !state.scene_loaded || !state.texture_loaded || state.capture_ready {
1722        return;
1723    }
1724
1725    state.frame_count += 1;
1726
1727    let Some(tex) = texture else { return };
1728
1729    if !state.materials_applied {
1730        // The scene hierarchy is instantiated asynchronously after the asset
1731        // load event fires; wait until mesh entities exist before applying.
1732        if mesh_query.is_empty() {
1733            return;
1734        }
1735
1736        let textured_material = materials.add(StandardMaterial {
1737            base_color_texture: Some(tex.0.clone()),
1738            unlit: true,
1739            ..default()
1740        });
1741
1742        for mut mat in mesh_query.iter_mut() {
1743            mat.0 = textured_material.clone();
1744        }
1745
1746        state.materials_applied = true;
1747        state.materials_applied_frame = state.frame_count;
1748    }
1749
1750    // Two frames after material application is enough for the render graph
1751    // to pick up the new material on native GPU. The previous 60-frame gate
1752    // was a legacy llvmpipe software-rendering cushion.
1753    if state.frame_count >= state.materials_applied_frame + 2 {
1754        let was_ready = state.capture_ready;
1755        state.capture_ready = true;
1756        if render_trace_enabled() && !was_ready {
1757            eprintln!(
1758                "[render_trace][coldinit] capture_ready frame_count={}",
1759                state.frame_count
1760            );
1761        }
1762    }
1763}
1764
1765/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
1766#[allow(dead_code)]
1767fn request_screenshot(
1768    mut commands: Commands,
1769    mut state: ResMut<RenderState>,
1770    shared_image: Res<SharedImageBuffer>,
1771    mut depth_request: ResMut<DepthCaptureRequest>,
1772) {
1773    if !state.capture_ready || state.screenshot_requested {
1774        return;
1775    }
1776
1777    // Clone the Arc for the observer closure
1778    let image_buffer = shared_image.0.clone();
1779
1780    // Also request depth capture
1781    depth_request.requested = true;
1782    println!("Depth capture requested");
1783
1784    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
1785    println!("Requesting screenshot via Screenshot entity");
1786    commands.spawn(Screenshot::primary_window()).observe(
1787        move |trigger: Trigger<ScreenshotCaptured>| {
1788            // ScreenshotCaptured derefs to Image
1789            let image: &Image = trigger.event();
1790
1791            // Get dimensions
1792            let width = image.texture_descriptor.size.width;
1793            let height = image.texture_descriptor.size.height;
1794
1795            // Get raw image data - Bevy 0.15 Image.data is Vec<u8>
1796            let rgba_data = image.data.clone();
1797
1798            // Store in shared buffer
1799            if let Ok(mut guard) = image_buffer.lock() {
1800                *guard = Some((rgba_data, width, height));
1801            }
1802        },
1803    );
1804
1805    state.screenshot_requested = true;
1806    println!("Screenshot requested");
1807}
1808
1809/// Check if screenshot callback has completed
1810#[allow(dead_code)]
1811fn check_screenshot_ready(
1812    mut state: ResMut<RenderState>,
1813    shared_image: Res<SharedImageBuffer>,
1814    shared_depth: Res<SharedDepthBuffer>,
1815    request: Res<RenderRequest>,
1816) {
1817    if !state.screenshot_requested || state.captured {
1818        return;
1819    }
1820
1821    // Increment frame count while waiting for capture
1822    state.frame_count += 1;
1823
1824    // Check if RGBA callback has written data
1825    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
1826        if let Some((rgba_data, width, height)) = guard.as_ref() {
1827            if state.rgba_data.is_none() {
1828                state.rgba_data = Some(rgba_data.clone());
1829                state.image_width = *width;
1830                state.image_height = *height;
1831            }
1832            true
1833        } else {
1834            false
1835        }
1836    } else {
1837        false
1838    };
1839
1840    // Check if depth readback has completed
1841    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
1842        if let Some((depth_data, _width, _height)) = guard.as_ref() {
1843            if state.depth_data.is_none() {
1844                state.depth_data = Some(depth_data.clone());
1845            }
1846            true
1847        } else {
1848            false
1849        }
1850    } else {
1851        false
1852    };
1853
1854    // If depth readback failed or is taking too long, fall back to placeholder
1855    // (This allows graceful degradation on systems where depth readback fails)
1856    if rgba_ready && !depth_ready && state.frame_count > 60 {
1857        let camera_dist = request.camera_transform.translation.length() as f64;
1858        let pixel_count = (state.image_width * state.image_height) as usize;
1859        state.depth_data = Some(vec![camera_dist; pixel_count]);
1860    }
1861
1862    // Mark as captured when both RGBA and depth are ready
1863    if state.rgba_data.is_some() && state.depth_data.is_some() {
1864        state.captured = true;
1865    }
1866}
1867
1868/// Extract results and exit
1869#[allow(dead_code)]
1870fn extract_and_exit(
1871    mut state: ResMut<RenderState>,
1872    request: Res<RenderRequest>,
1873    shared_output: Res<SharedOutput>,
1874    mut commands: Commands,
1875    windows: Query<Entity, With<bevy::window::Window>>,
1876) {
1877    // Handle delayed exit after closing window
1878    if state.exit_requested {
1879        state.exit_frame_count += 1;
1880        // After a few frames with no window, Bevy should exit
1881        return;
1882    }
1883
1884    if !state.captured {
1885        return;
1886    }
1887
1888    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
1889        // Use actual captured dimensions (may differ from config if window was resized)
1890        let width = state.image_width;
1891        let height = state.image_height;
1892
1893        // Compute intrinsics from the same TBP zoom formula as the camera projection.
1894        let intrinsics = request.config.intrinsics_for_size(width, height);
1895
1896        let output = RenderOutput {
1897            rgba: rgba.clone(),
1898            depth: depth.clone(),
1899            width,
1900            height,
1901            intrinsics,
1902            camera_transform: request.camera_transform,
1903            object_rotation: request.object_rotation.clone(),
1904            target_point: Vec3::ZERO,
1905            targeting_policy: TargetingPolicy::Origin,
1906        };
1907
1908        if let Ok(mut guard) = shared_output.0.lock() {
1909            *guard = Some(output);
1910            drop(guard); // Release lock immediately
1911
1912            // Small delay to allow watchdog to detect output before window close
1913            std::thread::sleep(std::time::Duration::from_millis(200));
1914        }
1915
1916        // Close all windows to trigger app exit
1917        // eprintln!("Closing windows to trigger exit...");
1918        for window_entity in windows.iter() {
1919            commands.entity(window_entity).despawn();
1920        }
1921        state.exit_requested = true;
1922    }
1923}
1924
1925// ============================================================================
1926// Headless Rendering Systems (no window surfaces)
1927// ============================================================================
1928
1929/// Setup the scene for headless rendering with RenderTarget::Image
1930fn setup_headless_scene(
1931    mut commands: Commands,
1932    mut images: ResMut<Assets<Image>>,
1933    asset_server: Res<AssetServer>,
1934    request: Res<RenderRequest>,
1935    mut _materials: ResMut<Assets<StandardMaterial>>,
1936) {
1937    let trace = render_trace_enabled();
1938    let t0 = trace.then(std::time::Instant::now);
1939
1940    #[cfg(test)]
1941    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1942
1943    let width = request.config.width;
1944    let height = request.config.height;
1945
1946    // Create render target image with proper texture usages
1947    let size = Extent3d {
1948        width,
1949        height,
1950        depth_or_array_layers: 1,
1951    };
1952
1953    let mut render_target_image = Image::new_fill(
1954        size,
1955        TextureDimension::D2,
1956        &[0, 0, 0, 255], // Initialize with opaque black
1957        TextureFormat::Rgba8UnormSrgb,
1958        RenderAssetUsages::default(),
1959    );
1960
1961    // Add required texture usages for headless rendering
1962    render_target_image.texture_descriptor.usage =
1963        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
1964
1965    let render_target_handle = images.add(render_target_image);
1966
1967    // Store handle for later access
1968    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
1969
1970    // Camera rendering to the image texture (NO window!)
1971    let fov = request.config.fov_radians();
1972    commands.spawn((
1973        Camera3d::default(),
1974        Camera {
1975            hdr: true,
1976            target: RenderTarget::Image(render_target_handle.clone()),
1977            ..default()
1978        },
1979        Projection::Perspective(PerspectiveProjection {
1980            fov,
1981            near: request.config.near_plane,
1982            far: request.config.far_plane,
1983            ..default()
1984        }),
1985        Msaa::Off,
1986        request.camera_transform,
1987        Tonemapping::None,
1988        DepthPrepass,
1989        NormalPrepass,
1990        RenderCamera,
1991        // Add ImageCopier to trigger RGBA extraction
1992        ImageCopier {
1993            src_image: render_target_handle,
1994            enabled: false, // Will enable when ready to capture
1995        },
1996    ));
1997
1998    // Ambient light
1999    let lighting = &request.config.lighting;
2000    commands.insert_resource(AmbientLight {
2001        color: Color::WHITE,
2002        brightness: lighting.ambient_brightness,
2003    });
2004
2005    // Key light
2006    if lighting.key_light_intensity > 0.0 {
2007        commands.spawn((
2008            PointLight {
2009                intensity: lighting.key_light_intensity,
2010                shadows_enabled: lighting.shadows_enabled,
2011                ..default()
2012            },
2013            Transform::from_xyz(
2014                lighting.key_light_position[0],
2015                lighting.key_light_position[1],
2016                lighting.key_light_position[2],
2017            ),
2018        ));
2019    }
2020
2021    // Fill light
2022    if lighting.fill_light_intensity > 0.0 {
2023        commands.spawn((
2024            PointLight {
2025                intensity: lighting.fill_light_intensity,
2026                shadows_enabled: lighting.shadows_enabled,
2027                ..default()
2028            },
2029            Transform::from_xyz(
2030                lighting.fill_light_position[0],
2031                lighting.fill_light_position[1],
2032                lighting.fill_light_position[2],
2033            ),
2034        ));
2035    }
2036
2037    // Load the scene
2038    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2039    commands.insert_resource(LoadedScene(scene_handle.clone()));
2040
2041    // Load the texture
2042    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2043    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2044
2045    // Create material with texture
2046    let _material = _materials.add(StandardMaterial {
2047        base_color_texture: Some(texture_handle),
2048        unlit: true,
2049        ..default()
2050    });
2051
2052    // Spawn the scene with rotation
2053    commands.spawn((
2054        SceneRoot(scene_handle),
2055        Transform::from_rotation(request.object_rotation.to_quat()),
2056        RenderedObject,
2057    ));
2058
2059    if let Some(t0) = t0 {
2060        eprintln!(
2061            "[render_trace][startup] setup_headless_scene ms={:.3}",
2062            t0.elapsed().as_secs_f64() * 1000.0
2063        );
2064    }
2065}
2066
2067/// Request capture for headless rendering (enable ImageCopier)
2068fn request_headless_capture(
2069    mut state: ResMut<RenderState>,
2070    mut depth_request: ResMut<DepthCaptureRequest>,
2071    mut query: Query<&mut ImageCopier>,
2072    batch: Option<Res<HeadlessBatchSequence>>,
2073) {
2074    let trace = render_trace_enabled();
2075    let t0 = trace.then(std::time::Instant::now);
2076
2077    if !state.capture_ready || state.screenshot_requested {
2078        if let Some(t0) = t0 {
2079            eprintln!(
2080                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2081                t0.elapsed().as_secs_f64() * 1000.0
2082            );
2083        }
2084        return;
2085    }
2086
2087    if batch
2088        .as_ref()
2089        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2090    {
2091        if let Some(t0) = t0 {
2092            eprintln!(
2093                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2094                t0.elapsed().as_secs_f64() * 1000.0
2095            );
2096        }
2097        return;
2098    }
2099
2100    // Enable the ImageCopier to trigger RGBA extraction
2101    for mut copier in query.iter_mut() {
2102        copier.enabled = true;
2103    }
2104
2105    // Request depth capture
2106    depth_request.requested = true;
2107
2108    state.screenshot_requested = true;
2109
2110    if let Some(t0) = t0 {
2111        eprintln!(
2112            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2113            t0.elapsed().as_secs_f64() * 1000.0
2114        );
2115    }
2116}
2117
2118/// Check if headless capture has completed
2119fn check_headless_capture_ready(
2120    mut state: ResMut<RenderState>,
2121    shared_rgba: Res<SharedRgbaBuffer>,
2122    shared_depth: Res<SharedDepthBuffer>,
2123    request: Res<RenderRequest>,
2124    mut query: Query<&mut ImageCopier>,
2125) {
2126    let trace = render_trace_enabled();
2127    let t0 = trace.then(std::time::Instant::now);
2128
2129    if !state.screenshot_requested || state.captured {
2130        if let Some(t0) = t0 {
2131            eprintln!(
2132                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2133                t0.elapsed().as_secs_f64() * 1000.0
2134            );
2135        }
2136        return;
2137    }
2138
2139    state.frame_count += 1;
2140
2141    // Check if RGBA data is ready
2142    let rgba_ready = if let Ok(guard) = shared_rgba.0.lock() {
2143        if let Some((rgba_data, width, height)) = guard.as_ref() {
2144            if state.rgba_data.is_none() {
2145                state.rgba_data = Some(rgba_data.clone());
2146                state.image_width = *width;
2147                state.image_height = *height;
2148                // Disable further captures
2149                for mut copier in query.iter_mut() {
2150                    copier.enabled = false;
2151                }
2152            }
2153            true
2154        } else {
2155            false
2156        }
2157    } else {
2158        false
2159    };
2160
2161    // Check if depth data is ready
2162    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2163        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2164            if state.depth_data.is_none() {
2165                state.depth_data = Some(depth_data.clone());
2166            }
2167            true
2168        } else {
2169            false
2170        }
2171    } else {
2172        false
2173    };
2174
2175    // Fallback to placeholder depth after 10 extra frames if depth readback fails
2176    if rgba_ready && !depth_ready && state.frame_count > 70 {
2177        let camera_dist = request.camera_transform.translation.length() as f64;
2178        let pixel_count = (state.image_width * state.image_height) as usize;
2179        state.depth_data = Some(vec![camera_dist; pixel_count]);
2180    }
2181
2182    if state.rgba_data.is_some() && state.depth_data.is_some() {
2183        state.captured = true;
2184    }
2185
2186    if let Some(t0) = t0 {
2187        eprintln!(
2188            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2189            rgba_ready,
2190            depth_ready,
2191            state.captured,
2192            state.frame_count,
2193            t0.elapsed().as_secs_f64() * 1000.0
2194        );
2195    }
2196}
2197
2198/// Extract results and exit for headless rendering
2199fn extract_and_exit_headless(
2200    mut state: ResMut<RenderState>,
2201    request: Res<RenderRequest>,
2202    shared_output: Res<SharedOutput>,
2203    mut app_exit: EventWriter<bevy::app::AppExit>,
2204    batch: Option<Res<HeadlessBatchSequence>>,
2205) {
2206    if batch.is_some() {
2207        return;
2208    }
2209
2210    if state.exit_requested {
2211        return;
2212    }
2213
2214    if !state.captured {
2215        return;
2216    }
2217
2218    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2219        let width = state.image_width;
2220        let height = state.image_height;
2221
2222        // Compute intrinsics from the same TBP zoom formula as the camera projection.
2223        let intrinsics = request.config.intrinsics_for_size(width, height);
2224
2225        let output = RenderOutput {
2226            rgba: rgba.clone(),
2227            depth: depth.clone(),
2228            width,
2229            height,
2230            intrinsics,
2231            camera_transform: request.camera_transform,
2232            object_rotation: request.object_rotation.clone(),
2233            target_point: Vec3::ZERO,
2234            targeting_policy: TargetingPolicy::Origin,
2235        };
2236
2237        if let Ok(mut guard) = shared_output.0.lock() {
2238            *guard = Some(output);
2239            drop(guard);
2240            std::thread::sleep(std::time::Duration::from_millis(200));
2241        }
2242
2243        // Send AppExit event (headless apps use this instead of closing windows)
2244        app_exit.send(bevy::app::AppExit::Success);
2245        state.exit_requested = true;
2246    }
2247}
2248
2249/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2250fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2251    let Some(mut batch) = batch else {
2252        return;
2253    };
2254
2255    if batch.warmup_frames_remaining > 0 {
2256        batch.warmup_frames_remaining -= 1;
2257    }
2258}
2259
2260/// Extract one batch output and continue rendering the next viewpoint in the same app.
2261fn extract_and_continue_headless_batch(
2262    mut state: ResMut<RenderState>,
2263    request: Res<RenderRequest>,
2264    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2265    batch: Option<ResMut<HeadlessBatchSequence>>,
2266    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2267    mut depth_request: ResMut<DepthCaptureRequest>,
2268    mut image_copiers: Query<&mut ImageCopier>,
2269) {
2270    let trace = render_trace_enabled();
2271    let t0 = trace.then(std::time::Instant::now);
2272
2273    let (shared_rgba, shared_depth) = buffers;
2274    let Some(mut batch) = batch else {
2275        if let Some(t0) = t0 {
2276            eprintln!(
2277                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2278                t0.elapsed().as_secs_f64() * 1000.0
2279            );
2280        }
2281        return;
2282    };
2283
2284    if state.exit_requested || !state.captured || batch.done {
2285        if let Some(t0) = t0 {
2286            eprintln!(
2287                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2288                state.captured,
2289                batch.done,
2290                t0.elapsed().as_secs_f64() * 1000.0
2291            );
2292        }
2293        return;
2294    }
2295
2296    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2297        let width = state.image_width;
2298        let height = state.image_height;
2299
2300        let intrinsics = request.config.intrinsics_for_size(width, height);
2301
2302        let output = RenderOutput {
2303            rgba: rgba.clone(),
2304            depth: depth.clone(),
2305            width,
2306            height,
2307            intrinsics,
2308            camera_transform: batch
2309                .current_viewpoint()
2310                .unwrap_or(request.camera_transform),
2311            object_rotation: request.object_rotation.clone(),
2312            target_point: Vec3::ZERO,
2313            targeting_policy: TargetingPolicy::Origin,
2314        };
2315        batch.outputs.push(output);
2316
2317        let next_index = batch.current_index + 1;
2318        if next_index >= batch.viewpoints.len() {
2319            batch.done = true;
2320            state.exit_requested = true;
2321            return;
2322        }
2323
2324        batch.current_index = next_index;
2325        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2326
2327        if let Some(next_viewpoint) = batch.current_viewpoint() {
2328            for mut camera_transform in camera_query.iter_mut() {
2329                *camera_transform = next_viewpoint;
2330            }
2331        }
2332
2333        if let Ok(mut guard) = shared_rgba.0.lock() {
2334            *guard = None;
2335        }
2336        if let Ok(mut guard) = shared_depth.0.lock() {
2337            *guard = None;
2338        }
2339
2340        for mut copier in image_copiers.iter_mut() {
2341            copier.enabled = false;
2342        }
2343
2344        depth_request.requested = false;
2345        state.frame_count = 0;
2346        state.capture_ready = true;
2347        state.screenshot_requested = false;
2348        state.captured = false;
2349        state.rgba_data = None;
2350        state.depth_data = None;
2351        state.image_width = 0;
2352        state.image_height = 0;
2353
2354        if let Some(t0) = t0 {
2355            eprintln!(
2356                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2357                batch.current_index.saturating_sub(1),
2358                batch.current_index,
2359                batch.done,
2360                t0.elapsed().as_secs_f64() * 1000.0
2361            );
2362        }
2363    } else if let Some(t0) = t0 {
2364        eprintln!(
2365            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2366            t0.elapsed().as_secs_f64() * 1000.0
2367        );
2368    }
2369}
2370
2371// ============================================================================
2372// Persistent batch session (RenderSession)
2373//
2374// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2375// object (PSO) compilation across multiple `render()` calls. Profile data (see
2376// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2377// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2378// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2379// episodes recovers the bulk of that cost.
2380// ============================================================================
2381
/// Marker component for the per-group scene entity.
///
/// Tagging the entity lets it be found and despawned cleanly when the next
/// `RenderSession::render()` call swaps in a different object or rotation.
#[derive(Component)]
struct SessionScene;
2386
2387/// Session-persistent setup: render target image, camera (with prepass +
2388/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2389/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2390/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2391fn setup_session_persistent_scene(
2392    mut commands: Commands,
2393    mut images: ResMut<Assets<Image>>,
2394    config: Res<SessionRenderConfig>,
2395) {
2396    let width = config.0.width;
2397    let height = config.0.height;
2398
2399    let size = Extent3d {
2400        width,
2401        height,
2402        depth_or_array_layers: 1,
2403    };
2404
2405    let mut render_target_image = Image::new_fill(
2406        size,
2407        TextureDimension::D2,
2408        &[0, 0, 0, 255],
2409        TextureFormat::Rgba8UnormSrgb,
2410        RenderAssetUsages::default(),
2411    );
2412    render_target_image.texture_descriptor.usage =
2413        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2414
2415    let render_target_handle = images.add(render_target_image);
2416    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2417
2418    let fov = config.0.fov_radians();
2419    commands.spawn((
2420        Camera3d::default(),
2421        Camera {
2422            hdr: true,
2423            target: RenderTarget::Image(render_target_handle.clone()),
2424            ..default()
2425        },
2426        Projection::Perspective(PerspectiveProjection {
2427            fov,
2428            near: config.0.near_plane,
2429            far: config.0.far_plane,
2430            ..default()
2431        }),
2432        Msaa::Off,
2433        Transform::default(),
2434        Tonemapping::None,
2435        DepthPrepass,
2436        NormalPrepass,
2437        RenderCamera,
2438        ImageCopier {
2439            src_image: render_target_handle,
2440            enabled: false,
2441        },
2442    ));
2443
2444    let lighting = &config.0.lighting;
2445    commands.insert_resource(AmbientLight {
2446        color: Color::WHITE,
2447        brightness: lighting.ambient_brightness,
2448    });
2449
2450    if lighting.key_light_intensity > 0.0 {
2451        commands.spawn((
2452            PointLight {
2453                intensity: lighting.key_light_intensity,
2454                shadows_enabled: lighting.shadows_enabled,
2455                ..default()
2456            },
2457            Transform::from_xyz(
2458                lighting.key_light_position[0],
2459                lighting.key_light_position[1],
2460                lighting.key_light_position[2],
2461            ),
2462        ));
2463    }
2464
2465    if lighting.fill_light_intensity > 0.0 {
2466        commands.spawn((
2467            PointLight {
2468                intensity: lighting.fill_light_intensity,
2469                shadows_enabled: lighting.shadows_enabled,
2470                ..default()
2471            },
2472            Transform::from_xyz(
2473                lighting.fill_light_position[0],
2474                lighting.fill_light_position[1],
2475                lighting.fill_light_position[2],
2476            ),
2477        ));
2478    }
2479}
2480
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// Read by `setup_session_persistent_scene` for the render target size, the
/// camera projection (fov, near/far planes), and the lighting setup.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2485
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    /// The long-lived Bevy app that is reused for every `render()` call.
    app: App,
    /// Configuration fixed at construction (see "Config invariant").
    render_config: RenderConfig,
    /// RGBA readback buffer; `new()` hands a clone to `ImageCopyPlugin`.
    shared_rgba: SharedRgbaBuffer,
    /// Depth readback buffer; `new()` hands a clone to `DepthReadbackPlugin`.
    shared_depth: SharedDepthBuffer,
    /// Raw-pointer marker that makes the type `!Send + !Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2516
2517impl RenderSession {
2518    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2519    /// `update()` so Startup systems run and the wgpu device + adapter are
2520    /// initialized. The first `render()` call still pays PSO compilation for
2521    /// the specific mesh/material combination; subsequent calls reuse the cache.
2522    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2523        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2524        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2525
2526        let mut app = App::new();
2527        app.add_plugins(
2528            DefaultPlugins
2529                .set(WindowPlugin {
2530                    primary_window: None,
2531                    exit_condition: ExitCondition::DontExit,
2532                    ..default()
2533                })
2534                .disable::<bevy::winit::WinitPlugin>()
2535                .disable::<LogPlugin>()
2536                .disable::<TerminalCtrlCHandlerPlugin>(),
2537        )
2538        .add_plugins(ObjPlugin)
2539        .add_plugins(ImageCopyPlugin {
2540            shared_rgba: shared_rgba.clone(),
2541        })
2542        .add_plugins(DepthReadbackPlugin {
2543            shared_depth: shared_depth.clone(),
2544            near: render_config.near_plane,
2545            far: render_config.far_plane,
2546        })
2547        .insert_resource(SessionRenderConfig(render_config.clone()))
2548        .insert_resource(shared_rgba.clone())
2549        .init_resource::<RenderState>()
2550        .add_systems(Startup, setup_session_persistent_scene)
2551        .add_systems(
2552            Update,
2553            (
2554                check_assets_loaded,
2555                apply_materials,
2556                tick_headless_batch_warmup,
2557                request_headless_capture,
2558                check_headless_capture_ready,
2559                extract_and_continue_headless_batch,
2560            )
2561                .chain()
2562                // Gate the capture chain on `RenderRequest` existing. `new()`
2563                // runs a warmup `app.update()` to execute Startup (which spawns
2564                // the camera/lights/render target) before the first `render()`
2565                // call, but does not yet insert `RenderRequest`. Several systems
2566                // in this chain take `Res<RenderRequest>` (not `Option`) and
2567                // would panic on SystemState init if the resource were absent.
2568                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2569        );
2570
2571        app.finish();
2572        app.cleanup();
2573
2574        // One warmup update runs Startup systems (render target, camera, lights)
2575        // so they exist before the first `render()` call seeds the camera
2576        // transform. The Update chain is gated by `RenderRequest` existence and
2577        // is a no-op this tick. PSO compilation for specific mesh/material
2578        // combinations still happens lazily on the first real render.
2579        app.update();
2580
2581        Ok(Self {
2582            app,
2583            render_config: render_config.clone(),
2584            shared_rgba,
2585            shared_depth,
2586            _not_send_sync: std::marker::PhantomData,
2587        })
2588    }
2589
2590    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2591    /// Returns outputs in request order.
2592    ///
2593    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2594    /// wgpu device was lost mid-render. This call produced no output; any
2595    /// outputs from earlier `render()` calls on this session are still valid.
2596    /// Recovery: drop this `RenderSession` and construct a new one.
2597    pub fn render(
2598        &mut self,
2599        requests: &[crate::BatchRenderRequest],
2600    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2601        use crate::{BatchRenderError, BatchRenderOutput};
2602
2603        if requests.is_empty() {
2604            return Ok(Vec::new());
2605        }
2606
2607        // Enforce homogeneity and config invariance.
2608        let first = &requests[0];
2609        if first.render_config != self.render_config {
2610            return Err(BatchRenderError::InvalidConfig(
2611                "RenderSession render_config mismatch: session was constructed with a different \
2612                 RenderConfig than the first request carries. Session config cannot change after \
2613                 `new()`; construct a new session if you need a different resolution/camera."
2614                    .to_string(),
2615            ));
2616        }
2617        for r in &requests[1..] {
2618            if r.object_dir != first.object_dir
2619                || r.object_rotation != first.object_rotation
2620                || r.render_config != first.render_config
2621            {
2622                return Err(BatchRenderError::InvalidConfig(
2623                    "Phase 1 RenderSession::render requires homogeneous requests \
2624                     (same object_dir, object_rotation, and render_config across the batch). \
2625                     Call render() once per group instead."
2626                        .to_string(),
2627                ));
2628            }
2629        }
2630
2631        // Canonicalize paths and validate mesh/texture presence. This matches
2632        // `render_headless_sequence`'s preconditions so the error surface stays
2633        // consistent.
2634        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2635            BatchRenderError::InvalidConfig(format!(
2636                "Cannot canonicalize object directory {}: {}",
2637                first.object_dir.display(),
2638                e
2639            ))
2640        })?;
2641        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2642        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2643        if !mesh_path.exists() {
2644            return Err(BatchRenderError::InvalidConfig(format!(
2645                "Mesh not found: {}",
2646                mesh_path.display()
2647            )));
2648        }
2649        if !texture_path.exists() {
2650            return Err(BatchRenderError::InvalidConfig(format!(
2651                "Texture not found: {}",
2652                texture_path.display()
2653            )));
2654        }
2655
2656        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2657
2658        // --- per-group scene swap (direct world manipulation) ---
2659        {
2660            let world = self.app.world_mut();
2661
2662            // Despawn any SessionScene entity from the previous group.
2663            let stale: Vec<Entity> = world
2664                .query_filtered::<Entity, With<SessionScene>>()
2665                .iter(world)
2666                .collect();
2667            for entity in stale {
2668                world.entity_mut(entity).despawn_recursive();
2669            }
2670
2671            // Clear shared RGBA/depth buffers so a stale payload can't leak
2672            // into the first viewpoint of this call.
2673            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2674                *guard = None;
2675            }
2676            if let Ok(mut guard) = self.shared_depth.0.lock() {
2677                *guard = None;
2678            }
2679
2680            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2681            // frame_count, materials_applied, etc.). Default() gives all false/0.
2682            *world.resource_mut::<RenderState>() = RenderState::default();
2683
2684            // Update RenderRequest so the existing capture systems see the new
2685            // object paths, rotation, and camera transform (seeded from first vp).
2686            let new_request = RenderRequest {
2687                mesh_path: mesh_path.display().to_string(),
2688                texture_path: texture_path.display().to_string(),
2689                camera_transform: viewpoints[0],
2690                object_rotation: first.object_rotation.clone(),
2691                config: self.render_config.clone(),
2692            };
2693            world.insert_resource(new_request);
2694
2695            // Kick off asset loads and install the handles under the names the
2696            // existing `check_assets_loaded` system expects.
2697            let asset_server = world.resource::<AssetServer>().clone();
2698            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2699            let texture_handle: Handle<Image> =
2700                asset_server.load(texture_path.display().to_string());
2701            world.insert_resource(LoadedScene(scene_handle.clone()));
2702            world.insert_resource(LoadedTexture(texture_handle));
2703
2704            // Spawn the new scene entity tagged so we can find + despawn it next
2705            // render() call.
2706            world.spawn((
2707                SceneRoot(scene_handle),
2708                Transform::from_rotation(first.object_rotation.to_quat()),
2709                RenderedObject,
2710                SessionScene,
2711            ));
2712
2713            // Seed the camera transform to the first viewpoint now so the first
2714            // capture lines up; subsequent viewpoints are advanced by
2715            // `extract_and_continue_headless_batch`.
2716            let camera_entity = world
2717                .query_filtered::<Entity, With<RenderCamera>>()
2718                .iter(world)
2719                .next();
2720            if let Some(cam) = camera_entity {
2721                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2722                    *transform = viewpoints[0];
2723                }
2724            }
2725
2726            // Install the viewpoint sequence for this render() call.
2727            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
2728        }
2729
2730        // --- drive the capture loop ---
2731        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2732        let start = std::time::Instant::now();
2733        loop {
2734            if start.elapsed() > timeout {
2735                return Err(BatchRenderError::TotalFailure(format!(
2736                    "RenderSession::render timed out after {}s",
2737                    RENDER_TIMEOUT_SECS
2738                )));
2739            }
2740
2741            self.app.update();
2742
2743            if self.app.world().resource::<HeadlessBatchSequence>().done {
2744                break;
2745            }
2746        }
2747
2748        // Collect outputs and zip with requests to produce BatchRenderOutput in
2749        // request order.
2750        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
2751        if sequence.outputs.len() != requests.len() {
2752            return Err(BatchRenderError::TotalFailure(format!(
2753                "RenderSession produced {} outputs for {} requests",
2754                sequence.outputs.len(),
2755                requests.len()
2756            )));
2757        }
2758        let outputs = std::mem::take(&mut sequence.outputs);
2759
2760        Ok(requests
2761            .iter()
2762            .cloned()
2763            .zip(outputs)
2764            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
2765            .collect())
2766    }
2767}
2768
2769// ============================================================================
2770// Per-step persistent renderer (PersistentRenderer)
2771//
2772// `RenderSession` reuses the App across calls but rebuilds the scene on every
2773// `render()` (despawn SceneRoot, re-issue asset_server.load, respawn). That's
2774// fine for the parity-gate path (one scene per episode of N viewpoints) but
2775// wasteful for surface-policy feedback loops where N=1 viewpoint per call and
2776// the object stays loaded for the whole episode.
2777//
2778// `PersistentRenderer` commits to one `object_dir` + `RenderConfig` at
2779// construction. `new()` loads mesh + texture + spawns the scene root + drives
2780// one warmup render (output discarded) so PSO compilation and material setup
// are paid up front. `render(camera, rotation)` then only rewrites the camera
// `Transform` and the scene root rotation (unconditionally, without comparing
// against the previous value), drives the capture chain until the single
// requested frame has been captured, and returns. See issue #65.
2784// ============================================================================
2785
/// Marker component for the `PersistentRenderer`'s scene root entity.
///
/// The entity is spawned once in `PersistentRenderer::new()` and looked up by
/// this marker in every `PersistentRenderer::render()` call, which overwrites
/// its `Transform` with the caller-supplied object rotation (the write happens
/// on every call, whether or not the rotation changed).
#[derive(Component)]
struct PersistentScene;
2791
2792/// Persistent per-step renderer. Loads the scene once at `new()` and renders
2793/// one frame per `render()` call by mutating the camera transform and scene
2794/// root rotation in-place. Built for surface-policy feedback loops where the
2795/// object stays fixed for the duration of an episode and the camera moves
2796/// every step. See issue #65.
2797///
2798/// # Thread affinity
2799///
2800/// `PersistentRenderer` must be created, used, and dropped on the same thread.
2801/// Holds a `bevy::App` that owns GPU resources not safe to move across
2802/// threads; `!Send + !Sync` is enforced via `PhantomData<*const ()>`.
2803///
2804/// # Object + config invariants
2805///
2806/// `object_dir` and `RenderConfig` are fixed at `new()`. To render a different
2807/// object or change resolution/lighting, drop and rebuild. Rotation may change
2808/// freely between `render()` calls.
2809pub struct PersistentRenderer {
2810    app: App,
2811    object_dir: PathBuf,
2812    render_config: RenderConfig,
2813    shared_rgba: SharedRgbaBuffer,
2814    shared_depth: SharedDepthBuffer,
2815    _not_send_sync: std::marker::PhantomData<*const ()>,
2816}
2817
2818impl PersistentRenderer {
2819    /// Build the App, load the scene + texture, spawn the scene root, and drive
2820    /// one warmup render whose output is discarded. After `new()` returns, the
2821    /// first user-facing `render()` call benefits from a warm PSO cache and
2822    /// applied materials.
2823    pub fn new(
2824        object_dir: &Path,
2825        render_config: &RenderConfig,
2826    ) -> Result<Self, crate::RenderError> {
2827        let object_dir =
2828            std::fs::canonicalize(object_dir).map_err(|e| crate::RenderError::FileNotFound {
2829                path: object_dir.display().to_string(),
2830                reason: e.to_string(),
2831            })?;
2832        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2833        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2834        if !mesh_path.exists() {
2835            return Err(crate::RenderError::MeshNotFound(
2836                mesh_path.display().to_string(),
2837            ));
2838        }
2839        if !texture_path.exists() {
2840            return Err(crate::RenderError::TextureNotFound(
2841                texture_path.display().to_string(),
2842            ));
2843        }
2844
2845        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2846        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2847
2848        let mut app = App::new();
2849        app.add_plugins(
2850            DefaultPlugins
2851                .set(WindowPlugin {
2852                    primary_window: None,
2853                    exit_condition: ExitCondition::DontExit,
2854                    ..default()
2855                })
2856                .disable::<bevy::winit::WinitPlugin>()
2857                .disable::<LogPlugin>()
2858                .disable::<TerminalCtrlCHandlerPlugin>(),
2859        )
2860        .add_plugins(ObjPlugin)
2861        .add_plugins(ImageCopyPlugin {
2862            shared_rgba: shared_rgba.clone(),
2863        })
2864        .add_plugins(DepthReadbackPlugin {
2865            shared_depth: shared_depth.clone(),
2866            near: render_config.near_plane,
2867            far: render_config.far_plane,
2868        })
2869        .insert_resource(SessionRenderConfig(render_config.clone()))
2870        .insert_resource(shared_rgba.clone())
2871        .init_resource::<RenderState>()
2872        .add_systems(Startup, setup_session_persistent_scene)
2873        .add_systems(
2874            Update,
2875            (
2876                check_assets_loaded,
2877                apply_materials,
2878                tick_headless_batch_warmup,
2879                request_headless_capture,
2880                check_headless_capture_ready,
2881                extract_and_continue_headless_batch,
2882            )
2883                .chain()
2884                // Same gate as RenderSession: capture chain only runs once
2885                // RenderRequest is installed. Startup runs first via the
2886                // warmup `app.update()` below.
2887                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2888        );
2889
2890        app.finish();
2891        app.cleanup();
2892        // Warmup tick #1: Startup runs (camera, lights, render target spawn).
2893        app.update();
2894
2895        // Install scene + warmup render request. The warmup output is discarded
2896        // — its purpose is to pay PSO compilation and material application
2897        // upfront so the first user-facing render() is fast.
2898        let initial_request = RenderRequest {
2899            mesh_path: mesh_path.display().to_string(),
2900            texture_path: texture_path.display().to_string(),
2901            camera_transform: Transform::default(),
2902            object_rotation: ObjectRotation::identity(),
2903            config: render_config.clone(),
2904        };
2905
2906        {
2907            let world = app.world_mut();
2908            let asset_server = world.resource::<AssetServer>().clone();
2909            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2910            let texture_handle: Handle<Image> =
2911                asset_server.load(texture_path.display().to_string());
2912            world.insert_resource(LoadedScene(scene_handle.clone()));
2913            world.insert_resource(LoadedTexture(texture_handle));
2914            world.insert_resource(initial_request);
2915            world.spawn((
2916                SceneRoot(scene_handle),
2917                Transform::from_rotation(ObjectRotation::identity().to_quat()),
2918                RenderedObject,
2919                PersistentScene,
2920            ));
2921            world.insert_resource(HeadlessBatchSequence::new(vec![Transform::default()]));
2922        }
2923
2924        // Drive the warmup render to completion.
2925        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2926        let start = std::time::Instant::now();
2927        loop {
2928            if start.elapsed() > timeout {
2929                return Err(crate::RenderError::RenderFailed(format!(
2930                    "PersistentRenderer::new warmup render timed out after {RENDER_TIMEOUT_SECS}s"
2931                )));
2932            }
2933            app.update();
2934            if app.world().resource::<HeadlessBatchSequence>().done {
2935                break;
2936            }
2937        }
2938        // Discard the warmup output so it doesn't leak into the first real
2939        // render() call's output buffer.
2940        app.world_mut()
2941            .resource_mut::<HeadlessBatchSequence>()
2942            .outputs
2943            .clear();
2944
2945        Ok(Self {
2946            app,
2947            object_dir,
2948            render_config: render_config.clone(),
2949            shared_rgba,
2950            shared_depth,
2951            _not_send_sync: std::marker::PhantomData,
2952        })
2953    }
2954
2955    /// Render one frame from the given camera transform and object rotation.
2956    /// Reuses the loaded scene + warm PSO cache from `new()`.
2957    pub fn render(
2958        &mut self,
2959        camera_transform: &Transform,
2960        object_rotation: &ObjectRotation,
2961    ) -> Result<RenderOutput, crate::RenderError> {
2962        let camera_transform = *camera_transform;
2963        let object_rotation_owned = object_rotation.clone();
2964
2965        {
2966            let world = self.app.world_mut();
2967
2968            // Update the persistent scene root rotation. Always-write avoids
2969            // the cost of an extra ObjectRotation comparison per call; the
2970            // mutation itself is a single Transform write.
2971            let scene_entity = world
2972                .query_filtered::<Entity, With<PersistentScene>>()
2973                .iter(world)
2974                .next();
2975            if let Some(entity) = scene_entity {
2976                if let Some(mut transform) = world.entity_mut(entity).get_mut::<Transform>() {
2977                    *transform = Transform::from_rotation(object_rotation_owned.to_quat());
2978                }
2979            }
2980
2981            // Update the camera transform.
2982            let cam_entity = world
2983                .query_filtered::<Entity, With<RenderCamera>>()
2984                .iter(world)
2985                .next();
2986            if let Some(cam) = cam_entity {
2987                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2988                    *transform = camera_transform;
2989                }
2990            }
2991
2992            // Reset per-frame state, preserving scene_loaded / texture_loaded
2993            // / materials_applied / materials_applied_frame. The asset-load
2994            // and material-apply work was paid in `new()`'s warmup; we only
2995            // need to clear the per-capture state.
2996            //
2997            // `capture_ready = true` short-circuits `apply_materials` on
2998            // every tick of the render loop (no need to re-check material
2999            // application — it stays applied for the renderer's lifetime).
3000            // It does NOT short-circuit `request_headless_capture`, which
3001            // is gated by `HeadlessBatchSequence::warmup_frames_remaining`
3002            // below. Bug fix from PR #66 review (off-by-one / blank-step-0):
3003            // without that warmup gate, request_headless_capture fires same-
3004            // tick as the transform writes, capturing the previous render's
3005            // target before the new transforms have propagated.
3006            {
3007                let mut state = world.resource_mut::<RenderState>();
3008                state.exit_requested = false;
3009                state.screenshot_requested = false;
3010                state.captured = false;
3011                state.rgba_data = None;
3012                state.depth_data = None;
3013                state.frame_count = 0;
3014                state.image_width = 0;
3015                state.image_height = 0;
3016                state.capture_ready = true;
3017            }
3018
3019            // Clear shared GPU readback buffers so a stale payload from the
3020            // previous render() can't leak into this call's output.
3021            if let Ok(mut guard) = self.shared_rgba.0.lock() {
3022                *guard = None;
3023            }
3024            if let Ok(mut guard) = self.shared_depth.0.lock() {
3025                *guard = None;
3026            }
3027
3028            // Update RenderRequest (used by extract_and_continue_headless_batch
3029            // to stamp the output with the right intrinsics + rotation).
3030            {
3031                let mut req = world.resource_mut::<RenderRequest>();
3032                req.camera_transform = camera_transform;
3033                req.object_rotation = object_rotation_owned.clone();
3034            }
3035
3036            // Install fresh single-element batch with warmup frames so
3037            // `request_headless_capture` is gated until the new transforms
3038            // have propagated through the render pipeline.
3039            let mut batch = HeadlessBatchSequence::new(vec![camera_transform]);
3040            batch.warmup_frames_remaining = PERSISTENT_WARMUP_FRAMES;
3041            world.insert_resource(batch);
3042        }
3043
3044        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3045        let start = std::time::Instant::now();
3046        loop {
3047            if start.elapsed() > timeout {
3048                return Err(crate::RenderError::RenderFailed(format!(
3049                    "PersistentRenderer::render timed out after {RENDER_TIMEOUT_SECS}s"
3050                )));
3051            }
3052            self.app.update();
3053            if self.app.world().resource::<HeadlessBatchSequence>().done {
3054                break;
3055            }
3056        }
3057
3058        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
3059        let mut outputs = std::mem::take(&mut sequence.outputs);
3060        if outputs.len() != 1 {
3061            return Err(crate::RenderError::RenderFailed(format!(
3062                "PersistentRenderer::render expected 1 output, got {}",
3063                outputs.len()
3064            )));
3065        }
3066
3067        Ok(outputs.remove(0))
3068    }
3069
3070    /// Path to the YCB object directory this renderer was bound to.
3071    pub fn object_dir(&self) -> &Path {
3072        &self.object_dir
3073    }
3074
3075    /// The `RenderConfig` this renderer was constructed with.
3076    pub fn render_config(&self) -> &RenderConfig {
3077        &self.render_config
3078    }
3079
3080    /// Explicit close. Equivalent to dropping; provided to match the API
3081    /// proposal in #65 for callers that want lifetime-explicit teardown.
3082    pub fn close(self) {
3083        // Drop runs on return.
3084    }
3085}
3086
3087/// Render directly to files (for subprocess mode).
3088///
3089/// This function saves RGBA and depth data directly to files before exiting.
3090/// Designed for subprocess rendering where the process will exit after rendering.
3091pub fn render_to_files(
3092    object_dir: &Path,
3093    camera_transform: &Transform,
3094    object_rotation: &ObjectRotation,
3095    config: &RenderConfig,
3096    rgba_path: &Path,
3097    depth_path: &Path,
3098) -> Result<(), RenderError> {
3099    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
3100    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
3101
3102    if !mesh_path.exists() {
3103        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
3104    }
3105    if !texture_path.exists() {
3106        return Err(RenderError::TextureNotFound(
3107            texture_path.display().to_string(),
3108        ));
3109    }
3110
3111    let request = RenderRequest {
3112        mesh_path: mesh_path.display().to_string(),
3113        texture_path: texture_path.display().to_string(),
3114        camera_transform: *camera_transform,
3115        object_rotation: object_rotation.clone(),
3116        config: config.clone(),
3117    };
3118
3119    // Shared state for output
3120    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
3121    let output_poll = shared_output.clone();
3122
3123    // Clone paths for watchdog thread
3124    let rgba_path = rgba_path.to_path_buf();
3125    let depth_path = depth_path.to_path_buf();
3126
3127    // Shared buffer for RGBA data from headless render target
3128    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
3129
3130    // Shared buffer for depth readback
3131    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
3132
3133    // Spawn watchdog thread that saves files and exits
3134    std::thread::spawn(move || {
3135        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
3136        let start = std::time::Instant::now();
3137        let poll_interval = std::time::Duration::from_millis(100);
3138
3139        loop {
3140            if let Ok(guard) = output_poll.0.lock() {
3141                if let Some(output) = guard.as_ref() {
3142                    // Save RGBA as PNG
3143                    if let Err(e) =
3144                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
3145                    {
3146                        eprintln!("Failed to save RGBA: {:?}", e);
3147                        std::process::exit(1);
3148                    }
3149
3150                    // Save depth as binary f32
3151                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
3152                        eprintln!("Failed to save depth: {:?}", e);
3153                        std::process::exit(1);
3154                    }
3155
3156                    std::process::exit(0);
3157                }
3158            }
3159
3160            if start.elapsed() > timeout {
3161                eprintln!(
3162                    "Error: Render timeout after {} seconds",
3163                    RENDER_TIMEOUT_SECS
3164                );
3165                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
3166                std::process::exit(1);
3167            }
3168
3169            std::thread::sleep(poll_interval);
3170        }
3171    });
3172
3173    // Configure rendering backend for this environment.
3174    // Use OnceLock so env vars are only set once per process — repeated calls
3175    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
3176    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
3177    // requires a persistent renderer (tracked in issue #14).
3178    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
3179    BACKEND_INIT.get_or_init(|| {
3180        let backend_config = BackendConfig::headless();
3181        backend_config.apply_env();
3182    });
3183
3184    // Run Bevy app with HEADLESS configuration
3185    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
3186
3187    // Unreachable - watchdog thread exits the process
3188    Err(RenderError::RenderFailed(
3189        "Render did not complete".to_string(),
3190    ))
3191}
3192
3193/// Save RGBA data to PNG file
3194fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
3195    use image::{ImageBuffer, Rgba};
3196
3197    // Create parent directories if needed
3198    if let Some(parent) = path.parent() {
3199        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
3200    }
3201
3202    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
3203        ImageBuffer::from_raw(width, height, rgba.to_vec())
3204            .ok_or_else(|| "Failed to create image buffer".to_string())?;
3205
3206    img.save(path).map_err(|e| e.to_string())
3207}
3208
/// Write `depth` to `path` as a flat sequence of little-endian `f64` values
/// (8 bytes per sample, no header), creating missing parent directories.
///
/// # Errors
///
/// Returns the stringified I/O error when directory creation or the file
/// write fails.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists before writing.
    if let Some(dir) = path.parent() {
        if let Err(err) = std::fs::create_dir_all(dir) {
            return Err(err.to_string());
        }
    }

    // Serialize every sample in order into one contiguous byte buffer.
    let mut encoded: Vec<u8> = Vec::with_capacity(depth.len() * std::mem::size_of::<f64>());
    for sample in depth {
        encoded.extend_from_slice(&sample.to_le_bytes());
    }

    match std::fs::write(path, &encoded) {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
3219
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, TargetingPolicy, Vec3,
        ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ source for the synthetic test mesh: a cube spanning
    /// -0.10..0.10 on every axis, described by 8 vertices, 4 texture
    /// coordinates and 12 triangular faces.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// RGBA texels of the 2x2 synthetic texture in row-major order:
    /// (0,0), (1,0), (0,1), (1,1).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],
        [48, 255, 48, 255],
        [48, 48, 255, 255],
        [255, 255, 64, 255],
    ];

    /// Create a temporary object tree containing
    /// `synthetic_cube/google_16k/{textured.obj, texture_map.png}`.
    ///
    /// The returned [`TempDir`] owns the files; keep it alive for as long as
    /// the object directory is needed.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let mesh_dir = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&mesh_dir).expect("create synthetic google_16k dir");

        // A small centered cube stays visible from all default TBP viewpoints and does not
        // need any YCB downloads.
        std::fs::write(mesh_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        // Four distinct texels, looked up by row-major index.
        let texture = ImageBuffer::from_fn(2, 2, |x, y| {
            Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize])
        });
        texture
            .save(mesh_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    /// Render a small homogeneous batch of the synthetic cube and check that
    /// the headless scene is set up once, every output has the configured
    /// dimensions and some visible color, and the batch stays within the time
    /// budget.
    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();

        // Identical requests apart from the viewpoint, so the batch is homogeneous.
        let mut requests = Vec::with_capacity(request_count);
        for &viewpoint in viewpoints.iter().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
                target_point: Vec3::ZERO,
                targeting_policy: TargetingPolicy::Origin,
            });
        }

        let started = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = started.elapsed();

        assert_eq!(outputs.len(), request_count);
        // This is the deterministic churn signal for the smoke check. Adapter log lines vary by
        // backend and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        // Buffer sizes implied by the render configuration: 4 bytes per RGBA pixel and one
        // depth sample per pixel.
        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;

        for (idx, frame) in outputs.iter().enumerate() {
            assert_eq!(frame.width, config.width, "output {idx} width mismatch");
            assert_eq!(frame.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                frame.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                frame.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero color channel (alpha is ignored).
            let has_visible_color = frame
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_visible_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}