Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::LoadState;
42use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
43use bevy::core_pipeline::tonemapping::Tonemapping;
44use bevy::ecs::query::QueryItem;
45use bevy::log::LogPlugin;
46use bevy::prelude::*;
47use bevy::render::camera::{ExtractedCamera, RenderTarget};
48use bevy::render::render_asset::{RenderAssetUsages, RenderAssets};
49use bevy::render::render_graph::{
50    Node, NodeRunError, RenderGraphApp, RenderGraphContext, RenderLabel, ViewNode, ViewNodeRunner,
51};
52use bevy::render::render_resource::{
53    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, ImageCopyBuffer,
54    ImageCopyTexture, ImageDataLayout, MapMode, Origin3d, TextureAspect, TextureDimension,
55    TextureFormat, TextureUsages,
56};
57use bevy::render::renderer::RenderQueue;
58use bevy::render::renderer::{RenderContext, RenderDevice};
59use bevy::render::texture::GpuImage;
60use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
61use bevy::render::view::ViewDepthTexture;
62use bevy::render::{Extract, Render, RenderApp, RenderSet};
63use bevy::window::{ExitCondition, WindowPlugin};
64use bevy_obj::ObjPlugin;
65use std::fs::File;
66use std::io::Read as IoRead;
67use std::path::Path;
68#[cfg(test)]
69use std::sync::atomic::{AtomicUsize, Ordering};
70use std::sync::{Arc, Mutex, OnceLock};
71use std::time::Duration;
72
73use crate::{backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput};
74use ycbust::{GOOGLE_16K_MESH_RELATIVE, GOOGLE_16K_TEXTURE_RELATIVE};
75
/// Upper bound, in seconds, on how long a single render may run before the
/// watchdog declares it failed.
///
/// Every render path waits at most this long. Three minutes leaves room for
/// first-run wgpu shader compilation on Windows, which can take well over 60s
/// on a cold GPU cache (see commit 9cd1d11).
const RENDER_TIMEOUT_SECS: u64 = 3 * 60;
82
/// Number of frames `render_headless_sequence` runs after each camera move
/// before it captures.
///
/// A freshly written camera `Transform` needs at least one frame of transform
/// propagation plus render-world extraction before a capture reflects it. The
/// value used to be 3 as a conservative cushion; lowering it directly cuts
/// per-viewpoint wall-clock time because `app.update()` in the batch path is
/// not rate-limited. The current value is validated against the pixel-exact
/// hardware test `test_batch_render_matches_sequential_episode_outputs`.
const BATCH_WARMUP_FRAMES: u32 = 1;
92
/// Report whether render tracing is switched on.
///
/// Tracing is on whenever the `BEVY_SENSOR_RENDER_TRACE` environment variable
/// is set to a valid Unicode value; the value itself is ignored. The process
/// environment is queried on every call. Gate all tracing output behind this.
#[inline]
fn render_trace_enabled() -> bool {
    matches!(std::env::var("BEVY_SENSOR_RENDER_TRACE"), Ok(_))
}
99
/// Report whether a display appears to be available for windowed rendering.
///
/// Returns `true` when at least one of the `DISPLAY` or `WAYLAND_DISPLAY`
/// environment variables is set.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
107
/// Heuristically detect WSL2 (which doesn't support Vulkan window surfaces).
///
/// Reads `/proc/version` and reports `true` when its text contains
/// "microsoft" or "wsl", ignoring case. Returns `false` when the file cannot
/// be read.
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            lowered.contains("microsoft") || lowered.contains("wsl")
        }
        Err(_) => false,
    }
}
117
118/// Internal state for tracking render progress
119#[derive(Resource, Default)]
120struct RenderState {
121    frame_count: u32,
122    scene_loaded: bool,
123    texture_loaded: bool,
124    materials_applied: bool,
125    /// `frame_count` at the moment materials were applied; used to gate
126    /// `capture_ready` on N frames of render-graph propagation rather than
127    /// a legacy llvmpipe-era 60-frame wait.
128    materials_applied_frame: u32,
129    capture_ready: bool,
130    screenshot_requested: bool,
131    captured: bool,
132    exit_requested: bool,
133    #[allow(dead_code)]
134    exit_frame_count: u32,
135    rgba_data: Option<Vec<u8>>,
136    depth_data: Option<Vec<f64>>,
137    image_width: u32,
138    image_height: u32,
139}
140
/// Test-only counter. Presumably bumped by the headless scene setup path; the
/// increment site is not in this part of the file.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Test helper: set the scene-setup counter back to zero.
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Test helper: read the current value of the scene-setup counter.
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
153
154/// Shared buffer for screenshot callback to write into
155#[derive(Resource, Clone)]
156#[allow(clippy::type_complexity)]
157#[allow(dead_code)]
158struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
159
160/// Shared buffer for depth data from GPU readback
161/// Contains: (linear_depth_values, width, height)
162/// Uses f64 for TBP numerical precision compatibility.
163#[derive(Resource, Clone, Default)]
164#[allow(clippy::type_complexity)]
165struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
166
167// ============================================================================
168// Depth Readback Infrastructure
169// ============================================================================
170
171/// Request to capture depth - extracted from main world to render world
172#[derive(Resource, Default, Clone)]
173struct DepthCaptureRequest {
174    requested: bool,
175    near: f32,
176    far: f32,
177}
178
179/// Pending depth capture info for async processing
180struct PendingDepthCapture {
181    buffer: Buffer,
182    width: u32,
183    height: u32,
184    near: f32,
185    far: f32,
186}
187
188/// Queue for pending depth captures (written by render node, read by cleanup system)
189#[derive(Resource, Default)]
190struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
191
192// ============================================================================
193// Depth Buffer Helpers
194// ============================================================================
195
mod depth_helpers {
    /// Row alignment, in bytes, that wgpu requires for buffer copies.
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Round `value` up to the next multiple of [`COPY_BYTES_PER_ROW_ALIGNMENT`].
    ///
    /// Values that are already aligned (including 0) come back unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        value.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT)
    }

    /// Size in bytes of a buffer holding `height` rows of `width` pixels of
    /// `pixel_size` bytes each, with every row padded to the copy alignment.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        let padded_row = align_byte_size(width * pixel_size);
        height * padded_row
    }

    /// Map a reverse-Z NDC depth sample to linear depth in meters.
    ///
    /// Bevy's depth buffer is reverse-Z: the near plane is stored as 1 and the
    /// far plane as 0, which gives better precision for distant objects.
    ///
    /// With `ndc = 1` at `z = near` and `ndc = 0` at `z = far`, the inverse is
    /// `linear = far / (1 + ndc * (far / near - 1))`.
    ///
    /// Samples at or below 0 are treated as background and return `far`;
    /// samples at or above 1 return `near`.
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        if ndc_depth <= 0.0 {
            // Background: nothing closer than the far plane was drawn.
            far
        } else if ndc_depth >= 1.0 {
            // At or in front of the near plane.
            near
        } else {
            far / (1.0 + ndc_depth * (far / near - 1.0))
        }
    }

    /// Pull the `f32` depth samples out of a row-padded readback buffer.
    ///
    /// `data` holds `height` rows, each starting on an aligned stride; only the
    /// first `width * 4` bytes of every row carry samples. The result is
    /// row-major with `width * height` entries.
    ///
    /// Panics if `data` is too short for the requested dimensions.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const BYTES_PER_SAMPLE: u32 = 4; // one little-endian f32 per pixel

        let stride = align_byte_size(width * BYTES_PER_SAMPLE) as usize;
        let payload = (width * BYTES_PER_SAMPLE) as usize;

        let mut samples = Vec::with_capacity((width * height) as usize);

        for row in 0..height as usize {
            let begin = row * stride;
            // Trailing bytes of the stride are padding and are skipped.
            let row_bytes = &data[begin..begin + payload];

            samples.extend((0..width as usize).map(|col| {
                let at = col * 4;
                let raw = &row_bytes[at..at + 4];
                f32::from_le_bytes([raw[0], raw[1], raw[2], raw[3]])
            }));
        }

        samples
    }

    /// Convert a slice of reverse-Z NDC depths to linear meters, widening each
    /// result to `f64` for TBP precision.
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        let mut meters = Vec::with_capacity(raw_depth.len());
        for &ndc in raw_depth {
            meters.push(f64::from(reverse_z_to_linear_depth(ndc, near, far)));
        }
        meters
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            let cases = [(256, 256), (257, 512), (1, 256), (512, 512), (0, 0)];
            for &(input, expected) in cases.iter() {
                assert_eq!(align_byte_size(input), expected);
            }
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // ndc = 1 is the near plane in reverse-Z.
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // ndc = 0.5: linear = 10 / (1 + 0.5 * (1000 - 1)) = 10 / 500.5 ≈ 0.02.
            // The mapping is strongly non-linear, so this sits very close to `near`.
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            assert!(linear_mid > near && linear_mid < far);

            // ndc = 0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09.
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            assert!(linear_almost_far > 9.0);

            // ndc = 0 is background and maps exactly to `far`.
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image of f32 samples: 8 payload bytes per row, 256-byte stride.
            let (width, height) = (2u32, 2u32);
            let mut data = vec![0u8; 256 * 2];

            // (byte offset, value): row 0 holds [0.5, 0.6], row 1 holds [0.7, 0.8].
            let samples = [(0usize, 0.5f32), (4, 0.6), (256, 0.7), (260, 0.8)];
            for &(offset, value) in samples.iter() {
                data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            for (got, &(_, want)) in depth.iter().zip(samples.iter()) {
                assert!((got - want).abs() < 0.001);
            }
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // ndc = 1 must come back as the near distance.
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // ndc = 0 must come back as the far distance.
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Walking NDC from 1 down to 0 must never decrease the linear depth.
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // First sample is the near plane, last one the far plane.
            assert!((linear[0] - near as f64).abs() < 0.001);
            assert!((linear[3] - far as f64).abs() < 0.001);
            // Every converted value stays within [near, far].
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Exact multiples of 256 are left alone.
            for &aligned in [256u32, 512, 1024].iter() {
                assert_eq!(align_byte_size(aligned), aligned);
            }

            // Anything in 1..=255 rounds up to 256.
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just past 256 rounds up to 512.
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // TBP default resolution.
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with a ramp so every pixel has a distinct, predictable value.
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Spot-check the first and last samples of the ramp.
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
433
434// ============================================================================
435// Depth Readback Render Node
436// ============================================================================
437
438/// Label for the depth readback render graph node.
439#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
440struct DepthReadbackLabel;
441
442/// Render node that copies the main camera's depth texture to a staging buffer.
443/// This runs after the main pass completes, using ViewDepthTexture.
444#[derive(Default)]
445struct DepthReadbackNode;
446
447impl ViewNode for DepthReadbackNode {
448    type ViewQuery = (&'static ViewDepthTexture, &'static ExtractedCamera);
449
450    fn run<'w>(
451        &self,
452        _graph: &mut RenderGraphContext,
453        render_context: &mut RenderContext<'w>,
454        (view_depth_texture, camera): QueryItem<'w, Self::ViewQuery>,
455        world: &'w World,
456    ) -> Result<(), NodeRunError> {
457        let trace = render_trace_enabled();
458        let t0 = trace.then(std::time::Instant::now);
459
460        // Check if depth capture is requested
461        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
462            return Ok(());
463        };
464        if !request.requested {
465            return Ok(());
466        }
467
468        // Get the pending queue
469        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
470            return Ok(());
471        };
472
473        // Get texture size from camera viewport or physical size
474        let Some(physical_size) = camera.physical_target_size else {
475            return Ok(());
476        };
477        let width = physical_size.x;
478        let height = physical_size.y;
479
480        let render_device = world.resource::<RenderDevice>();
481
482        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
483        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
484        let unpadded_bytes_per_row = width * bytes_per_pixel;
485        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
486        let buffer_size = (padded_bytes_per_row * height) as u64;
487
488        // Create staging buffer for CPU readback
489        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
490            label: Some("depth_staging_buffer"),
491            size: buffer_size,
492            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
493            mapped_at_creation: false,
494        });
495
496        // Copy depth texture to staging buffer
497        let encoder = render_context.command_encoder();
498        encoder.copy_texture_to_buffer(
499            ImageCopyTexture {
500                texture: &view_depth_texture.texture,
501                mip_level: 0,
502                origin: Origin3d::ZERO,
503                aspect: TextureAspect::DepthOnly,
504            },
505            ImageCopyBuffer {
506                buffer: &staging_buffer,
507                layout: ImageDataLayout {
508                    offset: 0,
509                    bytes_per_row: Some(padded_bytes_per_row),
510                    rows_per_image: Some(height),
511                },
512            },
513            Extent3d {
514                width,
515                height,
516                depth_or_array_layers: 1,
517            },
518        );
519
520        // Push to queue for async processing (queue is Arc<Mutex<Vec>>)
521        if let Ok(mut pending) = queue.0.lock() {
522            pending.push(PendingDepthCapture {
523                buffer: staging_buffer,
524                width,
525                height,
526                near: request.near,
527                far: request.far,
528            });
529        }
530
531        if let Some(t0) = t0 {
532            eprintln!(
533                "[render_trace][node] DepthReadbackNode ms={:.3}",
534                t0.elapsed().as_secs_f64() * 1000.0
535            );
536        }
537
538        Ok(())
539    }
540}
541
542// ============================================================================
543// Depth Readback Plugin
544// ============================================================================
545
546/// Plugin that sets up depth buffer readback from the GPU.
547struct DepthReadbackPlugin {
548    shared_depth: SharedDepthBuffer,
549    near: f32,
550    far: f32,
551}
552
553impl Plugin for DepthReadbackPlugin {
554    fn build(&self, app: &mut App) {
555        use bevy::core_pipeline::core_3d::graph::Core3d;
556        use bevy::core_pipeline::core_3d::graph::Node3d;
557
558        // Insert shared depth buffer in main app
559        app.insert_resource(self.shared_depth.clone());
560        app.insert_resource(DepthCaptureRequest {
561            requested: false,
562            near: self.near,
563            far: self.far,
564        });
565
566        // Get render app
567        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
568            eprintln!("Failed to get RenderApp for depth readback");
569            return;
570        };
571
572        // Insert resources in render world
573        render_app.insert_resource(self.shared_depth.clone());
574        render_app.init_resource::<PendingDepthCaptureQueue>();
575
576        // Add extraction system to copy request from main world
577        render_app.add_systems(ExtractSchedule, extract_depth_request);
578
579        // Add system to process completed depth captures
580        render_app.add_systems(Render, collect_depth_captures.in_set(RenderSet::Cleanup));
581
582        // Register the depth readback node in the render graph
583        // Run after main pass completes (depth buffer is ready) but before tonemapping
584        render_app
585            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
586            .add_render_graph_edges(
587                Core3d,
588                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
589            );
590    }
591}
592
593/// Extract depth capture request from main world to render world
594fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
595    commands.insert_resource(DepthCaptureRequest {
596        requested: request.requested,
597        near: request.near,
598        far: request.far,
599    });
600}
601
602/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
603fn collect_depth_captures(
604    queue: Res<PendingDepthCaptureQueue>,
605    shared_depth: Res<SharedDepthBuffer>,
606    render_device: Res<RenderDevice>,
607) {
608    let trace = render_trace_enabled();
609    let t_sys = trace.then(std::time::Instant::now);
610
611    // Take all pending captures from the queue
612    let pending_captures = {
613        let Ok(mut pending) = queue.0.lock() else {
614            return;
615        };
616        std::mem::take(&mut *pending)
617    };
618
619    if pending_captures.is_empty() {
620        if let Some(t0) = t_sys {
621            eprintln!(
622                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
623                t0.elapsed().as_secs_f64() * 1000.0
624            );
625        }
626        return;
627    }
628
629    let pending_count = pending_captures.len();
630
631    // Process each pending capture synchronously with device polling
632    for pending in pending_captures {
633        let width = pending.width;
634        let height = pending.height;
635        let near = pending.near;
636        let far = pending.far;
637        let buffer = pending.buffer;
638        let shared = shared_depth.0.clone();
639
640        // Use blocking sync approach with device polling (same as RGBA capture)
641        let buffer_slice = buffer.slice(..);
642
643        // Request mapping
644        let (tx, rx) = std::sync::mpsc::channel();
645        buffer_slice.map_async(MapMode::Read, move |result| {
646            let _ = tx.send(result);
647        });
648
649        let t_wait = trace.then(std::time::Instant::now);
650        let mut poll_iters: u32 = 0;
651
652        // Poll the device until mapping completes
653        loop {
654            render_device.poll(bevy::render::render_resource::Maintain::Poll);
655            poll_iters += 1;
656            match rx.try_recv() {
657                Ok(Ok(())) => {
658                    let data = buffer_slice.get_mapped_range();
659
660                    // Extract depth values with alignment handling
661                    let ndc_depth =
662                        depth_helpers::extract_depth_with_alignment(&data, width, height);
663
664                    drop(data);
665                    buffer.unmap();
666
667                    // Convert from reverse-Z NDC to linear depth in meters
668                    let linear_depth =
669                        depth_helpers::convert_depth_to_linear(&ndc_depth, near, far);
670
671                    // Store in shared buffer
672                    if let Ok(mut guard) = shared.lock() {
673                        *guard = Some((linear_depth, width, height));
674                    }
675                    break;
676                }
677                Ok(Err(e)) => {
678                    eprintln!("Failed to map depth buffer: {:?}", e);
679                    break;
680                }
681                Err(std::sync::mpsc::TryRecvError::Empty) => {
682                    // Keep polling
683                    std::thread::sleep(std::time::Duration::from_millis(1));
684                }
685                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
686                    eprintln!("Depth buffer mapping channel disconnected");
687                    break;
688                }
689            }
690        }
691
692        if let Some(t_wait) = t_wait {
693            eprintln!(
694                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
695                poll_iters,
696                t_wait.elapsed().as_secs_f64() * 1000.0
697            );
698        }
699    }
700
701    if let Some(t0) = t_sys {
702        eprintln!(
703            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
704            pending_count,
705            t0.elapsed().as_secs_f64() * 1000.0
706        );
707    }
708}
709
710// ============================================================================
711// Image Copy Infrastructure (for headless rendering)
712// ============================================================================
713
714/// Label for the image copy render graph node
715#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
716struct ImageCopyLabel;
717
718/// Component that marks an image for GPU-to-CPU copying
719#[derive(Component, Clone)]
720struct ImageCopier {
721    /// Handle to the source image (render target)
722    src_image: Handle<Image>,
723    /// Whether to capture on this frame
724    enabled: bool,
725}
726
727/// Resource containing all ImageCopiers for the render world
728#[derive(Resource, Default)]
729struct ImageCopiers(Vec<ImageCopier>);
730
731/// Pending image capture for async processing
732struct PendingImageCapture {
733    buffer: Buffer,
734    width: u32,
735    height: u32,
736    padded_bytes_per_row: u32,
737}
738
739/// Queue for pending image captures
740#[derive(Resource, Default)]
741struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
742
743/// Shared buffer for captured RGBA data
744#[derive(Resource, Clone, Default)]
745#[allow(clippy::type_complexity)]
746struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
747
748/// Render graph node that copies render target images to staging buffers
749struct ImageCopyDriver;
750
751impl Node for ImageCopyDriver {
752    fn run(
753        &self,
754        _graph: &mut RenderGraphContext,
755        _render_context: &mut RenderContext,
756        world: &World,
757    ) -> Result<(), NodeRunError> {
758        let trace = render_trace_enabled();
759        let t0 = trace.then(std::time::Instant::now);
760
761        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
762            return Ok(());
763        };
764
765        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
766            return Ok(());
767        };
768
769        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
770            return Ok(());
771        };
772
773        let render_device = world.resource::<RenderDevice>();
774
775        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
776            return Ok(());
777        };
778
779        for image_copier in image_copiers.0.iter() {
780            if !image_copier.enabled {
781                continue;
782            }
783
784            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
785                continue;
786            };
787
788            let width = gpu_image.size.x;
789            let height = gpu_image.size.y;
790
791            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
792            let block_dimensions = gpu_image.texture_format.block_dimensions();
793            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
794
795            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
796                (width as usize / block_dimensions.0 as usize) * block_size as usize,
797            );
798
799            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
800
801            // Create staging buffer for CPU readback
802            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
803                label: Some("image_copy_staging_buffer"),
804                size: buffer_size,
805                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
806                mapped_at_creation: false,
807            });
808
809            // Create command encoder for the copy operation
810            let mut encoder =
811                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
812
813            let texture_extent = Extent3d {
814                width,
815                height,
816                depth_or_array_layers: 1,
817            };
818
819            // Copy texture to buffer
820            encoder.copy_texture_to_buffer(
821                gpu_image.texture.as_image_copy(),
822                ImageCopyBuffer {
823                    buffer: &staging_buffer,
824                    layout: ImageDataLayout {
825                        offset: 0,
826                        bytes_per_row: Some(padded_bytes_per_row as u32),
827                        rows_per_image: None,
828                    },
829                },
830                texture_extent,
831            );
832
833            // Submit the copy command
834            render_queue.submit(std::iter::once(encoder.finish()));
835
836            // Queue for async processing
837            if let Ok(mut pending) = queue.0.lock() {
838                pending.push(PendingImageCapture {
839                    buffer: staging_buffer,
840                    width,
841                    height,
842                    padded_bytes_per_row: padded_bytes_per_row as u32,
843                });
844            }
845        }
846
847        if let Some(t0) = t0 {
848            eprintln!(
849                "[render_trace][node] ImageCopyDriver ms={:.3}",
850                t0.elapsed().as_secs_f64() * 1000.0
851            );
852        }
853
854        Ok(())
855    }
856}
857
858/// Extract ImageCopier components to render world
859fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
860    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
861}
862
863/// Process completed image captures
864fn collect_image_captures(
865    queue: Res<PendingImageCaptureQueue>,
866    shared_rgba: Res<SharedRgbaBuffer>,
867    render_device: Res<RenderDevice>,
868) {
869    let trace = render_trace_enabled();
870    let t_sys = trace.then(std::time::Instant::now);
871
872    let pending_captures = {
873        let Ok(mut pending) = queue.0.lock() else {
874            return;
875        };
876        std::mem::take(&mut *pending)
877    };
878
879    if pending_captures.is_empty() {
880        if let Some(t0) = t_sys {
881            eprintln!(
882                "[render_trace][sys] collect_image_captures empty ms={:.3}",
883                t0.elapsed().as_secs_f64() * 1000.0
884            );
885        }
886        return;
887    }
888
889    let pending_count = pending_captures.len();
890
891    for pending in pending_captures {
892        let width = pending.width;
893        let height = pending.height;
894        let padded_bytes_per_row = pending.padded_bytes_per_row;
895        let buffer = pending.buffer;
896        let shared = shared_rgba.0.clone();
897
898        // Use blocking sync approach with device polling
899        let buffer_slice = buffer.slice(..);
900
901        // Request mapping
902        let (tx, rx) = std::sync::mpsc::channel();
903        buffer_slice.map_async(MapMode::Read, move |result| {
904            let _ = tx.send(result);
905        });
906
907        // Poll the device until mapping completes (with timeout)
908        let start = std::time::Instant::now();
909        let timeout = std::time::Duration::from_secs(10);
910        let mut poll_iters: u32 = 0;
911        loop {
912            render_device.poll(bevy::render::render_resource::Maintain::Poll);
913            poll_iters += 1;
914
915            if start.elapsed() > timeout {
916                eprintln!(
917                    "Warning: Buffer mapping timeout after {:?}",
918                    start.elapsed()
919                );
920                break;
921            }
922
923            match rx.try_recv() {
924                Ok(Ok(())) => {
925                    let data = buffer_slice.get_mapped_range();
926
927                    // Extract pixels with alignment handling
928                    let bytes_per_pixel = 4u32;
929                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
930                    let padded_row_bytes = padded_bytes_per_row as usize;
931
932                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
933                    for y in 0..height as usize {
934                        let row_start = y * padded_row_bytes;
935                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
936                    }
937
938                    drop(data);
939                    buffer.unmap();
940
941                    if let Ok(mut guard) = shared.lock() {
942                        *guard = Some((rgba, width, height));
943                    }
944                    break;
945                }
946                Ok(Err(e)) => {
947                    eprintln!("Failed to map image buffer: {:?}", e);
948                    break;
949                }
950                Err(std::sync::mpsc::TryRecvError::Empty) => {
951                    // Keep polling
952                    std::thread::sleep(std::time::Duration::from_millis(1));
953                }
954                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
955                    eprintln!("Image buffer mapping channel disconnected");
956                    break;
957                }
958            }
959        }
960
961        if trace {
962            eprintln!(
963                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
964                poll_iters,
965                start.elapsed().as_secs_f64() * 1000.0
966            );
967        }
968    }
969
970    if let Some(t0) = t_sys {
971        eprintln!(
972            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
973            pending_count,
974            t0.elapsed().as_secs_f64() * 1000.0
975        );
976    }
977}
978
979/// Plugin for headless image copy
980struct ImageCopyPlugin {
981    shared_rgba: SharedRgbaBuffer,
982}
983
984impl Plugin for ImageCopyPlugin {
985    fn build(&self, app: &mut App) {
986        use bevy::render::render_graph::RenderGraph;
987
988        app.insert_resource(self.shared_rgba.clone());
989
990        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
991            return;
992        };
993
994        render_app.insert_resource(self.shared_rgba.clone());
995        render_app.init_resource::<ImageCopiers>();
996        render_app.init_resource::<PendingImageCaptureQueue>();
997
998        render_app.add_systems(ExtractSchedule, extract_image_copiers);
999        render_app.add_systems(Render, collect_image_captures.in_set(RenderSet::Cleanup));
1000
1001        // Add image copy node to render graph (runs after camera driver)
1002        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1003        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1004        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1005    }
1006}
1007
1008// ============================================================================
1009// Render Request and Components
1010// ============================================================================
1011
1012/// Configuration passed to the Bevy app
1013#[derive(Resource, Clone)]
1014struct RenderRequest {
1015    mesh_path: String,
1016    texture_path: String,
1017    camera_transform: Transform,
1018    object_rotation: ObjectRotation,
1019    config: RenderConfig,
1020}
1021
1022/// Marker for the rendered object
1023#[derive(Component)]
1024struct RenderedObject;
1025
1026/// Marker for the render camera
1027#[derive(Component)]
1028struct RenderCamera;
1029
1030/// Handle for the loaded texture
1031#[derive(Resource)]
1032struct LoadedTexture(Handle<Image>);
1033
1034/// Handle for the loaded scene
1035#[derive(Resource)]
1036struct LoadedScene(Handle<Scene>);
1037
1038/// Shared output for extracting render results
1039#[derive(Resource, Clone)]
1040struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1041
1042/// Handle for the render target image
1043#[derive(Resource)]
1044#[allow(dead_code)]
1045struct RenderTargetImage(Handle<Image>);
1046
1047/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
1048#[derive(Resource)]
1049struct HeadlessBatchSequence {
1050    viewpoints: Vec<Transform>,
1051    current_index: usize,
1052    outputs: Vec<RenderOutput>,
1053    warmup_frames_remaining: u32,
1054    done: bool,
1055}
1056
1057impl HeadlessBatchSequence {
1058    fn new(viewpoints: Vec<Transform>) -> Self {
1059        let capacity = viewpoints.len();
1060        Self {
1061            viewpoints,
1062            current_index: 0,
1063            outputs: Vec::with_capacity(capacity),
1064            warmup_frames_remaining: 0,
1065            done: capacity == 0,
1066        }
1067    }
1068
1069    fn current_viewpoint(&self) -> Option<Transform> {
1070        self.viewpoints.get(self.current_index).cloned()
1071    }
1072}
1073
1074/// Perform headless rendering of a YCB object.
1075///
1076/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1077/// require any window surfaces. This should work on WSL2 and other environments
1078/// without display servers.
1079///
1080/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1081/// for results and terminates the process once the render is complete.
1082#[allow(dead_code)]
1083pub fn render_headless(
1084    object_dir: &Path,
1085    camera_transform: &Transform,
1086    object_rotation: &ObjectRotation,
1087    config: &RenderConfig,
1088) -> Result<RenderOutput, RenderError> {
1089    // Canonicalize paths so Bevy's asset server can find them regardless of
1090    // caller working directory. Relative paths like "../../ycb" pass the
1091    // exists() check but Bevy resolves assets against its own root.
1092    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1093        RenderError::RenderFailed(format!(
1094            "Cannot canonicalize object directory {}: {}",
1095            object_dir.display(),
1096            e
1097        ))
1098    })?;
1099    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1100    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1101
1102    if !mesh_path.exists() {
1103        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1104    }
1105    if !texture_path.exists() {
1106        return Err(RenderError::TextureNotFound(
1107            texture_path.display().to_string(),
1108        ));
1109    }
1110
1111    let request = RenderRequest {
1112        mesh_path: mesh_path.display().to_string(),
1113        texture_path: texture_path.display().to_string(),
1114        camera_transform: *camera_transform,
1115        object_rotation: object_rotation.clone(),
1116        config: config.clone(),
1117    };
1118
1119    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1120    let output_clone = shared_output.clone();
1121
1122    // Shared buffer for RGBA data from headless render target
1123    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1124
1125    // Shared buffer for depth readback
1126    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1127
1128    // Create a temp file path for fallback output serialization
1129    let temp_path =
1130        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1131
1132    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1133    let output_poll_for_timeout = shared_output.clone();
1134    std::thread::spawn(move || {
1135        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1136        let start = std::time::Instant::now();
1137        let poll_interval = std::time::Duration::from_millis(100);
1138
1139        loop {
1140            // Check if we have a result
1141            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1142                if guard.is_some() {
1143                    // Output is ready, Bevy will exit via AppExit event
1144                    return; // Exit watchdog thread, Bevy will handle exit
1145                }
1146            }
1147
1148            if start.elapsed() > timeout {
1149                eprintln!(
1150                    "Error: Render timeout after {} seconds",
1151                    RENDER_TIMEOUT_SECS
1152                );
1153                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1154                // Force exit on timeout (this is a failure case)
1155                std::process::exit(1);
1156            }
1157
1158            std::thread::sleep(poll_interval);
1159        }
1160    });
1161
1162    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1163    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1164    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1165
1166    // App::run() returned - check shared_output for result
1167    if let Ok(guard) = shared_output.0.lock() {
1168        if let Some(output) = guard.as_ref() {
1169            return Ok(output.clone());
1170        }
1171    }
1172
1173    // Fallback: try to read from temp file (for legacy compatibility)
1174    if temp_path.exists() {
1175        if let Ok(output) = read_output_from_file(&temp_path) {
1176            let _ = std::fs::remove_file(&temp_path);
1177            return Ok(output);
1178        }
1179    }
1180
1181    Err(RenderError::RenderFailed(
1182        "Render did not complete".to_string(),
1183    ))
1184}
1185
1186/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
1187///
1188/// All captures share the same object, object rotation, and render configuration.
1189/// This is the fast path used by the batch API for episode-style workloads.
1190pub fn render_headless_sequence(
1191    object_dir: &Path,
1192    viewpoints: &[Transform],
1193    object_rotation: &ObjectRotation,
1194    config: &RenderConfig,
1195) -> Result<Vec<RenderOutput>, RenderError> {
1196    if viewpoints.is_empty() {
1197        return Ok(Vec::new());
1198    }
1199
1200    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1201        RenderError::RenderFailed(format!(
1202            "Cannot canonicalize object directory {}: {}",
1203            object_dir.display(),
1204            e
1205        ))
1206    })?;
1207    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
1208    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
1209
1210    if !mesh_path.exists() {
1211        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1212    }
1213    if !texture_path.exists() {
1214        return Err(RenderError::TextureNotFound(
1215            texture_path.display().to_string(),
1216        ));
1217    }
1218
1219    let request = RenderRequest {
1220        mesh_path: mesh_path.display().to_string(),
1221        texture_path: texture_path.display().to_string(),
1222        camera_transform: viewpoints[0],
1223        object_rotation: object_rotation.clone(),
1224        config: config.clone(),
1225    };
1226
1227    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1228    let rgba_clone = shared_rgba.clone();
1229
1230    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1231    let depth_clone = shared_depth.clone();
1232
1233    let mut app = App::new();
1234    app.add_plugins(
1235        DefaultPlugins
1236            .set(WindowPlugin {
1237                primary_window: None,
1238                exit_condition: ExitCondition::DontExit,
1239                ..default()
1240            })
1241            .disable::<bevy::winit::WinitPlugin>()
1242            .disable::<LogPlugin>()
1243            .disable::<TerminalCtrlCHandlerPlugin>(),
1244    )
1245    .add_plugins(ObjPlugin)
1246    .add_plugins(ImageCopyPlugin {
1247        shared_rgba: rgba_clone,
1248    })
1249    .add_plugins(DepthReadbackPlugin {
1250        shared_depth: depth_clone,
1251        near: config.near_plane,
1252        far: config.far_plane,
1253    })
1254    .insert_resource(request)
1255    .insert_resource(shared_rgba)
1256    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
1257    .init_resource::<RenderState>()
1258    .add_systems(Startup, setup_headless_scene)
1259    .add_systems(
1260        Update,
1261        (
1262            check_assets_loaded,
1263            apply_materials,
1264            tick_headless_batch_warmup,
1265            request_headless_capture,
1266            check_headless_capture_ready,
1267            extract_and_continue_headless_batch,
1268        )
1269            .chain(),
1270    );
1271
1272    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
1273    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
1274    // normal startup phases before driving the headless batch loop ourselves.
1275    let trace_outer = render_trace_enabled();
1276    let t_finish = std::time::Instant::now();
1277    app.finish();
1278    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
1279    let t_cleanup = std::time::Instant::now();
1280    app.cleanup();
1281    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
1282    if trace_outer {
1283        eprintln!(
1284            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
1285            finish_ms, cleanup_ms
1286        );
1287    }
1288
1289    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1290    let start = std::time::Instant::now();
1291
1292    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
1293    let mut update_idx: u32 = 0;
1294    let mut last_completed_outputs: usize = 0;
1295    let mut viewpoint_start = std::time::Instant::now();
1296
1297    loop {
1298        if start.elapsed() > timeout {
1299            return Err(RenderError::RenderTimeout {
1300                duration_secs: RENDER_TIMEOUT_SECS,
1301            });
1302        }
1303
1304        let update_start = std::time::Instant::now();
1305        app.update();
1306        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;
1307
1308        if trace {
1309            let batch = app.world().resource::<HeadlessBatchSequence>();
1310            let warmup = batch.warmup_frames_remaining;
1311            let current = batch.current_index;
1312            let completed = batch.outputs.len();
1313            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
1314            eprintln!(
1315                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
1316                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
1317            );
1318            if completed > last_completed_outputs {
1319                eprintln!(
1320                    "[render_trace] viewpoint {} finished in {:.2} ms",
1321                    completed - 1,
1322                    vp_ms
1323                );
1324                last_completed_outputs = completed;
1325                viewpoint_start = std::time::Instant::now();
1326            }
1327        }
1328
1329        update_idx += 1;
1330
1331        if app.world().resource::<HeadlessBatchSequence>().done {
1332            break;
1333        }
1334    }
1335
1336    if trace {
1337        eprintln!(
1338            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
1339            start.elapsed().as_secs_f64() * 1000.0,
1340            viewpoints.len()
1341        );
1342    }
1343
1344    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
1345    if batch.outputs.len() != viewpoints.len() {
1346        return Err(RenderError::RenderFailed(format!(
1347            "Batch render produced {} outputs for {} viewpoints",
1348            batch.outputs.len(),
1349            viewpoints.len()
1350        )));
1351    }
1352
1353    Ok(std::mem::take(&mut batch.outputs))
1354}
1355
1356/// Assemble the shared single-render headless Bevy app.
1357fn build_headless_app(
1358    request: RenderRequest,
1359    shared_output: SharedOutput,
1360    shared_rgba: SharedRgbaBuffer,
1361    shared_depth: SharedDepthBuffer,
1362) -> App {
1363    let near = request.config.near_plane;
1364    let far = request.config.far_plane;
1365
1366    let mut app = App::new();
1367    app.add_plugins(
1368        DefaultPlugins
1369            .set(WindowPlugin {
1370                primary_window: None,
1371                exit_condition: ExitCondition::DontExit,
1372                ..default()
1373            })
1374            .disable::<bevy::winit::WinitPlugin>()
1375            .disable::<LogPlugin>()
1376            .disable::<TerminalCtrlCHandlerPlugin>(),
1377    )
1378    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1379        1.0 / 60.0,
1380    )))
1381    .add_plugins(ObjPlugin)
1382    .add_plugins(ImageCopyPlugin {
1383        shared_rgba: shared_rgba.clone(),
1384    })
1385    .add_plugins(DepthReadbackPlugin {
1386        shared_depth,
1387        near,
1388        far,
1389    })
1390    .insert_resource(request)
1391    .insert_resource(shared_output)
1392    .insert_resource(shared_rgba)
1393    .init_resource::<RenderState>()
1394    .add_systems(Startup, setup_headless_scene)
1395    .add_systems(
1396        Update,
1397        (
1398            check_assets_loaded,
1399            apply_materials,
1400            request_headless_capture,
1401            check_headless_capture_ready,
1402            extract_and_exit_headless,
1403        )
1404            .chain(),
1405    );
1406    app
1407}
1408
1409/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1410#[allow(dead_code)]
1411fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1412    let mut data = Vec::new();
1413
1414    // Header: width, height, rgba_len, depth_len
1415    data.extend_from_slice(&output.width.to_le_bytes());
1416    data.extend_from_slice(&output.height.to_le_bytes());
1417    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1418    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1419
1420    // RGBA data
1421    data.extend_from_slice(&output.rgba);
1422
1423    // Depth data (as f64 bytes for TBP precision)
1424    for d in &output.depth {
1425        data.extend_from_slice(&d.to_le_bytes());
1426    }
1427
1428    // Intrinsics (f64 for TBP precision)
1429    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1430    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1431    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1432    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1433    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1434    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1435
1436    // Camera transform (translation + rotation quaternion)
1437    let t = output.camera_transform.translation;
1438    let r = output.camera_transform.rotation;
1439    data.extend_from_slice(&t.x.to_le_bytes());
1440    data.extend_from_slice(&t.y.to_le_bytes());
1441    data.extend_from_slice(&t.z.to_le_bytes());
1442    data.extend_from_slice(&r.x.to_le_bytes());
1443    data.extend_from_slice(&r.y.to_le_bytes());
1444    data.extend_from_slice(&r.z.to_le_bytes());
1445    data.extend_from_slice(&r.w.to_le_bytes());
1446
1447    // Object rotation (f64)
1448    let or = &output.object_rotation;
1449    data.extend_from_slice(&or.pitch.to_le_bytes());
1450    data.extend_from_slice(&or.yaw.to_le_bytes());
1451    data.extend_from_slice(&or.roll.to_le_bytes());
1452
1453    data
1454}
1455
1456/// Read RenderOutput from serialized file
1457fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1458    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1459    let mut data = Vec::new();
1460    file.read_to_end(&mut data)
1461        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1462
1463    let mut cursor = 0;
1464
1465    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1466        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1467        *cursor += 4;
1468        val
1469    };
1470
1471    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1472        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1473        *cursor += 4;
1474        val
1475    };
1476
1477    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1478        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1479        *cursor += 8;
1480        val
1481    };
1482
1483    let width = read_u32(&data, &mut cursor);
1484    let height = read_u32(&data, &mut cursor);
1485    let rgba_len = read_u32(&data, &mut cursor) as usize;
1486    let depth_len = read_u32(&data, &mut cursor) as usize;
1487
1488    let rgba = data[cursor..cursor + rgba_len].to_vec();
1489    cursor += rgba_len;
1490
1491    // Depth data (f64 for TBP precision)
1492    let mut depth = Vec::with_capacity(depth_len);
1493    for _ in 0..depth_len {
1494        depth.push(read_f64(&data, &mut cursor));
1495    }
1496
1497    // Intrinsics (f64 for TBP precision)
1498    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1499    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1500    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1501
1502    // Camera transform (f32 for Bevy compatibility)
1503    let tx = read_f32(&data, &mut cursor);
1504    let ty = read_f32(&data, &mut cursor);
1505    let tz = read_f32(&data, &mut cursor);
1506    let rx = read_f32(&data, &mut cursor);
1507    let ry = read_f32(&data, &mut cursor);
1508    let rz = read_f32(&data, &mut cursor);
1509    let rw = read_f32(&data, &mut cursor);
1510
1511    // Object rotation (f64)
1512    let pitch = read_f64(&data, &mut cursor);
1513    let yaw = read_f64(&data, &mut cursor);
1514    let roll = read_f64(&data, &mut cursor);
1515
1516    Ok(RenderOutput {
1517        rgba,
1518        depth,
1519        width,
1520        height,
1521        intrinsics: crate::CameraIntrinsics {
1522            focal_length,
1523            principal_point,
1524            image_size,
1525        },
1526        camera_transform: Transform {
1527            translation: Vec3::new(tx, ty, tz),
1528            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1529            scale: Vec3::ONE,
1530        },
1531        object_rotation: ObjectRotation { pitch, yaw, roll },
1532    })
1533}
1534
1535/// Setup the scene with camera, lighting, and object
1536#[allow(dead_code)]
1537fn setup_scene(
1538    mut commands: Commands,
1539    asset_server: Res<AssetServer>,
1540    request: Res<RenderRequest>,
1541    mut _materials: ResMut<Assets<StandardMaterial>>,
1542) {
1543    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
1544    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
1545    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
1546    let fov = request.config.fov_radians();
1547    commands.spawn((
1548        Camera3d::default(),
1549        Camera {
1550            hdr: true,
1551            ..default()
1552        },
1553        Projection::Perspective(PerspectiveProjection {
1554            fov,
1555            near: request.config.near_plane,
1556            far: request.config.far_plane,
1557            ..default()
1558        }),
1559        Msaa::Off,
1560        request.camera_transform,
1561        Tonemapping::None, // Accurate colors for software rendering
1562        DepthPrepass,
1563        NormalPrepass,
1564        RenderCamera,
1565    ));
1566
1567    // Ambient light (from config)
1568    let lighting = &request.config.lighting;
1569    commands.insert_resource(AmbientLight {
1570        color: Color::WHITE,
1571        brightness: lighting.ambient_brightness,
1572    });
1573
1574    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
1575    if lighting.key_light_intensity > 0.0 {
1576        commands.spawn((
1577            PointLight {
1578                intensity: lighting.key_light_intensity,
1579                shadows_enabled: lighting.shadows_enabled,
1580                ..default()
1581            },
1582            Transform::from_xyz(
1583                lighting.key_light_position[0],
1584                lighting.key_light_position[1],
1585                lighting.key_light_position[2],
1586            ),
1587        ));
1588    }
1589
1590    // Fill light (from config)
1591    if lighting.fill_light_intensity > 0.0 {
1592        commands.spawn((
1593            PointLight {
1594                intensity: lighting.fill_light_intensity,
1595                shadows_enabled: lighting.shadows_enabled,
1596                ..default()
1597            },
1598            Transform::from_xyz(
1599                lighting.fill_light_position[0],
1600                lighting.fill_light_position[1],
1601                lighting.fill_light_position[2],
1602            ),
1603        ));
1604    }
1605
1606    // Load the scene
1607    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
1608    commands.insert_resource(LoadedScene(scene_handle.clone()));
1609
1610    // Load the texture
1611    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
1612    commands.insert_resource(LoadedTexture(texture_handle.clone()));
1613
1614    // Create material with texture (will be applied later)
1615    let _material = _materials.add(StandardMaterial {
1616        base_color_texture: Some(texture_handle),
1617        unlit: true,
1618        ..default()
1619    });
1620
1621    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
1622    commands.spawn((
1623        SceneRoot(scene_handle),
1624        Transform::from_rotation(request.object_rotation.to_quat()),
1625        RenderedObject,
1626    ));
1627
1628    println!("Scene setup complete");
1629}
1630
1631/// Check if assets are loaded
1632fn check_assets_loaded(
1633    mut state: ResMut<RenderState>,
1634    asset_server: Res<AssetServer>,
1635    scene: Option<Res<LoadedScene>>,
1636    texture: Option<Res<LoadedTexture>>,
1637) {
1638    let trace = render_trace_enabled();
1639    let was_scene_loaded = state.scene_loaded;
1640    let was_texture_loaded = state.texture_loaded;
1641
1642    state.frame_count += 1;
1643
1644    if state.scene_loaded && state.texture_loaded {
1645        return;
1646    }
1647
1648    if let Some(scene) = scene {
1649        match asset_server.get_load_state(&scene.0) {
1650            Some(LoadState::Loaded) => {
1651                state.scene_loaded = true;
1652            }
1653            Some(LoadState::Failed(_)) => {}
1654            _ => {}
1655        }
1656    }
1657
1658    if let Some(texture) = texture {
1659        match asset_server.get_load_state(&texture.0) {
1660            Some(LoadState::Loaded) => {
1661                state.texture_loaded = true;
1662            }
1663            Some(LoadState::Failed(_)) => {}
1664            _ => {}
1665        }
1666    }
1667
1668    if trace {
1669        if !was_scene_loaded && state.scene_loaded {
1670            eprintln!(
1671                "[render_trace][coldinit] scene_loaded frame_count={}",
1672                state.frame_count
1673            );
1674        }
1675        if !was_texture_loaded && state.texture_loaded {
1676            eprintln!(
1677                "[render_trace][coldinit] texture_loaded frame_count={}",
1678                state.frame_count
1679            );
1680        }
1681    }
1682}
1683
1684/// Apply materials to loaded meshes
1685fn apply_materials(
1686    mut state: ResMut<RenderState>,
1687    texture: Option<Res<LoadedTexture>>,
1688    mut materials: ResMut<Assets<StandardMaterial>>,
1689    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1690    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1691) {
1692    if !state.scene_loaded || !state.texture_loaded || state.capture_ready {
1693        return;
1694    }
1695
1696    state.frame_count += 1;
1697
1698    let Some(tex) = texture else { return };
1699
1700    if !state.materials_applied {
1701        // The scene hierarchy is instantiated asynchronously after the asset
1702        // load event fires; wait until mesh entities exist before applying.
1703        if mesh_query.is_empty() {
1704            return;
1705        }
1706
1707        let textured_material = materials.add(StandardMaterial {
1708            base_color_texture: Some(tex.0.clone()),
1709            unlit: true,
1710            ..default()
1711        });
1712
1713        for mut mat in mesh_query.iter_mut() {
1714            mat.0 = textured_material.clone();
1715        }
1716
1717        state.materials_applied = true;
1718        state.materials_applied_frame = state.frame_count;
1719    }
1720
1721    // Two frames after material application is enough for the render graph
1722    // to pick up the new material on native GPU. The previous 60-frame gate
1723    // was a legacy llvmpipe software-rendering cushion.
1724    if state.frame_count >= state.materials_applied_frame + 2 {
1725        let was_ready = state.capture_ready;
1726        state.capture_ready = true;
1727        if render_trace_enabled() && !was_ready {
1728            eprintln!(
1729                "[render_trace][coldinit] capture_ready frame_count={}",
1730                state.frame_count
1731            );
1732        }
1733    }
1734}
1735
1736/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
1737#[allow(dead_code)]
1738fn request_screenshot(
1739    mut commands: Commands,
1740    mut state: ResMut<RenderState>,
1741    shared_image: Res<SharedImageBuffer>,
1742    mut depth_request: ResMut<DepthCaptureRequest>,
1743) {
1744    if !state.capture_ready || state.screenshot_requested {
1745        return;
1746    }
1747
1748    // Clone the Arc for the observer closure
1749    let image_buffer = shared_image.0.clone();
1750
1751    // Also request depth capture
1752    depth_request.requested = true;
1753    println!("Depth capture requested");
1754
1755    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
1756    println!("Requesting screenshot via Screenshot entity");
1757    commands.spawn(Screenshot::primary_window()).observe(
1758        move |trigger: Trigger<ScreenshotCaptured>| {
1759            // ScreenshotCaptured derefs to Image
1760            let image: &Image = trigger.event();
1761
1762            // Get dimensions
1763            let width = image.texture_descriptor.size.width;
1764            let height = image.texture_descriptor.size.height;
1765
1766            // Get raw image data - Bevy 0.15 Image.data is Vec<u8>
1767            let rgba_data = image.data.clone();
1768
1769            // Store in shared buffer
1770            if let Ok(mut guard) = image_buffer.lock() {
1771                *guard = Some((rgba_data, width, height));
1772            }
1773        },
1774    );
1775
1776    state.screenshot_requested = true;
1777    println!("Screenshot requested");
1778}
1779
1780/// Check if screenshot callback has completed
1781#[allow(dead_code)]
1782fn check_screenshot_ready(
1783    mut state: ResMut<RenderState>,
1784    shared_image: Res<SharedImageBuffer>,
1785    shared_depth: Res<SharedDepthBuffer>,
1786    request: Res<RenderRequest>,
1787) {
1788    if !state.screenshot_requested || state.captured {
1789        return;
1790    }
1791
1792    // Increment frame count while waiting for capture
1793    state.frame_count += 1;
1794
1795    // Check if RGBA callback has written data
1796    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
1797        if let Some((rgba_data, width, height)) = guard.as_ref() {
1798            if state.rgba_data.is_none() {
1799                state.rgba_data = Some(rgba_data.clone());
1800                state.image_width = *width;
1801                state.image_height = *height;
1802            }
1803            true
1804        } else {
1805            false
1806        }
1807    } else {
1808        false
1809    };
1810
1811    // Check if depth readback has completed
1812    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
1813        if let Some((depth_data, _width, _height)) = guard.as_ref() {
1814            if state.depth_data.is_none() {
1815                state.depth_data = Some(depth_data.clone());
1816            }
1817            true
1818        } else {
1819            false
1820        }
1821    } else {
1822        false
1823    };
1824
1825    // If depth readback failed or is taking too long, fall back to placeholder
1826    // (This allows graceful degradation on systems where depth readback fails)
1827    if rgba_ready && !depth_ready && state.frame_count > 60 {
1828        let camera_dist = request.camera_transform.translation.length() as f64;
1829        let pixel_count = (state.image_width * state.image_height) as usize;
1830        state.depth_data = Some(vec![camera_dist; pixel_count]);
1831    }
1832
1833    // Mark as captured when both RGBA and depth are ready
1834    if state.rgba_data.is_some() && state.depth_data.is_some() {
1835        state.captured = true;
1836    }
1837}
1838
1839/// Extract results and exit
1840#[allow(dead_code)]
1841fn extract_and_exit(
1842    mut state: ResMut<RenderState>,
1843    request: Res<RenderRequest>,
1844    shared_output: Res<SharedOutput>,
1845    mut commands: Commands,
1846    windows: Query<Entity, With<bevy::window::Window>>,
1847) {
1848    // Handle delayed exit after closing window
1849    if state.exit_requested {
1850        state.exit_frame_count += 1;
1851        // After a few frames with no window, Bevy should exit
1852        return;
1853    }
1854
1855    if !state.captured {
1856        return;
1857    }
1858
1859    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
1860        // Use actual captured dimensions (may differ from config if window was resized)
1861        let width = state.image_width;
1862        let height = state.image_height;
1863
1864        // Compute intrinsics based on actual dimensions (f64 for TBP precision)
1865        let config = &request.config;
1866        let intrinsics = crate::CameraIntrinsics {
1867            focal_length: [
1868                width as f64 * config.zoom as f64,
1869                height as f64 * config.zoom as f64,
1870            ],
1871            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
1872            image_size: [width, height],
1873        };
1874
1875        let output = RenderOutput {
1876            rgba: rgba.clone(),
1877            depth: depth.clone(),
1878            width,
1879            height,
1880            intrinsics,
1881            camera_transform: request.camera_transform,
1882            object_rotation: request.object_rotation.clone(),
1883        };
1884
1885        if let Ok(mut guard) = shared_output.0.lock() {
1886            *guard = Some(output);
1887            drop(guard); // Release lock immediately
1888
1889            // Small delay to allow watchdog to detect output before window close
1890            std::thread::sleep(std::time::Duration::from_millis(200));
1891        }
1892
1893        // Close all windows to trigger app exit
1894        // eprintln!("Closing windows to trigger exit...");
1895        for window_entity in windows.iter() {
1896            commands.entity(window_entity).despawn();
1897        }
1898        state.exit_requested = true;
1899    }
1900}
1901
1902// ============================================================================
1903// Headless Rendering Systems (no window surfaces)
1904// ============================================================================
1905
1906/// Setup the scene for headless rendering with RenderTarget::Image
1907fn setup_headless_scene(
1908    mut commands: Commands,
1909    mut images: ResMut<Assets<Image>>,
1910    asset_server: Res<AssetServer>,
1911    request: Res<RenderRequest>,
1912    mut _materials: ResMut<Assets<StandardMaterial>>,
1913) {
1914    let trace = render_trace_enabled();
1915    let t0 = trace.then(std::time::Instant::now);
1916
1917    #[cfg(test)]
1918    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1919
1920    let width = request.config.width;
1921    let height = request.config.height;
1922
1923    // Create render target image with proper texture usages
1924    let size = Extent3d {
1925        width,
1926        height,
1927        depth_or_array_layers: 1,
1928    };
1929
1930    let mut render_target_image = Image::new_fill(
1931        size,
1932        TextureDimension::D2,
1933        &[0, 0, 0, 255], // Initialize with opaque black
1934        TextureFormat::Rgba8UnormSrgb,
1935        RenderAssetUsages::default(),
1936    );
1937
1938    // Add required texture usages for headless rendering
1939    render_target_image.texture_descriptor.usage =
1940        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
1941
1942    let render_target_handle = images.add(render_target_image);
1943
1944    // Store handle for later access
1945    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
1946
1947    // Camera rendering to the image texture (NO window!)
1948    let fov = request.config.fov_radians();
1949    commands.spawn((
1950        Camera3d::default(),
1951        Camera {
1952            hdr: true,
1953            target: RenderTarget::Image(render_target_handle.clone()),
1954            ..default()
1955        },
1956        Projection::Perspective(PerspectiveProjection {
1957            fov,
1958            near: request.config.near_plane,
1959            far: request.config.far_plane,
1960            ..default()
1961        }),
1962        Msaa::Off,
1963        request.camera_transform,
1964        Tonemapping::None,
1965        DepthPrepass,
1966        NormalPrepass,
1967        RenderCamera,
1968        // Add ImageCopier to trigger RGBA extraction
1969        ImageCopier {
1970            src_image: render_target_handle,
1971            enabled: false, // Will enable when ready to capture
1972        },
1973    ));
1974
1975    // Ambient light
1976    let lighting = &request.config.lighting;
1977    commands.insert_resource(AmbientLight {
1978        color: Color::WHITE,
1979        brightness: lighting.ambient_brightness,
1980    });
1981
1982    // Key light
1983    if lighting.key_light_intensity > 0.0 {
1984        commands.spawn((
1985            PointLight {
1986                intensity: lighting.key_light_intensity,
1987                shadows_enabled: lighting.shadows_enabled,
1988                ..default()
1989            },
1990            Transform::from_xyz(
1991                lighting.key_light_position[0],
1992                lighting.key_light_position[1],
1993                lighting.key_light_position[2],
1994            ),
1995        ));
1996    }
1997
1998    // Fill light
1999    if lighting.fill_light_intensity > 0.0 {
2000        commands.spawn((
2001            PointLight {
2002                intensity: lighting.fill_light_intensity,
2003                shadows_enabled: lighting.shadows_enabled,
2004                ..default()
2005            },
2006            Transform::from_xyz(
2007                lighting.fill_light_position[0],
2008                lighting.fill_light_position[1],
2009                lighting.fill_light_position[2],
2010            ),
2011        ));
2012    }
2013
2014    // Load the scene
2015    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2016    commands.insert_resource(LoadedScene(scene_handle.clone()));
2017
2018    // Load the texture
2019    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2020    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2021
2022    // Create material with texture
2023    let _material = _materials.add(StandardMaterial {
2024        base_color_texture: Some(texture_handle),
2025        unlit: true,
2026        ..default()
2027    });
2028
2029    // Spawn the scene with rotation
2030    commands.spawn((
2031        SceneRoot(scene_handle),
2032        Transform::from_rotation(request.object_rotation.to_quat()),
2033        RenderedObject,
2034    ));
2035
2036    if let Some(t0) = t0 {
2037        eprintln!(
2038            "[render_trace][startup] setup_headless_scene ms={:.3}",
2039            t0.elapsed().as_secs_f64() * 1000.0
2040        );
2041    }
2042}
2043
2044/// Request capture for headless rendering (enable ImageCopier)
2045fn request_headless_capture(
2046    mut state: ResMut<RenderState>,
2047    mut depth_request: ResMut<DepthCaptureRequest>,
2048    mut query: Query<&mut ImageCopier>,
2049    batch: Option<Res<HeadlessBatchSequence>>,
2050) {
2051    let trace = render_trace_enabled();
2052    let t0 = trace.then(std::time::Instant::now);
2053
2054    if !state.capture_ready || state.screenshot_requested {
2055        if let Some(t0) = t0 {
2056            eprintln!(
2057                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2058                t0.elapsed().as_secs_f64() * 1000.0
2059            );
2060        }
2061        return;
2062    }
2063
2064    if batch
2065        .as_ref()
2066        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2067    {
2068        if let Some(t0) = t0 {
2069            eprintln!(
2070                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2071                t0.elapsed().as_secs_f64() * 1000.0
2072            );
2073        }
2074        return;
2075    }
2076
2077    // Enable the ImageCopier to trigger RGBA extraction
2078    for mut copier in query.iter_mut() {
2079        copier.enabled = true;
2080    }
2081
2082    // Request depth capture
2083    depth_request.requested = true;
2084
2085    state.screenshot_requested = true;
2086
2087    if let Some(t0) = t0 {
2088        eprintln!(
2089            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2090            t0.elapsed().as_secs_f64() * 1000.0
2091        );
2092    }
2093}
2094
2095/// Check if headless capture has completed
2096fn check_headless_capture_ready(
2097    mut state: ResMut<RenderState>,
2098    shared_rgba: Res<SharedRgbaBuffer>,
2099    shared_depth: Res<SharedDepthBuffer>,
2100    request: Res<RenderRequest>,
2101    mut query: Query<&mut ImageCopier>,
2102) {
2103    let trace = render_trace_enabled();
2104    let t0 = trace.then(std::time::Instant::now);
2105
2106    if !state.screenshot_requested || state.captured {
2107        if let Some(t0) = t0 {
2108            eprintln!(
2109                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2110                t0.elapsed().as_secs_f64() * 1000.0
2111            );
2112        }
2113        return;
2114    }
2115
2116    state.frame_count += 1;
2117
2118    // Check if RGBA data is ready
2119    let rgba_ready = if let Ok(guard) = shared_rgba.0.lock() {
2120        if let Some((rgba_data, width, height)) = guard.as_ref() {
2121            if state.rgba_data.is_none() {
2122                state.rgba_data = Some(rgba_data.clone());
2123                state.image_width = *width;
2124                state.image_height = *height;
2125                // Disable further captures
2126                for mut copier in query.iter_mut() {
2127                    copier.enabled = false;
2128                }
2129            }
2130            true
2131        } else {
2132            false
2133        }
2134    } else {
2135        false
2136    };
2137
2138    // Check if depth data is ready
2139    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2140        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2141            if state.depth_data.is_none() {
2142                state.depth_data = Some(depth_data.clone());
2143            }
2144            true
2145        } else {
2146            false
2147        }
2148    } else {
2149        false
2150    };
2151
2152    // Fallback to placeholder depth after 10 extra frames if depth readback fails
2153    if rgba_ready && !depth_ready && state.frame_count > 70 {
2154        let camera_dist = request.camera_transform.translation.length() as f64;
2155        let pixel_count = (state.image_width * state.image_height) as usize;
2156        state.depth_data = Some(vec![camera_dist; pixel_count]);
2157    }
2158
2159    if state.rgba_data.is_some() && state.depth_data.is_some() {
2160        state.captured = true;
2161    }
2162
2163    if let Some(t0) = t0 {
2164        eprintln!(
2165            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2166            rgba_ready,
2167            depth_ready,
2168            state.captured,
2169            state.frame_count,
2170            t0.elapsed().as_secs_f64() * 1000.0
2171        );
2172    }
2173}
2174
2175/// Extract results and exit for headless rendering
2176fn extract_and_exit_headless(
2177    mut state: ResMut<RenderState>,
2178    request: Res<RenderRequest>,
2179    shared_output: Res<SharedOutput>,
2180    mut app_exit: EventWriter<bevy::app::AppExit>,
2181    batch: Option<Res<HeadlessBatchSequence>>,
2182) {
2183    if batch.is_some() {
2184        return;
2185    }
2186
2187    if state.exit_requested {
2188        return;
2189    }
2190
2191    if !state.captured {
2192        return;
2193    }
2194
2195    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2196        let width = state.image_width;
2197        let height = state.image_height;
2198
2199        // Compute intrinsics (f64 for TBP precision)
2200        let config = &request.config;
2201        let intrinsics = crate::CameraIntrinsics {
2202            focal_length: [
2203                width as f64 * config.zoom as f64,
2204                height as f64 * config.zoom as f64,
2205            ],
2206            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2207            image_size: [width, height],
2208        };
2209
2210        let output = RenderOutput {
2211            rgba: rgba.clone(),
2212            depth: depth.clone(),
2213            width,
2214            height,
2215            intrinsics,
2216            camera_transform: request.camera_transform,
2217            object_rotation: request.object_rotation.clone(),
2218        };
2219
2220        if let Ok(mut guard) = shared_output.0.lock() {
2221            *guard = Some(output);
2222            drop(guard);
2223            std::thread::sleep(std::time::Duration::from_millis(200));
2224        }
2225
2226        // Send AppExit event (headless apps use this instead of closing windows)
2227        app_exit.send(bevy::app::AppExit::Success);
2228        state.exit_requested = true;
2229    }
2230}
2231
2232/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2233fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2234    let Some(mut batch) = batch else {
2235        return;
2236    };
2237
2238    if batch.warmup_frames_remaining > 0 {
2239        batch.warmup_frames_remaining -= 1;
2240    }
2241}
2242
2243/// Extract one batch output and continue rendering the next viewpoint in the same app.
2244fn extract_and_continue_headless_batch(
2245    mut state: ResMut<RenderState>,
2246    request: Res<RenderRequest>,
2247    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2248    batch: Option<ResMut<HeadlessBatchSequence>>,
2249    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2250    mut depth_request: ResMut<DepthCaptureRequest>,
2251    mut image_copiers: Query<&mut ImageCopier>,
2252) {
2253    let trace = render_trace_enabled();
2254    let t0 = trace.then(std::time::Instant::now);
2255
2256    let (shared_rgba, shared_depth) = buffers;
2257    let Some(mut batch) = batch else {
2258        if let Some(t0) = t0 {
2259            eprintln!(
2260                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2261                t0.elapsed().as_secs_f64() * 1000.0
2262            );
2263        }
2264        return;
2265    };
2266
2267    if state.exit_requested || !state.captured || batch.done {
2268        if let Some(t0) = t0 {
2269            eprintln!(
2270                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2271                state.captured,
2272                batch.done,
2273                t0.elapsed().as_secs_f64() * 1000.0
2274            );
2275        }
2276        return;
2277    }
2278
2279    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2280        let width = state.image_width;
2281        let height = state.image_height;
2282
2283        let config = &request.config;
2284        let intrinsics = crate::CameraIntrinsics {
2285            focal_length: [
2286                width as f64 * config.zoom as f64,
2287                height as f64 * config.zoom as f64,
2288            ],
2289            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2290            image_size: [width, height],
2291        };
2292
2293        let output = RenderOutput {
2294            rgba: rgba.clone(),
2295            depth: depth.clone(),
2296            width,
2297            height,
2298            intrinsics,
2299            camera_transform: batch
2300                .current_viewpoint()
2301                .unwrap_or(request.camera_transform),
2302            object_rotation: request.object_rotation.clone(),
2303        };
2304        batch.outputs.push(output);
2305
2306        let next_index = batch.current_index + 1;
2307        if next_index >= batch.viewpoints.len() {
2308            batch.done = true;
2309            state.exit_requested = true;
2310            return;
2311        }
2312
2313        batch.current_index = next_index;
2314        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2315
2316        if let Some(next_viewpoint) = batch.current_viewpoint() {
2317            for mut camera_transform in camera_query.iter_mut() {
2318                *camera_transform = next_viewpoint;
2319            }
2320        }
2321
2322        if let Ok(mut guard) = shared_rgba.0.lock() {
2323            *guard = None;
2324        }
2325        if let Ok(mut guard) = shared_depth.0.lock() {
2326            *guard = None;
2327        }
2328
2329        for mut copier in image_copiers.iter_mut() {
2330            copier.enabled = false;
2331        }
2332
2333        depth_request.requested = false;
2334        state.frame_count = 0;
2335        state.capture_ready = true;
2336        state.screenshot_requested = false;
2337        state.captured = false;
2338        state.rgba_data = None;
2339        state.depth_data = None;
2340        state.image_width = 0;
2341        state.image_height = 0;
2342
2343        if let Some(t0) = t0 {
2344            eprintln!(
2345                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2346                batch.current_index.saturating_sub(1),
2347                batch.current_index,
2348                batch.done,
2349                t0.elapsed().as_secs_f64() * 1000.0
2350            );
2351        }
2352    } else if let Some(t0) = t0 {
2353        eprintln!(
2354            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2355            t0.elapsed().as_secs_f64() * 1000.0
2356        );
2357    }
2358}
2359
2360// ============================================================================
2361// Persistent batch session (RenderSession)
2362//
2363// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2364// object (PSO) compilation across multiple `render()` calls. Profile data (see
2365// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2366// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2367// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2368// episodes recovers the bulk of that cost.
2369// ============================================================================
2370
/// Marker component for the per-group scene entity, so it can be found and
/// despawned cleanly when the next `RenderSession::render()` call swaps in a
/// different object or rotation.
///
/// Carries no data; it exists purely to be used as a query filter.
// NOTE(review): the spawn/despawn sites are outside this part of the file —
// confirm they are the only users of this marker.
#[derive(Component)]
struct SessionScene;
2375
2376/// Session-persistent setup: render target image, camera (with prepass +
2377/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2378/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2379/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2380fn setup_session_persistent_scene(
2381    mut commands: Commands,
2382    mut images: ResMut<Assets<Image>>,
2383    config: Res<SessionRenderConfig>,
2384) {
2385    let width = config.0.width;
2386    let height = config.0.height;
2387
2388    let size = Extent3d {
2389        width,
2390        height,
2391        depth_or_array_layers: 1,
2392    };
2393
2394    let mut render_target_image = Image::new_fill(
2395        size,
2396        TextureDimension::D2,
2397        &[0, 0, 0, 255],
2398        TextureFormat::Rgba8UnormSrgb,
2399        RenderAssetUsages::default(),
2400    );
2401    render_target_image.texture_descriptor.usage =
2402        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2403
2404    let render_target_handle = images.add(render_target_image);
2405    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2406
2407    let fov = config.0.fov_radians();
2408    commands.spawn((
2409        Camera3d::default(),
2410        Camera {
2411            hdr: true,
2412            target: RenderTarget::Image(render_target_handle.clone()),
2413            ..default()
2414        },
2415        Projection::Perspective(PerspectiveProjection {
2416            fov,
2417            near: config.0.near_plane,
2418            far: config.0.far_plane,
2419            ..default()
2420        }),
2421        Msaa::Off,
2422        Transform::default(),
2423        Tonemapping::None,
2424        DepthPrepass,
2425        NormalPrepass,
2426        RenderCamera,
2427        ImageCopier {
2428            src_image: render_target_handle,
2429            enabled: false,
2430        },
2431    ));
2432
2433    let lighting = &config.0.lighting;
2434    commands.insert_resource(AmbientLight {
2435        color: Color::WHITE,
2436        brightness: lighting.ambient_brightness,
2437    });
2438
2439    if lighting.key_light_intensity > 0.0 {
2440        commands.spawn((
2441            PointLight {
2442                intensity: lighting.key_light_intensity,
2443                shadows_enabled: lighting.shadows_enabled,
2444                ..default()
2445            },
2446            Transform::from_xyz(
2447                lighting.key_light_position[0],
2448                lighting.key_light_position[1],
2449                lighting.key_light_position[2],
2450            ),
2451        ));
2452    }
2453
2454    if lighting.fill_light_intensity > 0.0 {
2455        commands.spawn((
2456            PointLight {
2457                intensity: lighting.fill_light_intensity,
2458                shadows_enabled: lighting.shadows_enabled,
2459                ..default()
2460            },
2461            Transform::from_xyz(
2462                lighting.fill_light_position[0],
2463                lighting.fill_light_position[1],
2464                lighting.fill_light_position[2],
2465            ),
2466        ));
2467    }
2468}
2469
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// Read by `setup_session_persistent_scene`, which uses it to size the render
/// target and to configure the camera projection (fov, near/far planes) and
/// the lighting.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2474
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The `!Send + !Sync` marker is enforced via
/// `PhantomData<*const ()>`.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    /// The long-lived Bevy app, built in `new()`.
    app: App,
    /// Copy of the config passed to `new()`.
    // NOTE(review): presumably what `render()` compares incoming requests
    // against — confirm in the `render()` body.
    render_config: RenderConfig,
    /// Shared RGBA readback slot; a clone is handed to `ImageCopyPlugin` and
    /// inserted as a resource in `new()`.
    shared_rgba: SharedRgbaBuffer,
    /// Shared depth readback slot; a clone is handed to `DepthReadbackPlugin`
    /// in `new()`.
    shared_depth: SharedDepthBuffer,
    /// Zero-sized marker: a raw pointer is neither `Send` nor `Sync`, so this
    /// field makes the whole struct `!Send + !Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2505
2506impl RenderSession {
2507    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2508    /// `update()` so Startup systems run and the wgpu device + adapter are
2509    /// initialized. The first `render()` call still pays PSO compilation for
2510    /// the specific mesh/material combination; subsequent calls reuse the cache.
2511    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2512        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2513        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2514
2515        let mut app = App::new();
2516        app.add_plugins(
2517            DefaultPlugins
2518                .set(WindowPlugin {
2519                    primary_window: None,
2520                    exit_condition: ExitCondition::DontExit,
2521                    ..default()
2522                })
2523                .disable::<bevy::winit::WinitPlugin>()
2524                .disable::<LogPlugin>()
2525                .disable::<TerminalCtrlCHandlerPlugin>(),
2526        )
2527        .add_plugins(ObjPlugin)
2528        .add_plugins(ImageCopyPlugin {
2529            shared_rgba: shared_rgba.clone(),
2530        })
2531        .add_plugins(DepthReadbackPlugin {
2532            shared_depth: shared_depth.clone(),
2533            near: render_config.near_plane,
2534            far: render_config.far_plane,
2535        })
2536        .insert_resource(SessionRenderConfig(render_config.clone()))
2537        .insert_resource(shared_rgba.clone())
2538        .init_resource::<RenderState>()
2539        .add_systems(Startup, setup_session_persistent_scene)
2540        .add_systems(
2541            Update,
2542            (
2543                check_assets_loaded,
2544                apply_materials,
2545                tick_headless_batch_warmup,
2546                request_headless_capture,
2547                check_headless_capture_ready,
2548                extract_and_continue_headless_batch,
2549            )
2550                .chain()
2551                // Gate the capture chain on `RenderRequest` existing. `new()`
2552                // runs a warmup `app.update()` to execute Startup (which spawns
2553                // the camera/lights/render target) before the first `render()`
2554                // call, but does not yet insert `RenderRequest`. Several systems
2555                // in this chain take `Res<RenderRequest>` (not `Option`) and
2556                // would panic on SystemState init if the resource were absent.
2557                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2558        );
2559
2560        app.finish();
2561        app.cleanup();
2562
2563        // One warmup update runs Startup systems (render target, camera, lights)
2564        // so they exist before the first `render()` call seeds the camera
2565        // transform. The Update chain is gated by `RenderRequest` existence and
2566        // is a no-op this tick. PSO compilation for specific mesh/material
2567        // combinations still happens lazily on the first real render.
2568        app.update();
2569
2570        Ok(Self {
2571            app,
2572            render_config: render_config.clone(),
2573            shared_rgba,
2574            shared_depth,
2575            _not_send_sync: std::marker::PhantomData,
2576        })
2577    }
2578
2579    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2580    /// Returns outputs in request order.
2581    ///
2582    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2583    /// wgpu device was lost mid-render. This call produced no output; any
2584    /// outputs from earlier `render()` calls on this session are still valid.
2585    /// Recovery: drop this `RenderSession` and construct a new one.
2586    pub fn render(
2587        &mut self,
2588        requests: &[crate::BatchRenderRequest],
2589    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2590        use crate::{BatchRenderError, BatchRenderOutput};
2591
2592        if requests.is_empty() {
2593            return Ok(Vec::new());
2594        }
2595
2596        // Enforce homogeneity and config invariance.
2597        let first = &requests[0];
2598        if first.render_config != self.render_config {
2599            return Err(BatchRenderError::InvalidConfig(
2600                "RenderSession render_config mismatch: session was constructed with a different \
2601                 RenderConfig than the first request carries. Session config cannot change after \
2602                 `new()`; construct a new session if you need a different resolution/camera."
2603                    .to_string(),
2604            ));
2605        }
2606        for r in &requests[1..] {
2607            if r.object_dir != first.object_dir
2608                || r.object_rotation != first.object_rotation
2609                || r.render_config != first.render_config
2610            {
2611                return Err(BatchRenderError::InvalidConfig(
2612                    "Phase 1 RenderSession::render requires homogeneous requests \
2613                     (same object_dir, object_rotation, and render_config across the batch). \
2614                     Call render() once per group instead."
2615                        .to_string(),
2616                ));
2617            }
2618        }
2619
2620        // Canonicalize paths and validate mesh/texture presence. This matches
2621        // `render_headless_sequence`'s preconditions so the error surface stays
2622        // consistent.
2623        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2624            BatchRenderError::InvalidConfig(format!(
2625                "Cannot canonicalize object directory {}: {}",
2626                first.object_dir.display(),
2627                e
2628            ))
2629        })?;
2630        let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2631        let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2632        if !mesh_path.exists() {
2633            return Err(BatchRenderError::InvalidConfig(format!(
2634                "Mesh not found: {}",
2635                mesh_path.display()
2636            )));
2637        }
2638        if !texture_path.exists() {
2639            return Err(BatchRenderError::InvalidConfig(format!(
2640                "Texture not found: {}",
2641                texture_path.display()
2642            )));
2643        }
2644
2645        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2646
2647        // --- per-group scene swap (direct world manipulation) ---
2648        {
2649            let world = self.app.world_mut();
2650
2651            // Despawn any SessionScene entity from the previous group.
2652            let stale: Vec<Entity> = world
2653                .query_filtered::<Entity, With<SessionScene>>()
2654                .iter(world)
2655                .collect();
2656            for entity in stale {
2657                world.entity_mut(entity).despawn_recursive();
2658            }
2659
2660            // Clear shared RGBA/depth buffers so a stale payload can't leak
2661            // into the first viewpoint of this call.
2662            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2663                *guard = None;
2664            }
2665            if let Ok(mut guard) = self.shared_depth.0.lock() {
2666                *guard = None;
2667            }
2668
2669            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2670            // frame_count, materials_applied, etc.). Default() gives all false/0.
2671            *world.resource_mut::<RenderState>() = RenderState::default();
2672
2673            // Update RenderRequest so the existing capture systems see the new
2674            // object paths, rotation, and camera transform (seeded from first vp).
2675            let new_request = RenderRequest {
2676                mesh_path: mesh_path.display().to_string(),
2677                texture_path: texture_path.display().to_string(),
2678                camera_transform: viewpoints[0],
2679                object_rotation: first.object_rotation.clone(),
2680                config: self.render_config.clone(),
2681            };
2682            world.insert_resource(new_request);
2683
2684            // Kick off asset loads and install the handles under the names the
2685            // existing `check_assets_loaded` system expects.
2686            let asset_server = world.resource::<AssetServer>().clone();
2687            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2688            let texture_handle: Handle<Image> =
2689                asset_server.load(texture_path.display().to_string());
2690            world.insert_resource(LoadedScene(scene_handle.clone()));
2691            world.insert_resource(LoadedTexture(texture_handle));
2692
2693            // Spawn the new scene entity tagged so we can find + despawn it next
2694            // render() call.
2695            world.spawn((
2696                SceneRoot(scene_handle),
2697                Transform::from_rotation(first.object_rotation.to_quat()),
2698                RenderedObject,
2699                SessionScene,
2700            ));
2701
2702            // Seed the camera transform to the first viewpoint now so the first
2703            // capture lines up; subsequent viewpoints are advanced by
2704            // `extract_and_continue_headless_batch`.
2705            let camera_entity = world
2706                .query_filtered::<Entity, With<RenderCamera>>()
2707                .iter(world)
2708                .next();
2709            if let Some(cam) = camera_entity {
2710                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2711                    *transform = viewpoints[0];
2712                }
2713            }
2714
2715            // Install the viewpoint sequence for this render() call.
2716            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
2717        }
2718
2719        // --- drive the capture loop ---
2720        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2721        let start = std::time::Instant::now();
2722        loop {
2723            if start.elapsed() > timeout {
2724                return Err(BatchRenderError::TotalFailure(format!(
2725                    "RenderSession::render timed out after {}s",
2726                    RENDER_TIMEOUT_SECS
2727                )));
2728            }
2729
2730            self.app.update();
2731
2732            if self.app.world().resource::<HeadlessBatchSequence>().done {
2733                break;
2734            }
2735        }
2736
2737        // Collect outputs and zip with requests to produce BatchRenderOutput in
2738        // request order.
2739        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
2740        if sequence.outputs.len() != requests.len() {
2741            return Err(BatchRenderError::TotalFailure(format!(
2742                "RenderSession produced {} outputs for {} requests",
2743                sequence.outputs.len(),
2744                requests.len()
2745            )));
2746        }
2747        let outputs = std::mem::take(&mut sequence.outputs);
2748
2749        Ok(requests
2750            .iter()
2751            .cloned()
2752            .zip(outputs)
2753            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
2754            .collect())
2755    }
2756}
2757
2758/// Render directly to files (for subprocess mode).
2759///
2760/// This function saves RGBA and depth data directly to files before exiting.
2761/// Designed for subprocess rendering where the process will exit after rendering.
2762pub fn render_to_files(
2763    object_dir: &Path,
2764    camera_transform: &Transform,
2765    object_rotation: &ObjectRotation,
2766    config: &RenderConfig,
2767    rgba_path: &Path,
2768    depth_path: &Path,
2769) -> Result<(), RenderError> {
2770    let mesh_path = object_dir.join(GOOGLE_16K_MESH_RELATIVE);
2771    let texture_path = object_dir.join(GOOGLE_16K_TEXTURE_RELATIVE);
2772
2773    if !mesh_path.exists() {
2774        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
2775    }
2776    if !texture_path.exists() {
2777        return Err(RenderError::TextureNotFound(
2778            texture_path.display().to_string(),
2779        ));
2780    }
2781
2782    let request = RenderRequest {
2783        mesh_path: mesh_path.display().to_string(),
2784        texture_path: texture_path.display().to_string(),
2785        camera_transform: *camera_transform,
2786        object_rotation: object_rotation.clone(),
2787        config: config.clone(),
2788    };
2789
2790    // Shared state for output
2791    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
2792    let output_poll = shared_output.clone();
2793
2794    // Clone paths for watchdog thread
2795    let rgba_path = rgba_path.to_path_buf();
2796    let depth_path = depth_path.to_path_buf();
2797
2798    // Shared buffer for RGBA data from headless render target
2799    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2800
2801    // Shared buffer for depth readback
2802    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2803
2804    // Spawn watchdog thread that saves files and exits
2805    std::thread::spawn(move || {
2806        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2807        let start = std::time::Instant::now();
2808        let poll_interval = std::time::Duration::from_millis(100);
2809
2810        loop {
2811            if let Ok(guard) = output_poll.0.lock() {
2812                if let Some(output) = guard.as_ref() {
2813                    // Save RGBA as PNG
2814                    if let Err(e) =
2815                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
2816                    {
2817                        eprintln!("Failed to save RGBA: {:?}", e);
2818                        std::process::exit(1);
2819                    }
2820
2821                    // Save depth as binary f32
2822                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
2823                        eprintln!("Failed to save depth: {:?}", e);
2824                        std::process::exit(1);
2825                    }
2826
2827                    std::process::exit(0);
2828                }
2829            }
2830
2831            if start.elapsed() > timeout {
2832                eprintln!(
2833                    "Error: Render timeout after {} seconds",
2834                    RENDER_TIMEOUT_SECS
2835                );
2836                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
2837                std::process::exit(1);
2838            }
2839
2840            std::thread::sleep(poll_interval);
2841        }
2842    });
2843
2844    // Configure rendering backend for this environment.
2845    // Use OnceLock so env vars are only set once per process — repeated calls
2846    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
2847    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
2848    // requires a persistent renderer (tracked in issue #14).
2849    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
2850    BACKEND_INIT.get_or_init(|| {
2851        let backend_config = BackendConfig::headless();
2852        backend_config.apply_env();
2853    });
2854
2855    // Run Bevy app with HEADLESS configuration
2856    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
2857
2858    // Unreachable - watchdog thread exits the process
2859    Err(RenderError::RenderFailed(
2860        "Render did not complete".to_string(),
2861    ))
2862}
2863
2864/// Save RGBA data to PNG file
2865fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
2866    use image::{ImageBuffer, Rgba};
2867
2868    // Create parent directories if needed
2869    if let Some(parent) = path.parent() {
2870        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
2871    }
2872
2873    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
2874        ImageBuffer::from_raw(width, height, rgba.to_vec())
2875            .ok_or_else(|| "Failed to create image buffer".to_string())?;
2876
2877    img.save(path).map_err(|e| e.to_string())
2878}
2879
/// Save depth data to binary file (f64 for TBP precision).
///
/// Each sample is written as 8 little-endian bytes, in slice order, with no
/// header. Missing parent directories of `path` are created first.
///
/// # Errors
///
/// Returns a `String` naming the directory or file that could not be created
/// or written, together with the underlying I/O error.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Create parent directories if needed
    if let Some(parent) = path.parent() {
        std::fs::create_dir_all(parent)
            .map_err(|e| format!("Failed to create directory {}: {}", parent.display(), e))?;
    }

    // The output size is known up front, so allocate it exactly once.
    let mut bytes: Vec<u8> = Vec::with_capacity(std::mem::size_of_val(depth));
    for sample in depth {
        bytes.extend_from_slice(&sample.to_le_bytes());
    }

    std::fs::write(path, &bytes)
        .map_err(|e| format!("Failed to write depth file {}: {}", path.display(), e))
}
2890
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ source for a 0.2-unit cube centered on the origin, with
    /// four UV coordinates shared by all twelve triangles.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Row-major texels of the 2x2 synthetic texture (index = `y * 2 + x`).
    const SYNTHETIC_TEXELS: [[u8; 4]; 4] = [
        [255, 48, 48, 255],
        [48, 255, 48, 255],
        [48, 48, 255, 255],
        [255, 255, 64, 255],
    ];

    /// Writes `synthetic_cube/google_16k/{textured.obj, texture_map.png}` into
    /// a fresh temporary directory and returns the directory guard.
    ///
    /// A small centered cube stays visible from all default TBP viewpoints and
    /// does not need any YCB downloads.
    fn write_synthetic_object() -> TempDir {
        let root = TempDir::new().expect("create temp dir for synthetic object");
        let mesh_dir = root.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&mesh_dir).expect("create synthetic google_16k dir");

        std::fs::write(mesh_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        let texture =
            ImageBuffer::from_fn(2, 2, |x, y| Rgba(SYNTHETIC_TEXELS[(y * 2 + x) as usize]));
        texture
            .save(mesh_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        root
    }

    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let config = RenderConfig::tbp_default();
        let request_count = 5usize;

        let all_viewpoints = crate::generate_viewpoints(&ViewpointConfig::default());
        let mut requests = Vec::new();
        for viewpoint in all_viewpoints.iter().copied().take(request_count) {
            requests.push(BatchRenderRequest {
                object_dir: object_dir.clone(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
            });
        }

        let timer = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // Scene-setup count is the deterministic churn signal here: adapter log
        // lines differ per backend and logging config, but a homogeneous batch
        // must set up headless scene state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;
        for (idx, output) in outputs.iter().enumerate() {
            assert_eq!(output.width, config.width, "output {idx} width mismatch");
            assert_eq!(output.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                output.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                output.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );

            // At least one pixel must have a non-zero color channel.
            let has_visible_color = output
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_visible_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64
        // captures should finish in under 8s. Much slower runs usually mean
        // per-capture app churn or another headless startup regression crept
        // back in.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}