Skip to main content

bevy_sensor/
render.rs

1//! Headless rendering implementation using Bevy.
2//!
3//! This module provides two rendering modes:
4//!
5//! 1. **Headless mode** (default): Renders to an image texture without requiring
6//!    a window or display. Works on WSL2, CI servers, and any environment without
7//!    GPU windowing support.
8//!
9//! 2. **Windowed mode** (fallback): Uses a visible window for rendering when
10//!    headless mode fails. Requires a display (X11/Wayland).
11//!
12//! # Current Status
13//!
14//! - **RGBA**: Working via render-to-texture + GPU readback
15//! - **Depth**: Working via ViewDepthTexture + reverse-Z conversion
16//!
17//! # Headless Rendering Architecture
18//!
19//! The headless renderer:
20//! 1. Creates a Bevy app without window plugins (uses ScheduleRunnerPlugin)
21//! 2. Sets up a render-to-texture pipeline with RenderTarget::Image
22//! 3. Extracts RGBA data via ImageCopyDriver
23//! 4. Extracts depth via DepthReadbackNode
24//!
25//! # Running Requirements
26//!
27//! Headless mode should work without any display. For windowed fallback:
28//! ```bash
29//! DISPLAY=:0 cargo run --example test_render
30//! ```
31//!
32//! # Architecture Notes
33//!
34//! Bevy's `App::run()` does not return cleanly in all configurations. This
35//! implementation uses a watchdog thread that monitors for completion and
36//! calls `std::process::exit(0)` once the render output is serialized to
37//! a temp file. The main thread reads this file after the process would
38//! normally exit.
39
40use bevy::app::{ScheduleRunnerPlugin, TerminalCtrlCHandlerPlugin};
41use bevy::asset::LoadState;
42use bevy::core_pipeline::prepass::{DepthPrepass, NormalPrepass};
43use bevy::core_pipeline::tonemapping::Tonemapping;
44use bevy::ecs::query::QueryItem;
45use bevy::log::LogPlugin;
46use bevy::prelude::*;
47use bevy::render::camera::{ExtractedCamera, RenderTarget};
48use bevy::render::render_asset::{RenderAssetUsages, RenderAssets};
49use bevy::render::render_graph::{
50    Node, NodeRunError, RenderGraphApp, RenderGraphContext, RenderLabel, ViewNode, ViewNodeRunner,
51};
52use bevy::render::render_resource::{
53    Buffer, BufferDescriptor, BufferUsages, CommandEncoderDescriptor, Extent3d, ImageCopyBuffer,
54    ImageCopyTexture, ImageDataLayout, MapMode, Origin3d, TextureAspect, TextureDimension,
55    TextureFormat, TextureUsages,
56};
57use bevy::render::renderer::RenderQueue;
58use bevy::render::renderer::{RenderContext, RenderDevice};
59use bevy::render::texture::GpuImage;
60use bevy::render::view::screenshot::{Screenshot, ScreenshotCaptured};
61use bevy::render::view::ViewDepthTexture;
62use bevy::render::{Extract, Render, RenderApp, RenderSet};
63use bevy::window::{ExitCondition, WindowPlugin};
64use bevy_obj::ObjPlugin;
65use std::fs::File;
66use std::io::Read as IoRead;
67use std::path::Path;
68#[cfg(test)]
69use std::sync::atomic::{AtomicUsize, Ordering};
70use std::sync::{Arc, Mutex, OnceLock};
71use std::time::Duration;
72
73use crate::{backend::BackendConfig, ObjectRotation, RenderConfig, RenderError, RenderOutput};
74
/// Watchdog timeout for a single render, in seconds.
///
/// Bounds how long any single render path waits before declaring failure.
/// 180s accommodates first-run wgpu shader compilation on Windows, which
/// can take well over 60s on a cold GPU cache (see commit 9cd1d11).
///
/// The value is a plain `u64` number of seconds; the code that consumes it
/// is responsible for turning it into a duration.
const RENDER_TIMEOUT_SECS: u64 = 180;

/// Warmup frames after each camera move in `render_headless_sequence`.
///
/// After writing a new camera `Transform`, Bevy needs at least one frame for
/// transform propagation + render-world extract before the next capture is
/// valid. Historically set to 3 as a conservative cushion; reducing directly
/// shortens per-viewpoint wall-clock since `app.update()` in the batch path
/// is not rate-limited. Validated against the pixel-exact hardware test
/// `test_batch_render_matches_sequential_episode_outputs`.
///
/// Re-run that test before lowering or raising this value.
const BATCH_WARMUP_FRAMES: u32 = 1;
91
/// Report whether render tracing is switched on.
///
/// Tracing is on whenever the `BEVY_SENSOR_RENDER_TRACE` environment variable
/// is set to a valid Unicode value (an empty string counts). The environment
/// is queried on every call, so nodes and systems should call this once per
/// invocation and gate all of their tracing output on the result.
#[inline]
fn render_trace_enabled() -> bool {
    const TRACE_ENV_VAR: &str = "BEVY_SENSOR_RENDER_TRACE";

    std::env::var(TRACE_ENV_VAR).is_ok()
}
98
/// Report whether a display server appears to be reachable for windowed
/// rendering.
///
/// Returns `true` when at least one of the `DISPLAY` (X11) or
/// `WAYLAND_DISPLAY` (Wayland) environment variables is set. `DISPLAY` is
/// checked first.
#[allow(dead_code)]
fn display_available() -> bool {
    ["DISPLAY", "WAYLAND_DISPLAY"]
        .iter()
        .any(|name| std::env::var(name).is_ok())
}
106
/// Heuristically detect a WSL environment (where Vulkan window surfaces are
/// not supported).
///
/// Reads `/proc/version` and returns `true` when its contents contain
/// `microsoft` or `wsl`, compared case-insensitively. Returns `false` when
/// the file cannot be read (for example on non-Linux hosts).
#[allow(dead_code)]
fn is_wsl2() -> bool {
    match std::fs::read_to_string("/proc/version") {
        Ok(contents) => {
            let lowered = contents.to_lowercase();
            ["microsoft", "wsl"]
                .iter()
                .any(|needle| lowered.contains(*needle))
        }
        Err(_) => false,
    }
}
116
/// Internal state for tracking render progress.
///
/// Every field starts at its `Default` value (`0`, `false` or `None`).
///
/// NOTE(review): the systems that read and write these fields are not in
/// this part of the file; the per-field notes below are inferred from the
/// field names and should be confirmed against those systems.
#[derive(Resource, Default)]
struct RenderState {
    /// Presumably the number of frames processed so far.
    frame_count: u32,
    /// Presumably set once the scene asset has finished loading.
    scene_loaded: bool,
    /// Presumably set once the texture asset has finished loading.
    texture_loaded: bool,
    /// Presumably set once materials have been assigned to the scene.
    materials_applied: bool,
    /// `frame_count` at the moment materials were applied; used to gate
    /// `capture_ready` on N frames of render-graph propagation rather than
    /// a legacy llvmpipe-era 60-frame wait.
    materials_applied_frame: u32,
    /// Gate for starting a capture; see `materials_applied_frame`.
    capture_ready: bool,
    /// Presumably set once a capture has been requested.
    screenshot_requested: bool,
    /// Presumably set once capture output has been received.
    captured: bool,
    /// Presumably set once application exit has been requested.
    exit_requested: bool,
    /// Currently unused (kept behind `allow(dead_code)`).
    #[allow(dead_code)]
    exit_frame_count: u32,
    /// Captured colour bytes, if any — presumably tightly packed RGBA8.
    rgba_data: Option<Vec<u8>>,
    /// Captured depth values, if any — presumably linear depth in meters.
    depth_data: Option<Vec<f64>>,
    /// Width of the captured image in pixels (TODO confirm).
    image_width: u32,
    /// Height of the captured image in pixels (TODO confirm).
    image_height: u32,
}
139
/// Test-only counter, initialised to zero.
///
/// NOTE(review): presumably incremented each time the headless scene setup
/// runs; the increment site is not in this part of the file.
#[cfg(test)]
static HEADLESS_SCENE_SETUP_COUNT: AtomicUsize = AtomicUsize::new(0);

/// Reset [`HEADLESS_SCENE_SETUP_COUNT`] to zero (test builds only).
#[cfg(test)]
fn reset_headless_scene_setup_count() {
    HEADLESS_SCENE_SETUP_COUNT.store(0, Ordering::SeqCst);
}

/// Read the current value of [`HEADLESS_SCENE_SETUP_COUNT`] (test builds only).
#[cfg(test)]
fn headless_scene_setup_count() -> usize {
    HEADLESS_SCENE_SETUP_COUNT.load(Ordering::SeqCst)
}
152
/// Shared buffer for screenshot callback to write into.
///
/// Cloning the resource clones the `Arc`, so every clone refers to the same
/// slot. The slot is `None` until something stores a value.
///
/// NOTE(review): the tuple is presumably `(bytes, width, height)`, mirroring
/// `SharedDepthBuffer`; the type is currently marked `dead_code`.
#[derive(Resource, Clone)]
#[allow(clippy::type_complexity)]
#[allow(dead_code)]
struct SharedImageBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
158
/// Shared buffer for depth data from GPU readback
/// Contains: (linear_depth_values, width, height)
/// Uses f64 for TBP numerical precision compatibility.
///
/// Cloning the resource clones the `Arc`, so the copy inserted into the main
/// app and the copy inserted into the render app refer to the same slot.
/// `collect_depth_captures` overwrites the slot with the most recently
/// converted capture; it is `None` until the first capture completes.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedDepthBuffer(Arc<Mutex<Option<(Vec<f64>, u32, u32)>>>);
165
166// ============================================================================
167// Depth Readback Infrastructure
168// ============================================================================
169
/// Request to capture depth - extracted from main world to render world
///
/// `DepthReadbackPlugin` inserts this into the main world with
/// `requested: false`; `extract_depth_request` copies it into the render
/// world, where `DepthReadbackNode` reads it.
#[derive(Resource, Default, Clone)]
struct DepthCaptureRequest {
    /// When `true`, `DepthReadbackNode` copies the view's depth texture on
    /// its next run.
    requested: bool,
    /// Near-plane distance handed to the reverse-Z to linear conversion.
    near: f32,
    /// Far-plane distance handed to the reverse-Z to linear conversion.
    far: f32,
}
177
/// Pending depth capture info for async processing
///
/// Created by `DepthReadbackNode` once the texture-to-buffer copy has been
/// recorded, and consumed by `collect_depth_captures`.
struct PendingDepthCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the depth texels;
    /// each row is padded to the 256-byte copy alignment.
    buffer: Buffer,
    /// Width of the copied region in pixels.
    width: u32,
    /// Height of the copied region in pixels.
    height: u32,
    /// Near-plane distance taken from the request at capture time.
    near: f32,
    /// Far-plane distance taken from the request at capture time.
    far: f32,
}
186
/// Queue for pending depth captures (written by render node, read by cleanup system)
///
/// `DepthReadbackNode` pushes entries; `collect_depth_captures` drains the
/// whole queue each time it runs. The `Mutex` is needed because the render
/// node only has shared (`&World`) access when it pushes.
#[derive(Resource, Default)]
struct PendingDepthCaptureQueue(Arc<Mutex<Vec<PendingDepthCapture>>>);
190
191// ============================================================================
192// Depth Buffer Helpers
193// ============================================================================
194
/// Pure CPU helpers for GPU depth readback: copy-alignment arithmetic,
/// removal of row padding from staging-buffer contents, and conversion of
/// reverse-Z depth values to linear distances.
mod depth_helpers {
    /// wgpu requires buffer row alignment of 256 bytes
    pub const COPY_BYTES_PER_ROW_ALIGNMENT: u32 = 256;

    /// Align byte size to wgpu's COPY_BYTES_PER_ROW_ALIGNMENT
    ///
    /// Rounds `value` up to the next multiple of
    /// [`COPY_BYTES_PER_ROW_ALIGNMENT`]; values that are already aligned
    /// (including `0`) are returned unchanged.
    pub fn align_byte_size(value: u32) -> u32 {
        value.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT)
    }

    /// Calculate aligned buffer size for an image
    ///
    /// Returns `height` rows of `width * pixel_size` bytes, each row padded
    /// with [`align_byte_size`]. The arithmetic is done in `u32`, so the
    /// result is only meaningful while the total stays below 4 GiB.
    #[allow(dead_code)]
    pub fn get_aligned_size(width: u32, height: u32, pixel_size: u32) -> u32 {
        height * align_byte_size(width * pixel_size)
    }

    /// Convert reverse-Z NDC depth to linear depth in meters.
    ///
    /// Bevy uses reverse-Z depth buffer: near plane maps to depth=1, far plane to depth=0.
    /// This provides better precision for distant objects.
    ///
    /// Formula derivation:
    /// - At near plane (z = near): ndc = 1
    /// - At far plane (z = far): ndc = 0
    /// - linear = far / (1 + ndc * (far/near - 1))
    ///
    /// Values at or below `0.0` are clamped to `far`, values at or above
    /// `1.0` are clamped to `near`. A `NaN` input falls through both clamps
    /// and yields `NaN`.
    ///
    /// NOTE(review): the formula assumes a projection with a finite far
    /// plane. Bevy's stock `PerspectiveProjection` builds an *infinite*
    /// reverse-Z matrix, for which the exact inverse is `near / ndc` —
    /// confirm which projection the sensor camera uses.
    pub fn reverse_z_to_linear_depth(ndc_depth: f32, near: f32, far: f32) -> f32 {
        // Handle edge cases
        if ndc_depth <= 0.0 {
            return far; // Background (infinite distance in reverse-Z)
        }
        if ndc_depth >= 1.0 {
            return near; // At or beyond near plane
        }
        // Reverse-Z formula: linear = far / (1 + ndc * (far/near - 1))
        far / (1.0 + ndc_depth * (far / near - 1.0))
    }

    /// Extract depth values from aligned buffer, handling row padding
    ///
    /// `data` holds `height` rows of little-endian `f32` texels. Each row
    /// starts at a multiple of the 256-byte-aligned row stride; only the
    /// first `width * 4` bytes of a row are meaningful. The final row may
    /// omit its trailing padding.
    ///
    /// Returns the `width * height` values in row-major order. When either
    /// dimension is zero the result is empty and `data` is not inspected.
    ///
    /// # Panics
    ///
    /// Panics when `data` is too short to contain `height` rows.
    pub fn extract_depth_with_alignment(data: &[u8], width: u32, height: u32) -> Vec<f32> {
        const BYTES_PER_PIXEL: usize = std::mem::size_of::<f32>();

        // Work in `usize` from here on so large dimensions cannot overflow
        // the 32-bit intermediate products.
        let width = width as usize;
        let height = height as usize;
        if width == 0 || height == 0 {
            return Vec::new();
        }

        let row_bytes = width * BYTES_PER_PIXEL;
        let row_stride = row_bytes.next_multiple_of(COPY_BYTES_PER_ROW_ALIGNMENT as usize);

        // Validate once, up front, so a truncated buffer is reported with
        // the sizes involved rather than as a bare slice-index panic.
        let required = (height - 1) * row_stride + row_bytes;
        assert!(
            data.len() >= required,
            "depth buffer too small: got {} bytes, need at least {} for {}x{} (row stride {})",
            data.len(),
            required,
            width,
            height,
            row_stride
        );

        let mut depth_values = Vec::with_capacity(width * height);
        for row in data.chunks(row_stride).take(height) {
            // Skip the alignment padding at the end of each row.
            depth_values.extend(
                row[..row_bytes]
                    .chunks_exact(BYTES_PER_PIXEL)
                    .map(|px| f32::from_le_bytes([px[0], px[1], px[2], px[3]])),
            );
        }

        depth_values
    }

    /// Convert all NDC depth values to linear meters (as f64 for TBP precision)
    ///
    /// Each value is converted with [`reverse_z_to_linear_depth`] in `f32`
    /// and then widened to `f64`; the output has the same length and order
    /// as `raw_depth`.
    pub fn convert_depth_to_linear(raw_depth: &[f32], near: f32, far: f32) -> Vec<f64> {
        raw_depth
            .iter()
            .map(|&ndc| reverse_z_to_linear_depth(ndc, near, far) as f64)
            .collect()
    }

    #[cfg(test)]
    mod tests {
        use super::*;

        #[test]
        fn test_align_byte_size() {
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(257), 512);
            assert_eq!(align_byte_size(1), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(0), 0);
        }

        #[test]
        fn test_reverse_z_to_linear_depth() {
            let near = 0.01;
            let far = 10.0;

            // Near plane (ndc=1 in reverse-Z)
            let linear_near = reverse_z_to_linear_depth(1.0, near, far);
            assert!((linear_near - near).abs() < 0.001);

            // Mid-range NDC (the mapping is hyperbolic, so this lands close
            // to the near plane rather than halfway)
            let linear_mid = reverse_z_to_linear_depth(0.5, near, far);
            // At ndc=0.5: linear = 10 / (1 + 0.5 * (1000-1)) = 10 / 500.5 ≈ 0.02
            assert!(linear_mid > near && linear_mid < far);

            // Very close to far plane (ndc very small)
            let linear_almost_far = reverse_z_to_linear_depth(0.0001, near, far);
            // At ndc=0.0001: linear = 10 / (1 + 0.0001 * 999) ≈ 10 / 1.0999 ≈ 9.09
            assert!(linear_almost_far > 9.0);

            // Background (ndc=0)
            let background = reverse_z_to_linear_depth(0.0, near, far);
            assert_eq!(background, far);
        }

        #[test]
        fn test_extract_depth_with_alignment() {
            // 2x2 image, 4 bytes per pixel
            // Aligned row = 256 bytes, but actual = 8 bytes
            let width = 2u32;
            let height = 2u32;

            let mut data = vec![0u8; 256 * 2]; // 2 aligned rows

            // Write test depth values
            // Row 0: [0.5, 0.6]
            data[0..4].copy_from_slice(&0.5f32.to_le_bytes());
            data[4..8].copy_from_slice(&0.6f32.to_le_bytes());
            // Row 1: [0.7, 0.8]
            data[256..260].copy_from_slice(&0.7f32.to_le_bytes());
            data[260..264].copy_from_slice(&0.8f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), 4);
            assert!((depth[0] - 0.5).abs() < 0.001);
            assert!((depth[1] - 0.6).abs() < 0.001);
            assert!((depth[2] - 0.7).abs() < 0.001);
            assert!((depth[3] - 0.8).abs() < 0.001);
        }

        #[test]
        fn test_extract_depth_unpadded_last_row() {
            // A tightly sized buffer omits the padding after the final row.
            let mut data = vec![0u8; 256 + 8];
            data[0..4].copy_from_slice(&1.0f32.to_le_bytes());
            data[4..8].copy_from_slice(&2.0f32.to_le_bytes());
            data[256..260].copy_from_slice(&3.0f32.to_le_bytes());
            data[260..264].copy_from_slice(&4.0f32.to_le_bytes());

            let depth = extract_depth_with_alignment(&data, 2, 2);
            assert_eq!(depth, vec![1.0, 2.0, 3.0, 4.0]);
        }

        #[test]
        fn test_extract_depth_zero_dimensions() {
            assert!(extract_depth_with_alignment(&[], 0, 4).is_empty());
            assert!(extract_depth_with_alignment(&[], 4, 0).is_empty());
            assert!(extract_depth_with_alignment(&[], 0, 0).is_empty());
        }

        #[test]
        #[should_panic(expected = "depth buffer too small")]
        fn test_extract_depth_short_buffer_panics() {
            // Two rows are requested but only one (padded) row is present.
            let data = vec![0u8; 256];
            let _ = extract_depth_with_alignment(&data, 2, 2);
        }

        #[test]
        fn test_reverse_z_depth_at_near_plane() {
            // Near plane should give near value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(1.0, near, far);
            assert!((depth - near).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_depth_at_far_plane() {
            // Far plane (ndc=0) should give far value
            let near = 0.01;
            let far = 100.0;
            let depth = reverse_z_to_linear_depth(0.0, near, far);
            assert!((depth - far).abs() < 0.0001);
        }

        #[test]
        fn test_reverse_z_monotonic() {
            // Depth should increase as NDC decreases (reverse-Z)
            let near = 0.01;
            let far = 10.0;

            let mut prev_depth = 0.0;
            for i in (0..=100).rev() {
                let ndc = i as f32 / 100.0;
                let depth = reverse_z_to_linear_depth(ndc, near, far);
                assert!(
                    depth >= prev_depth,
                    "Depth should be monotonic: ndc={}, depth={}, prev={}",
                    ndc,
                    depth,
                    prev_depth
                );
                prev_depth = depth;
            }
        }

        #[test]
        fn test_convert_depth_to_linear_batch() {
            let near = 0.01f32;
            let far = 10.0f32;
            let ndc_depths = vec![1.0f32, 0.5, 0.1, 0.0];

            let linear = convert_depth_to_linear(&ndc_depths, near, far);

            assert_eq!(linear.len(), 4);
            // Near plane
            assert!((linear[0] - near as f64).abs() < 0.001);
            // Far plane
            assert!((linear[3] - far as f64).abs() < 0.001);
            // All should be in range [near, far]
            for d in &linear {
                assert!(*d >= near as f64 && *d <= far as f64);
            }
        }

        #[test]
        fn test_align_byte_size_edge_cases() {
            // Powers of two should stay the same if multiple of 256
            assert_eq!(align_byte_size(256), 256);
            assert_eq!(align_byte_size(512), 512);
            assert_eq!(align_byte_size(1024), 1024);

            // Just under 256 should round up to 256
            assert_eq!(align_byte_size(255), 256);
            assert_eq!(align_byte_size(128), 256);

            // Just over 256 should round up to 512
            assert_eq!(align_byte_size(300), 512);
        }

        #[test]
        fn test_extract_depth_64x64() {
            // Test with TBP default resolution
            let width = 64u32;
            let height = 64u32;
            let bytes_per_pixel = 4u32;
            let padded_row = align_byte_size(width * bytes_per_pixel);

            // Create aligned buffer
            let mut data = vec![0u8; (padded_row * height) as usize];

            // Fill with incrementing values
            for y in 0..height {
                for x in 0..width {
                    let value = (y * width + x) as f32 / (width * height) as f32;
                    let offset = (y * padded_row + x * bytes_per_pixel) as usize;
                    data[offset..offset + 4].copy_from_slice(&value.to_le_bytes());
                }
            }

            let depth = extract_depth_with_alignment(&data, width, height);
            assert_eq!(depth.len(), (width * height) as usize);

            // Verify first and last values
            assert!((depth[0] - 0.0).abs() < 0.001);
            let expected_last = (width * height - 1) as f32 / (width * height) as f32;
            assert!((depth[(width * height - 1) as usize] - expected_last).abs() < 0.001);
        }
    }
}
432
433// ============================================================================
434// Depth Readback Render Node
435// ============================================================================
436
/// Label for the depth readback render graph node.
///
/// `DepthReadbackPlugin` registers `DepthReadbackNode` under this label in
/// the `Core3d` graph, between `EndMainPass` and `Tonemapping`.
#[derive(Debug, Hash, PartialEq, Eq, Clone, bevy::render::render_graph::RenderLabel)]
struct DepthReadbackLabel;
440
441/// Render node that copies the main camera's depth texture to a staging buffer.
442/// This runs after the main pass completes, using ViewDepthTexture.
443#[derive(Default)]
444struct DepthReadbackNode;
445
446impl ViewNode for DepthReadbackNode {
447    type ViewQuery = (&'static ViewDepthTexture, &'static ExtractedCamera);
448
449    fn run<'w>(
450        &self,
451        _graph: &mut RenderGraphContext,
452        render_context: &mut RenderContext<'w>,
453        (view_depth_texture, camera): QueryItem<'w, Self::ViewQuery>,
454        world: &'w World,
455    ) -> Result<(), NodeRunError> {
456        let trace = render_trace_enabled();
457        let t0 = trace.then(std::time::Instant::now);
458
459        // Check if depth capture is requested
460        let Some(request) = world.get_resource::<DepthCaptureRequest>() else {
461            return Ok(());
462        };
463        if !request.requested {
464            return Ok(());
465        }
466
467        // Get the pending queue
468        let Some(queue) = world.get_resource::<PendingDepthCaptureQueue>() else {
469            return Ok(());
470        };
471
472        // Get texture size from camera viewport or physical size
473        let Some(physical_size) = camera.physical_target_size else {
474            return Ok(());
475        };
476        let width = physical_size.x;
477        let height = physical_size.y;
478
479        let render_device = world.resource::<RenderDevice>();
480
481        // Calculate aligned buffer size (wgpu requires 256-byte row alignment)
482        let bytes_per_pixel = 4u32; // f32 = 4 bytes (Depth32Float)
483        let unpadded_bytes_per_row = width * bytes_per_pixel;
484        let padded_bytes_per_row = depth_helpers::align_byte_size(unpadded_bytes_per_row);
485        let buffer_size = (padded_bytes_per_row * height) as u64;
486
487        // Create staging buffer for CPU readback
488        let staging_buffer = render_device.create_buffer(&BufferDescriptor {
489            label: Some("depth_staging_buffer"),
490            size: buffer_size,
491            usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
492            mapped_at_creation: false,
493        });
494
495        // Copy depth texture to staging buffer
496        let encoder = render_context.command_encoder();
497        encoder.copy_texture_to_buffer(
498            ImageCopyTexture {
499                texture: &view_depth_texture.texture,
500                mip_level: 0,
501                origin: Origin3d::ZERO,
502                aspect: TextureAspect::DepthOnly,
503            },
504            ImageCopyBuffer {
505                buffer: &staging_buffer,
506                layout: ImageDataLayout {
507                    offset: 0,
508                    bytes_per_row: Some(padded_bytes_per_row),
509                    rows_per_image: Some(height),
510                },
511            },
512            Extent3d {
513                width,
514                height,
515                depth_or_array_layers: 1,
516            },
517        );
518
519        // Push to queue for async processing (queue is Arc<Mutex<Vec>>)
520        if let Ok(mut pending) = queue.0.lock() {
521            pending.push(PendingDepthCapture {
522                buffer: staging_buffer,
523                width,
524                height,
525                near: request.near,
526                far: request.far,
527            });
528        }
529
530        if let Some(t0) = t0 {
531            eprintln!(
532                "[render_trace][node] DepthReadbackNode ms={:.3}",
533                t0.elapsed().as_secs_f64() * 1000.0
534            );
535        }
536
537        Ok(())
538    }
539}
540
541// ============================================================================
542// Depth Readback Plugin
543// ============================================================================
544
545/// Plugin that sets up depth buffer readback from the GPU.
546struct DepthReadbackPlugin {
547    shared_depth: SharedDepthBuffer,
548    near: f32,
549    far: f32,
550}
551
552impl Plugin for DepthReadbackPlugin {
553    fn build(&self, app: &mut App) {
554        use bevy::core_pipeline::core_3d::graph::Core3d;
555        use bevy::core_pipeline::core_3d::graph::Node3d;
556
557        // Insert shared depth buffer in main app
558        app.insert_resource(self.shared_depth.clone());
559        app.insert_resource(DepthCaptureRequest {
560            requested: false,
561            near: self.near,
562            far: self.far,
563        });
564
565        // Get render app
566        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
567            eprintln!("Failed to get RenderApp for depth readback");
568            return;
569        };
570
571        // Insert resources in render world
572        render_app.insert_resource(self.shared_depth.clone());
573        render_app.init_resource::<PendingDepthCaptureQueue>();
574
575        // Add extraction system to copy request from main world
576        render_app.add_systems(ExtractSchedule, extract_depth_request);
577
578        // Add system to process completed depth captures
579        render_app.add_systems(Render, collect_depth_captures.in_set(RenderSet::Cleanup));
580
581        // Register the depth readback node in the render graph
582        // Run after main pass completes (depth buffer is ready) but before tonemapping
583        render_app
584            .add_render_graph_node::<ViewNodeRunner<DepthReadbackNode>>(Core3d, DepthReadbackLabel)
585            .add_render_graph_edges(
586                Core3d,
587                (Node3d::EndMainPass, DepthReadbackLabel, Node3d::Tonemapping),
588            );
589    }
590}
591
592/// Extract depth capture request from main world to render world
593fn extract_depth_request(mut commands: Commands, request: Extract<Res<DepthCaptureRequest>>) {
594    commands.insert_resource(DepthCaptureRequest {
595        requested: request.requested,
596        near: request.near,
597        far: request.far,
598    });
599}
600
/// Process completed depth buffer captures (synchronous GPU-to-CPU readback with device polling)
///
/// Drains `PendingDepthCaptureQueue`, and for every entry maps the staging
/// buffer, strips the row padding, converts the reverse-Z values to linear
/// depth and stores `(values, width, height)` in `SharedDepthBuffer`. When
/// several captures are pending, each one overwrites the previous result, so
/// only the last one remains visible.
///
/// Failures (poisoned queue lock, mapping error, disconnected channel) are
/// reported on stderr or silently skipped; nothing is returned to the caller.
///
/// NOTE(review): the polling loop has no upper bound. If the mapping callback
/// never fires, this system blocks forever.
fn collect_depth_captures(
    queue: Res<PendingDepthCaptureQueue>,
    shared_depth: Res<SharedDepthBuffer>,
    render_device: Res<RenderDevice>,
) {
    let trace = render_trace_enabled();
    let t_sys = trace.then(std::time::Instant::now);

    // Take all pending captures from the queue
    // (the lock is released at the end of this block, before any GPU waiting)
    let pending_captures = {
        let Ok(mut pending) = queue.0.lock() else {
            return;
        };
        std::mem::take(&mut *pending)
    };

    if pending_captures.is_empty() {
        if let Some(t0) = t_sys {
            eprintln!(
                "[render_trace][sys] collect_depth_captures empty ms={:.3}",
                t0.elapsed().as_secs_f64() * 1000.0
            );
        }
        return;
    }

    // Remember the count for the trace line; the vector is consumed below.
    let pending_count = pending_captures.len();

    // Process each pending capture synchronously with device polling
    for pending in pending_captures {
        let width = pending.width;
        let height = pending.height;
        let near = pending.near;
        let far = pending.far;
        let buffer = pending.buffer;
        let shared = shared_depth.0.clone();

        // Use blocking sync approach with device polling (same as RGBA capture)
        let buffer_slice = buffer.slice(..);

        // Request mapping
        // (the callback forwards the mapping result through the channel)
        let (tx, rx) = std::sync::mpsc::channel();
        buffer_slice.map_async(MapMode::Read, move |result| {
            let _ = tx.send(result);
        });

        let t_wait = trace.then(std::time::Instant::now);
        let mut poll_iters: u32 = 0;

        // Poll the device until mapping completes
        loop {
            render_device.poll(bevy::render::render_resource::Maintain::Poll);
            poll_iters += 1;
            match rx.try_recv() {
                Ok(Ok(())) => {
                    let data = buffer_slice.get_mapped_range();

                    // Extract depth values with alignment handling
                    let ndc_depth =
                        depth_helpers::extract_depth_with_alignment(&data, width, height);

                    // The mapped view must be released before unmapping.
                    drop(data);
                    buffer.unmap();

                    // Convert from reverse-Z NDC to linear depth in meters
                    let linear_depth =
                        depth_helpers::convert_depth_to_linear(&ndc_depth, near, far);

                    // Store in shared buffer
                    // (a poisoned lock discards this result silently)
                    if let Ok(mut guard) = shared.lock() {
                        *guard = Some((linear_depth, width, height));
                    }
                    break;
                }
                Ok(Err(e)) => {
                    eprintln!("Failed to map depth buffer: {:?}", e);
                    break;
                }
                Err(std::sync::mpsc::TryRecvError::Empty) => {
                    // Keep polling
                    // (sleep 1 ms between polls instead of spinning)
                    std::thread::sleep(std::time::Duration::from_millis(1));
                }
                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
                    // The callback was dropped without ever sending a result.
                    eprintln!("Depth buffer mapping channel disconnected");
                    break;
                }
            }
        }

        if let Some(t_wait) = t_wait {
            eprintln!(
                "[render_trace][sys] collect_depth_captures mapping_wait poll_iters={} ms={:.3}",
                poll_iters,
                t_wait.elapsed().as_secs_f64() * 1000.0
            );
        }
    }

    if let Some(t0) = t_sys {
        eprintln!(
            "[render_trace][sys] collect_depth_captures done pending={} ms={:.3}",
            pending_count,
            t0.elapsed().as_secs_f64() * 1000.0
        );
    }
}
708
709// ============================================================================
710// Image Copy Infrastructure (for headless rendering)
711// ============================================================================
712
/// Label for the image copy render graph node
///
/// NOTE(review): the graph registration that uses this label is not in this
/// part of the file; presumably it identifies `ImageCopyDriver`.
#[derive(Debug, Hash, PartialEq, Eq, Clone, RenderLabel)]
struct ImageCopyLabel;
716
/// Component that marks an image for GPU-to-CPU copying
///
/// `extract_image_copiers` clones every instance into the render world's
/// `ImageCopiers` resource, which `ImageCopyDriver` then iterates.
#[derive(Component, Clone)]
struct ImageCopier {
    /// Handle to the source image (render target)
    src_image: Handle<Image>,
    /// Whether to capture on this frame; `ImageCopyDriver` skips copiers
    /// whose flag is `false`.
    enabled: bool,
}
725
/// Resource containing all ImageCopiers for the render world
///
/// Replaced wholesale by `extract_image_copiers` each time that system runs,
/// so it always reflects the main world's current set of `ImageCopier`
/// components.
#[derive(Resource, Default)]
struct ImageCopiers(Vec<ImageCopier>);
729
/// Pending image capture for async processing
///
/// Created by `ImageCopyDriver` after it has submitted the texture-to-buffer
/// copy.
struct PendingImageCapture {
    /// Staging buffer (`COPY_DST | MAP_READ`) that receives the image rows.
    buffer: Buffer,
    /// Image width in pixels, taken from the GPU image.
    width: u32,
    /// Image height in pixels, taken from the GPU image.
    height: u32,
    /// Row stride inside `buffer`, in bytes, after copy-alignment padding.
    padded_bytes_per_row: u32,
}
737
/// Queue for pending image captures
///
/// `ImageCopyDriver` pushes an entry for every copy it submits;
/// `collect_image_captures` drains the queue. The `Mutex` is needed because
/// the render node only has shared (`&World`) access when it pushes.
#[derive(Resource, Default)]
struct PendingImageCaptureQueue(Arc<Mutex<Vec<PendingImageCapture>>>);
741
/// Shared buffer for captured RGBA data
///
/// Cloning the resource clones the `Arc`, so every clone refers to the same
/// slot. The slot defaults to `None`.
///
/// NOTE(review): the tuple is presumably `(rgba_bytes, width, height)`,
/// mirroring `SharedDepthBuffer`; the code that fills it is not fully visible
/// in this part of the file.
#[derive(Resource, Clone, Default)]
#[allow(clippy::type_complexity)]
struct SharedRgbaBuffer(Arc<Mutex<Option<(Vec<u8>, u32, u32)>>>);
746
747/// Render graph node that copies render target images to staging buffers
748struct ImageCopyDriver;
749
750impl Node for ImageCopyDriver {
751    fn run(
752        &self,
753        _graph: &mut RenderGraphContext,
754        _render_context: &mut RenderContext,
755        world: &World,
756    ) -> Result<(), NodeRunError> {
757        let trace = render_trace_enabled();
758        let t0 = trace.then(std::time::Instant::now);
759
760        let Some(image_copiers) = world.get_resource::<ImageCopiers>() else {
761            return Ok(());
762        };
763
764        let Some(gpu_images) = world.get_resource::<RenderAssets<GpuImage>>() else {
765            return Ok(());
766        };
767
768        let Some(queue) = world.get_resource::<PendingImageCaptureQueue>() else {
769            return Ok(());
770        };
771
772        let render_device = world.resource::<RenderDevice>();
773
774        let Some(render_queue) = world.get_resource::<RenderQueue>() else {
775            return Ok(());
776        };
777
778        for image_copier in image_copiers.0.iter() {
779            if !image_copier.enabled {
780                continue;
781            }
782
783            let Some(gpu_image) = gpu_images.get(&image_copier.src_image) else {
784                continue;
785            };
786
787            let width = gpu_image.size.x;
788            let height = gpu_image.size.y;
789
790            // Calculate padded bytes per row (wgpu requires 256-byte alignment)
791            let block_dimensions = gpu_image.texture_format.block_dimensions();
792            let block_size = gpu_image.texture_format.block_copy_size(None).unwrap_or(4); // Default to 4 bytes for RGBA8
793
794            let padded_bytes_per_row = RenderDevice::align_copy_bytes_per_row(
795                (width as usize / block_dimensions.0 as usize) * block_size as usize,
796            );
797
798            let buffer_size = (padded_bytes_per_row * height as usize) as u64;
799
800            // Create staging buffer for CPU readback
801            let staging_buffer = render_device.create_buffer(&BufferDescriptor {
802                label: Some("image_copy_staging_buffer"),
803                size: buffer_size,
804                usage: BufferUsages::COPY_DST | BufferUsages::MAP_READ,
805                mapped_at_creation: false,
806            });
807
808            // Create command encoder for the copy operation
809            let mut encoder =
810                render_device.create_command_encoder(&CommandEncoderDescriptor::default());
811
812            let texture_extent = Extent3d {
813                width,
814                height,
815                depth_or_array_layers: 1,
816            };
817
818            // Copy texture to buffer
819            encoder.copy_texture_to_buffer(
820                gpu_image.texture.as_image_copy(),
821                ImageCopyBuffer {
822                    buffer: &staging_buffer,
823                    layout: ImageDataLayout {
824                        offset: 0,
825                        bytes_per_row: Some(padded_bytes_per_row as u32),
826                        rows_per_image: None,
827                    },
828                },
829                texture_extent,
830            );
831
832            // Submit the copy command
833            render_queue.submit(std::iter::once(encoder.finish()));
834
835            // Queue for async processing
836            if let Ok(mut pending) = queue.0.lock() {
837                pending.push(PendingImageCapture {
838                    buffer: staging_buffer,
839                    width,
840                    height,
841                    padded_bytes_per_row: padded_bytes_per_row as u32,
842                });
843            }
844        }
845
846        if let Some(t0) = t0 {
847            eprintln!(
848                "[render_trace][node] ImageCopyDriver ms={:.3}",
849                t0.elapsed().as_secs_f64() * 1000.0
850            );
851        }
852
853        Ok(())
854    }
855}
856
857/// Extract ImageCopier components to render world
858fn extract_image_copiers(mut commands: Commands, query: Extract<Query<&ImageCopier>>) {
859    commands.insert_resource(ImageCopiers(query.iter().cloned().collect()));
860}
861
/// Process completed image captures.
///
/// Drains every `PendingImageCapture` currently in the queue, maps each staging
/// buffer for reading, strips the per-row padding, and publishes the packed pixel
/// bytes together with the image width and height into `SharedRgbaBuffer`.
///
/// This system blocks: for each capture it polls the render device (sleeping 1 ms
/// between polls) until the map callback reports a result or 10 seconds elapse.
/// Mapping failures and timeouts are only reported on stderr; the capture is then
/// dropped. When several captures are pending, each successful one overwrites the
/// shared buffer, so the last one wins.
///
/// When render tracing is enabled, timing lines are written to stderr.
fn collect_image_captures(
    queue: Res<PendingImageCaptureQueue>,
    shared_rgba: Res<SharedRgbaBuffer>,
    render_device: Res<RenderDevice>,
) {
    let trace = render_trace_enabled();
    let t_sys = trace.then(std::time::Instant::now);

    // Take the whole queue in one go so the lock is held only for the swap.
    // A poisoned lock silently skips this run of the system.
    let pending_captures = {
        let Ok(mut pending) = queue.0.lock() else {
            return;
        };
        std::mem::take(&mut *pending)
    };

    if pending_captures.is_empty() {
        if let Some(t0) = t_sys {
            eprintln!(
                "[render_trace][sys] collect_image_captures empty ms={:.3}",
                t0.elapsed().as_secs_f64() * 1000.0
            );
        }
        return;
    }

    let pending_count = pending_captures.len();

    for pending in pending_captures {
        let width = pending.width;
        let height = pending.height;
        let padded_bytes_per_row = pending.padded_bytes_per_row;
        let buffer = pending.buffer;
        let shared = shared_rgba.0.clone();

        // Use blocking sync approach with device polling
        let buffer_slice = buffer.slice(..);

        // Request mapping; the callback forwards the map result through the channel.
        let (tx, rx) = std::sync::mpsc::channel();
        buffer_slice.map_async(MapMode::Read, move |result| {
            let _ = tx.send(result);
        });

        // Poll the device until mapping completes (with timeout)
        let start = std::time::Instant::now();
        let timeout = std::time::Duration::from_secs(10);
        let mut poll_iters: u32 = 0;
        loop {
            render_device.poll(bevy::render::render_resource::Maintain::Poll);
            poll_iters += 1;

            // The timeout is checked before the channel, so a result that arrives
            // on the same iteration the deadline passes is discarded.
            if start.elapsed() > timeout {
                eprintln!(
                    "Warning: Buffer mapping timeout after {:?}",
                    start.elapsed()
                );
                break;
            }

            match rx.try_recv() {
                Ok(Ok(())) => {
                    let data = buffer_slice.get_mapped_range();

                    // Extract pixels with alignment handling
                    // NOTE(review): hard-codes 4 bytes per pixel, whereas the copy node
                    // derives the padded row size from the texture format's block size —
                    // confirm the render target is always a 4-byte-per-pixel format.
                    let bytes_per_pixel = 4u32;
                    let actual_row_bytes = (width * bytes_per_pixel) as usize;
                    let padded_row_bytes = padded_bytes_per_row as usize;

                    // Each row in the staging buffer is `padded_row_bytes` long; keep
                    // only the leading `actual_row_bytes` so the output is tightly packed.
                    let mut rgba = Vec::with_capacity((width * height * 4) as usize);
                    for y in 0..height as usize {
                        let row_start = y * padded_row_bytes;
                        rgba.extend_from_slice(&data[row_start..row_start + actual_row_bytes]);
                    }

                    // The mapped view must be released before the buffer is unmapped.
                    drop(data);
                    buffer.unmap();

                    // A poisoned lock silently discards this capture.
                    if let Ok(mut guard) = shared.lock() {
                        *guard = Some((rgba, width, height));
                    }
                    break;
                }
                Ok(Err(e)) => {
                    eprintln!("Failed to map image buffer: {:?}", e);
                    break;
                }
                Err(std::sync::mpsc::TryRecvError::Empty) => {
                    // Keep polling
                    std::thread::sleep(std::time::Duration::from_millis(1));
                }
                Err(std::sync::mpsc::TryRecvError::Disconnected) => {
                    eprintln!("Image buffer mapping channel disconnected");
                    break;
                }
            }
        }

        if trace {
            eprintln!(
                "[render_trace][sys] collect_image_captures mapping_wait poll_iters={} ms={:.3}",
                poll_iters,
                start.elapsed().as_secs_f64() * 1000.0
            );
        }
    }

    if let Some(t0) = t_sys {
        eprintln!(
            "[render_trace][sys] collect_image_captures done pending={} ms={:.3}",
            pending_count,
            t0.elapsed().as_secs_f64() * 1000.0
        );
    }
}
977
/// Plugin for headless image copy.
///
/// Its `Plugin::build` implementation registers the extraction system, the
/// readback system, and the `ImageCopyDriver` render-graph node.
struct ImageCopyPlugin {
    /// Shared RGBA buffer; a clone is inserted as a resource into the main app
    /// and, when a render sub-app exists, into that sub-app as well.
    shared_rgba: SharedRgbaBuffer,
}
982
983impl Plugin for ImageCopyPlugin {
984    fn build(&self, app: &mut App) {
985        use bevy::render::render_graph::RenderGraph;
986
987        app.insert_resource(self.shared_rgba.clone());
988
989        let Some(render_app) = app.get_sub_app_mut(RenderApp) else {
990            return;
991        };
992
993        render_app.insert_resource(self.shared_rgba.clone());
994        render_app.init_resource::<ImageCopiers>();
995        render_app.init_resource::<PendingImageCaptureQueue>();
996
997        render_app.add_systems(ExtractSchedule, extract_image_copiers);
998        render_app.add_systems(Render, collect_image_captures.in_set(RenderSet::Cleanup));
999
1000        // Add image copy node to render graph (runs after camera driver)
1001        let mut graph = render_app.world_mut().resource_mut::<RenderGraph>();
1002        graph.add_node(ImageCopyLabel, ImageCopyDriver);
1003        graph.add_node_edge(bevy::render::graph::CameraDriverLabel, ImageCopyLabel);
1004    }
1005}
1006
1007// ============================================================================
1008// Render Request and Components
1009// ============================================================================
1010
/// Configuration passed to the Bevy app.
///
/// Inserted as a resource so that scene-setup systems can read it.
#[derive(Resource, Clone)]
struct RenderRequest {
    /// Path of the mesh file handed to the asset server (the render entry
    /// points fill this with the canonicalized `google_16k/textured.obj` path).
    mesh_path: String,
    /// Path of the texture image handed to the asset server (the render entry
    /// points fill this with the canonicalized `google_16k/texture_map.png` path).
    texture_path: String,
    /// Transform applied to the camera entity when the scene is set up.
    camera_transform: Transform,
    /// Rotation applied to the spawned object.
    object_rotation: ObjectRotation,
    /// Render settings: projection (FOV, near/far planes) and lighting.
    config: RenderConfig,
}
1020
/// Marker component for the rendered object; `setup_scene` attaches it to the
/// entity that carries the object's `SceneRoot`.
#[derive(Component)]
struct RenderedObject;
1024
/// Marker component for the render camera; `setup_scene` attaches it to the
/// camera entity it spawns.
#[derive(Component)]
struct RenderCamera;
1028
/// Handle for the loaded texture.
///
/// Kept as a resource so `check_assets_loaded` can poll its load state and
/// `apply_materials` can build the textured material from it.
#[derive(Resource)]
struct LoadedTexture(Handle<Image>);
1032
/// Handle for the loaded scene.
///
/// Kept as a resource so `check_assets_loaded` can poll its load state.
#[derive(Resource)]
struct LoadedScene(Handle<Scene>);
1036
/// Shared output for extracting render results.
///
/// The slot starts as `None`. `render_headless` keeps one clone, gives one to
/// the Bevy app as a resource, and gives one to its watchdog thread; a `Some`
/// value signals that the render result is available.
#[derive(Resource, Clone)]
struct SharedOutput(Arc<Mutex<Option<RenderOutput>>>);
1040
/// Handle for the render target image.
///
/// NOTE(review): marked `dead_code`; presumably held only to keep the target
/// image asset alive — confirm against where it is inserted.
#[derive(Resource)]
#[allow(dead_code)]
struct RenderTargetImage(Handle<Image>);
1045
/// Tracks progress for a homogeneous batch of viewpoints rendered in one app.
#[derive(Resource)]
struct HeadlessBatchSequence {
    /// Camera transforms to render, in capture order.
    viewpoints: Vec<Transform>,
    /// Index into `viewpoints` of the viewpoint currently being rendered.
    current_index: usize,
    /// Finished captures; `render_headless_sequence` requires exactly one per
    /// viewpoint once the batch is done.
    outputs: Vec<RenderOutput>,
    /// Starts at 0. Presumably counts frames to wait before capturing after a
    /// viewpoint change — the systems that update it are outside this view.
    warmup_frames_remaining: u32,
    /// Set once the batch has nothing left to render; initialized to `true`
    /// for an empty viewpoint list and polled by `render_headless_sequence`.
    done: bool,
}
1055
1056impl HeadlessBatchSequence {
1057    fn new(viewpoints: Vec<Transform>) -> Self {
1058        let capacity = viewpoints.len();
1059        Self {
1060            viewpoints,
1061            current_index: 0,
1062            outputs: Vec::with_capacity(capacity),
1063            warmup_frames_remaining: 0,
1064            done: capacity == 0,
1065        }
1066    }
1067
1068    fn current_viewpoint(&self) -> Option<Transform> {
1069        self.viewpoints.get(self.current_index).cloned()
1070    }
1071}
1072
1073/// Perform headless rendering of a YCB object.
1074///
1075/// This uses true headless GPU rendering via `RenderTarget::Image`, which does NOT
1076/// require any window surfaces. This should work on WSL2 and other environments
1077/// without display servers.
1078///
1079/// Note: Bevy's App::run() does not return cleanly. A watchdog thread monitors
1080/// for results and terminates the process once the render is complete.
1081#[allow(dead_code)]
1082pub fn render_headless(
1083    object_dir: &Path,
1084    camera_transform: &Transform,
1085    object_rotation: &ObjectRotation,
1086    config: &RenderConfig,
1087) -> Result<RenderOutput, RenderError> {
1088    // Canonicalize paths so Bevy's asset server can find them regardless of
1089    // caller working directory. Relative paths like "../../ycb" pass the
1090    // exists() check but Bevy resolves assets against its own root.
1091    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
1092        RenderError::RenderFailed(format!(
1093            "Cannot canonicalize object directory {}: {}",
1094            object_dir.display(),
1095            e
1096        ))
1097    })?;
1098    let mesh_path = object_dir.join("google_16k/textured.obj");
1099    let texture_path = object_dir.join("google_16k/texture_map.png");
1100
1101    if !mesh_path.exists() {
1102        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
1103    }
1104    if !texture_path.exists() {
1105        return Err(RenderError::TextureNotFound(
1106            texture_path.display().to_string(),
1107        ));
1108    }
1109
1110    let request = RenderRequest {
1111        mesh_path: mesh_path.display().to_string(),
1112        texture_path: texture_path.display().to_string(),
1113        camera_transform: *camera_transform,
1114        object_rotation: object_rotation.clone(),
1115        config: config.clone(),
1116    };
1117
1118    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
1119    let output_clone = shared_output.clone();
1120
1121    // Shared buffer for RGBA data from headless render target
1122    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
1123
1124    // Shared buffer for depth readback
1125    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
1126
1127    // Create a temp file path for fallback output serialization
1128    let temp_path =
1129        std::env::temp_dir().join(format!("bevy_sensor_render_{}.bin", std::process::id()));
1130
1131    // Spawn watchdog thread that monitors for timeout (don't exit - let Bevy exit gracefully)
1132    let output_poll_for_timeout = shared_output.clone();
1133    std::thread::spawn(move || {
1134        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
1135        let start = std::time::Instant::now();
1136        let poll_interval = std::time::Duration::from_millis(100);
1137
1138        loop {
1139            // Check if we have a result
1140            if let Ok(guard) = output_poll_for_timeout.0.lock() {
1141                if guard.is_some() {
1142                    // Output is ready, Bevy will exit via AppExit event
1143                    return; // Exit watchdog thread, Bevy will handle exit
1144                }
1145            }
1146
1147            if start.elapsed() > timeout {
1148                eprintln!(
1149                    "Error: Render timeout after {} seconds",
1150                    RENDER_TIMEOUT_SECS
1151                );
1152                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
1153                // Force exit on timeout (this is a failure case)
1154                std::process::exit(1);
1155            }
1156
1157            std::thread::sleep(poll_interval);
1158        }
1159    });
1160
1161    // Run Bevy app with HEADLESS configuration (no window surfaces!)
1162    // Uses ScheduleRunnerPlugin instead of WinitPlugin
1163    build_headless_app(request, output_clone, shared_rgba, shared_depth).run();
1164
1165    // App::run() returned - check shared_output for result
1166    if let Ok(guard) = shared_output.0.lock() {
1167        if let Some(output) = guard.as_ref() {
1168            return Ok(output.clone());
1169        }
1170    }
1171
1172    // Fallback: try to read from temp file (for legacy compatibility)
1173    if temp_path.exists() {
1174        if let Ok(output) = read_output_from_file(&temp_path) {
1175            let _ = std::fs::remove_file(&temp_path);
1176            return Ok(output);
1177        }
1178    }
1179
1180    Err(RenderError::RenderFailed(
1181        "Render did not complete".to_string(),
1182    ))
1183}
1184
/// Render a homogeneous sequence of viewpoints in a single headless Bevy app.
///
/// All captures share the same object, object rotation, and render configuration.
/// This is the fast path used by the batch API for episode-style workloads.
///
/// No schedule runner is installed here: the function drives the app itself by
/// calling `app.update()` in a loop until `HeadlessBatchSequence::done` is set
/// or `RENDER_TIMEOUT_SECS` have elapsed. The timeout is checked only between
/// updates, so a single update that never returns is not interrupted.
///
/// An empty `viewpoints` slice returns an empty vector immediately, before any
/// filesystem access or app construction.
///
/// # Errors
/// - `RenderError::RenderFailed` if `object_dir` cannot be canonicalized, or if
///   the batch finishes with a different number of outputs than viewpoints.
/// - `RenderError::MeshNotFound` / `RenderError::TextureNotFound` if
///   `google_16k/textured.obj` or `google_16k/texture_map.png` is missing.
/// - `RenderError::RenderTimeout` if the batch is not done within the timeout.
pub fn render_headless_sequence(
    object_dir: &Path,
    viewpoints: &[Transform],
    object_rotation: &ObjectRotation,
    config: &RenderConfig,
) -> Result<Vec<RenderOutput>, RenderError> {
    if viewpoints.is_empty() {
        return Ok(Vec::new());
    }

    let object_dir = std::fs::canonicalize(object_dir).map_err(|e| {
        RenderError::RenderFailed(format!(
            "Cannot canonicalize object directory {}: {}",
            object_dir.display(),
            e
        ))
    })?;
    let mesh_path = object_dir.join("google_16k/textured.obj");
    let texture_path = object_dir.join("google_16k/texture_map.png");

    if !mesh_path.exists() {
        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
    }
    if !texture_path.exists() {
        return Err(RenderError::TextureNotFound(
            texture_path.display().to_string(),
        ));
    }

    // The request carries the first viewpoint as its camera transform; the
    // full list is handed to the app separately via `HeadlessBatchSequence`.
    let request = RenderRequest {
        mesh_path: mesh_path.display().to_string(),
        texture_path: texture_path.display().to_string(),
        camera_transform: viewpoints[0],
        object_rotation: object_rotation.clone(),
        config: config.clone(),
    };

    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
    let rgba_clone = shared_rgba.clone();

    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
    let depth_clone = shared_depth.clone();

    // Window-less app: no primary window, no winit, no logging plugin, and no
    // Ctrl-C handler plugin.
    let mut app = App::new();
    app.add_plugins(
        DefaultPlugins
            .set(WindowPlugin {
                primary_window: None,
                exit_condition: ExitCondition::DontExit,
                ..default()
            })
            .disable::<bevy::winit::WinitPlugin>()
            .disable::<LogPlugin>()
            .disable::<TerminalCtrlCHandlerPlugin>(),
    )
    .add_plugins(ObjPlugin)
    .add_plugins(ImageCopyPlugin {
        shared_rgba: rgba_clone,
    })
    .add_plugins(DepthReadbackPlugin {
        shared_depth: depth_clone,
        near: config.near_plane,
        far: config.far_plane,
    })
    .insert_resource(request)
    .insert_resource(shared_rgba)
    .insert_resource(HeadlessBatchSequence::new(viewpoints.to_vec()))
    .init_resource::<RenderState>()
    .add_systems(Startup, setup_headless_scene)
    .add_systems(
        Update,
        (
            check_assets_loaded,
            apply_materials,
            tick_headless_batch_warmup,
            request_headless_capture,
            check_headless_capture_ready,
            extract_and_continue_headless_batch,
        )
            .chain(),
    );

    // Manual app.update() loops do not run plugin finish/cleanup hooks automatically.
    // Bevy's screenshot plugin inserts CapturedScreenshots during finish(), so run the
    // normal startup phases before driving the headless batch loop ourselves.
    let trace_outer = render_trace_enabled();
    let t_finish = std::time::Instant::now();
    app.finish();
    let finish_ms = t_finish.elapsed().as_secs_f64() * 1000.0;
    let t_cleanup = std::time::Instant::now();
    app.cleanup();
    let cleanup_ms = t_cleanup.elapsed().as_secs_f64() * 1000.0;
    if trace_outer {
        eprintln!(
            "[render_trace][coldinit] app.finish ms={:.3} app.cleanup ms={:.3}",
            finish_ms, cleanup_ms
        );
    }

    let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
    let start = std::time::Instant::now();

    // NOTE(review): `trace_outer` above comes from `render_trace_enabled()` while
    // `trace` reads the environment variable directly — confirm the two always agree.
    let trace = std::env::var("BEVY_SENSOR_RENDER_TRACE").is_ok();
    let mut update_idx: u32 = 0;
    let mut last_completed_outputs: usize = 0;
    let mut viewpoint_start = std::time::Instant::now();

    loop {
        if start.elapsed() > timeout {
            return Err(RenderError::RenderTimeout {
                duration_secs: RENDER_TIMEOUT_SECS,
            });
        }

        let update_start = std::time::Instant::now();
        app.update();
        let update_elapsed_ms = update_start.elapsed().as_secs_f64() * 1000.0;

        if trace {
            let batch = app.world().resource::<HeadlessBatchSequence>();
            let warmup = batch.warmup_frames_remaining;
            let current = batch.current_index;
            let completed = batch.outputs.len();
            let vp_ms = viewpoint_start.elapsed().as_secs_f64() * 1000.0;
            eprintln!(
                "[render_trace] update={update_idx} vp={current} warmup={warmup} \
                 completed={completed} update_ms={update_elapsed_ms:.2} vp_ms={vp_ms:.2}"
            );
            // A growing output count means a viewpoint finished during this
            // update; report its duration and restart the per-viewpoint timer.
            if completed > last_completed_outputs {
                eprintln!(
                    "[render_trace] viewpoint {} finished in {:.2} ms",
                    completed - 1,
                    vp_ms
                );
                last_completed_outputs = completed;
                viewpoint_start = std::time::Instant::now();
            }
        }

        update_idx += 1;

        if app.world().resource::<HeadlessBatchSequence>().done {
            break;
        }
    }

    if trace {
        eprintln!(
            "[render_trace] total_wall_ms={:.2} updates={update_idx} viewpoints={}",
            start.elapsed().as_secs_f64() * 1000.0,
            viewpoints.len()
        );
    }

    // A batch can be flagged done without having produced every output;
    // treat that as a failure rather than returning a short list.
    let mut batch = app.world_mut().resource_mut::<HeadlessBatchSequence>();
    if batch.outputs.len() != viewpoints.len() {
        return Err(RenderError::RenderFailed(format!(
            "Batch render produced {} outputs for {} viewpoints",
            batch.outputs.len(),
            viewpoints.len()
        )));
    }

    // Move the outputs out of the resource instead of cloning them.
    Ok(std::mem::take(&mut batch.outputs))
}
1354
1355/// Assemble the shared single-render headless Bevy app.
1356fn build_headless_app(
1357    request: RenderRequest,
1358    shared_output: SharedOutput,
1359    shared_rgba: SharedRgbaBuffer,
1360    shared_depth: SharedDepthBuffer,
1361) -> App {
1362    let near = request.config.near_plane;
1363    let far = request.config.far_plane;
1364
1365    let mut app = App::new();
1366    app.add_plugins(
1367        DefaultPlugins
1368            .set(WindowPlugin {
1369                primary_window: None,
1370                exit_condition: ExitCondition::DontExit,
1371                ..default()
1372            })
1373            .disable::<bevy::winit::WinitPlugin>()
1374            .disable::<LogPlugin>()
1375            .disable::<TerminalCtrlCHandlerPlugin>(),
1376    )
1377    .add_plugins(ScheduleRunnerPlugin::run_loop(Duration::from_secs_f64(
1378        1.0 / 60.0,
1379    )))
1380    .add_plugins(ObjPlugin)
1381    .add_plugins(ImageCopyPlugin {
1382        shared_rgba: shared_rgba.clone(),
1383    })
1384    .add_plugins(DepthReadbackPlugin {
1385        shared_depth,
1386        near,
1387        far,
1388    })
1389    .insert_resource(request)
1390    .insert_resource(shared_output)
1391    .insert_resource(shared_rgba)
1392    .init_resource::<RenderState>()
1393    .add_systems(Startup, setup_headless_scene)
1394    .add_systems(
1395        Update,
1396        (
1397            check_assets_loaded,
1398            apply_materials,
1399            request_headless_capture,
1400            check_headless_capture_ready,
1401            extract_and_exit_headless,
1402        )
1403            .chain(),
1404    );
1405    app
1406}
1407
1408/// Serialize RenderOutput to bytes for IPC (used by subprocess mode)
1409#[allow(dead_code)]
1410fn serialize_output(output: &RenderOutput) -> Vec<u8> {
1411    let mut data = Vec::new();
1412
1413    // Header: width, height, rgba_len, depth_len
1414    data.extend_from_slice(&output.width.to_le_bytes());
1415    data.extend_from_slice(&output.height.to_le_bytes());
1416    data.extend_from_slice(&(output.rgba.len() as u32).to_le_bytes());
1417    data.extend_from_slice(&(output.depth.len() as u32).to_le_bytes());
1418
1419    // RGBA data
1420    data.extend_from_slice(&output.rgba);
1421
1422    // Depth data (as f64 bytes for TBP precision)
1423    for d in &output.depth {
1424        data.extend_from_slice(&d.to_le_bytes());
1425    }
1426
1427    // Intrinsics (f64 for TBP precision)
1428    data.extend_from_slice(&output.intrinsics.focal_length[0].to_le_bytes());
1429    data.extend_from_slice(&output.intrinsics.focal_length[1].to_le_bytes());
1430    data.extend_from_slice(&output.intrinsics.principal_point[0].to_le_bytes());
1431    data.extend_from_slice(&output.intrinsics.principal_point[1].to_le_bytes());
1432    data.extend_from_slice(&output.intrinsics.image_size[0].to_le_bytes());
1433    data.extend_from_slice(&output.intrinsics.image_size[1].to_le_bytes());
1434
1435    // Camera transform (translation + rotation quaternion)
1436    let t = output.camera_transform.translation;
1437    let r = output.camera_transform.rotation;
1438    data.extend_from_slice(&t.x.to_le_bytes());
1439    data.extend_from_slice(&t.y.to_le_bytes());
1440    data.extend_from_slice(&t.z.to_le_bytes());
1441    data.extend_from_slice(&r.x.to_le_bytes());
1442    data.extend_from_slice(&r.y.to_le_bytes());
1443    data.extend_from_slice(&r.z.to_le_bytes());
1444    data.extend_from_slice(&r.w.to_le_bytes());
1445
1446    // Object rotation (f64)
1447    let or = &output.object_rotation;
1448    data.extend_from_slice(&or.pitch.to_le_bytes());
1449    data.extend_from_slice(&or.yaw.to_le_bytes());
1450    data.extend_from_slice(&or.roll.to_le_bytes());
1451
1452    data
1453}
1454
1455/// Read RenderOutput from serialized file
1456fn read_output_from_file(path: &std::path::Path) -> Result<RenderOutput, RenderError> {
1457    let mut file = File::open(path).map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1458    let mut data = Vec::new();
1459    file.read_to_end(&mut data)
1460        .map_err(|e| RenderError::RenderFailed(e.to_string()))?;
1461
1462    let mut cursor = 0;
1463
1464    let read_u32 = |data: &[u8], cursor: &mut usize| -> u32 {
1465        let val = u32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1466        *cursor += 4;
1467        val
1468    };
1469
1470    let read_f32 = |data: &[u8], cursor: &mut usize| -> f32 {
1471        let val = f32::from_le_bytes(data[*cursor..*cursor + 4].try_into().unwrap());
1472        *cursor += 4;
1473        val
1474    };
1475
1476    let read_f64 = |data: &[u8], cursor: &mut usize| -> f64 {
1477        let val = f64::from_le_bytes(data[*cursor..*cursor + 8].try_into().unwrap());
1478        *cursor += 8;
1479        val
1480    };
1481
1482    let width = read_u32(&data, &mut cursor);
1483    let height = read_u32(&data, &mut cursor);
1484    let rgba_len = read_u32(&data, &mut cursor) as usize;
1485    let depth_len = read_u32(&data, &mut cursor) as usize;
1486
1487    let rgba = data[cursor..cursor + rgba_len].to_vec();
1488    cursor += rgba_len;
1489
1490    // Depth data (f64 for TBP precision)
1491    let mut depth = Vec::with_capacity(depth_len);
1492    for _ in 0..depth_len {
1493        depth.push(read_f64(&data, &mut cursor));
1494    }
1495
1496    // Intrinsics (f64 for TBP precision)
1497    let focal_length = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1498    let principal_point = [read_f64(&data, &mut cursor), read_f64(&data, &mut cursor)];
1499    let image_size = [read_u32(&data, &mut cursor), read_u32(&data, &mut cursor)];
1500
1501    // Camera transform (f32 for Bevy compatibility)
1502    let tx = read_f32(&data, &mut cursor);
1503    let ty = read_f32(&data, &mut cursor);
1504    let tz = read_f32(&data, &mut cursor);
1505    let rx = read_f32(&data, &mut cursor);
1506    let ry = read_f32(&data, &mut cursor);
1507    let rz = read_f32(&data, &mut cursor);
1508    let rw = read_f32(&data, &mut cursor);
1509
1510    // Object rotation (f64)
1511    let pitch = read_f64(&data, &mut cursor);
1512    let yaw = read_f64(&data, &mut cursor);
1513    let roll = read_f64(&data, &mut cursor);
1514
1515    Ok(RenderOutput {
1516        rgba,
1517        depth,
1518        width,
1519        height,
1520        intrinsics: crate::CameraIntrinsics {
1521            focal_length,
1522            principal_point,
1523            image_size,
1524        },
1525        camera_transform: Transform {
1526            translation: Vec3::new(tx, ty, tz),
1527            rotation: Quat::from_xyzw(rx, ry, rz, rw),
1528            scale: Vec3::ONE,
1529        },
1530        object_rotation: ObjectRotation { pitch, yaw, roll },
1531    })
1532}
1533
/// Setup the scene with camera, lighting, and object.
///
/// Spawns the render camera using the transform, FOV, and near/far planes from
/// the `RenderRequest`, inserts the ambient light, spawns the key and fill point
/// lights when their configured intensity is positive, starts loading the mesh
/// and texture, and spawns the object's scene root with the requested rotation.
/// The asset handles are stored as the `LoadedScene` and `LoadedTexture`
/// resources. Prints a confirmation line to stdout when done.
#[allow(dead_code)]
fn setup_scene(
    mut commands: Commands,
    asset_server: Res<AssetServer>,
    request: Res<RenderRequest>,
    mut _materials: ResMut<Assets<StandardMaterial>>,
) {
    // Camera with depth prepass (Bevy 0.15+ uses Camera3d component)
    // Disable MSAA for depth readback compatibility (can't copy from multisampled texture)
    // Apply FOV from RenderConfig so the projection matches TBP's camera intrinsics.
    let fov = request.config.fov_radians();
    commands.spawn((
        Camera3d::default(),
        Camera {
            hdr: true,
            ..default()
        },
        Projection::Perspective(PerspectiveProjection {
            fov,
            near: request.config.near_plane,
            far: request.config.far_plane,
            ..default()
        }),
        Msaa::Off,
        request.camera_transform,
        Tonemapping::None, // Accurate colors for software rendering
        DepthPrepass,
        NormalPrepass,
        RenderCamera,
    ));

    // Ambient light (from config)
    let lighting = &request.config.lighting;
    commands.insert_resource(AmbientLight {
        color: Color::WHITE,
        brightness: lighting.ambient_brightness,
    });

    // Key light (from config) - Bevy 0.15+ uses PointLight component directly
    // A non-positive intensity disables the light entirely.
    if lighting.key_light_intensity > 0.0 {
        commands.spawn((
            PointLight {
                intensity: lighting.key_light_intensity,
                shadows_enabled: lighting.shadows_enabled,
                ..default()
            },
            Transform::from_xyz(
                lighting.key_light_position[0],
                lighting.key_light_position[1],
                lighting.key_light_position[2],
            ),
        ));
    }

    // Fill light (from config)
    // A non-positive intensity disables the light entirely.
    if lighting.fill_light_intensity > 0.0 {
        commands.spawn((
            PointLight {
                intensity: lighting.fill_light_intensity,
                shadows_enabled: lighting.shadows_enabled,
                ..default()
            },
            Transform::from_xyz(
                lighting.fill_light_position[0],
                lighting.fill_light_position[1],
                lighting.fill_light_position[2],
            ),
        ));
    }

    // Load the scene
    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
    commands.insert_resource(LoadedScene(scene_handle.clone()));

    // Load the texture
    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
    commands.insert_resource(LoadedTexture(texture_handle.clone()));

    // Create material with texture (will be applied later)
    // NOTE(review): this handle is never stored or attached to an entity in this
    // function; `apply_materials` builds its own material from `LoadedTexture`.
    // Confirm whether this allocation is still needed.
    let _material = _materials.add(StandardMaterial {
        base_color_texture: Some(texture_handle),
        unlit: true,
        ..default()
    });

    // Spawn the scene with rotation (Bevy 0.15+ uses SceneRoot)
    commands.spawn((
        SceneRoot(scene_handle),
        Transform::from_rotation(request.object_rotation.to_quat()),
        RenderedObject,
    ));

    println!("Scene setup complete");
}
1629
1630/// Check if assets are loaded
1631fn check_assets_loaded(
1632    mut state: ResMut<RenderState>,
1633    asset_server: Res<AssetServer>,
1634    scene: Option<Res<LoadedScene>>,
1635    texture: Option<Res<LoadedTexture>>,
1636) {
1637    let trace = render_trace_enabled();
1638    let was_scene_loaded = state.scene_loaded;
1639    let was_texture_loaded = state.texture_loaded;
1640
1641    state.frame_count += 1;
1642
1643    if state.scene_loaded && state.texture_loaded {
1644        return;
1645    }
1646
1647    if let Some(scene) = scene {
1648        match asset_server.get_load_state(&scene.0) {
1649            Some(LoadState::Loaded) => {
1650                state.scene_loaded = true;
1651            }
1652            Some(LoadState::Failed(_)) => {}
1653            _ => {}
1654        }
1655    }
1656
1657    if let Some(texture) = texture {
1658        match asset_server.get_load_state(&texture.0) {
1659            Some(LoadState::Loaded) => {
1660                state.texture_loaded = true;
1661            }
1662            Some(LoadState::Failed(_)) => {}
1663            _ => {}
1664        }
1665    }
1666
1667    if trace {
1668        if !was_scene_loaded && state.scene_loaded {
1669            eprintln!(
1670                "[render_trace][coldinit] scene_loaded frame_count={}",
1671                state.frame_count
1672            );
1673        }
1674        if !was_texture_loaded && state.texture_loaded {
1675            eprintln!(
1676                "[render_trace][coldinit] texture_loaded frame_count={}",
1677                state.frame_count
1678            );
1679        }
1680    }
1681}
1682
1683/// Apply materials to loaded meshes
1684fn apply_materials(
1685    mut state: ResMut<RenderState>,
1686    texture: Option<Res<LoadedTexture>>,
1687    mut materials: ResMut<Assets<StandardMaterial>>,
1688    // Bevy 0.15+: Use MeshMaterial3d instead of Handle<StandardMaterial>
1689    mut mesh_query: Query<&mut MeshMaterial3d<StandardMaterial>, With<Mesh3d>>,
1690) {
1691    if !state.scene_loaded || !state.texture_loaded || state.capture_ready {
1692        return;
1693    }
1694
1695    state.frame_count += 1;
1696
1697    let Some(tex) = texture else { return };
1698
1699    if !state.materials_applied {
1700        // The scene hierarchy is instantiated asynchronously after the asset
1701        // load event fires; wait until mesh entities exist before applying.
1702        if mesh_query.is_empty() {
1703            return;
1704        }
1705
1706        let textured_material = materials.add(StandardMaterial {
1707            base_color_texture: Some(tex.0.clone()),
1708            unlit: true,
1709            ..default()
1710        });
1711
1712        for mut mat in mesh_query.iter_mut() {
1713            mat.0 = textured_material.clone();
1714        }
1715
1716        state.materials_applied = true;
1717        state.materials_applied_frame = state.frame_count;
1718    }
1719
1720    // Two frames after material application is enough for the render graph
1721    // to pick up the new material on native GPU. The previous 60-frame gate
1722    // was a legacy llvmpipe software-rendering cushion.
1723    if state.frame_count >= state.materials_applied_frame + 2 {
1724        let was_ready = state.capture_ready;
1725        state.capture_ready = true;
1726        if render_trace_enabled() && !was_ready {
1727            eprintln!(
1728                "[render_trace][coldinit] capture_ready frame_count={}",
1729                state.frame_count
1730            );
1731        }
1732    }
1733}
1734
1735/// Request a screenshot capture (Bevy 0.15+ uses Screenshot entity + observer)
1736#[allow(dead_code)]
1737fn request_screenshot(
1738    mut commands: Commands,
1739    mut state: ResMut<RenderState>,
1740    shared_image: Res<SharedImageBuffer>,
1741    mut depth_request: ResMut<DepthCaptureRequest>,
1742) {
1743    if !state.capture_ready || state.screenshot_requested {
1744        return;
1745    }
1746
1747    // Clone the Arc for the observer closure
1748    let image_buffer = shared_image.0.clone();
1749
1750    // Also request depth capture
1751    depth_request.requested = true;
1752    println!("Depth capture requested");
1753
1754    // Spawn Screenshot entity with observer (Bevy 0.15+ API)
1755    println!("Requesting screenshot via Screenshot entity");
1756    commands.spawn(Screenshot::primary_window()).observe(
1757        move |trigger: Trigger<ScreenshotCaptured>| {
1758            // ScreenshotCaptured derefs to Image
1759            let image: &Image = trigger.event();
1760
1761            // Get dimensions
1762            let width = image.texture_descriptor.size.width;
1763            let height = image.texture_descriptor.size.height;
1764
1765            // Get raw image data - Bevy 0.15 Image.data is Vec<u8>
1766            let rgba_data = image.data.clone();
1767
1768            // Store in shared buffer
1769            if let Ok(mut guard) = image_buffer.lock() {
1770                *guard = Some((rgba_data, width, height));
1771            }
1772        },
1773    );
1774
1775    state.screenshot_requested = true;
1776    println!("Screenshot requested");
1777}
1778
1779/// Check if screenshot callback has completed
1780#[allow(dead_code)]
1781fn check_screenshot_ready(
1782    mut state: ResMut<RenderState>,
1783    shared_image: Res<SharedImageBuffer>,
1784    shared_depth: Res<SharedDepthBuffer>,
1785    request: Res<RenderRequest>,
1786) {
1787    if !state.screenshot_requested || state.captured {
1788        return;
1789    }
1790
1791    // Increment frame count while waiting for capture
1792    state.frame_count += 1;
1793
1794    // Check if RGBA callback has written data
1795    let rgba_ready = if let Ok(guard) = shared_image.0.lock() {
1796        if let Some((rgba_data, width, height)) = guard.as_ref() {
1797            if state.rgba_data.is_none() {
1798                state.rgba_data = Some(rgba_data.clone());
1799                state.image_width = *width;
1800                state.image_height = *height;
1801            }
1802            true
1803        } else {
1804            false
1805        }
1806    } else {
1807        false
1808    };
1809
1810    // Check if depth readback has completed
1811    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
1812        if let Some((depth_data, _width, _height)) = guard.as_ref() {
1813            if state.depth_data.is_none() {
1814                state.depth_data = Some(depth_data.clone());
1815            }
1816            true
1817        } else {
1818            false
1819        }
1820    } else {
1821        false
1822    };
1823
1824    // If depth readback failed or is taking too long, fall back to placeholder
1825    // (This allows graceful degradation on systems where depth readback fails)
1826    if rgba_ready && !depth_ready && state.frame_count > 60 {
1827        let camera_dist = request.camera_transform.translation.length() as f64;
1828        let pixel_count = (state.image_width * state.image_height) as usize;
1829        state.depth_data = Some(vec![camera_dist; pixel_count]);
1830    }
1831
1832    // Mark as captured when both RGBA and depth are ready
1833    if state.rgba_data.is_some() && state.depth_data.is_some() {
1834        state.captured = true;
1835    }
1836}
1837
1838/// Extract results and exit
1839#[allow(dead_code)]
1840fn extract_and_exit(
1841    mut state: ResMut<RenderState>,
1842    request: Res<RenderRequest>,
1843    shared_output: Res<SharedOutput>,
1844    mut commands: Commands,
1845    windows: Query<Entity, With<bevy::window::Window>>,
1846) {
1847    // Handle delayed exit after closing window
1848    if state.exit_requested {
1849        state.exit_frame_count += 1;
1850        // After a few frames with no window, Bevy should exit
1851        return;
1852    }
1853
1854    if !state.captured {
1855        return;
1856    }
1857
1858    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
1859        // Use actual captured dimensions (may differ from config if window was resized)
1860        let width = state.image_width;
1861        let height = state.image_height;
1862
1863        // Compute intrinsics based on actual dimensions (f64 for TBP precision)
1864        let config = &request.config;
1865        let intrinsics = crate::CameraIntrinsics {
1866            focal_length: [
1867                width as f64 * config.zoom as f64,
1868                height as f64 * config.zoom as f64,
1869            ],
1870            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
1871            image_size: [width, height],
1872        };
1873
1874        let output = RenderOutput {
1875            rgba: rgba.clone(),
1876            depth: depth.clone(),
1877            width,
1878            height,
1879            intrinsics,
1880            camera_transform: request.camera_transform,
1881            object_rotation: request.object_rotation.clone(),
1882        };
1883
1884        if let Ok(mut guard) = shared_output.0.lock() {
1885            *guard = Some(output);
1886            drop(guard); // Release lock immediately
1887
1888            // Small delay to allow watchdog to detect output before window close
1889            std::thread::sleep(std::time::Duration::from_millis(200));
1890        }
1891
1892        // Close all windows to trigger app exit
1893        // eprintln!("Closing windows to trigger exit...");
1894        for window_entity in windows.iter() {
1895            commands.entity(window_entity).despawn();
1896        }
1897        state.exit_requested = true;
1898    }
1899}
1900
1901// ============================================================================
1902// Headless Rendering Systems (no window surfaces)
1903// ============================================================================
1904
1905/// Setup the scene for headless rendering with RenderTarget::Image
1906fn setup_headless_scene(
1907    mut commands: Commands,
1908    mut images: ResMut<Assets<Image>>,
1909    asset_server: Res<AssetServer>,
1910    request: Res<RenderRequest>,
1911    mut _materials: ResMut<Assets<StandardMaterial>>,
1912) {
1913    let trace = render_trace_enabled();
1914    let t0 = trace.then(std::time::Instant::now);
1915
1916    #[cfg(test)]
1917    HEADLESS_SCENE_SETUP_COUNT.fetch_add(1, Ordering::SeqCst);
1918
1919    let width = request.config.width;
1920    let height = request.config.height;
1921
1922    // Create render target image with proper texture usages
1923    let size = Extent3d {
1924        width,
1925        height,
1926        depth_or_array_layers: 1,
1927    };
1928
1929    let mut render_target_image = Image::new_fill(
1930        size,
1931        TextureDimension::D2,
1932        &[0, 0, 0, 255], // Initialize with opaque black
1933        TextureFormat::Rgba8UnormSrgb,
1934        RenderAssetUsages::default(),
1935    );
1936
1937    // Add required texture usages for headless rendering
1938    render_target_image.texture_descriptor.usage =
1939        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
1940
1941    let render_target_handle = images.add(render_target_image);
1942
1943    // Store handle for later access
1944    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
1945
1946    // Camera rendering to the image texture (NO window!)
1947    let fov = request.config.fov_radians();
1948    commands.spawn((
1949        Camera3d::default(),
1950        Camera {
1951            hdr: true,
1952            target: RenderTarget::Image(render_target_handle.clone()),
1953            ..default()
1954        },
1955        Projection::Perspective(PerspectiveProjection {
1956            fov,
1957            near: request.config.near_plane,
1958            far: request.config.far_plane,
1959            ..default()
1960        }),
1961        Msaa::Off,
1962        request.camera_transform,
1963        Tonemapping::None,
1964        DepthPrepass,
1965        NormalPrepass,
1966        RenderCamera,
1967        // Add ImageCopier to trigger RGBA extraction
1968        ImageCopier {
1969            src_image: render_target_handle,
1970            enabled: false, // Will enable when ready to capture
1971        },
1972    ));
1973
1974    // Ambient light
1975    let lighting = &request.config.lighting;
1976    commands.insert_resource(AmbientLight {
1977        color: Color::WHITE,
1978        brightness: lighting.ambient_brightness,
1979    });
1980
1981    // Key light
1982    if lighting.key_light_intensity > 0.0 {
1983        commands.spawn((
1984            PointLight {
1985                intensity: lighting.key_light_intensity,
1986                shadows_enabled: lighting.shadows_enabled,
1987                ..default()
1988            },
1989            Transform::from_xyz(
1990                lighting.key_light_position[0],
1991                lighting.key_light_position[1],
1992                lighting.key_light_position[2],
1993            ),
1994        ));
1995    }
1996
1997    // Fill light
1998    if lighting.fill_light_intensity > 0.0 {
1999        commands.spawn((
2000            PointLight {
2001                intensity: lighting.fill_light_intensity,
2002                shadows_enabled: lighting.shadows_enabled,
2003                ..default()
2004            },
2005            Transform::from_xyz(
2006                lighting.fill_light_position[0],
2007                lighting.fill_light_position[1],
2008                lighting.fill_light_position[2],
2009            ),
2010        ));
2011    }
2012
2013    // Load the scene
2014    let scene_handle: Handle<Scene> = asset_server.load(&request.mesh_path);
2015    commands.insert_resource(LoadedScene(scene_handle.clone()));
2016
2017    // Load the texture
2018    let texture_handle: Handle<Image> = asset_server.load(&request.texture_path);
2019    commands.insert_resource(LoadedTexture(texture_handle.clone()));
2020
2021    // Create material with texture
2022    let _material = _materials.add(StandardMaterial {
2023        base_color_texture: Some(texture_handle),
2024        unlit: true,
2025        ..default()
2026    });
2027
2028    // Spawn the scene with rotation
2029    commands.spawn((
2030        SceneRoot(scene_handle),
2031        Transform::from_rotation(request.object_rotation.to_quat()),
2032        RenderedObject,
2033    ));
2034
2035    if let Some(t0) = t0 {
2036        eprintln!(
2037            "[render_trace][startup] setup_headless_scene ms={:.3}",
2038            t0.elapsed().as_secs_f64() * 1000.0
2039        );
2040    }
2041}
2042
2043/// Request capture for headless rendering (enable ImageCopier)
2044fn request_headless_capture(
2045    mut state: ResMut<RenderState>,
2046    mut depth_request: ResMut<DepthCaptureRequest>,
2047    mut query: Query<&mut ImageCopier>,
2048    batch: Option<Res<HeadlessBatchSequence>>,
2049) {
2050    let trace = render_trace_enabled();
2051    let t0 = trace.then(std::time::Instant::now);
2052
2053    if !state.capture_ready || state.screenshot_requested {
2054        if let Some(t0) = t0 {
2055            eprintln!(
2056                "[render_trace][sys] request_headless_capture skipped(gate) ms={:.3}",
2057                t0.elapsed().as_secs_f64() * 1000.0
2058            );
2059        }
2060        return;
2061    }
2062
2063    if batch
2064        .as_ref()
2065        .is_some_and(|batch| batch.warmup_frames_remaining > 0)
2066    {
2067        if let Some(t0) = t0 {
2068            eprintln!(
2069                "[render_trace][sys] request_headless_capture skipped(warmup) ms={:.3}",
2070                t0.elapsed().as_secs_f64() * 1000.0
2071            );
2072        }
2073        return;
2074    }
2075
2076    // Enable the ImageCopier to trigger RGBA extraction
2077    for mut copier in query.iter_mut() {
2078        copier.enabled = true;
2079    }
2080
2081    // Request depth capture
2082    depth_request.requested = true;
2083
2084    state.screenshot_requested = true;
2085
2086    if let Some(t0) = t0 {
2087        eprintln!(
2088            "[render_trace][sys] request_headless_capture requested ms={:.3}",
2089            t0.elapsed().as_secs_f64() * 1000.0
2090        );
2091    }
2092}
2093
2094/// Check if headless capture has completed
2095fn check_headless_capture_ready(
2096    mut state: ResMut<RenderState>,
2097    shared_rgba: Res<SharedRgbaBuffer>,
2098    shared_depth: Res<SharedDepthBuffer>,
2099    request: Res<RenderRequest>,
2100    mut query: Query<&mut ImageCopier>,
2101) {
2102    let trace = render_trace_enabled();
2103    let t0 = trace.then(std::time::Instant::now);
2104
2105    if !state.screenshot_requested || state.captured {
2106        if let Some(t0) = t0 {
2107            eprintln!(
2108                "[render_trace][sys] check_headless_capture_ready skipped(gate) ms={:.3}",
2109                t0.elapsed().as_secs_f64() * 1000.0
2110            );
2111        }
2112        return;
2113    }
2114
2115    state.frame_count += 1;
2116
2117    // Check if RGBA data is ready
2118    let rgba_ready = if let Ok(guard) = shared_rgba.0.lock() {
2119        if let Some((rgba_data, width, height)) = guard.as_ref() {
2120            if state.rgba_data.is_none() {
2121                state.rgba_data = Some(rgba_data.clone());
2122                state.image_width = *width;
2123                state.image_height = *height;
2124                // Disable further captures
2125                for mut copier in query.iter_mut() {
2126                    copier.enabled = false;
2127                }
2128            }
2129            true
2130        } else {
2131            false
2132        }
2133    } else {
2134        false
2135    };
2136
2137    // Check if depth data is ready
2138    let depth_ready = if let Ok(guard) = shared_depth.0.lock() {
2139        if let Some((depth_data, _width, _height)) = guard.as_ref() {
2140            if state.depth_data.is_none() {
2141                state.depth_data = Some(depth_data.clone());
2142            }
2143            true
2144        } else {
2145            false
2146        }
2147    } else {
2148        false
2149    };
2150
2151    // Fallback to placeholder depth after 10 extra frames if depth readback fails
2152    if rgba_ready && !depth_ready && state.frame_count > 70 {
2153        let camera_dist = request.camera_transform.translation.length() as f64;
2154        let pixel_count = (state.image_width * state.image_height) as usize;
2155        state.depth_data = Some(vec![camera_dist; pixel_count]);
2156    }
2157
2158    if state.rgba_data.is_some() && state.depth_data.is_some() {
2159        state.captured = true;
2160    }
2161
2162    if let Some(t0) = t0 {
2163        eprintln!(
2164            "[render_trace][sys] check_headless_capture_ready rgba_ready={} depth_ready={} captured={} frame_count={} ms={:.3}",
2165            rgba_ready,
2166            depth_ready,
2167            state.captured,
2168            state.frame_count,
2169            t0.elapsed().as_secs_f64() * 1000.0
2170        );
2171    }
2172}
2173
2174/// Extract results and exit for headless rendering
2175fn extract_and_exit_headless(
2176    mut state: ResMut<RenderState>,
2177    request: Res<RenderRequest>,
2178    shared_output: Res<SharedOutput>,
2179    mut app_exit: EventWriter<bevy::app::AppExit>,
2180    batch: Option<Res<HeadlessBatchSequence>>,
2181) {
2182    if batch.is_some() {
2183        return;
2184    }
2185
2186    if state.exit_requested {
2187        return;
2188    }
2189
2190    if !state.captured {
2191        return;
2192    }
2193
2194    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2195        let width = state.image_width;
2196        let height = state.image_height;
2197
2198        // Compute intrinsics (f64 for TBP precision)
2199        let config = &request.config;
2200        let intrinsics = crate::CameraIntrinsics {
2201            focal_length: [
2202                width as f64 * config.zoom as f64,
2203                height as f64 * config.zoom as f64,
2204            ],
2205            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2206            image_size: [width, height],
2207        };
2208
2209        let output = RenderOutput {
2210            rgba: rgba.clone(),
2211            depth: depth.clone(),
2212            width,
2213            height,
2214            intrinsics,
2215            camera_transform: request.camera_transform,
2216            object_rotation: request.object_rotation.clone(),
2217        };
2218
2219        if let Ok(mut guard) = shared_output.0.lock() {
2220            *guard = Some(output);
2221            drop(guard);
2222            std::thread::sleep(std::time::Duration::from_millis(200));
2223        }
2224
2225        // Send AppExit event (headless apps use this instead of closing windows)
2226        app_exit.send(bevy::app::AppExit::Success);
2227        state.exit_requested = true;
2228    }
2229}
2230
2231/// Advance the short post-camera-move warmup for homogeneous batch rendering.
2232fn tick_headless_batch_warmup(batch: Option<ResMut<HeadlessBatchSequence>>) {
2233    let Some(mut batch) = batch else {
2234        return;
2235    };
2236
2237    if batch.warmup_frames_remaining > 0 {
2238        batch.warmup_frames_remaining -= 1;
2239    }
2240}
2241
2242/// Extract one batch output and continue rendering the next viewpoint in the same app.
2243fn extract_and_continue_headless_batch(
2244    mut state: ResMut<RenderState>,
2245    request: Res<RenderRequest>,
2246    buffers: (Res<SharedRgbaBuffer>, Res<SharedDepthBuffer>),
2247    batch: Option<ResMut<HeadlessBatchSequence>>,
2248    mut camera_query: Query<&mut Transform, With<RenderCamera>>,
2249    mut depth_request: ResMut<DepthCaptureRequest>,
2250    mut image_copiers: Query<&mut ImageCopier>,
2251) {
2252    let trace = render_trace_enabled();
2253    let t0 = trace.then(std::time::Instant::now);
2254
2255    let (shared_rgba, shared_depth) = buffers;
2256    let Some(mut batch) = batch else {
2257        if let Some(t0) = t0 {
2258            eprintln!(
2259                "[render_trace][sys] extract_and_continue_headless_batch skipped(no_batch) ms={:.3}",
2260                t0.elapsed().as_secs_f64() * 1000.0
2261            );
2262        }
2263        return;
2264    };
2265
2266    if state.exit_requested || !state.captured || batch.done {
2267        if let Some(t0) = t0 {
2268            eprintln!(
2269                "[render_trace][sys] extract_and_continue_headless_batch skipped(gate) captured={} done={} ms={:.3}",
2270                state.captured,
2271                batch.done,
2272                t0.elapsed().as_secs_f64() * 1000.0
2273            );
2274        }
2275        return;
2276    }
2277
2278    if let (Some(rgba), Some(depth)) = (&state.rgba_data, &state.depth_data) {
2279        let width = state.image_width;
2280        let height = state.image_height;
2281
2282        let config = &request.config;
2283        let intrinsics = crate::CameraIntrinsics {
2284            focal_length: [
2285                width as f64 * config.zoom as f64,
2286                height as f64 * config.zoom as f64,
2287            ],
2288            principal_point: [width as f64 / 2.0, height as f64 / 2.0],
2289            image_size: [width, height],
2290        };
2291
2292        let output = RenderOutput {
2293            rgba: rgba.clone(),
2294            depth: depth.clone(),
2295            width,
2296            height,
2297            intrinsics,
2298            camera_transform: batch
2299                .current_viewpoint()
2300                .unwrap_or(request.camera_transform),
2301            object_rotation: request.object_rotation.clone(),
2302        };
2303        batch.outputs.push(output);
2304
2305        let next_index = batch.current_index + 1;
2306        if next_index >= batch.viewpoints.len() {
2307            batch.done = true;
2308            state.exit_requested = true;
2309            return;
2310        }
2311
2312        batch.current_index = next_index;
2313        batch.warmup_frames_remaining = BATCH_WARMUP_FRAMES;
2314
2315        if let Some(next_viewpoint) = batch.current_viewpoint() {
2316            for mut camera_transform in camera_query.iter_mut() {
2317                *camera_transform = next_viewpoint;
2318            }
2319        }
2320
2321        if let Ok(mut guard) = shared_rgba.0.lock() {
2322            *guard = None;
2323        }
2324        if let Ok(mut guard) = shared_depth.0.lock() {
2325            *guard = None;
2326        }
2327
2328        for mut copier in image_copiers.iter_mut() {
2329            copier.enabled = false;
2330        }
2331
2332        depth_request.requested = false;
2333        state.frame_count = 0;
2334        state.capture_ready = true;
2335        state.screenshot_requested = false;
2336        state.captured = false;
2337        state.rgba_data = None;
2338        state.depth_data = None;
2339        state.image_width = 0;
2340        state.image_height = 0;
2341
2342        if let Some(t0) = t0 {
2343            eprintln!(
2344                "[render_trace][sys] extract_and_continue_headless_batch extracted vp={} next={} done={} ms={:.3}",
2345                batch.current_index.saturating_sub(1),
2346                batch.current_index,
2347                batch.done,
2348                t0.elapsed().as_secs_f64() * 1000.0
2349            );
2350        }
2351    } else if let Some(t0) = t0 {
2352        eprintln!(
2353            "[render_trace][sys] extract_and_continue_headless_batch no_data ms={:.3}",
2354            t0.elapsed().as_secs_f64() * 1000.0
2355        );
2356    }
2357}
2358
2359// ============================================================================
2360// Persistent batch session (RenderSession)
2361//
2362// Amortizes wgpu device creation, Bevy app setup, and first-draw pipeline state
2363// object (PSO) compilation across multiple `render()` calls. Profile data (see
2364// issues #54 and #55) showed that on a 60-episode parity-gate, ~2.3s per episode
2365// lives in first-draw DX12 PSO compilation, totalling ~131s of 151s wall-clock.
2366// Keeping the `App` (and thus the `RenderDevice` and its PSO cache) alive across
2367// episodes recovers the bulk of that cost.
2368// ============================================================================
2369
/// Marker component for the per-group scene entity, so it can be found and
/// despawned cleanly when the next `RenderSession::render()` call swaps in a
/// different object or rotation.
///
/// Carries no data; it exists purely as a query filter.
#[derive(Component)]
struct SessionScene;
2374
2375/// Session-persistent setup: render target image, camera (with prepass +
2376/// `ImageCopier`), ambient light, key + fill lights. Everything here lives for
2377/// the full lifetime of the `RenderSession`; per-group work (mesh/texture load,
2378/// scene entity spawn) happens outside Startup in `RenderSession::render()`.
2379fn setup_session_persistent_scene(
2380    mut commands: Commands,
2381    mut images: ResMut<Assets<Image>>,
2382    config: Res<SessionRenderConfig>,
2383) {
2384    let width = config.0.width;
2385    let height = config.0.height;
2386
2387    let size = Extent3d {
2388        width,
2389        height,
2390        depth_or_array_layers: 1,
2391    };
2392
2393    let mut render_target_image = Image::new_fill(
2394        size,
2395        TextureDimension::D2,
2396        &[0, 0, 0, 255],
2397        TextureFormat::Rgba8UnormSrgb,
2398        RenderAssetUsages::default(),
2399    );
2400    render_target_image.texture_descriptor.usage =
2401        TextureUsages::TEXTURE_BINDING | TextureUsages::COPY_SRC | TextureUsages::RENDER_ATTACHMENT;
2402
2403    let render_target_handle = images.add(render_target_image);
2404    commands.insert_resource(RenderTargetImage(render_target_handle.clone()));
2405
2406    let fov = config.0.fov_radians();
2407    commands.spawn((
2408        Camera3d::default(),
2409        Camera {
2410            hdr: true,
2411            target: RenderTarget::Image(render_target_handle.clone()),
2412            ..default()
2413        },
2414        Projection::Perspective(PerspectiveProjection {
2415            fov,
2416            near: config.0.near_plane,
2417            far: config.0.far_plane,
2418            ..default()
2419        }),
2420        Msaa::Off,
2421        Transform::default(),
2422        Tonemapping::None,
2423        DepthPrepass,
2424        NormalPrepass,
2425        RenderCamera,
2426        ImageCopier {
2427            src_image: render_target_handle,
2428            enabled: false,
2429        },
2430    ));
2431
2432    let lighting = &config.0.lighting;
2433    commands.insert_resource(AmbientLight {
2434        color: Color::WHITE,
2435        brightness: lighting.ambient_brightness,
2436    });
2437
2438    if lighting.key_light_intensity > 0.0 {
2439        commands.spawn((
2440            PointLight {
2441                intensity: lighting.key_light_intensity,
2442                shadows_enabled: lighting.shadows_enabled,
2443                ..default()
2444            },
2445            Transform::from_xyz(
2446                lighting.key_light_position[0],
2447                lighting.key_light_position[1],
2448                lighting.key_light_position[2],
2449            ),
2450        ));
2451    }
2452
2453    if lighting.fill_light_intensity > 0.0 {
2454        commands.spawn((
2455            PointLight {
2456                intensity: lighting.fill_light_intensity,
2457                shadows_enabled: lighting.shadows_enabled,
2458                ..default()
2459            },
2460            Transform::from_xyz(
2461                lighting.fill_light_position[0],
2462                lighting.fill_light_position[1],
2463                lighting.fill_light_position[2],
2464            ),
2465        ));
2466    }
2467}
2468
/// Resource carrying the `RenderConfig` that was fixed at session construction.
///
/// Read by `setup_session_persistent_scene` to size the render target and to
/// configure the camera projection (fov, near/far planes) and the lighting.
#[derive(Resource)]
struct SessionRenderConfig(RenderConfig);
2473
/// Persistent batch render session. Keeps a Bevy `App` (and its `RenderDevice`
/// plus PSO cache) alive across multiple `render()` calls, amortizing per-episode
/// cold-init cost.
///
/// # Thread affinity
///
/// `RenderSession` must be created, used, and dropped on the same thread. It
/// holds a `bevy::App` which owns GPU resources that are not safe to move
/// across threads. The type is made `!Send + !Sync` by its
/// `PhantomData<*const ()>` field.
///
/// # Config invariant
///
/// The `RenderConfig` (resolution, lighting, near/far, fov) is fixed at
/// `new()`. All `render()` calls must use requests whose `render_config`
/// matches; heterogeneous configs are rejected.
///
/// # Phase 1 limitation
///
/// Each `render()` call must contain homogeneous requests (same `object_dir`
/// and `object_rotation`). Heterogeneous calls return
/// `BatchRenderError::InvalidConfig`. Hold a single `RenderSession` and call
/// `render()` once per episode to amortize setup across episodes.
pub struct RenderSession {
    // The long-lived Bevy app; built and warmed up once in `new()`.
    app: App,
    // Copy of the config passed to `new()`.
    render_config: RenderConfig,
    // Handle to the RGBA buffer that was also given to `ImageCopyPlugin`.
    shared_rgba: SharedRgbaBuffer,
    // Handle to the depth buffer that was also given to `DepthReadbackPlugin`.
    shared_depth: SharedDepthBuffer,
    // Raw-pointer marker: opts the type out of `Send` and `Sync`.
    _not_send_sync: std::marker::PhantomData<*const ()>,
}
2504
2505impl RenderSession {
2506    /// Build the App, run plugin `finish()`/`cleanup()`, and perform one warmup
2507    /// `update()` so Startup systems run and the wgpu device + adapter are
2508    /// initialized. The first `render()` call still pays PSO compilation for
2509    /// the specific mesh/material combination; subsequent calls reuse the cache.
2510    pub fn new(render_config: &crate::RenderConfig) -> Result<Self, crate::RenderError> {
2511        let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2512        let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2513
2514        let mut app = App::new();
2515        app.add_plugins(
2516            DefaultPlugins
2517                .set(WindowPlugin {
2518                    primary_window: None,
2519                    exit_condition: ExitCondition::DontExit,
2520                    ..default()
2521                })
2522                .disable::<bevy::winit::WinitPlugin>()
2523                .disable::<LogPlugin>()
2524                .disable::<TerminalCtrlCHandlerPlugin>(),
2525        )
2526        .add_plugins(ObjPlugin)
2527        .add_plugins(ImageCopyPlugin {
2528            shared_rgba: shared_rgba.clone(),
2529        })
2530        .add_plugins(DepthReadbackPlugin {
2531            shared_depth: shared_depth.clone(),
2532            near: render_config.near_plane,
2533            far: render_config.far_plane,
2534        })
2535        .insert_resource(SessionRenderConfig(render_config.clone()))
2536        .insert_resource(shared_rgba.clone())
2537        .init_resource::<RenderState>()
2538        .add_systems(Startup, setup_session_persistent_scene)
2539        .add_systems(
2540            Update,
2541            (
2542                check_assets_loaded,
2543                apply_materials,
2544                tick_headless_batch_warmup,
2545                request_headless_capture,
2546                check_headless_capture_ready,
2547                extract_and_continue_headless_batch,
2548            )
2549                .chain()
2550                // Gate the capture chain on `RenderRequest` existing. `new()`
2551                // runs a warmup `app.update()` to execute Startup (which spawns
2552                // the camera/lights/render target) before the first `render()`
2553                // call, but does not yet insert `RenderRequest`. Several systems
2554                // in this chain take `Res<RenderRequest>` (not `Option`) and
2555                // would panic on SystemState init if the resource were absent.
2556                .run_if(bevy::ecs::schedule::common_conditions::resource_exists::<RenderRequest>),
2557        );
2558
2559        app.finish();
2560        app.cleanup();
2561
2562        // One warmup update runs Startup systems (render target, camera, lights)
2563        // so they exist before the first `render()` call seeds the camera
2564        // transform. The Update chain is gated by `RenderRequest` existence and
2565        // is a no-op this tick. PSO compilation for specific mesh/material
2566        // combinations still happens lazily on the first real render.
2567        app.update();
2568
2569        Ok(Self {
2570            app,
2571            render_config: render_config.clone(),
2572            shared_rgba,
2573            shared_depth,
2574            _not_send_sync: std::marker::PhantomData,
2575        })
2576    }
2577
2578    /// Render a homogeneous batch of viewpoints (same object + rotation + config).
2579    /// Returns outputs in request order.
2580    ///
2581    /// On `BatchRenderError::DeviceLost`, the returned error signals that the
2582    /// wgpu device was lost mid-render. This call produced no output; any
2583    /// outputs from earlier `render()` calls on this session are still valid.
2584    /// Recovery: drop this `RenderSession` and construct a new one.
2585    pub fn render(
2586        &mut self,
2587        requests: &[crate::BatchRenderRequest],
2588    ) -> Result<Vec<crate::BatchRenderOutput>, crate::BatchRenderError> {
2589        use crate::{BatchRenderError, BatchRenderOutput};
2590
2591        if requests.is_empty() {
2592            return Ok(Vec::new());
2593        }
2594
2595        // Enforce homogeneity and config invariance.
2596        let first = &requests[0];
2597        if first.render_config != self.render_config {
2598            return Err(BatchRenderError::InvalidConfig(
2599                "RenderSession render_config mismatch: session was constructed with a different \
2600                 RenderConfig than the first request carries. Session config cannot change after \
2601                 `new()`; construct a new session if you need a different resolution/camera."
2602                    .to_string(),
2603            ));
2604        }
2605        for r in &requests[1..] {
2606            if r.object_dir != first.object_dir
2607                || r.object_rotation != first.object_rotation
2608                || r.render_config != first.render_config
2609            {
2610                return Err(BatchRenderError::InvalidConfig(
2611                    "Phase 1 RenderSession::render requires homogeneous requests \
2612                     (same object_dir, object_rotation, and render_config across the batch). \
2613                     Call render() once per group instead."
2614                        .to_string(),
2615                ));
2616            }
2617        }
2618
2619        // Canonicalize paths and validate mesh/texture presence. This matches
2620        // `render_headless_sequence`'s preconditions so the error surface stays
2621        // consistent.
2622        let object_dir = std::fs::canonicalize(&first.object_dir).map_err(|e| {
2623            BatchRenderError::InvalidConfig(format!(
2624                "Cannot canonicalize object directory {}: {}",
2625                first.object_dir.display(),
2626                e
2627            ))
2628        })?;
2629        let mesh_path = object_dir.join("google_16k/textured.obj");
2630        let texture_path = object_dir.join("google_16k/texture_map.png");
2631        if !mesh_path.exists() {
2632            return Err(BatchRenderError::InvalidConfig(format!(
2633                "Mesh not found: {}",
2634                mesh_path.display()
2635            )));
2636        }
2637        if !texture_path.exists() {
2638            return Err(BatchRenderError::InvalidConfig(format!(
2639                "Texture not found: {}",
2640                texture_path.display()
2641            )));
2642        }
2643
2644        let viewpoints: Vec<Transform> = requests.iter().map(|r| r.viewpoint).collect();
2645
2646        // --- per-group scene swap (direct world manipulation) ---
2647        {
2648            let world = self.app.world_mut();
2649
2650            // Despawn any SessionScene entity from the previous group.
2651            let stale: Vec<Entity> = world
2652                .query_filtered::<Entity, With<SessionScene>>()
2653                .iter(world)
2654                .collect();
2655            for entity in stale {
2656                world.entity_mut(entity).despawn_recursive();
2657            }
2658
2659            // Clear shared RGBA/depth buffers so a stale payload can't leak
2660            // into the first viewpoint of this call.
2661            if let Ok(mut guard) = self.shared_rgba.0.lock() {
2662                *guard = None;
2663            }
2664            if let Ok(mut guard) = self.shared_depth.0.lock() {
2665                *guard = None;
2666            }
2667
2668            // Reset RenderState (scene_loaded, texture_loaded, capture_ready,
2669            // frame_count, materials_applied, etc.). Default() gives all false/0.
2670            *world.resource_mut::<RenderState>() = RenderState::default();
2671
2672            // Update RenderRequest so the existing capture systems see the new
2673            // object paths, rotation, and camera transform (seeded from first vp).
2674            let new_request = RenderRequest {
2675                mesh_path: mesh_path.display().to_string(),
2676                texture_path: texture_path.display().to_string(),
2677                camera_transform: viewpoints[0],
2678                object_rotation: first.object_rotation.clone(),
2679                config: self.render_config.clone(),
2680            };
2681            world.insert_resource(new_request);
2682
2683            // Kick off asset loads and install the handles under the names the
2684            // existing `check_assets_loaded` system expects.
2685            let asset_server = world.resource::<AssetServer>().clone();
2686            let scene_handle: Handle<Scene> = asset_server.load(mesh_path.display().to_string());
2687            let texture_handle: Handle<Image> =
2688                asset_server.load(texture_path.display().to_string());
2689            world.insert_resource(LoadedScene(scene_handle.clone()));
2690            world.insert_resource(LoadedTexture(texture_handle));
2691
2692            // Spawn the new scene entity tagged so we can find + despawn it next
2693            // render() call.
2694            world.spawn((
2695                SceneRoot(scene_handle),
2696                Transform::from_rotation(first.object_rotation.to_quat()),
2697                RenderedObject,
2698                SessionScene,
2699            ));
2700
2701            // Seed the camera transform to the first viewpoint now so the first
2702            // capture lines up; subsequent viewpoints are advanced by
2703            // `extract_and_continue_headless_batch`.
2704            let camera_entity = world
2705                .query_filtered::<Entity, With<RenderCamera>>()
2706                .iter(world)
2707                .next();
2708            if let Some(cam) = camera_entity {
2709                if let Some(mut transform) = world.entity_mut(cam).get_mut::<Transform>() {
2710                    *transform = viewpoints[0];
2711                }
2712            }
2713
2714            // Install the viewpoint sequence for this render() call.
2715            world.insert_resource(HeadlessBatchSequence::new(viewpoints.clone()));
2716        }
2717
2718        // --- drive the capture loop ---
2719        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2720        let start = std::time::Instant::now();
2721        loop {
2722            if start.elapsed() > timeout {
2723                return Err(BatchRenderError::TotalFailure(format!(
2724                    "RenderSession::render timed out after {}s",
2725                    RENDER_TIMEOUT_SECS
2726                )));
2727            }
2728
2729            self.app.update();
2730
2731            if self.app.world().resource::<HeadlessBatchSequence>().done {
2732                break;
2733            }
2734        }
2735
2736        // Collect outputs and zip with requests to produce BatchRenderOutput in
2737        // request order.
2738        let mut sequence = self.app.world_mut().resource_mut::<HeadlessBatchSequence>();
2739        if sequence.outputs.len() != requests.len() {
2740            return Err(BatchRenderError::TotalFailure(format!(
2741                "RenderSession produced {} outputs for {} requests",
2742                sequence.outputs.len(),
2743                requests.len()
2744            )));
2745        }
2746        let outputs = std::mem::take(&mut sequence.outputs);
2747
2748        Ok(requests
2749            .iter()
2750            .cloned()
2751            .zip(outputs)
2752            .map(|(req, out)| BatchRenderOutput::from_render_output(req, out))
2753            .collect())
2754    }
2755}
2756
2757/// Render directly to files (for subprocess mode).
2758///
2759/// This function saves RGBA and depth data directly to files before exiting.
2760/// Designed for subprocess rendering where the process will exit after rendering.
2761pub fn render_to_files(
2762    object_dir: &Path,
2763    camera_transform: &Transform,
2764    object_rotation: &ObjectRotation,
2765    config: &RenderConfig,
2766    rgba_path: &Path,
2767    depth_path: &Path,
2768) -> Result<(), RenderError> {
2769    let mesh_path = object_dir.join("google_16k/textured.obj");
2770    let texture_path = object_dir.join("google_16k/texture_map.png");
2771
2772    if !mesh_path.exists() {
2773        return Err(RenderError::MeshNotFound(mesh_path.display().to_string()));
2774    }
2775    if !texture_path.exists() {
2776        return Err(RenderError::TextureNotFound(
2777            texture_path.display().to_string(),
2778        ));
2779    }
2780
2781    let request = RenderRequest {
2782        mesh_path: mesh_path.display().to_string(),
2783        texture_path: texture_path.display().to_string(),
2784        camera_transform: *camera_transform,
2785        object_rotation: object_rotation.clone(),
2786        config: config.clone(),
2787    };
2788
2789    // Shared state for output
2790    let shared_output: SharedOutput = SharedOutput(Arc::new(Mutex::new(None)));
2791    let output_poll = shared_output.clone();
2792
2793    // Clone paths for watchdog thread
2794    let rgba_path = rgba_path.to_path_buf();
2795    let depth_path = depth_path.to_path_buf();
2796
2797    // Shared buffer for RGBA data from headless render target
2798    let shared_rgba: SharedRgbaBuffer = SharedRgbaBuffer::default();
2799
2800    // Shared buffer for depth readback
2801    let shared_depth: SharedDepthBuffer = SharedDepthBuffer::default();
2802
2803    // Spawn watchdog thread that saves files and exits
2804    std::thread::spawn(move || {
2805        let timeout = std::time::Duration::from_secs(RENDER_TIMEOUT_SECS);
2806        let start = std::time::Instant::now();
2807        let poll_interval = std::time::Duration::from_millis(100);
2808
2809        loop {
2810            if let Ok(guard) = output_poll.0.lock() {
2811                if let Some(output) = guard.as_ref() {
2812                    // Save RGBA as PNG
2813                    if let Err(e) =
2814                        save_rgba_to_png(&output.rgba, output.width, output.height, &rgba_path)
2815                    {
2816                        eprintln!("Failed to save RGBA: {:?}", e);
2817                        std::process::exit(1);
2818                    }
2819
2820                    // Save depth as binary f32
2821                    if let Err(e) = save_depth_to_binary(&output.depth, &depth_path) {
2822                        eprintln!("Failed to save depth: {:?}", e);
2823                        std::process::exit(1);
2824                    }
2825
2826                    std::process::exit(0);
2827                }
2828            }
2829
2830            if start.elapsed() > timeout {
2831                eprintln!(
2832                    "Error: Render timeout after {} seconds",
2833                    RENDER_TIMEOUT_SECS
2834                );
2835                eprintln!("Debug info: This may indicate GPU issues, missing assets, or insufficient system resources.");
2836                std::process::exit(1);
2837            }
2838
2839            std::thread::sleep(poll_interval);
2840        }
2841    });
2842
2843    // Configure rendering backend for this environment.
2844    // Use OnceLock so env vars are only set once per process — repeated calls
2845    // (e.g. sequential render_to_buffer calls in a parity loop) no longer trigger
2846    // redundant wgpu backend env writes. Full GPU adapter reuse across App instances
2847    // requires a persistent renderer (tracked in issue #14).
2848    static BACKEND_INIT: OnceLock<()> = OnceLock::new();
2849    BACKEND_INIT.get_or_init(|| {
2850        let backend_config = BackendConfig::headless();
2851        backend_config.apply_env();
2852    });
2853
2854    // Run Bevy app with HEADLESS configuration
2855    build_headless_app(request, shared_output, shared_rgba, shared_depth).run();
2856
2857    // Unreachable - watchdog thread exits the process
2858    Err(RenderError::RenderFailed(
2859        "Render did not complete".to_string(),
2860    ))
2861}
2862
2863/// Save RGBA data to PNG file
2864fn save_rgba_to_png(rgba: &[u8], width: u32, height: u32, path: &Path) -> Result<(), String> {
2865    use image::{ImageBuffer, Rgba};
2866
2867    // Create parent directories if needed
2868    if let Some(parent) = path.parent() {
2869        std::fs::create_dir_all(parent).map_err(|e| e.to_string())?;
2870    }
2871
2872    let img: ImageBuffer<Rgba<u8>, Vec<u8>> =
2873        ImageBuffer::from_raw(width, height, rgba.to_vec())
2874            .ok_or_else(|| "Failed to create image buffer".to_string())?;
2875
2876    img.save(path).map_err(|e| e.to_string())
2877}
2878
/// Write depth samples to `path` as raw binary (f64 for TBP precision).
///
/// Each sample is emitted as its 8 little-endian bytes, in slice order, with no
/// header. If `path` has a parent component, that directory (and any missing
/// ancestors) is created before writing. I/O failures are returned as their
/// string representation.
fn save_depth_to_binary(depth: &[f64], path: &Path) -> Result<(), String> {
    // Make sure the destination directory exists.
    if let Some(dir) = path.parent() {
        if let Err(err) = std::fs::create_dir_all(dir) {
            return Err(err.to_string());
        }
    }

    // Serialize every sample into one contiguous little-endian byte buffer.
    let mut raw: Vec<u8> = Vec::with_capacity(depth.len() * std::mem::size_of::<f64>());
    for sample in depth {
        raw.extend_from_slice(&sample.to_le_bytes());
    }

    match std::fs::write(path, &raw) {
        Ok(()) => Ok(()),
        Err(err) => Err(err.to_string()),
    }
}
2889
/// Opt-in smoke check for headless batch rendering against a synthetic object.
#[cfg(test)]
mod smoke_tests {
    use super::{headless_scene_setup_count, reset_headless_scene_setup_count};
    use crate::{
        BatchRenderConfig, BatchRenderRequest, ObjectRotation, RenderConfig, ViewpointConfig,
    };
    use image::{ImageBuffer, Rgba};
    use tempfile::TempDir;

    /// Wavefront OBJ for a small centered cube. It stays visible from all default TBP
    /// viewpoints and does not need any YCB downloads.
    const SYNTHETIC_CUBE_OBJ: &str = r#"o SyntheticCube
v -0.10 -0.10  0.10
v  0.10 -0.10  0.10
v  0.10  0.10  0.10
v -0.10  0.10  0.10
v -0.10 -0.10 -0.10
v  0.10 -0.10 -0.10
v  0.10  0.10 -0.10
v -0.10  0.10 -0.10
vt 0.0 0.0
vt 1.0 0.0
vt 1.0 1.0
vt 0.0 1.0
f 1/1 2/2 3/3
f 1/1 3/3 4/4
f 6/1 5/2 8/3
f 6/1 8/3 7/4
f 2/1 6/2 7/3
f 2/1 7/3 3/4
f 5/1 1/2 4/3
f 5/1 4/3 8/4
f 4/1 3/2 7/3
f 4/1 7/3 8/4
f 5/1 6/2 2/3
f 5/1 2/3 1/4
"#;

    /// Creates `<tmp>/synthetic_cube/google_16k/` holding `textured.obj` and a 2x2
    /// `texture_map.png`, and returns the owning temp dir (files vanish when it drops).
    fn write_synthetic_object() -> TempDir {
        let temp_dir = TempDir::new().expect("create temp dir for synthetic object");
        let model_dir = temp_dir.path().join("synthetic_cube").join("google_16k");
        std::fs::create_dir_all(&model_dir).expect("create synthetic google_16k dir");

        std::fs::write(model_dir.join("textured.obj"), SYNTHETIC_CUBE_OBJ)
            .expect("write synthetic obj");

        // Four distinct, fully opaque texels.
        let texture = ImageBuffer::from_fn(2, 2, |x, y| {
            Rgba(match (x, y) {
                (0, 0) => [255u8, 48, 48, 255],
                (1, 0) => [48u8, 255, 48, 255],
                (0, 1) => [48u8, 48, 255, 255],
                _ => [255u8, 255, 64, 255],
            })
        });
        texture
            .save(model_dir.join("texture_map.png"))
            .expect("write synthetic texture");

        temp_dir
    }

    /// Builds up to `count` identical-object requests, one per leading default viewpoint.
    fn homogeneous_requests(
        object_dir: &std::path::Path,
        config: &RenderConfig,
        count: usize,
    ) -> Vec<BatchRenderRequest> {
        crate::generate_viewpoints(&ViewpointConfig::default())
            .iter()
            .take(count)
            .map(|&viewpoint| BatchRenderRequest {
                object_dir: object_dir.to_path_buf(),
                viewpoint,
                object_rotation: ObjectRotation::identity(),
                render_config: config.clone(),
            })
            .collect()
    }

    #[test]
    #[ignore = "headless throughput smoke check is opt-in because it needs a local render backend"]
    fn test_headless_batch_throughput_smoke() {
        crate::initialize();
        reset_headless_scene_setup_count();

        let object_root = write_synthetic_object();
        let object_dir = object_root.path().join("synthetic_cube");
        let request_count = 5usize;
        let config = RenderConfig::tbp_default();
        let requests = homogeneous_requests(&object_dir, &config, request_count);

        let timer = std::time::Instant::now();
        let outputs = crate::render_batch(requests, &BatchRenderConfig::default())
            .expect("synthetic headless batch render should succeed");
        let elapsed = timer.elapsed();

        assert_eq!(outputs.len(), request_count);
        // Deterministic churn signal for the smoke check: adapter log lines vary by backend
        // and logging config, but a homogeneous batch should still set up headless scene
        // state exactly once.
        assert_eq!(
            headless_scene_setup_count(),
            1,
            "homogeneous batch smoke check should reuse one headless app setup"
        );

        let expected_rgba_len = (config.width * config.height * 4) as usize;
        let expected_depth_len = (config.width * config.height) as usize;
        for (idx, output) in outputs.iter().enumerate() {
            assert_eq!(output.width, config.width, "output {idx} width mismatch");
            assert_eq!(output.height, config.height, "output {idx} height mismatch");
            assert_eq!(
                output.rgba.len(),
                expected_rgba_len,
                "output {idx} rgba size mismatch"
            );
            assert_eq!(
                output.depth.len(),
                expected_depth_len,
                "output {idx} depth size mismatch"
            );
            // At least one pixel must have a non-zero colour channel (alpha is ignored).
            let has_color = output
                .rgba
                .chunks_exact(4)
                .any(|px| px[..3].iter().any(|&channel| channel != 0));
            assert!(has_color, "output {idx} should contain visible color");
        }

        // Acceptance target: under llvmpipe-class CPU rendering, five 64x64 captures should
        // finish in under 8s. Much slower runs usually mean we reintroduced per-capture app
        // churn or another headless startup regression.
        let budget = std::time::Duration::from_secs(8);
        assert!(
            elapsed < budget,
            "5 synthetic headless captures took {:.2}s, expected < 8.0s",
            elapsed.as_secs_f64()
        );
    }
}