Skip to main content

edgefirst_image/
opengl_headless.rs

1// SPDX-FileCopyrightText: Copyright 2025 Au-Zone Technologies
2// SPDX-License-Identifier: Apache-2.0
3
4#![cfg(target_os = "linux")]
5#![cfg(feature = "opengl")]
6
7use edgefirst_decoder::DetectBox;
8#[cfg(feature = "decoder")]
9use edgefirst_decoder::{ProtoData, ProtoTensor, Segmentation};
10use edgefirst_tensor::{TensorMemory, TensorTrait};
11use four_char_code::FourCharCode;
12use gbm::{
13    drm::{buffer::DrmFourcc, control::Device as DrmControlDevice, Device as DrmDevice},
14    AsRaw, Device,
15};
16use khronos_egl::{self as egl, Attrib, Display, Dynamic, Instance, EGL1_4};
17use log::{debug, error};
18use std::{
19    collections::BTreeSet,
20    ffi::{c_char, c_void, CStr, CString},
21    mem::ManuallyDrop,
22    os::fd::AsRawFd,
23    ptr::{null, null_mut, NonNull},
24    rc::Rc,
25    str::FromStr,
26    sync::OnceLock,
27    thread::JoinHandle,
28    time::Instant,
29};
30use tokio::sync::mpsc::Sender;
31
/// Expands to the bare name of the function in which the macro is invoked.
///
/// The name is recovered from the type name of a nested helper item, so it
/// reflects whatever `std::any::type_name` reports for the enclosing scope.
macro_rules! function {
    () => {{
        // A nested item's type name is the enclosing path followed by `::f`.
        fn f() {}
        fn type_name_of<T>(_: T) -> &'static str {
            std::any::type_name::<T>()
        }
        let qualified = type_name_of(f);

        // Drop the trailing `::f`, then keep only what follows the last `:`.
        let enclosing = &qualified[..qualified.len() - 3];
        match enclosing.rfind(':') {
            Some(idx) => &enclosing[idx + 1..],
            None => enclosing,
        }
    }};
}
47
48#[cfg(feature = "decoder")]
49use crate::DEFAULT_COLORS;
50use crate::{
51    CPUProcessor, Crop, Error, Flip, ImageProcessorTrait, Rect, Rotation, TensorImage,
52    TensorImageRef, GREY, NV12, PLANAR_RGB, PLANAR_RGBA, RGB, RGBA, YUYV,
53};
54
55#[cfg(feature = "decoder")]
56use crate::MaskResult;
57
58/// Identifies the type of EGL display used for headless OpenGL ES rendering.
59///
60/// The HAL probes displays in priority order: GBM first (direct GPU access),
61/// then platform device enumeration, then the default display. Use
62/// [`probe_egl_displays`] to discover which are available and
63/// [`ImageProcessorConfig::egl_display`](crate::ImageProcessorConfig::egl_display)
64/// to override the auto-detection.
65///
66/// # Display Types
67///
68/// - **`Gbm`** — Opens a DRM render node (e.g. `/dev/dri/renderD128`) and
69///   creates a GBM (Generic Buffer Manager) device, then calls
70///   `eglGetPlatformDisplay(EGL_PLATFORM_GBM_KHR, gbm_device)`. This is a
71///   direct GPU path through the DRM/KMS subsystem — no compositor required.
72///   Preferred for headless edge AI workloads. On some drivers (e.g. Vivante
73///   on i.MX8), this path may trigger heap corruption during process shutdown.
74///
75/// - **`PlatformDevice`** — Uses the `EGL_EXT_device_enumeration` extension
76///   to query available EGL devices via `eglQueryDevicesEXT`, then selects the
77///   first device with `eglGetPlatformDisplay(EGL_EXT_platform_device, ...)`.
78///   Also headless and compositor-free. Common on NVIDIA GPUs.
79///
80/// - **`Default`** — Calls `eglGetDisplay(EGL_DEFAULT_DISPLAY)`, letting the
81///   EGL implementation choose the display. On Wayland systems this connects
82///   to the compositor; on X11 it connects to the X server. May block on
83///   headless systems where a compositor is expected but not running.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EglDisplayKind {
    /// Display created from a GBM device opened on a DRM node.
    Gbm,
    /// Display created from the first device reported by `eglQueryDevicesEXT`.
    PlatformDevice,
    /// Display returned by `eglGetDisplay(EGL_DEFAULT_DISPLAY)`.
    Default,
}

/// Renders the display kind as a short label for logs and diagnostics.
impl std::fmt::Display for EglDisplayKind {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            Self::Gbm => "GBM",
            Self::PlatformDevice => "PlatformDevice",
            Self::Default => "Default",
        };
        f.write_str(label)
    }
}
100
/// A validated, available EGL display discovered by [`probe_egl_displays`].
///
/// An entry is only produced for a display on which `eglInitialize`
/// succeeded and a matching GLES3 RGBA8 PBuffer config was found.
#[derive(Debug, Clone)]
pub struct EglDisplayInfo {
    /// The type of EGL display.
    pub kind: EglDisplayKind,
    /// Human-readable description for logging/diagnostics
    /// (e.g. "GBM via /dev/dri/renderD128").
    pub description: String,
}
110
/// EGL library handle. Intentionally leaked (never dlclose'd) to avoid SIGBUS
/// on process exit: GPU drivers may keep internal state that outlives explicit
/// EGL cleanup, and dlclose can unmap memory still referenced by the driver.
///
/// Holds a `&'static` reference rather than the library itself, so dropping
/// the cell never unloads the library.
static EGL_LIB: OnceLock<&'static libloading::Library> = OnceLock::new();
115
116fn get_egl_lib() -> Result<&'static libloading::Library, crate::Error> {
117    if let Some(egl) = EGL_LIB.get() {
118        Ok(egl)
119    } else {
120        let egl = unsafe { libloading::Library::new("libEGL.so.1")? };
121        // Leak the library to prevent dlclose on process exit
122        let egl: &'static libloading::Library = Box::leak(Box::new(egl));
123        Ok(EGL_LIB.get_or_init(|| egl))
124    }
125}
126
/// Dynamically loaded EGL API instance (EGL 1.4 entry points required),
/// backed by the leaked `'static` library handle.
type Egl = Instance<Dynamic<&'static libloading::Library, EGL1_4>>;
128
129/// Check whether an EGL display supports GLES3 RGBA8 PBuffer rendering.
130///
131/// Returns `true` if `eglChooseConfig` finds at least one matching config.
132fn probe_config_check(egl: &Egl, display: egl::Display) -> bool {
133    let attributes = [
134        egl::SURFACE_TYPE,
135        egl::PBUFFER_BIT,
136        egl::RENDERABLE_TYPE,
137        egl::OPENGL_ES3_BIT,
138        egl::RED_SIZE,
139        8,
140        egl::GREEN_SIZE,
141        8,
142        egl::BLUE_SIZE,
143        8,
144        egl::ALPHA_SIZE,
145        8,
146        egl::NONE,
147    ];
148    egl.choose_first_config(display, &attributes)
149        .ok()
150        .flatten()
151        .is_some()
152}
153
154/// Probe for available EGL displays supporting headless OpenGL ES 3.0.
155///
156/// Returns validated displays in priority order (GBM, PlatformDevice,
157/// Default). Each display is validated with `eglInitialize` +
158/// `eglChooseConfig` using the same GLES3 RGBA8 PBuffer attributes used by
159/// the image processor. Probed state is cleaned up with `eglTerminate` — no
160/// EGL resources are left alive.
161///
162/// An empty list means OpenGL is not available on this system.
163///
164/// # Errors
165///
166/// Returns an error only if `libEGL.so.1` cannot be loaded. Individual
167/// display probe failures are silently skipped.
168pub fn probe_egl_displays() -> Result<Vec<EglDisplayInfo>, Error> {
169    let egl: Egl = unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? };
170
171    let mut results = Vec::new();
172
173    // GBM
174    if let Ok(display_type) = GlContext::egl_get_gbm_display(&egl) {
175        let display = display_type.as_display();
176        if egl.initialize(display).is_ok() {
177            if probe_config_check(&egl, display) {
178                results.push(EglDisplayInfo {
179                    kind: EglDisplayKind::Gbm,
180                    description: "GBM via /dev/dri/renderD128".to_string(),
181                });
182            }
183            let _ = egl.terminate(display);
184        }
185    }
186
187    // PlatformDevice
188    if let Ok(display_type) = GlContext::egl_get_platform_display_from_device(&egl) {
189        let display = display_type.as_display();
190        if egl.initialize(display).is_ok() {
191            if probe_config_check(&egl, display) {
192                results.push(EglDisplayInfo {
193                    kind: EglDisplayKind::PlatformDevice,
194                    description: "EGL platform device via EGL_EXT_device_enumeration".to_string(),
195                });
196            }
197            let _ = egl.terminate(display);
198        }
199    }
200
201    // Default
202    if let Ok(display_type) = GlContext::egl_get_default_display(&egl) {
203        let display = display_type.as_display();
204        if egl.initialize(display).is_ok() {
205            if probe_config_check(&egl, display) {
206                results.push(EglDisplayInfo {
207                    kind: EglDisplayKind::Default,
208                    description: "EGL default display".to_string(),
209                });
210            }
211            let _ = egl.terminate(display);
212        }
213    }
214
215    Ok(results)
216}
217
/// EGL state backing a headless OpenGL ES context: the display, the PBuffer
/// surface, the context itself and the EGL API instance used to create them.
pub(crate) struct GlContext {
    /// `true` when `egl_check_support_dma` succeeded while the context was
    /// being created.
    pub(crate) support_dma: bool,
    /// PBuffer surface made current together with `ctx`; taken and destroyed
    /// when the context is dropped.
    pub(crate) surface: Option<egl::Surface>,
    /// Display the surface and context were created on.
    pub(crate) display: EglDisplayType,
    /// The EGL rendering context.
    pub(crate) ctx: egl::Context,
    /// Wrapped in ManuallyDrop because the khronos-egl Dynamic instance's
    /// Drop calls eglReleaseThread() which can panic during process shutdown
    /// if the EGL library has been partially unloaded. `GlContext::drop`
    /// never drops this value, so the `Rc` is intentionally leaked.
    pub(crate) egl: ManuallyDrop<Rc<Egl>>,
}
229
/// An EGL display handle tagged with the method that produced it.
pub(crate) enum EglDisplayType {
    /// Obtained via `eglGetDisplay(EGL_DEFAULT_DISPLAY)`.
    Default(egl::Display),
    /// Obtained from a GBM device. The device is stored next to the display
    /// handle (and otherwise unused) so it lives as long as this value.
    Gbm(egl::Display, #[allow(dead_code)] Device<Card>),
    /// Obtained from an EGL device via the `EGL_EXT_platform_device` platform.
    PlatformDisplay(egl::Display),
}
235
236impl EglDisplayType {
237    fn as_display(&self) -> egl::Display {
238        match self {
239            EglDisplayType::Default(disp) => *disp,
240            EglDisplayType::Gbm(disp, _) => *disp,
241            EglDisplayType::PlatformDisplay(disp) => *disp,
242        }
243    }
244}
245
246impl GlContext {
247    pub(crate) fn new(kind: Option<EglDisplayKind>) -> Result<GlContext, crate::Error> {
248        // Create an EGL API instance.
249        let egl: Rc<Egl> =
250            Rc::new(unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? });
251
252        if let Some(kind) = kind {
253            // Specific display type requested — try only that one.
254            let display_fn = match kind {
255                EglDisplayKind::Gbm => Self::egl_get_gbm_display as fn(&Egl) -> _,
256                EglDisplayKind::PlatformDevice => Self::egl_get_platform_display_from_device,
257                EglDisplayKind::Default => Self::egl_get_default_display,
258            };
259            return Self::try_initialize_egl(egl, display_fn).map_err(|e| {
260                log::debug!("Failed to initialize EGL with {kind} display: {e:?}");
261                e
262            });
263        }
264
265        // Try headless-friendly EGL methods first (GBM/DRM, device enumeration)
266        // before the default display, which may block if a compositor (Wayland)
267        // is expected but not running.
268        if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_gbm_display) {
269            return Ok(headless);
270        } else {
271            log::debug!("Didn't initialize EGL with GBM Display");
272        }
273
274        if let Ok(headless) =
275            Self::try_initialize_egl(egl.clone(), Self::egl_get_platform_display_from_device)
276        {
277            return Ok(headless);
278        } else {
279            log::debug!("Didn't initialize EGL with platform display from device enumeration");
280        }
281
282        if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_default_display) {
283            return Ok(headless);
284        } else {
285            log::debug!("Didn't initialize EGL with Default Display");
286        }
287
288        Err(Error::OpenGl(
289            "Could not initialize EGL with any known method".to_string(),
290        ))
291    }
292
293    fn try_initialize_egl(
294        egl: Rc<Egl>,
295        display_fn: impl Fn(&Egl) -> Result<EglDisplayType, crate::Error>,
296    ) -> Result<GlContext, crate::Error> {
297        let display = display_fn(&egl)?;
298        log::debug!("egl initialize with display: {:x?}", display.as_display());
299        egl.initialize(display.as_display())?;
300        let attributes = [
301            egl::SURFACE_TYPE,
302            egl::PBUFFER_BIT,
303            egl::RENDERABLE_TYPE,
304            egl::OPENGL_ES3_BIT,
305            egl::RED_SIZE,
306            8,
307            egl::GREEN_SIZE,
308            8,
309            egl::BLUE_SIZE,
310            8,
311            egl::ALPHA_SIZE,
312            8,
313            egl::NONE,
314        ];
315
316        let config =
317            if let Some(config) = egl.choose_first_config(display.as_display(), &attributes)? {
318                config
319            } else {
320                return Err(crate::Error::NotImplemented(
321                    "Did not find valid OpenGL ES config".to_string(),
322                ));
323            };
324
325        debug!("config: {config:?}");
326
327        let surface = Some(egl.create_pbuffer_surface(
328            display.as_display(),
329            config,
330            &[egl::WIDTH, 64, egl::HEIGHT, 64, egl::NONE],
331        )?);
332
333        egl.bind_api(egl::OPENGL_ES_API)?;
334        let context_attributes = [egl::CONTEXT_MAJOR_VERSION, 3, egl::NONE, egl::NONE];
335
336        let ctx = egl.create_context(display.as_display(), config, None, &context_attributes)?;
337        debug!("ctx: {ctx:?}");
338
339        egl.make_current(display.as_display(), surface, surface, Some(ctx))?;
340
341        let support_dma = Self::egl_check_support_dma(&egl).is_ok();
342        let headless = GlContext {
343            display,
344            ctx,
345            egl: ManuallyDrop::new(egl),
346            surface,
347            support_dma,
348        };
349        Ok(headless)
350    }
351
352    fn egl_get_default_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
353        // get the default display
354        if let Some(display) = unsafe { egl.get_display(egl::DEFAULT_DISPLAY) } {
355            debug!("default display: {display:?}");
356            return Ok(EglDisplayType::Default(display));
357        }
358
359        Err(Error::OpenGl(
360            "Could not obtain EGL Default Display".to_string(),
361        ))
362    }
363
364    fn egl_get_gbm_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
365        // init a GBM device
366        let gbm = Device::new(Card::open_global()?)?;
367
368        debug!("gbm: {gbm:?}");
369        let display = Self::egl_get_platform_display_with_fallback(
370            egl,
371            egl_ext::PLATFORM_GBM_KHR,
372            gbm.as_raw() as *mut c_void,
373            &[egl::ATTRIB_NONE],
374        )?;
375
376        Ok(EglDisplayType::Gbm(display, gbm))
377    }
378
379    fn egl_get_platform_display_from_device(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
380        let extensions = egl.query_string(None, egl::EXTENSIONS)?;
381        let extensions = extensions.to_string_lossy();
382        log::debug!("EGL Extensions: {}", extensions);
383
384        if !extensions.contains("EGL_EXT_device_enumeration") {
385            return Err(Error::GLVersion(
386                "EGL doesn't supported EGL_EXT_device_enumeration extension".to_string(),
387            ));
388        }
389
390        type EGLDeviceEXT = *mut c_void;
391        let devices = if let Some(ext) = egl.get_proc_address("eglQueryDevicesEXT") {
392            let func: unsafe extern "system" fn(
393                max_devices: egl::Int,
394                devices: *mut EGLDeviceEXT,
395                num_devices: *mut egl::Int,
396            ) -> *const c_char = unsafe { std::mem::transmute(ext) };
397            let mut devices = [std::ptr::null_mut(); 10];
398            let mut num_devices = 0;
399            unsafe { func(devices.len() as i32, devices.as_mut_ptr(), &mut num_devices) };
400            for i in 0..num_devices {
401                log::debug!("EGL device: {:?}", devices[i as usize]);
402            }
403            devices[0..num_devices as usize].to_vec()
404        } else {
405            return Err(Error::GLVersion(
406                "EGL doesn't supported eglQueryDevicesEXT function".to_string(),
407            ));
408        };
409
410        if !extensions.contains("EGL_EXT_platform_device") {
411            return Err(Error::GLVersion(
412                "EGL doesn't supported EGL_EXT_platform_device extension".to_string(),
413            ));
414        }
415
416        // just use the first device?
417        let disp = Self::egl_get_platform_display_with_fallback(
418            egl,
419            egl_ext::PLATFORM_DEVICE_EXT,
420            devices[0],
421            &[egl::ATTRIB_NONE],
422        )?;
423        Ok(EglDisplayType::PlatformDisplay(disp))
424    }
425
426    fn egl_check_support_dma(egl: &Egl) -> Result<(), crate::Error> {
427        let extensions = egl.query_string(None, egl::EXTENSIONS)?;
428        let extensions = extensions.to_string_lossy();
429        log::debug!("EGL Extensions: {}", extensions);
430
431        if egl.upcast::<egl::EGL1_5>().is_some() {
432            return Ok(());
433        }
434
435        if !extensions.contains("EGL_EXT_image_dma_buf_import") {
436            return Err(crate::Error::GLVersion(
437                "EGL does not support EGL_EXT_image_dma_buf_import extension".to_string(),
438            ));
439        }
440
441        if egl.get_proc_address("eglCreateImageKHR").is_none() {
442            return Err(crate::Error::GLVersion(
443                "EGL does not support eglCreateImageKHR function".to_string(),
444            ));
445        }
446
447        if egl.get_proc_address("eglDestroyImageKHR").is_none() {
448            return Err(crate::Error::GLVersion(
449                "EGL does not support eglDestroyImageKHR function".to_string(),
450            ));
451        }
452        // Err(crate::Error::GLVersion("EGL Version too low".to_string()))
453        Ok(())
454    }
455
456    fn egl_get_platform_display_with_fallback(
457        egl: &Egl,
458        platform: egl::Enum,
459        native_display: *mut c_void,
460        attrib_list: &[Attrib],
461    ) -> Result<Display, Error> {
462        if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
463            unsafe { egl.get_platform_display(platform, native_display, attrib_list) }
464                .map_err(|e| e.into())
465        } else if let Some(ext) = egl.get_proc_address("eglGetPlatformDisplayEXT") {
466            let func: unsafe extern "system" fn(
467                platform: egl::Enum,
468                native_display: *mut c_void,
469                attrib_list: *const Attrib,
470            ) -> egl::EGLDisplay = unsafe { std::mem::transmute(ext) };
471            let disp = unsafe { func(platform, native_display, attrib_list.as_ptr()) };
472            if disp != egl::NO_DISPLAY {
473                Ok(unsafe { Display::from_ptr(disp) })
474            } else {
475                Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
476                    "EGL failed but no error was reported".to_owned(),
477                )))
478            }
479        } else {
480            Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
481                provided: egl.version(),
482                required: khronos_egl::Version::EGL1_5,
483            }))
484        }
485    }
486
487    fn egl_create_image_with_fallback(
488        egl: &Egl,
489        display: Display,
490        ctx: egl::Context,
491        target: egl::Enum,
492        buffer: egl::ClientBuffer,
493        attrib_list: &[Attrib],
494    ) -> Result<egl::Image, Error> {
495        if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
496            egl.create_image(display, ctx, target, buffer, attrib_list)
497                .map_err(|e| e.into())
498        } else if let Some(ext) = egl.get_proc_address("eglCreateImageKHR") {
499            log::trace!("eglCreateImageKHR addr: {:?}", ext);
500            let func: unsafe extern "system" fn(
501                display: egl::EGLDisplay,
502                ctx: egl::EGLContext,
503                target: egl::Enum,
504                buffer: egl::EGLClientBuffer,
505                attrib_list: *const egl::Int,
506            ) -> egl::EGLImage = unsafe { std::mem::transmute(ext) };
507            let new_attrib_list = attrib_list
508                .iter()
509                .map(|x| *x as egl::Int)
510                .collect::<Vec<_>>();
511
512            let image = unsafe {
513                func(
514                    display.as_ptr(),
515                    ctx.as_ptr(),
516                    target,
517                    buffer.as_ptr(),
518                    new_attrib_list.as_ptr(),
519                )
520            };
521            if image != egl::NO_IMAGE {
522                Ok(unsafe { egl::Image::from_ptr(image) })
523            } else {
524                Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
525                    "EGL failed but no error was reported".to_owned(),
526                )))
527            }
528        } else {
529            Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
530                provided: egl.version(),
531                required: khronos_egl::Version::EGL1_5,
532            }))
533        }
534    }
535
536    fn egl_destory_image_with_fallback(
537        egl: &Egl,
538        display: Display,
539        image: egl::Image,
540    ) -> Result<(), Error> {
541        if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
542            egl.destroy_image(display, image).map_err(|e| e.into())
543        } else if let Some(ext) = egl.get_proc_address("eglDestroyImageKHR") {
544            let func: unsafe extern "system" fn(
545                display: egl::EGLDisplay,
546                image: egl::EGLImage,
547            ) -> egl::Boolean = unsafe { std::mem::transmute(ext) };
548            let res = unsafe { func(display.as_ptr(), image.as_ptr()) };
549            if res == egl::TRUE {
550                Ok(())
551            } else {
552                Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
553                    "EGL failed but no error was reported".to_owned(),
554                )))
555            }
556        } else {
557            Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
558                provided: egl.version(),
559                required: khronos_egl::Version::EGL1_5,
560            }))
561        }
562    }
563}
564
/// Best-effort teardown of the EGL context and surface.
///
/// Every EGL call result is ignored and any panic raised while cleaning up is
/// caught, so dropping a `GlContext` never propagates a failure.
impl Drop for GlContext {
    fn drop(&mut self) {
        // During process shutdown (e.g. Python interpreter exit), the EGL/GL
        // shared libraries may already be partially unloaded, causing panics
        // or heap corruption when calling cleanup functions. We suppress
        // panic output and catch panics to prevent propagation.
        //
        // NOTE(review): the panic hook is process-wide, so a panic on another
        // thread while the no-op hook is installed is silenced too — confirm
        // this is acceptable.
        let prev_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(|_| {}));
        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            // Unbind the context from this thread before destroying it.
            let _ = self
                .egl
                .make_current(self.display.as_display(), None, None, None);

            let _ = self
                .egl
                .destroy_context(self.display.as_display(), self.ctx);

            // `take()` leaves `None` behind so the surface cannot be
            // destroyed twice.
            if let Some(surface) = self.surface.take() {
                let _ = self.egl.destroy_surface(self.display.as_display(), surface);
            }

            // Note: eglTerminate is intentionally omitted. The context and
            // surface are already destroyed above, and calling terminate after
            // individual resource destruction can cause double-free issues on
            // some EGL drivers (observed as heap corruption on ARM targets
            // during process shutdown).
        }));
        // Restore the caller's hook whether or not cleanup panicked.
        std::panic::set_hook(prev_hook);

        // The Rc<Egl> (ManuallyDrop) is intentionally NOT dropped. The
        // khronos-egl Dynamic instance's Drop calls eglReleaseThread() which
        // panics if the EGL library has been unloaded (local/x86_64) or
        // causes heap corruption by calling into invalid memory (ARM). Since
        // EGL display connections are process-scoped singletons, leaking the
        // Rc is harmless — the OS reclaims all resources on process exit.
    }
}
602
#[derive(Debug)]
/// A simple wrapper for a device node.
///
/// Holds the open file for a DRM node so it can be used as the backing
/// device of a GBM `Device`.
pub(crate) struct Card(std::fs::File);

/// Implementing `AsFd` is a prerequisite to implementing the DRM device
/// traits below. Here, we are just calling `as_fd()` on the inner File.
impl std::os::unix::io::AsFd for Card {
    fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> {
        self.0.as_fd()
    }
}

/// With `AsFd` implemented, we can now implement `drm::Device`.
impl DrmDevice for Card {}
/// Marker impl for the DRM control-device trait; no methods are overridden.
impl DrmControlDevice for Card {}
618
619/// Simple helper methods for opening a `Card`.
620impl Card {
621    pub fn open(path: &str) -> Result<Self, crate::Error> {
622        let mut options = std::fs::OpenOptions::new();
623        options.read(true);
624        options.write(true);
625        let c = options.open(path);
626        match c {
627            Ok(c) => Ok(Card(c)),
628            Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
629                Err(Error::NotFound(format!("File not found: {path}")))
630            }
631            Err(e) => Err(e.into()),
632        }
633    }
634
635    pub fn open_global() -> Result<Self, crate::Error> {
636        let targets = ["/dev/dri/renderD128", "/dev/dri/card0", "/dev/dri/card1"];
637        let e = Self::open(targets[0]);
638        if let Ok(t) = e {
639            return Ok(t);
640        }
641        for t in &targets[1..] {
642            if let Ok(t) = Self::open(t) {
643                return Ok(t);
644            }
645        }
646        e
647    }
648}
649
/// A rectangular region described by its four edges.
///
/// NOTE(review): the coordinate space (pixels vs. normalized texture
/// coordinates) is not visible from this definition — confirm at the use site.
#[derive(Debug, Clone, Copy)]
struct RegionOfInterest {
    // Left edge of the region.
    left: f32,
    // Top edge of the region.
    top: f32,
    // Right edge of the region.
    right: f32,
    // Bottom edge of the region.
    bottom: f32,
}
657
/// Requests sent from `GLProcessorThreaded` to its rendering thread.
///
/// Every variant carries a oneshot sender on which the rendering thread
/// reports the outcome of the request.
///
/// NOTE(review): `ImageRender` names `Segmentation` without a
/// `cfg(feature = "decoder")` gate, while the visible import of that type is
/// gated on the feature — confirm the crate builds without `decoder`.
enum GLProcessorMessage {
    /// Convert the source image (first pointer) into the destination image
    /// (second pointer) with the given rotation, flip and crop.
    ImageConvert(
        SendablePtr<TensorImage>,
        SendablePtr<TensorImage>,
        Rotation,
        Flip,
        Crop,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Replace the class colors used by the renderer.
    SetColors(
        Vec<[u8; 4]>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Render detection boxes and segmentations onto the destination image.
    /// The box and segmentation pointers are read as slices of `len` elements.
    ImageRender(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        SendablePtr<Segmentation>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Render detection boxes and owned proto data onto the destination image.
    #[cfg(feature = "decoder")]
    ImageRenderProtos(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Select the int8 interpolation mode; the rendering thread always
    /// replies `Ok(())`.
    #[cfg(feature = "decoder")]
    SetInt8Interpolation(
        Int8InterpolationMode,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Render masks from proto data for the given boxes; the two `usize`
    /// values are the output width and height, in that order.
    #[cfg(feature = "decoder")]
    RenderMasksFromProtos(
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        usize,
        usize,
        tokio::sync::oneshot::Sender<Result<Vec<MaskResult>, Error>>,
    ),
}
698
/// OpenGL multi-threaded image converter. The actual conversion is done in a
/// separate rendering thread, as OpenGL contexts are not thread-safe. This can
/// be safely sent between threads. The `convert()` call sends the conversion
/// request to the rendering thread and waits for the result.
#[derive(Debug)]
pub struct GLProcessorThreaded {
    // This is only None when the converter is being dropped.
    // Join handle of the rendering thread spawned in `new`.
    handle: Option<JoinHandle<()>>,

    // This is only None when the converter is being dropped.
    // Channel used to submit requests to the rendering thread.
    sender: Option<Sender<GLProcessorMessage>>,
    // Whether the rendering thread's GL context reported DMA support at
    // creation time.
    support_dma: bool,
}

// NOTE(review): these impls assert thread-safety manually. The struct only
// holds a join handle, a channel sender and a bool; presumably the impls are
// needed because of the message payload types — confirm and record the
// invariant in a SAFETY comment.
unsafe impl Send for GLProcessorThreaded {}
unsafe impl Sync for GLProcessorThreaded {}
715
/// A raw pointer plus element count, used to hand borrowed data to the
/// rendering thread inside a `GLProcessorMessage`.
struct SendablePtr<T: Send> {
    // Pointer to the first (or only) element.
    ptr: NonNull<T>,
    // Number of elements readable through `ptr`; the rendering thread uses it
    // as the slice length for slice-like payloads.
    len: usize,
}

// SAFETY: the pointer is only sound to dereference on the receiving thread
// while the sender keeps the pointee borrowed, i.e. until the rendering thread
// has replied. Upholding that is the sender's responsibility.
unsafe impl<T> Send for SendablePtr<T> where T: Send {}
722
impl GLProcessorThreaded {
    /// Creates a new OpenGL multi-threaded image converter.
    ///
    /// Spawns a dedicated thread that constructs a `GLProcessorST` for the
    /// requested display `kind` and then services `GLProcessorMessage`s until
    /// the request channel is closed. This call blocks until the thread has
    /// reported whether construction succeeded.
    ///
    /// # Errors
    ///
    /// Returns the error from `GLProcessorST::new` if construction fails, or
    /// `Error::Internal` if the thread ends without reporting a result.
    ///
    /// NOTE(review): `blocking_recv` is used to wait for the thread; tokio
    /// documents that it panics inside an asynchronous execution context —
    /// confirm callers are synchronous.
    pub fn new(kind: Option<EglDisplayKind>) -> Result<Self, Error> {
        // Request channel with capacity 1.
        let (send, mut recv) = tokio::sync::mpsc::channel::<GLProcessorMessage>(1);

        // One-shot channel on which the thread reports context creation.
        let (create_ctx_send, create_ctx_recv) = tokio::sync::oneshot::channel();

        let func = move || {
            // The GL context is created on, and never leaves, this thread.
            let mut gl_converter = match GLProcessorST::new(kind) {
                Ok(gl) => gl,
                Err(e) => {
                    let _ = create_ctx_send.send(Err(e));
                    return;
                }
            };
            let _ = create_ctx_send.send(Ok(gl_converter.gl_context.support_dma));
            // Runs until every sender has been dropped.
            while let Some(msg) = recv.blocking_recv() {
                match msg {
                    GLProcessorMessage::ImageConvert(src, mut dst, rotation, flip, crop, resp) => {
                        // SAFETY: This is safe because the convert() function waits for the resp to
                        // be sent before dropping the borrow for src and dst
                        let src = unsafe { src.ptr.as_ref() };
                        let dst = unsafe { dst.ptr.as_mut() };
                        let res = gl_converter.convert(src, dst, rotation, flip, crop);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::ImageRender(mut dst, det, seg, resp) => {
                        // SAFETY: This is safe because the render_to_image() function waits for the
                        // resp to be sent before dropping the borrow for dst, detect, and
                        // segmentation
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let seg = unsafe { std::slice::from_raw_parts(seg.ptr.as_ptr(), seg.len) };
                        let res = gl_converter.render_to_image(dst, det, seg);
                        let _ = resp.send(res);
                    }
                    #[cfg(feature = "decoder")]
                    GLProcessorMessage::ImageRenderProtos(mut dst, det, proto_data, resp) => {
                        // SAFETY: Same safety invariant as ImageRender — caller
                        // blocks on resp before dropping borrows.
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.render_from_protos(dst, det, &proto_data);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::SetColors(colors, resp) => {
                        let res = gl_converter.set_class_colors(&colors);
                        let _ = resp.send(res);
                    }
                    #[cfg(feature = "decoder")]
                    GLProcessorMessage::SetInt8Interpolation(mode, resp) => {
                        gl_converter.set_int8_interpolation_mode(mode);
                        let _ = resp.send(Ok(()));
                    }
                    #[cfg(feature = "decoder")]
                    GLProcessorMessage::RenderMasksFromProtos(
                        det,
                        proto_data,
                        output_width,
                        output_height,
                        resp,
                    ) => {
                        // SAFETY: presumably the same invariant as ImageRender
                        // — the sender must keep the detection slice borrowed
                        // until resp is answered. TODO confirm at the call site.
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.render_masks_from_protos(
                            det,
                            &proto_data,
                            output_width,
                            output_height,
                        );
                        let _ = resp.send(res);
                    }
                }
            }
        };

        // let handle = tokio::task::spawn(func());
        let handle = std::thread::spawn(func);

        // Wait for the thread to report the outcome of context creation.
        let support_dma = match create_ctx_recv.blocking_recv() {
            Ok(Err(e)) => return Err(e),
            // The sender was dropped without sending anything.
            Err(_) => {
                return Err(Error::Internal(
                    "GL converter error messaging closed without update".to_string(),
                ));
            }
            Ok(Ok(supports_dma)) => supports_dma,
        };

        Ok(Self {
            handle: Some(handle),
            sender: Some(send),
            support_dma,
        })
    }
}
818
819impl ImageProcessorTrait for GLProcessorThreaded {
820    fn convert(
821        &mut self,
822        src: &TensorImage,
823        dst: &mut TensorImage,
824        rotation: crate::Rotation,
825        flip: Flip,
826        crop: Crop,
827    ) -> crate::Result<()> {
828        crop.check_crop(src, dst)?;
829        if !GLProcessorST::check_src_format_supported(self.support_dma, src) {
830            return Err(crate::Error::NotSupported(format!(
831                "Opengl doesn't support {} source texture",
832                src.fourcc().display()
833            )));
834        }
835
836        if !GLProcessorST::check_dst_format_supported(self.support_dma, dst) {
837            return Err(crate::Error::NotSupported(format!(
838                "Opengl doesn't support {} destination texture",
839                dst.fourcc().display()
840            )));
841        }
842
843        let (err_send, err_recv) = tokio::sync::oneshot::channel();
844        self.sender
845            .as_ref()
846            .unwrap()
847            .blocking_send(GLProcessorMessage::ImageConvert(
848                SendablePtr {
849                    ptr: src.into(),
850                    len: 1,
851                },
852                SendablePtr {
853                    ptr: dst.into(),
854                    len: 1,
855                },
856                rotation,
857                flip,
858                crop,
859                err_send,
860            ))
861            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
862        err_recv.blocking_recv().map_err(|_| {
863            Error::Internal("GL converter error messaging closed without update".to_string())
864        })?
865    }
866
867    fn convert_ref(
868        &mut self,
869        src: &TensorImage,
870        dst: &mut TensorImageRef<'_>,
871        rotation: Rotation,
872        flip: Flip,
873        crop: Crop,
874    ) -> crate::Result<()> {
875        // OpenGL doesn't support PLANAR_RGB output, delegate to CPU
876        let mut cpu = CPUProcessor::new();
877        cpu.convert_ref(src, dst, rotation, flip, crop)
878    }
879
880    #[cfg(feature = "decoder")]
881    fn render_to_image(
882        &mut self,
883        dst: &mut TensorImage,
884        detect: &[crate::DetectBox],
885        segmentation: &[crate::Segmentation],
886    ) -> crate::Result<()> {
887        let (err_send, err_recv) = tokio::sync::oneshot::channel();
888        self.sender
889            .as_ref()
890            .unwrap()
891            .blocking_send(GLProcessorMessage::ImageRender(
892                SendablePtr {
893                    ptr: dst.into(),
894                    len: 1,
895                },
896                SendablePtr {
897                    ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
898                    len: detect.len(),
899                },
900                SendablePtr {
901                    ptr: NonNull::new(segmentation.as_ptr() as *mut Segmentation).unwrap(),
902                    len: segmentation.len(),
903                },
904                err_send,
905            ))
906            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
907        err_recv.blocking_recv().map_err(|_| {
908            Error::Internal("GL converter error messaging closed without update".to_string())
909        })?
910    }
911
912    #[cfg(feature = "decoder")]
913    fn render_from_protos(
914        &mut self,
915        dst: &mut TensorImage,
916        detect: &[DetectBox],
917        proto_data: &ProtoData,
918    ) -> crate::Result<()> {
919        let (err_send, err_recv) = tokio::sync::oneshot::channel();
920        self.sender
921            .as_ref()
922            .unwrap()
923            .blocking_send(GLProcessorMessage::ImageRenderProtos(
924                SendablePtr {
925                    ptr: NonNull::new(dst as *mut TensorImage).unwrap(),
926                    len: 1,
927                },
928                SendablePtr {
929                    ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
930                    len: detect.len(),
931                },
932                Box::new(proto_data.clone()),
933                err_send,
934            ))
935            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
936        err_recv.blocking_recv().map_err(|_| {
937            Error::Internal("GL converter error messaging closed without update".to_string())
938        })?
939    }
940
941    #[cfg(feature = "decoder")]
942    fn render_masks_from_protos(
943        &mut self,
944        detect: &[DetectBox],
945        proto_data: ProtoData,
946        output_width: usize,
947        output_height: usize,
948    ) -> crate::Result<Vec<MaskResult>> {
949        // Delegate to the non-trait method on GLProcessorThreaded
950        GLProcessorThreaded::render_masks_from_protos(
951            self,
952            detect,
953            proto_data,
954            output_width,
955            output_height,
956        )
957    }
958
959    #[cfg(feature = "decoder")]
960    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<(), crate::Error> {
961        let (err_send, err_recv) = tokio::sync::oneshot::channel();
962        self.sender
963            .as_ref()
964            .unwrap()
965            .blocking_send(GLProcessorMessage::SetColors(colors.to_vec(), err_send))
966            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
967        err_recv.blocking_recv().map_err(|_| {
968            Error::Internal("GL converter error messaging closed without update".to_string())
969        })?
970    }
971}
972
973impl GLProcessorThreaded {
974    /// Sets the interpolation mode for int8 proto textures.
975    #[cfg(feature = "decoder")]
976    pub fn set_int8_interpolation_mode(
977        &mut self,
978        mode: Int8InterpolationMode,
979    ) -> Result<(), crate::Error> {
980        let (err_send, err_recv) = tokio::sync::oneshot::channel();
981        self.sender
982            .as_ref()
983            .unwrap()
984            .blocking_send(GLProcessorMessage::SetInt8Interpolation(mode, err_send))
985            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
986        err_recv.blocking_recv().map_err(|_| {
987            Error::Internal("GL converter error messaging closed without update".to_string())
988        })?
989    }
990
991    /// Render per-instance grayscale masks at full output resolution via the GL thread.
992    #[cfg(feature = "decoder")]
993    pub fn render_masks_from_protos(
994        &mut self,
995        detect: &[DetectBox],
996        proto_data: ProtoData,
997        output_width: usize,
998        output_height: usize,
999    ) -> Result<Vec<MaskResult>, crate::Error> {
1000        let (resp_send, resp_recv) = tokio::sync::oneshot::channel();
1001        self.sender
1002            .as_ref()
1003            .unwrap()
1004            .blocking_send(GLProcessorMessage::RenderMasksFromProtos(
1005                SendablePtr {
1006                    ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1007                    len: detect.len(),
1008                },
1009                Box::new(proto_data),
1010                output_width,
1011                output_height,
1012                resp_send,
1013            ))
1014            .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1015        resp_recv.blocking_recv().map_err(|_| {
1016            Error::Internal("GL converter error messaging closed without update".to_string())
1017        })?
1018    }
1019}
1020
1021impl Drop for GLProcessorThreaded {
1022    fn drop(&mut self) {
1023        drop(self.sender.take());
1024        let _ = self.handle.take().and_then(|h| h.join().ok());
1025    }
1026}
1027
/// Interpolation mode for int8 proto textures (GL_R8I cannot use GL_LINEAR).
///
/// The mode is stored on the processor via `set_int8_interpolation_mode`;
/// `GLProcessorST::new` starts out with [`Int8InterpolationMode::Bilinear`].
#[cfg(feature = "decoder")]
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Int8InterpolationMode {
    /// texelFetch at nearest texel — simplest, fastest GPU execution.
    Nearest,
    /// texelFetch × 4 neighbors with shader-computed bilinear weights (default).
    Bilinear,
    /// Two-pass: dequant int8→f16 FBO, then existing f16 shader with GL_LINEAR.
    TwoPass,
}
1039
/// OpenGL single-threaded image converter.
///
/// Owns the GL context together with every texture, buffer and shader program
/// created by `GLProcessorST::new`. Fields gated on the `decoder` feature only
/// exist when that feature is enabled.
pub struct GLProcessorST {
    // Textures created with `Texture::new()` in `new`.
    // NOTE(review): the two camera textures presumably hold the source image
    // (EGLImage-backed vs. regular upload) and `render_texture` the output —
    // confirm against the convert paths.
    camera_eglimage_texture: Texture,
    camera_normal_texture: Texture,
    render_texture: Texture,
    #[cfg(feature = "decoder")]
    segmentation_texture: Texture,
    // Built from `generate_segmentation_shader`; its `colors` uniform is set
    // in `new` and replaced by `set_class_colors`.
    #[cfg(feature = "decoder")]
    segmentation_program: GlProgram,
    // Built from `generate_instanced_segmentation_shader`; same `colors` handling.
    #[cfg(feature = "decoder")]
    instanced_segmentation_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_texture: Texture,
    // Proto segmentation programs, one per proto texture format/sampling
    // variant (f16, int8 nearest, int8 bilinear, f32). All of them receive the
    // `colors` uniform in `new` and in `set_class_colors`.
    #[cfg(feature = "decoder")]
    proto_segmentation_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_segmentation_int8_nearest_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_segmentation_int8_bilinear_program: GlProgram,
    // Built from `generate_proto_dequant_shader_int8`; no `colors` uniform is loaded.
    #[cfg(feature = "decoder")]
    proto_dequant_int8_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_segmentation_f32_program: GlProgram,
    // Built from `generate_color_shader`; `set_class_colors` loads the colors
    // with alpha forced to 1.0 into this program.
    #[cfg(feature = "decoder")]
    color_program: GlProgram,
    #[cfg(feature = "decoder")]
    /// Whether GL_OES_texture_float_linear is available (allows GL_LINEAR on R32F textures).
    has_float_linear: bool,
    #[cfg(feature = "decoder")]
    /// Interpolation mode for int8 proto textures.
    int8_interpolation_mode: Int8InterpolationMode,
    #[cfg(feature = "decoder")]
    /// Intermediate FBO texture for two-pass int8 dequant path.
    proto_dequant_texture: Texture,
    // Grayscale mask programs built from the `generate_proto_mask_shader_*`
    // generators; no `colors` uniform is loaded into them.
    #[cfg(feature = "decoder")]
    proto_mask_int8_bilinear_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_mask_int8_nearest_program: GlProgram,
    #[cfg(feature = "decoder")]
    proto_mask_f32_program: GlProgram,
    // The mask FBO and its texture are raw GL names: 0 until `ensure_mask_fbo`
    // generates them, and deleted again in this type's `Drop` implementation.
    #[cfg(feature = "decoder")]
    /// Dedicated FBO for mask rendering (render_masks_from_protos).
    mask_fbo: u32,
    #[cfg(feature = "decoder")]
    /// R8 texture attached to mask_fbo.
    mask_fbo_texture: u32,
    #[cfg(feature = "decoder")]
    /// Current allocated width of mask FBO texture.
    mask_fbo_width: usize,
    #[cfg(feature = "decoder")]
    /// Current allocated height of mask FBO texture.
    mask_fbo_height: usize,
    // Created in `new` as `Buffer::new(0, 3, 100)` and `Buffer::new(1, 2, 100)`.
    // NOTE(review): presumably attribute index, components per vertex and
    // capacity — confirm against `Buffer::new`.
    vertex_buffer: Buffer,
    texture_buffer: Buffer,
    // Programs built from the texture, YUV texture and planar RGB fragment
    // shader generators respectively.
    texture_program: GlProgram,
    texture_program_yuv: GlProgram,
    texture_program_planar: GlProgram,
    // Declared last: struct fields are dropped in declaration order, so the
    // context is dropped after all of the GL object fields above.
    gl_context: GlContext,
}
1099
1100impl Drop for GLProcessorST {
1101    fn drop(&mut self) {
1102        unsafe {
1103            #[cfg(feature = "decoder")]
1104            {
1105                if self.mask_fbo != 0 {
1106                    gls::gl::DeleteFramebuffers(1, &self.mask_fbo);
1107                }
1108                if self.mask_fbo_texture != 0 {
1109                    gls::gl::DeleteTextures(1, &self.mask_fbo_texture);
1110                }
1111            }
1112        }
1113    }
1114}
1115
1116impl ImageProcessorTrait for GLProcessorST {
1117    fn convert(
1118        &mut self,
1119        src: &TensorImage,
1120        dst: &mut TensorImage,
1121        rotation: crate::Rotation,
1122        flip: Flip,
1123        crop: Crop,
1124    ) -> crate::Result<()> {
1125        crop.check_crop(src, dst)?;
1126        if !Self::check_src_format_supported(self.gl_context.support_dma, src) {
1127            return Err(crate::Error::NotSupported(format!(
1128                "Opengl doesn't support {} source texture",
1129                src.fourcc().display()
1130            )));
1131        }
1132
1133        if !Self::check_dst_format_supported(self.gl_context.support_dma, dst) {
1134            return Err(crate::Error::NotSupported(format!(
1135                "Opengl doesn't support {} destination texture",
1136                dst.fourcc().display()
1137            )));
1138        }
1139        log::debug!(
1140            "dst tensor: {:?} src tensor :{:?}",
1141            dst.tensor().memory(),
1142            src.tensor().memory()
1143        );
1144        check_gl_error(function!(), line!())?;
1145        if self.gl_context.support_dma
1146            && dst.tensor().memory() == TensorMemory::Dma
1147            && dst.fourcc() != RGB
1148        // DMA generally doesn't support RGB
1149        {
1150            let res = self.convert_dest_dma(dst, src, rotation, flip, crop);
1151            return res;
1152        }
1153        let start = Instant::now();
1154        let res = self.convert_dest_non_dma(dst, src, rotation, flip, crop);
1155        log::debug!("convert_dest_non_dma takes {:?}", start.elapsed());
1156        res
1157    }
1158
1159    fn convert_ref(
1160        &mut self,
1161        src: &TensorImage,
1162        dst: &mut TensorImageRef<'_>,
1163        rotation: Rotation,
1164        flip: Flip,
1165        crop: Crop,
1166    ) -> crate::Result<()> {
1167        // OpenGL doesn't support PLANAR_RGB output, delegate to CPU
1168        let mut cpu = CPUProcessor::new();
1169        cpu.convert_ref(src, dst, rotation, flip, crop)
1170    }
1171
1172    #[cfg(feature = "decoder")]
1173    fn render_to_image(
1174        &mut self,
1175        dst: &mut TensorImage,
1176        detect: &[DetectBox],
1177        segmentation: &[Segmentation],
1178    ) -> Result<(), crate::Error> {
1179        use crate::FunctionTimer;
1180
1181        let _timer = FunctionTimer::new("GLProcessorST::render_to_image");
1182        if !matches!(dst.fourcc(), RGBA | RGB) {
1183            return Err(crate::Error::NotSupported(
1184                "Opengl image rendering only supports RGBA or RGB images".to_string(),
1185            ));
1186        }
1187
1188        let (_render_buffer, is_dma) = match dst.tensor.memory() {
1189            edgefirst_tensor::TensorMemory::Dma => {
1190                if let Ok(render_buffer) = self.setup_renderbuffer_dma(dst) {
1191                    (render_buffer, true)
1192                } else {
1193                    (
1194                        self.setup_renderbuffer_non_dma(
1195                            dst,
1196                            Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1197                        )?,
1198                        false,
1199                    )
1200                }
1201            }
1202            _ => (
1203                self.setup_renderbuffer_non_dma(
1204                    dst,
1205                    Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1206                )?,
1207                false,
1208            ), // Add dest rect to make sure dst is rendered fully
1209        };
1210
1211        gls::enable(gls::gl::BLEND);
1212        gls::blend_func_separate(
1213            gls::gl::SRC_ALPHA,
1214            gls::gl::ONE_MINUS_SRC_ALPHA,
1215            gls::gl::ZERO,
1216            gls::gl::ONE,
1217        );
1218
1219        self.render_box(dst, detect)?;
1220        self.render_segmentation(detect, segmentation)?;
1221
1222        gls::finish();
1223        if !is_dma {
1224            let mut dst_map = dst.tensor().map()?;
1225            let format = match dst.fourcc() {
1226                RGB => gls::gl::RGB,
1227                RGBA => gls::gl::RGBA,
1228                _ => unreachable!(),
1229            };
1230            unsafe {
1231                gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1232                gls::gl::ReadnPixels(
1233                    0,
1234                    0,
1235                    dst.width() as i32,
1236                    dst.height() as i32,
1237                    format,
1238                    gls::gl::UNSIGNED_BYTE,
1239                    dst.tensor.len() as i32,
1240                    dst_map.as_mut_ptr() as *mut c_void,
1241                );
1242            }
1243        }
1244
1245        Ok(())
1246    }
1247
1248    #[cfg(feature = "decoder")]
1249    fn render_from_protos(
1250        &mut self,
1251        dst: &mut TensorImage,
1252        detect: &[DetectBox],
1253        proto_data: &ProtoData,
1254    ) -> crate::Result<()> {
1255        use crate::FunctionTimer;
1256
1257        let _timer = FunctionTimer::new("GLProcessorST::render_from_protos");
1258        if !matches!(dst.fourcc(), RGBA | RGB) {
1259            return Err(crate::Error::NotSupported(
1260                "Opengl image rendering only supports RGBA or RGB images".to_string(),
1261            ));
1262        }
1263
1264        let (_render_buffer, is_dma) = match dst.tensor.memory() {
1265            edgefirst_tensor::TensorMemory::Dma => {
1266                if let Ok(render_buffer) = self.setup_renderbuffer_dma(dst) {
1267                    (render_buffer, true)
1268                } else {
1269                    (
1270                        self.setup_renderbuffer_non_dma(
1271                            dst,
1272                            Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1273                        )?,
1274                        false,
1275                    )
1276                }
1277            }
1278            _ => (
1279                self.setup_renderbuffer_non_dma(
1280                    dst,
1281                    Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1282                )?,
1283                false,
1284            ),
1285        };
1286
1287        gls::enable(gls::gl::BLEND);
1288        gls::blend_func_separate(
1289            gls::gl::SRC_ALPHA,
1290            gls::gl::ONE_MINUS_SRC_ALPHA,
1291            gls::gl::ZERO,
1292            gls::gl::ONE,
1293        );
1294
1295        self.render_box(dst, detect)?;
1296        self.render_proto_segmentation(detect, proto_data)?;
1297
1298        gls::finish();
1299        if !is_dma {
1300            let mut dst_map = dst.tensor().map()?;
1301            let format = match dst.fourcc() {
1302                RGB => gls::gl::RGB,
1303                RGBA => gls::gl::RGBA,
1304                _ => unreachable!(),
1305            };
1306            unsafe {
1307                gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1308                gls::gl::ReadnPixels(
1309                    0,
1310                    0,
1311                    dst.width() as i32,
1312                    dst.height() as i32,
1313                    format,
1314                    gls::gl::UNSIGNED_BYTE,
1315                    dst.tensor.len() as i32,
1316                    dst_map.as_mut_ptr() as *mut c_void,
1317                );
1318            }
1319        }
1320
1321        Ok(())
1322    }
1323
1324    #[cfg(feature = "decoder")]
1325    fn render_masks_from_protos(
1326        &mut self,
1327        detect: &[DetectBox],
1328        proto_data: ProtoData,
1329        output_width: usize,
1330        output_height: usize,
1331    ) -> crate::Result<Vec<MaskResult>> {
1332        GLProcessorST::render_masks_from_protos(
1333            self,
1334            detect,
1335            &proto_data,
1336            output_width,
1337            output_height,
1338        )
1339    }
1340
1341    #[cfg(feature = "decoder")]
1342    fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> crate::Result<()> {
1343        if colors.is_empty() {
1344            return Ok(());
1345        }
1346        let mut colors_f32 = colors
1347            .iter()
1348            .map(|c| {
1349                [
1350                    c[0] as f32 / 255.0,
1351                    c[1] as f32 / 255.0,
1352                    c[2] as f32 / 255.0,
1353                    c[3] as f32 / 255.0,
1354                ]
1355            })
1356            .take(20)
1357            .collect::<Vec<[f32; 4]>>();
1358
1359        self.segmentation_program
1360            .load_uniform_4fv(c"colors", &colors_f32)?;
1361        self.instanced_segmentation_program
1362            .load_uniform_4fv(c"colors", &colors_f32)?;
1363        self.proto_segmentation_program
1364            .load_uniform_4fv(c"colors", &colors_f32)?;
1365        self.proto_segmentation_int8_nearest_program
1366            .load_uniform_4fv(c"colors", &colors_f32)?;
1367        self.proto_segmentation_int8_bilinear_program
1368            .load_uniform_4fv(c"colors", &colors_f32)?;
1369        self.proto_segmentation_f32_program
1370            .load_uniform_4fv(c"colors", &colors_f32)?;
1371
1372        colors_f32.iter_mut().for_each(|c| {
1373            c[3] = 1.0; // set alpha to 1.0 for color rendering
1374        });
1375        self.color_program
1376            .load_uniform_4fv(c"colors", &colors_f32)?;
1377
1378        Ok(())
1379    }
1380}
1381
1382impl GLProcessorST {
1383    pub fn new(kind: Option<EglDisplayKind>) -> Result<GLProcessorST, crate::Error> {
1384        let gl_context = GlContext::new(kind)?;
1385        gls::load_with(|s| {
1386            gl_context
1387                .egl
1388                .get_proc_address(s)
1389                .map_or(std::ptr::null(), |p| p as *const _)
1390        });
1391
1392        let has_float_linear = Self::gl_check_support()?;
1393
1394        // Uploads and downloads are all packed with no alignment requirements
1395        unsafe {
1396            gls::gl::PixelStorei(gls::gl::PACK_ALIGNMENT, 1);
1397            gls::gl::PixelStorei(gls::gl::UNPACK_ALIGNMENT, 1);
1398        }
1399
1400        let texture_program_planar =
1401            GlProgram::new(generate_vertex_shader(), generate_planar_rgb_shader())?;
1402
1403        let texture_program =
1404            GlProgram::new(generate_vertex_shader(), generate_texture_fragment_shader())?;
1405
1406        let texture_program_yuv = GlProgram::new(
1407            generate_vertex_shader(),
1408            generate_texture_fragment_shader_yuv(),
1409        )?;
1410
1411        #[cfg(feature = "decoder")]
1412        let segmentation_program =
1413            GlProgram::new(generate_vertex_shader(), generate_segmentation_shader())?;
1414        #[cfg(feature = "decoder")]
1415        segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1416        #[cfg(feature = "decoder")]
1417        let instanced_segmentation_program = GlProgram::new(
1418            generate_vertex_shader(),
1419            generate_instanced_segmentation_shader(),
1420        )?;
1421        #[cfg(feature = "decoder")]
1422        instanced_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1423
1424        // Existing f16 proto shader (RGBA16F, 4 protos per layer)
1425        #[cfg(feature = "decoder")]
1426        let proto_segmentation_program = GlProgram::new(
1427            generate_vertex_shader(),
1428            generate_proto_segmentation_shader(),
1429        )?;
1430        #[cfg(feature = "decoder")]
1431        proto_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1432
1433        // Int8 proto shaders (R8I, 1 proto per layer, 32 layers)
1434        #[cfg(feature = "decoder")]
1435        let proto_segmentation_int8_nearest_program = GlProgram::new(
1436            generate_vertex_shader(),
1437            generate_proto_segmentation_shader_int8_nearest(),
1438        )?;
1439        #[cfg(feature = "decoder")]
1440        proto_segmentation_int8_nearest_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1441
1442        #[cfg(feature = "decoder")]
1443        let proto_segmentation_int8_bilinear_program = GlProgram::new(
1444            generate_vertex_shader(),
1445            generate_proto_segmentation_shader_int8_bilinear(),
1446        )?;
1447        #[cfg(feature = "decoder")]
1448        proto_segmentation_int8_bilinear_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1449
1450        #[cfg(feature = "decoder")]
1451        let proto_dequant_int8_program = GlProgram::new(
1452            generate_vertex_shader(),
1453            generate_proto_dequant_shader_int8(),
1454        )?;
1455
1456        // F32 proto shader (R32F, 1 proto per layer, 32 layers)
1457        #[cfg(feature = "decoder")]
1458        let proto_segmentation_f32_program = GlProgram::new(
1459            generate_vertex_shader(),
1460            generate_proto_segmentation_shader_f32(),
1461        )?;
1462        #[cfg(feature = "decoder")]
1463        proto_segmentation_f32_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1464
1465        #[cfg(feature = "decoder")]
1466        let color_program = GlProgram::new(generate_vertex_shader(), generate_color_shader())?;
1467        #[cfg(feature = "decoder")]
1468        color_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
1469
1470        // Grayscale mask shaders (no color/discard, sigmoid → RED channel)
1471        #[cfg(feature = "decoder")]
1472        let proto_mask_int8_nearest_program = GlProgram::new(
1473            generate_vertex_shader(),
1474            generate_proto_mask_shader_int8_nearest(),
1475        )?;
1476        #[cfg(feature = "decoder")]
1477        let proto_mask_int8_bilinear_program = GlProgram::new(
1478            generate_vertex_shader(),
1479            generate_proto_mask_shader_int8_bilinear(),
1480        )?;
1481        #[cfg(feature = "decoder")]
1482        let proto_mask_f32_program =
1483            GlProgram::new(generate_vertex_shader(), generate_proto_mask_shader_f32())?;
1484
1485        let camera_eglimage_texture = Texture::new();
1486        let camera_normal_texture = Texture::new();
1487        let render_texture = Texture::new();
1488        let segmentation_texture = Texture::new();
1489        #[cfg(feature = "decoder")]
1490        let proto_texture = Texture::new();
1491        #[cfg(feature = "decoder")]
1492        let proto_dequant_texture = Texture::new();
1493        let vertex_buffer = Buffer::new(0, 3, 100);
1494        let texture_buffer = Buffer::new(1, 2, 100);
1495
1496        let converter = GLProcessorST {
1497            gl_context,
1498            texture_program,
1499            texture_program_yuv,
1500            texture_program_planar,
1501            camera_eglimage_texture,
1502            camera_normal_texture,
1503            #[cfg(feature = "decoder")]
1504            segmentation_texture,
1505            #[cfg(feature = "decoder")]
1506            proto_texture,
1507            #[cfg(feature = "decoder")]
1508            proto_segmentation_int8_nearest_program,
1509            #[cfg(feature = "decoder")]
1510            proto_segmentation_int8_bilinear_program,
1511            #[cfg(feature = "decoder")]
1512            proto_dequant_int8_program,
1513            #[cfg(feature = "decoder")]
1514            proto_segmentation_f32_program,
1515            #[cfg(feature = "decoder")]
1516            has_float_linear,
1517            #[cfg(feature = "decoder")]
1518            int8_interpolation_mode: Int8InterpolationMode::Bilinear,
1519            #[cfg(feature = "decoder")]
1520            proto_dequant_texture,
1521            #[cfg(feature = "decoder")]
1522            proto_mask_int8_bilinear_program,
1523            #[cfg(feature = "decoder")]
1524            proto_mask_int8_nearest_program,
1525            #[cfg(feature = "decoder")]
1526            proto_mask_f32_program,
1527            #[cfg(feature = "decoder")]
1528            mask_fbo: 0,
1529            #[cfg(feature = "decoder")]
1530            mask_fbo_texture: 0,
1531            #[cfg(feature = "decoder")]
1532            mask_fbo_width: 0,
1533            #[cfg(feature = "decoder")]
1534            mask_fbo_height: 0,
1535            vertex_buffer,
1536            texture_buffer,
1537            render_texture,
1538            #[cfg(feature = "decoder")]
1539            segmentation_program,
1540            #[cfg(feature = "decoder")]
1541            instanced_segmentation_program,
1542            #[cfg(feature = "decoder")]
1543            proto_segmentation_program,
1544            #[cfg(feature = "decoder")]
1545            color_program,
1546        };
1547        check_gl_error(function!(), line!())?;
1548
1549        log::debug!("GLConverter created");
1550        Ok(converter)
1551    }
1552
1553    /// Sets the interpolation mode for int8 proto textures.
1554    #[cfg(feature = "decoder")]
1555    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) {
1556        self.int8_interpolation_mode = mode;
1557        log::debug!("Int8 interpolation mode set to {:?}", mode);
1558    }
1559
1560    /// Ensures the mask FBO + R8 texture are allocated at the given dimensions.
1561    /// Creates or resizes the FBO and texture as needed.
1562    #[cfg(feature = "decoder")]
1563    fn ensure_mask_fbo(&mut self, width: usize, height: usize) -> crate::Result<()> {
1564        if self.mask_fbo_width == width && self.mask_fbo_height == height && self.mask_fbo != 0 {
1565            return Ok(());
1566        }
1567
1568        // Create FBO if needed
1569        if self.mask_fbo == 0 {
1570            unsafe {
1571                gls::gl::GenFramebuffers(1, &mut self.mask_fbo);
1572            }
1573        }
1574        // Create texture if needed
1575        if self.mask_fbo_texture == 0 {
1576            unsafe {
1577                gls::gl::GenTextures(1, &mut self.mask_fbo_texture);
1578            }
1579        }
1580
1581        // Allocate R8 texture
1582        unsafe {
1583            gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.mask_fbo_texture);
1584            gls::gl::TexImage2D(
1585                gls::gl::TEXTURE_2D,
1586                0,
1587                gls::gl::R8 as i32,
1588                width as i32,
1589                height as i32,
1590                0,
1591                gls::gl::RED,
1592                gls::gl::UNSIGNED_BYTE,
1593                std::ptr::null(),
1594            );
1595            gls::gl::TexParameteri(
1596                gls::gl::TEXTURE_2D,
1597                gls::gl::TEXTURE_MIN_FILTER,
1598                gls::gl::NEAREST as i32,
1599            );
1600            gls::gl::TexParameteri(
1601                gls::gl::TEXTURE_2D,
1602                gls::gl::TEXTURE_MAG_FILTER,
1603                gls::gl::NEAREST as i32,
1604            );
1605        }
1606
1607        // Attach to FBO
1608        unsafe {
1609            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
1610            gls::gl::FramebufferTexture2D(
1611                gls::gl::FRAMEBUFFER,
1612                gls::gl::COLOR_ATTACHMENT0,
1613                gls::gl::TEXTURE_2D,
1614                self.mask_fbo_texture,
1615                0,
1616            );
1617            let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
1618            if status != gls::gl::FRAMEBUFFER_COMPLETE {
1619                return Err(crate::Error::OpenGl(format!(
1620                    "Mask FBO incomplete: status=0x{status:X}"
1621                )));
1622            }
1623            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0);
1624        }
1625
1626        self.mask_fbo_width = width;
1627        self.mask_fbo_height = height;
1628        log::debug!("Mask FBO allocated at {width}x{height}");
1629        Ok(())
1630    }
1631
1632    /// Render per-instance grayscale masks at full output resolution.
1633    ///
1634    /// For each detection, renders a quad to a dedicated R8 FBO using a
1635    /// grayscale mask shader (sigmoid → RED channel, no threshold/discard).
1636    /// Reads back only the bounding-box region via `glReadPixels`.
1637    ///
1638    /// Returns a `Vec` of `(x, y, w, h, Vec<u8>)` tuples — one per detection.
1639    #[cfg(feature = "decoder")]
1640    pub fn render_masks_from_protos(
1641        &mut self,
1642        detect: &[DetectBox],
1643        proto_data: &ProtoData,
1644        output_width: usize,
1645        output_height: usize,
1646    ) -> crate::Result<Vec<MaskResult>> {
1647        use crate::FunctionTimer;
1648
1649        let _timer = FunctionTimer::new("GLProcessorST::render_masks_from_protos");
1650
1651        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
1652            return Ok(Vec::new());
1653        }
1654
1655        let (height, width, num_protos) = proto_data.protos.dim();
1656        let texture_target = gls::gl::TEXTURE_2D_ARRAY;
1657
1658        // Save current FBO and viewport
1659        let (saved_fbo, saved_viewport) = unsafe {
1660            let mut fbo: i32 = 0;
1661            gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
1662            let mut vp = [0i32; 4];
1663            gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
1664            (fbo as u32, vp)
1665        };
1666
1667        // Ensure mask FBO is allocated at the right size
1668        self.ensure_mask_fbo(output_width, output_height)?;
1669
1670        // Upload proto texture array and select the appropriate shader
1671        gls::active_texture(gls::gl::TEXTURE0);
1672        gls::bind_texture(texture_target, self.proto_texture.id);
1673        gls::tex_parameteri(
1674            texture_target,
1675            gls::gl::TEXTURE_MIN_FILTER,
1676            gls::gl::NEAREST as i32,
1677        );
1678        gls::tex_parameteri(
1679            texture_target,
1680            gls::gl::TEXTURE_MAG_FILTER,
1681            gls::gl::NEAREST as i32,
1682        );
1683        gls::tex_parameteri(
1684            texture_target,
1685            gls::gl::TEXTURE_WRAP_S,
1686            gls::gl::CLAMP_TO_EDGE as i32,
1687        );
1688        gls::tex_parameteri(
1689            texture_target,
1690            gls::gl::TEXTURE_WRAP_T,
1691            gls::gl::CLAMP_TO_EDGE as i32,
1692        );
1693
1694        match &proto_data.protos {
1695            ProtoTensor::Quantized {
1696                protos,
1697                quantization,
1698            } => {
1699                // Repack to layer-first layout (same as render_proto_segmentation_int8)
1700                let mut tex_data = vec![0i8; height * width * num_protos];
1701                for k in 0..num_protos {
1702                    for y in 0..height {
1703                        for x in 0..width {
1704                            tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
1705                        }
1706                    }
1707                }
1708                gls::tex_image3d(
1709                    texture_target,
1710                    0,
1711                    gls::gl::R8I as i32,
1712                    width as i32,
1713                    height as i32,
1714                    num_protos as i32,
1715                    0,
1716                    gls::gl::RED_INTEGER,
1717                    gls::gl::BYTE,
1718                    Some(&tex_data),
1719                );
1720
1721                let proto_scale = quantization.scale;
1722                let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
1723
1724                let program = match self.int8_interpolation_mode {
1725                    Int8InterpolationMode::Nearest => &self.proto_mask_int8_nearest_program,
1726                    _ => &self.proto_mask_int8_bilinear_program,
1727                };
1728                gls::use_program(program.id);
1729                program.load_uniform_1i(c"num_protos", num_protos as i32)?;
1730                program.load_uniform_1f(c"proto_scale", proto_scale)?;
1731                program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
1732
1733                self.render_mask_quads(
1734                    program,
1735                    detect,
1736                    &proto_data.mask_coefficients,
1737                    output_width,
1738                    output_height,
1739                )
1740            }
1741            ProtoTensor::Float(protos_f32) => {
1742                // Repack to layer-first layout
1743                let mut tex_data = vec![0.0f32; height * width * num_protos];
1744                for k in 0..num_protos {
1745                    for y in 0..height {
1746                        for x in 0..width {
1747                            tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
1748                        }
1749                    }
1750                }
1751                gls::tex_image3d(
1752                    texture_target,
1753                    0,
1754                    gls::gl::R32F as i32,
1755                    width as i32,
1756                    height as i32,
1757                    num_protos as i32,
1758                    0,
1759                    gls::gl::RED,
1760                    gls::gl::FLOAT,
1761                    Some(&tex_data),
1762                );
1763                if self.has_float_linear {
1764                    gls::tex_parameteri(
1765                        texture_target,
1766                        gls::gl::TEXTURE_MIN_FILTER,
1767                        gls::gl::LINEAR as i32,
1768                    );
1769                    gls::tex_parameteri(
1770                        texture_target,
1771                        gls::gl::TEXTURE_MAG_FILTER,
1772                        gls::gl::LINEAR as i32,
1773                    );
1774                }
1775
1776                let program = &self.proto_mask_f32_program;
1777                gls::use_program(program.id);
1778                program.load_uniform_1i(c"num_protos", num_protos as i32)?;
1779
1780                self.render_mask_quads(
1781                    program,
1782                    detect,
1783                    &proto_data.mask_coefficients,
1784                    output_width,
1785                    output_height,
1786                )
1787            }
1788        }
1789        .inspect(|_| {
1790            // Restore previous FBO + viewport
1791            unsafe {
1792                gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
1793                gls::gl::Viewport(
1794                    saved_viewport[0],
1795                    saved_viewport[1],
1796                    saved_viewport[2],
1797                    saved_viewport[3],
1798                );
1799            }
1800        })
1801    }
1802
1803    /// Render per-detection quads to the mask FBO and read back bbox regions.
1804    ///
1805    /// For each detection: clear FBO, set coefficients, render quad, read back
1806    /// the bounding-box region as R8 pixels.
1807    #[cfg(feature = "decoder")]
1808    fn render_mask_quads(
1809        &self,
1810        program: &GlProgram,
1811        detect: &[DetectBox],
1812        mask_coefficients: &[Vec<f32>],
1813        output_width: usize,
1814        output_height: usize,
1815    ) -> crate::Result<Vec<MaskResult>> {
1816        let mut results = Vec::with_capacity(detect.len());
1817
1818        if let Some(first_coeff) = mask_coefficients.first() {
1819            if first_coeff.len() > 32 {
1820                log::warn!(
1821                    "render_mask_quads: {} mask coefficients exceeds shader \
1822                     limit of 32 — coefficients will be truncated",
1823                    first_coeff.len()
1824                );
1825            }
1826        }
1827
1828        // Bind mask FBO and set viewport
1829        unsafe {
1830            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
1831            gls::gl::Viewport(0, 0, output_width as i32, output_height as i32);
1832            gls::gl::Disable(gls::gl::BLEND);
1833        }
1834
1835        for (det, coeff) in detect.iter().zip(mask_coefficients.iter()) {
1836            // Clear to black (0)
1837            unsafe {
1838                gls::gl::ClearColor(0.0, 0.0, 0.0, 0.0);
1839                gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
1840            }
1841
1842            // Set mask coefficients
1843            let mut packed_coeff = [[0.0f32; 4]; 8];
1844            for (i, val) in coeff.iter().enumerate().take(32) {
1845                packed_coeff[i / 4][i % 4] = *val;
1846            }
1847            program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;
1848
1849            // Compute bbox pixel coordinates, clamped to FBO bounds.
1850            let ow = output_width as i32;
1851            let oh = output_height as i32;
1852            let bbox_x = (det.bbox.xmin * output_width as f32).round() as i32;
1853            let bbox_y = (det.bbox.ymin * output_height as f32).round() as i32;
1854            let bbox_x2 = (det.bbox.xmax * output_width as f32).round() as i32;
1855            let bbox_y2 = (det.bbox.ymax * output_height as f32).round() as i32;
1856            let bbox_x = bbox_x.max(0).min(ow);
1857            let bbox_y = bbox_y.max(0).min(oh);
1858            let bbox_x2 = bbox_x2.max(bbox_x).min(ow);
1859            let bbox_y2 = bbox_y2.max(bbox_y).min(oh);
1860            let bbox_w = (bbox_x2 - bbox_x).max(1);
1861            let bbox_h = (bbox_y2 - bbox_y).max(1);
1862
1863            // Compute NDC coordinates for the quad
1864            let cvt = |normalized: f32| normalized * 2.0 - 1.0;
1865            let dst_left = cvt(det.bbox.xmin);
1866            let dst_right = cvt(det.bbox.xmax);
1867            let dst_top = cvt(det.bbox.ymax);
1868            let dst_bottom = cvt(det.bbox.ymin);
1869
1870            let src_left = det.bbox.xmin;
1871            let src_right = det.bbox.xmax;
1872            let src_top = 1.0 - det.bbox.ymin;
1873            let src_bottom = 1.0 - det.bbox.ymax;
1874
1875            unsafe {
1876                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
1877                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
1878                let verts: [f32; 12] = [
1879                    dst_left, dst_top, 0.0, dst_right, dst_top, 0.0, dst_right, dst_bottom, 0.0,
1880                    dst_left, dst_bottom, 0.0,
1881                ];
1882                gls::gl::BufferSubData(
1883                    gls::gl::ARRAY_BUFFER,
1884                    0,
1885                    (size_of::<f32>() * 12) as isize,
1886                    verts.as_ptr() as *const c_void,
1887                );
1888
1889                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
1890                gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
1891                let tc: [f32; 8] = [
1892                    src_left, src_top, src_right, src_top, src_right, src_bottom, src_left,
1893                    src_bottom,
1894                ];
1895                gls::gl::BufferSubData(
1896                    gls::gl::ARRAY_BUFFER,
1897                    0,
1898                    (size_of::<f32>() * 8) as isize,
1899                    tc.as_ptr() as *const c_void,
1900                );
1901
1902                let idx: [u32; 4] = [0, 1, 2, 3];
1903                gls::gl::DrawElements(
1904                    gls::gl::TRIANGLE_FAN,
1905                    4,
1906                    gls::gl::UNSIGNED_INT,
1907                    idx.as_ptr() as *const c_void,
1908                );
1909            }
1910
1911            // Read back the bbox region only
1912            let pixel_count = (bbox_w * bbox_h) as usize;
1913            let mut pixels = vec![0u8; pixel_count];
1914
1915            unsafe {
1916                gls::gl::Finish();
1917                gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1918                gls::gl::ReadnPixels(
1919                    bbox_x,
1920                    bbox_y,
1921                    bbox_w,
1922                    bbox_h,
1923                    gls::gl::RED,
1924                    gls::gl::UNSIGNED_BYTE,
1925                    pixel_count as i32,
1926                    pixels.as_mut_ptr() as *mut c_void,
1927                );
1928            }
1929
1930            results.push(MaskResult {
1931                x: bbox_x as usize,
1932                y: bbox_y as usize,
1933                w: bbox_w as usize,
1934                h: bbox_h as usize,
1935                pixels,
1936            });
1937        }
1938
1939        Ok(results)
1940    }
1941
1942    fn check_src_format_supported(support_dma: bool, img: &TensorImage) -> bool {
1943        if support_dma && img.tensor().memory() == TensorMemory::Dma {
1944            // EGLImage supports RGBA, GREY, YUYV, and NV12 for DMA buffers
1945            matches!(img.fourcc(), RGBA | GREY | YUYV | NV12)
1946        } else {
1947            matches!(img.fourcc(), RGB | RGBA | GREY)
1948        }
1949    }
1950
1951    fn check_dst_format_supported(support_dma: bool, img: &TensorImage) -> bool {
1952        if support_dma && img.tensor().memory() == TensorMemory::Dma {
1953            // generally EGLImage doesn't support RGB
1954            matches!(img.fourcc(), RGBA | GREY | PLANAR_RGB)
1955        } else {
1956            matches!(img.fourcc(), RGB | RGBA | GREY)
1957        }
1958    }
1959
1960    /// Checks required GL extensions and returns whether optional capabilities
1961    /// are available. Returns `has_float_linear` (GL_OES_texture_float_linear).
1962    fn gl_check_support() -> Result<bool, crate::Error> {
1963        if let Ok(version) = gls::get_string(gls::gl::SHADING_LANGUAGE_VERSION) {
1964            log::debug!("GL Shading Language Version: {version:?}");
1965        } else {
1966            log::warn!("Could not get GL Shading Language Version");
1967        }
1968
1969        let extensions = unsafe {
1970            let str = gls::gl::GetString(gls::gl::EXTENSIONS);
1971            if str.is_null() {
1972                return Err(crate::Error::GLVersion(
1973                    "GL returned no supported extensions".to_string(),
1974                ));
1975            }
1976            CStr::from_ptr(str as *const c_char)
1977                .to_string_lossy()
1978                .to_string()
1979        };
1980        log::debug!("GL Extensions: {extensions}");
1981        let required_ext = [
1982            "GL_OES_EGL_image_external_essl3",
1983            "GL_OES_surfaceless_context",
1984        ];
1985        let extensions = extensions.split_ascii_whitespace().collect::<BTreeSet<_>>();
1986        for required in required_ext {
1987            if !extensions.contains(required) {
1988                return Err(crate::Error::GLVersion(format!(
1989                    "GL does not support {required} extension",
1990                )));
1991            }
1992        }
1993
1994        let has_float_linear = extensions.contains("GL_OES_texture_float_linear");
1995        log::debug!("GL_OES_texture_float_linear: {has_float_linear}");
1996
1997        Ok(has_float_linear)
1998    }
1999
2000    fn setup_renderbuffer_dma(&mut self, dst: &TensorImage) -> crate::Result<FrameBuffer> {
2001        let frame_buffer = FrameBuffer::new();
2002        frame_buffer.bind();
2003
2004        let (width, height) = if matches!(dst.fourcc(), PLANAR_RGB) {
2005            let width = dst.width();
2006            let height = dst.height() * 3;
2007            (width as i32, height as i32)
2008        } else {
2009            (dst.width() as i32, dst.height() as i32)
2010        };
2011        let dest_img = self.create_image_from_dma2(dst)?;
2012        unsafe {
2013            gls::gl::UseProgram(self.texture_program_yuv.id);
2014            gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2015            gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2016            gls::gl::TexParameteri(
2017                gls::gl::TEXTURE_2D,
2018                gls::gl::TEXTURE_MIN_FILTER,
2019                gls::gl::LINEAR as i32,
2020            );
2021            gls::gl::TexParameteri(
2022                gls::gl::TEXTURE_2D,
2023                gls::gl::TEXTURE_MAG_FILTER,
2024                gls::gl::LINEAR as i32,
2025            );
2026            gls::gl::EGLImageTargetTexture2DOES(gls::gl::TEXTURE_2D, dest_img.egl_image.as_ptr());
2027            gls::gl::FramebufferTexture2D(
2028                gls::gl::FRAMEBUFFER,
2029                gls::gl::COLOR_ATTACHMENT0,
2030                gls::gl::TEXTURE_2D,
2031                self.render_texture.id,
2032                0,
2033            );
2034            check_gl_error(function!(), line!())?;
2035            gls::gl::Viewport(0, 0, width, height);
2036        }
2037        Ok(frame_buffer)
2038    }
2039
2040    fn convert_dest_dma(
2041        &mut self,
2042        dst: &mut TensorImage,
2043        src: &TensorImage,
2044        rotation: crate::Rotation,
2045        flip: Flip,
2046        crop: Crop,
2047    ) -> crate::Result<()> {
2048        assert!(self.gl_context.support_dma);
2049        let _framebuffer = self.setup_renderbuffer_dma(dst)?;
2050        if dst.is_planar() {
2051            self.convert_to_planar(src, dst, rotation, flip, crop)
2052        } else {
2053            self.convert_to(src, dst, rotation, flip, crop)
2054        }
2055    }
2056
2057    fn setup_renderbuffer_non_dma(
2058        &mut self,
2059        dst: &TensorImage,
2060        crop: Crop,
2061    ) -> crate::Result<FrameBuffer> {
2062        debug_assert!(matches!(dst.fourcc(), RGB | RGBA | GREY | PLANAR_RGB));
2063        let (width, height) = if dst.is_planar() {
2064            let width = dst.width() / 4;
2065            let height = match dst.fourcc() {
2066                RGBA => dst.height() * 4,
2067                RGB => dst.height() * 3,
2068                GREY => dst.height(),
2069                _ => unreachable!(),
2070            };
2071            (width as i32, height as i32)
2072        } else {
2073            (dst.width() as i32, dst.height() as i32)
2074        };
2075
2076        let format = if dst.is_planar() {
2077            gls::gl::RED
2078        } else {
2079            match dst.fourcc() {
2080                RGB => gls::gl::RGB,
2081                RGBA => gls::gl::RGBA,
2082                GREY => gls::gl::RED,
2083                _ => unreachable!(),
2084            }
2085        };
2086
2087        let start = Instant::now();
2088        let frame_buffer = FrameBuffer::new();
2089        frame_buffer.bind();
2090
2091        let map;
2092
2093        let pixels = if crop.dst_rect.is_none_or(|crop| {
2094            crop.top == 0
2095                && crop.left == 0
2096                && crop.height == dst.height()
2097                && crop.width == dst.width()
2098        }) {
2099            std::ptr::null()
2100        } else {
2101            map = dst.tensor().map()?;
2102            map.as_ptr() as *const c_void
2103        };
2104        unsafe {
2105            gls::gl::UseProgram(self.texture_program.id);
2106            gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2107            gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2108            gls::gl::TexParameteri(
2109                gls::gl::TEXTURE_2D,
2110                gls::gl::TEXTURE_MIN_FILTER,
2111                gls::gl::LINEAR as i32,
2112            );
2113            gls::gl::TexParameteri(
2114                gls::gl::TEXTURE_2D,
2115                gls::gl::TEXTURE_MAG_FILTER,
2116                gls::gl::LINEAR as i32,
2117            );
2118
2119            gls::gl::TexImage2D(
2120                gls::gl::TEXTURE_2D,
2121                0,
2122                format as i32,
2123                width,
2124                height,
2125                0,
2126                format,
2127                gls::gl::UNSIGNED_BYTE,
2128                pixels,
2129            );
2130            check_gl_error(function!(), line!())?;
2131            gls::gl::FramebufferTexture2D(
2132                gls::gl::FRAMEBUFFER,
2133                gls::gl::COLOR_ATTACHMENT0,
2134                gls::gl::TEXTURE_2D,
2135                self.render_texture.id,
2136                0,
2137            );
2138            check_gl_error(function!(), line!())?;
2139            gls::gl::Viewport(0, 0, width, height);
2140        }
2141        log::debug!("Set up framebuffer takes {:?}", start.elapsed());
2142        Ok(frame_buffer)
2143    }
2144
2145    fn convert_dest_non_dma(
2146        &mut self,
2147        dst: &mut TensorImage,
2148        src: &TensorImage,
2149        rotation: crate::Rotation,
2150        flip: Flip,
2151        crop: Crop,
2152    ) -> crate::Result<()> {
2153        let _framebuffer = self.setup_renderbuffer_non_dma(dst, crop)?;
2154        let start = Instant::now();
2155        if dst.is_planar() {
2156            self.convert_to_planar(src, dst, rotation, flip, crop)?;
2157        } else {
2158            self.convert_to(src, dst, rotation, flip, crop)?;
2159        }
2160        log::debug!("Draw to framebuffer takes {:?}", start.elapsed());
2161        let start = Instant::now();
2162        let dest_format = match dst.fourcc() {
2163            RGB => gls::gl::RGB,
2164            RGBA => gls::gl::RGBA,
2165            GREY => gls::gl::RED,
2166            _ => unreachable!(),
2167        };
2168
2169        unsafe {
2170            let mut dst_map = dst.tensor().map()?;
2171            gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
2172            gls::gl::ReadnPixels(
2173                0,
2174                0,
2175                dst.width() as i32,
2176                dst.height() as i32,
2177                dest_format,
2178                gls::gl::UNSIGNED_BYTE,
2179                dst.tensor.len() as i32,
2180                dst_map.as_mut_ptr() as *mut c_void,
2181            );
2182        }
2183        log::debug!("Read from framebuffer takes {:?}", start.elapsed());
2184        Ok(())
2185    }
2186
2187    fn convert_to(
2188        &mut self,
2189        src: &TensorImage,
2190        dst: &TensorImage,
2191        rotation: crate::Rotation,
2192        flip: Flip,
2193        crop: Crop,
2194    ) -> Result<(), crate::Error> {
2195        check_gl_error(function!(), line!())?;
2196
2197        let has_crop = crop.dst_rect.is_some_and(|x| {
2198            x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
2199        });
2200        if has_crop {
2201            if let Some(dst_color) = crop.dst_color {
2202                unsafe {
2203                    gls::gl::ClearColor(
2204                        dst_color[0] as f32 / 255.0,
2205                        dst_color[1] as f32 / 255.0,
2206                        dst_color[2] as f32 / 255.0,
2207                        dst_color[3] as f32 / 255.0,
2208                    );
2209                    gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
2210                };
2211            }
2212        }
2213
2214        // top and bottom are flipped because OpenGL uses 0,0 as bottom left
2215        let src_roi = if let Some(crop) = crop.src_rect {
2216            RegionOfInterest {
2217                left: crop.left as f32 / src.width() as f32,
2218                top: (crop.top + crop.height) as f32 / src.height() as f32,
2219                right: (crop.left + crop.width) as f32 / src.width() as f32,
2220                bottom: crop.top as f32 / src.height() as f32,
2221            }
2222        } else {
2223            RegionOfInterest {
2224                left: 0.,
2225                top: 1.,
2226                right: 1.,
2227                bottom: 0.,
2228            }
2229        };
2230
2231        // top and bottom are flipped because OpenGL uses 0,0 as bottom left
2232        let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
2233        let dst_roi = if let Some(crop) = crop.dst_rect {
2234            RegionOfInterest {
2235                left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
2236                top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
2237                right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
2238                bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
2239            }
2240        } else {
2241            RegionOfInterest {
2242                left: -1.,
2243                top: 1.,
2244                right: 1.,
2245                bottom: -1.,
2246            }
2247        };
2248        let rotation_offset = match rotation {
2249            crate::Rotation::None => 0,
2250            crate::Rotation::Clockwise90 => 1,
2251            crate::Rotation::Rotate180 => 2,
2252            crate::Rotation::CounterClockwise90 => 3,
2253        };
2254        if self.gl_context.support_dma && src.tensor().memory() == TensorMemory::Dma {
2255            match self.create_image_from_dma2(src) {
2256                Ok(new_egl_image) => self.draw_camera_texture_eglimage(
2257                    src,
2258                    &new_egl_image,
2259                    src_roi,
2260                    dst_roi,
2261                    rotation_offset,
2262                    flip,
2263                )?,
2264                Err(e) => {
2265                    log::warn!("EGL image creation failed for {:?}: {:?}", src.fourcc(), e);
2266                    let start = Instant::now();
2267                    self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
2268                    log::debug!("draw_src_texture takes {:?}", start.elapsed());
2269                }
2270            }
2271        } else {
2272            let start = Instant::now();
2273            self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
2274            log::debug!("draw_src_texture takes {:?}", start.elapsed());
2275        }
2276
2277        let start = Instant::now();
2278        unsafe { gls::gl::Finish() };
2279        log::debug!("gl_Finish takes {:?}", start.elapsed());
2280        check_gl_error(function!(), line!())?;
2281        Ok(())
2282    }
2283
2284    fn convert_to_planar(
2285        &self,
2286        src: &TensorImage,
2287        dst: &TensorImage,
2288        rotation: crate::Rotation,
2289        flip: Flip,
2290        crop: Crop,
2291    ) -> Result<(), crate::Error> {
2292        // if let Some(crop) = crop.src_rect
2293        //     && (crop.left > 0
2294        //         || crop.top > 0
2295        //         || crop.height < src.height()
2296        //         || crop.width < src.width())
2297        // {
2298        //     return Err(crate::Error::NotSupported(
2299        //         "Cropping in planar RGB mode is not supported".to_string(),
2300        //     ));
2301        // }
2302
2303        // if let Some(crop) = crop.dst_rect
2304        //     && (crop.left > 0
2305        //         || crop.top > 0
2306        //         || crop.height < src.height()
2307        //         || crop.width < src.width())
2308        // {
2309        //     return Err(crate::Error::NotSupported(
2310        //         "Cropping in planar RGB mode is not supported".to_string(),
2311        //     ));
2312        // }
2313
2314        let alpha = match dst.fourcc() {
2315            PLANAR_RGB => false,
2316            PLANAR_RGBA => true,
2317            _ => {
2318                return Err(crate::Error::NotSupported(
2319                    "Destination format must be PLANAR_RGB or PLANAR_RGBA".to_string(),
2320                ));
2321            }
2322        };
2323
2324        // top and bottom are flipped because OpenGL uses 0,0 as bottom left
2325        let src_roi = if let Some(crop) = crop.src_rect {
2326            RegionOfInterest {
2327                left: crop.left as f32 / src.width() as f32,
2328                top: (crop.top + crop.height) as f32 / src.height() as f32,
2329                right: (crop.left + crop.width) as f32 / src.width() as f32,
2330                bottom: crop.top as f32 / src.height() as f32,
2331            }
2332        } else {
2333            RegionOfInterest {
2334                left: 0.,
2335                top: 1.,
2336                right: 1.,
2337                bottom: 0.,
2338            }
2339        };
2340
2341        // top and bottom are flipped because OpenGL uses 0,0 as bottom left
2342        let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
2343        let dst_roi = if let Some(crop) = crop.dst_rect {
2344            RegionOfInterest {
2345                left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
2346                top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
2347                right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
2348                bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
2349            }
2350        } else {
2351            RegionOfInterest {
2352                left: -1.,
2353                top: 1.,
2354                right: 1.,
2355                bottom: -1.,
2356            }
2357        };
2358        let rotation_offset = match rotation {
2359            crate::Rotation::None => 0,
2360            crate::Rotation::Clockwise90 => 1,
2361            crate::Rotation::Rotate180 => 2,
2362            crate::Rotation::CounterClockwise90 => 3,
2363        };
2364
2365        let has_crop = crop.dst_rect.is_some_and(|x| {
2366            x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
2367        });
2368        if has_crop {
2369            if let Some(dst_color) = crop.dst_color {
2370                self.clear_rect_planar(
2371                    dst.width(),
2372                    dst.height(),
2373                    dst_roi,
2374                    [
2375                        dst_color[0] as f32 / 255.0,
2376                        dst_color[1] as f32 / 255.0,
2377                        dst_color[2] as f32 / 255.0,
2378                        dst_color[3] as f32 / 255.0,
2379                    ],
2380                    alpha,
2381                )?;
2382            }
2383        }
2384
2385        let new_egl_image = self.create_image_from_dma2(src)?;
2386
2387        self.draw_camera_texture_to_rgb_planar(
2388            &new_egl_image,
2389            src_roi,
2390            dst_roi,
2391            rotation_offset,
2392            flip,
2393            alpha,
2394        )?;
2395        unsafe { gls::gl::Finish() };
2396
2397        Ok(())
2398    }
2399
2400    fn clear_rect_planar(
2401        &self,
2402        width: usize,
2403        height: usize,
2404        dst_roi: RegionOfInterest,
2405        color: [f32; 4],
2406        alpha: bool,
2407    ) -> Result<(), Error> {
2408        if !alpha && color[0] == color[1] && color[1] == color[2] {
2409            unsafe {
2410                gls::gl::ClearColor(color[0], color[0], color[0], 1.0);
2411                gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
2412            };
2413        }
2414
2415        let split = if alpha { 4 } else { 3 };
2416
2417        unsafe {
2418            gls::gl::Enable(gls::gl::SCISSOR_TEST);
2419            let x = (((dst_roi.left + 1.0) / 2.0) * width as f32).round() as i32;
2420            let y = (((dst_roi.bottom + 1.0) / 2.0) * height as f32).round() as i32;
2421            let width = (((dst_roi.right - dst_roi.left) / 2.0) * width as f32).round() as i32;
2422            let height = (((dst_roi.top - dst_roi.bottom) / 2.0) * height as f32 / split as f32)
2423                .round() as i32;
2424            for (i, c) in color.iter().enumerate().take(split) {
2425                gls::gl::Scissor(x, y + i as i32 * height, width, height);
2426                gls::gl::ClearColor(*c, *c, *c, 1.0);
2427                gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
2428            }
2429            gls::gl::Disable(gls::gl::SCISSOR_TEST);
2430        }
2431        Ok(())
2432    }
2433
2434    #[allow(clippy::too_many_arguments)]
2435    fn draw_camera_texture_to_rgb_planar(
2436        &self,
2437        egl_img: &EglImage,
2438        src_roi: RegionOfInterest,
2439        mut dst_roi: RegionOfInterest,
2440        rotation_offset: usize,
2441        flip: Flip,
2442        alpha: bool,
2443    ) -> Result<(), Error> {
2444        let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
2445        match flip {
2446            Flip::None => {}
2447            Flip::Vertical => {
2448                std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
2449            }
2450            Flip::Horizontal => {
2451                std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
2452            }
2453        }
2454        unsafe {
2455            // self.texture_program.load_uniform_1f(c"width", width as f32);
2456            gls::gl::UseProgram(self.texture_program_planar.id);
2457            gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
2458            gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2459            gls::gl::TexParameteri(
2460                texture_target,
2461                gls::gl::TEXTURE_MIN_FILTER,
2462                gls::gl::LINEAR as i32,
2463            );
2464            gls::gl::TexParameteri(
2465                texture_target,
2466                gls::gl::TEXTURE_MAG_FILTER,
2467                gls::gl::LINEAR as i32,
2468            );
2469            gls::gl::TexParameteri(
2470                texture_target,
2471                gls::gl::TEXTURE_WRAP_S,
2472                gls::gl::CLAMP_TO_EDGE as i32,
2473            );
2474
2475            gls::gl::TexParameteri(
2476                texture_target,
2477                gls::gl::TEXTURE_WRAP_T,
2478                gls::gl::CLAMP_TO_EDGE as i32,
2479            );
2480
2481            gls::egl_image_target_texture_2d_oes(texture_target, egl_img.egl_image.as_ptr());
2482            check_gl_error(function!(), line!())?;
2483            let y_centers = if alpha {
2484                vec![-3.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0, 3.0 / 4.0]
2485            } else {
2486                vec![-2.0 / 3.0, 0.0, 2.0 / 3.0]
2487            };
2488            let swizzles = [gls::gl::RED, gls::gl::GREEN, gls::gl::BLUE, gls::gl::ALPHA];
2489            // starts from bottom
2490            for (i, y_center) in y_centers.iter().enumerate() {
2491                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
2492                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
2493                let camera_vertices: [f32; 12] = [
2494                    dst_roi.left,
2495                    dst_roi.top / 3.0 + y_center,
2496                    0., // left top
2497                    dst_roi.right,
2498                    dst_roi.top / 3.0 + y_center,
2499                    0., // right top
2500                    dst_roi.right,
2501                    dst_roi.bottom / 3.0 + y_center,
2502                    0., // right bottom
2503                    dst_roi.left,
2504                    dst_roi.bottom / 3.0 + y_center,
2505                    0., // left bottom
2506                ];
2507                gls::gl::BufferData(
2508                    gls::gl::ARRAY_BUFFER,
2509                    (size_of::<f32>() * camera_vertices.len()) as isize,
2510                    camera_vertices.as_ptr() as *const c_void,
2511                    gls::gl::DYNAMIC_DRAW,
2512                );
2513
2514                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
2515                gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
2516                let texture_vertices: [f32; 16] = [
2517                    src_roi.left,
2518                    src_roi.top,
2519                    src_roi.right,
2520                    src_roi.top,
2521                    src_roi.right,
2522                    src_roi.bottom,
2523                    src_roi.left,
2524                    src_roi.bottom,
2525                    src_roi.left,
2526                    src_roi.top,
2527                    src_roi.right,
2528                    src_roi.top,
2529                    src_roi.right,
2530                    src_roi.bottom,
2531                    src_roi.left,
2532                    src_roi.bottom,
2533                ];
2534
2535                gls::gl::BufferData(
2536                    gls::gl::ARRAY_BUFFER,
2537                    (size_of::<f32>() * 8) as isize,
2538                    (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
2539                    gls::gl::DYNAMIC_DRAW,
2540                );
2541                let vertices_index: [u32; 4] = [0, 1, 2, 3];
2542                // self.texture_program_planar
2543                //     .load_uniform_1i(c"color_index", 2 - i as i32);
2544
2545                gls::gl::TexParameteri(
2546                    texture_target,
2547                    gls::gl::TEXTURE_SWIZZLE_R,
2548                    swizzles[i] as i32,
2549                );
2550
2551                gls::gl::DrawElements(
2552                    gls::gl::TRIANGLE_FAN,
2553                    vertices_index.len() as i32,
2554                    gls::gl::UNSIGNED_INT,
2555                    vertices_index.as_ptr() as *const c_void,
2556                );
2557            }
2558            check_gl_error(function!(), line!())?;
2559        }
2560        Ok(())
2561    }
2562
2563    fn draw_src_texture(
2564        &mut self,
2565        src: &TensorImage,
2566        src_roi: RegionOfInterest,
2567        mut dst_roi: RegionOfInterest,
2568        rotation_offset: usize,
2569        flip: Flip,
2570    ) -> Result<(), Error> {
2571        let texture_target = gls::gl::TEXTURE_2D;
2572        let texture_format = match src.fourcc() {
2573            RGB => gls::gl::RGB,
2574            RGBA => gls::gl::RGBA,
2575            GREY => gls::gl::RED,
2576            _ => {
2577                return Err(Error::NotSupported(format!(
2578                    "draw_src_texture does not support {:?} (use DMA-BUF path for YUV)",
2579                    src.fourcc()
2580                )));
2581            }
2582        };
2583        unsafe {
2584            gls::gl::UseProgram(self.texture_program.id);
2585            gls::gl::BindTexture(texture_target, self.camera_normal_texture.id);
2586            gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2587            gls::gl::TexParameteri(
2588                texture_target,
2589                gls::gl::TEXTURE_MIN_FILTER,
2590                gls::gl::LINEAR as i32,
2591            );
2592            gls::gl::TexParameteri(
2593                texture_target,
2594                gls::gl::TEXTURE_MAG_FILTER,
2595                gls::gl::LINEAR as i32,
2596            );
2597            if src.fourcc() == GREY {
2598                for swizzle in [
2599                    gls::gl::TEXTURE_SWIZZLE_R,
2600                    gls::gl::TEXTURE_SWIZZLE_G,
2601                    gls::gl::TEXTURE_SWIZZLE_B,
2602                ] {
2603                    gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
2604                }
2605            } else {
2606                for (swizzle, src) in [
2607                    (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
2608                    (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
2609                    (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
2610                ] {
2611                    gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
2612                }
2613            }
2614            self.camera_normal_texture.update_texture(
2615                texture_target,
2616                src.width(),
2617                src.height(),
2618                texture_format,
2619                &src.tensor().map()?,
2620            );
2621
2622            gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
2623            gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
2624
2625            match flip {
2626                Flip::None => {}
2627                Flip::Vertical => {
2628                    std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
2629                }
2630                Flip::Horizontal => {
2631                    std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
2632                }
2633            }
2634
2635            let camera_vertices: [f32; 12] = [
2636                dst_roi.left,
2637                dst_roi.top,
2638                0., // left top
2639                dst_roi.right,
2640                dst_roi.top,
2641                0., // right top
2642                dst_roi.right,
2643                dst_roi.bottom,
2644                0., // right bottom
2645                dst_roi.left,
2646                dst_roi.bottom,
2647                0., // left bottom
2648            ];
2649            gls::gl::BufferData(
2650                gls::gl::ARRAY_BUFFER,
2651                (size_of::<f32>() * camera_vertices.len()) as isize,
2652                camera_vertices.as_ptr() as *const c_void,
2653                gls::gl::DYNAMIC_DRAW,
2654            );
2655            gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
2656            gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
2657            let texture_vertices: [f32; 16] = [
2658                src_roi.left,
2659                src_roi.top,
2660                src_roi.right,
2661                src_roi.top,
2662                src_roi.right,
2663                src_roi.bottom,
2664                src_roi.left,
2665                src_roi.bottom,
2666                src_roi.left,
2667                src_roi.top,
2668                src_roi.right,
2669                src_roi.top,
2670                src_roi.right,
2671                src_roi.bottom,
2672                src_roi.left,
2673                src_roi.bottom,
2674            ];
2675
2676            gls::gl::BufferData(
2677                gls::gl::ARRAY_BUFFER,
2678                (size_of::<f32>() * 8) as isize,
2679                (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
2680                gls::gl::DYNAMIC_DRAW,
2681            );
2682            let vertices_index: [u32; 4] = [0, 1, 2, 3];
2683            gls::gl::DrawElements(
2684                gls::gl::TRIANGLE_FAN,
2685                vertices_index.len() as i32,
2686                gls::gl::UNSIGNED_INT,
2687                vertices_index.as_ptr() as *const c_void,
2688            );
2689            check_gl_error(function!(), line!())?;
2690
2691            Ok(())
2692        }
2693    }
2694
2695    fn draw_camera_texture_eglimage(
2696        &self,
2697        src: &TensorImage,
2698        egl_img: &EglImage,
2699        src_roi: RegionOfInterest,
2700        mut dst_roi: RegionOfInterest,
2701        rotation_offset: usize,
2702        flip: Flip,
2703    ) -> Result<(), Error> {
2704        // let texture_target = gls::gl::TEXTURE_2D;
2705        let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
2706        unsafe {
2707            gls::gl::UseProgram(self.texture_program_yuv.id);
2708            gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
2709            gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2710            gls::gl::TexParameteri(
2711                texture_target,
2712                gls::gl::TEXTURE_MIN_FILTER,
2713                gls::gl::LINEAR as i32,
2714            );
2715            gls::gl::TexParameteri(
2716                texture_target,
2717                gls::gl::TEXTURE_MAG_FILTER,
2718                gls::gl::LINEAR as i32,
2719            );
2720
2721            if src.fourcc() == GREY {
2722                for swizzle in [
2723                    gls::gl::TEXTURE_SWIZZLE_R,
2724                    gls::gl::TEXTURE_SWIZZLE_G,
2725                    gls::gl::TEXTURE_SWIZZLE_B,
2726                ] {
2727                    gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
2728                }
2729            } else {
2730                for (swizzle, src) in [
2731                    (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
2732                    (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
2733                    (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
2734                ] {
2735                    gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
2736                }
2737            }
2738
2739            gls::egl_image_target_texture_2d_oes(texture_target, egl_img.egl_image.as_ptr());
2740            check_gl_error(function!(), line!())?;
2741            gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
2742            gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
2743
2744            match flip {
2745                Flip::None => {}
2746                Flip::Vertical => {
2747                    std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
2748                }
2749                Flip::Horizontal => {
2750                    std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
2751                }
2752            }
2753
2754            let camera_vertices: [f32; 12] = [
2755                dst_roi.left,
2756                dst_roi.top,
2757                0., // left top
2758                dst_roi.right,
2759                dst_roi.top,
2760                0., // right top
2761                dst_roi.right,
2762                dst_roi.bottom,
2763                0., // right bottom
2764                dst_roi.left,
2765                dst_roi.bottom,
2766                0., // left bottom
2767            ];
2768            gls::gl::BufferSubData(
2769                gls::gl::ARRAY_BUFFER,
2770                0,
2771                (size_of::<f32>() * camera_vertices.len()) as isize,
2772                camera_vertices.as_ptr() as *const c_void,
2773            );
2774
2775            gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
2776            gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
2777
2778            let texture_vertices: [f32; 16] = [
2779                src_roi.left,
2780                src_roi.top,
2781                src_roi.right,
2782                src_roi.top,
2783                src_roi.right,
2784                src_roi.bottom,
2785                src_roi.left,
2786                src_roi.bottom,
2787                src_roi.left,
2788                src_roi.top,
2789                src_roi.right,
2790                src_roi.top,
2791                src_roi.right,
2792                src_roi.bottom,
2793                src_roi.left,
2794                src_roi.bottom,
2795            ];
2796            gls::gl::BufferSubData(
2797                gls::gl::ARRAY_BUFFER,
2798                0,
2799                (size_of::<f32>() * 8) as isize,
2800                (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
2801            );
2802
2803            let vertices_index: [u32; 4] = [0, 1, 2, 3];
2804            gls::gl::DrawElements(
2805                gls::gl::TRIANGLE_FAN,
2806                vertices_index.len() as i32,
2807                gls::gl::UNSIGNED_INT,
2808                vertices_index.as_ptr() as *const c_void,
2809            );
2810        }
2811        check_gl_error(function!(), line!())?;
2812        Ok(())
2813    }
2814
2815    fn create_image_from_dma2(&self, src: &TensorImage) -> Result<EglImage, crate::Error> {
2816        let width;
2817        let height;
2818        let format;
2819        let channels;
2820
2821        // NV12 is semi-planar but handled specially via EGL multi-plane import
2822        if src.fourcc() == NV12 {
2823            if !src.width().is_multiple_of(4) {
2824                return Err(Error::NotSupported(
2825                    "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
2826                        .to_string(),
2827                ));
2828            }
2829            width = src.width();
2830            height = src.height();
2831            format = fourcc_to_drm(NV12);
2832            channels = 1; // Y plane pitch is 1 byte per pixel
2833        } else if src.is_planar() {
2834            if !src.width().is_multiple_of(16) {
2835                return Err(Error::NotSupported(
2836                    "OpenGL Planar RGB EGLImage doesn't support image widths which are not multiples of 16"
2837                        .to_string(),
2838                ));
2839            }
2840            match src.fourcc() {
2841                PLANAR_RGB => {
2842                    format = DrmFourcc::R8;
2843                    width = src.width();
2844                    height = src.height() * 3;
2845                    channels = 1;
2846                }
2847                fourcc => {
2848                    return Err(crate::Error::NotSupported(format!(
2849                        "Unsupported Planar FourCC {fourcc:?}"
2850                    )));
2851                }
2852            };
2853        } else {
2854            if !src.width().is_multiple_of(4) {
2855                return Err(Error::NotSupported(
2856                    "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
2857                        .to_string(),
2858                ));
2859            }
2860            width = src.width();
2861            height = src.height();
2862            format = fourcc_to_drm(src.fourcc());
2863            channels = src.channels();
2864        }
2865
2866        let fd = match &src.tensor {
2867            edgefirst_tensor::Tensor::Dma(dma_tensor) => dma_tensor.fd.as_raw_fd(),
2868            edgefirst_tensor::Tensor::Shm(_) => {
2869                return Err(Error::NotImplemented(
2870                    "OpenGL EGLImage doesn't support SHM".to_string(),
2871                ));
2872            }
2873            edgefirst_tensor::Tensor::Mem(_) => {
2874                return Err(Error::NotImplemented(
2875                    "OpenGL EGLImage doesn't support MEM".to_string(),
2876                ));
2877            }
2878        };
2879
2880        // For NV12, plane0 pitch is width (Y is 1 byte/pixel)
2881        // For other formats, pitch is width * channels
2882        let plane0_pitch = if src.fourcc() == NV12 {
2883            width
2884        } else {
2885            width * channels
2886        };
2887
2888        let mut egl_img_attr = vec![
2889            egl_ext::LINUX_DRM_FOURCC as Attrib,
2890            format as Attrib,
2891            khronos_egl::WIDTH as Attrib,
2892            width as Attrib,
2893            khronos_egl::HEIGHT as Attrib,
2894            height as Attrib,
2895            egl_ext::DMA_BUF_PLANE0_PITCH as Attrib,
2896            plane0_pitch as Attrib,
2897            egl_ext::DMA_BUF_PLANE0_OFFSET as Attrib,
2898            0 as Attrib,
2899            egl_ext::DMA_BUF_PLANE0_FD as Attrib,
2900            fd as Attrib,
2901            egl::IMAGE_PRESERVED as Attrib,
2902            egl::TRUE as Attrib,
2903        ];
2904
2905        // NV12 requires a second plane for UV data
2906        if src.fourcc() == NV12 {
2907            let uv_offset = width * height; // Y plane size
2908            egl_img_attr.append(&mut vec![
2909                egl_ext::DMA_BUF_PLANE1_FD as Attrib,
2910                fd as Attrib,
2911                egl_ext::DMA_BUF_PLANE1_OFFSET as Attrib,
2912                uv_offset as Attrib,
2913                egl_ext::DMA_BUF_PLANE1_PITCH as Attrib,
2914                width as Attrib, // UV plane has same width as Y plane
2915            ]);
2916        }
2917
2918        if matches!(src.fourcc(), YUYV | NV12) {
2919            egl_img_attr.append(&mut vec![
2920                egl_ext::YUV_COLOR_SPACE_HINT as Attrib,
2921                egl_ext::ITU_REC709 as Attrib,
2922                egl_ext::SAMPLE_RANGE_HINT as Attrib,
2923                egl_ext::YUV_NARROW_RANGE as Attrib,
2924            ]);
2925        }
2926
2927        egl_img_attr.push(khronos_egl::NONE as Attrib);
2928
2929        match self.new_egl_image_owned(egl_ext::LINUX_DMA_BUF, &egl_img_attr) {
2930            Ok(v) => Ok(v),
2931            Err(e) => Err(e),
2932        }
2933    }
2934
2935    fn new_egl_image_owned(
2936        &'_ self,
2937        target: egl::Enum,
2938        attrib_list: &[Attrib],
2939    ) -> Result<EglImage, Error> {
2940        let image = GlContext::egl_create_image_with_fallback(
2941            &self.gl_context.egl,
2942            self.gl_context.display.as_display(),
2943            unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) },
2944            target,
2945            unsafe { egl::ClientBuffer::from_ptr(null_mut()) },
2946            attrib_list,
2947        )?;
2948        Ok(EglImage {
2949            egl_image: image,
2950            display: self.gl_context.display.as_display(),
2951            egl: Rc::clone(&self.gl_context.egl),
2952        })
2953    }
2954
2955    // Reshapes the segmentation to be compatible with RGBA texture array rendering.
2956    fn reshape_segmentation_to_rgba(&self, segmentation: &[u8], shape: [usize; 3]) -> Vec<u8> {
2957        let [height, width, classes] = shape;
2958
2959        let n_layer_stride = height * width * 4;
2960        let n_row_stride = width * 4;
2961        let n_col_stride = 4;
2962        let row_stride = width * classes;
2963        let col_stride = classes;
2964
2965        let mut new_segmentation = vec![0u8; n_layer_stride * classes.div_ceil(4)];
2966
2967        for i in 0..height {
2968            for j in 0..width {
2969                for k in 0..classes.div_ceil(4) * 4 {
2970                    if k >= classes {
2971                        new_segmentation[n_layer_stride * (k / 4)
2972                            + i * n_row_stride
2973                            + j * n_col_stride
2974                            + k % 4] = 0;
2975                    } else {
2976                        new_segmentation[n_layer_stride * (k / 4)
2977                            + i * n_row_stride
2978                            + j * n_col_stride
2979                            + k % 4] = segmentation[i * row_stride + j * col_stride + k];
2980                    }
2981                }
2982            }
2983        }
2984
2985        new_segmentation
2986    }
2987
/// Uploads a `(height, width, classes)` segmentation as an RGBA texture
/// array and draws it over `dst_roi` with `segmentation_program`.
///
/// * `dst_roi` - destination rectangle of the overlay.
/// * `segmentation` - interleaved per-pixel class scores.
/// * `shape` - `[height, width, classes]` of `segmentation`.
///
/// The shader's `background_index` uniform is set to the last class index.
///
/// # Errors
/// Propagates any error from loading the `background_index` uniform.
#[cfg(feature = "decoder")]
fn render_modelpack_segmentation(
    &mut self,
    dst_roi: RegionOfInterest,
    segmentation: &[u8],
    shape: [usize; 3],
) -> Result<(), crate::Error> {
    log::debug!("start render_segmentation_to_image");

    // TODO: Implement specialization for 2 classes and 4 classes which shouldn't
    // need rearranging the data
    let new_segmentation = self.reshape_segmentation_to_rgba(segmentation, shape);

    let [height, width, classes] = shape;

    let format = gls::gl::RGBA;
    let texture_target = gls::gl::TEXTURE_2D_ARRAY;

    // glUniform* writes to the program currently in use, so select the
    // program before loading its uniform (same order as
    // `render_yolo_segmentation`).
    gls::use_program(self.segmentation_program.id);
    self.segmentation_program
        .load_uniform_1i(c"background_index", classes as i32 - 1)?;

    // Select the unit before binding; glBindTexture binds to whichever unit
    // is active at the time of the call.
    gls::active_texture(gls::gl::TEXTURE0);
    gls::bind_texture(texture_target, self.segmentation_texture.id);
    for (pname, value) in [
        (gls::gl::TEXTURE_MIN_FILTER, gls::gl::LINEAR),
        (gls::gl::TEXTURE_MAG_FILTER, gls::gl::LINEAR),
        (gls::gl::TEXTURE_WRAP_S, gls::gl::CLAMP_TO_EDGE),
        (gls::gl::TEXTURE_WRAP_T, gls::gl::CLAMP_TO_EDGE),
    ] {
        gls::tex_parameteri(texture_target, pname, value as i32);
    }

    gls::tex_image3d(
        texture_target,
        0,
        format as i32,
        width as i32,
        height as i32,
        classes.div_ceil(4) as i32,
        0,
        format,
        gls::gl::UNSIGNED_BYTE,
        Some(&new_segmentation),
    );

    let camera_vertices: [f32; 12] = [
        dst_roi.left,
        dst_roi.top,
        0., // left top
        dst_roi.right,
        dst_roi.top,
        0., // right top
        dst_roi.right,
        dst_roi.bottom,
        0., // right bottom
        dst_roi.left,
        dst_roi.bottom,
        0., // left bottom
    ];
    // Whole texture: left = 0, top = 1, right = 1, bottom = 0.
    let texture_vertices: [f32; 8] = [
        0., 1., // left top
        1., 1., // right top
        1., 0., // right bottom
        0., 0., // left bottom
    ];
    let vertices_index: [u32; 4] = [0, 1, 2, 3];

    // SAFETY: raw GL calls; assumes the GL context is current on this thread
    // (not visible in this block). Every pointer passed to GL refers to a
    // local array that outlives the call and matches the byte or element
    // count given alongside it.
    unsafe {
        gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
        gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
        gls::gl::BufferSubData(
            gls::gl::ARRAY_BUFFER,
            0,
            (size_of::<f32>() * camera_vertices.len()) as isize,
            camera_vertices.as_ptr() as *const c_void,
        );

        gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
        gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
        gls::gl::BufferSubData(
            gls::gl::ARRAY_BUFFER,
            0,
            (size_of::<f32>() * texture_vertices.len()) as isize,
            texture_vertices.as_ptr() as *const c_void,
        );

        gls::gl::DrawElements(
            gls::gl::TRIANGLE_FAN,
            vertices_index.len() as i32,
            gls::gl::UNSIGNED_INT,
            vertices_index.as_ptr() as *const c_void,
        );
    }

    Ok(())
}
3110
/// Uploads a single-channel instance mask as a 2D texture and draws it over
/// `dst_roi` with `instanced_segmentation_program`, then waits for the GPU
/// to finish.
///
/// * `dst_roi` - destination rectangle of the overlay.
/// * `segmentation` - mask bytes, one per pixel.
/// * `shape` - `[height, width]` of `segmentation`.
/// * `class` - value loaded into the shader's `class_index` uniform.
///
/// NOTE(review): `segmentation_texture` is bound to `TEXTURE_2D` here and to
/// `TEXTURE_2D_ARRAY` in `render_modelpack_segmentation`; confirm the two
/// paths are never used on the same texture object.
/// NOTE(review): the upload uses one byte per pixel, so it presumably relies
/// on `UNPACK_ALIGNMENT` being 1 for widths that are not a multiple of 4.
/// Verify where that is configured.
///
/// # Errors
/// Propagates any error from loading the `class_index` uniform.
#[cfg(feature = "decoder")]
fn render_yolo_segmentation(
    &mut self,
    dst_roi: RegionOfInterest,
    segmentation: &[u8],
    shape: [usize; 2],
    class: usize,
) -> Result<(), crate::Error> {
    log::debug!("start render_yolo_segmentation");

    let [height, width] = shape;

    let format = gls::gl::RED;
    let texture_target = gls::gl::TEXTURE_2D;
    gls::use_program(self.instanced_segmentation_program.id);
    self.instanced_segmentation_program
        .load_uniform_1i(c"class_index", class as i32)?;

    // Select the unit before binding; glBindTexture binds to whichever unit
    // is active at the time of the call.
    gls::active_texture(gls::gl::TEXTURE0);
    gls::bind_texture(texture_target, self.segmentation_texture.id);
    for (pname, value) in [
        (gls::gl::TEXTURE_MIN_FILTER, gls::gl::LINEAR),
        (gls::gl::TEXTURE_MAG_FILTER, gls::gl::LINEAR),
        (gls::gl::TEXTURE_WRAP_S, gls::gl::CLAMP_TO_EDGE),
        (gls::gl::TEXTURE_WRAP_T, gls::gl::CLAMP_TO_EDGE),
    ] {
        gls::tex_parameteri(texture_target, pname, value as i32);
    }

    gls::tex_image2d(
        texture_target,
        0,
        format as i32,
        width as i32,
        height as i32,
        0,
        format,
        gls::gl::UNSIGNED_BYTE,
        Some(segmentation),
    );

    let camera_vertices: [f32; 12] = [
        dst_roi.left,
        dst_roi.top,
        0., // left top
        dst_roi.right,
        dst_roi.top,
        0., // right top
        dst_roi.right,
        dst_roi.bottom,
        0., // right bottom
        dst_roi.left,
        dst_roi.bottom,
        0., // left bottom
    ];
    // Whole texture: left = 0, top = 1, right = 1, bottom = 0.
    let texture_vertices: [f32; 8] = [
        0., 1., // left top
        1., 1., // right top
        1., 0., // right bottom
        0., 0., // left bottom
    ];
    let vertices_index: [u32; 4] = [0, 1, 2, 3];

    // SAFETY: raw GL calls; assumes the GL context is current on this thread
    // (not visible in this block). Every pointer passed to GL refers to a
    // local array that outlives the call and matches the byte or element
    // count given alongside it.
    unsafe {
        gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
        gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
        gls::gl::BufferSubData(
            gls::gl::ARRAY_BUFFER,
            0,
            (size_of::<f32>() * camera_vertices.len()) as isize,
            camera_vertices.as_ptr() as *const c_void,
        );

        gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
        gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
        gls::gl::BufferSubData(
            gls::gl::ARRAY_BUFFER,
            0,
            (size_of::<f32>() * texture_vertices.len()) as isize,
            texture_vertices.as_ptr() as *const c_void,
        );

        gls::gl::DrawElements(
            gls::gl::TRIANGLE_FAN,
            vertices_index.len() as i32,
            gls::gl::UNSIGNED_INT,
            vertices_index.as_ptr() as *const c_void,
        );
        gls::gl::Finish();
    }

    Ok(())
}
3228
/// Repack proto tensor `(H, W, num_protos)` as f32 into RGBA f16 layers
/// suitable for upload to a GL_TEXTURE_2D_ARRAY with GL_RGBA16F.
///
/// Proto `k` of a pixel is stored in layer `k / 4`, channel `k % 4`; unused
/// channels of the last layer stay zero. Each half-float is written in
/// native byte order.
///
/// Returns `(repacked_bytes, num_layers)` where each layer is H*W*4 half-floats.
#[cfg(feature = "decoder")]
fn repack_protos_to_rgba_f16(protos: &ndarray::Array3<f32>) -> (Vec<u8>, usize) {
    const BYTES_PER_HALF: usize = std::mem::size_of::<u16>();

    let (height, width, num_protos) = protos.dim();
    let num_layers = num_protos.div_ceil(4);
    // Number of half-floats in one RGBA layer.
    let layer_stride = height * width * 4;

    // Pack straight into the byte buffer: an all-zero byte pattern is f16
    // zero, so padding channels need no explicit writes, and no unsafe
    // reinterpretation or second copy of the buffer is required.
    let mut byte_buf = vec![0u8; layer_stride * num_layers * BYTES_PER_HALF];

    for ((y, x, k), &value) in protos.indexed_iter() {
        let texel = (k / 4) * layer_stride + (y * width + x) * 4 + k % 4;
        let offset = texel * BYTES_PER_HALF;
        let bits = half::f16::from_f32(value).to_bits();
        byte_buf[offset..offset + BYTES_PER_HALF].copy_from_slice(&bits.to_ne_bytes());
    }

    (byte_buf, num_layers)
}
3262
3263    /// Render YOLO proto segmentation masks using the fused GPU pipeline.
3264    ///
3265    /// Dispatches to the appropriate shader based on `ProtoTensor` variant:
3266    /// - `Quantized`: uploads raw int8 as `GL_R8I`, dequantizes in shader
3267    /// - `Float`: uploads as `GL_R32F` with hardware bilinear (if available),
3268    ///   or falls back to f16 repack path
3269    #[cfg(feature = "decoder")]
3270    fn render_proto_segmentation(
3271        &mut self,
3272        detect: &[DetectBox],
3273        proto_data: &ProtoData,
3274    ) -> crate::Result<()> {
3275        if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
3276            return Ok(());
3277        }
3278
3279        let (height, width, num_protos) = proto_data.protos.dim();
3280        let texture_target = gls::gl::TEXTURE_2D_ARRAY;
3281
3282        match &proto_data.protos {
3283            ProtoTensor::Quantized {
3284                protos,
3285                quantization,
3286            } => {
3287                self.render_proto_segmentation_int8(
3288                    detect,
3289                    &proto_data.mask_coefficients,
3290                    protos,
3291                    quantization,
3292                    height,
3293                    width,
3294                    num_protos,
3295                    texture_target,
3296                )?;
3297            }
3298            ProtoTensor::Float(protos_f32) => {
3299                if self.has_float_linear {
3300                    self.render_proto_segmentation_f32(
3301                        detect,
3302                        &proto_data.mask_coefficients,
3303                        protos_f32,
3304                        height,
3305                        width,
3306                        num_protos,
3307                        texture_target,
3308                    )?;
3309                } else {
3310                    // Fallback: repack to RGBA16F and use existing f16 shader
3311                    self.render_proto_segmentation_f16(
3312                        detect,
3313                        &proto_data.mask_coefficients,
3314                        protos_f32,
3315                        height,
3316                        width,
3317                        num_protos,
3318                        texture_target,
3319                    )?;
3320                }
3321            }
3322        }
3323
3324        unsafe { gls::gl::Finish() };
3325        Ok(())
3326    }
3327
3328    /// Render detection quads using the active program. Shared by all proto
3329    /// shader paths.
3330    #[cfg(feature = "decoder")]
3331    fn render_proto_detection_quads(
3332        &self,
3333        program: &GlProgram,
3334        detect: &[DetectBox],
3335        mask_coefficients: &[Vec<f32>],
3336    ) -> crate::Result<()> {
3337        let cvt_screen_coord = |normalized: f32| normalized * 2.0 - 1.0;
3338
3339        for (det, coeff) in detect.iter().zip(mask_coefficients.iter()) {
3340            let mut packed_coeff = [[0.0f32; 4]; 8];
3341            for (i, val) in coeff.iter().enumerate().take(32) {
3342                packed_coeff[i / 4][i % 4] = *val;
3343            }
3344
3345            program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;
3346            program.load_uniform_1i(c"class_index", det.label as i32)?;
3347
3348            let dst_roi = RegionOfInterest {
3349                left: cvt_screen_coord(det.bbox.xmin),
3350                top: cvt_screen_coord(det.bbox.ymax),
3351                right: cvt_screen_coord(det.bbox.xmax),
3352                bottom: cvt_screen_coord(det.bbox.ymin),
3353            };
3354
3355            // Proto texture coords: tex row 0 = image top (data uploaded in
3356            // row-major order where y=0 is top of image, and GL treats the
3357            // first row of pixel data as the bottom of the texture — but
3358            // texelFetch(y=0) returns that bottom row, which is our image top).
3359            // So tc.y=0 → image top, tc.y=1 → image bottom.
3360            // At NDC top (higher Y = image bottom = ymax), we want tc.y = ymax.
3361            // At NDC bottom (lower Y = image top = ymin), we want tc.y = ymin.
3362            let src_roi = RegionOfInterest {
3363                left: det.bbox.xmin,
3364                top: det.bbox.ymax,
3365                right: det.bbox.xmax,
3366                bottom: det.bbox.ymin,
3367            };
3368
3369            unsafe {
3370                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3371                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3372
3373                let camera_vertices: [f32; 12] = [
3374                    dst_roi.left,
3375                    dst_roi.top,
3376                    0.,
3377                    dst_roi.right,
3378                    dst_roi.top,
3379                    0.,
3380                    dst_roi.right,
3381                    dst_roi.bottom,
3382                    0.,
3383                    dst_roi.left,
3384                    dst_roi.bottom,
3385                    0.,
3386                ];
3387                gls::gl::BufferSubData(
3388                    gls::gl::ARRAY_BUFFER,
3389                    0,
3390                    (size_of::<f32>() * camera_vertices.len()) as isize,
3391                    camera_vertices.as_ptr() as *const c_void,
3392                );
3393
3394                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3395                gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3396
3397                let texture_vertices: [f32; 8] = [
3398                    src_roi.left,
3399                    src_roi.top,
3400                    src_roi.right,
3401                    src_roi.top,
3402                    src_roi.right,
3403                    src_roi.bottom,
3404                    src_roi.left,
3405                    src_roi.bottom,
3406                ];
3407                gls::gl::BufferSubData(
3408                    gls::gl::ARRAY_BUFFER,
3409                    0,
3410                    (size_of::<f32>() * 8) as isize,
3411                    texture_vertices.as_ptr() as *const c_void,
3412                );
3413
3414                let vertices_index: [u32; 4] = [0, 1, 2, 3];
3415                gls::gl::DrawElements(
3416                    gls::gl::TRIANGLE_FAN,
3417                    vertices_index.len() as i32,
3418                    gls::gl::UNSIGNED_INT,
3419                    vertices_index.as_ptr() as *const c_void,
3420                );
3421            }
3422        }
3423        Ok(())
3424    }
3425
3426    /// Int8 proto path: upload raw i8 protos as `GL_R8I`, dispatch by
3427    /// interpolation mode.
3428    #[cfg(feature = "decoder")]
3429    #[allow(clippy::too_many_arguments)]
3430    fn render_proto_segmentation_int8(
3431        &mut self,
3432        detect: &[DetectBox],
3433        mask_coefficients: &[Vec<f32>],
3434        protos: &ndarray::Array3<i8>,
3435        quantization: &edgefirst_decoder::Quantization,
3436        height: usize,
3437        width: usize,
3438        num_protos: usize,
3439        texture_target: u32,
3440    ) -> crate::Result<()> {
3441        // Upload raw int8 protos as R8I texture array (1 proto per layer)
3442        gls::bind_texture(texture_target, self.proto_texture.id);
3443        gls::active_texture(gls::gl::TEXTURE0);
3444        gls::tex_parameteri(
3445            texture_target,
3446            gls::gl::TEXTURE_MIN_FILTER,
3447            gls::gl::NEAREST as i32,
3448        );
3449        gls::tex_parameteri(
3450            texture_target,
3451            gls::gl::TEXTURE_MAG_FILTER,
3452            gls::gl::NEAREST as i32,
3453        );
3454        gls::tex_parameteri(
3455            texture_target,
3456            gls::gl::TEXTURE_WRAP_S,
3457            gls::gl::CLAMP_TO_EDGE as i32,
3458        );
3459        gls::tex_parameteri(
3460            texture_target,
3461            gls::gl::TEXTURE_WRAP_T,
3462            gls::gl::CLAMP_TO_EDGE as i32,
3463        );
3464
3465        // Protos are (H, W, num_protos) in row-major. We need to repack to
3466        // layer-first layout: layer k = all (H, W) texels for proto k.
3467        let mut tex_data = vec![0i8; height * width * num_protos];
3468        for k in 0..num_protos {
3469            for y in 0..height {
3470                for x in 0..width {
3471                    tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
3472                }
3473            }
3474        }
3475
3476        gls::tex_image3d(
3477            texture_target,
3478            0,
3479            gls::gl::R8I as i32,
3480            width as i32,
3481            height as i32,
3482            num_protos as i32,
3483            0,
3484            gls::gl::RED_INTEGER,
3485            gls::gl::BYTE,
3486            Some(&tex_data),
3487        );
3488
3489        let proto_scale = quantization.scale;
3490        let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
3491
3492        match self.int8_interpolation_mode {
3493            Int8InterpolationMode::Nearest => {
3494                let program = &self.proto_segmentation_int8_nearest_program;
3495                gls::use_program(program.id);
3496                program.load_uniform_1i(c"num_protos", num_protos as i32)?;
3497                program.load_uniform_1f(c"proto_scale", proto_scale)?;
3498                program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
3499                self.render_proto_detection_quads(program, detect, mask_coefficients)?;
3500            }
3501            Int8InterpolationMode::Bilinear => {
3502                let program = &self.proto_segmentation_int8_bilinear_program;
3503                gls::use_program(program.id);
3504                program.load_uniform_1i(c"num_protos", num_protos as i32)?;
3505                program.load_uniform_1f(c"proto_scale", proto_scale)?;
3506                program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
3507                self.render_proto_detection_quads(program, detect, mask_coefficients)?;
3508            }
3509            Int8InterpolationMode::TwoPass => {
3510                self.render_proto_int8_two_pass(
3511                    detect,
3512                    mask_coefficients,
3513                    quantization,
3514                    height,
3515                    width,
3516                    num_protos,
3517                    texture_target,
3518                )?;
3519            }
3520        }
3521
3522        Ok(())
3523    }
3524
3525    /// Two-pass int8 path: dequant int8→RGBA16F FBO, then render with
3526    /// existing f16 shader using GL_LINEAR.
3527    #[cfg(feature = "decoder")]
3528    #[allow(clippy::too_many_arguments)]
3529    fn render_proto_int8_two_pass(
3530        &self,
3531        detect: &[DetectBox],
3532        mask_coefficients: &[Vec<f32>],
3533        quantization: &edgefirst_decoder::Quantization,
3534        height: usize,
3535        width: usize,
3536        num_protos: usize,
3537        texture_target: u32,
3538    ) -> crate::Result<()> {
3539        let num_layers = num_protos.div_ceil(4);
3540
3541        // Save the caller's FBO and viewport so we can restore after dequant.
3542        let (saved_fbo, saved_viewport) = unsafe {
3543            let mut fbo: i32 = 0;
3544            gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
3545            let mut vp = [0i32; 4];
3546            gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
3547            (fbo as u32, vp)
3548        };
3549
3550        // Pass 1: Dequantize int8 → RGBA16F texture via framebuffer
3551        let dequant_fbo = FrameBuffer::new();
3552        gls::bind_texture(texture_target, self.proto_dequant_texture.id);
3553        gls::tex_image3d::<u8>(
3554            texture_target,
3555            0,
3556            gls::gl::RGBA16F as i32,
3557            width as i32,
3558            height as i32,
3559            num_layers as i32,
3560            0,
3561            gls::gl::RGBA,
3562            gls::gl::HALF_FLOAT,
3563            None,
3564        );
3565        gls::tex_parameteri(
3566            texture_target,
3567            gls::gl::TEXTURE_MIN_FILTER,
3568            gls::gl::LINEAR as i32,
3569        );
3570        gls::tex_parameteri(
3571            texture_target,
3572            gls::gl::TEXTURE_MAG_FILTER,
3573            gls::gl::LINEAR as i32,
3574        );
3575        gls::tex_parameteri(
3576            texture_target,
3577            gls::gl::TEXTURE_WRAP_S,
3578            gls::gl::CLAMP_TO_EDGE as i32,
3579        );
3580        gls::tex_parameteri(
3581            texture_target,
3582            gls::gl::TEXTURE_WRAP_T,
3583            gls::gl::CLAMP_TO_EDGE as i32,
3584        );
3585
3586        let proto_scale = quantization.scale;
3587        let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
3588
3589        let dequant_program = &self.proto_dequant_int8_program;
3590        gls::use_program(dequant_program.id);
3591        dequant_program.load_uniform_1f(c"proto_scale", proto_scale)?;
3592        dequant_program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
3593
3594        // Bind the int8 proto texture to TEXTURE0 for the dequant shader
3595        gls::active_texture(gls::gl::TEXTURE0);
3596        gls::bind_texture(texture_target, self.proto_texture.id);
3597
3598        // Render each RGBA16F layer (4 protos per layer)
3599        for layer in 0..num_layers {
3600            dequant_fbo.bind();
3601            unsafe {
3602                gls::gl::FramebufferTextureLayer(
3603                    gls::gl::FRAMEBUFFER,
3604                    gls::gl::COLOR_ATTACHMENT0,
3605                    self.proto_dequant_texture.id,
3606                    0,
3607                    layer as i32,
3608                );
3609                gls::gl::Viewport(0, 0, width as i32, height as i32);
3610            }
3611            dequant_program.load_uniform_1i(c"base_layer", (layer * 4) as i32)?;
3612
3613            // Full-screen quad
3614            unsafe {
3615                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3616                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3617                let verts: [f32; 12] = [
3618                    -1.0, -1.0, 0.0, 1.0, -1.0, 0.0, 1.0, 1.0, 0.0, -1.0, 1.0, 0.0,
3619                ];
3620                gls::gl::BufferSubData(
3621                    gls::gl::ARRAY_BUFFER,
3622                    0,
3623                    (size_of::<f32>() * 12) as isize,
3624                    verts.as_ptr() as *const c_void,
3625                );
3626                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3627                gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3628                let tc: [f32; 8] = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0];
3629                gls::gl::BufferSubData(
3630                    gls::gl::ARRAY_BUFFER,
3631                    0,
3632                    (size_of::<f32>() * 8) as isize,
3633                    tc.as_ptr() as *const c_void,
3634                );
3635                let idx: [u32; 4] = [0, 1, 2, 3];
3636                gls::gl::DrawElements(
3637                    gls::gl::TRIANGLE_FAN,
3638                    4,
3639                    gls::gl::UNSIGNED_INT,
3640                    idx.as_ptr() as *const c_void,
3641                );
3642            }
3643        }
3644
3645        // Drop the dequant FBO (its Drop unbinds to 0) and restore the caller's.
3646        drop(dequant_fbo);
3647        unsafe {
3648            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
3649            gls::gl::Viewport(
3650                saved_viewport[0],
3651                saved_viewport[1],
3652                saved_viewport[2],
3653                saved_viewport[3],
3654            );
3655        }
3656
3657        // Pass 2: render with existing f16 shader reading from dequant texture
3658        let program = &self.proto_segmentation_program;
3659        gls::use_program(program.id);
3660        gls::active_texture(gls::gl::TEXTURE0);
3661        gls::bind_texture(texture_target, self.proto_dequant_texture.id);
3662        program.load_uniform_1i(c"num_layers", num_layers as i32)?;
3663        self.render_proto_detection_quads(program, detect, mask_coefficients)?;
3664
3665        Ok(())
3666    }
3667
3668    /// F32 proto path: upload as `GL_R32F` with `GL_LINEAR` filtering.
3669    #[cfg(feature = "decoder")]
3670    #[allow(clippy::too_many_arguments)]
3671    fn render_proto_segmentation_f32(
3672        &self,
3673        detect: &[DetectBox],
3674        mask_coefficients: &[Vec<f32>],
3675        protos_f32: &ndarray::Array3<f32>,
3676        height: usize,
3677        width: usize,
3678        num_protos: usize,
3679        texture_target: u32,
3680    ) -> crate::Result<()> {
3681        let program = &self.proto_segmentation_f32_program;
3682        gls::use_program(program.id);
3683        gls::bind_texture(texture_target, self.proto_texture.id);
3684        gls::active_texture(gls::gl::TEXTURE0);
3685        gls::tex_parameteri(
3686            texture_target,
3687            gls::gl::TEXTURE_MIN_FILTER,
3688            gls::gl::LINEAR as i32,
3689        );
3690        gls::tex_parameteri(
3691            texture_target,
3692            gls::gl::TEXTURE_MAG_FILTER,
3693            gls::gl::LINEAR as i32,
3694        );
3695        gls::tex_parameteri(
3696            texture_target,
3697            gls::gl::TEXTURE_WRAP_S,
3698            gls::gl::CLAMP_TO_EDGE as i32,
3699        );
3700        gls::tex_parameteri(
3701            texture_target,
3702            gls::gl::TEXTURE_WRAP_T,
3703            gls::gl::CLAMP_TO_EDGE as i32,
3704        );
3705
3706        // Repack protos to layer-first layout: (num_protos, H, W)
3707        let mut tex_data = vec![0.0f32; height * width * num_protos];
3708        for k in 0..num_protos {
3709            for y in 0..height {
3710                for x in 0..width {
3711                    tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
3712                }
3713            }
3714        }
3715
3716        gls::tex_image3d(
3717            texture_target,
3718            0,
3719            gls::gl::R32F as i32,
3720            width as i32,
3721            height as i32,
3722            num_protos as i32,
3723            0,
3724            gls::gl::RED,
3725            gls::gl::FLOAT,
3726            Some(&tex_data),
3727        );
3728
3729        program.load_uniform_1i(c"num_protos", num_protos as i32)?;
3730        self.render_proto_detection_quads(program, detect, mask_coefficients)?;
3731
3732        Ok(())
3733    }
3734
3735    /// F16 fallback path: repack f32 protos to RGBA16F and use existing
3736    /// f16 shader with GL_LINEAR. Used when GL_OES_texture_float_linear
3737    /// is not available.
3738    #[cfg(feature = "decoder")]
3739    #[allow(clippy::too_many_arguments)]
3740    fn render_proto_segmentation_f16(
3741        &self,
3742        detect: &[DetectBox],
3743        mask_coefficients: &[Vec<f32>],
3744        protos_f32: &ndarray::Array3<f32>,
3745        height: usize,
3746        width: usize,
3747        num_protos: usize,
3748        texture_target: u32,
3749    ) -> crate::Result<()> {
3750        let num_layers = num_protos.div_ceil(4);
3751        let (tex_data, _) = Self::repack_protos_to_rgba_f16(protos_f32);
3752
3753        let program = &self.proto_segmentation_program;
3754        gls::use_program(program.id);
3755        gls::bind_texture(texture_target, self.proto_texture.id);
3756        gls::active_texture(gls::gl::TEXTURE0);
3757        gls::tex_parameteri(
3758            texture_target,
3759            gls::gl::TEXTURE_MIN_FILTER,
3760            gls::gl::LINEAR as i32,
3761        );
3762        gls::tex_parameteri(
3763            texture_target,
3764            gls::gl::TEXTURE_MAG_FILTER,
3765            gls::gl::LINEAR as i32,
3766        );
3767        gls::tex_parameteri(
3768            texture_target,
3769            gls::gl::TEXTURE_WRAP_S,
3770            gls::gl::CLAMP_TO_EDGE as i32,
3771        );
3772        gls::tex_parameteri(
3773            texture_target,
3774            gls::gl::TEXTURE_WRAP_T,
3775            gls::gl::CLAMP_TO_EDGE as i32,
3776        );
3777
3778        gls::tex_image3d(
3779            texture_target,
3780            0,
3781            gls::gl::RGBA16F as i32,
3782            width as i32,
3783            height as i32,
3784            num_layers as i32,
3785            0,
3786            gls::gl::RGBA,
3787            gls::gl::HALF_FLOAT,
3788            Some(&tex_data),
3789        );
3790
3791        program.load_uniform_1i(c"num_layers", num_layers as i32)?;
3792        self.render_proto_detection_quads(program, detect, mask_coefficients)?;
3793
3794        Ok(())
3795    }
3796
3797    fn render_segmentation(
3798        &mut self,
3799        detect: &[DetectBox],
3800        segmentation: &[Segmentation],
3801    ) -> crate::Result<()> {
3802        if segmentation.is_empty() {
3803            return Ok(());
3804        }
3805
3806        let is_modelpack = segmentation[0].segmentation.shape()[2] > 1;
3807        // top and bottom are flipped because OpenGL uses 0,0 as bottom left
3808        let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3809        if is_modelpack {
3810            let seg = &segmentation[0];
3811            let dst_roi = RegionOfInterest {
3812                left: cvt_screen_coord(seg.xmin),
3813                top: cvt_screen_coord(seg.ymax),
3814                right: cvt_screen_coord(seg.xmax),
3815                bottom: cvt_screen_coord(seg.ymin),
3816            };
3817            let segment = seg.segmentation.as_standard_layout();
3818            let slice = segment.as_slice().ok_or(Error::Internal(
3819                "Cannot get slice of segmentation".to_owned(),
3820            ))?;
3821
3822            self.render_modelpack_segmentation(
3823                dst_roi,
3824                slice,
3825                [
3826                    seg.segmentation.shape()[0],
3827                    seg.segmentation.shape()[1],
3828                    seg.segmentation.shape()[2],
3829                ],
3830            )?;
3831        } else {
3832            for (seg, det) in segmentation.iter().zip(detect) {
3833                let dst_roi = RegionOfInterest {
3834                    left: cvt_screen_coord(seg.xmin),
3835                    top: cvt_screen_coord(seg.ymax),
3836                    right: cvt_screen_coord(seg.xmax),
3837                    bottom: cvt_screen_coord(seg.ymin),
3838                };
3839
3840                let segment = seg.segmentation.as_standard_layout();
3841                let slice = segment.as_slice().ok_or(Error::Internal(
3842                    "Cannot get slice of segmentation".to_owned(),
3843                ))?;
3844
3845                self.render_yolo_segmentation(
3846                    dst_roi,
3847                    slice,
3848                    [seg.segmentation.shape()[0], seg.segmentation.shape()[1]],
3849                    det.label,
3850                )?;
3851            }
3852        }
3853
3854        gls::disable(gls::gl::BLEND);
3855        Ok(())
3856    }
3857
3858    fn render_box(&mut self, dst: &TensorImage, detect: &[DetectBox]) -> Result<(), Error> {
3859        unsafe {
3860            gls::gl::UseProgram(self.color_program.id);
3861            let rescale = |x: f32| x * 2.0 - 1.0;
3862            let thickness = 3.0;
3863            for d in detect {
3864                self.color_program
3865                    .load_uniform_1i(c"class_index", d.label as i32)?;
3866                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3867                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3868                let bbox: [f32; 4] = d.bbox.into();
3869                let outer_box = [
3870                    bbox[0] - thickness / dst.width() as f32,
3871                    bbox[1] - thickness / dst.height() as f32,
3872                    bbox[2] + thickness / dst.width() as f32,
3873                    bbox[3] + thickness / dst.height() as f32,
3874                ];
3875                let camera_vertices: [f32; 24] = [
3876                    rescale(bbox[0]),
3877                    rescale(bbox[3]),
3878                    0., // bottom left
3879                    rescale(bbox[2]),
3880                    rescale(bbox[3]),
3881                    0., // bottom right
3882                    rescale(bbox[2]),
3883                    rescale(bbox[1]),
3884                    0., // top right
3885                    rescale(bbox[0]),
3886                    rescale(bbox[1]),
3887                    0., // top left
3888                    rescale(outer_box[0]),
3889                    rescale(outer_box[3]),
3890                    0., // bottom left
3891                    rescale(outer_box[2]),
3892                    rescale(outer_box[3]),
3893                    0., // bottom right
3894                    rescale(outer_box[2]),
3895                    rescale(outer_box[1]),
3896                    0., // top right
3897                    rescale(outer_box[0]),
3898                    rescale(outer_box[1]),
3899                    0., // top left
3900                ];
3901                gls::gl::BufferData(
3902                    gls::gl::ARRAY_BUFFER,
3903                    (size_of::<f32>() * camera_vertices.len()) as isize,
3904                    camera_vertices.as_ptr() as *const c_void,
3905                    gls::gl::DYNAMIC_DRAW,
3906                );
3907
3908                let vertices_index: [u32; 10] = [0, 1, 5, 2, 6, 3, 7, 0, 4, 5];
3909                gls::gl::DrawElements(
3910                    gls::gl::TRIANGLE_STRIP,
3911                    vertices_index.len() as i32,
3912                    gls::gl::UNSIGNED_INT,
3913                    vertices_index.as_ptr() as *const c_void,
3914                );
3915            }
3916        }
3917        check_gl_error(function!(), line!())?;
3918        Ok(())
3919    }
3920}
3921struct EglImage {
3922    egl_image: egl::Image,
3923    egl: Rc<Egl>,
3924    display: egl::Display,
3925}
3926
3927impl Drop for EglImage {
3928    fn drop(&mut self) {
3929        if self.egl_image.as_ptr() == egl::NO_IMAGE {
3930            return;
3931        }
3932
3933        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
3934            let e =
3935                GlContext::egl_destory_image_with_fallback(&self.egl, self.display, self.egl_image);
3936            if let Err(e) = e {
3937                error!("Could not destroy EGL image: {e:?}");
3938            }
3939        }));
3940    }
3941}
3942
3943struct Texture {
3944    id: u32,
3945    target: gls::gl::types::GLenum,
3946    width: usize,
3947    height: usize,
3948    format: gls::gl::types::GLenum,
3949}
3950
3951impl Default for Texture {
3952    fn default() -> Self {
3953        Self::new()
3954    }
3955}
3956
3957impl Texture {
3958    fn new() -> Self {
3959        let mut id = 0;
3960        unsafe { gls::gl::GenTextures(1, &raw mut id) };
3961        Self {
3962            id,
3963            target: 0,
3964            width: 0,
3965            height: 0,
3966            format: 0,
3967        }
3968    }
3969
3970    fn update_texture(
3971        &mut self,
3972        target: gls::gl::types::GLenum,
3973        width: usize,
3974        height: usize,
3975        format: gls::gl::types::GLenum,
3976        data: &[u8],
3977    ) {
3978        if target != self.target
3979            || width != self.width
3980            || height != self.height
3981            || format != self.format
3982        {
3983            unsafe {
3984                gls::gl::TexImage2D(
3985                    target,
3986                    0,
3987                    format as i32,
3988                    width as i32,
3989                    height as i32,
3990                    0,
3991                    format,
3992                    gls::gl::UNSIGNED_BYTE,
3993                    data.as_ptr() as *const c_void,
3994                );
3995            }
3996            self.target = target;
3997            self.format = format;
3998            self.width = width;
3999            self.height = height;
4000        } else {
4001            unsafe {
4002                gls::gl::TexSubImage2D(
4003                    target,
4004                    0,
4005                    0,
4006                    0,
4007                    width as i32,
4008                    height as i32,
4009                    format,
4010                    gls::gl::UNSIGNED_BYTE,
4011                    data.as_ptr() as *const c_void,
4012                );
4013            }
4014        }
4015    }
4016}
4017
4018impl Drop for Texture {
4019    fn drop(&mut self) {
4020        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
4021            gls::gl::DeleteTextures(1, &raw mut self.id)
4022        }));
4023    }
4024}
4025
4026struct Buffer {
4027    id: u32,
4028    buffer_index: u32,
4029}
4030
4031impl Buffer {
4032    fn new(buffer_index: u32, size_per_point: usize, max_points: usize) -> Buffer {
4033        let mut id = 0;
4034        unsafe {
4035            gls::gl::EnableVertexAttribArray(buffer_index);
4036            gls::gl::GenBuffers(1, &raw mut id);
4037            gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, id);
4038            gls::gl::VertexAttribPointer(
4039                buffer_index,
4040                size_per_point as i32,
4041                gls::gl::FLOAT,
4042                gls::gl::FALSE,
4043                0,
4044                null(),
4045            );
4046            gls::gl::BufferData(
4047                gls::gl::ARRAY_BUFFER,
4048                (size_of::<f32>() * size_per_point * max_points) as isize,
4049                null(),
4050                gls::gl::DYNAMIC_DRAW,
4051            );
4052        }
4053
4054        Buffer { id, buffer_index }
4055    }
4056}
4057
4058impl Drop for Buffer {
4059    fn drop(&mut self) {
4060        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
4061            gls::gl::DeleteBuffers(1, &raw mut self.id)
4062        }));
4063    }
4064}
4065
4066struct FrameBuffer {
4067    id: u32,
4068}
4069
4070impl FrameBuffer {
4071    fn new() -> FrameBuffer {
4072        let mut id = 0;
4073        unsafe {
4074            gls::gl::GenFramebuffers(1, &raw mut id);
4075        }
4076
4077        FrameBuffer { id }
4078    }
4079
4080    fn bind(&self) {
4081        unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.id) };
4082    }
4083
4084    fn unbind(&self) {
4085        unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0) };
4086    }
4087}
4088
4089impl Drop for FrameBuffer {
4090    fn drop(&mut self) {
4091        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
4092            self.unbind();
4093            unsafe {
4094                gls::gl::DeleteFramebuffers(1, &raw mut self.id);
4095            }
4096        }));
4097    }
4098}
4099
/// A GL program object together with the vertex and fragment shader objects
/// that were attached to it. All three are deleted when the value is dropped.
pub struct GlProgram {
    /// Program object name returned by `CreateProgram`.
    id: u32,
    /// Vertex shader object name returned by `CreateShader(VERTEX_SHADER)`.
    vertex_id: u32,
    /// Fragment shader object name returned by `CreateShader(FRAGMENT_SHADER)`.
    fragment_id: u32,
}
4105
4106impl GlProgram {
4107    fn new(vertex_shader: &str, fragment_shader: &str) -> Result<Self, crate::Error> {
4108        let id = unsafe { gls::gl::CreateProgram() };
4109        let vertex_id = unsafe { gls::gl::CreateShader(gls::gl::VERTEX_SHADER) };
4110        if compile_shader_from_str(vertex_id, vertex_shader, "shader_vert").is_err() {
4111            log::debug!("Vertex shader source:\n{}", vertex_shader);
4112            return Err(crate::Error::OpenGl(format!(
4113                "Shader compile error: {vertex_shader}"
4114            )));
4115        }
4116        unsafe {
4117            gls::gl::AttachShader(id, vertex_id);
4118        }
4119
4120        let fragment_id = unsafe { gls::gl::CreateShader(gls::gl::FRAGMENT_SHADER) };
4121        if compile_shader_from_str(fragment_id, fragment_shader, "shader_frag").is_err() {
4122            log::debug!("Fragment shader source:\n{}", fragment_shader);
4123            return Err(crate::Error::OpenGl(format!(
4124                "Shader compile error: {fragment_shader}"
4125            )));
4126        }
4127
4128        unsafe {
4129            gls::gl::AttachShader(id, fragment_id);
4130            gls::gl::LinkProgram(id);
4131            gls::gl::UseProgram(id);
4132        }
4133
4134        Ok(Self {
4135            id,
4136            vertex_id,
4137            fragment_id,
4138        })
4139    }
4140
4141    #[allow(dead_code)]
4142    fn load_uniform_1f(&self, name: &CStr, value: f32) -> Result<(), crate::Error> {
4143        unsafe {
4144            gls::gl::UseProgram(self.id);
4145            let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
4146            gls::gl::Uniform1f(location, value);
4147        }
4148        Ok(())
4149    }
4150
4151    #[allow(dead_code)]
4152    fn load_uniform_1i(&self, name: &CStr, value: i32) -> Result<(), crate::Error> {
4153        unsafe {
4154            gls::gl::UseProgram(self.id);
4155            let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
4156            gls::gl::Uniform1i(location, value);
4157        }
4158        Ok(())
4159    }
4160
4161    fn load_uniform_4fv(&self, name: &CStr, value: &[[f32; 4]]) -> Result<(), crate::Error> {
4162        unsafe {
4163            gls::gl::UseProgram(self.id);
4164            let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
4165            if location == -1 {
4166                return Err(crate::Error::OpenGl(format!(
4167                    "Could not find uniform location for '{}'",
4168                    name.to_string_lossy().into_owned()
4169                )));
4170            }
4171            gls::gl::Uniform4fv(location, value.len() as i32, value.as_flattened().as_ptr());
4172        }
4173        check_gl_error(function!(), line!())?;
4174        Ok(())
4175    }
4176}
4177
4178impl Drop for GlProgram {
4179    fn drop(&mut self) {
4180        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
4181            gls::gl::DeleteProgram(self.id);
4182            gls::gl::DeleteShader(self.fragment_id);
4183            gls::gl::DeleteShader(self.vertex_id);
4184        }));
4185    }
4186}
4187
4188fn compile_shader_from_str(shader: u32, shader_source: &str, shader_name: &str) -> Result<(), ()> {
4189    let src = match CString::from_str(shader_source) {
4190        Ok(v) => v,
4191        Err(_) => return Err(()),
4192    };
4193    let src_ptr = src.as_ptr();
4194    unsafe {
4195        gls::gl::ShaderSource(shader, 1, &raw const src_ptr, null());
4196        gls::gl::CompileShader(shader);
4197        let mut is_compiled = 0;
4198        gls::gl::GetShaderiv(shader, gls::gl::COMPILE_STATUS, &raw mut is_compiled);
4199        if is_compiled == 0 {
4200            let mut max_length = 0;
4201            gls::gl::GetShaderiv(shader, gls::gl::INFO_LOG_LENGTH, &raw mut max_length);
4202            let mut error_log: Vec<u8> = vec![0; max_length as usize];
4203            gls::gl::GetShaderInfoLog(
4204                shader,
4205                max_length,
4206                &raw mut max_length,
4207                error_log.as_mut_ptr() as *mut c_char,
4208            );
4209            error!(
4210                "Shader '{}' failed: {:?}\n",
4211                shader_name,
4212                CString::from_vec_with_nul(error_log)
4213                    .unwrap()
4214                    .into_string()
4215                    .unwrap()
4216            );
4217            gls::gl::DeleteShader(shader);
4218            return Err(());
4219        }
4220        Ok(())
4221    }
4222}
4223
4224fn check_gl_error(name: &str, line: u32) -> Result<(), Error> {
4225    unsafe {
4226        let err = gls::gl::GetError();
4227        if err != gls::gl::NO_ERROR {
4228            error!("GL Error: {name}:{line}: {err:#X}");
4229            // panic!("GL Error: {err}");
4230            return Err(Error::OpenGl(format!("{err:#X}")));
4231        }
4232    }
4233    Ok(())
4234}
4235
4236fn fourcc_to_drm(fourcc: FourCharCode) -> DrmFourcc {
4237    match fourcc {
4238        RGBA => DrmFourcc::Abgr8888,
4239        YUYV => DrmFourcc::Yuyv,
4240        RGB => DrmFourcc::Bgr888,
4241        GREY => DrmFourcc::R8,
4242        NV12 => DrmFourcc::Nv12,
4243        _ => todo!(),
4244    }
4245}
4246
/// Numeric EGL tokens used by this file, grouped in one place.
///
/// NOTE(review): the names and values appear to mirror the tokens of the
/// `EGL_EXT_image_dma_buf_import`, `EGL_KHR_platform_gbm` and
/// `EGL_EXT_platform_device` extensions (without the `EGL_` prefix and
/// `_EXT` suffix) — confirm against the Khronos registry before changing any
/// value.
mod egl_ext {
    // Not every token is referenced, so unused-constant warnings are silenced
    // for the whole module.
    #![allow(dead_code)]
    // dma-buf image target and per-plane attributes (fd / offset / pitch).
    pub(crate) const LINUX_DMA_BUF: u32 = 0x3270;
    pub(crate) const LINUX_DRM_FOURCC: u32 = 0x3271;
    pub(crate) const DMA_BUF_PLANE0_FD: u32 = 0x3272;
    pub(crate) const DMA_BUF_PLANE0_OFFSET: u32 = 0x3273;
    pub(crate) const DMA_BUF_PLANE0_PITCH: u32 = 0x3274;
    pub(crate) const DMA_BUF_PLANE1_FD: u32 = 0x3275;
    pub(crate) const DMA_BUF_PLANE1_OFFSET: u32 = 0x3276;
    pub(crate) const DMA_BUF_PLANE1_PITCH: u32 = 0x3277;
    pub(crate) const DMA_BUF_PLANE2_FD: u32 = 0x3278;
    pub(crate) const DMA_BUF_PLANE2_OFFSET: u32 = 0x3279;
    pub(crate) const DMA_BUF_PLANE2_PITCH: u32 = 0x327A;
    // YUV import hint attribute names.
    pub(crate) const YUV_COLOR_SPACE_HINT: u32 = 0x327B;
    pub(crate) const SAMPLE_RANGE_HINT: u32 = 0x327C;
    pub(crate) const YUV_CHROMA_HORIZONTAL_SITING_HINT: u32 = 0x327D;
    pub(crate) const YUV_CHROMA_VERTICAL_SITING_HINT: u32 = 0x327E;

    // Colour-space hint values.
    pub(crate) const ITU_REC601: u32 = 0x327F;
    pub(crate) const ITU_REC709: u32 = 0x3280;
    pub(crate) const ITU_REC2020: u32 = 0x3281;

    // Sample-range hint values.
    pub(crate) const YUV_FULL_RANGE: u32 = 0x3282;
    pub(crate) const YUV_NARROW_RANGE: u32 = 0x3283;

    // Chroma-siting hint values.
    pub(crate) const YUV_CHROMA_SITING_0: u32 = 0x3284;
    pub(crate) const YUV_CHROMA_SITING_0_5: u32 = 0x3285;

    // Platform identifier for GBM-backed displays.
    pub(crate) const PLATFORM_GBM_KHR: u32 = 0x31D7;

    // Platform identifier for device-enumerated displays.
    pub(crate) const PLATFORM_DEVICE_EXT: u32 = 0x313F;
}
4279
/// Returns the GLSL ES 3.00 vertex shader source.
///
/// The shader reads `pos` (attribute location 0) and `texCoord` (location 1),
/// forwards them unchanged as `fragPos` and `tc`, and emits
/// `gl_Position = vec4(pos, 1.0)` without applying any transform.
fn generate_vertex_shader() -> &'static str {
    "\
#version 300 es
precision mediump float;
layout(location = 0) in vec3 pos;
layout(location = 1) in vec2 texCoord;

out vec3 fragPos;
out vec2 tc;

void main() {
    fragPos = pos;
    tc = texCoord;

    gl_Position = vec4(pos, 1.0);
}
"
}
4298
/// Returns the fragment shader source that copies a 2D texture.
///
/// The shader samples the `sampler2D` uniform `tex` at the interpolated
/// coordinate `tc` and writes the texel unchanged to `color`.
fn generate_texture_fragment_shader() -> &'static str {
    "\
#version 300 es

precision mediump float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
"
}
4315
/// Returns the fragment shader source that copies an external texture.
///
/// Identical to the plain texture shader except that `tex` is a
/// `samplerExternalOES`, which makes the
/// `GL_OES_EGL_image_external_essl3` extension mandatory.
/// NOTE(review): any YUV→RGB conversion is presumably performed by the
/// driver when sampling the external image — the shader itself does none.
fn generate_texture_fragment_shader_yuv() -> &'static str {
    "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
"
}
4332
/// Returns the fragment shader source used for planar RGB output.
///
/// The source text is currently identical to the external-texture (YUV)
/// shader: it samples the `samplerExternalOES` uniform `tex` at `tc` and
/// writes the result to `color`.
fn generate_planar_rgb_shader() -> &'static str {
    "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
"
}
4349
/// Returns the semantic-segmentation fragment shader source.
///
/// This shader requires the segmentation output tensor to be reshaped to
/// (H, W, C/4, 4), i.e. a `sampler2DArray` whose layers each hold four class
/// scores in their RGBA channels.
///
/// For every fragment the shader finds the class with the highest score
/// across all layers (class index = `layer * 4 + channel`). Fragments whose
/// winning class equals `background_index` are discarded; all others are
/// painted with `colors[class % 20]`.
///
/// The running maximum starts at `-4.0`, so if no score exceeds that value
/// the winning class stays at index 0.
fn generate_segmentation_shader() -> &'static str {
    "\
#version 300 es
precision mediump float;
precision mediump sampler2DArray;

uniform sampler2DArray tex;
uniform vec4 colors[20];
uniform int background_index;

in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;

float max_arg(const in vec4 args, out int argmax) {
    if (args[0] >= args[1] && args[0] >= args[2] && args[0] >= args[3]) {
        argmax = 0;
        return args[0];
    }
    if (args[1] >= args[0] && args[1] >= args[2] && args[1] >= args[3]) {
        argmax = 1;
        return args[1];
    }
    if (args[2] >= args[0] && args[2] >= args[1] && args[2] >= args[3]) {
        argmax = 2;
        return args[2];
    }
    argmax = 3;
    return args[3];
}

void main() {
    mediump int layers = textureSize(tex, 0).z;
    float max_all = -4.0;
    int max_ind = 0;
    for (int i = 0; i < layers; i++) {
        vec4 d = texture(tex, vec3(tc, i));
        int max_ind_ = 0;
        float max_ = max_arg(d, max_ind_);
        if (max_ <= max_all) { continue; }
        max_all = max_;
        max_ind = i*4 + max_ind_;
    }
    if (max_ind == background_index) {
        discard;
    }
    max_ind = max_ind % 20;
    color = colors[max_ind];
}
"
}
4405
/// Returns the per-instance mask fragment shader source.
///
/// The shader samples the red channel of the `sampler2D` uniform `mask0` at
/// `tc`. Fragments with a value below `0.5` are discarded; the rest are
/// painted with `colors[class_index % 20]`.
fn generate_instanced_segmentation_shader() -> &'static str {
    "\
#version 300 es
precision mediump float;
uniform sampler2D mask0;
uniform vec4 colors[20];
uniform int class_index;
in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;
void main() {
    float r0 = texture(mask0, tc).r;
    int arg = int(r0>=0.5);
    if (arg == 0) {
        discard;
    }
    color = colors[class_index % 20];
}
"
}
4428
/// Proto-mask shader for float protos packed four per RGBA layer.
///
/// For each of the `num_layers` array layers the shader takes the dot
/// product of `mask_coeff[i]` with the sampled RGBA texel and accumulates
/// the results. The sum is passed through a sigmoid; fragments below `0.5`
/// are discarded and the rest are painted with `colors[class_index % 20]`.
///
/// `mask_coeff` has 8 elements, so `num_layers` must not exceed 8.
#[cfg(feature = "decoder")]
fn generate_proto_segmentation_shader() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;  // ceil(num_protos/4) layers, RGBA = 4 channels per layer
uniform vec4 mask_coeff[8];        // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_layers;

in vec2 tc;
out vec4 color;

void main() {
    float acc = 0.0;
    for (int i = 0; i < num_layers; i++) {
        // texture() returns bilinearly interpolated proto values (GL_LINEAR)
        acc += dot(mask_coeff[i], texture(proto_tex, vec3(tc, float(i))));
    }
    float mask = 1.0 / (1.0 + exp(-acc));  // sigmoid
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
"
}
4457
/// Int8 proto shader — nearest-neighbor only.
///
/// Uses `texelFetch()` at the nearest texel. No interpolation. Simplest and
/// fastest GPU execution but may show staircase artifacts at mask edges.
///
/// Layout: `GL_R8I` texture with 1 proto per layer (32 layers).
/// Mask coefficients packed as `vec4[8]`, indexed `mask_coeff[k/4][k%4]`.
///
/// Each fetched value is dequantized as `raw * proto_scale + proto_scaled_zp`
/// before being weighted. The accumulated sum goes through a sigmoid;
/// fragments below `0.5` are discarded and the rest are painted with
/// `colors[class_index % 20]`.
#[cfg(feature = "decoder")]
fn generate_proto_segmentation_shader_int8_nearest() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;  // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8];         // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp;      // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
    int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float raw = float(texelFetch(proto_tex, ivec3(ix, iy, k), 0).r);
        float val = raw * proto_scale + proto_scaled_zp;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
"
}
4501
/// Int8 proto shader — shader-based bilinear interpolation (recommended).
///
/// Uses `texelFetch()` to fetch 4 neighboring texels per fragment, dequantizes
/// each, and computes bilinear weights from `fract(tc * textureSize)`.
///
/// Layout: `GL_R8I` texture with 1 proto per layer (32 layers).
///
/// The sample position is shifted by `-0.5` texels before the weights are
/// derived and both neighbor coordinates are clamped to the texture bounds.
/// The four raw values are blended first and the blend is then dequantized
/// as `interp * proto_scale + proto_scaled_zp`. Thresholding and colouring
/// match the nearest-neighbor variant.
#[cfg(feature = "decoder")]
fn generate_proto_segmentation_shader_int8_bilinear() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;  // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8];         // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp;      // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    // Compute continuous position (matching GL_LINEAR convention: center at +0.5)
    vec2 pos = tc * vec2(tex_size.xy) - 0.5;
    vec2 f = fract(pos);
    ivec2 p0 = ivec2(floor(pos));
    ivec2 p1 = p0 + 1;
    // Clamp to texture bounds
    p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
    p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

    float w00 = (1.0 - f.x) * (1.0 - f.y);
    float w10 = f.x * (1.0 - f.y);
    float w01 = (1.0 - f.x) * f.y;
    float w11 = f.x * f.y;

    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float r00 = float(texelFetch(proto_tex, ivec3(p0.x, p0.y, k), 0).r);
        float r10 = float(texelFetch(proto_tex, ivec3(p1.x, p0.y, k), 0).r);
        float r01 = float(texelFetch(proto_tex, ivec3(p0.x, p1.y, k), 0).r);
        float r11 = float(texelFetch(proto_tex, ivec3(p1.x, p1.y, k), 0).r);
        float interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
        float val = interp * proto_scale + proto_scaled_zp;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
"
}
4559
/// Int8 dequantization pass shader (two-pass Option C, pass 1).
///
/// Reads `GL_R8I` texel, dequantizes, and writes float to `GL_RGBA16F` render
/// target. This shader processes 4 protos at a time (packing into RGBA).
/// After this pass, the existing f16 shader reads the dequantized texture with
/// `GL_LINEAR`.
///
/// Output channel `c` (0..4) holds layer `base_layer + c` of `proto_tex`,
/// fetched at the nearest texel and dequantized as
/// `raw * proto_scale + proto_scaled_zp`. The shader does not bounds-check
/// `base_layer + c` against the number of layers.
#[cfg(feature = "decoder")]
fn generate_proto_dequant_shader_int8() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;  // 32 layers of R8I (1 proto per layer)
uniform float proto_scale;
uniform float proto_scaled_zp;      // -zero_point * scale
uniform int base_layer;             // first proto index for this output layer (0, 4, 8, ...)

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
    int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

    vec4 result;
    for (int c = 0; c < 4; c++) {
        int layer = base_layer + c;
        float raw = float(texelFetch(proto_tex, ivec3(ix, iy, layer), 0).r);
        result[c] = raw * proto_scale + proto_scaled_zp;
    }
    color = result;
}
"
}
4597
/// F32 proto shader — direct R32F texture with hardware bilinear filtering.
///
/// Same structure as int8 bilinear shader but uses `texture()` for hardware
/// interpolation (requires `GL_OES_texture_float_linear`). No dequantization.
///
/// Layout: `GL_R32F` texture with 1 proto per layer (32 layers).
///
/// The red channel of layer `k` is weighted by `mask_coeff[k / 4][k % 4]`.
/// The accumulated sum goes through a sigmoid; fragments below `0.5` are
/// discarded and the rest are painted with `colors[class_index % 20]`.
#[cfg(feature = "decoder")]
fn generate_proto_segmentation_shader_f32() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;  // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8];        // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        // texture() returns bilinearly interpolated proto value (GL_LINEAR on R32F)
        float val = texture(proto_tex, vec3(tc, float(k))).r;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
"
}
4633
/// Grayscale mask shader — int8, nearest-neighbor.
///
/// Same accumulation as the colored variant but outputs `sigmoid(acc)` to the
/// RED channel without thresholding or discarding.  Used by
/// `render_masks_from_protos()` for per-instance mask readback.
///
/// The output is `vec4(mask, 0.0, 0.0, 1.0)`: green and blue are zero and
/// alpha is one for every fragment.
#[cfg(feature = "decoder")]
fn generate_proto_mask_shader_int8_nearest() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp;

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
    int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float raw = float(texelFetch(proto_tex, ivec3(ix, iy, k), 0).r);
        float val = raw * proto_scale + proto_scaled_zp;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
4672
/// Grayscale mask shader — int8, shader-based bilinear interpolation.
///
/// Same accumulation as the colored bilinear variant but outputs
/// `sigmoid(acc)` to the RED channel without thresholding or discarding.
///
/// Four neighboring texels are fetched per proto layer with `texelFetch()`
/// and blended using weights from the fractional sample position. The blend
/// is dequantized as `interp * proto_scale + proto_scaled_zp`. The output is
/// `vec4(mask, 0.0, 0.0, 1.0)`.
#[cfg(feature = "decoder")]
fn generate_proto_mask_shader_int8_bilinear() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp;

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    vec2 pos = tc * vec2(tex_size.xy) - 0.5;
    vec2 f = fract(pos);
    ivec2 p0 = ivec2(floor(pos));
    ivec2 p1 = p0 + 1;
    p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
    p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

    float w00 = (1.0 - f.x) * (1.0 - f.y);
    float w10 = f.x * (1.0 - f.y);
    float w01 = (1.0 - f.x) * f.y;
    float w11 = f.x * f.y;

    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float r00 = float(texelFetch(proto_tex, ivec3(p0.x, p0.y, k), 0).r);
        float r10 = float(texelFetch(proto_tex, ivec3(p1.x, p0.y, k), 0).r);
        float r01 = float(texelFetch(proto_tex, ivec3(p0.x, p1.y, k), 0).r);
        float r11 = float(texelFetch(proto_tex, ivec3(p1.x, p1.y, k), 0).r);
        float interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
        float val = interp * proto_scale + proto_scaled_zp;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
4723
/// Grayscale mask shader — f32 protos with hardware bilinear filtering.
///
/// Same accumulation as the colored f32 variant but outputs `sigmoid(acc)` to
/// the RED channel without thresholding or discarding.
///
/// The red channel of layer `k` of `proto_tex` is sampled with `texture()`
/// and weighted by `mask_coeff[k / 4][k % 4]`. The output is
/// `vec4(mask, 0.0, 0.0, 1.0)`.
#[cfg(feature = "decoder")]
fn generate_proto_mask_shader_f32() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float val = texture(proto_tex, vec3(tc, float(k))).r;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
4753
/// Returns the flat-colour fragment shader source.
///
/// Every fragment is painted with `colors[class_index % 20]`; the shader
/// reads no textures and no interpolated inputs.
fn generate_color_shader() -> &'static str {
    "\
#version 300 es
precision mediump float;
uniform vec4 colors[20];
uniform int class_index;

out vec4 color;
void main() {
    int index = class_index % 20;
    color = colors[index];
}
"
}
4768
4769#[cfg(test)]
4770#[cfg(feature = "opengl")]
4771mod gl_tests {
4772    use super::*;
4773    use crate::{TensorImage, RGBA};
4774    #[cfg(feature = "dma_test_formats")]
4775    use crate::{NV12, YUYV};
4776    use edgefirst_tensor::TensorTrait;
4777    #[cfg(feature = "dma_test_formats")]
4778    use edgefirst_tensor::{is_dma_available, TensorMapTrait, TensorMemory};
4779    use image::buffer::ConvertBuffer;
4780    use ndarray::Array3;
4781
4782    #[test]
4783    #[cfg(feature = "decoder")]
4784    fn test_segmentation() {
4785        use edgefirst_decoder::Segmentation;
4786
4787        if !is_opengl_available() {
4788            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4789            return;
4790        }
4791
4792        let mut image = TensorImage::load(
4793            include_bytes!("../../../testdata/giraffe.jpg"),
4794            Some(RGBA),
4795            None,
4796        )
4797        .unwrap();
4798
4799        let mut segmentation = Array3::from_shape_vec(
4800            (2, 160, 160),
4801            include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
4802        )
4803        .unwrap();
4804        segmentation.swap_axes(0, 1);
4805        segmentation.swap_axes(1, 2);
4806        let segmentation = segmentation.as_standard_layout().to_owned();
4807
4808        let seg = Segmentation {
4809            segmentation,
4810            xmin: 0.0,
4811            ymin: 0.0,
4812            xmax: 1.0,
4813            ymax: 1.0,
4814        };
4815
4816        let mut renderer = GLProcessorThreaded::new(None).unwrap();
4817        renderer.render_to_image(&mut image, &[], &[seg]).unwrap();
4818    }
4819
4820    #[test]
4821    #[cfg(feature = "decoder")]
4822    fn test_segmentation_mem() {
4823        use edgefirst_decoder::Segmentation;
4824
4825        if !is_opengl_available() {
4826            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4827            return;
4828        }
4829
4830        let mut image = TensorImage::load(
4831            include_bytes!("../../../testdata/giraffe.jpg"),
4832            Some(RGBA),
4833            Some(edgefirst_tensor::TensorMemory::Mem),
4834        )
4835        .unwrap();
4836
4837        let mut segmentation = Array3::from_shape_vec(
4838            (2, 160, 160),
4839            include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
4840        )
4841        .unwrap();
4842        segmentation.swap_axes(0, 1);
4843        segmentation.swap_axes(1, 2);
4844        let segmentation = segmentation.as_standard_layout().to_owned();
4845
4846        let seg = Segmentation {
4847            segmentation,
4848            xmin: 0.0,
4849            ymin: 0.0,
4850            xmax: 1.0,
4851            ymax: 1.0,
4852        };
4853
4854        let mut renderer = GLProcessorThreaded::new(None).unwrap();
4855        renderer.render_to_image(&mut image, &[], &[seg]).unwrap();
4856    }
4857
4858    #[test]
4859    #[cfg(feature = "decoder")]
4860    fn test_segmentation_yolo() {
4861        use edgefirst_decoder::Segmentation;
4862        use ndarray::Array3;
4863
4864        if !is_opengl_available() {
4865            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4866            return;
4867        }
4868
4869        let mut image = TensorImage::load(
4870            include_bytes!("../../../testdata/giraffe.jpg"),
4871            Some(RGBA),
4872            None,
4873        )
4874        .unwrap();
4875
4876        let segmentation = Array3::from_shape_vec(
4877            (76, 55, 1),
4878            include_bytes!("../../../testdata/yolov8_seg_crop_76x55.bin").to_vec(),
4879        )
4880        .unwrap();
4881
4882        let detect = DetectBox {
4883            bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
4884            score: 0.99,
4885            label: 1,
4886        };
4887
4888        let seg = Segmentation {
4889            segmentation,
4890            xmin: 0.59375,
4891            ymin: 0.25,
4892            xmax: 0.9375,
4893            ymax: 0.725,
4894        };
4895
4896        let mut renderer = GLProcessorThreaded::new(None).unwrap();
4897        renderer
4898            .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
4899            .unwrap();
4900        renderer
4901            .render_to_image(&mut image, &[detect], &[seg])
4902            .unwrap();
4903
4904        let expected = TensorImage::load(
4905            include_bytes!("../../../testdata/output_render_gl.jpg"),
4906            Some(RGBA),
4907            None,
4908        )
4909        .unwrap();
4910
4911        compare_images(&image, &expected, 0.99, function!());
4912    }
4913
4914    #[test]
4915    #[cfg(feature = "decoder")]
4916    fn test_boxes() {
4917        use edgefirst_decoder::DetectBox;
4918
4919        if !is_opengl_available() {
4920            eprintln!("SKIPPED: {} - OpenGL not available", function!());
4921            return;
4922        }
4923
4924        let mut image = TensorImage::load(
4925            include_bytes!("../../../testdata/giraffe.jpg"),
4926            Some(RGBA),
4927            None,
4928        )
4929        .unwrap();
4930
4931        let detect = DetectBox {
4932            bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
4933            score: 0.99,
4934            label: 0,
4935        };
4936        let mut renderer = GLProcessorThreaded::new(None).unwrap();
4937        renderer
4938            .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
4939            .unwrap();
4940        renderer
4941            .render_to_image(&mut image, &[detect], &[])
4942            .unwrap();
4943    }
4944
4945    static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
4946    // Helper function to check if OpenGL is available
4947    fn is_opengl_available() -> bool {
4948        #[cfg(all(target_os = "linux", feature = "opengl"))]
4949        {
4950            *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
4951        }
4952
4953        #[cfg(not(all(target_os = "linux", feature = "opengl")))]
4954        {
4955            false
4956        }
4957    }
4958
4959    fn compare_images(img1: &TensorImage, img2: &TensorImage, threshold: f64, name: &str) {
4960        assert_eq!(img1.height(), img2.height(), "Heights differ");
4961        assert_eq!(img1.width(), img2.width(), "Widths differ");
4962        assert_eq!(img1.fourcc(), img2.fourcc(), "FourCC differ");
4963        assert!(
4964            matches!(img1.fourcc(), RGB | RGBA | GREY | PLANAR_RGB),
4965            "FourCC must be RGB or RGBA for comparison"
4966        );
4967
4968        let image1 = match img1.fourcc() {
4969            RGB => image::RgbImage::from_vec(
4970                img1.width() as u32,
4971                img1.height() as u32,
4972                img1.tensor().map().unwrap().to_vec(),
4973            )
4974            .unwrap(),
4975            RGBA => image::RgbaImage::from_vec(
4976                img1.width() as u32,
4977                img1.height() as u32,
4978                img1.tensor().map().unwrap().to_vec(),
4979            )
4980            .unwrap()
4981            .convert(),
4982            GREY => image::GrayImage::from_vec(
4983                img1.width() as u32,
4984                img1.height() as u32,
4985                img1.tensor().map().unwrap().to_vec(),
4986            )
4987            .unwrap()
4988            .convert(),
4989            PLANAR_RGB => image::GrayImage::from_vec(
4990                img1.width() as u32,
4991                (img1.height() * 3) as u32,
4992                img1.tensor().map().unwrap().to_vec(),
4993            )
4994            .unwrap()
4995            .convert(),
4996            _ => return,
4997        };
4998
4999        let image2 = match img2.fourcc() {
5000            RGB => image::RgbImage::from_vec(
5001                img2.width() as u32,
5002                img2.height() as u32,
5003                img2.tensor().map().unwrap().to_vec(),
5004            )
5005            .unwrap(),
5006            RGBA => image::RgbaImage::from_vec(
5007                img2.width() as u32,
5008                img2.height() as u32,
5009                img2.tensor().map().unwrap().to_vec(),
5010            )
5011            .unwrap()
5012            .convert(),
5013            GREY => image::GrayImage::from_vec(
5014                img2.width() as u32,
5015                img2.height() as u32,
5016                img2.tensor().map().unwrap().to_vec(),
5017            )
5018            .unwrap()
5019            .convert(),
5020            PLANAR_RGB => image::GrayImage::from_vec(
5021                img2.width() as u32,
5022                (img2.height() * 3) as u32,
5023                img2.tensor().map().unwrap().to_vec(),
5024            )
5025            .unwrap()
5026            .convert(),
5027            _ => return,
5028        };
5029
5030        let similarity = image_compare::rgb_similarity_structure(
5031            &image_compare::Algorithm::RootMeanSquared,
5032            &image1,
5033            &image2,
5034        )
5035        .expect("Image Comparison failed");
5036        if similarity.score < threshold {
5037            // image1.save(format!("{name}_1.png"));
5038            // image2.save(format!("{name}_2.png"));
5039            similarity
5040                .image
5041                .to_color_map()
5042                .save(format!("{name}.png"))
5043                .unwrap();
5044            panic!(
5045                "{name}: converted image and target image have similarity score too low: {} < {}",
5046                similarity.score, threshold
5047            )
5048        }
5049    }
5050
5051    // =========================================================================
5052    // NV12 Reference Validation Tests
5053    // These tests compare OpenGL NV12 conversions against ffmpeg-generated
5054    // references
5055    // =========================================================================
5056
5057    #[cfg(feature = "dma_test_formats")]
5058    fn load_raw_image(
5059        width: usize,
5060        height: usize,
5061        fourcc: FourCharCode,
5062        memory: Option<TensorMemory>,
5063        bytes: &[u8],
5064    ) -> Result<TensorImage, crate::Error> {
5065        let img = TensorImage::new(width, height, fourcc, memory)?;
5066        let mut map = img.tensor().map()?;
5067        map.as_mut_slice()[..bytes.len()].copy_from_slice(bytes);
5068        Ok(img)
5069    }
5070
5071    /// Test OpenGL NV12→RGBA conversion against ffmpeg reference
5072    #[test]
5073    #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
5074    fn test_opengl_nv12_to_rgba_reference() {
5075        if !is_dma_available() {
5076            return;
5077        }
5078        // Load NV12 source with DMA
5079        let src = load_raw_image(
5080            1280,
5081            720,
5082            NV12,
5083            Some(TensorMemory::Dma),
5084            include_bytes!("../../../testdata/camera720p.nv12"),
5085        )
5086        .unwrap();
5087
5088        // Load RGBA reference (ffmpeg-generated)
5089        let reference = load_raw_image(
5090            1280,
5091            720,
5092            RGBA,
5093            None,
5094            include_bytes!("../../../testdata/camera720p.rgba"),
5095        )
5096        .unwrap();
5097
5098        // Convert using OpenGL
5099        let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
5100        let mut gl = GLProcessorThreaded::new(None).unwrap();
5101        gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
5102            .unwrap();
5103
5104        // Copy to CPU for comparison
5105        let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
5106        cpu_dst
5107            .tensor()
5108            .map()
5109            .unwrap()
5110            .as_mut_slice()
5111            .copy_from_slice(dst.tensor().map().unwrap().as_slice());
5112
5113        compare_images(&reference, &cpu_dst, 0.98, "opengl_nv12_to_rgba_reference");
5114    }
5115
5116    /// Test OpenGL YUYV→RGBA conversion against ffmpeg reference
5117    #[test]
5118    #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
5119    fn test_opengl_yuyv_to_rgba_reference() {
5120        if !is_dma_available() {
5121            return;
5122        }
5123        // Load YUYV source with DMA
5124        let src = load_raw_image(
5125            1280,
5126            720,
5127            YUYV,
5128            Some(TensorMemory::Dma),
5129            include_bytes!("../../../testdata/camera720p.yuyv"),
5130        )
5131        .unwrap();
5132
5133        // Load RGBA reference (ffmpeg-generated)
5134        let reference = load_raw_image(
5135            1280,
5136            720,
5137            RGBA,
5138            None,
5139            include_bytes!("../../../testdata/camera720p.rgba"),
5140        )
5141        .unwrap();
5142
5143        // Convert using OpenGL
5144        let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
5145        let mut gl = GLProcessorThreaded::new(None).unwrap();
5146        gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
5147            .unwrap();
5148
5149        // Copy to CPU for comparison
5150        let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
5151        cpu_dst
5152            .tensor()
5153            .map()
5154            .unwrap()
5155            .as_mut_slice()
5156            .copy_from_slice(dst.tensor().map().unwrap().as_slice());
5157
5158        compare_images(&reference, &cpu_dst, 0.98, "opengl_yuyv_to_rgba_reference");
5159    }
5160
5161    // =========================================================================
5162    // EGL Display Probe & Override Tests
5163    // =========================================================================
5164
5165    /// Validate that probe_egl_displays() discovers available display types
5166    /// and returns them in priority order (GBM first).
5167    ///
5168    /// On headless i.MX hardware, GBM and PlatformDevice are typically
5169    /// available. Default requires a running compositor (Wayland/X11) and
5170    /// may not be present on headless targets.
5171    #[test]
5172    fn test_probe_egl_displays() {
5173        let displays = match probe_egl_displays() {
5174            Ok(d) => d,
5175            Err(e) => {
5176                eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
5177                return;
5178            }
5179        };
5180
5181        if displays.is_empty() {
5182            eprintln!("SKIPPED: {} - No EGL displays available", function!());
5183            return;
5184        }
5185
5186        let kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();
5187        eprintln!("Probed EGL displays: {kinds:?}");
5188        for d in &displays {
5189            eprintln!("  {:?}: {}", d.kind, d.description);
5190        }
5191
5192        // GBM requires /dev/dri/renderD128 — skip hardware-specific
5193        // assertions when it is not present (CI runners, non-GPU hosts).
5194        if !kinds.contains(&EglDisplayKind::Gbm) {
5195            eprintln!(
5196                "SKIPPED: {} - GBM not available (no /dev/dri/renderD128), got: {kinds:?}",
5197                function!()
5198            );
5199            return;
5200        }
5201
5202        // On i.MX hardware at least two display types should be available
5203        assert!(
5204            displays.len() >= 2,
5205            "Expected at least 2 display types, got {}: {kinds:?}",
5206            displays.len()
5207        );
5208
5209        // Verify ordering: GBM should come first (priority order)
5210        assert_eq!(
5211            displays[0].kind,
5212            EglDisplayKind::Gbm,
5213            "First display should be GBM (priority order)"
5214        );
5215
5216        // Log which optional types are available
5217        if !kinds.contains(&EglDisplayKind::PlatformDevice) {
5218            eprintln!("Note: PlatformDevice not available on this system");
5219        }
5220        if !kinds.contains(&EglDisplayKind::Default) {
5221            eprintln!("Note: Default display not available (no compositor running)");
5222        }
5223    }
5224
5225    /// Validate that explicitly selecting each available display kind via
5226    /// GLProcessorThreaded::new(Some(kind)) succeeds and produces a working
5227    /// converter.
5228    #[test]
5229    fn test_override_each_display_kind() {
5230        let displays = match probe_egl_displays() {
5231            Ok(d) => d,
5232            Err(e) => {
5233                eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
5234                return;
5235            }
5236        };
5237
5238        if displays.is_empty() {
5239            eprintln!("SKIPPED: {} - No EGL displays available", function!());
5240            return;
5241        }
5242
5243        for display in &displays {
5244            eprintln!(
5245                "Testing override: {:?} ({})",
5246                display.kind, display.description
5247            );
5248            let mut gl = GLProcessorThreaded::new(Some(display.kind)).unwrap_or_else(|e| {
5249                panic!(
5250                    "GLProcessorThreaded::new(Some({:?})) failed: {e:?}",
5251                    display.kind
5252                )
5253            });
5254
5255            // Smoke test: do a simple RGBA → RGBA conversion to verify the
5256            // GL context is fully functional.
5257            let src = TensorImage::load(
5258                include_bytes!("../../../testdata/zidane.jpg"),
5259                Some(RGBA),
5260                None,
5261            )
5262            .unwrap();
5263            let mut dst = TensorImage::new(320, 240, RGBA, None).unwrap();
5264            gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
5265                .unwrap_or_else(|e| {
5266                    panic!("convert() with {:?} display failed: {e:?}", display.kind)
5267                });
5268            eprintln!("  {:?} display: convert OK", display.kind);
5269        }
5270    }
5271
5272    /// Validate that requesting a display kind that doesn't exist on the
5273    /// system returns an error rather than falling back silently.
5274    #[test]
5275    fn test_override_unavailable_display_errors() {
5276        let displays = match probe_egl_displays() {
5277            Ok(d) => d,
5278            Err(e) => {
5279                eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
5280                return;
5281            }
5282        };
5283        let available_kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();
5284
5285        // Find a kind that is NOT available; if all three are available,
5286        // this test has nothing to verify — skip it.
5287        let unavailable = [
5288            EglDisplayKind::PlatformDevice,
5289            EglDisplayKind::Gbm,
5290            EglDisplayKind::Default,
5291        ]
5292        .into_iter()
5293        .find(|k| !available_kinds.contains(k));
5294
5295        if let Some(kind) = unavailable {
5296            eprintln!("Testing override with unavailable kind: {kind:?}");
5297            let result = GLProcessorThreaded::new(Some(kind));
5298            assert!(
5299                result.is_err(),
5300                "Expected error for unavailable display kind {kind:?}, got Ok"
5301            );
5302            eprintln!("  Correctly returned error: {:?}", result.unwrap_err());
5303        } else {
5304            eprintln!(
5305                "SKIPPED: {} - All three display kinds are available",
5306                function!()
5307            );
5308        }
5309    }
5310
5311    /// Validate that auto-detection (None) still works — this is the existing
5312    /// default behaviour and must not regress.
5313    #[test]
5314    fn test_auto_detect_display() {
5315        if !is_opengl_available() {
5316            eprintln!("SKIPPED: {} - OpenGL not available", function!());
5317            return;
5318        }
5319
5320        let mut gl = GLProcessorThreaded::new(None).expect("auto-detect should succeed");
5321        let src = TensorImage::load(
5322            include_bytes!("../../../testdata/zidane.jpg"),
5323            Some(RGBA),
5324            None,
5325        )
5326        .unwrap();
5327        let mut dst = TensorImage::new(320, 240, RGBA, None).unwrap();
5328        gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
5329            .expect("auto-detect convert should succeed");
5330    }
5331}