1#![cfg(target_os = "linux")]
5#![cfg(feature = "opengl")]
6
7use edgefirst_decoder::{DetectBox, ProtoData, ProtoTensor, Segmentation};
8use edgefirst_tensor::{TensorMemory, TensorTrait};
9use four_char_code::FourCharCode;
10use gbm::{
11 drm::{buffer::DrmFourcc, control::Device as DrmControlDevice, Device as DrmDevice},
12 AsRaw, Device,
13};
14use khronos_egl::{self as egl, Attrib, Display, Dynamic, Instance, EGL1_4};
15use log::{debug, error};
16use std::{
17 collections::BTreeSet,
18 ffi::{c_char, c_void, CStr, CString},
19 mem::ManuallyDrop,
20 os::fd::AsRawFd,
21 ptr::{null, null_mut, NonNull},
22 rc::Rc,
23 str::FromStr,
24 sync::OnceLock,
25 thread::JoinHandle,
26 time::Instant,
27};
28use tokio::sync::mpsc::{Sender, WeakSender};
29
/// Expands to the unqualified name of the function the macro is invoked in.
///
/// A marker function `f` is declared at the call site and its type name
/// (`path::to::enclosing::f`) is inspected: the trailing `::f` is removed and
/// whatever follows the last remaining `:` is returned as a `&'static str`.
macro_rules! function {
    () => {{
        fn f() {}
        fn type_name_of<T>(_: T) -> &'static str {
            std::any::type_name::<T>()
        }
        let full = type_name_of(f);

        // Drop the 3-character "::f" suffix contributed by the marker function.
        let enclosing = &full[..full.len() - 3];
        enclosing
            .rfind(':')
            .map_or(enclosing, |idx| &enclosing[idx + 1..])
    }};
}
45
46use crate::{
47 fourcc_is_int8, fourcc_is_packed_rgb, CPUProcessor, Crop, Error, Flip, ImageProcessorTrait,
48 MaskRegion, Rect, Rotation, TensorImage, TensorImageRef, BGRA, DEFAULT_COLORS, GREY, NV12,
49 PLANAR_RGB, PLANAR_RGBA, PLANAR_RGB_INT8, RGB, RGBA, RGB_INT8, VYUY, YUYV,
50};
51
/// The way an EGL display is obtained.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EglDisplayKind {
    /// Display created on top of a GBM device.
    Gbm,
    /// Display created from an enumerated EGL device.
    PlatformDevice,
    /// The EGL default display.
    Default,
}

impl std::fmt::Display for EglDisplayKind {
    /// Writes the short label of the display kind (`GBM`, `PlatformDevice`
    /// or `Default`).
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            EglDisplayKind::Gbm => "GBM",
            EglDisplayKind::PlatformDevice => "PlatformDevice",
            EglDisplayKind::Default => "Default",
        };
        f.write_str(label)
    }
}
99
/// One usable EGL display, as reported by `probe_egl_displays`.
#[derive(Debug, Clone)]
pub struct EglDisplayInfo {
    /// How the display was obtained.
    pub kind: EglDisplayKind,
    /// Human-readable description of the display.
    pub description: String,
}
109
110static EGL_LIB: OnceLock<&'static libloading::Library> = OnceLock::new();
114
115fn get_egl_lib() -> Result<&'static libloading::Library, crate::Error> {
116 if let Some(egl) = EGL_LIB.get() {
117 Ok(egl)
118 } else {
119 let egl = unsafe { libloading::Library::new("libEGL.so.1")? };
120 let egl: &'static libloading::Library = Box::leak(Box::new(egl));
122 Ok(EGL_LIB.get_or_init(|| egl))
123 }
124}
125
/// Dynamically loaded EGL instance (EGL 1.4 entry points required) backed by
/// the leaked `'static` library handle.
type Egl = Instance<Dynamic<&'static libloading::Library, EGL1_4>>;
127
128fn probe_display_extensions(egl: &Egl, display: egl::Display) -> bool {
134 let Ok(ext_str) = egl.query_string(Some(display), egl::EXTENSIONS) else {
135 return false;
136 };
137 let exts = ext_str.to_string_lossy();
138
139 let required = ["EGL_KHR_surfaceless_context", "EGL_KHR_no_config_context"];
140
141 for r in &required {
142 if !exts.contains(r) {
143 log::debug!("Display missing required extension: {r}");
144 return false;
145 }
146 }
147
148 egl.bind_api(egl::OPENGL_ES_API).is_ok()
149}
150
151pub fn probe_egl_displays() -> Result<Vec<EglDisplayInfo>, Error> {
166 let egl: Egl = unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? };
167
168 let mut results = Vec::new();
169
170 if let Ok(display_type) = GlContext::egl_get_platform_display_from_device(&egl) {
172 let display = display_type.as_display();
173 if egl.initialize(display).is_ok() {
174 if probe_display_extensions(&egl, display) {
175 results.push(EglDisplayInfo {
176 kind: EglDisplayKind::PlatformDevice,
177 description: "EGL platform device via EGL_EXT_device_enumeration".to_string(),
178 });
179 }
180 let _ = egl.terminate(display);
181 }
182 }
183
184 if let Ok(display_type) = GlContext::egl_get_gbm_display(&egl) {
186 let display = display_type.as_display();
187 if egl.initialize(display).is_ok() {
188 if probe_display_extensions(&egl, display) {
189 results.push(EglDisplayInfo {
190 kind: EglDisplayKind::Gbm,
191 description: "GBM via /dev/dri/renderD128".to_string(),
192 });
193 }
194 let _ = egl.terminate(display);
195 }
196 }
197
198 if let Ok(display_type) = GlContext::egl_get_default_display(&egl) {
200 let display = display_type.as_display();
201 if egl.initialize(display).is_ok() {
202 if probe_display_extensions(&egl, display) {
203 results.push(EglDisplayInfo {
204 kind: EglDisplayKind::Default,
205 description: "EGL default display".to_string(),
206 });
207 }
208 let _ = egl.terminate(display);
209 }
210 }
211
212 Ok(results)
213}
214
/// Mechanism used to move image data to and from the GPU.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TransferBackend {
    /// DMA-BUF based transfer.
    DmaBuf,

    /// Pixel-buffer-object based transfer.
    Pbo,

    /// Synchronous transfer.
    Sync,
}

impl TransferBackend {
    /// Returns `true` for [`TransferBackend::DmaBuf`].
    pub(crate) fn is_dma(self) -> bool {
        matches!(self, TransferBackend::DmaBuf)
    }

    /// Returns `true` for [`TransferBackend::Pbo`].
    #[allow(dead_code)]
    pub(crate) fn is_pbo(self) -> bool {
        matches!(self, TransferBackend::Pbo)
    }
}
248
/// An initialized EGL display together with the context created on it.
pub(crate) struct GlContext {
    /// Transfer mechanism selected when the context was created.
    pub(crate) transfer_backend: TransferBackend,
    /// The display the context lives on (and, for GBM, the device behind it).
    pub(crate) display: EglDisplayType,
    /// The EGL context handle.
    pub(crate) ctx: egl::Context,
    /// EGL entry points. Wrapped in `ManuallyDrop`, so the `Rc` is not
    /// released automatically when the struct is dropped.
    pub(crate) egl: ManuallyDrop<Rc<Egl>>,
}
259
260pub(crate) enum EglDisplayType {
261 Default(egl::Display),
262 Gbm(egl::Display, #[allow(dead_code)] Device<Card>),
263 PlatformDisplay(egl::Display),
264}
265
266impl EglDisplayType {
267 fn as_display(&self) -> egl::Display {
268 match self {
269 EglDisplayType::Default(disp) => *disp,
270 EglDisplayType::Gbm(disp, _) => *disp,
271 EglDisplayType::PlatformDisplay(disp) => *disp,
272 }
273 }
274}
275
276impl GlContext {
277 pub(crate) fn new(kind: Option<EglDisplayKind>) -> Result<GlContext, crate::Error> {
278 let egl: Rc<Egl> =
280 Rc::new(unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? });
281
282 if let Some(kind) = kind {
283 let display_fn = match kind {
285 EglDisplayKind::Gbm => Self::egl_get_gbm_display as fn(&Egl) -> _,
286 EglDisplayKind::PlatformDevice => Self::egl_get_platform_display_from_device,
287 EglDisplayKind::Default => Self::egl_get_default_display,
288 };
289 return Self::try_initialize_egl(egl, display_fn).map_err(|e| {
290 log::debug!("Failed to initialize EGL with {kind} display: {e:?}");
291 e
292 });
293 }
294
295 if let Ok(headless) =
297 Self::try_initialize_egl(egl.clone(), Self::egl_get_platform_display_from_device)
298 {
299 return Ok(headless);
300 } else {
301 log::debug!("Didn't initialize EGL with platform display from device enumeration");
302 }
303
304 if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_gbm_display) {
306 return Ok(headless);
307 } else {
308 log::debug!("Didn't initialize EGL with GBM Display");
309 }
310
311 if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_default_display) {
313 return Ok(headless);
314 } else {
315 log::debug!("Didn't initialize EGL with Default Display");
316 }
317
318 Err(Error::OpenGl(
319 "Could not initialize EGL with any known method".to_string(),
320 ))
321 }
322
323 fn try_initialize_egl(
324 egl: Rc<Egl>,
325 display_fn: impl Fn(&Egl) -> Result<EglDisplayType, crate::Error>,
326 ) -> Result<GlContext, crate::Error> {
327 let display = display_fn(&egl)?;
328 log::debug!("egl initialize with display: {:x?}", display.as_display());
329 egl.initialize(display.as_display())?;
330
331 let ext_str = egl.query_string(Some(display.as_display()), egl::EXTENSIONS)?;
333 let exts = ext_str.to_string_lossy();
334
335 if !exts.contains("EGL_KHR_surfaceless_context") {
336 return Err(crate::Error::GLVersion(
337 "EGL display does not support EGL_KHR_surfaceless_context".to_string(),
338 ));
339 }
340
341 if !exts.contains("EGL_KHR_no_config_context") {
342 return Err(crate::Error::GLVersion(
343 "EGL display does not support EGL_KHR_no_config_context".to_string(),
344 ));
345 }
346
347 egl.bind_api(egl::OPENGL_ES_API)?;
348
349 let context_attributes = [egl::CONTEXT_MAJOR_VERSION, 3, egl::NONE, egl::NONE];
353 let ctx = egl.create_context(
354 display.as_display(),
355 egl_ext::NO_CONFIG_KHR,
356 None,
357 &context_attributes,
358 )?;
359 debug!("ctx: {ctx:?}");
360
361 egl.make_current(display.as_display(), None, None, Some(ctx))?;
364
365 let has_dma_extensions = Self::egl_check_support_dma(&egl).is_ok();
366 let transfer_backend = if has_dma_extensions {
367 TransferBackend::DmaBuf
368 } else {
369 TransferBackend::Sync
370 };
371 Ok(GlContext {
372 display,
373 ctx,
374 egl: ManuallyDrop::new(egl),
375 transfer_backend,
376 })
377 }
378
379 fn egl_get_default_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
380 if let Some(display) = unsafe { egl.get_display(egl::DEFAULT_DISPLAY) } {
382 debug!("default display: {display:?}");
383 return Ok(EglDisplayType::Default(display));
384 }
385
386 Err(Error::OpenGl(
387 "Could not obtain EGL Default Display".to_string(),
388 ))
389 }
390
391 fn egl_get_gbm_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
392 let gbm = Device::new(Card::open_global()?)?;
394
395 debug!("gbm: {gbm:?}");
396 let display = Self::egl_get_platform_display_with_fallback(
397 egl,
398 egl_ext::PLATFORM_GBM_KHR,
399 gbm.as_raw() as *mut c_void,
400 &[egl::ATTRIB_NONE],
401 )?;
402
403 Ok(EglDisplayType::Gbm(display, gbm))
404 }
405
406 fn egl_get_platform_display_from_device(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
407 let extensions = egl.query_string(None, egl::EXTENSIONS)?;
408 let extensions = extensions.to_string_lossy();
409 log::debug!("EGL Extensions: {}", extensions);
410
411 if !extensions.contains("EGL_EXT_device_enumeration") {
412 return Err(Error::GLVersion(
413 "EGL doesn't supported EGL_EXT_device_enumeration extension".to_string(),
414 ));
415 }
416
417 type EGLDeviceEXT = *mut c_void;
418 let devices = if let Some(ext) = egl.get_proc_address("eglQueryDevicesEXT") {
419 let func: unsafe extern "system" fn(
420 max_devices: egl::Int,
421 devices: *mut EGLDeviceEXT,
422 num_devices: *mut egl::Int,
423 ) -> *const c_char = unsafe { std::mem::transmute(ext) };
424 let mut devices = [std::ptr::null_mut(); 10];
425 let mut num_devices = 0;
426 unsafe { func(devices.len() as i32, devices.as_mut_ptr(), &mut num_devices) };
427 for i in 0..num_devices {
428 log::debug!("EGL device: {:?}", devices[i as usize]);
429 }
430 devices[0..num_devices as usize].to_vec()
431 } else {
432 return Err(Error::GLVersion(
433 "EGL doesn't supported eglQueryDevicesEXT function".to_string(),
434 ));
435 };
436
437 if !extensions.contains("EGL_EXT_platform_device") {
438 return Err(Error::GLVersion(
439 "EGL doesn't supported EGL_EXT_platform_device extension".to_string(),
440 ));
441 }
442
443 if devices.is_empty() {
444 return Err(Error::GLVersion(
445 "EGL_EXT_device_enumeration returned 0 devices".to_string(),
446 ));
447 }
448 let disp = Self::egl_get_platform_display_with_fallback(
449 egl,
450 egl_ext::PLATFORM_DEVICE_EXT,
451 devices[0],
452 &[egl::ATTRIB_NONE],
453 )?;
454 Ok(EglDisplayType::PlatformDisplay(disp))
455 }
456
457 fn egl_check_support_dma(egl: &Egl) -> Result<(), crate::Error> {
458 let extensions = egl.query_string(None, egl::EXTENSIONS)?;
459 let extensions = extensions.to_string_lossy();
460 log::debug!("EGL Extensions: {}", extensions);
461
462 if egl.upcast::<egl::EGL1_5>().is_some() {
463 return Ok(());
464 }
465
466 if !extensions.contains("EGL_EXT_image_dma_buf_import") {
467 return Err(crate::Error::GLVersion(
468 "EGL does not support EGL_EXT_image_dma_buf_import extension".to_string(),
469 ));
470 }
471
472 if egl.get_proc_address("eglCreateImageKHR").is_none() {
473 return Err(crate::Error::GLVersion(
474 "EGL does not support eglCreateImageKHR function".to_string(),
475 ));
476 }
477
478 if egl.get_proc_address("eglDestroyImageKHR").is_none() {
479 return Err(crate::Error::GLVersion(
480 "EGL does not support eglDestroyImageKHR function".to_string(),
481 ));
482 }
483 Ok(())
484 }
485
486 fn egl_get_platform_display_with_fallback(
487 egl: &Egl,
488 platform: egl::Enum,
489 native_display: *mut c_void,
490 attrib_list: &[Attrib],
491 ) -> Result<Display, Error> {
492 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
493 unsafe { egl.get_platform_display(platform, native_display, attrib_list) }
494 .map_err(|e| e.into())
495 } else if let Some(ext) = egl.get_proc_address("eglGetPlatformDisplayEXT") {
496 let func: unsafe extern "system" fn(
497 platform: egl::Enum,
498 native_display: *mut c_void,
499 attrib_list: *const Attrib,
500 ) -> egl::EGLDisplay = unsafe { std::mem::transmute(ext) };
501 let disp = unsafe { func(platform, native_display, attrib_list.as_ptr()) };
502 if disp != egl::NO_DISPLAY {
503 Ok(unsafe { Display::from_ptr(disp) })
504 } else {
505 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
506 "EGL failed but no error was reported".to_owned(),
507 )))
508 }
509 } else {
510 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
511 provided: egl.version(),
512 required: khronos_egl::Version::EGL1_5,
513 }))
514 }
515 }
516
517 fn egl_create_image_with_fallback(
518 egl: &Egl,
519 display: Display,
520 ctx: egl::Context,
521 target: egl::Enum,
522 buffer: egl::ClientBuffer,
523 attrib_list: &[Attrib],
524 ) -> Result<egl::Image, Error> {
525 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
526 egl.create_image(display, ctx, target, buffer, attrib_list)
527 .map_err(|e| e.into())
528 } else if let Some(ext) = egl.get_proc_address("eglCreateImageKHR") {
529 log::trace!("eglCreateImageKHR addr: {:?}", ext);
530 let func: unsafe extern "system" fn(
531 display: egl::EGLDisplay,
532 ctx: egl::EGLContext,
533 target: egl::Enum,
534 buffer: egl::EGLClientBuffer,
535 attrib_list: *const egl::Int,
536 ) -> egl::EGLImage = unsafe { std::mem::transmute(ext) };
537 let new_attrib_list = attrib_list
538 .iter()
539 .map(|x| *x as egl::Int)
540 .collect::<Vec<_>>();
541
542 let image = unsafe {
543 func(
544 display.as_ptr(),
545 ctx.as_ptr(),
546 target,
547 buffer.as_ptr(),
548 new_attrib_list.as_ptr(),
549 )
550 };
551 if image != egl::NO_IMAGE {
552 Ok(unsafe { egl::Image::from_ptr(image) })
553 } else {
554 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
555 "EGL failed but no error was reported".to_owned(),
556 )))
557 }
558 } else {
559 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
560 provided: egl.version(),
561 required: khronos_egl::Version::EGL1_5,
562 }))
563 }
564 }
565
566 fn egl_destroy_image_with_fallback(
567 egl: &Egl,
568 display: Display,
569 image: egl::Image,
570 ) -> Result<(), Error> {
571 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
572 egl.destroy_image(display, image).map_err(|e| e.into())
573 } else if let Some(ext) = egl.get_proc_address("eglDestroyImageKHR") {
574 let func: unsafe extern "system" fn(
575 display: egl::EGLDisplay,
576 image: egl::EGLImage,
577 ) -> egl::Boolean = unsafe { std::mem::transmute(ext) };
578 let res = unsafe { func(display.as_ptr(), image.as_ptr()) };
579 if res == egl::TRUE {
580 Ok(())
581 } else {
582 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
583 "EGL failed but no error was reported".to_owned(),
584 )))
585 }
586 } else {
587 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
588 provided: egl.version(),
589 required: khronos_egl::Version::EGL1_5,
590 }))
591 }
592 }
593}
594
impl Drop for GlContext {
    /// Releases the current context, destroys it and terminates the display.
    ///
    /// Every EGL call is best-effort: results are ignored and the whole
    /// sequence runs inside `catch_unwind` with a silent panic hook installed,
    /// so a panic raised during teardown is neither printed nor propagated.
    /// The `ManuallyDrop<Rc<Egl>>` field is not dropped here.
    fn drop(&mut self) {
        // Temporarily replace the panic hook with a no-op; the previous hook
        // is restored below.
        // NOTE(review): the panic hook is process-global, so a panic on
        // another thread during this window would also be silenced.
        let prev_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(|_| {}));
        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            // Unbind the context from the calling thread before destroying it.
            let _ = self
                .egl
                .make_current(self.display.as_display(), None, None, None);

            let _ = self
                .egl
                .destroy_context(self.display.as_display(), self.ctx);

            let _ = self.egl.terminate(self.display.as_display());
        }));
        std::panic::set_hook(prev_hook);

    }
}
626
/// An open DRM device node, usable as a `gbm`/`drm` device.
#[derive(Debug)]
pub(crate) struct Card(std::fs::File);

impl std::os::unix::io::AsFd for Card {
    /// Borrows the file descriptor of the underlying device node.
    fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> {
        self.0.as_fd()
    }
}

// Marker impls: the drm traits only need `AsFd`.
impl DrmDevice for Card {}
impl DrmControlDevice for Card {}
642
643impl Card {
645 pub fn open(path: &str) -> Result<Self, crate::Error> {
646 let mut options = std::fs::OpenOptions::new();
647 options.read(true);
648 options.write(true);
649 let c = options.open(path);
650 match c {
651 Ok(c) => Ok(Card(c)),
652 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
653 Err(Error::NotFound(format!("File not found: {path}")))
654 }
655 Err(e) => Err(e.into()),
656 }
657 }
658
659 pub fn open_global() -> Result<Self, crate::Error> {
660 let targets = ["/dev/dri/renderD128", "/dev/dri/card0", "/dev/dri/card1"];
661 let e = Self::open(targets[0]);
662 if let Ok(t) = e {
663 return Ok(t);
664 }
665 for t in &targets[1..] {
666 if let Ok(t) = Self::open(t) {
667 return Ok(t);
668 }
669 }
670 e
671 }
672}
673
/// A rectangle described by its four edges.
// NOTE(review): units (pixels vs. normalized coordinates) are not visible
// from this definition; confirm against the users of this type.
#[derive(Debug, Clone, Copy)]
struct RegionOfInterest {
    left: f32,
    top: f32,
    right: f32,
    bottom: f32,
}
681
/// Requests sent from `GLProcessorThreaded` to its GL worker thread.
///
/// Every variant except `PboDelete` carries a oneshot sender on which the
/// worker reports the outcome.
#[allow(clippy::type_complexity)]
enum GLProcessorMessage {
    /// Convert a source image into a destination image.
    ImageConvert(
        SendablePtr<TensorImage>,
        SendablePtr<TensorImage>,
        Rotation,
        Flip,
        Crop,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Replace the class colors.
    SetColors(
        Vec<[u8; 4]>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Draw detections and segmentations onto an image.
    DrawMasks(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        SendablePtr<Segmentation>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Draw detections onto an image using prototype data.
    DrawMasksProto(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Select the int8 interpolation mode.
    SetInt8Interpolation(
        Int8InterpolationMode,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Decode masks into an atlas.
    DecodeMasksAtlas(
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        // Output width.
        usize,
        // Output height.
        usize,
        tokio::sync::oneshot::Sender<Result<(Vec<u8>, Vec<MaskRegion>), Error>>,
    ),
    /// Allocate a pixel-pack buffer; the reply carries the GL buffer id.
    PboCreate(
        // Buffer size in bytes.
        usize,
        tokio::sync::oneshot::Sender<Result<u32, Error>>,
    ),
    /// Map a pixel-pack buffer into client memory.
    PboMap(
        // GL buffer id.
        u32,
        // Number of bytes to map.
        usize,
        tokio::sync::oneshot::Sender<Result<edgefirst_tensor::PboMapping, Error>>,
    ),
    /// Unmap a previously mapped pixel-pack buffer.
    PboUnmap(
        // GL buffer id.
        u32,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Delete a pixel-pack buffer (fire-and-forget, no reply).
    PboDelete(u32),
}
734
/// `PboOps` implementation that forwards buffer operations to the GL worker
/// thread.
struct GlPboOps {
    /// Weak handle to the worker's request channel. Being weak, it does not
    /// keep the channel open by itself.
    sender: WeakSender<GLProcessorMessage>,
}
744
745unsafe impl edgefirst_tensor::PboOps for GlPboOps {
750 fn map_buffer(
751 &self,
752 buffer_id: u32,
753 size: usize,
754 ) -> edgefirst_tensor::Result<edgefirst_tensor::PboMapping> {
755 let sender = self
756 .sender
757 .upgrade()
758 .ok_or(edgefirst_tensor::Error::PboDisconnected)?;
759 let (tx, rx) = tokio::sync::oneshot::channel();
760 sender
761 .blocking_send(GLProcessorMessage::PboMap(buffer_id, size, tx))
762 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?;
763 rx.blocking_recv()
764 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?
765 .map_err(|e| {
766 edgefirst_tensor::Error::NotImplemented(format!("GL PBO map failed: {e:?}"))
767 })
768 }
769
770 fn unmap_buffer(&self, buffer_id: u32) -> edgefirst_tensor::Result<()> {
771 let sender = self
772 .sender
773 .upgrade()
774 .ok_or(edgefirst_tensor::Error::PboDisconnected)?;
775 let (tx, rx) = tokio::sync::oneshot::channel();
776 sender
777 .blocking_send(GLProcessorMessage::PboUnmap(buffer_id, tx))
778 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?;
779 rx.blocking_recv()
780 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?
781 .map_err(|e| {
782 edgefirst_tensor::Error::NotImplemented(format!("GL PBO unmap failed: {e:?}"))
783 })
784 }
785
786 fn delete_buffer(&self, buffer_id: u32) {
787 if let Some(sender) = self.sender.upgrade() {
788 let _ = sender.blocking_send(GLProcessorMessage::PboDelete(buffer_id));
789 }
790 }
791}
792
/// Image processor that runs all OpenGL work on a dedicated worker thread and
/// talks to it over a channel.
#[derive(Debug)]
pub struct GLProcessorThreaded {
    /// Join handle of the worker thread; taken and joined on drop.
    handle: Option<JoinHandle<()>>,

    /// Request channel to the worker; taken on drop, which closes the channel.
    sender: Option<Sender<GLProcessorMessage>>,
    /// Transfer backend reported by the worker after context creation.
    transfer_backend: TransferBackend,
    /// `has_bgra` flag reported by the worker after context creation.
    has_bgra: bool,
}

// NOTE(review): these impls assert thread-safety manually; the fields visible
// here are a join handle, a channel sender and two plain values.
unsafe impl Send for GLProcessorThreaded {}
unsafe impl Sync for GLProcessorThreaded {}
810
/// A raw pointer plus element count that may be moved to another thread.
///
/// The pointee is not owned; whoever dereferences `ptr` must make sure it is
/// still valid for `len` elements.
struct SendablePtr<T: Send> {
    /// Pointer to the first element.
    ptr: NonNull<T>,
    /// Number of elements reachable through `ptr`.
    len: usize,
}

// Allows the pointer to cross the channel to the GL worker thread.
unsafe impl<T> Send for SendablePtr<T> where T: Send {}
817
impl GLProcessorThreaded {
    /// Spawns the GL worker thread and waits until it has created its
    /// `GLProcessorST`.
    ///
    /// `kind` is forwarded to `GLProcessorST::new`. The worker then serves
    /// `GLProcessorMessage` requests until the request channel is closed.
    ///
    /// # Errors
    /// Returns the error produced by `GLProcessorST::new`, or
    /// `Error::Internal` if the worker ended without reporting a result.
    pub fn new(kind: Option<EglDisplayKind>) -> Result<Self, Error> {
        // Capacity 1: a sender blocks until the worker has taken the previous
        // request.
        let (send, mut recv) = tokio::sync::mpsc::channel::<GLProcessorMessage>(1);

        // Used once, to report the outcome of context creation.
        let (create_ctx_send, create_ctx_recv) = tokio::sync::oneshot::channel();

        let func = move || {
            let mut gl_converter = match GLProcessorST::new(kind) {
                Ok(gl) => gl,
                Err(e) => {
                    let _ = create_ctx_send.send(Err(e));
                    return;
                }
            };
            let _ = create_ctx_send.send(Ok((
                gl_converter.gl_context.transfer_backend,
                gl_converter.has_bgra,
            )));
            // Serve requests until every sender has been dropped.
            while let Some(msg) = recv.blocking_recv() {
                match msg {
                    GLProcessorMessage::ImageConvert(src, mut dst, rotation, flip, crop, resp) => {
                        // The pointers were created from live references by
                        // the requesting method, which blocks on `resp`.
                        let src = unsafe { src.ptr.as_ref() };
                        let dst = unsafe { dst.ptr.as_mut() };
                        let res = gl_converter.convert(src, dst, rotation, flip, crop);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::DrawMasks(mut dst, det, seg, resp) => {
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let seg = unsafe { std::slice::from_raw_parts(seg.ptr.as_ptr(), seg.len) };
                        let res = gl_converter.draw_masks(dst, det, seg);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::DrawMasksProto(mut dst, det, proto_data, resp) => {
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.draw_masks_proto(dst, det, &proto_data);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::SetColors(colors, resp) => {
                        let res = gl_converter.set_class_colors(&colors);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::SetInt8Interpolation(mode, resp) => {
                        gl_converter.set_int8_interpolation_mode(mode);
                        let _ = resp.send(Ok(()));
                    }
                    GLProcessorMessage::DecodeMasksAtlas(
                        det,
                        proto_data,
                        output_width,
                        output_height,
                        resp,
                    ) => {
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.decode_masks_atlas(
                            det,
                            &proto_data,
                            output_width,
                            output_height,
                        );
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::PboCreate(size, resp) => {
                        // Allocate `size` bytes of uninitialized pixel-pack
                        // storage and leave the binding point unbound again.
                        let result = unsafe {
                            let mut id: u32 = 0;
                            gls::gl::GenBuffers(1, &mut id);
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, id);
                            gls::gl::BufferData(
                                gls::gl::PIXEL_PACK_BUFFER,
                                size as isize,
                                std::ptr::null(),
                                gls::gl::STREAM_COPY,
                            );
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            match check_gl_error("PboCreate", 0) {
                                Ok(()) => Ok(id),
                                Err(e) => {
                                    // Do not leak the buffer name on failure.
                                    gls::gl::DeleteBuffers(1, &id);
                                    Err(e)
                                }
                            }
                        };
                        let _ = resp.send(result);
                    }
                    GLProcessorMessage::PboMap(buffer_id, size, resp) => {
                        // Map the first `size` bytes for reading and writing.
                        let result = unsafe {
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
                            let ptr = gls::gl::MapBufferRange(
                                gls::gl::PIXEL_PACK_BUFFER,
                                0,
                                size as isize,
                                gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
                            );
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            if ptr.is_null() {
                                Err(crate::Error::OpenGl(
                                    "glMapBufferRange returned null".to_string(),
                                ))
                            } else {
                                Ok(edgefirst_tensor::PboMapping {
                                    ptr: ptr as *mut u8,
                                    size,
                                })
                            }
                        };
                        let _ = resp.send(result);
                    }
                    GLProcessorMessage::PboUnmap(buffer_id, resp) => {
                        let result = unsafe {
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
                            let ok = gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            // A FALSE result is reported as corrupted data.
                            if ok == gls::gl::FALSE {
                                Err(Error::OpenGl(
                                    "PBO data was corrupted during mapping".into(),
                                ))
                            } else {
                                check_gl_error("PboUnmap", 0)
                            }
                        };
                        let _ = resp.send(result);
                    }
                    // Fire-and-forget: there is no reply channel.
                    GLProcessorMessage::PboDelete(buffer_id) => unsafe {
                        gls::gl::DeleteBuffers(1, &buffer_id);
                    },
                }
            }
        };

        let handle = std::thread::spawn(func);

        // Block until the worker reports whether its GL state could be created.
        let (transfer_backend, has_bgra) = match create_ctx_recv.blocking_recv() {
            Ok(Err(e)) => return Err(e),
            Err(_) => {
                return Err(Error::Internal(
                    "GL converter error messaging closed without update".to_string(),
                ));
            }
            Ok(Ok(tb)) => tb,
        };

        Ok(Self {
            handle: Some(handle),
            sender: Some(send),
            transfer_backend,
            has_bgra,
        })
    }
}
977
978impl ImageProcessorTrait for GLProcessorThreaded {
979 fn convert(
980 &mut self,
981 src: &TensorImage,
982 dst: &mut TensorImage,
983 rotation: crate::Rotation,
984 flip: Flip,
985 crop: Crop,
986 ) -> crate::Result<()> {
987 crop.check_crop(src, dst)?;
988 if !GLProcessorST::check_src_format_supported(self.transfer_backend, src) {
989 return Err(crate::Error::NotSupported(format!(
990 "Opengl doesn't support {} source texture",
991 src.fourcc().display()
992 )));
993 }
994
995 if !GLProcessorST::check_dst_format_supported(self.transfer_backend, dst, self.has_bgra) {
996 return Err(crate::Error::NotSupported(format!(
997 "Opengl doesn't support {} destination texture",
998 dst.fourcc().display()
999 )));
1000 }
1001
1002 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1003 self.sender
1004 .as_ref()
1005 .unwrap()
1006 .blocking_send(GLProcessorMessage::ImageConvert(
1007 SendablePtr {
1008 ptr: src.into(),
1009 len: 1,
1010 },
1011 SendablePtr {
1012 ptr: dst.into(),
1013 len: 1,
1014 },
1015 rotation,
1016 flip,
1017 crop,
1018 err_send,
1019 ))
1020 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1021 err_recv.blocking_recv().map_err(|_| {
1022 Error::Internal("GL converter error messaging closed without update".to_string())
1023 })?
1024 }
1025
1026 fn convert_ref(
1027 &mut self,
1028 src: &TensorImage,
1029 dst: &mut TensorImageRef<'_>,
1030 rotation: Rotation,
1031 flip: Flip,
1032 crop: Crop,
1033 ) -> crate::Result<()> {
1034 let mut cpu = CPUProcessor::new();
1036 cpu.convert_ref(src, dst, rotation, flip, crop)
1037 }
1038
1039 fn draw_masks(
1040 &mut self,
1041 dst: &mut TensorImage,
1042 detect: &[crate::DetectBox],
1043 segmentation: &[crate::Segmentation],
1044 ) -> crate::Result<()> {
1045 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1046 self.sender
1047 .as_ref()
1048 .unwrap()
1049 .blocking_send(GLProcessorMessage::DrawMasks(
1050 SendablePtr {
1051 ptr: dst.into(),
1052 len: 1,
1053 },
1054 SendablePtr {
1055 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1056 len: detect.len(),
1057 },
1058 SendablePtr {
1059 ptr: NonNull::new(segmentation.as_ptr() as *mut Segmentation).unwrap(),
1060 len: segmentation.len(),
1061 },
1062 err_send,
1063 ))
1064 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1065 err_recv.blocking_recv().map_err(|_| {
1066 Error::Internal("GL converter error messaging closed without update".to_string())
1067 })?
1068 }
1069
1070 fn draw_masks_proto(
1071 &mut self,
1072 dst: &mut TensorImage,
1073 detect: &[DetectBox],
1074 proto_data: &ProtoData,
1075 ) -> crate::Result<()> {
1076 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1077 self.sender
1078 .as_ref()
1079 .unwrap()
1080 .blocking_send(GLProcessorMessage::DrawMasksProto(
1081 SendablePtr {
1082 ptr: NonNull::new(dst as *mut TensorImage).unwrap(),
1083 len: 1,
1084 },
1085 SendablePtr {
1086 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1087 len: detect.len(),
1088 },
1089 Box::new(proto_data.clone()),
1090 err_send,
1091 ))
1092 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1093 err_recv.blocking_recv().map_err(|_| {
1094 Error::Internal("GL converter error messaging closed without update".to_string())
1095 })?
1096 }
1097
1098 fn decode_masks_atlas(
1099 &mut self,
1100 detect: &[DetectBox],
1101 proto_data: ProtoData,
1102 output_width: usize,
1103 output_height: usize,
1104 ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
1105 GLProcessorThreaded::decode_masks_atlas(
1106 self,
1107 detect,
1108 proto_data,
1109 output_width,
1110 output_height,
1111 )
1112 }
1113
1114 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<(), crate::Error> {
1115 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1116 self.sender
1117 .as_ref()
1118 .unwrap()
1119 .blocking_send(GLProcessorMessage::SetColors(colors.to_vec(), err_send))
1120 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1121 err_recv.blocking_recv().map_err(|_| {
1122 Error::Internal("GL converter error messaging closed without update".to_string())
1123 })?
1124 }
1125}
1126
1127impl GLProcessorThreaded {
1128 pub fn set_int8_interpolation_mode(
1130 &mut self,
1131 mode: Int8InterpolationMode,
1132 ) -> Result<(), crate::Error> {
1133 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1134 self.sender
1135 .as_ref()
1136 .unwrap()
1137 .blocking_send(GLProcessorMessage::SetInt8Interpolation(mode, err_send))
1138 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1139 err_recv.blocking_recv().map_err(|_| {
1140 Error::Internal("GL converter error messaging closed without update".to_string())
1141 })?
1142 }
1143
1144 pub fn decode_masks_atlas(
1150 &mut self,
1151 detect: &[DetectBox],
1152 proto_data: ProtoData,
1153 output_width: usize,
1154 output_height: usize,
1155 ) -> Result<(Vec<u8>, Vec<MaskRegion>), crate::Error> {
1156 let (resp_send, resp_recv) = tokio::sync::oneshot::channel();
1157 self.sender
1158 .as_ref()
1159 .unwrap()
1160 .blocking_send(GLProcessorMessage::DecodeMasksAtlas(
1161 SendablePtr {
1162 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1163 len: detect.len(),
1164 },
1165 Box::new(proto_data),
1166 output_width,
1167 output_height,
1168 resp_send,
1169 ))
1170 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1171 resp_recv.blocking_recv().map_err(|_| {
1172 Error::Internal("GL converter error messaging closed without update".to_string())
1173 })?
1174 }
1175
1176 pub fn create_pbo_image(
1178 &self,
1179 width: usize,
1180 height: usize,
1181 fourcc: four_char_code::FourCharCode,
1182 ) -> Result<crate::TensorImage, Error> {
1183 let sender = self
1184 .sender
1185 .as_ref()
1186 .ok_or(Error::OpenGl("GL processor is shutting down".to_string()))?;
1187
1188 let channels = crate::fourcc_channels(fourcc)?;
1189 let size = width * height * channels;
1190 if size == 0 {
1191 return Err(Error::OpenGl("Invalid image dimensions".to_string()));
1192 }
1193
1194 let (tx, rx) = tokio::sync::oneshot::channel();
1196 sender
1197 .blocking_send(GLProcessorMessage::PboCreate(size, tx))
1198 .map_err(|_| Error::OpenGl("GL thread channel closed".to_string()))?;
1199 let buffer_id = rx
1200 .blocking_recv()
1201 .map_err(|_| Error::OpenGl("GL thread did not respond".to_string()))??;
1202
1203 let ops: std::sync::Arc<dyn edgefirst_tensor::PboOps> = std::sync::Arc::new(GlPboOps {
1204 sender: sender.downgrade(),
1205 });
1206
1207 let shape = if crate::fourcc_planar(fourcc)? {
1208 vec![channels, height, width]
1209 } else {
1210 vec![height, width, channels]
1211 };
1212
1213 let pbo_tensor =
1214 edgefirst_tensor::PboTensor::<u8>::from_pbo(buffer_id, size, &shape, None, ops)
1215 .map_err(|e| Error::OpenGl(format!("PBO tensor creation failed: {e:?}")))?;
1216 let tensor = edgefirst_tensor::Tensor::Pbo(pbo_tensor);
1217 crate::TensorImage::from_tensor(tensor, fourcc)
1218 .map_err(|e| Error::OpenGl(format!("Failed to wrap PBO tensor as image: {e:?}")))
1219 }
1220
1221 #[allow(dead_code)]
1223 pub(crate) fn transfer_backend(&self) -> TransferBackend {
1224 self.transfer_backend
1225 }
1226}
1227
1228impl Drop for GLProcessorThreaded {
1229 fn drop(&mut self) {
1230 drop(self.sender.take());
1231 let _ = self.handle.take().and_then(|h| h.join().ok());
1232 }
1233}
1234
/// Interpolation strategy for int8 output, selectable through
/// `set_int8_interpolation_mode`.
// NOTE(review): the exact sampling behavior of each variant is implemented
// outside this definition; the names suggest nearest-neighbor, bilinear and a
// two-pass scheme. Confirm before relying on it.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Int8InterpolationMode {
    Nearest,
    Bilinear,
    TwoPass,
}
1245
/// Distinguishes the two image caches.
// NOTE(review): presumably Src = source-image cache and Dst =
// destination-image cache; confirm against the users of this enum.
#[derive(Debug)]
enum CacheKind {
    Src,
    Dst,
}
1252
/// One entry of an [`EglImageCache`].
struct CachedEglImage {
    /// The cached EGL image.
    egl_image: EglImage,
    /// Liveness token: `EglImageCache::sweep` discards the entry once this
    /// can no longer be upgraded. NOTE(review): presumably the strong
    /// reference is owned by the buffer the image was created from — confirm.
    guard: std::sync::Weak<()>,
    /// GL renderbuffer name associated with the image, if any; deleted with
    /// `glDeleteRenderbuffers` when the entry is evicted, swept or the cache
    /// is dropped.
    renderbuffer: Option<u32>,
    /// Access stamp; `EglImageCache::evict_lru` removes the entry with the
    /// smallest value.
    last_used: u64,
}
1263
/// Cache of EGL images with least-recently-used eviction and hit/miss
/// statistics.
struct EglImageCache {
    /// Cached images keyed by a `u64` id. NOTE(review): what the id
    /// identifies is decided by the callers, which are outside this section.
    entries: std::collections::HashMap<u64, CachedEglImage>,
    /// Capacity the cache was created with; used to pre-size `entries`.
    /// NOTE(review): the enforcement of this limit happens in the callers —
    /// confirm.
    capacity: usize,
    /// Hit counter, reported in the debug log when the cache is dropped.
    hits: u64,
    /// Miss counter, reported in the debug log when the cache is dropped.
    misses: u64,
    /// Monotonic counter backing `next_timestamp`.
    access_counter: u64,
}
1277
1278impl EglImageCache {
1279 fn new(capacity: usize) -> Self {
1280 Self {
1281 entries: std::collections::HashMap::with_capacity(capacity),
1282 capacity,
1283 hits: 0,
1284 misses: 0,
1285 access_counter: 0,
1286 }
1287 }
1288
1289 fn next_timestamp(&mut self) -> u64 {
1291 self.access_counter += 1;
1292 self.access_counter
1293 }
1294
1295 fn evict_lru(&mut self) {
1297 if let Some((&evict_id, _)) = self.entries.iter().min_by_key(|(_, entry)| entry.last_used) {
1298 if let Some(evicted) = self.entries.remove(&evict_id) {
1299 if let Some(rbo) = evicted.renderbuffer {
1300 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1301 }
1302 }
1303 }
1304 }
1305
1306 fn sweep(&mut self) {
1308 let before = self.entries.len();
1309 self.entries.retain(|_id, entry| {
1310 let alive = entry.guard.upgrade().is_some();
1311 if !alive {
1312 if let Some(rbo) = entry.renderbuffer {
1313 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1314 }
1315 }
1316 alive
1317 });
1318 let swept = before - self.entries.len();
1319 if swept > 0 {
1320 log::debug!("EglImageCache: swept {swept} dead entries");
1321 }
1322 }
1323}
1324
1325impl Drop for EglImageCache {
1326 fn drop(&mut self) {
1327 for entry in self.entries.values() {
1328 if let Some(rbo) = entry.renderbuffer {
1329 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1330 }
1331 }
1332 log::debug!(
1333 "EglImageCache stats: {} hits, {} misses, {} entries remaining",
1334 self.hits,
1335 self.misses,
1336 self.entries.len()
1337 );
1338 }
1339}
1340
/// Single-threaded OpenGL image processor: owns the GL context together with
/// every shader program, texture, buffer and cache used for conversion and
/// mask rendering.
///
/// Field order matters: Rust drops fields in declaration order, and
/// `gl_context` is declared last so it outlives every GL object above it.
pub struct GLProcessorST {
    // --- Textures used as sampling sources / render targets -------------
    // NOTE(review): the exact role of each camera/render texture is decided
    // by code outside this section; names are the only hint here.
    camera_eglimage_texture: Texture,
    camera_normal_texture: Texture,
    render_texture: Texture,
    segmentation_texture: Texture,
    // --- Segmentation programs (all receive the `colors` uniform) -------
    segmentation_program: GlProgram,
    instanced_segmentation_program: GlProgram,
    /// Texture bound as `TEXTURE_2D_ARRAY` and filled with the prototype
    /// tensor in `decode_masks_atlas`.
    proto_texture: Texture,
    proto_segmentation_program: GlProgram,
    proto_segmentation_int8_nearest_program: GlProgram,
    proto_segmentation_int8_bilinear_program: GlProgram,
    /// Built from `generate_proto_dequant_shader_int8`; no `colors` uniform
    /// is loaded into it.
    proto_dequant_int8_program: GlProgram,
    proto_segmentation_f32_program: GlProgram,
    /// Receives the class colours with alpha forced to 1.0 in
    /// `set_class_colors`.
    color_program: GlProgram,
    /// First value returned by `gl_check_support`; enables `LINEAR`
    /// filtering on the `R32F` prototype texture.
    has_float_linear: bool,
    /// Second value returned by `gl_check_support`; forwarded to
    /// `check_dst_format_supported`.
    has_bgra: bool,
    /// Current int8 sampling mode; starts as `Bilinear`.
    int8_interpolation_mode: Int8InterpolationMode,
    proto_dequant_texture: Texture,
    // --- Mask-logit programs used by the atlas decoder ------------------
    proto_mask_logit_int8_bilinear_program: GlProgram,
    proto_mask_logit_int8_nearest_program: GlProgram,
    proto_mask_logit_f32_program: GlProgram,
    /// Framebuffer name for mask rendering; 0 until `ensure_mask_fbo`
    /// creates it.
    mask_fbo: u32,
    /// `R8` texture attached to `mask_fbo`; 0 until created.
    mask_fbo_texture: u32,
    /// Width of the last successful `mask_fbo_texture` allocation.
    mask_fbo_width: usize,
    /// Height of the last successful `mask_fbo_texture` allocation.
    mask_fbo_height: usize,
    /// Pixel-pack buffer sized `width * atlas_height` bytes by
    /// `ensure_mask_atlas_size`; 0 until created.
    mask_atlas_pbo: u32,
    /// Created with `Buffer::new(0, 3, 100)`. NOTE(review): presumably
    /// attribute index 0 with 3 components per vertex — confirm.
    vertex_buffer: Buffer,
    /// Created with `Buffer::new(1, 2, 100)`. NOTE(review): presumably
    /// attribute index 1 with 2 components per vertex — confirm.
    texture_buffer: Buffer,
    convert_fbo: FrameBuffer,
    /// EGL image cache created with capacity 8.
    src_egl_cache: EglImageCache,
    /// EGL image cache created with capacity 8.
    dst_egl_cache: EglImageCache,
    // --- Packed-RGB intermediate target ----------------------------------
    packed_rgb_intermediate_tex: Texture,
    packed_rgb_fbo: FrameBuffer,
    /// Starts as `(0, 0)`. NOTE(review): presumably the (width, height) of
    /// the intermediate texture — confirm.
    packed_rgb_intermediate_size: (usize, usize),
    // --- Conversion programs ----------------------------------------------
    texture_program: GlProgram,
    texture_program_yuv: GlProgram,
    texture_program_planar: GlProgram,
    texture_program_planar_int8: GlProgram,
    packed_rgba8_program_2d: GlProgram,
    packed_rgba8_int8_program_2d: GlProgram,
    texture_int8_program: GlProgram,
    texture_int8_program_yuv: GlProgram,
    /// Result of `probe_rgb_direct_support`; forced to `false` whenever the
    /// transfer backend ends up not being DMA-buf.
    support_rgb_direct: bool,
    /// EGL/GL context; kept last so it is dropped after all GL resources.
    gl_context: GlContext,
}
1410
1411impl Drop for GLProcessorST {
1412 fn drop(&mut self) {
1413 unsafe {
1414 {
1415 if self.mask_fbo != 0 {
1416 gls::gl::DeleteFramebuffers(1, &self.mask_fbo);
1417 }
1418 if self.mask_fbo_texture != 0 {
1419 gls::gl::DeleteTextures(1, &self.mask_fbo_texture);
1420 }
1421 if self.mask_atlas_pbo != 0 {
1422 gls::gl::DeleteBuffers(1, &self.mask_atlas_pbo);
1423 }
1424 }
1425 }
1426 }
1427}
1428
1429impl ImageProcessorTrait for GLProcessorST {
1430 fn convert(
1431 &mut self,
1432 src: &TensorImage,
1433 dst: &mut TensorImage,
1434 rotation: crate::Rotation,
1435 flip: Flip,
1436 crop: Crop,
1437 ) -> crate::Result<()> {
1438 crop.check_crop(src, dst)?;
1439 if !Self::check_src_format_supported(self.gl_context.transfer_backend, src) {
1440 return Err(crate::Error::NotSupported(format!(
1441 "Opengl doesn't support {} source texture",
1442 src.fourcc().display()
1443 )));
1444 }
1445
1446 if !Self::check_dst_format_supported(self.gl_context.transfer_backend, dst, self.has_bgra) {
1447 return Err(crate::Error::NotSupported(format!(
1448 "Opengl doesn't support {} destination texture",
1449 dst.fourcc().display()
1450 )));
1451 }
1452 log::debug!(
1453 "dst tensor: {:?} src tensor :{:?}",
1454 dst.tensor().memory(),
1455 src.tensor().memory()
1456 );
1457 check_gl_error(function!(), line!())?;
1458 if self.gl_context.transfer_backend.is_dma() && dst.tensor().memory() == TensorMemory::Dma {
1459 let res = self.convert_dest_dma(dst, src, rotation, flip, crop);
1461 return res;
1462 }
1463 if src.tensor().memory() == TensorMemory::Pbo && dst.tensor().memory() == TensorMemory::Pbo
1466 {
1467 return self.convert_pbo_to_pbo(dst, src, rotation, flip, crop);
1468 }
1469 if dst.tensor().memory() == TensorMemory::Pbo {
1474 return self.convert_any_to_pbo(dst, src, rotation, flip, crop);
1475 }
1476 if src.tensor().memory() == TensorMemory::Pbo {
1480 return self.convert_pbo_to_mem(dst, src, rotation, flip, crop);
1481 }
1482 let start = Instant::now();
1483 let res = self.convert_dest_non_dma(dst, src, rotation, flip, crop);
1484 log::debug!("convert_dest_non_dma takes {:?}", start.elapsed());
1485 res
1486 }
1487
1488 fn convert_ref(
1489 &mut self,
1490 src: &TensorImage,
1491 dst: &mut TensorImageRef<'_>,
1492 rotation: Rotation,
1493 flip: Flip,
1494 crop: Crop,
1495 ) -> crate::Result<()> {
1496 let mut cpu = CPUProcessor::new();
1498 cpu.convert_ref(src, dst, rotation, flip, crop)
1499 }
1500
1501 fn draw_masks(
1502 &mut self,
1503 dst: &mut TensorImage,
1504 detect: &[DetectBox],
1505 segmentation: &[Segmentation],
1506 ) -> Result<(), crate::Error> {
1507 use crate::FunctionTimer;
1508
1509 let _timer = FunctionTimer::new("GLProcessorST::draw_masks");
1510 if !matches!(dst.fourcc(), RGBA | BGRA | RGB) {
1511 return Err(crate::Error::NotSupported(
1512 "Opengl image rendering only supports RGBA, BGRA, or RGB images".to_string(),
1513 ));
1514 }
1515
1516 let memory = dst.tensor.memory();
1522 let pbo_buffer_id = if memory == edgefirst_tensor::TensorMemory::Pbo {
1523 match &dst.tensor {
1524 edgefirst_tensor::Tensor::Pbo(p) if !p.is_mapped() => Some(p.buffer_id()),
1525 _ => None,
1526 }
1527 } else {
1528 None
1529 };
1530
1531 let is_dma = match memory {
1532 edgefirst_tensor::TensorMemory::Dma if self.setup_renderbuffer_dma(dst).is_ok() => true,
1533 _ if pbo_buffer_id.is_some() => {
1534 self.setup_renderbuffer_from_pbo(dst, pbo_buffer_id.unwrap())?;
1535 false
1536 }
1537 _ => {
1538 self.setup_renderbuffer_non_dma(
1540 dst,
1541 Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1542 )?;
1543 false
1544 }
1545 };
1546
1547 gls::enable(gls::gl::BLEND);
1548 gls::blend_func_separate(
1549 gls::gl::SRC_ALPHA,
1550 gls::gl::ONE_MINUS_SRC_ALPHA,
1551 gls::gl::ZERO,
1552 gls::gl::ONE,
1553 );
1554
1555 self.render_box(dst, detect)?;
1556 self.render_segmentation(detect, segmentation)?;
1557
1558 gls::finish();
1559 if !is_dma {
1560 let format = match dst.fourcc() {
1561 RGB => gls::gl::RGB,
1562 RGBA => gls::gl::RGBA,
1563 BGRA => 0x80E1, _ => unreachable!(),
1565 };
1566 if let Some(buffer_id) = pbo_buffer_id {
1567 unsafe {
1570 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
1571 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1572 gls::gl::ReadnPixels(
1573 0,
1574 0,
1575 dst.width() as i32,
1576 dst.height() as i32,
1577 format,
1578 gls::gl::UNSIGNED_BYTE,
1579 dst.tensor.len() as i32,
1580 std::ptr::null_mut(),
1581 );
1582 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
1583 gls::gl::Finish();
1584 }
1585 check_gl_error(function!(), line!())?;
1586 } else {
1587 let mut dst_map = dst.tensor().map()?;
1588 unsafe {
1589 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1590 gls::gl::ReadnPixels(
1591 0,
1592 0,
1593 dst.width() as i32,
1594 dst.height() as i32,
1595 format,
1596 gls::gl::UNSIGNED_BYTE,
1597 dst.tensor.len() as i32,
1598 dst_map.as_mut_ptr() as *mut c_void,
1599 );
1600 }
1601 }
1602 }
1603
1604 Ok(())
1605 }
1606
1607 fn draw_masks_proto(
1608 &mut self,
1609 dst: &mut TensorImage,
1610 detect: &[DetectBox],
1611 proto_data: &ProtoData,
1612 ) -> crate::Result<()> {
1613 use crate::FunctionTimer;
1614
1615 let _timer = FunctionTimer::new("GLProcessorST::draw_masks_proto");
1616 if !matches!(dst.fourcc(), RGBA | BGRA | RGB) {
1617 return Err(crate::Error::NotSupported(
1618 "Opengl image rendering only supports RGBA, BGRA, or RGB images".to_string(),
1619 ));
1620 }
1621
1622 let memory = dst.tensor.memory();
1624 let pbo_buffer_id = if memory == edgefirst_tensor::TensorMemory::Pbo {
1625 match &dst.tensor {
1626 edgefirst_tensor::Tensor::Pbo(p) if !p.is_mapped() => Some(p.buffer_id()),
1627 _ => None,
1628 }
1629 } else {
1630 None
1631 };
1632
1633 let is_dma = match memory {
1634 edgefirst_tensor::TensorMemory::Dma if self.setup_renderbuffer_dma(dst).is_ok() => true,
1635 _ if pbo_buffer_id.is_some() => {
1636 self.setup_renderbuffer_from_pbo(dst, pbo_buffer_id.unwrap())?;
1637 false
1638 }
1639 _ => {
1640 self.setup_renderbuffer_non_dma(
1641 dst,
1642 Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1643 )?;
1644 false
1645 }
1646 };
1647
1648 gls::enable(gls::gl::BLEND);
1649 gls::blend_func_separate(
1650 gls::gl::SRC_ALPHA,
1651 gls::gl::ONE_MINUS_SRC_ALPHA,
1652 gls::gl::ZERO,
1653 gls::gl::ONE,
1654 );
1655
1656 self.render_box(dst, detect)?;
1657 self.render_proto_segmentation(detect, proto_data)?;
1658
1659 gls::finish();
1660 if !is_dma {
1661 let format = match dst.fourcc() {
1662 RGB => gls::gl::RGB,
1663 RGBA => gls::gl::RGBA,
1664 BGRA => 0x80E1, _ => unreachable!(),
1666 };
1667 if let Some(buffer_id) = pbo_buffer_id {
1668 unsafe {
1669 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
1670 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1671 gls::gl::ReadnPixels(
1672 0,
1673 0,
1674 dst.width() as i32,
1675 dst.height() as i32,
1676 format,
1677 gls::gl::UNSIGNED_BYTE,
1678 dst.tensor.len() as i32,
1679 std::ptr::null_mut(),
1680 );
1681 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
1682 gls::gl::Finish();
1683 }
1684 check_gl_error(function!(), line!())?;
1685 } else {
1686 let mut dst_map = dst.tensor().map()?;
1687 unsafe {
1688 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1689 gls::gl::ReadnPixels(
1690 0,
1691 0,
1692 dst.width() as i32,
1693 dst.height() as i32,
1694 format,
1695 gls::gl::UNSIGNED_BYTE,
1696 dst.tensor.len() as i32,
1697 dst_map.as_mut_ptr() as *mut c_void,
1698 );
1699 }
1700 }
1701 }
1702
1703 Ok(())
1704 }
1705
    /// Trait entry point for atlas mask decoding.
    ///
    /// Takes ownership of `proto_data` and forwards it by reference to the
    /// inherent `GLProcessorST::decode_masks_atlas`, returning its result
    /// (atlas pixels plus one `MaskRegion` per detection) unchanged.
    fn decode_masks_atlas(
        &mut self,
        detect: &[DetectBox],
        proto_data: ProtoData,
        output_width: usize,
        output_height: usize,
    ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
        // Fully-qualified on purpose: this names the inherent method (which
        // takes `&ProtoData`), not this trait method of the same name.
        GLProcessorST::decode_masks_atlas(self, detect, &proto_data, output_width, output_height)
    }
1715
1716 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> crate::Result<()> {
1717 if colors.is_empty() {
1718 return Ok(());
1719 }
1720 let mut colors_f32 = colors
1721 .iter()
1722 .map(|c| {
1723 [
1724 c[0] as f32 / 255.0,
1725 c[1] as f32 / 255.0,
1726 c[2] as f32 / 255.0,
1727 c[3] as f32 / 255.0,
1728 ]
1729 })
1730 .take(20)
1731 .collect::<Vec<[f32; 4]>>();
1732
1733 self.segmentation_program
1734 .load_uniform_4fv(c"colors", &colors_f32)?;
1735 self.instanced_segmentation_program
1736 .load_uniform_4fv(c"colors", &colors_f32)?;
1737 self.proto_segmentation_program
1738 .load_uniform_4fv(c"colors", &colors_f32)?;
1739 self.proto_segmentation_int8_nearest_program
1740 .load_uniform_4fv(c"colors", &colors_f32)?;
1741 self.proto_segmentation_int8_bilinear_program
1742 .load_uniform_4fv(c"colors", &colors_f32)?;
1743 self.proto_segmentation_f32_program
1744 .load_uniform_4fv(c"colors", &colors_f32)?;
1745
1746 colors_f32.iter_mut().for_each(|c| {
1747 c[3] = 1.0; });
1749 self.color_program
1750 .load_uniform_4fv(c"colors", &colors_f32)?;
1751
1752 Ok(())
1753 }
1754}
1755
1756impl GLProcessorST {
    /// Creates a GL processor: sets up the EGL/GL context, compiles every
    /// shader program, allocates the GL helper objects and decides which
    /// transfer backend to use.
    ///
    /// `kind` is forwarded to `GlContext::new` to select the EGL display.
    ///
    /// Backend selection, in order:
    /// 1. start from the backend reported by the context;
    /// 2. if it is DMA-buf but `verify_dma_buf_roundtrip` fails, fall back
    ///    to PBO and disable direct RGB rendering;
    /// 3. upgrade `Sync` to `Pbo`;
    /// 4. apply the `EDGEFIRST_FORCE_TRANSFER` environment override
    ///    (`dmabuf`/`dma`, `pbo`, `sync`, case-insensitive) — this runs last
    ///    and therefore wins over the previous steps.
    ///
    /// # Errors
    ///
    /// Propagates failures from context creation, the capability check,
    /// shader compilation, uniform uploads and the GL error check performed
    /// after construction.
    pub fn new(kind: Option<EglDisplayKind>) -> Result<GLProcessorST, crate::Error> {
        let gl_context = GlContext::new(kind)?;
        // Resolve GL entry points through EGL; unknown symbols become null.
        gls::load_with(|s| {
            gl_context
                .egl
                .get_proc_address(s)
                .map_or(std::ptr::null(), |p| p as *const _)
        });

        let (has_float_linear, has_bgra) = Self::gl_check_support()?;

        unsafe {
            // Byte-granular row alignment for both pixel reads and uploads.
            gls::gl::PixelStorei(gls::gl::PACK_ALIGNMENT, 1);
            gls::gl::PixelStorei(gls::gl::UNPACK_ALIGNMENT, 1);
        }

        // --- Conversion programs ---------------------------------------
        let texture_program_planar =
            GlProgram::new(generate_vertex_shader(), generate_planar_rgb_shader())?;

        let texture_program =
            GlProgram::new(generate_vertex_shader(), generate_texture_fragment_shader())?;

        let texture_program_yuv = GlProgram::new(
            generate_vertex_shader(),
            generate_texture_fragment_shader_yuv(),
        )?;

        // --- Segmentation programs; each starts with DEFAULT_COLORS ----
        let segmentation_program =
            GlProgram::new(generate_vertex_shader(), generate_segmentation_shader())?;
        segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
        let instanced_segmentation_program = GlProgram::new(
            generate_vertex_shader(),
            generate_instanced_segmentation_shader(),
        )?;
        instanced_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader(),
        )?;
        proto_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_int8_nearest_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_int8_nearest(),
        )?;
        proto_segmentation_int8_nearest_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_int8_bilinear_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_int8_bilinear(),
        )?;
        proto_segmentation_int8_bilinear_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        // The dequantisation program has no `colors` uniform to initialise.
        let proto_dequant_int8_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_dequant_shader_int8(),
        )?;

        let proto_segmentation_f32_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_f32(),
        )?;
        proto_segmentation_f32_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let color_program = GlProgram::new(generate_vertex_shader(), generate_color_shader())?;
        color_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        // --- Mask-logit programs used by the atlas decoder --------------
        let proto_mask_logit_int8_nearest_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_int8_nearest(),
        )?;
        let proto_mask_logit_int8_bilinear_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_int8_bilinear(),
        )?;
        let proto_mask_logit_f32_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_f32(),
        )?;

        // --- Int8 / packed output programs ------------------------------
        let texture_program_planar_int8 =
            GlProgram::new(generate_vertex_shader(), generate_planar_rgb_int8_shader())?;

        let packed_rgba8_program_2d =
            GlProgram::new(generate_vertex_shader(), generate_packed_rgba8_shader_2d())?;
        let packed_rgba8_int8_program_2d = GlProgram::new(
            generate_vertex_shader(),
            generate_packed_rgba8_int8_shader_2d(),
        )?;

        let texture_int8_program =
            GlProgram::new(generate_vertex_shader(), generate_texture_int8_shader())?;
        let texture_int8_program_yuv =
            GlProgram::new(generate_vertex_shader(), generate_texture_int8_shader_yuv())?;

        // --- Textures and vertex data -----------------------------------
        let camera_eglimage_texture = Texture::new();
        let camera_normal_texture = Texture::new();
        let render_texture = Texture::new();
        let segmentation_texture = Texture::new();
        let proto_texture = Texture::new();
        let proto_dequant_texture = Texture::new();
        let vertex_buffer = Buffer::new(0, 3, 100);
        let texture_buffer = Buffer::new(1, 2, 100);

        let mut converter = GLProcessorST {
            gl_context,
            texture_program,
            texture_program_yuv,
            texture_program_planar,
            texture_program_planar_int8,
            packed_rgba8_program_2d,
            packed_rgba8_int8_program_2d,
            texture_int8_program,
            texture_int8_program_yuv,
            // Placeholder; replaced by the probe result below.
            support_rgb_direct: false,
            camera_eglimage_texture,
            camera_normal_texture,
            segmentation_texture,
            proto_texture,
            proto_segmentation_int8_nearest_program,
            proto_segmentation_int8_bilinear_program,
            proto_dequant_int8_program,
            proto_segmentation_f32_program,
            has_float_linear,
            has_bgra,
            int8_interpolation_mode: Int8InterpolationMode::Bilinear,
            proto_dequant_texture,
            proto_mask_logit_int8_bilinear_program,
            proto_mask_logit_int8_nearest_program,
            proto_mask_logit_f32_program,
            // Mask FBO, texture and atlas PBO are created lazily; 0 = absent.
            mask_fbo: 0,
            mask_fbo_texture: 0,
            mask_fbo_width: 0,
            mask_fbo_height: 0,
            mask_atlas_pbo: 0,
            vertex_buffer,
            texture_buffer,
            convert_fbo: FrameBuffer::new(),
            src_egl_cache: EglImageCache::new(8),
            dst_egl_cache: EglImageCache::new(8),
            packed_rgb_intermediate_tex: Texture::new(),
            packed_rgb_fbo: FrameBuffer::new(),
            packed_rgb_intermediate_size: (0, 0),
            render_texture,
            segmentation_program,
            instanced_segmentation_program,
            proto_segmentation_program,
            color_program,
        };
        check_gl_error(function!(), line!())?;

        converter.support_rgb_direct = converter.probe_rgb_direct_support();

        // A DMA-buf backend is only trusted after a real conversion
        // round-trip produced the expected pixels.
        if converter.gl_context.transfer_backend.is_dma() && !converter.verify_dma_buf_roundtrip() {
            log::info!("DMA-buf verification failed — falling back to PBO transfers");
            converter.gl_context.transfer_backend = TransferBackend::Pbo;
            converter.support_rgb_direct = false;
        }

        if converter.gl_context.transfer_backend == TransferBackend::Sync {
            log::info!("Upgrading transfer backend from Sync to Pbo (GL context available)");
            converter.gl_context.transfer_backend = TransferBackend::Pbo;
        }

        // Environment override, applied after all automatic decisions.
        if let Ok(val) = std::env::var("EDGEFIRST_FORCE_TRANSFER") {
            let forced = match val.to_ascii_lowercase().as_str() {
                "dmabuf" | "dma" => Some(TransferBackend::DmaBuf),
                "pbo" => Some(TransferBackend::Pbo),
                "sync" => Some(TransferBackend::Sync),
                other => {
                    log::warn!(
                        "EDGEFIRST_FORCE_TRANSFER={other:?} not recognised \
                         (expected dmabuf|pbo|sync), ignoring"
                    );
                    None
                }
            };
            if let Some(backend) = forced {
                log::info!(
                    "EDGEFIRST_FORCE_TRANSFER override: {:?} → {backend:?}",
                    converter.gl_context.transfer_backend
                );
                converter.gl_context.transfer_backend = backend;
                // Direct RGB rendering is only kept with a DMA backend.
                if !backend.is_dma() {
                    converter.support_rgb_direct = false;
                }
            }
        }

        log::debug!(
            "GLConverter created (transfer={:?}, rgb_direct={})",
            converter.gl_context.transfer_backend,
            converter.support_rgb_direct
        );
        Ok(converter)
    }
1969
    /// Probes whether a `BGR888` DMA buffer can be used directly as a
    /// renderbuffer-backed framebuffer attachment.
    ///
    /// Returns `false` as soon as any prerequisite is missing: a non-DMA
    /// transfer backend, no `glEGLImageTargetRenderbufferStorageOES` entry
    /// point, failure to allocate the 64x64 RGB test buffer, failure to
    /// create the EGL image, or a GL error when binding the image to a
    /// renderbuffer. Otherwise returns whether a framebuffer with that
    /// renderbuffer as colour attachment is complete. All temporary GL
    /// objects are deleted before returning.
    fn probe_rgb_direct_support(&self) -> bool {
        if !self.gl_context.transfer_backend.is_dma() {
            log::debug!("probe_rgb_direct: no DMA support");
            return false;
        }

        // The extension entry point must be resolvable before it is called.
        if self
            .gl_context
            .egl
            .get_proc_address("glEGLImageTargetRenderbufferStorageOES")
            .is_none()
        {
            log::debug!("probe_rgb_direct: glEGLImageTargetRenderbufferStorageOES not available");
            return false;
        }

        // Small throw-away DMA image used only for this probe.
        let test_img = match TensorImage::new(64, 64, RGB, Some(TensorMemory::Dma)) {
            Ok(img) => img,
            Err(e) => {
                log::debug!("probe_rgb_direct: failed to allocate test DMA buffer: {e}");
                return false;
            }
        };

        let egl_image =
            match self.create_egl_image_with_dims(&test_img, 64, 64, DrmFourcc::Bgr888, 3) {
                Ok(img) => img,
                Err(e) => {
                    log::debug!("probe_rgb_direct: EGLImage creation failed: {e}");
                    return false;
                }
            };

        let result = unsafe {
            // Back a renderbuffer with the EGL image.
            let mut rbo = 0u32;
            gls::gl::GenRenderbuffers(1, &mut rbo);
            gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, rbo);
            gls::gl::EGLImageTargetRenderbufferStorageOES(
                gls::gl::RENDERBUFFER,
                egl_image.egl_image.as_ptr(),
            );

            let gl_err = gls::gl::GetError();
            if gl_err != gls::gl::NO_ERROR {
                log::debug!(
                    "probe_rgb_direct: EGLImageTargetRenderbufferStorageOES failed: {gl_err:#X}"
                );
                // Clean up the renderbuffer before bailing out.
                gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, 0);
                gls::gl::DeleteRenderbuffers(1, &rbo);
                return false;
            }

            // Attach it to a temporary framebuffer and test completeness.
            let mut fbo = 0u32;
            gls::gl::GenFramebuffers(1, &mut fbo);
            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, fbo);
            gls::gl::FramebufferRenderbuffer(
                gls::gl::FRAMEBUFFER,
                gls::gl::COLOR_ATTACHMENT0,
                gls::gl::RENDERBUFFER,
                rbo,
            );

            let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
            let complete = status == gls::gl::FRAMEBUFFER_COMPLETE;

            // Unbind and delete the temporary objects.
            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0);
            gls::gl::DeleteFramebuffers(1, &fbo);
            gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, 0);
            gls::gl::DeleteRenderbuffers(1, &rbo);

            complete
        };
        log::info!("probe_rgb_direct: BGR888 renderbuffer FBO support = {result}");
        result
    }
2055
2056 fn verify_dma_buf_roundtrip(&mut self) -> bool {
2065 let src = match TensorImage::new(64, 64, RGBA, Some(TensorMemory::Dma)) {
2067 Ok(img) => img,
2068 Err(e) => {
2069 log::info!("verify_dma_buf_roundtrip: failed to allocate DMA source: {e}");
2070 return false;
2071 }
2072 };
2073
2074 {
2075 let mut map = match src.tensor().map() {
2076 Ok(m) => m,
2077 Err(e) => {
2078 log::info!("verify_dma_buf_roundtrip: failed to map DMA source: {e}");
2079 return false;
2080 }
2081 };
2082 for pixel in map.chunks_exact_mut(4) {
2083 pixel[0] = 255; pixel[1] = 0; pixel[2] = 0; pixel[3] = 255; }
2088 }
2089
2090 let mut dst = match TensorImage::new(64, 64, RGBA, Some(TensorMemory::Dma)) {
2092 Ok(img) => img,
2093 Err(e) => {
2094 log::info!("verify_dma_buf_roundtrip: failed to allocate DMA destination: {e}");
2095 return false;
2096 }
2097 };
2098
2099 if let Err(e) =
2101 self.convert_dest_dma(&mut dst, &src, Rotation::None, Flip::None, Crop::no_crop())
2102 {
2103 log::info!("verify_dma_buf_roundtrip: convert_dest_dma failed: {e}");
2104 return false;
2105 }
2106
2107 let map = match dst.tensor().map() {
2109 Ok(m) => m,
2110 Err(e) => {
2111 log::info!("verify_dma_buf_roundtrip: failed to map DMA destination: {e}");
2112 return false;
2113 }
2114 };
2115
2116 let offset = (32 * 64 + 32) * 4;
2117 if map.len() < offset + 4 {
2118 log::info!("verify_dma_buf_roundtrip: destination buffer too small");
2119 return false;
2120 }
2121
2122 let r = map[offset];
2123 let g = map[offset + 1];
2124 let b = map[offset + 2];
2125 let a = map[offset + 3];
2126
2127 let pass = r > 250 && g < 5 && b < 5 && a > 250;
2128
2129 if pass {
2130 log::info!("verify_dma_buf_roundtrip: PASSED (center pixel RGBA={r},{g},{b},{a})");
2131 } else {
2132 log::info!(
2133 "verify_dma_buf_roundtrip: FAILED (center pixel RGBA={r},{g},{b},{a}, \
2134 expected ~255,0,0,255)"
2135 );
2136 }
2137
2138 pass
2139 }
2140
2141 fn compute_atlas_regions(
2146 detect: &[DetectBox],
2147 output_width: usize,
2148 output_height: usize,
2149 padding: usize,
2150 ) -> (Vec<MaskRegion>, usize) {
2151 let ow = output_width as i32;
2152 let oh = output_height as i32;
2153 let owf = output_width as f32;
2154 let ohf = output_height as f32;
2155 let pad = padding as i32;
2156
2157 let mut regions = Vec::with_capacity(detect.len());
2158 let mut atlas_y = 0usize;
2159 for det in detect.iter() {
2160 let bbox_x = (det.bbox.xmin * owf).round() as i32;
2161 let bbox_y = (det.bbox.ymin * ohf).round() as i32;
2162 let bbox_w = ((det.bbox.xmax - det.bbox.xmin) * owf).round() as i32;
2163 let bbox_h = ((det.bbox.ymax - det.bbox.ymin) * ohf).round() as i32;
2164 let bbox_x = bbox_x.max(0).min(ow);
2165 let bbox_y = bbox_y.max(0).min(oh);
2166 let bbox_w = bbox_w.max(1).min(ow - bbox_x);
2167 let bbox_h = bbox_h.max(1).min(oh - bbox_y);
2168
2169 let padded_x = (bbox_x - pad).max(0);
2170 let padded_y = (bbox_y - pad).max(0);
2171 let padded_w = ((bbox_x + bbox_w + pad).min(ow) - padded_x).max(1);
2172 let padded_h = ((bbox_y + bbox_h + pad).min(oh) - padded_y).max(1);
2173
2174 regions.push(MaskRegion {
2175 atlas_y_offset: atlas_y,
2176 padded_x: padded_x as usize,
2177 padded_y: padded_y as usize,
2178 padded_w: padded_w as usize,
2179 padded_h: padded_h as usize,
2180 bbox_x: bbox_x as usize,
2181 bbox_y: bbox_y as usize,
2182 bbox_w: bbox_w as usize,
2183 bbox_h: bbox_h as usize,
2184 });
2185 atlas_y += padded_h as usize;
2186 }
2187 (regions, atlas_y)
2188 }
2189
    /// Sets the sampling mode used for int8 (quantized) prototype data and
    /// records the new mode in the debug log.
    pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) {
        self.int8_interpolation_mode = mode;
        log::debug!("Int8 interpolation mode set to {:?}", mode);
    }
2195
2196 fn ensure_mask_fbo(&mut self, width: usize, height: usize) -> crate::Result<()> {
2199 if self.mask_fbo_width == width && self.mask_fbo_height == height && self.mask_fbo != 0 {
2200 return Ok(());
2201 }
2202
2203 if self.mask_fbo == 0 {
2205 unsafe {
2206 gls::gl::GenFramebuffers(1, &mut self.mask_fbo);
2207 }
2208 }
2209 if self.mask_fbo_texture == 0 {
2211 unsafe {
2212 gls::gl::GenTextures(1, &mut self.mask_fbo_texture);
2213 }
2214 }
2215
2216 unsafe {
2218 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.mask_fbo_texture);
2219 gls::gl::TexImage2D(
2220 gls::gl::TEXTURE_2D,
2221 0,
2222 gls::gl::R8 as i32,
2223 width as i32,
2224 height as i32,
2225 0,
2226 gls::gl::RED,
2227 gls::gl::UNSIGNED_BYTE,
2228 std::ptr::null(),
2229 );
2230 gls::gl::TexParameteri(
2231 gls::gl::TEXTURE_2D,
2232 gls::gl::TEXTURE_MIN_FILTER,
2233 gls::gl::NEAREST as i32,
2234 );
2235 gls::gl::TexParameteri(
2236 gls::gl::TEXTURE_2D,
2237 gls::gl::TEXTURE_MAG_FILTER,
2238 gls::gl::NEAREST as i32,
2239 );
2240 }
2241
2242 unsafe {
2244 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
2245 gls::gl::FramebufferTexture2D(
2246 gls::gl::FRAMEBUFFER,
2247 gls::gl::COLOR_ATTACHMENT0,
2248 gls::gl::TEXTURE_2D,
2249 self.mask_fbo_texture,
2250 0,
2251 );
2252 let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
2253 if status != gls::gl::FRAMEBUFFER_COMPLETE {
2254 return Err(crate::Error::OpenGl(format!(
2255 "Mask FBO incomplete: status=0x{status:X}"
2256 )));
2257 }
2258 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0);
2259 }
2260
2261 self.mask_fbo_width = width;
2262 self.mask_fbo_height = height;
2263 log::debug!("Mask FBO allocated at {width}x{height}");
2264 Ok(())
2265 }
2266
2267 fn ensure_mask_atlas_size(&mut self, width: usize, atlas_height: usize) -> crate::Result<()> {
2271 if self.mask_fbo_width == width
2272 && self.mask_fbo_height >= atlas_height
2273 && self.mask_fbo != 0
2274 && self.mask_atlas_pbo != 0
2275 {
2276 return Ok(());
2277 }
2278 self.ensure_mask_fbo(width, atlas_height)?;
2279 let pbo_size = width * atlas_height;
2280 unsafe {
2281 if self.mask_atlas_pbo == 0 {
2282 gls::gl::GenBuffers(1, &mut self.mask_atlas_pbo);
2283 }
2284 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, self.mask_atlas_pbo);
2285 gls::gl::BufferData(
2286 gls::gl::PIXEL_PACK_BUFFER,
2287 pbo_size as isize,
2288 std::ptr::null(),
2289 gls::gl::DYNAMIC_READ,
2290 );
2291 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2292 }
2293 Ok(())
2294 }
2295
2296 pub fn decode_masks_atlas(
2304 &mut self,
2305 detect: &[DetectBox],
2306 proto_data: &ProtoData,
2307 output_width: usize,
2308 output_height: usize,
2309 ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
2310 use crate::FunctionTimer;
2311
2312 let _timer = FunctionTimer::new("GLProcessorST::decode_masks_atlas");
2313
2314 if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
2315 return Ok((Vec::new(), Vec::new()));
2316 }
2317
2318 let padding = 4usize;
2319
2320 let (height, width, num_protos) = proto_data.protos.dim();
2321 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
2322
2323 let (regions, compact_atlas_height) =
2325 Self::compute_atlas_regions(detect, output_width, output_height, padding);
2326
2327 let (saved_fbo, saved_viewport) = unsafe {
2329 let mut fbo: i32 = 0;
2330 gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
2331 let mut vp = [0i32; 4];
2332 gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
2333 (fbo as u32, vp)
2334 };
2335
2336 self.ensure_mask_atlas_size(output_width, compact_atlas_height)?;
2338
2339 gls::active_texture(gls::gl::TEXTURE0);
2341 gls::bind_texture(texture_target, self.proto_texture.id);
2342 gls::tex_parameteri(
2343 texture_target,
2344 gls::gl::TEXTURE_MIN_FILTER,
2345 gls::gl::NEAREST as i32,
2346 );
2347 gls::tex_parameteri(
2348 texture_target,
2349 gls::gl::TEXTURE_MAG_FILTER,
2350 gls::gl::NEAREST as i32,
2351 );
2352 gls::tex_parameteri(
2353 texture_target,
2354 gls::gl::TEXTURE_WRAP_S,
2355 gls::gl::CLAMP_TO_EDGE as i32,
2356 );
2357 gls::tex_parameteri(
2358 texture_target,
2359 gls::gl::TEXTURE_WRAP_T,
2360 gls::gl::CLAMP_TO_EDGE as i32,
2361 );
2362
2363 let atlas_result = match &proto_data.protos {
2364 ProtoTensor::Quantized {
2365 protos,
2366 quantization,
2367 } => {
2368 let mut tex_data = vec![0i8; height * width * num_protos];
2369 for k in 0..num_protos {
2370 for y in 0..height {
2371 for x in 0..width {
2372 tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
2373 }
2374 }
2375 }
2376 gls::tex_image3d(
2377 texture_target,
2378 0,
2379 gls::gl::R8I as i32,
2380 width as i32,
2381 height as i32,
2382 num_protos as i32,
2383 0,
2384 gls::gl::RED_INTEGER,
2385 gls::gl::BYTE,
2386 Some(&tex_data),
2387 );
2388
2389 let proto_scale = quantization.scale;
2390 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
2391
2392 let program = match self.int8_interpolation_mode {
2393 Int8InterpolationMode::Nearest => &self.proto_mask_logit_int8_nearest_program,
2394 _ => &self.proto_mask_logit_int8_bilinear_program,
2395 };
2396 gls::use_program(program.id);
2397 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
2398 program.load_uniform_1f(c"proto_scale", proto_scale)?;
2399
2400 self.render_mask_atlas_compact(
2401 program,
2402 regions,
2403 &proto_data.mask_coefficients,
2404 output_width,
2405 output_height,
2406 Some(proto_scaled_zp),
2407 )
2408 }
2409 ProtoTensor::Float(protos_f32) => {
2410 let mut tex_data = vec![0.0f32; height * width * num_protos];
2411 for k in 0..num_protos {
2412 for y in 0..height {
2413 for x in 0..width {
2414 tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
2415 }
2416 }
2417 }
2418 gls::tex_image3d(
2419 texture_target,
2420 0,
2421 gls::gl::R32F as i32,
2422 width as i32,
2423 height as i32,
2424 num_protos as i32,
2425 0,
2426 gls::gl::RED,
2427 gls::gl::FLOAT,
2428 Some(&tex_data),
2429 );
2430 if self.has_float_linear {
2431 gls::tex_parameteri(
2432 texture_target,
2433 gls::gl::TEXTURE_MIN_FILTER,
2434 gls::gl::LINEAR as i32,
2435 );
2436 gls::tex_parameteri(
2437 texture_target,
2438 gls::gl::TEXTURE_MAG_FILTER,
2439 gls::gl::LINEAR as i32,
2440 );
2441 }
2442
2443 let program = &self.proto_mask_logit_f32_program;
2444 gls::use_program(program.id);
2445 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
2446
2447 self.render_mask_atlas_compact(
2448 program,
2449 regions,
2450 &proto_data.mask_coefficients,
2451 output_width,
2452 output_height,
2453 None,
2454 )
2455 }
2456 };
2457
2458 unsafe {
2460 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
2461 gls::gl::Viewport(
2462 saved_viewport[0],
2463 saved_viewport[1],
2464 saved_viewport[2],
2465 saved_viewport[3],
2466 );
2467 }
2468
2469 let (atlas_pixels, regions) = atlas_result?;
2470 Ok((atlas_pixels, regions))
2471 }
2472
2473 #[allow(clippy::too_many_arguments)]
2483 fn render_mask_atlas_compact(
2484 &self,
2485 program: &GlProgram,
2486 regions: Vec<MaskRegion>,
2487 mask_coefficients: &[Vec<f32>],
2488 output_width: usize,
2489 output_height: usize,
2490 proto_scaled_zp: Option<f32>,
2491 ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
2492 if regions.is_empty() {
2493 return Ok((Vec::new(), Vec::new()));
2494 }
2495
2496 let owf = output_width as f32;
2497 let ohf = output_height as f32;
2498
2499 let atlas_height = regions.last().map_or(0, |r| r.atlas_y_offset + r.padded_h);
2500 let ahf = atlas_height as f32;
2501
2502 unsafe {
2503 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
2504 gls::gl::Viewport(0, 0, output_width as i32, atlas_height as i32);
2505 gls::gl::Disable(gls::gl::BLEND);
2506 gls::gl::ClearColor(0.0, 0.0, 0.0, 0.0);
2507 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
2508 }
2509
2510 if let Some(first_coeff) = mask_coefficients.first() {
2511 if first_coeff.len() > 32 {
2512 log::warn!(
2513 "render_mask_atlas_compact: {} mask coefficients exceeds shader \
2514 limit of 32 — coefficients will be truncated",
2515 first_coeff.len()
2516 );
2517 }
2518 }
2519
2520 for (region, coeff) in regions.iter().zip(mask_coefficients.iter()) {
2521 let mut packed_coeff = [[0.0f32; 4]; 8];
2522 for (j, val) in coeff.iter().enumerate().take(32) {
2523 packed_coeff[j / 4][j % 4] = *val;
2524 }
2525 program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;
2526
2527 if let Some(szp) = proto_scaled_zp {
2529 let coeff_sum: f32 = coeff.iter().take(32).sum();
2530 program.load_uniform_1f(c"coeff_sum_x_szp", coeff_sum * szp)?;
2531 }
2532
2533 let dst_left = region.padded_x as f32 / owf * 2.0 - 1.0;
2537 let dst_right = (region.padded_x + region.padded_w) as f32 / owf * 2.0 - 1.0;
2538 let dst_bottom = region.atlas_y_offset as f32 / ahf * 2.0 - 1.0;
2539 let dst_top = (region.atlas_y_offset + region.padded_h) as f32 / ahf * 2.0 - 1.0;
2540
2541 let src_left = region.padded_x as f32 / owf;
2543 let src_right = (region.padded_x + region.padded_w) as f32 / owf;
2544 let src_bottom = region.padded_y as f32 / ohf;
2545 let src_top = (region.padded_y + region.padded_h) as f32 / ohf;
2546
2547 unsafe {
2548 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
2549 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
2550 let verts: [f32; 12] = [
2551 dst_left, dst_top, 0.0, dst_right, dst_top, 0.0, dst_right, dst_bottom, 0.0,
2552 dst_left, dst_bottom, 0.0,
2553 ];
2554 gls::gl::BufferSubData(
2555 gls::gl::ARRAY_BUFFER,
2556 0,
2557 (size_of::<f32>() * 12) as isize,
2558 verts.as_ptr() as *const c_void,
2559 );
2560
2561 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
2562 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
2563 let tc: [f32; 8] = [
2564 src_left, src_top, src_right, src_top, src_right, src_bottom, src_left,
2565 src_bottom,
2566 ];
2567 gls::gl::BufferSubData(
2568 gls::gl::ARRAY_BUFFER,
2569 0,
2570 (size_of::<f32>() * 8) as isize,
2571 tc.as_ptr() as *const c_void,
2572 );
2573
2574 let idx: [u32; 4] = [0, 1, 2, 3];
2575 gls::gl::DrawElements(
2576 gls::gl::TRIANGLE_FAN,
2577 4,
2578 gls::gl::UNSIGNED_INT,
2579 idx.as_ptr() as *const c_void,
2580 );
2581 }
2582 }
2583
2584 let atlas_bytes = output_width * atlas_height;
2586 let mut pixels = vec![0u8; atlas_bytes];
2587
2588 unsafe {
2589 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, self.mask_atlas_pbo);
2590 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
2591 gls::gl::ReadnPixels(
2592 0,
2593 0,
2594 output_width as i32,
2595 atlas_height as i32,
2596 gls::gl::RED,
2597 gls::gl::UNSIGNED_BYTE,
2598 atlas_bytes as i32,
2599 std::ptr::null_mut(),
2600 );
2601 gls::gl::Finish();
2602
2603 let ptr = gls::gl::MapBufferRange(
2604 gls::gl::PIXEL_PACK_BUFFER,
2605 0,
2606 atlas_bytes as isize,
2607 gls::gl::MAP_READ_BIT,
2608 );
2609 if ptr.is_null() {
2610 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2611 return Err(crate::Error::OpenGl(
2612 "Failed to map compact atlas PBO for readback".to_string(),
2613 ));
2614 }
2615 std::ptr::copy_nonoverlapping(ptr as *const u8, pixels.as_mut_ptr(), atlas_bytes);
2616 gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
2617 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2618 }
2619
2620 Ok((pixels, regions))
2621 }
2622
2623 fn check_src_format_supported(backend: TransferBackend, img: &TensorImage) -> bool {
2624 if backend.is_dma() && img.tensor().memory() == TensorMemory::Dma {
2625 matches!(img.fourcc(), RGBA | GREY | YUYV | NV12)
2629 } else {
2630 matches!(img.fourcc(), RGB | RGBA | GREY)
2631 }
2632 }
2633
2634 fn check_dst_format_supported(
2635 backend: TransferBackend,
2636 img: &TensorImage,
2637 has_bgra: bool,
2638 ) -> bool {
2639 if img.fourcc() == BGRA && !has_bgra {
2640 return false;
2641 }
2642 if backend.is_dma() && img.tensor().memory() == TensorMemory::Dma {
2643 matches!(
2644 img.fourcc(),
2645 RGBA | BGRA | GREY | PLANAR_RGB | RGB | RGB_INT8 | PLANAR_RGB_INT8
2646 )
2647 } else {
2648 matches!(img.fourcc(), RGB | RGBA | BGRA | GREY | RGB_INT8)
2649 }
2650 }
2651
2652 fn gl_check_support() -> Result<(bool, bool), crate::Error> {
2655 if let Ok(version) = gls::get_string(gls::gl::SHADING_LANGUAGE_VERSION) {
2656 log::debug!("GL Shading Language Version: {version:?}");
2657 } else {
2658 log::warn!("Could not get GL Shading Language Version");
2659 }
2660
2661 let extensions = unsafe {
2662 let str = gls::gl::GetString(gls::gl::EXTENSIONS);
2663 if str.is_null() {
2664 return Err(crate::Error::GLVersion(
2665 "GL returned no supported extensions".to_string(),
2666 ));
2667 }
2668 CStr::from_ptr(str as *const c_char)
2669 .to_string_lossy()
2670 .to_string()
2671 };
2672 log::debug!("GL Extensions: {extensions}");
2673 let required_ext = ["GL_OES_EGL_image_external_essl3"];
2674 let extensions = extensions.split_ascii_whitespace().collect::<BTreeSet<_>>();
2675 for required in required_ext {
2676 if !extensions.contains(required) {
2677 return Err(crate::Error::GLVersion(format!(
2678 "GL does not support {required} extension",
2679 )));
2680 }
2681 }
2682
2683 let has_float_linear = extensions.contains("GL_OES_texture_float_linear");
2684 log::debug!("GL_OES_texture_float_linear: {has_float_linear}");
2685
2686 let has_bgra = extensions.contains("GL_EXT_texture_format_BGRA8888");
2687 log::debug!("GL_EXT_texture_format_BGRA8888: {has_bgra}");
2688
2689 Ok((has_float_linear, has_bgra))
2690 }
2691
2692 fn setup_renderbuffer_dma(&mut self, dst: &TensorImage) -> crate::Result<()> {
2693 self.convert_fbo.bind();
2694
2695 let (width, height) = if matches!(dst.fourcc(), PLANAR_RGB | PLANAR_RGB_INT8) {
2696 let width = dst.width();
2697 let height = dst.height() * 3;
2698 (width as i32, height as i32)
2699 } else {
2700 (dst.width() as i32, dst.height() as i32)
2701 };
2702 let dest_egl = self.get_or_create_egl_image(CacheKind::Dst, dst)?;
2703 unsafe {
2704 gls::gl::UseProgram(self.texture_program_yuv.id);
2705 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2706 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2707 gls::gl::TexParameteri(
2708 gls::gl::TEXTURE_2D,
2709 gls::gl::TEXTURE_MIN_FILTER,
2710 gls::gl::LINEAR as i32,
2711 );
2712 gls::gl::TexParameteri(
2713 gls::gl::TEXTURE_2D,
2714 gls::gl::TEXTURE_MAG_FILTER,
2715 gls::gl::LINEAR as i32,
2716 );
2717 gls::gl::EGLImageTargetTexture2DOES(gls::gl::TEXTURE_2D, dest_egl.as_ptr());
2718 gls::gl::FramebufferTexture2D(
2719 gls::gl::FRAMEBUFFER,
2720 gls::gl::COLOR_ATTACHMENT0,
2721 gls::gl::TEXTURE_2D,
2722 self.render_texture.id,
2723 0,
2724 );
2725 check_gl_error(function!(), line!())?;
2726 gls::gl::Viewport(0, 0, width, height);
2727 }
2728 Ok(())
2729 }
2730
2731 fn convert_dest_dma(
2732 &mut self,
2733 dst: &mut TensorImage,
2734 src: &TensorImage,
2735 rotation: crate::Rotation,
2736 flip: Flip,
2737 crop: Crop,
2738 ) -> crate::Result<()> {
2739 assert!(self.gl_context.transfer_backend.is_dma());
2740 if fourcc_is_packed_rgb(dst.fourcc()) {
2741 if self.support_rgb_direct {
2742 self.convert_to_rgb_direct(src, dst, rotation, flip, crop)
2743 } else {
2744 Err(crate::Error::NotSupported(
2747 "OpenGL two-pass packed RGB disabled (no direct RGB support)".into(),
2748 ))
2749 }
2750 } else if dst.is_planar() {
2751 self.setup_renderbuffer_dma(dst)?;
2752 self.convert_to_planar(src, dst, rotation, flip, crop)
2753 } else {
2754 self.setup_renderbuffer_dma(dst)?;
2755 self.convert_to(src, dst, rotation, flip, crop)
2756 }
2757 }
2758
2759 fn setup_renderbuffer_non_dma(&mut self, dst: &TensorImage, crop: Crop) -> crate::Result<()> {
2760 debug_assert!(matches!(
2761 dst.fourcc(),
2762 RGB | RGBA | BGRA | GREY | PLANAR_RGB | RGB_INT8
2763 ));
2764 let (width, height) = if dst.is_planar() {
2765 let width = dst.width() / 4;
2766 let height = match dst.fourcc() {
2767 RGBA => dst.height() * 4,
2768 RGB => dst.height() * 3,
2769 GREY => dst.height(),
2770 _ => unreachable!(),
2771 };
2772 (width as i32, height as i32)
2773 } else {
2774 (dst.width() as i32, dst.height() as i32)
2775 };
2776
2777 let format = if dst.is_planar() {
2778 gls::gl::RED
2779 } else {
2780 match dst.fourcc() {
2781 RGB | RGB_INT8 => gls::gl::RGB,
2782 RGBA => gls::gl::RGBA,
2783 BGRA => 0x80E1, GREY => gls::gl::RED,
2785 _ => unreachable!(),
2786 }
2787 };
2788
2789 let start = Instant::now();
2790 self.convert_fbo.bind();
2791
2792 let map;
2793
2794 let pixels = if crop.dst_rect.is_none_or(|crop| {
2795 crop.top == 0
2796 && crop.left == 0
2797 && crop.height == dst.height()
2798 && crop.width == dst.width()
2799 }) {
2800 std::ptr::null()
2801 } else {
2802 map = dst.tensor().map()?;
2803 map.as_ptr() as *const c_void
2804 };
2805 unsafe {
2806 gls::gl::UseProgram(self.texture_program.id);
2807 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2808 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2809 gls::gl::TexParameteri(
2810 gls::gl::TEXTURE_2D,
2811 gls::gl::TEXTURE_MIN_FILTER,
2812 gls::gl::LINEAR as i32,
2813 );
2814 gls::gl::TexParameteri(
2815 gls::gl::TEXTURE_2D,
2816 gls::gl::TEXTURE_MAG_FILTER,
2817 gls::gl::LINEAR as i32,
2818 );
2819
2820 gls::gl::TexImage2D(
2821 gls::gl::TEXTURE_2D,
2822 0,
2823 format as i32,
2824 width,
2825 height,
2826 0,
2827 format,
2828 gls::gl::UNSIGNED_BYTE,
2829 pixels,
2830 );
2831 check_gl_error(function!(), line!())?;
2832 gls::gl::FramebufferTexture2D(
2833 gls::gl::FRAMEBUFFER,
2834 gls::gl::COLOR_ATTACHMENT0,
2835 gls::gl::TEXTURE_2D,
2836 self.render_texture.id,
2837 0,
2838 );
2839 check_gl_error(function!(), line!())?;
2840 gls::gl::Viewport(0, 0, width, height);
2841 }
2842 log::debug!("Set up framebuffer takes {:?}", start.elapsed());
2843 Ok(())
2844 }
2845
2846 fn setup_renderbuffer_from_pbo(
2854 &mut self,
2855 dst: &TensorImage,
2856 buffer_id: u32,
2857 ) -> crate::Result<()> {
2858 let (width, height) = (dst.width() as i32, dst.height() as i32);
2859 let format = match dst.fourcc() {
2860 RGB => gls::gl::RGB,
2861 RGBA => gls::gl::RGBA,
2862 BGRA => 0x80E1, _ => {
2864 return Err(crate::Error::NotSupported(format!(
2865 "PBO renderbuffer not supported for {}",
2866 dst.fourcc().display()
2867 )))
2868 }
2869 };
2870 self.convert_fbo.bind();
2871 unsafe {
2872 gls::gl::UseProgram(self.texture_program.id);
2873 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2874 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2875 gls::gl::TexParameteri(
2876 gls::gl::TEXTURE_2D,
2877 gls::gl::TEXTURE_MIN_FILTER,
2878 gls::gl::LINEAR as i32,
2879 );
2880 gls::gl::TexParameteri(
2881 gls::gl::TEXTURE_2D,
2882 gls::gl::TEXTURE_MAG_FILTER,
2883 gls::gl::LINEAR as i32,
2884 );
2885
2886 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, buffer_id);
2889 gls::gl::TexImage2D(
2890 gls::gl::TEXTURE_2D,
2891 0,
2892 format as i32,
2893 width,
2894 height,
2895 0,
2896 format,
2897 gls::gl::UNSIGNED_BYTE,
2898 std::ptr::null(),
2899 );
2900 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, 0);
2901
2902 check_gl_error(function!(), line!())?;
2903 gls::gl::FramebufferTexture2D(
2904 gls::gl::FRAMEBUFFER,
2905 gls::gl::COLOR_ATTACHMENT0,
2906 gls::gl::TEXTURE_2D,
2907 self.render_texture.id,
2908 0,
2909 );
2910 check_gl_error(function!(), line!())?;
2911 gls::gl::Viewport(0, 0, width, height);
2912 }
2913 Ok(())
2914 }
2915
2916 fn convert_dest_non_dma(
2917 &mut self,
2918 dst: &mut TensorImage,
2919 src: &TensorImage,
2920 rotation: crate::Rotation,
2921 flip: Flip,
2922 crop: Crop,
2923 ) -> crate::Result<()> {
2924 self.setup_renderbuffer_non_dma(dst, crop)?;
2925 let start = Instant::now();
2926 if dst.is_planar() {
2927 self.convert_to_planar(src, dst, rotation, flip, crop)?;
2928 } else {
2929 self.convert_to(src, dst, rotation, flip, crop)?;
2930 }
2931 log::debug!("Draw to framebuffer takes {:?}", start.elapsed());
2932 let start = Instant::now();
2933 let dest_format = match dst.fourcc() {
2934 RGB | RGB_INT8 => gls::gl::RGB,
2935 RGBA => gls::gl::RGBA,
2936 BGRA => 0x80E1, GREY => gls::gl::RED,
2938 _ => unreachable!(),
2939 };
2940
2941 unsafe {
2942 let mut dst_map = dst.tensor().map()?;
2943 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
2944 gls::gl::ReadnPixels(
2945 0,
2946 0,
2947 dst.width() as i32,
2948 dst.height() as i32,
2949 dest_format,
2950 gls::gl::UNSIGNED_BYTE,
2951 dst.tensor.len() as i32,
2952 dst_map.as_mut_ptr() as *mut c_void,
2953 );
2954 if fourcc_is_int8(dst.fourcc()) {
2956 for byte in dst_map.iter_mut() {
2957 *byte ^= 0x80;
2958 }
2959 }
2960 }
2961 log::debug!("Read from framebuffer takes {:?}", start.elapsed());
2962 Ok(())
2963 }
2964
2965 fn convert_pbo_to_pbo(
2971 &mut self,
2972 dst: &mut TensorImage,
2973 src: &TensorImage,
2974 rotation: crate::Rotation,
2975 flip: Flip,
2976 crop: Crop,
2977 ) -> crate::Result<()> {
2978 let (src_buffer_id, dst_buffer_id) = {
2980 let src_pbo = match &src.tensor {
2981 edgefirst_tensor::Tensor::Pbo(p) => p,
2982 _ => {
2983 return Err(crate::Error::OpenGl(
2984 "convert_pbo_to_pbo: src is not a PBO tensor".to_string(),
2985 ))
2986 }
2987 };
2988 let dst_pbo = match &dst.tensor {
2989 edgefirst_tensor::Tensor::Pbo(p) => p,
2990 _ => {
2991 return Err(crate::Error::OpenGl(
2992 "convert_pbo_to_pbo: dst is not a PBO tensor".to_string(),
2993 ))
2994 }
2995 };
2996
2997 if src_pbo.is_mapped() || dst_pbo.is_mapped() {
2998 return Err(crate::Error::OpenGl(
2999 "Cannot convert PBO tensors while they are mapped".to_string(),
3000 ));
3001 }
3002
3003 (src_pbo.buffer_id(), dst_pbo.buffer_id())
3004 };
3005
3006 self.setup_renderbuffer_non_dma(dst, crop)?;
3008
3009 let start = Instant::now();
3016 self.draw_src_texture_from_pbo(src, src_buffer_id, dst, rotation, flip, crop)?;
3017 log::debug!("PBO render takes {:?}", start.elapsed());
3018
3019 let start_read = Instant::now();
3021 let dest_format = match dst.fourcc() {
3022 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3023 crate::RGBA => gls::gl::RGBA,
3024 crate::BGRA => 0x80E1, crate::GREY => gls::gl::RED,
3026 _ => {
3027 return Err(crate::Error::NotSupported(format!(
3028 "PBO readback not supported for {}",
3029 dst.fourcc().display()
3030 )))
3031 }
3032 };
3033
3034 unsafe {
3035 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3037 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
3038 gls::gl::ReadnPixels(
3039 0,
3040 0,
3041 dst.width() as i32,
3042 dst.height() as i32,
3043 dest_format,
3044 gls::gl::UNSIGNED_BYTE,
3045 dst.tensor.len() as i32,
3046 std::ptr::null_mut(), );
3048 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3049 gls::gl::Finish();
3050 }
3051
3052 check_gl_error(function!(), line!())?;
3053
3054 if fourcc_is_int8(dst.fourcc()) {
3057 unsafe {
3058 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3059 let ptr = gls::gl::MapBufferRange(
3060 gls::gl::PIXEL_PACK_BUFFER,
3061 0,
3062 dst.tensor.len() as isize,
3063 gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
3064 );
3065 if !ptr.is_null() {
3066 let slice = std::slice::from_raw_parts_mut(ptr as *mut u8, dst.tensor.len());
3067 for byte in slice.iter_mut() {
3068 *byte ^= 0x80;
3069 }
3070 gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
3071 }
3072 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3073 }
3074 check_gl_error(function!(), line!())?;
3075 }
3076
3077 log::debug!("PBO readback takes {:?}", start_read.elapsed());
3078 Ok(())
3079 }
3080
3081 fn draw_src_texture_from_pbo(
3087 &mut self,
3088 src: &TensorImage,
3089 src_buffer_id: u32,
3090 dst: &TensorImage,
3091 rotation: crate::Rotation,
3092 flip: Flip,
3093 crop: Crop,
3094 ) -> Result<(), Error> {
3095 let texture_target = gls::gl::TEXTURE_2D;
3096 let texture_format = match src.fourcc() {
3097 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3098 crate::RGBA => gls::gl::RGBA,
3099 crate::GREY => gls::gl::RED,
3100 _ => {
3101 return Err(Error::NotSupported(format!(
3102 "PBO upload not supported for {:?}",
3103 src.fourcc()
3104 )));
3105 }
3106 };
3107
3108 let has_crop = crop.dst_rect.is_some_and(|x| {
3109 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
3110 });
3111
3112 let src_roi = if let Some(crop) = crop.src_rect {
3114 RegionOfInterest {
3115 left: crop.left as f32 / src.width() as f32,
3116 top: (crop.top + crop.height) as f32 / src.height() as f32,
3117 right: (crop.left + crop.width) as f32 / src.width() as f32,
3118 bottom: crop.top as f32 / src.height() as f32,
3119 }
3120 } else {
3121 RegionOfInterest {
3122 left: 0.,
3123 top: 1.,
3124 right: 1.,
3125 bottom: 0.,
3126 }
3127 };
3128
3129 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3130 let mut dst_roi = if let Some(crop) = crop.dst_rect {
3131 RegionOfInterest {
3132 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
3133 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
3134 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
3135 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
3136 }
3137 } else {
3138 RegionOfInterest {
3139 left: -1.,
3140 top: 1.,
3141 right: 1.,
3142 bottom: -1.,
3143 }
3144 };
3145
3146 let rotation_offset = match rotation {
3147 crate::Rotation::None => 0,
3148 crate::Rotation::Clockwise90 => 1,
3149 crate::Rotation::Rotate180 => 2,
3150 crate::Rotation::CounterClockwise90 => 3,
3151 };
3152
3153 unsafe {
3154 if has_crop {
3155 if let Some(dst_color) = crop.dst_color {
3156 gls::gl::ClearColor(
3157 dst_color[0] as f32 / 255.0,
3158 dst_color[1] as f32 / 255.0,
3159 dst_color[2] as f32 / 255.0,
3160 dst_color[3] as f32 / 255.0,
3161 );
3162 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3163 }
3164 }
3165
3166 gls::gl::UseProgram(self.texture_program.id);
3167 gls::gl::BindTexture(texture_target, self.camera_normal_texture.id);
3168 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
3169 gls::gl::TexParameteri(
3170 texture_target,
3171 gls::gl::TEXTURE_MIN_FILTER,
3172 gls::gl::LINEAR as i32,
3173 );
3174 gls::gl::TexParameteri(
3175 texture_target,
3176 gls::gl::TEXTURE_MAG_FILTER,
3177 gls::gl::LINEAR as i32,
3178 );
3179 if src.fourcc() == crate::GREY {
3180 for swizzle in [
3181 gls::gl::TEXTURE_SWIZZLE_R,
3182 gls::gl::TEXTURE_SWIZZLE_G,
3183 gls::gl::TEXTURE_SWIZZLE_B,
3184 ] {
3185 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
3186 }
3187 } else {
3188 for (swizzle, src_component) in [
3189 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
3190 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
3191 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
3192 ] {
3193 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src_component as i32);
3194 }
3195 }
3196
3197 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, src_buffer_id);
3199 gls::gl::TexImage2D(
3200 texture_target,
3201 0,
3202 texture_format as i32,
3203 src.width() as i32,
3204 src.height() as i32,
3205 0,
3206 texture_format,
3207 gls::gl::UNSIGNED_BYTE,
3208 std::ptr::null(), );
3210 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, 0);
3211
3212 self.camera_normal_texture.width = 0;
3214
3215 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3216 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3217
3218 match flip {
3219 crate::Flip::None => {}
3220 crate::Flip::Vertical => {
3221 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
3222 }
3223 crate::Flip::Horizontal => {
3224 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
3225 }
3226 }
3227
3228 let camera_vertices: [f32; 12] = [
3229 dst_roi.left,
3230 dst_roi.top,
3231 0., dst_roi.right,
3233 dst_roi.top,
3234 0., dst_roi.right,
3236 dst_roi.bottom,
3237 0., dst_roi.left,
3239 dst_roi.bottom,
3240 0., ];
3242 gls::gl::BufferData(
3243 gls::gl::ARRAY_BUFFER,
3244 (camera_vertices.len() * std::mem::size_of::<f32>()) as isize,
3245 camera_vertices.as_ptr() as *const c_void,
3246 gls::gl::STATIC_DRAW,
3247 );
3248 gls::gl::VertexAttribPointer(
3249 self.vertex_buffer.buffer_index,
3250 3,
3251 gls::gl::FLOAT,
3252 gls::gl::FALSE,
3253 0,
3254 std::ptr::null(),
3255 );
3256
3257 let texture_coords: [[f32; 8]; 4] = [
3258 [
3259 src_roi.left,
3260 src_roi.top,
3261 src_roi.right,
3262 src_roi.top,
3263 src_roi.right,
3264 src_roi.bottom,
3265 src_roi.left,
3266 src_roi.bottom,
3267 ],
3268 [
3269 src_roi.left,
3270 src_roi.bottom,
3271 src_roi.left,
3272 src_roi.top,
3273 src_roi.right,
3274 src_roi.top,
3275 src_roi.right,
3276 src_roi.bottom,
3277 ],
3278 [
3279 src_roi.right,
3280 src_roi.bottom,
3281 src_roi.left,
3282 src_roi.bottom,
3283 src_roi.left,
3284 src_roi.top,
3285 src_roi.right,
3286 src_roi.top,
3287 ],
3288 [
3289 src_roi.right,
3290 src_roi.top,
3291 src_roi.right,
3292 src_roi.bottom,
3293 src_roi.left,
3294 src_roi.bottom,
3295 src_roi.left,
3296 src_roi.top,
3297 ],
3298 ];
3299 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3300 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3301 gls::gl::BufferData(
3302 gls::gl::ARRAY_BUFFER,
3303 (texture_coords[0].len() * std::mem::size_of::<f32>()) as isize,
3304 texture_coords[rotation_offset].as_ptr() as *const c_void,
3305 gls::gl::STATIC_DRAW,
3306 );
3307 gls::gl::VertexAttribPointer(
3308 self.texture_buffer.buffer_index,
3309 2,
3310 gls::gl::FLOAT,
3311 gls::gl::FALSE,
3312 0,
3313 std::ptr::null(),
3314 );
3315 gls::gl::DrawArrays(gls::gl::TRIANGLE_FAN, 0, 4);
3316 gls::gl::DisableVertexAttribArray(self.vertex_buffer.buffer_index);
3317 gls::gl::DisableVertexAttribArray(self.texture_buffer.buffer_index);
3318
3319 gls::gl::Finish();
3320 }
3321
3322 check_gl_error(function!(), line!())?;
3323 Ok(())
3324 }
3325
3326 fn convert_any_to_pbo(
3330 &mut self,
3331 dst: &mut TensorImage,
3332 src: &TensorImage,
3333 rotation: crate::Rotation,
3334 flip: Flip,
3335 crop: Crop,
3336 ) -> crate::Result<()> {
3337 let dst_buffer_id = match &dst.tensor {
3338 edgefirst_tensor::Tensor::Pbo(p) => {
3339 if p.is_mapped() {
3340 return Err(crate::Error::OpenGl(
3341 "Cannot convert to a mapped PBO tensor".to_string(),
3342 ));
3343 }
3344 p.buffer_id()
3345 }
3346 _ => {
3347 return Err(crate::Error::OpenGl(
3348 "convert_any_to_pbo: dst is not a PBO tensor".to_string(),
3349 ))
3350 }
3351 };
3352
3353 self.setup_renderbuffer_non_dma(dst, crop)?;
3354 let start = Instant::now();
3355 if dst.is_planar() {
3356 self.convert_to_planar(src, dst, rotation, flip, crop)?;
3357 } else {
3358 self.convert_to(src, dst, rotation, flip, crop)?;
3359 }
3360 log::debug!("any-to-PBO render takes {:?}", start.elapsed());
3361
3362 let start_read = Instant::now();
3364 let dest_format = match dst.fourcc() {
3365 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3366 crate::RGBA => gls::gl::RGBA,
3367 crate::BGRA => 0x80E1, crate::GREY => gls::gl::RED,
3369 _ => {
3370 return Err(crate::Error::NotSupported(format!(
3371 "PBO readback not supported for {}",
3372 dst.fourcc().display()
3373 )))
3374 }
3375 };
3376 unsafe {
3377 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3378 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
3379 gls::gl::ReadnPixels(
3380 0,
3381 0,
3382 dst.width() as i32,
3383 dst.height() as i32,
3384 dest_format,
3385 gls::gl::UNSIGNED_BYTE,
3386 dst.tensor.len() as i32,
3387 std::ptr::null_mut(),
3388 );
3389 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3390 gls::gl::Finish();
3391 }
3392 check_gl_error(function!(), line!())?;
3393
3394 if fourcc_is_int8(dst.fourcc()) {
3395 unsafe {
3396 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3397 let ptr = gls::gl::MapBufferRange(
3398 gls::gl::PIXEL_PACK_BUFFER,
3399 0,
3400 dst.tensor.len() as isize,
3401 gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
3402 );
3403 if !ptr.is_null() {
3404 let slice = std::slice::from_raw_parts_mut(ptr as *mut u8, dst.tensor.len());
3405 for byte in slice.iter_mut() {
3406 *byte ^= 0x80;
3407 }
3408 gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
3409 }
3410 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3411 }
3412 check_gl_error(function!(), line!())?;
3413 }
3414
3415 log::debug!("any-to-PBO readback takes {:?}", start_read.elapsed());
3416 Ok(())
3417 }
3418
3419 fn convert_pbo_to_mem(
3423 &mut self,
3424 dst: &mut TensorImage,
3425 src: &TensorImage,
3426 rotation: crate::Rotation,
3427 flip: Flip,
3428 crop: Crop,
3429 ) -> crate::Result<()> {
3430 let src_buffer_id = match &src.tensor {
3431 edgefirst_tensor::Tensor::Pbo(p) => {
3432 if p.is_mapped() {
3433 return Err(crate::Error::OpenGl(
3434 "Cannot convert from a mapped PBO tensor".to_string(),
3435 ));
3436 }
3437 p.buffer_id()
3438 }
3439 _ => {
3440 return Err(crate::Error::OpenGl(
3441 "convert_pbo_to_mem: src is not a PBO tensor".to_string(),
3442 ))
3443 }
3444 };
3445
3446 self.setup_renderbuffer_non_dma(dst, crop)?;
3447 let start = Instant::now();
3448 self.draw_src_texture_from_pbo(src, src_buffer_id, dst, rotation, flip, crop)?;
3449 log::debug!("PBO-to-mem render takes {:?}", start.elapsed());
3450
3451 let start = Instant::now();
3453 let dest_format = match dst.fourcc() {
3454 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3455 crate::RGBA => gls::gl::RGBA,
3456 crate::BGRA => 0x80E1, crate::GREY => gls::gl::RED,
3458 _ => {
3459 return Err(crate::Error::NotSupported(format!(
3460 "PBO readback not supported for {}",
3461 dst.fourcc().display()
3462 )))
3463 }
3464 };
3465 unsafe {
3466 let mut dst_map = dst.tensor().map()?;
3467 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
3468 gls::gl::ReadnPixels(
3469 0,
3470 0,
3471 dst.width() as i32,
3472 dst.height() as i32,
3473 dest_format,
3474 gls::gl::UNSIGNED_BYTE,
3475 dst.tensor.len() as i32,
3476 dst_map.as_mut_ptr() as *mut c_void,
3477 );
3478 if fourcc_is_int8(dst.fourcc()) {
3479 for byte in dst_map.iter_mut() {
3480 *byte ^= 0x80;
3481 }
3482 }
3483 }
3484 log::debug!("PBO-to-mem readback takes {:?}", start.elapsed());
3485 Ok(())
3486 }
3487
3488 fn convert_to(
3489 &mut self,
3490 src: &TensorImage,
3491 dst: &TensorImage,
3492 rotation: crate::Rotation,
3493 flip: Flip,
3494 crop: Crop,
3495 ) -> Result<(), crate::Error> {
3496 check_gl_error(function!(), line!())?;
3497
3498 let has_crop = crop.dst_rect.is_some_and(|x| {
3499 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
3500 });
3501 if has_crop {
3502 if let Some(dst_color) = crop.dst_color {
3503 unsafe {
3504 gls::gl::ClearColor(
3505 dst_color[0] as f32 / 255.0,
3506 dst_color[1] as f32 / 255.0,
3507 dst_color[2] as f32 / 255.0,
3508 dst_color[3] as f32 / 255.0,
3509 );
3510 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3511 };
3512 }
3513 }
3514
3515 let src_roi = if let Some(crop) = crop.src_rect {
3517 RegionOfInterest {
3518 left: crop.left as f32 / src.width() as f32,
3519 top: (crop.top + crop.height) as f32 / src.height() as f32,
3520 right: (crop.left + crop.width) as f32 / src.width() as f32,
3521 bottom: crop.top as f32 / src.height() as f32,
3522 }
3523 } else {
3524 RegionOfInterest {
3525 left: 0.,
3526 top: 1.,
3527 right: 1.,
3528 bottom: 0.,
3529 }
3530 };
3531
3532 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3534 let dst_roi = if let Some(crop) = crop.dst_rect {
3535 RegionOfInterest {
3536 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
3537 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
3538 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
3539 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
3540 }
3541 } else {
3542 RegionOfInterest {
3543 left: -1.,
3544 top: 1.,
3545 right: 1.,
3546 bottom: -1.,
3547 }
3548 };
3549 let rotation_offset = match rotation {
3550 crate::Rotation::None => 0,
3551 crate::Rotation::Clockwise90 => 1,
3552 crate::Rotation::Rotate180 => 2,
3553 crate::Rotation::CounterClockwise90 => 3,
3554 };
3555 if self.gl_context.transfer_backend.is_dma() && src.tensor().memory() == TensorMemory::Dma {
3556 match self.get_or_create_egl_image(CacheKind::Src, src) {
3557 Ok(src_egl) => self.draw_camera_texture_eglimage(
3558 src,
3559 src_egl,
3560 src_roi,
3561 dst_roi,
3562 rotation_offset,
3563 flip,
3564 )?,
3565 Err(e) => {
3566 log::warn!("EGL image creation failed for {:?}: {:?}", src.fourcc(), e);
3567 let start = Instant::now();
3568 self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
3569 log::debug!("draw_src_texture takes {:?}", start.elapsed());
3570 }
3571 }
3572 } else {
3573 let start = Instant::now();
3574 self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
3575 log::debug!("draw_src_texture takes {:?}", start.elapsed());
3576 }
3577
3578 let start = Instant::now();
3579 unsafe { gls::gl::Finish() };
3580 log::debug!("gl_Finish takes {:?}", start.elapsed());
3581 check_gl_error(function!(), line!())?;
3582 Ok(())
3583 }
3584
3585 fn convert_to_planar(
3586 &mut self,
3587 src: &TensorImage,
3588 dst: &TensorImage,
3589 rotation: crate::Rotation,
3590 flip: Flip,
3591 crop: Crop,
3592 ) -> Result<(), crate::Error> {
3593 let alpha = match dst.fourcc() {
3616 PLANAR_RGB | PLANAR_RGB_INT8 => false,
3617 PLANAR_RGBA => true,
3618 _ => {
3619 return Err(crate::Error::NotSupported(
3620 "Destination format must be PLANAR_RGB, PLANAR_RGB_INT8, or PLANAR_RGBA"
3621 .to_string(),
3622 ));
3623 }
3624 };
3625 let is_int8 = fourcc_is_int8(dst.fourcc());
3626
3627 let src_roi = if let Some(crop) = crop.src_rect {
3629 RegionOfInterest {
3630 left: crop.left as f32 / src.width() as f32,
3631 top: (crop.top + crop.height) as f32 / src.height() as f32,
3632 right: (crop.left + crop.width) as f32 / src.width() as f32,
3633 bottom: crop.top as f32 / src.height() as f32,
3634 }
3635 } else {
3636 RegionOfInterest {
3637 left: 0.,
3638 top: 1.,
3639 right: 1.,
3640 bottom: 0.,
3641 }
3642 };
3643
3644 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3646 let dst_roi = if let Some(crop) = crop.dst_rect {
3647 RegionOfInterest {
3648 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
3649 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
3650 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
3651 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
3652 }
3653 } else {
3654 RegionOfInterest {
3655 left: -1.,
3656 top: 1.,
3657 right: 1.,
3658 bottom: -1.,
3659 }
3660 };
3661 let rotation_offset = match rotation {
3662 crate::Rotation::None => 0,
3663 crate::Rotation::Clockwise90 => 1,
3664 crate::Rotation::Rotate180 => 2,
3665 crate::Rotation::CounterClockwise90 => 3,
3666 };
3667
3668 let has_crop = crop.dst_rect.is_some_and(|x| {
3669 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
3670 });
3671 if has_crop {
3672 if let Some(dst_color) = crop.dst_color {
3673 self.clear_rect_planar(
3674 dst.width(),
3675 dst.height(),
3676 dst_roi,
3677 [
3678 dst_color[0] as f32 / 255.0,
3679 dst_color[1] as f32 / 255.0,
3680 dst_color[2] as f32 / 255.0,
3681 dst_color[3] as f32 / 255.0,
3682 ],
3683 alpha,
3684 )?;
3685 }
3686 }
3687
3688 let src_egl = self.get_or_create_egl_image(CacheKind::Src, src)?;
3689
3690 self.draw_camera_texture_to_rgb_planar(
3691 src_egl,
3692 src_roi,
3693 dst_roi,
3694 rotation_offset,
3695 flip,
3696 alpha,
3697 is_int8,
3698 )?;
3699 unsafe { gls::gl::Finish() };
3700
3701 Ok(())
3702 }
3703
3704 fn convert_to_packed_rgb(
3714 &mut self,
3715 src: &TensorImage,
3716 dst: &mut TensorImage,
3717 rotation: crate::Rotation,
3718 flip: Flip,
3719 crop: Crop,
3720 ) -> crate::Result<()> {
3721 let dst_w = dst.width();
3722 let dst_h = dst.height();
3723 let is_int8 = fourcc_is_int8(dst.fourcc());
3724
3725 if !(dst_w * 3).is_multiple_of(4) {
3727 return Err(crate::Error::NotSupported(format!(
3728 "Packed RGB requires width*3 divisible by 4, got width={dst_w}"
3729 )));
3730 }
3731
3732 let render_w = dst_w * 3 / 4;
3733 let render_h = dst_h;
3734
3735 log::debug!(
3736 "convert_to_packed_rgb: {dst_w}x{dst_h} -> {render_w}x{render_h} two-pass int8={is_int8}",
3737 );
3738
3739 self.ensure_packed_rgb_intermediate(dst_w, dst_h)?;
3741 self.packed_rgb_fbo.bind();
3742 unsafe {
3743 gls::gl::FramebufferTexture2D(
3744 gls::gl::FRAMEBUFFER,
3745 gls::gl::COLOR_ATTACHMENT0,
3746 gls::gl::TEXTURE_2D,
3747 self.packed_rgb_intermediate_tex.id,
3748 0,
3749 );
3750 check_gl_error(function!(), line!())?;
3751 gls::gl::Viewport(0, 0, dst_w as i32, dst_h as i32);
3752 }
3753 self.convert_to(src, dst, rotation, flip, crop)?;
3757
3758 self.convert_fbo.bind();
3760 let dest_egl =
3761 self.get_or_create_egl_image_rgb(dst, render_w, render_h, DrmFourcc::Abgr8888, 4)?;
3762 unsafe {
3763 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
3764 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
3765 gls::gl::TexParameteri(
3766 gls::gl::TEXTURE_2D,
3767 gls::gl::TEXTURE_MIN_FILTER,
3768 gls::gl::NEAREST as i32,
3769 );
3770 gls::gl::TexParameteri(
3771 gls::gl::TEXTURE_2D,
3772 gls::gl::TEXTURE_MAG_FILTER,
3773 gls::gl::NEAREST as i32,
3774 );
3775 gls::gl::EGLImageTargetTexture2DOES(gls::gl::TEXTURE_2D, dest_egl.as_ptr());
3776 gls::gl::FramebufferTexture2D(
3777 gls::gl::FRAMEBUFFER,
3778 gls::gl::COLOR_ATTACHMENT0,
3779 gls::gl::TEXTURE_2D,
3780 self.render_texture.id,
3781 0,
3782 );
3783 check_gl_error(function!(), line!())?;
3784 gls::gl::Viewport(0, 0, render_w as i32, render_h as i32);
3785 }
3786
3787 let program = if is_int8 {
3789 &self.packed_rgba8_int8_program_2d
3790 } else {
3791 &self.packed_rgba8_program_2d
3792 };
3793 unsafe {
3794 gls::gl::UseProgram(program.id);
3795 gls::gl::ActiveTexture(gls::gl::TEXTURE1);
3796 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.packed_rgb_intermediate_tex.id);
3797 gls::gl::TexParameteri(
3798 gls::gl::TEXTURE_2D,
3799 gls::gl::TEXTURE_MIN_FILTER,
3800 gls::gl::NEAREST as i32,
3801 );
3802 gls::gl::TexParameteri(
3803 gls::gl::TEXTURE_2D,
3804 gls::gl::TEXTURE_MAG_FILTER,
3805 gls::gl::NEAREST as i32,
3806 );
3807 }
3808
3809 unsafe {
3811 let loc_tex = gls::gl::GetUniformLocation(program.id, c"tex".as_ptr());
3812 gls::gl::Uniform1i(loc_tex, 1);
3813 }
3814
3815 self.draw_fullscreen_quad()?;
3817
3818 unsafe { gls::gl::Finish() };
3819 Ok(())
3820 }
3821
3822 fn convert_to_rgb_direct(
3825 &mut self,
3826 src: &TensorImage,
3827 dst: &mut TensorImage,
3828 rotation: crate::Rotation,
3829 flip: Flip,
3830 crop: Crop,
3831 ) -> crate::Result<()> {
3832 let is_int8 = fourcc_is_int8(dst.fourcc());
3833
3834 log::debug!(
3835 "convert_to_rgb_direct: {}x{} single-pass int8={is_int8}",
3836 dst.width(),
3837 dst.height(),
3838 );
3839
3840 let (rbo, width, height) = self.get_or_create_rgb_direct_rbo(dst)?;
3842
3843 self.convert_fbo.bind();
3845 unsafe {
3846 gls::gl::FramebufferRenderbuffer(
3847 gls::gl::FRAMEBUFFER,
3848 gls::gl::COLOR_ATTACHMENT0,
3849 gls::gl::RENDERBUFFER,
3850 rbo,
3851 );
3852 check_gl_error(function!(), line!())?;
3853
3854 let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
3855 if status != gls::gl::FRAMEBUFFER_COMPLETE {
3856 log::warn!("convert_to_rgb_direct: FBO incomplete (0x{status:x}), falling back");
3857 return self.convert_to_packed_rgb(src, dst, rotation, flip, crop);
3858 }
3859
3860 gls::gl::Viewport(0, 0, width, height);
3861 }
3862
3863 let crop = if is_int8 {
3865 std::mem::swap(&mut self.texture_program, &mut self.texture_int8_program);
3866 std::mem::swap(
3867 &mut self.texture_program_yuv,
3868 &mut self.texture_int8_program_yuv,
3869 );
3870 let mut crop = crop;
3873 if let Some(ref mut color) = crop.dst_color {
3874 color[0] ^= 0x80;
3875 color[1] ^= 0x80;
3876 color[2] ^= 0x80;
3877 }
3878 crop
3879 } else {
3880 crop
3881 };
3882
3883 let result = self.convert_to(src, dst, rotation, flip, crop);
3884
3885 if is_int8 {
3887 std::mem::swap(&mut self.texture_program, &mut self.texture_int8_program);
3888 std::mem::swap(
3889 &mut self.texture_program_yuv,
3890 &mut self.texture_int8_program_yuv,
3891 );
3892 }
3893
3894 result
3895 }
3896
3897 fn ensure_packed_rgb_intermediate(&mut self, width: usize, height: usize) -> crate::Result<()> {
3899 if self.packed_rgb_intermediate_size == (width, height) {
3900 return Ok(());
3901 }
3902 unsafe {
3903 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.packed_rgb_intermediate_tex.id);
3904 gls::gl::TexParameteri(
3905 gls::gl::TEXTURE_2D,
3906 gls::gl::TEXTURE_MIN_FILTER,
3907 gls::gl::NEAREST as i32,
3908 );
3909 gls::gl::TexParameteri(
3910 gls::gl::TEXTURE_2D,
3911 gls::gl::TEXTURE_MAG_FILTER,
3912 gls::gl::NEAREST as i32,
3913 );
3914 gls::gl::TexImage2D(
3915 gls::gl::TEXTURE_2D,
3916 0,
3917 gls::gl::RGBA as i32,
3918 width as i32,
3919 height as i32,
3920 0,
3921 gls::gl::RGBA,
3922 gls::gl::UNSIGNED_BYTE,
3923 std::ptr::null(),
3924 );
3925 check_gl_error(function!(), line!())?;
3926 }
3927 self.packed_rgb_intermediate_size = (width, height);
3928 Ok(())
3929 }
3930
3931 fn draw_fullscreen_quad(&self) -> Result<(), crate::Error> {
3934 unsafe {
3935 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3936 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3937
3938 let vertices: [f32; 12] = [
3939 -1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, -1.0, 0.0, -1.0, -1.0, 0.0, ];
3944 gls::gl::BufferSubData(
3945 gls::gl::ARRAY_BUFFER,
3946 0,
3947 (size_of::<f32>() * vertices.len()) as isize,
3948 vertices.as_ptr() as *const c_void,
3949 );
3950
3951 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3952 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3953
3954 let tex_coords: [f32; 8] = [0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0];
3957 gls::gl::BufferSubData(
3958 gls::gl::ARRAY_BUFFER,
3959 0,
3960 (size_of::<f32>() * tex_coords.len()) as isize,
3961 tex_coords.as_ptr() as *const c_void,
3962 );
3963
3964 let indices: [u32; 4] = [0, 1, 2, 3];
3965 gls::gl::DrawElements(
3966 gls::gl::TRIANGLE_FAN,
3967 indices.len() as i32,
3968 gls::gl::UNSIGNED_INT,
3969 indices.as_ptr() as *const c_void,
3970 );
3971 }
3972 check_gl_error(function!(), line!())?;
3973 Ok(())
3974 }
3975
3976 fn clear_rect_planar(
3977 &self,
3978 width: usize,
3979 height: usize,
3980 dst_roi: RegionOfInterest,
3981 color: [f32; 4],
3982 alpha: bool,
3983 ) -> Result<(), Error> {
3984 if !alpha && color[0] == color[1] && color[1] == color[2] {
3985 unsafe {
3986 gls::gl::ClearColor(color[0], color[0], color[0], 1.0);
3987 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3988 };
3989 }
3990
3991 let split = if alpha { 4 } else { 3 };
3992
3993 unsafe {
3994 gls::gl::Enable(gls::gl::SCISSOR_TEST);
3995 let x = (((dst_roi.left + 1.0) / 2.0) * width as f32).round() as i32;
3996 let y = (((dst_roi.bottom + 1.0) / 2.0) * height as f32).round() as i32;
3997 let width = (((dst_roi.right - dst_roi.left) / 2.0) * width as f32).round() as i32;
3998 let height = (((dst_roi.top - dst_roi.bottom) / 2.0) * height as f32 / split as f32)
3999 .round() as i32;
4000 for (i, c) in color.iter().enumerate().take(split) {
4001 gls::gl::Scissor(x, y + i as i32 * height, width, height);
4002 gls::gl::ClearColor(*c, *c, *c, 1.0);
4003 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
4004 }
4005 gls::gl::Disable(gls::gl::SCISSOR_TEST);
4006 }
4007 Ok(())
4008 }
4009
4010 #[allow(clippy::too_many_arguments)]
4011 fn draw_camera_texture_to_rgb_planar(
4012 &self,
4013 egl_img: egl::Image,
4014 src_roi: RegionOfInterest,
4015 mut dst_roi: RegionOfInterest,
4016 rotation_offset: usize,
4017 flip: Flip,
4018 alpha: bool,
4019 int8: bool,
4020 ) -> Result<(), Error> {
4021 let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
4022 match flip {
4023 Flip::None => {}
4024 Flip::Vertical => {
4025 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
4026 }
4027 Flip::Horizontal => {
4028 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
4029 }
4030 }
4031 unsafe {
4032 let program = if int8 {
4033 &self.texture_program_planar_int8
4034 } else {
4035 &self.texture_program_planar
4036 };
4037 gls::gl::UseProgram(program.id);
4038 gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
4039 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
4040 gls::gl::TexParameteri(
4041 texture_target,
4042 gls::gl::TEXTURE_MIN_FILTER,
4043 gls::gl::LINEAR as i32,
4044 );
4045 gls::gl::TexParameteri(
4046 texture_target,
4047 gls::gl::TEXTURE_MAG_FILTER,
4048 gls::gl::LINEAR as i32,
4049 );
4050 gls::gl::TexParameteri(
4051 texture_target,
4052 gls::gl::TEXTURE_WRAP_S,
4053 gls::gl::CLAMP_TO_EDGE as i32,
4054 );
4055
4056 gls::gl::TexParameteri(
4057 texture_target,
4058 gls::gl::TEXTURE_WRAP_T,
4059 gls::gl::CLAMP_TO_EDGE as i32,
4060 );
4061
4062 gls::egl_image_target_texture_2d_oes(texture_target, egl_img.as_ptr());
4063 check_gl_error(function!(), line!())?;
4064 let y_centers = if alpha {
4065 vec![-3.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0, 3.0 / 4.0]
4066 } else {
4067 vec![-2.0 / 3.0, 0.0, 2.0 / 3.0]
4068 };
4069 let swizzles = [gls::gl::RED, gls::gl::GREEN, gls::gl::BLUE, gls::gl::ALPHA];
4070 for (i, y_center) in y_centers.iter().enumerate() {
4072 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4073 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4074 let camera_vertices: [f32; 12] = [
4075 dst_roi.left,
4076 dst_roi.top / 3.0 + y_center,
4077 0., dst_roi.right,
4079 dst_roi.top / 3.0 + y_center,
4080 0., dst_roi.right,
4082 dst_roi.bottom / 3.0 + y_center,
4083 0., dst_roi.left,
4085 dst_roi.bottom / 3.0 + y_center,
4086 0., ];
4088 gls::gl::BufferData(
4089 gls::gl::ARRAY_BUFFER,
4090 (size_of::<f32>() * camera_vertices.len()) as isize,
4091 camera_vertices.as_ptr() as *const c_void,
4092 gls::gl::DYNAMIC_DRAW,
4093 );
4094
4095 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4096 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4097 let texture_vertices: [f32; 16] = [
4098 src_roi.left,
4099 src_roi.top,
4100 src_roi.right,
4101 src_roi.top,
4102 src_roi.right,
4103 src_roi.bottom,
4104 src_roi.left,
4105 src_roi.bottom,
4106 src_roi.left,
4107 src_roi.top,
4108 src_roi.right,
4109 src_roi.top,
4110 src_roi.right,
4111 src_roi.bottom,
4112 src_roi.left,
4113 src_roi.bottom,
4114 ];
4115
4116 gls::gl::BufferData(
4117 gls::gl::ARRAY_BUFFER,
4118 (size_of::<f32>() * 8) as isize,
4119 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
4120 gls::gl::DYNAMIC_DRAW,
4121 );
4122 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4123 gls::gl::TexParameteri(
4127 texture_target,
4128 gls::gl::TEXTURE_SWIZZLE_R,
4129 swizzles[i] as i32,
4130 );
4131
4132 gls::gl::DrawElements(
4133 gls::gl::TRIANGLE_FAN,
4134 vertices_index.len() as i32,
4135 gls::gl::UNSIGNED_INT,
4136 vertices_index.as_ptr() as *const c_void,
4137 );
4138 }
4139 check_gl_error(function!(), line!())?;
4140 }
4141 Ok(())
4142 }
4143
4144 fn draw_src_texture(
4145 &mut self,
4146 src: &TensorImage,
4147 src_roi: RegionOfInterest,
4148 mut dst_roi: RegionOfInterest,
4149 rotation_offset: usize,
4150 flip: Flip,
4151 ) -> Result<(), Error> {
4152 let texture_target = gls::gl::TEXTURE_2D;
4153 let texture_format = match src.fourcc() {
4154 RGB => gls::gl::RGB,
4155 RGBA => gls::gl::RGBA,
4156 GREY => gls::gl::RED,
4157 _ => {
4158 return Err(Error::NotSupported(format!(
4159 "draw_src_texture does not support {:?} (use DMA-BUF path for YUV)",
4160 src.fourcc()
4161 )));
4162 }
4163 };
4164 unsafe {
4165 gls::gl::UseProgram(self.texture_program.id);
4166 gls::gl::BindTexture(texture_target, self.camera_normal_texture.id);
4167 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
4168 gls::gl::TexParameteri(
4169 texture_target,
4170 gls::gl::TEXTURE_MIN_FILTER,
4171 gls::gl::LINEAR as i32,
4172 );
4173 gls::gl::TexParameteri(
4174 texture_target,
4175 gls::gl::TEXTURE_MAG_FILTER,
4176 gls::gl::LINEAR as i32,
4177 );
4178 if src.fourcc() == GREY {
4179 for swizzle in [
4180 gls::gl::TEXTURE_SWIZZLE_R,
4181 gls::gl::TEXTURE_SWIZZLE_G,
4182 gls::gl::TEXTURE_SWIZZLE_B,
4183 ] {
4184 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
4185 }
4186 } else {
4187 for (swizzle, src) in [
4188 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
4189 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
4190 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
4191 ] {
4192 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
4193 }
4194 }
4195 self.camera_normal_texture.update_texture(
4196 texture_target,
4197 src.width(),
4198 src.height(),
4199 texture_format,
4200 &src.tensor().map()?,
4201 );
4202
4203 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4204 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4205
4206 match flip {
4207 Flip::None => {}
4208 Flip::Vertical => {
4209 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
4210 }
4211 Flip::Horizontal => {
4212 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
4213 }
4214 }
4215
4216 let camera_vertices: [f32; 12] = [
4217 dst_roi.left,
4218 dst_roi.top,
4219 0., dst_roi.right,
4221 dst_roi.top,
4222 0., dst_roi.right,
4224 dst_roi.bottom,
4225 0., dst_roi.left,
4227 dst_roi.bottom,
4228 0., ];
4230 gls::gl::BufferData(
4231 gls::gl::ARRAY_BUFFER,
4232 (size_of::<f32>() * camera_vertices.len()) as isize,
4233 camera_vertices.as_ptr() as *const c_void,
4234 gls::gl::DYNAMIC_DRAW,
4235 );
4236 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4237 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4238 let texture_vertices: [f32; 16] = [
4239 src_roi.left,
4240 src_roi.top,
4241 src_roi.right,
4242 src_roi.top,
4243 src_roi.right,
4244 src_roi.bottom,
4245 src_roi.left,
4246 src_roi.bottom,
4247 src_roi.left,
4248 src_roi.top,
4249 src_roi.right,
4250 src_roi.top,
4251 src_roi.right,
4252 src_roi.bottom,
4253 src_roi.left,
4254 src_roi.bottom,
4255 ];
4256
4257 gls::gl::BufferData(
4258 gls::gl::ARRAY_BUFFER,
4259 (size_of::<f32>() * 8) as isize,
4260 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
4261 gls::gl::DYNAMIC_DRAW,
4262 );
4263 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4264 gls::gl::DrawElements(
4265 gls::gl::TRIANGLE_FAN,
4266 vertices_index.len() as i32,
4267 gls::gl::UNSIGNED_INT,
4268 vertices_index.as_ptr() as *const c_void,
4269 );
4270 check_gl_error(function!(), line!())?;
4271
4272 Ok(())
4273 }
4274 }
4275
4276 fn draw_camera_texture_eglimage(
4277 &self,
4278 src: &TensorImage,
4279 egl_img: egl::Image,
4280 src_roi: RegionOfInterest,
4281 mut dst_roi: RegionOfInterest,
4282 rotation_offset: usize,
4283 flip: Flip,
4284 ) -> Result<(), Error> {
4285 let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
4287 unsafe {
4288 gls::gl::UseProgram(self.texture_program_yuv.id);
4289 gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
4290 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
4291 gls::gl::TexParameteri(
4292 texture_target,
4293 gls::gl::TEXTURE_MIN_FILTER,
4294 gls::gl::LINEAR as i32,
4295 );
4296 gls::gl::TexParameteri(
4297 texture_target,
4298 gls::gl::TEXTURE_MAG_FILTER,
4299 gls::gl::LINEAR as i32,
4300 );
4301
4302 if src.fourcc() == GREY {
4303 for swizzle in [
4304 gls::gl::TEXTURE_SWIZZLE_R,
4305 gls::gl::TEXTURE_SWIZZLE_G,
4306 gls::gl::TEXTURE_SWIZZLE_B,
4307 ] {
4308 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
4309 }
4310 } else {
4311 for (swizzle, src) in [
4312 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
4313 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
4314 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
4315 ] {
4316 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
4317 }
4318 }
4319
4320 gls::egl_image_target_texture_2d_oes(texture_target, egl_img.as_ptr());
4321 check_gl_error(function!(), line!())?;
4322 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4323 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4324
4325 match flip {
4326 Flip::None => {}
4327 Flip::Vertical => {
4328 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
4329 }
4330 Flip::Horizontal => {
4331 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
4332 }
4333 }
4334
4335 let camera_vertices: [f32; 12] = [
4336 dst_roi.left,
4337 dst_roi.top,
4338 0., dst_roi.right,
4340 dst_roi.top,
4341 0., dst_roi.right,
4343 dst_roi.bottom,
4344 0., dst_roi.left,
4346 dst_roi.bottom,
4347 0., ];
4349 gls::gl::BufferSubData(
4350 gls::gl::ARRAY_BUFFER,
4351 0,
4352 (size_of::<f32>() * camera_vertices.len()) as isize,
4353 camera_vertices.as_ptr() as *const c_void,
4354 );
4355
4356 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4357 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4358
4359 let texture_vertices: [f32; 16] = [
4360 src_roi.left,
4361 src_roi.top,
4362 src_roi.right,
4363 src_roi.top,
4364 src_roi.right,
4365 src_roi.bottom,
4366 src_roi.left,
4367 src_roi.bottom,
4368 src_roi.left,
4369 src_roi.top,
4370 src_roi.right,
4371 src_roi.top,
4372 src_roi.right,
4373 src_roi.bottom,
4374 src_roi.left,
4375 src_roi.bottom,
4376 ];
4377 gls::gl::BufferSubData(
4378 gls::gl::ARRAY_BUFFER,
4379 0,
4380 (size_of::<f32>() * 8) as isize,
4381 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
4382 );
4383
4384 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4385 gls::gl::DrawElements(
4386 gls::gl::TRIANGLE_FAN,
4387 vertices_index.len() as i32,
4388 gls::gl::UNSIGNED_INT,
4389 vertices_index.as_ptr() as *const c_void,
4390 );
4391 }
4392 check_gl_error(function!(), line!())?;
4393 Ok(())
4394 }
4395
4396 fn create_image_from_dma2(&self, src: &TensorImage) -> Result<EglImage, crate::Error> {
4397 let width;
4398 let height;
4399 let format;
4400 let channels;
4401
4402 if src.fourcc() == NV12 {
4404 if !src.width().is_multiple_of(4) {
4405 return Err(Error::NotSupported(
4406 "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
4407 .to_string(),
4408 ));
4409 }
4410 width = src.width();
4411 height = src.height();
4412 format = fourcc_to_drm(NV12)?;
4413 channels = 1; } else if src.is_planar() {
4415 if !src.width().is_multiple_of(16) {
4416 return Err(Error::NotSupported(
4417 "OpenGL Planar RGB EGLImage doesn't support image widths which are not multiples of 16"
4418 .to_string(),
4419 ));
4420 }
4421 match src.fourcc() {
4422 PLANAR_RGB | PLANAR_RGB_INT8 => {
4423 format = DrmFourcc::R8;
4424 width = src.width();
4425 height = src.height() * 3;
4426 channels = 1;
4427 }
4428 fourcc => {
4429 return Err(crate::Error::NotSupported(format!(
4430 "Unsupported Planar FourCC {fourcc:?}"
4431 )));
4432 }
4433 };
4434 } else {
4435 if !src.width().is_multiple_of(4) {
4436 return Err(Error::NotSupported(
4437 "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
4438 .to_string(),
4439 ));
4440 }
4441 width = src.width();
4442 height = src.height();
4443 format = fourcc_to_drm(src.fourcc())?;
4444 channels = src.channels();
4445 }
4446
4447 let fd = match &src.tensor {
4448 edgefirst_tensor::Tensor::Dma(dma_tensor) => dma_tensor.fd.as_raw_fd(),
4449 edgefirst_tensor::Tensor::Shm(_) => {
4450 return Err(Error::NotImplemented(
4451 "OpenGL EGLImage doesn't support SHM".to_string(),
4452 ));
4453 }
4454 edgefirst_tensor::Tensor::Mem(_) => {
4455 return Err(Error::NotImplemented(
4456 "OpenGL EGLImage doesn't support MEM".to_string(),
4457 ));
4458 }
4459 edgefirst_tensor::Tensor::Pbo(_) => {
4460 return Err(Error::NotImplemented(
4461 "OpenGL EGLImage doesn't support PBO".to_string(),
4462 ));
4463 }
4464 };
4465
4466 let plane0_pitch = if src.fourcc() == NV12 {
4469 width
4470 } else {
4471 width * channels
4472 };
4473
4474 let mut egl_img_attr = vec![
4475 egl_ext::LINUX_DRM_FOURCC as Attrib,
4476 format as Attrib,
4477 khronos_egl::WIDTH as Attrib,
4478 width as Attrib,
4479 khronos_egl::HEIGHT as Attrib,
4480 height as Attrib,
4481 egl_ext::DMA_BUF_PLANE0_PITCH as Attrib,
4482 plane0_pitch as Attrib,
4483 egl_ext::DMA_BUF_PLANE0_OFFSET as Attrib,
4484 0 as Attrib,
4485 egl_ext::DMA_BUF_PLANE0_FD as Attrib,
4486 fd as Attrib,
4487 egl::IMAGE_PRESERVED as Attrib,
4488 egl::TRUE as Attrib,
4489 ];
4490
4491 if src.fourcc() == NV12 {
4493 let uv_offset = width * height; egl_img_attr.append(&mut vec![
4495 egl_ext::DMA_BUF_PLANE1_FD as Attrib,
4496 fd as Attrib,
4497 egl_ext::DMA_BUF_PLANE1_OFFSET as Attrib,
4498 uv_offset as Attrib,
4499 egl_ext::DMA_BUF_PLANE1_PITCH as Attrib,
4500 width as Attrib, ]);
4502 }
4503
4504 if matches!(src.fourcc(), YUYV | VYUY | NV12) {
4505 egl_img_attr.append(&mut vec![
4506 egl_ext::YUV_COLOR_SPACE_HINT as Attrib,
4507 egl_ext::ITU_REC709 as Attrib,
4508 egl_ext::SAMPLE_RANGE_HINT as Attrib,
4509 egl_ext::YUV_NARROW_RANGE as Attrib,
4510 ]);
4511 }
4512
4513 egl_img_attr.push(khronos_egl::NONE as Attrib);
4514
4515 match self.new_egl_image_owned(egl_ext::LINUX_DMA_BUF, &egl_img_attr) {
4516 Ok(v) => Ok(v),
4517 Err(e) => Err(e),
4518 }
4519 }
4520
4521 fn new_egl_image_owned(
4522 &'_ self,
4523 target: egl::Enum,
4524 attrib_list: &[Attrib],
4525 ) -> Result<EglImage, Error> {
4526 let image = GlContext::egl_create_image_with_fallback(
4527 &self.gl_context.egl,
4528 self.gl_context.display.as_display(),
4529 unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) },
4530 target,
4531 unsafe { egl::ClientBuffer::from_ptr(null_mut()) },
4532 attrib_list,
4533 )?;
4534 Ok(EglImage {
4535 egl_image: image,
4536 display: self.gl_context.display.as_display(),
4537 egl: Rc::clone(&self.gl_context.egl),
4538 })
4539 }
4540
4541 fn get_or_create_egl_image(
4547 &mut self,
4548 cache: CacheKind,
4549 img: &TensorImage,
4550 ) -> Result<egl::Image, crate::Error> {
4551 let id = img.buffer_identity().id();
4552
4553 match cache {
4555 CacheKind::Src => self.src_egl_cache.sweep(),
4556 CacheKind::Dst => self.dst_egl_cache.sweep(),
4557 }
4558
4559 {
4560 let egl_cache = match cache {
4561 CacheKind::Src => &mut self.src_egl_cache,
4562 CacheKind::Dst => &mut self.dst_egl_cache,
4563 };
4564 let ts = egl_cache.next_timestamp();
4565 if let Some(cached) = egl_cache.entries.get_mut(&id) {
4566 egl_cache.hits += 1;
4567 cached.last_used = ts;
4568 log::trace!("EglImageCache {:?} hit: id={id:#x}", cache);
4569 return Ok(cached.egl_image.egl_image);
4570 }
4571 egl_cache.misses += 1;
4572 log::trace!("EglImageCache {:?} miss: id={id:#x}", cache);
4573 if egl_cache.entries.len() >= egl_cache.capacity {
4575 egl_cache.evict_lru();
4576 }
4577 }
4578
4579 let egl_image_obj = self.create_image_from_dma2(img)?;
4580 let handle = egl_image_obj.egl_image;
4581 let guard = img.buffer_identity().weak();
4582 let egl_cache = match cache {
4583 CacheKind::Src => &mut self.src_egl_cache,
4584 CacheKind::Dst => &mut self.dst_egl_cache,
4585 };
4586 let ts = egl_cache.next_timestamp();
4587 egl_cache.entries.insert(
4588 id,
4589 CachedEglImage {
4590 egl_image: egl_image_obj,
4591 guard,
4592 renderbuffer: None,
4593 last_used: ts,
4594 },
4595 );
4596 Ok(handle)
4597 }
4598
4599 fn create_egl_image_with_dims(
4603 &self,
4604 img: &TensorImage,
4605 width: usize,
4606 height: usize,
4607 drm_format: DrmFourcc,
4608 bpp: usize,
4609 ) -> Result<EglImage, crate::Error> {
4610 let fd = match &img.tensor {
4611 edgefirst_tensor::Tensor::Dma(dma_tensor) => dma_tensor.fd.as_raw_fd(),
4612 _ => {
4613 return Err(Error::NotImplemented(
4614 "create_egl_image_with_dims requires DMA tensor".to_string(),
4615 ));
4616 }
4617 };
4618
4619 let pitch = width * bpp;
4620 let egl_img_attr = vec![
4621 egl_ext::LINUX_DRM_FOURCC as Attrib,
4622 drm_format as u32 as Attrib,
4623 khronos_egl::WIDTH as Attrib,
4624 width as Attrib,
4625 khronos_egl::HEIGHT as Attrib,
4626 height as Attrib,
4627 egl_ext::DMA_BUF_PLANE0_PITCH as Attrib,
4628 pitch as Attrib,
4629 egl_ext::DMA_BUF_PLANE0_OFFSET as Attrib,
4630 0 as Attrib,
4631 egl_ext::DMA_BUF_PLANE0_FD as Attrib,
4632 fd as Attrib,
4633 egl::IMAGE_PRESERVED as Attrib,
4634 egl::TRUE as Attrib,
4635 khronos_egl::NONE as Attrib,
4636 ];
4637
4638 self.new_egl_image_owned(egl_ext::LINUX_DMA_BUF, &egl_img_attr)
4639 }
4640
4641 fn get_or_create_egl_image_rgb(
4644 &mut self,
4645 img: &TensorImage,
4646 width: usize,
4647 height: usize,
4648 drm_format: DrmFourcc,
4649 bpp: usize,
4650 ) -> Result<egl::Image, crate::Error> {
4651 let id = img.buffer_identity().id();
4652 self.dst_egl_cache.sweep();
4653
4654 let ts = self.dst_egl_cache.next_timestamp();
4655 if let Some(cached) = self.dst_egl_cache.entries.get_mut(&id) {
4656 self.dst_egl_cache.hits += 1;
4657 cached.last_used = ts;
4658 log::trace!("EglImageCache dst (RGB) hit: id={id:#x}");
4659 return Ok(cached.egl_image.egl_image);
4660 }
4661 self.dst_egl_cache.misses += 1;
4662 log::trace!("EglImageCache dst (RGB) miss: id={id:#x}");
4663
4664 if self.dst_egl_cache.entries.len() >= self.dst_egl_cache.capacity {
4665 self.dst_egl_cache.evict_lru();
4666 }
4667
4668 let egl_image_obj = self.create_egl_image_with_dims(img, width, height, drm_format, bpp)?;
4669 let handle = egl_image_obj.egl_image;
4670 let guard = img.buffer_identity().weak();
4671 let ts = self.dst_egl_cache.next_timestamp();
4672 self.dst_egl_cache.entries.insert(
4673 id,
4674 CachedEglImage {
4675 egl_image: egl_image_obj,
4676 guard,
4677 renderbuffer: None,
4678 last_used: ts,
4679 },
4680 );
4681 Ok(handle)
4682 }
4683
4684 fn get_or_create_rgb_direct_rbo(
4688 &mut self,
4689 dst: &TensorImage,
4690 ) -> crate::Result<(u32, i32, i32)> {
4691 let id = dst.buffer_identity().id();
4692 let width = dst.width() as i32;
4693 let height = dst.height() as i32;
4694
4695 self.dst_egl_cache.sweep();
4696
4697 let ts = self.dst_egl_cache.next_timestamp();
4699 if let Some(cached) = self.dst_egl_cache.entries.get_mut(&id) {
4700 if let Some(rbo) = cached.renderbuffer {
4701 self.dst_egl_cache.hits += 1;
4702 cached.last_used = ts;
4703 log::trace!("EglImageCache dst (rgb_direct) hit: id={id:#x}");
4704 return Ok((rbo, width, height));
4705 }
4706 }
4707 self.dst_egl_cache.misses += 1;
4708 log::trace!("EglImageCache dst (rgb_direct) miss: id={id:#x}");
4709
4710 if self.dst_egl_cache.entries.len() >= self.dst_egl_cache.capacity {
4712 self.dst_egl_cache.evict_lru();
4713 }
4714
4715 let egl_image_obj =
4717 self.create_egl_image_with_dims(dst, dst.width(), dst.height(), DrmFourcc::Bgr888, 3)?;
4718
4719 let rbo = unsafe {
4721 let mut rbo = 0u32;
4722 gls::gl::GenRenderbuffers(1, &mut rbo);
4723 gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, rbo);
4724 gls::gl::EGLImageTargetRenderbufferStorageOES(
4725 gls::gl::RENDERBUFFER,
4726 egl_image_obj.egl_image.as_ptr(),
4727 );
4728 if let Err(e) = check_gl_error(function!(), line!()) {
4729 gls::gl::DeleteRenderbuffers(1, &rbo);
4730 return Err(e);
4731 }
4732 rbo
4733 };
4734
4735 let guard = dst.buffer_identity().weak();
4737 let ts = self.dst_egl_cache.next_timestamp();
4738 self.dst_egl_cache.entries.insert(
4739 id,
4740 CachedEglImage {
4741 egl_image: egl_image_obj,
4742 guard,
4743 renderbuffer: Some(rbo),
4744 last_used: ts,
4745 },
4746 );
4747
4748 Ok((rbo, width, height))
4749 }
4750
4751 fn reshape_segmentation_to_rgba(&self, segmentation: &[u8], shape: [usize; 3]) -> Vec<u8> {
4753 let [height, width, classes] = shape;
4754
4755 let n_layer_stride = height * width * 4;
4756 let n_row_stride = width * 4;
4757 let n_col_stride = 4;
4758 let row_stride = width * classes;
4759 let col_stride = classes;
4760
4761 let mut new_segmentation = vec![0u8; n_layer_stride * classes.div_ceil(4)];
4762
4763 for i in 0..height {
4764 for j in 0..width {
4765 for k in 0..classes.div_ceil(4) * 4 {
4766 if k >= classes {
4767 new_segmentation[n_layer_stride * (k / 4)
4768 + i * n_row_stride
4769 + j * n_col_stride
4770 + k % 4] = 0;
4771 } else {
4772 new_segmentation[n_layer_stride * (k / 4)
4773 + i * n_row_stride
4774 + j * n_col_stride
4775 + k % 4] = segmentation[i * row_stride + j * col_stride + k];
4776 }
4777 }
4778 }
4779 }
4780
4781 new_segmentation
4782 }
4783
4784 fn render_modelpack_segmentation(
4785 &mut self,
4786 dst_roi: RegionOfInterest,
4787 segmentation: &[u8],
4788 shape: [usize; 3],
4789 ) -> Result<(), crate::Error> {
4790 log::debug!("start render_segmentation_to_image");
4791
4792 let new_segmentation = self.reshape_segmentation_to_rgba(segmentation, shape);
4795
4796 let [height, width, classes] = shape;
4797
4798 let format = gls::gl::RGBA;
4799 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
4800 self.segmentation_program
4801 .load_uniform_1i(c"background_index", shape[2] as i32 - 1)?;
4802
4803 gls::use_program(self.segmentation_program.id);
4804
4805 gls::bind_texture(texture_target, self.segmentation_texture.id);
4806 gls::active_texture(gls::gl::TEXTURE0);
4807 gls::tex_parameteri(
4808 texture_target,
4809 gls::gl::TEXTURE_MIN_FILTER,
4810 gls::gl::LINEAR as i32,
4811 );
4812 gls::tex_parameteri(
4813 texture_target,
4814 gls::gl::TEXTURE_MAG_FILTER,
4815 gls::gl::LINEAR as i32,
4816 );
4817 gls::tex_parameteri(
4818 texture_target,
4819 gls::gl::TEXTURE_WRAP_S,
4820 gls::gl::CLAMP_TO_EDGE as i32,
4821 );
4822
4823 gls::tex_parameteri(
4824 texture_target,
4825 gls::gl::TEXTURE_WRAP_T,
4826 gls::gl::CLAMP_TO_EDGE as i32,
4827 );
4828
4829 gls::tex_image3d(
4830 texture_target,
4831 0,
4832 format as i32,
4833 width as i32,
4834 height as i32,
4835 classes.div_ceil(4) as i32,
4836 0,
4837 format,
4838 gls::gl::UNSIGNED_BYTE,
4839 Some(&new_segmentation),
4840 );
4841
4842 let src_roi = RegionOfInterest {
4843 left: 0.,
4844 top: 1.,
4845 right: 1.,
4846 bottom: 0.,
4847 };
4848
4849 unsafe {
4850 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4851 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4852
4853 let camera_vertices: [f32; 12] = [
4854 dst_roi.left,
4855 dst_roi.top,
4856 0., dst_roi.right,
4858 dst_roi.top,
4859 0., dst_roi.right,
4861 dst_roi.bottom,
4862 0., dst_roi.left,
4864 dst_roi.bottom,
4865 0., ];
4867 gls::gl::BufferSubData(
4868 gls::gl::ARRAY_BUFFER,
4869 0,
4870 (size_of::<f32>() * camera_vertices.len()) as isize,
4871 camera_vertices.as_ptr() as *const c_void,
4872 );
4873
4874 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4875 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4876
4877 let texture_vertices: [f32; 8] = [
4878 src_roi.left,
4879 src_roi.top,
4880 src_roi.right,
4881 src_roi.top,
4882 src_roi.right,
4883 src_roi.bottom,
4884 src_roi.left,
4885 src_roi.bottom,
4886 ];
4887 gls::gl::BufferSubData(
4888 gls::gl::ARRAY_BUFFER,
4889 0,
4890 (size_of::<f32>() * 8) as isize,
4891 (texture_vertices[0..]).as_ptr() as *const c_void,
4892 );
4893
4894 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4895 gls::gl::DrawElements(
4896 gls::gl::TRIANGLE_FAN,
4897 vertices_index.len() as i32,
4898 gls::gl::UNSIGNED_INT,
4899 vertices_index.as_ptr() as *const c_void,
4900 );
4901 }
4902
4903 Ok(())
4904 }
4905
4906 fn render_yolo_segmentation(
4907 &mut self,
4908 dst_roi: RegionOfInterest,
4909 segmentation: &[u8],
4910 shape: [usize; 2],
4911 class: usize,
4912 ) -> Result<(), crate::Error> {
4913 log::debug!("start render_yolo_segmentation");
4914
4915 let [height, width] = shape;
4916
4917 let format = gls::gl::RED;
4918 let texture_target = gls::gl::TEXTURE_2D;
4919 gls::use_program(self.instanced_segmentation_program.id);
4920 self.instanced_segmentation_program
4921 .load_uniform_1i(c"class_index", class as i32)?;
4922 gls::bind_texture(texture_target, self.segmentation_texture.id);
4923 gls::active_texture(gls::gl::TEXTURE0);
4924 gls::tex_parameteri(
4925 texture_target,
4926 gls::gl::TEXTURE_MIN_FILTER,
4927 gls::gl::LINEAR as i32,
4928 );
4929 gls::tex_parameteri(
4930 texture_target,
4931 gls::gl::TEXTURE_MAG_FILTER,
4932 gls::gl::LINEAR as i32,
4933 );
4934 gls::tex_parameteri(
4935 texture_target,
4936 gls::gl::TEXTURE_WRAP_S,
4937 gls::gl::CLAMP_TO_EDGE as i32,
4938 );
4939
4940 gls::tex_parameteri(
4941 texture_target,
4942 gls::gl::TEXTURE_WRAP_T,
4943 gls::gl::CLAMP_TO_EDGE as i32,
4944 );
4945
4946 gls::tex_image2d(
4947 texture_target,
4948 0,
4949 format as i32,
4950 width as i32,
4951 height as i32,
4952 0,
4953 format,
4954 gls::gl::UNSIGNED_BYTE,
4955 Some(segmentation),
4956 );
4957
4958 let src_roi = RegionOfInterest {
4959 left: 0.,
4960 top: 1.,
4961 right: 1.,
4962 bottom: 0.,
4963 };
4964
4965 unsafe {
4966 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4967 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4968
4969 let camera_vertices: [f32; 12] = [
4970 dst_roi.left,
4971 dst_roi.top,
4972 0., dst_roi.right,
4974 dst_roi.top,
4975 0., dst_roi.right,
4977 dst_roi.bottom,
4978 0., dst_roi.left,
4980 dst_roi.bottom,
4981 0., ];
4983 gls::gl::BufferSubData(
4984 gls::gl::ARRAY_BUFFER,
4985 0,
4986 (size_of::<f32>() * camera_vertices.len()) as isize,
4987 camera_vertices.as_ptr() as *const c_void,
4988 );
4989
4990 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4991 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4992
4993 let texture_vertices: [f32; 8] = [
4994 src_roi.left,
4995 src_roi.top,
4996 src_roi.right,
4997 src_roi.top,
4998 src_roi.right,
4999 src_roi.bottom,
5000 src_roi.left,
5001 src_roi.bottom,
5002 ];
5003 gls::gl::BufferSubData(
5004 gls::gl::ARRAY_BUFFER,
5005 0,
5006 (size_of::<f32>() * 8) as isize,
5007 (texture_vertices).as_ptr() as *const c_void,
5008 );
5009
5010 let vertices_index: [u32; 4] = [0, 1, 2, 3];
5011 gls::gl::DrawElements(
5012 gls::gl::TRIANGLE_FAN,
5013 vertices_index.len() as i32,
5014 gls::gl::UNSIGNED_INT,
5015 vertices_index.as_ptr() as *const c_void,
5016 );
5017 gls::gl::Finish();
5018 }
5019
5020 Ok(())
5021 }
5022
5023 fn repack_protos_to_rgba_f16(protos: &ndarray::Array3<f32>) -> (Vec<u8>, usize) {
5028 let (height, width, num_protos) = protos.dim();
5029 let num_layers = num_protos.div_ceil(4);
5030 let layer_stride = height * width * 4;
5032 let mut buf = vec![0u16; layer_stride * num_layers];
5033
5034 for y in 0..height {
5035 for x in 0..width {
5036 for k in 0..num_layers * 4 {
5037 let val = if k < num_protos {
5038 half::f16::from_f32(protos[[y, x, k]])
5039 } else {
5040 half::f16::ZERO
5041 };
5042 let layer = k / 4;
5043 let channel = k % 4;
5044 buf[layer * layer_stride + y * width * 4 + x * 4 + channel] = val.to_bits();
5045 }
5046 }
5047 }
5048
5049 let byte_buf = unsafe {
5051 std::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 2).to_vec()
5052 };
5053 (byte_buf, num_layers)
5054 }
5055
5056 fn render_proto_segmentation(
5063 &mut self,
5064 detect: &[DetectBox],
5065 proto_data: &ProtoData,
5066 ) -> crate::Result<()> {
5067 if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
5068 return Ok(());
5069 }
5070
5071 let (height, width, num_protos) = proto_data.protos.dim();
5072 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
5073
5074 match &proto_data.protos {
5075 ProtoTensor::Quantized {
5076 protos,
5077 quantization,
5078 } => {
5079 self.render_proto_segmentation_int8(
5080 detect,
5081 &proto_data.mask_coefficients,
5082 protos,
5083 quantization,
5084 height,
5085 width,
5086 num_protos,
5087 texture_target,
5088 )?;
5089 }
5090 ProtoTensor::Float(protos_f32) => {
5091 if self.has_float_linear {
5092 self.render_proto_segmentation_f32(
5093 detect,
5094 &proto_data.mask_coefficients,
5095 protos_f32,
5096 height,
5097 width,
5098 num_protos,
5099 texture_target,
5100 )?;
5101 } else {
5102 self.render_proto_segmentation_f16(
5104 detect,
5105 &proto_data.mask_coefficients,
5106 protos_f32,
5107 height,
5108 width,
5109 num_protos,
5110 texture_target,
5111 )?;
5112 }
5113 }
5114 }
5115
5116 unsafe { gls::gl::Finish() };
5117 Ok(())
5118 }
5119
5120 fn render_proto_detection_quads(
5123 &self,
5124 program: &GlProgram,
5125 detect: &[DetectBox],
5126 mask_coefficients: &[Vec<f32>],
5127 ) -> crate::Result<()> {
5128 let cvt_screen_coord = |normalized: f32| normalized * 2.0 - 1.0;
5129
5130 for (det, coeff) in detect.iter().zip(mask_coefficients.iter()) {
5131 let mut packed_coeff = [[0.0f32; 4]; 8];
5132 for (i, val) in coeff.iter().enumerate().take(32) {
5133 packed_coeff[i / 4][i % 4] = *val;
5134 }
5135
5136 program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;
5137 program.load_uniform_1i(c"class_index", det.label as i32)?;
5138
5139 let dst_roi = RegionOfInterest {
5140 left: cvt_screen_coord(det.bbox.xmin),
5141 top: cvt_screen_coord(det.bbox.ymax),
5142 right: cvt_screen_coord(det.bbox.xmax),
5143 bottom: cvt_screen_coord(det.bbox.ymin),
5144 };
5145
5146 let src_roi = RegionOfInterest {
5154 left: det.bbox.xmin,
5155 top: det.bbox.ymax,
5156 right: det.bbox.xmax,
5157 bottom: det.bbox.ymin,
5158 };
5159
5160 unsafe {
5161 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
5162 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
5163
5164 let camera_vertices: [f32; 12] = [
5165 dst_roi.left,
5166 dst_roi.top,
5167 0.,
5168 dst_roi.right,
5169 dst_roi.top,
5170 0.,
5171 dst_roi.right,
5172 dst_roi.bottom,
5173 0.,
5174 dst_roi.left,
5175 dst_roi.bottom,
5176 0.,
5177 ];
5178 gls::gl::BufferSubData(
5179 gls::gl::ARRAY_BUFFER,
5180 0,
5181 (size_of::<f32>() * camera_vertices.len()) as isize,
5182 camera_vertices.as_ptr() as *const c_void,
5183 );
5184
5185 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
5186 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
5187
5188 let texture_vertices: [f32; 8] = [
5189 src_roi.left,
5190 src_roi.top,
5191 src_roi.right,
5192 src_roi.top,
5193 src_roi.right,
5194 src_roi.bottom,
5195 src_roi.left,
5196 src_roi.bottom,
5197 ];
5198 gls::gl::BufferSubData(
5199 gls::gl::ARRAY_BUFFER,
5200 0,
5201 (size_of::<f32>() * 8) as isize,
5202 texture_vertices.as_ptr() as *const c_void,
5203 );
5204
5205 let vertices_index: [u32; 4] = [0, 1, 2, 3];
5206 gls::gl::DrawElements(
5207 gls::gl::TRIANGLE_FAN,
5208 vertices_index.len() as i32,
5209 gls::gl::UNSIGNED_INT,
5210 vertices_index.as_ptr() as *const c_void,
5211 );
5212 }
5213 }
5214 Ok(())
5215 }
5216
5217 #[allow(clippy::too_many_arguments)]
5220 fn render_proto_segmentation_int8(
5221 &mut self,
5222 detect: &[DetectBox],
5223 mask_coefficients: &[Vec<f32>],
5224 protos: &ndarray::Array3<i8>,
5225 quantization: &edgefirst_decoder::Quantization,
5226 height: usize,
5227 width: usize,
5228 num_protos: usize,
5229 texture_target: u32,
5230 ) -> crate::Result<()> {
5231 gls::bind_texture(texture_target, self.proto_texture.id);
5233 gls::active_texture(gls::gl::TEXTURE0);
5234 gls::tex_parameteri(
5235 texture_target,
5236 gls::gl::TEXTURE_MIN_FILTER,
5237 gls::gl::NEAREST as i32,
5238 );
5239 gls::tex_parameteri(
5240 texture_target,
5241 gls::gl::TEXTURE_MAG_FILTER,
5242 gls::gl::NEAREST as i32,
5243 );
5244 gls::tex_parameteri(
5245 texture_target,
5246 gls::gl::TEXTURE_WRAP_S,
5247 gls::gl::CLAMP_TO_EDGE as i32,
5248 );
5249 gls::tex_parameteri(
5250 texture_target,
5251 gls::gl::TEXTURE_WRAP_T,
5252 gls::gl::CLAMP_TO_EDGE as i32,
5253 );
5254
5255 let mut tex_data = vec![0i8; height * width * num_protos];
5258 for k in 0..num_protos {
5259 for y in 0..height {
5260 for x in 0..width {
5261 tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
5262 }
5263 }
5264 }
5265
5266 gls::tex_image3d(
5267 texture_target,
5268 0,
5269 gls::gl::R8I as i32,
5270 width as i32,
5271 height as i32,
5272 num_protos as i32,
5273 0,
5274 gls::gl::RED_INTEGER,
5275 gls::gl::BYTE,
5276 Some(&tex_data),
5277 );
5278
5279 let proto_scale = quantization.scale;
5280 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
5281
5282 match self.int8_interpolation_mode {
5283 Int8InterpolationMode::Nearest => {
5284 let program = &self.proto_segmentation_int8_nearest_program;
5285 gls::use_program(program.id);
5286 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5287 program.load_uniform_1f(c"proto_scale", proto_scale)?;
5288 program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5289 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5290 }
5291 Int8InterpolationMode::Bilinear => {
5292 let program = &self.proto_segmentation_int8_bilinear_program;
5293 gls::use_program(program.id);
5294 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5295 program.load_uniform_1f(c"proto_scale", proto_scale)?;
5296 program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5297 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5298 }
5299 Int8InterpolationMode::TwoPass => {
5300 self.render_proto_int8_two_pass(
5301 detect,
5302 mask_coefficients,
5303 quantization,
5304 height,
5305 width,
5306 num_protos,
5307 texture_target,
5308 )?;
5309 }
5310 }
5311
5312 Ok(())
5313 }
5314
5315 #[allow(clippy::too_many_arguments)]
5318 fn render_proto_int8_two_pass(
5319 &self,
5320 detect: &[DetectBox],
5321 mask_coefficients: &[Vec<f32>],
5322 quantization: &edgefirst_decoder::Quantization,
5323 height: usize,
5324 width: usize,
5325 num_protos: usize,
5326 texture_target: u32,
5327 ) -> crate::Result<()> {
5328 let num_layers = num_protos.div_ceil(4);
5329
5330 let (saved_fbo, saved_viewport) = unsafe {
5332 let mut fbo: i32 = 0;
5333 gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
5334 let mut vp = [0i32; 4];
5335 gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
5336 (fbo as u32, vp)
5337 };
5338
5339 let dequant_fbo = FrameBuffer::new();
5341 gls::bind_texture(texture_target, self.proto_dequant_texture.id);
5342 gls::tex_image3d::<u8>(
5343 texture_target,
5344 0,
5345 gls::gl::RGBA16F as i32,
5346 width as i32,
5347 height as i32,
5348 num_layers as i32,
5349 0,
5350 gls::gl::RGBA,
5351 gls::gl::HALF_FLOAT,
5352 None,
5353 );
5354 gls::tex_parameteri(
5355 texture_target,
5356 gls::gl::TEXTURE_MIN_FILTER,
5357 gls::gl::LINEAR as i32,
5358 );
5359 gls::tex_parameteri(
5360 texture_target,
5361 gls::gl::TEXTURE_MAG_FILTER,
5362 gls::gl::LINEAR as i32,
5363 );
5364 gls::tex_parameteri(
5365 texture_target,
5366 gls::gl::TEXTURE_WRAP_S,
5367 gls::gl::CLAMP_TO_EDGE as i32,
5368 );
5369 gls::tex_parameteri(
5370 texture_target,
5371 gls::gl::TEXTURE_WRAP_T,
5372 gls::gl::CLAMP_TO_EDGE as i32,
5373 );
5374
5375 let proto_scale = quantization.scale;
5376 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
5377
5378 let dequant_program = &self.proto_dequant_int8_program;
5379 gls::use_program(dequant_program.id);
5380 dequant_program.load_uniform_1f(c"proto_scale", proto_scale)?;
5381 dequant_program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5382
5383 gls::active_texture(gls::gl::TEXTURE0);
5385 gls::bind_texture(texture_target, self.proto_texture.id);
5386
5387 for layer in 0..num_layers {
5389 dequant_fbo.bind();
5390 unsafe {
5391 gls::gl::FramebufferTextureLayer(
5392 gls::gl::FRAMEBUFFER,
5393 gls::gl::COLOR_ATTACHMENT0,
5394 self.proto_dequant_texture.id,
5395 0,
5396 layer as i32,
5397 );
5398 gls::gl::Viewport(0, 0, width as i32, height as i32);
5399 }
5400 dequant_program.load_uniform_1i(c"base_layer", (layer * 4) as i32)?;
5401
5402 unsafe {
5404 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
5405 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
5406 let verts: [f32; 12] = [
5407 -1.0, -1.0, 0.0, 1.0, -1.0, 0.0, 1.0, 1.0, 0.0, -1.0, 1.0, 0.0,
5408 ];
5409 gls::gl::BufferSubData(
5410 gls::gl::ARRAY_BUFFER,
5411 0,
5412 (size_of::<f32>() * 12) as isize,
5413 verts.as_ptr() as *const c_void,
5414 );
5415 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
5416 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
5417 let tc: [f32; 8] = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0];
5418 gls::gl::BufferSubData(
5419 gls::gl::ARRAY_BUFFER,
5420 0,
5421 (size_of::<f32>() * 8) as isize,
5422 tc.as_ptr() as *const c_void,
5423 );
5424 let idx: [u32; 4] = [0, 1, 2, 3];
5425 gls::gl::DrawElements(
5426 gls::gl::TRIANGLE_FAN,
5427 4,
5428 gls::gl::UNSIGNED_INT,
5429 idx.as_ptr() as *const c_void,
5430 );
5431 }
5432 }
5433
5434 drop(dequant_fbo);
5436 unsafe {
5437 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
5438 gls::gl::Viewport(
5439 saved_viewport[0],
5440 saved_viewport[1],
5441 saved_viewport[2],
5442 saved_viewport[3],
5443 );
5444 }
5445
5446 let program = &self.proto_segmentation_program;
5448 gls::use_program(program.id);
5449 gls::active_texture(gls::gl::TEXTURE0);
5450 gls::bind_texture(texture_target, self.proto_dequant_texture.id);
5451 program.load_uniform_1i(c"num_layers", num_layers as i32)?;
5452 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5453
5454 Ok(())
5455 }
5456
5457 #[allow(clippy::too_many_arguments)]
5459 fn render_proto_segmentation_f32(
5460 &self,
5461 detect: &[DetectBox],
5462 mask_coefficients: &[Vec<f32>],
5463 protos_f32: &ndarray::Array3<f32>,
5464 height: usize,
5465 width: usize,
5466 num_protos: usize,
5467 texture_target: u32,
5468 ) -> crate::Result<()> {
5469 let program = &self.proto_segmentation_f32_program;
5470 gls::use_program(program.id);
5471 gls::bind_texture(texture_target, self.proto_texture.id);
5472 gls::active_texture(gls::gl::TEXTURE0);
5473 gls::tex_parameteri(
5474 texture_target,
5475 gls::gl::TEXTURE_MIN_FILTER,
5476 gls::gl::LINEAR as i32,
5477 );
5478 gls::tex_parameteri(
5479 texture_target,
5480 gls::gl::TEXTURE_MAG_FILTER,
5481 gls::gl::LINEAR as i32,
5482 );
5483 gls::tex_parameteri(
5484 texture_target,
5485 gls::gl::TEXTURE_WRAP_S,
5486 gls::gl::CLAMP_TO_EDGE as i32,
5487 );
5488 gls::tex_parameteri(
5489 texture_target,
5490 gls::gl::TEXTURE_WRAP_T,
5491 gls::gl::CLAMP_TO_EDGE as i32,
5492 );
5493
5494 let mut tex_data = vec![0.0f32; height * width * num_protos];
5496 for k in 0..num_protos {
5497 for y in 0..height {
5498 for x in 0..width {
5499 tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
5500 }
5501 }
5502 }
5503
5504 gls::tex_image3d(
5505 texture_target,
5506 0,
5507 gls::gl::R32F as i32,
5508 width as i32,
5509 height as i32,
5510 num_protos as i32,
5511 0,
5512 gls::gl::RED,
5513 gls::gl::FLOAT,
5514 Some(&tex_data),
5515 );
5516
5517 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5518 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5519
5520 Ok(())
5521 }
5522
5523 #[allow(clippy::too_many_arguments)]
5527 fn render_proto_segmentation_f16(
5528 &self,
5529 detect: &[DetectBox],
5530 mask_coefficients: &[Vec<f32>],
5531 protos_f32: &ndarray::Array3<f32>,
5532 height: usize,
5533 width: usize,
5534 num_protos: usize,
5535 texture_target: u32,
5536 ) -> crate::Result<()> {
5537 let num_layers = num_protos.div_ceil(4);
5538 let (tex_data, _) = Self::repack_protos_to_rgba_f16(protos_f32);
5539
5540 let program = &self.proto_segmentation_program;
5541 gls::use_program(program.id);
5542 gls::bind_texture(texture_target, self.proto_texture.id);
5543 gls::active_texture(gls::gl::TEXTURE0);
5544 gls::tex_parameteri(
5545 texture_target,
5546 gls::gl::TEXTURE_MIN_FILTER,
5547 gls::gl::LINEAR as i32,
5548 );
5549 gls::tex_parameteri(
5550 texture_target,
5551 gls::gl::TEXTURE_MAG_FILTER,
5552 gls::gl::LINEAR as i32,
5553 );
5554 gls::tex_parameteri(
5555 texture_target,
5556 gls::gl::TEXTURE_WRAP_S,
5557 gls::gl::CLAMP_TO_EDGE as i32,
5558 );
5559 gls::tex_parameteri(
5560 texture_target,
5561 gls::gl::TEXTURE_WRAP_T,
5562 gls::gl::CLAMP_TO_EDGE as i32,
5563 );
5564
5565 gls::tex_image3d(
5566 texture_target,
5567 0,
5568 gls::gl::RGBA16F as i32,
5569 width as i32,
5570 height as i32,
5571 num_layers as i32,
5572 0,
5573 gls::gl::RGBA,
5574 gls::gl::HALF_FLOAT,
5575 Some(&tex_data),
5576 );
5577
5578 program.load_uniform_1i(c"num_layers", num_layers as i32)?;
5579 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5580
5581 Ok(())
5582 }
5583
5584 fn render_segmentation(
5585 &mut self,
5586 detect: &[DetectBox],
5587 segmentation: &[Segmentation],
5588 ) -> crate::Result<()> {
5589 if segmentation.is_empty() {
5590 return Ok(());
5591 }
5592
5593 let is_modelpack = segmentation[0].segmentation.shape()[2] > 1;
5594 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
5596 if is_modelpack {
5597 let seg = &segmentation[0];
5598 let dst_roi = RegionOfInterest {
5599 left: cvt_screen_coord(seg.xmin),
5600 top: cvt_screen_coord(seg.ymax),
5601 right: cvt_screen_coord(seg.xmax),
5602 bottom: cvt_screen_coord(seg.ymin),
5603 };
5604 let segment = seg.segmentation.as_standard_layout();
5605 let slice = segment.as_slice().ok_or(Error::Internal(
5606 "Cannot get slice of segmentation".to_owned(),
5607 ))?;
5608
5609 self.render_modelpack_segmentation(
5610 dst_roi,
5611 slice,
5612 [
5613 seg.segmentation.shape()[0],
5614 seg.segmentation.shape()[1],
5615 seg.segmentation.shape()[2],
5616 ],
5617 )?;
5618 } else {
5619 for (seg, det) in segmentation.iter().zip(detect) {
5620 let dst_roi = RegionOfInterest {
5621 left: cvt_screen_coord(seg.xmin),
5622 top: cvt_screen_coord(seg.ymax),
5623 right: cvt_screen_coord(seg.xmax),
5624 bottom: cvt_screen_coord(seg.ymin),
5625 };
5626
5627 let segment = seg.segmentation.as_standard_layout();
5628 let slice = segment.as_slice().ok_or(Error::Internal(
5629 "Cannot get slice of segmentation".to_owned(),
5630 ))?;
5631
5632 self.render_yolo_segmentation(
5633 dst_roi,
5634 slice,
5635 [seg.segmentation.shape()[0], seg.segmentation.shape()[1]],
5636 det.label,
5637 )?;
5638 }
5639 }
5640
5641 gls::disable(gls::gl::BLEND);
5642 Ok(())
5643 }
5644
5645 fn render_box(&mut self, dst: &TensorImage, detect: &[DetectBox]) -> Result<(), Error> {
5646 unsafe {
5647 gls::gl::UseProgram(self.color_program.id);
5648 let rescale = |x: f32| x * 2.0 - 1.0;
5649 let thickness = 3.0;
5650 for d in detect {
5651 self.color_program
5652 .load_uniform_1i(c"class_index", d.label as i32)?;
5653 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
5654 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
5655 let bbox: [f32; 4] = d.bbox.into();
5656 let outer_box = [
5657 bbox[0] - thickness / dst.width() as f32,
5658 bbox[1] - thickness / dst.height() as f32,
5659 bbox[2] + thickness / dst.width() as f32,
5660 bbox[3] + thickness / dst.height() as f32,
5661 ];
5662 let camera_vertices: [f32; 24] = [
5663 rescale(bbox[0]),
5664 rescale(bbox[3]),
5665 0., rescale(bbox[2]),
5667 rescale(bbox[3]),
5668 0., rescale(bbox[2]),
5670 rescale(bbox[1]),
5671 0., rescale(bbox[0]),
5673 rescale(bbox[1]),
5674 0., rescale(outer_box[0]),
5676 rescale(outer_box[3]),
5677 0., rescale(outer_box[2]),
5679 rescale(outer_box[3]),
5680 0., rescale(outer_box[2]),
5682 rescale(outer_box[1]),
5683 0., rescale(outer_box[0]),
5685 rescale(outer_box[1]),
5686 0., ];
5688 gls::gl::BufferData(
5689 gls::gl::ARRAY_BUFFER,
5690 (size_of::<f32>() * camera_vertices.len()) as isize,
5691 camera_vertices.as_ptr() as *const c_void,
5692 gls::gl::DYNAMIC_DRAW,
5693 );
5694
5695 let vertices_index: [u32; 10] = [0, 1, 5, 2, 6, 3, 7, 0, 4, 5];
5696 gls::gl::DrawElements(
5697 gls::gl::TRIANGLE_STRIP,
5698 vertices_index.len() as i32,
5699 gls::gl::UNSIGNED_INT,
5700 vertices_index.as_ptr() as *const c_void,
5701 );
5702 }
5703 }
5704 check_gl_error(function!(), line!())?;
5705 Ok(())
5706 }
5707}
5708struct EglImage {
5709 egl_image: egl::Image,
5710 egl: Rc<Egl>,
5711 display: egl::Display,
5712}
5713
5714impl Drop for EglImage {
5715 fn drop(&mut self) {
5716 if self.egl_image.as_ptr() == egl::NO_IMAGE {
5717 return;
5718 }
5719
5720 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5721 let e =
5722 GlContext::egl_destroy_image_with_fallback(&self.egl, self.display, self.egl_image);
5723 if let Err(e) = e {
5724 error!("Could not destroy EGL image: {e:?}");
5725 }
5726 }));
5727 }
5728}
5729
5730struct Texture {
5731 id: u32,
5732 target: gls::gl::types::GLenum,
5733 width: usize,
5734 height: usize,
5735 format: gls::gl::types::GLenum,
5736}
5737
5738impl Default for Texture {
5739 fn default() -> Self {
5740 Self::new()
5741 }
5742}
5743
5744impl Texture {
5745 fn new() -> Self {
5746 let mut id = 0;
5747 unsafe { gls::gl::GenTextures(1, &raw mut id) };
5748 Self {
5749 id,
5750 target: 0,
5751 width: 0,
5752 height: 0,
5753 format: 0,
5754 }
5755 }
5756
5757 fn update_texture(
5758 &mut self,
5759 target: gls::gl::types::GLenum,
5760 width: usize,
5761 height: usize,
5762 format: gls::gl::types::GLenum,
5763 data: &[u8],
5764 ) {
5765 if target != self.target
5766 || width != self.width
5767 || height != self.height
5768 || format != self.format
5769 {
5770 unsafe {
5771 gls::gl::TexImage2D(
5772 target,
5773 0,
5774 format as i32,
5775 width as i32,
5776 height as i32,
5777 0,
5778 format,
5779 gls::gl::UNSIGNED_BYTE,
5780 data.as_ptr() as *const c_void,
5781 );
5782 }
5783 self.target = target;
5784 self.format = format;
5785 self.width = width;
5786 self.height = height;
5787 } else {
5788 unsafe {
5789 gls::gl::TexSubImage2D(
5790 target,
5791 0,
5792 0,
5793 0,
5794 width as i32,
5795 height as i32,
5796 format,
5797 gls::gl::UNSIGNED_BYTE,
5798 data.as_ptr() as *const c_void,
5799 );
5800 }
5801 }
5802 }
5803}
5804
5805impl Drop for Texture {
5806 fn drop(&mut self) {
5807 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5808 gls::gl::DeleteTextures(1, &raw mut self.id)
5809 }));
5810 }
5811}
5812
5813struct Buffer {
5814 id: u32,
5815 buffer_index: u32,
5816}
5817
5818impl Buffer {
5819 fn new(buffer_index: u32, size_per_point: usize, max_points: usize) -> Buffer {
5820 let mut id = 0;
5821 unsafe {
5822 gls::gl::EnableVertexAttribArray(buffer_index);
5823 gls::gl::GenBuffers(1, &raw mut id);
5824 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, id);
5825 gls::gl::VertexAttribPointer(
5826 buffer_index,
5827 size_per_point as i32,
5828 gls::gl::FLOAT,
5829 gls::gl::FALSE,
5830 0,
5831 null(),
5832 );
5833 gls::gl::BufferData(
5834 gls::gl::ARRAY_BUFFER,
5835 (size_of::<f32>() * size_per_point * max_points) as isize,
5836 null(),
5837 gls::gl::DYNAMIC_DRAW,
5838 );
5839 }
5840
5841 Buffer { id, buffer_index }
5842 }
5843}
5844
5845impl Drop for Buffer {
5846 fn drop(&mut self) {
5847 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5848 gls::gl::DeleteBuffers(1, &raw mut self.id)
5849 }));
5850 }
5851}
5852
5853struct FrameBuffer {
5854 id: u32,
5855}
5856
5857impl FrameBuffer {
5858 fn new() -> FrameBuffer {
5859 let mut id = 0;
5860 unsafe {
5861 gls::gl::GenFramebuffers(1, &raw mut id);
5862 }
5863
5864 FrameBuffer { id }
5865 }
5866
5867 fn bind(&self) {
5868 unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.id) };
5869 }
5870
5871 fn unbind(&self) {
5872 unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0) };
5873 }
5874}
5875
5876impl Drop for FrameBuffer {
5877 fn drop(&mut self) {
5878 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5879 self.unbind();
5880 unsafe {
5881 gls::gl::DeleteFramebuffers(1, &raw mut self.id);
5882 }
5883 }));
5884 }
5885}
5886
5887pub struct GlProgram {
5888 id: u32,
5889 vertex_id: u32,
5890 fragment_id: u32,
5891}
5892
5893impl GlProgram {
5894 fn new(vertex_shader: &str, fragment_shader: &str) -> Result<Self, crate::Error> {
5895 let id = unsafe { gls::gl::CreateProgram() };
5896 let vertex_id = unsafe { gls::gl::CreateShader(gls::gl::VERTEX_SHADER) };
5897 if compile_shader_from_str(vertex_id, vertex_shader, "shader_vert").is_err() {
5898 log::debug!("Vertex shader source:\n{}", vertex_shader);
5899 return Err(crate::Error::OpenGl(format!(
5900 "Shader compile error: {vertex_shader}"
5901 )));
5902 }
5903 unsafe {
5904 gls::gl::AttachShader(id, vertex_id);
5905 }
5906
5907 let fragment_id = unsafe { gls::gl::CreateShader(gls::gl::FRAGMENT_SHADER) };
5908 if compile_shader_from_str(fragment_id, fragment_shader, "shader_frag").is_err() {
5909 log::debug!("Fragment shader source:\n{}", fragment_shader);
5910 return Err(crate::Error::OpenGl(format!(
5911 "Shader compile error: {fragment_shader}"
5912 )));
5913 }
5914
5915 unsafe {
5916 gls::gl::AttachShader(id, fragment_id);
5917 gls::gl::LinkProgram(id);
5918 gls::gl::UseProgram(id);
5919 }
5920
5921 Ok(Self {
5922 id,
5923 vertex_id,
5924 fragment_id,
5925 })
5926 }
5927
5928 #[allow(dead_code)]
5929 fn load_uniform_1f(&self, name: &CStr, value: f32) -> Result<(), crate::Error> {
5930 unsafe {
5931 gls::gl::UseProgram(self.id);
5932 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5933 gls::gl::Uniform1f(location, value);
5934 }
5935 Ok(())
5936 }
5937
5938 #[allow(dead_code)]
5939 fn load_uniform_1i(&self, name: &CStr, value: i32) -> Result<(), crate::Error> {
5940 unsafe {
5941 gls::gl::UseProgram(self.id);
5942 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5943 gls::gl::Uniform1i(location, value);
5944 }
5945 Ok(())
5946 }
5947
5948 fn load_uniform_4fv(&self, name: &CStr, value: &[[f32; 4]]) -> Result<(), crate::Error> {
5949 unsafe {
5950 gls::gl::UseProgram(self.id);
5951 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5952 if location == -1 {
5953 return Err(crate::Error::OpenGl(format!(
5954 "Could not find uniform location for '{}'",
5955 name.to_string_lossy().into_owned()
5956 )));
5957 }
5958 gls::gl::Uniform4fv(location, value.len() as i32, value.as_flattened().as_ptr());
5959 }
5960 check_gl_error(function!(), line!())?;
5961 Ok(())
5962 }
5963}
5964
5965impl Drop for GlProgram {
5966 fn drop(&mut self) {
5967 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5968 gls::gl::DeleteProgram(self.id);
5969 gls::gl::DeleteShader(self.fragment_id);
5970 gls::gl::DeleteShader(self.vertex_id);
5971 }));
5972 }
5973}
5974
5975fn compile_shader_from_str(shader: u32, shader_source: &str, shader_name: &str) -> Result<(), ()> {
5976 let src = match CString::from_str(shader_source) {
5977 Ok(v) => v,
5978 Err(_) => return Err(()),
5979 };
5980 let src_ptr = src.as_ptr();
5981 unsafe {
5982 gls::gl::ShaderSource(shader, 1, &raw const src_ptr, null());
5983 gls::gl::CompileShader(shader);
5984 let mut is_compiled = 0;
5985 gls::gl::GetShaderiv(shader, gls::gl::COMPILE_STATUS, &raw mut is_compiled);
5986 if is_compiled == 0 {
5987 let mut max_length = 0;
5988 gls::gl::GetShaderiv(shader, gls::gl::INFO_LOG_LENGTH, &raw mut max_length);
5989 let mut error_log: Vec<u8> = vec![0; max_length as usize];
5990 gls::gl::GetShaderInfoLog(
5991 shader,
5992 max_length,
5993 &raw mut max_length,
5994 error_log.as_mut_ptr() as *mut c_char,
5995 );
5996 error!(
5997 "Shader '{}' failed: {:?}\n",
5998 shader_name,
5999 CString::from_vec_with_nul(error_log)
6000 .unwrap()
6001 .into_string()
6002 .unwrap()
6003 );
6004 gls::gl::DeleteShader(shader);
6005 return Err(());
6006 }
6007 Ok(())
6008 }
6009}
6010
6011fn check_gl_error(name: &str, line: u32) -> Result<(), Error> {
6012 unsafe {
6013 let err = gls::gl::GetError();
6014 if err != gls::gl::NO_ERROR {
6015 error!("GL Error: {name}:{line}: {err:#X}");
6016 return Err(Error::OpenGl(format!("{err:#X}")));
6018 }
6019 }
6020 Ok(())
6021}
6022
6023fn fourcc_to_drm(fourcc: FourCharCode) -> Result<DrmFourcc, Error> {
6024 match fourcc {
6025 RGBA => Ok(DrmFourcc::Abgr8888),
6026 BGRA => Ok(DrmFourcc::Argb8888),
6027 YUYV => Ok(DrmFourcc::Yuyv),
6028 VYUY => Ok(DrmFourcc::Vyuy),
6029 RGB | RGB_INT8 => Ok(DrmFourcc::Bgr888),
6030 GREY => Ok(DrmFourcc::R8),
6031 NV12 => Ok(DrmFourcc::Nv12),
6032 PLANAR_RGB | PLANAR_RGB_INT8 => Ok(DrmFourcc::R8),
6033 _ => Err(Error::NotSupported(format!(
6034 "FourCC {fourcc:?} has no DRM format mapping"
6035 ))),
6036 }
6037}
6038
/// Numeric EGL extension tokens used by this file that are declared locally
/// instead of being taken from `khronos_egl`.
mod egl_ext {
    // Not every token is referenced, hence the blanket allow.
    #![allow(dead_code)]

    // dma-buf import attributes. The values match the tokens of the
    // EGL_EXT_image_dma_buf_import extension: image target, DRM fourcc, then
    // fd / offset / pitch for up to three planes, followed by the YUV hints.
    pub(crate) const LINUX_DMA_BUF: u32 = 0x3270;
    pub(crate) const LINUX_DRM_FOURCC: u32 = 0x3271;
    pub(crate) const DMA_BUF_PLANE0_FD: u32 = 0x3272;
    pub(crate) const DMA_BUF_PLANE0_OFFSET: u32 = 0x3273;
    pub(crate) const DMA_BUF_PLANE0_PITCH: u32 = 0x3274;
    pub(crate) const DMA_BUF_PLANE1_FD: u32 = 0x3275;
    pub(crate) const DMA_BUF_PLANE1_OFFSET: u32 = 0x3276;
    pub(crate) const DMA_BUF_PLANE1_PITCH: u32 = 0x3277;
    pub(crate) const DMA_BUF_PLANE2_FD: u32 = 0x3278;
    pub(crate) const DMA_BUF_PLANE2_OFFSET: u32 = 0x3279;
    pub(crate) const DMA_BUF_PLANE2_PITCH: u32 = 0x327A;
    pub(crate) const YUV_COLOR_SPACE_HINT: u32 = 0x327B;
    pub(crate) const SAMPLE_RANGE_HINT: u32 = 0x327C;
    pub(crate) const YUV_CHROMA_HORIZONTAL_SITING_HINT: u32 = 0x327D;
    pub(crate) const YUV_CHROMA_VERTICAL_SITING_HINT: u32 = 0x327E;

    // Values accepted by `YUV_COLOR_SPACE_HINT`.
    pub(crate) const ITU_REC601: u32 = 0x327F;
    pub(crate) const ITU_REC709: u32 = 0x3280;
    pub(crate) const ITU_REC2020: u32 = 0x3281;

    // Values accepted by `SAMPLE_RANGE_HINT`.
    pub(crate) const YUV_FULL_RANGE: u32 = 0x3282;
    pub(crate) const YUV_NARROW_RANGE: u32 = 0x3283;

    // Values accepted by the chroma siting hints.
    pub(crate) const YUV_CHROMA_SITING_0: u32 = 0x3284;
    pub(crate) const YUV_CHROMA_SITING_0_5: u32 = 0x3285;

    // Platform selector matching EGL_PLATFORM_GBM_KHR.
    pub(crate) const PLATFORM_GBM_KHR: u32 = 0x31D7;

    // Platform selector matching EGL_PLATFORM_DEVICE_EXT.
    pub(crate) const PLATFORM_DEVICE_EXT: u32 = 0x313F;

    // A null config handle. `transmute` only compiles when source and
    // destination have the same size, so `khronos_egl::Config` must be
    // pointer-sized.
    // SAFETY: assumes `khronos_egl::Config` is a plain wrapper around a raw
    // pointer for which null is a valid bit pattern — TODO confirm.
    pub(crate) const NO_CONFIG_KHR: khronos_egl::Config =
        unsafe { std::mem::transmute(std::ptr::null_mut::<std::ffi::c_void>()) };
}
6080
/// Returns the GLSL ES 3.00 vertex shader source.
///
/// The shader reads `pos` (location 0) and `texCoord` (location 1), forwards
/// them as `fragPos` and `tc`, and uses `pos` unchanged as the clip-space
/// position.
fn generate_vertex_shader() -> &'static str {
    const VERTEX_SHADER_SOURCE: &str = "\
#version 300 es
precision mediump float;
layout(location = 0) in vec3 pos;
layout(location = 1) in vec2 texCoord;

out vec3 fragPos;
out vec2 tc;

void main() {
    fragPos = pos;
    tc = texCoord;

    gl_Position = vec4(pos, 1.0);
}
";
    VERTEX_SHADER_SOURCE
}
6099
/// Returns the GLSL ES 3.00 fragment shader source that outputs the
/// `sampler2D` texture `tex` sampled at `tc`.
fn generate_texture_fragment_shader() -> &'static str {
    const FRAGMENT_SHADER_SOURCE: &str = "\
#version 300 es

precision mediump float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    FRAGMENT_SHADER_SOURCE
}
6116
/// Returns the GLSL ES 3.00 fragment shader source that outputs the
/// external (`samplerExternalOES`) texture `tex` sampled at `tc`.
///
/// The shader requires the `GL_OES_EGL_image_external_essl3` extension.
fn generate_texture_fragment_shader_yuv() -> &'static str {
    const FRAGMENT_SHADER_SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    FRAGMENT_SHADER_SOURCE
}
6133
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB: it
/// outputs the external (`samplerExternalOES`) texture `tex` sampled at `tc`.
///
/// The shader requires the `GL_OES_EGL_image_external_essl3` extension.
fn generate_planar_rgb_shader() -> &'static str {
    const FRAGMENT_SHADER_SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    FRAGMENT_SHADER_SOURCE
}
6150
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB with
/// int8 output.
///
/// The shader samples the external texture `tex` and rewrites each RGB
/// channel as `mod(round(v * 255) + 128, 256) / 255`; alpha passes through
/// unchanged. It requires the `GL_OES_EGL_image_external_essl3` extension.
fn generate_planar_rgb_int8_shader() -> &'static str {
    const FRAGMENT_SHADER_SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

vec3 int8_bias(vec3 v) {
    vec3 q = floor(v * 255.0 + 0.5);
    return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
    vec4 c = texture(tex, tc);
    color = vec4(int8_bias(c.rgb), c.a);
}
";
    FRAGMENT_SHADER_SOURCE
}
6176
/// Returns the GLSL ES 3.00 source of a fragment shader that copies a 2D
/// texture while applying the int8 bias to the colour channels.
///
/// The shader samples `sampler2D tex` at `tc`; R, G and B are quantized to
/// 0..255, offset by 128 modulo 256 and renormalized (the embedded GLSL
/// comment states this matches the CPU `byte ^ 0x80` operation). Alpha is
/// passed through unchanged.
fn generate_texture_int8_shader() -> &'static str {
 "\
#version 300 es
precision highp float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

// XOR 0x80 bias: quantize to uint8, add 128 mod 256, normalize back.
// This matches the CPU `byte ^ 0x80` operation exactly.
vec3 int8_bias(vec3 v) {
 vec3 q = floor(v * 255.0 + 0.5);
 return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
 vec4 c = texture(tex, tc);
 color = vec4(int8_bias(c.rgb), c.a);
}
"
}
6203
/// Returns the GLSL ES 3.00 source of a fragment shader that copies an
/// external (EGL image) texture while applying the int8 bias.
///
/// The shader samples a `samplerExternalOES` (requires
/// `GL_OES_EGL_image_external_essl3`) at `tc`; R, G and B are quantized to
/// 0..255, offset by 128 modulo 256 and renormalized, alpha is passed
/// through. The GLSL text is identical to the one returned by
/// `generate_planar_rgb_int8_shader`.
fn generate_texture_int8_shader_yuv() -> &'static str {
 "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

vec3 int8_bias(vec3 v) {
 vec3 q = floor(v * 255.0 + 0.5);
 return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
 vec4 c = texture(tex, tc);
 color = vec4(int8_bias(c.rgb), c.a);
}
"
}
6229
/// Returns the GLSL ES 3.00 source of the semantic-segmentation fragment
/// shader.
///
/// For every fragment the shader walks all layers of the `sampler2DArray
/// tex`, treating the four RGBA components of layer `i` as the scores of
/// classes `4*i .. 4*i+3`, and keeps the index of the largest score:
///
/// * ties resolve to the lowest index (`>=` in `max_arg`, `<=` when
///   comparing against the running maximum);
/// * the running maximum starts at `-4.0`, so scores at or below that value
///   never win and the index stays `0`.
///
/// Fragments whose winning index equals the `background_index` uniform are
/// discarded; all others are written as `colors[index % 20]`.
/// `fragPos` and `fragColor` are declared but not read.
fn generate_segmentation_shader() -> &'static str {
 "\
#version 300 es
precision mediump float;
precision mediump sampler2DArray;

uniform sampler2DArray tex;
uniform vec4 colors[20];
uniform int background_index;

in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;

float max_arg(const in vec4 args, out int argmax) {
 if (args[0] >= args[1] && args[0] >= args[2] && args[0] >= args[3]) {
 argmax = 0;
 return args[0];
 }
 if (args[1] >= args[0] && args[1] >= args[2] && args[1] >= args[3]) {
 argmax = 1;
 return args[1];
 }
 if (args[2] >= args[0] && args[2] >= args[1] && args[2] >= args[3]) {
 argmax = 2;
 return args[2];
 }
 argmax = 3;
 return args[3];
}

void main() {
 mediump int layers = textureSize(tex, 0).z;
 float max_all = -4.0;
 int max_ind = 0;
 for (int i = 0; i < layers; i++) {
 vec4 d = texture(tex, vec3(tc, i));
 int max_ind_ = 0;
 float max_ = max_arg(d, max_ind_);
 if (max_ <= max_all) { continue; }
 max_all = max_;
 max_ind = i*4 + max_ind_;
 }
 if (max_ind == background_index) {
 discard;
 }
 max_ind = max_ind % 20;
 color = colors[max_ind];
}
"
}
6285
/// Returns the GLSL ES 3.00 source of the per-instance mask fragment shader.
///
/// The shader samples the red channel of `sampler2D mask0` at `tc`.
/// Fragments with a value below `0.5` are discarded; the rest are written
/// as `colors[class_index % 20]`. `fragPos` and `fragColor` are declared but
/// not read.
fn generate_instanced_segmentation_shader() -> &'static str {
 "\
#version 300 es
precision mediump float;
uniform sampler2D mask0;
uniform vec4 colors[20];
uniform int class_index;
in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;
void main() {
 float r0 = texture(mask0, tc).r;
 int arg = int(r0>=0.5);
 if (arg == 0) {
 discard;
 }
 color = colors[class_index % 20];
}
"
}
6308
/// Returns the GLSL ES 3.00 source of the prototype-mask fragment shader for
/// RGBA-packed float prototypes.
///
/// For each of the `num_layers` layers of `proto_tex` the shader accumulates
/// `dot(mask_coeff[i], texture(proto_tex, layer i))`, i.e. four prototype
/// channels per layer. The sum is passed through a sigmoid; fragments with a
/// result below `0.5` are discarded, the others are written as
/// `colors[class_index % 20]`.
///
/// `mask_coeff` has 8 `vec4` entries, so `num_layers` must not exceed 8.
fn generate_proto_segmentation_shader() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex; // ceil(num_protos/4) layers, RGBA = 4 channels per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_layers;

in vec2 tc;
out vec4 color;

void main() {
 float acc = 0.0;
 for (int i = 0; i < num_layers; i++) {
 // texture() returns bilinearly interpolated proto values (GL_LINEAR)
 acc += dot(mask_coeff[i], texture(proto_tex, vec3(tc, float(i))));
 }
 float mask = 1.0 / (1.0 + exp(-acc)); // sigmoid
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6336
/// Returns the GLSL ES 3.00 source of the prototype-mask fragment shader for
/// quantized integer prototypes, using nearest-texel lookup.
///
/// The texel position is derived from `tc` and clamped to the texture
/// bounds. For each of the `num_protos` layers the shader fetches the
/// integer red channel with `texelFetch`, dequantizes it as
/// `raw * proto_scale + proto_scaled_zp`, and multiplies it by the matching
/// component of `mask_coeff` (`mask_coeff[k / 4][k % 4]`). The accumulated
/// sum goes through a sigmoid; fragments below `0.5` are discarded, the
/// others are written as `colors[class_index % 20]`.
///
/// `mask_coeff` has 8 `vec4` entries, so `num_protos` must not exceed 32.
fn generate_proto_segmentation_shader_int8_nearest() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
 int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

 float acc = 0.0;
 for (int k = 0; k < num_protos; k++) {
 float raw = float(texelFetch(proto_tex, ivec3(ix, iy, k), 0).r);
 float val = raw * proto_scale + proto_scaled_zp;
 acc += mask_coeff[k / 4][k % 4] * val;
 }
 float mask = 1.0 / (1.0 + exp(-acc));
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6379
/// Returns the GLSL ES 3.00 source of the prototype-mask fragment shader for
/// quantized integer prototypes, with bilinear interpolation done in the
/// shader.
///
/// The shader computes the continuous texel position `tc * size - 0.5`,
/// takes the four surrounding texels (clamped to the texture bounds) and
/// their bilinear weights, and for each of the `num_protos` layers blends
/// the four `texelFetch` results before dequantizing
/// (`interp * proto_scale + proto_scaled_zp`) and weighting with
/// `mask_coeff[k / 4][k % 4]`. The sum goes through a sigmoid; fragments
/// below `0.5` are discarded, the others are written as
/// `colors[class_index % 20]`.
///
/// `mask_coeff` has 8 `vec4` entries, so `num_protos` must not exceed 32.
fn generate_proto_segmentation_shader_int8_bilinear() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 // Compute continuous position (matching GL_LINEAR convention: center at +0.5)
 vec2 pos = tc * vec2(tex_size.xy) - 0.5;
 vec2 f = fract(pos);
 ivec2 p0 = ivec2(floor(pos));
 ivec2 p1 = p0 + 1;
 // Clamp to texture bounds
 p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
 p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

 float w00 = (1.0 - f.x) * (1.0 - f.y);
 float w10 = f.x * (1.0 - f.y);
 float w01 = (1.0 - f.x) * f.y;
 float w11 = f.x * f.y;

 float acc = 0.0;
 for (int k = 0; k < num_protos; k++) {
 float r00 = float(texelFetch(proto_tex, ivec3(p0.x, p0.y, k), 0).r);
 float r10 = float(texelFetch(proto_tex, ivec3(p1.x, p0.y, k), 0).r);
 float r01 = float(texelFetch(proto_tex, ivec3(p0.x, p1.y, k), 0).r);
 float r11 = float(texelFetch(proto_tex, ivec3(p1.x, p1.y, k), 0).r);
 float interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
 float val = interp * proto_scale + proto_scaled_zp;
 acc += mask_coeff[k / 4][k % 4] * val;
 }
 float mask = 1.0 / (1.0 + exp(-acc));
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6436
/// Returns the GLSL ES 3.00 source of a fragment shader that dequantizes
/// four integer prototype layers into one RGBA output.
///
/// The texel position is derived from `tc` and clamped to the texture
/// bounds. Output component `c` (0..3) is
/// `texelFetch(proto_tex, layer base_layer + c).r * proto_scale + proto_scaled_zp`.
/// No mask or colour lookup is performed.
///
/// NOTE(review): layers `base_layer .. base_layer + 3` are fetched without
/// clamping the layer index — presumably the caller guarantees they exist;
/// confirm.
fn generate_proto_dequant_shader_int8() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex; // 32 layers of R8I (1 proto per layer)
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale
uniform int base_layer; // first proto index for this output layer (0, 4, 8, ...)

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
 int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

 vec4 result;
 for (int c = 0; c < 4; c++) {
 int layer = base_layer + c;
 float raw = float(texelFetch(proto_tex, ivec3(ix, iy, layer), 0).r);
 result[c] = raw * proto_scale + proto_scaled_zp;
 }
 color = result;
}
"
}
6473
/// Returns the GLSL ES 3.00 source of the prototype-mask fragment shader for
/// float prototypes stored one per layer.
///
/// For each of the `num_protos` layers the shader samples the red channel of
/// `proto_tex` with `texture()` and weights it with
/// `mask_coeff[k / 4][k % 4]`. The sum goes through a sigmoid; fragments
/// below `0.5` are discarded, the others are written as
/// `colors[class_index % 20]`.
///
/// `mask_coeff` has 8 `vec4` entries, so `num_protos` must not exceed 32.
fn generate_proto_segmentation_shader_f32() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
 float acc = 0.0;
 for (int k = 0; k < num_protos; k++) {
 // texture() returns bilinearly interpolated proto value (GL_LINEAR on R32F)
 float val = texture(proto_tex, vec3(tc, float(k))).r;
 acc += mask_coeff[k / 4][k % 4] * val;
 }
 float mask = 1.0 / (1.0 + exp(-acc));
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6508
/// Returns the GLSL ES 3.00 source of a fragment shader that writes a binary
/// mask from quantized integer prototypes, using nearest-texel lookup.
///
/// Prototypes are processed in groups of four: the raw integer red channels
/// of four layers are gathered into a `vec4` and combined with
/// `dot(mask_coeff[i], raw)`. Dequantization is applied once after the loop
/// as `logit = acc * proto_scale + coeff_sum_x_szp`. The output is
/// `vec4(mask, 0, 0, 1)` where `mask` is `1.0` when `logit > 0.0` and `0.0`
/// otherwise; nothing is discarded.
///
/// Layer indices are clamped to `num_protos - 1`, so when `num_protos` is
/// not a multiple of four the last layer is re-read for the padding lanes.
/// NOTE(review): this presumably relies on the caller zero-padding the
/// corresponding `mask_coeff` components — confirm.
fn generate_proto_mask_logit_shader_int8_nearest() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float coeff_sum_x_szp;

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
 int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 vec4 raw = vec4(
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 1, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 2, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 3, num_protos - 1)), 0).r)
 );
 acc += dot(mask_coeff[i], raw);
 }
 float logit = acc * proto_scale + coeff_sum_x_szp;
 float mask = logit > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6553
/// Returns the GLSL ES 3.00 source of a fragment shader that writes a binary
/// mask from quantized integer prototypes, with bilinear interpolation done
/// in the shader.
///
/// The shader computes the continuous texel position `tc * size - 0.5`, the
/// four surrounding texels (clamped to the texture bounds) and their
/// bilinear weights. Prototypes are processed in groups of four layers: for
/// each group the four corner `vec4`s are blended and combined with
/// `dot(mask_coeff[i], interp)`. Dequantization is applied once after the
/// loop as `logit = acc * proto_scale + coeff_sum_x_szp`. The output is
/// `vec4(mask, 0, 0, 1)` where `mask` is `1.0` when `logit > 0.0` and `0.0`
/// otherwise; nothing is discarded.
///
/// Layer indices are clamped to `num_protos - 1`, so when `num_protos` is
/// not a multiple of four the last layer is re-read for the padding lanes.
/// NOTE(review): this presumably relies on the caller zero-padding the
/// corresponding `mask_coeff` components — confirm.
fn generate_proto_mask_logit_shader_int8_bilinear() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float coeff_sum_x_szp;

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 vec2 pos = tc * vec2(tex_size.xy) - 0.5;
 vec2 f = fract(pos);
 ivec2 p0 = ivec2(floor(pos));
 ivec2 p1 = p0 + 1;
 p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
 p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

 float w00 = (1.0 - f.x) * (1.0 - f.y);
 float w10 = f.x * (1.0 - f.y);
 float w01 = (1.0 - f.x) * f.y;
 float w11 = f.x * f.y;

 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 int l0 = min(base, num_protos - 1);
 int l1 = min(base + 1, num_protos - 1);
 int l2 = min(base + 2, num_protos - 1);
 int l3 = min(base + 3, num_protos - 1);
 vec4 r00 = vec4(
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l3), 0).r)
 );
 vec4 r10 = vec4(
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l3), 0).r)
 );
 vec4 r01 = vec4(
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l3), 0).r)
 );
 vec4 r11 = vec4(
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l3), 0).r)
 );
 vec4 interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
 acc += dot(mask_coeff[i], interp);
 }
 float logit = acc * proto_scale + coeff_sum_x_szp;
 float mask = logit > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6629
/// Returns the GLSL ES 3.00 source of a fragment shader that writes a binary
/// mask from float prototypes stored one per layer.
///
/// Prototypes are processed in groups of four: the red channels of four
/// layers are sampled with `texture()` and combined with
/// `dot(mask_coeff[i], val)`. The output is `vec4(mask, 0, 0, 1)` where
/// `mask` is `1.0` when the accumulated sum is greater than `0.0` and `0.0`
/// otherwise; nothing is discarded and no sigmoid is evaluated.
///
/// Layer indices are clamped to `num_protos - 1`, so when `num_protos` is
/// not a multiple of four the last layer is re-read for the padding lanes.
/// NOTE(review): this presumably relies on the caller zero-padding the
/// corresponding `mask_coeff` components — confirm.
fn generate_proto_mask_logit_shader_f32() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 vec4 val = vec4(
 texture(proto_tex, vec3(tc, float(min(base, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 1, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 2, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 3, num_protos - 1)))).r
 );
 acc += dot(mask_coeff[i], val);
 }
 float mask = acc > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6665
/// Returns the GLSL ES 3.00 source of a flat-colour fragment shader.
///
/// Every fragment is written as `colors[class_index % 20]`; the shader has
/// no inputs from the vertex stage and samples no texture.
fn generate_color_shader() -> &'static str {
 "\
#version 300 es
precision mediump float;
uniform vec4 colors[20];
uniform int class_index;

out vec4 color;
void main() {
 int index = class_index % 20;
 color = colors[index];
}
"
}
6680
/// Returns the GLSL ES 3.00 source of a fragment shader that packs the RGB
/// channels of a 2D texture into consecutive bytes of an RGBA output.
///
/// Output pixel `x` carries the four packed bytes `4x .. 4x + 3` of its row.
/// Those bytes come from at most two source pixels (`px0 = 4x / 3` and
/// `px1 = (4x + 3) / 3`), which are read with `texelFetch` on the same row
/// (`gl_FragCoord.y`). The phase `4x - 3 * px0` selects which channels of
/// the two source pixels end up in the output R, G, B and A components.
/// Source alpha is never read.
fn generate_packed_rgba8_shader_2d() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;
void main() {
 // gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
 // integer pixel index on all GPUs (Vivante, Mali, Adreno).
 int out_x = int(floor(gl_FragCoord.x));
 int out_y = int(floor(gl_FragCoord.y));
 int base = out_x * 4;
 // 4 consecutive byte indices map to at most 2 source pixels
 int px0 = base / 3;
 int px1 = (base + 3) / 3;
 vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
 vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
 // Extract channels based on phase (base % 3)
 int phase = base - px0 * 3;
 if (phase == 0) {
 color = vec4(s0.r, s0.g, s0.b, s1.r);
 } else if (phase == 1) {
 color = vec4(s0.g, s0.b, s1.r, s1.g);
 } else {
 color = vec4(s0.b, s1.r, s1.g, s1.b);
 }
}
"
}
6718
/// Returns the GLSL ES 3.00 source of a fragment shader that packs the RGB
/// channels of a 2D texture into consecutive bytes of an RGBA output and
/// applies the int8 bias to every packed byte.
///
/// The packing is the same as in the unbiased variant: output pixel `x`
/// carries packed bytes `4x .. 4x + 3`, taken from source pixels `4x / 3`
/// and `(4x + 3) / 3` according to the phase `4x - 3 * px0`. Because all
/// four output components hold colour bytes, `int8_bias` (quantize to
/// 0..255, add 128 modulo 256, divide by 255) is applied to the whole
/// `vec4`, including the alpha component.
fn generate_packed_rgba8_int8_shader_2d() -> &'static str {
 "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;

vec4 int8_bias(vec4 v) {
 vec4 q = floor(v * 255.0 + 0.5);
 return mod(q + 128.0, 256.0) / 255.0;
}

void main() {
 // gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
 // integer pixel index on all GPUs (Vivante, Mali, Adreno).
 int out_x = int(floor(gl_FragCoord.x));
 int out_y = int(floor(gl_FragCoord.y));
 int base = out_x * 4;
 // 4 consecutive byte indices map to at most 2 source pixels
 int px0 = base / 3;
 int px1 = (base + 3) / 3;
 vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
 vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
 // Extract channels based on phase (base % 3), then apply int8 bias
 int phase = base - px0 * 3;
 if (phase == 0) {
 color = int8_bias(vec4(s0.r, s0.g, s0.b, s1.r));
 } else if (phase == 1) {
 color = int8_bias(vec4(s0.g, s0.b, s1.r, s1.g));
 } else {
 color = int8_bias(vec4(s0.b, s1.r, s1.g, s1.b));
 }
}
"
}
6760
6761#[cfg(test)]
6762#[cfg(feature = "opengl")]
6763mod gl_tests {
6764 use super::*;
6765 use crate::{TensorImage, BGRA, RGBA};
6766 #[cfg(feature = "dma_test_formats")]
6767 use crate::{NV12, YUYV};
6768 #[cfg(feature = "dma_test_formats")]
6769 use edgefirst_tensor::{is_dma_available, TensorMemory};
6770 use edgefirst_tensor::{TensorMapTrait, TensorTrait};
6771 use image::buffer::ConvertBuffer;
6772 use ndarray::Array3;
6773
6774 #[test]
6775 fn test_segmentation() {
6776 use edgefirst_decoder::Segmentation;
6777
6778 if !is_opengl_available() {
6779 eprintln!("SKIPPED: {} - OpenGL not available", function!());
6780 return;
6781 }
6782
6783 let mut image = TensorImage::load(
6784 include_bytes!("../../../testdata/giraffe.jpg"),
6785 Some(RGBA),
6786 None,
6787 )
6788 .unwrap();
6789
6790 let mut segmentation = Array3::from_shape_vec(
6791 (2, 160, 160),
6792 include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
6793 )
6794 .unwrap();
6795 segmentation.swap_axes(0, 1);
6796 segmentation.swap_axes(1, 2);
6797 let segmentation = segmentation.as_standard_layout().to_owned();
6798
6799 let seg = Segmentation {
6800 segmentation,
6801 xmin: 0.0,
6802 ymin: 0.0,
6803 xmax: 1.0,
6804 ymax: 1.0,
6805 };
6806
6807 let mut renderer = GLProcessorThreaded::new(None).unwrap();
6808 renderer.draw_masks(&mut image, &[], &[seg]).unwrap();
6809 }
6810
6811 #[test]
6812 fn test_segmentation_mem() {
6813 use edgefirst_decoder::Segmentation;
6814
6815 if !is_opengl_available() {
6816 eprintln!("SKIPPED: {} - OpenGL not available", function!());
6817 return;
6818 }
6819
6820 let mut image = TensorImage::load(
6821 include_bytes!("../../../testdata/giraffe.jpg"),
6822 Some(RGBA),
6823 Some(edgefirst_tensor::TensorMemory::Mem),
6824 )
6825 .unwrap();
6826
6827 let mut segmentation = Array3::from_shape_vec(
6828 (2, 160, 160),
6829 include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec(),
6830 )
6831 .unwrap();
6832 segmentation.swap_axes(0, 1);
6833 segmentation.swap_axes(1, 2);
6834 let segmentation = segmentation.as_standard_layout().to_owned();
6835
6836 let seg = Segmentation {
6837 segmentation,
6838 xmin: 0.0,
6839 ymin: 0.0,
6840 xmax: 1.0,
6841 ymax: 1.0,
6842 };
6843
6844 let mut renderer = GLProcessorThreaded::new(None).unwrap();
6845 renderer.draw_masks(&mut image, &[], &[seg]).unwrap();
6846 }
6847
6848 #[test]
6849 fn test_segmentation_yolo() {
6850 use edgefirst_decoder::Segmentation;
6851 use ndarray::Array3;
6852
6853 if !is_opengl_available() {
6854 eprintln!("SKIPPED: {} - OpenGL not available", function!());
6855 return;
6856 }
6857
6858 let mut image = TensorImage::load(
6859 include_bytes!("../../../testdata/giraffe.jpg"),
6860 Some(RGBA),
6861 None,
6862 )
6863 .unwrap();
6864
6865 let segmentation = Array3::from_shape_vec(
6866 (76, 55, 1),
6867 include_bytes!("../../../testdata/yolov8_seg_crop_76x55.bin").to_vec(),
6868 )
6869 .unwrap();
6870
6871 let detect = DetectBox {
6872 bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
6873 score: 0.99,
6874 label: 1,
6875 };
6876
6877 let seg = Segmentation {
6878 segmentation,
6879 xmin: 0.59375,
6880 ymin: 0.25,
6881 xmax: 0.9375,
6882 ymax: 0.725,
6883 };
6884
6885 let mut renderer = GLProcessorThreaded::new(None).unwrap();
6886 renderer
6887 .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
6888 .unwrap();
6889 renderer.draw_masks(&mut image, &[detect], &[seg]).unwrap();
6890
6891 let expected = TensorImage::load(
6892 include_bytes!("../../../testdata/output_render_gl.jpg"),
6893 Some(RGBA),
6894 None,
6895 )
6896 .unwrap();
6897
6898 compare_images(&image, &expected, 0.99, function!());
6899 }
6900
6901 #[test]
6902 fn test_boxes() {
6903 use edgefirst_decoder::DetectBox;
6904
6905 if !is_opengl_available() {
6906 eprintln!("SKIPPED: {} - OpenGL not available", function!());
6907 return;
6908 }
6909
6910 let mut image = TensorImage::load(
6911 include_bytes!("../../../testdata/giraffe.jpg"),
6912 Some(RGBA),
6913 None,
6914 )
6915 .unwrap();
6916
6917 let detect = DetectBox {
6918 bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
6919 score: 0.99,
6920 label: 0,
6921 };
6922 let mut renderer = GLProcessorThreaded::new(None).unwrap();
6923 renderer
6924 .set_class_colors(&[[255, 255, 0, 233], [128, 128, 255, 100]])
6925 .unwrap();
6926 renderer.draw_masks(&mut image, &[detect], &[]).unwrap();
6927 }
6928
6929 static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
6930 fn is_opengl_available() -> bool {
6932 #[cfg(all(target_os = "linux", feature = "opengl"))]
6933 {
6934 *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
6935 }
6936
6937 #[cfg(not(all(target_os = "linux", feature = "opengl")))]
6938 {
6939 false
6940 }
6941 }
6942
6943 fn compare_images(img1: &TensorImage, img2: &TensorImage, threshold: f64, name: &str) {
6944 assert_eq!(img1.height(), img2.height(), "Heights differ");
6945 assert_eq!(img1.width(), img2.width(), "Widths differ");
6946 assert_eq!(img1.fourcc(), img2.fourcc(), "FourCC differ");
6947 assert!(
6948 matches!(img1.fourcc(), RGB | RGBA | GREY | PLANAR_RGB),
6949 "FourCC must be RGB or RGBA for comparison"
6950 );
6951
6952 let image1 = match img1.fourcc() {
6953 RGB => image::RgbImage::from_vec(
6954 img1.width() as u32,
6955 img1.height() as u32,
6956 img1.tensor().map().unwrap().to_vec(),
6957 )
6958 .unwrap(),
6959 RGBA => image::RgbaImage::from_vec(
6960 img1.width() as u32,
6961 img1.height() as u32,
6962 img1.tensor().map().unwrap().to_vec(),
6963 )
6964 .unwrap()
6965 .convert(),
6966 GREY => image::GrayImage::from_vec(
6967 img1.width() as u32,
6968 img1.height() as u32,
6969 img1.tensor().map().unwrap().to_vec(),
6970 )
6971 .unwrap()
6972 .convert(),
6973 PLANAR_RGB => image::GrayImage::from_vec(
6974 img1.width() as u32,
6975 (img1.height() * 3) as u32,
6976 img1.tensor().map().unwrap().to_vec(),
6977 )
6978 .unwrap()
6979 .convert(),
6980 _ => return,
6981 };
6982
6983 let image2 = match img2.fourcc() {
6984 RGB => image::RgbImage::from_vec(
6985 img2.width() as u32,
6986 img2.height() as u32,
6987 img2.tensor().map().unwrap().to_vec(),
6988 )
6989 .unwrap(),
6990 RGBA => image::RgbaImage::from_vec(
6991 img2.width() as u32,
6992 img2.height() as u32,
6993 img2.tensor().map().unwrap().to_vec(),
6994 )
6995 .unwrap()
6996 .convert(),
6997 GREY => image::GrayImage::from_vec(
6998 img2.width() as u32,
6999 img2.height() as u32,
7000 img2.tensor().map().unwrap().to_vec(),
7001 )
7002 .unwrap()
7003 .convert(),
7004 PLANAR_RGB => image::GrayImage::from_vec(
7005 img2.width() as u32,
7006 (img2.height() * 3) as u32,
7007 img2.tensor().map().unwrap().to_vec(),
7008 )
7009 .unwrap()
7010 .convert(),
7011 _ => return,
7012 };
7013
7014 let similarity = image_compare::rgb_similarity_structure(
7015 &image_compare::Algorithm::RootMeanSquared,
7016 &image1,
7017 &image2,
7018 )
7019 .expect("Image Comparison failed");
7020 if similarity.score < threshold {
7021 similarity
7024 .image
7025 .to_color_map()
7026 .save(format!("{name}.png"))
7027 .unwrap();
7028 panic!(
7029 "{name}: converted image and target image have similarity score too low: {} < {}",
7030 similarity.score, threshold
7031 )
7032 }
7033 }
7034
7035 #[cfg(feature = "dma_test_formats")]
7042 fn load_raw_image(
7043 width: usize,
7044 height: usize,
7045 fourcc: FourCharCode,
7046 memory: Option<TensorMemory>,
7047 bytes: &[u8],
7048 ) -> Result<TensorImage, crate::Error> {
7049 let img = TensorImage::new(width, height, fourcc, memory)?;
7050 let mut map = img.tensor().map()?;
7051 map.as_mut_slice()[..bytes.len()].copy_from_slice(bytes);
7052 Ok(img)
7053 }
7054
7055 #[test]
7057 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7058 fn test_opengl_nv12_to_rgba_reference() {
7059 if !is_dma_available() {
7060 return;
7061 }
7062 let src = load_raw_image(
7064 1280,
7065 720,
7066 NV12,
7067 Some(TensorMemory::Dma),
7068 include_bytes!("../../../testdata/camera720p.nv12"),
7069 )
7070 .unwrap();
7071
7072 let reference = load_raw_image(
7074 1280,
7075 720,
7076 RGBA,
7077 None,
7078 include_bytes!("../../../testdata/camera720p.rgba"),
7079 )
7080 .unwrap();
7081
7082 let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
7084 let mut gl = GLProcessorThreaded::new(None).unwrap();
7085 gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
7086 .unwrap();
7087
7088 let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
7090 cpu_dst
7091 .tensor()
7092 .map()
7093 .unwrap()
7094 .as_mut_slice()
7095 .copy_from_slice(dst.tensor().map().unwrap().as_slice());
7096
7097 compare_images(&reference, &cpu_dst, 0.98, "opengl_nv12_to_rgba_reference");
7098 }
7099
7100 #[test]
7102 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7103 fn test_opengl_yuyv_to_rgba_reference() {
7104 if !is_dma_available() {
7105 return;
7106 }
7107 let src = load_raw_image(
7109 1280,
7110 720,
7111 YUYV,
7112 Some(TensorMemory::Dma),
7113 include_bytes!("../../../testdata/camera720p.yuyv"),
7114 )
7115 .unwrap();
7116
7117 let reference = load_raw_image(
7119 1280,
7120 720,
7121 RGBA,
7122 None,
7123 include_bytes!("../../../testdata/camera720p.rgba"),
7124 )
7125 .unwrap();
7126
7127 let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
7129 let mut gl = GLProcessorThreaded::new(None).unwrap();
7130 gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
7131 .unwrap();
7132
7133 let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
7135 cpu_dst
7136 .tensor()
7137 .map()
7138 .unwrap()
7139 .as_mut_slice()
7140 .copy_from_slice(dst.tensor().map().unwrap().as_slice());
7141
7142 compare_images(&reference, &cpu_dst, 0.98, "opengl_yuyv_to_rgba_reference");
7143 }
7144
7145 #[test]
7156 fn test_probe_egl_displays() {
7157 let displays = match probe_egl_displays() {
7158 Ok(d) => d,
7159 Err(e) => {
7160 eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
7161 return;
7162 }
7163 };
7164
7165 if displays.is_empty() {
7166 eprintln!("SKIPPED: {} - No EGL displays available", function!());
7167 return;
7168 }
7169
7170 let kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();
7171 eprintln!("Probed EGL displays: {kinds:?}");
7172 for d in &displays {
7173 eprintln!(" {:?}: {}", d.kind, d.description);
7174 }
7175
7176 let priority = |k: &EglDisplayKind| match k {
7180 EglDisplayKind::PlatformDevice => 0,
7181 EglDisplayKind::Gbm => 1,
7182 EglDisplayKind::Default => 2,
7183 };
7184 for w in kinds.windows(2) {
7185 assert!(
7186 priority(&w[0]) < priority(&w[1]),
7187 "Display ordering violated: {:?} should come after {:?}",
7188 w[1],
7189 w[0],
7190 );
7191 }
7192 }
7193
7194 #[test]
7198 fn test_override_each_display_kind() {
7199 let displays = match probe_egl_displays() {
7200 Ok(d) => d,
7201 Err(e) => {
7202 eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
7203 return;
7204 }
7205 };
7206
7207 if displays.is_empty() {
7208 eprintln!("SKIPPED: {} - No EGL displays available", function!());
7209 return;
7210 }
7211
7212 for display in &displays {
7213 eprintln!(
7214 "Testing override: {:?} ({})",
7215 display.kind, display.description
7216 );
7217 let mut gl = GLProcessorThreaded::new(Some(display.kind)).unwrap_or_else(|e| {
7218 panic!(
7219 "GLProcessorThreaded::new(Some({:?})) failed: {e:?}",
7220 display.kind
7221 )
7222 });
7223
7224 let src = TensorImage::load(
7227 include_bytes!("../../../testdata/zidane.jpg"),
7228 Some(RGBA),
7229 None,
7230 )
7231 .unwrap();
7232 let mut dst = TensorImage::new(320, 240, RGBA, None).unwrap();
7233 gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
7234 .unwrap_or_else(|e| {
7235 panic!("convert() with {:?} display failed: {e:?}", display.kind)
7236 });
7237 eprintln!(" {:?} display: convert OK", display.kind);
7238 }
7239 }
7240
7241 #[test]
7244 fn test_override_unavailable_display_errors() {
7245 let displays = match probe_egl_displays() {
7246 Ok(d) => d,
7247 Err(e) => {
7248 eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
7249 return;
7250 }
7251 };
7252 let available_kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();
7253
7254 let unavailable = [
7257 EglDisplayKind::PlatformDevice,
7258 EglDisplayKind::Gbm,
7259 EglDisplayKind::Default,
7260 ]
7261 .into_iter()
7262 .find(|k| !available_kinds.contains(k));
7263
7264 if let Some(kind) = unavailable {
7265 eprintln!("Testing override with unavailable kind: {kind:?}");
7266 let result = GLProcessorThreaded::new(Some(kind));
7267 assert!(
7268 result.is_err(),
7269 "Expected error for unavailable display kind {kind:?}, got Ok"
7270 );
7271 eprintln!(" Correctly returned error: {:?}", result.unwrap_err());
7272 } else {
7273 eprintln!(
7274 "SKIPPED: {} - All three display kinds are available",
7275 function!()
7276 );
7277 }
7278 }
7279
7280 #[test]
7283 fn test_auto_detect_display() {
7284 if !is_opengl_available() {
7285 eprintln!("SKIPPED: {} - OpenGL not available", function!());
7286 return;
7287 }
7288
7289 let mut gl = GLProcessorThreaded::new(None).expect("auto-detect should succeed");
7290 let src = TensorImage::load(
7291 include_bytes!("../../../testdata/zidane.jpg"),
7292 Some(RGBA),
7293 None,
7294 )
7295 .unwrap();
7296 let mut dst = TensorImage::new(320, 240, RGBA, None).unwrap();
7297 gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
7298 .expect("auto-detect convert should succeed");
7299 }
7300
7301 #[test]
7302 fn test_packed_rgb_width_constraint() {
7303 assert_eq!((640usize * 3) % 4, 0);
7305 assert_eq!((320usize * 3) % 4, 0);
7306 assert_eq!((1280usize * 3) % 4, 0);
7307
7308 assert_ne!((322usize * 3) % 4, 0);
7310 assert_ne!((333usize * 3) % 4, 0);
7311 }
7312
7313 #[cfg(feature = "dma_test_formats")]
7324 fn assert_pixels_match(expected: &[u8], actual: &[u8], tolerance: u8) {
7325 assert_eq!(expected.len(), actual.len(), "Buffer size mismatch");
7326 let mut max_diff: u8 = 0;
7327 let mut diff_count: usize = 0;
7328 let mut first_diff_idx = None;
7329 for (i, (&e, &a)) in expected.iter().zip(actual.iter()).enumerate() {
7330 let diff = (e as i16 - a as i16).unsigned_abs() as u8;
7331 if diff > tolerance {
7332 diff_count += 1;
7333 if first_diff_idx.is_none() {
7334 first_diff_idx = Some(i);
7335 }
7336 }
7337 max_diff = max_diff.max(diff);
7338 }
7339 assert!(
7340 diff_count == 0,
7341 "Pixel mismatch: {diff_count} bytes differ (max_diff={max_diff}, first at index {})",
7342 first_diff_idx.unwrap_or(0)
7343 );
7344 }
7345
7346 #[cfg(feature = "dma_test_formats")]
7348 fn letterbox_crop(src_w: usize, src_h: usize, dst_w: usize, dst_h: usize) -> Crop {
7349 let src_aspect = src_w as f64 / src_h as f64;
7350 let dst_aspect = dst_w as f64 / dst_h as f64;
7351 let (new_w, new_h) = if src_aspect > dst_aspect {
7352 let new_h = (dst_w as f64 / src_aspect).round() as usize;
7353 (dst_w, new_h)
7354 } else {
7355 let new_w = (dst_h as f64 * src_aspect).round() as usize;
7356 (new_w, dst_h)
7357 };
7358 let left = (dst_w - new_w) / 2;
7359 let top = (dst_h - new_h) / 2;
7360 Crop::new()
7361 .with_dst_rect(Some(crate::Rect::new(left, top, new_w, new_h)))
7362 .with_dst_color(Some([114, 114, 114, 255]))
7363 }
7364
7365 #[cfg(feature = "dma_test_formats")]
7367 fn rgba_to_rgb(rgba: &[u8]) -> Vec<u8> {
7368 assert_eq!(
7369 rgba.len() % 4,
7370 0,
7371 "RGBA buffer length must be divisible by 4"
7372 );
7373 let mut rgb = Vec::with_capacity(rgba.len() / 4 * 3);
7374 for pixel in rgba.chunks_exact(4) {
7375 rgb.push(pixel[0]);
7376 rgb.push(pixel[1]);
7377 rgb.push(pixel[2]);
7378 }
7379 rgb
7380 }
7381
7382 #[cfg(feature = "dma_test_formats")]
7384 fn uint8_to_int8(data: &[u8]) -> Vec<u8> {
7385 data.iter().map(|&b| b ^ 0x80).collect()
7386 }
7387
7388 #[test]
7391 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7392 fn test_opengl_rgb_correctness() {
7393 if !is_dma_available() {
7394 return;
7395 }
7396 let src_dma = load_raw_image(
7397 1920,
7398 1080,
7399 YUYV,
7400 Some(TensorMemory::Dma),
7401 include_bytes!("../../../testdata/camera1080p.yuyv"),
7402 )
7403 .unwrap();
7404
7405 let crop = letterbox_crop(1920, 1080, 640, 640);
7406 let mut gl = GLProcessorThreaded::new(None).unwrap();
7407
7408 let mut dst_rgba = TensorImage::new(640, 640, RGBA, Some(TensorMemory::Dma)).unwrap();
7410 gl.convert(&src_dma, &mut dst_rgba, Rotation::None, Flip::None, crop)
7411 .unwrap();
7412
7413 let mut dst_rgb = TensorImage::new(640, 640, RGB, Some(TensorMemory::Dma)).unwrap();
7415 gl.convert(&src_dma, &mut dst_rgb, Rotation::None, Flip::None, crop)
7416 .unwrap();
7417
7418 let rgba_data = dst_rgba.tensor().map().unwrap();
7419 let expected_rgb = rgba_to_rgb(rgba_data.as_slice());
7420 let gl_data = dst_rgb.tensor().map().unwrap();
7421 assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
7422 }
7423
7424 #[test]
7427 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7428 fn test_opengl_rgb_int8_correctness() {
7429 if !is_dma_available() {
7430 return;
7431 }
7432 let src_dma = load_raw_image(
7433 1920,
7434 1080,
7435 YUYV,
7436 Some(TensorMemory::Dma),
7437 include_bytes!("../../../testdata/camera1080p.yuyv"),
7438 )
7439 .unwrap();
7440
7441 let crop = letterbox_crop(1920, 1080, 640, 640);
7442 let mut gl = match GLProcessorST::new(None) {
7448 Ok(gl) => gl,
7449 Err(e) => {
7450 eprintln!("SKIPPED: {} - GL not available: {e}", function!());
7451 return;
7452 }
7453 };
7454 gl.support_rgb_direct = false;
7455
7456 let mut dst_rgba = TensorImage::new(640, 640, RGBA, Some(TensorMemory::Dma)).unwrap();
7458 gl.convert(&src_dma, &mut dst_rgba, Rotation::None, Flip::None, crop)
7459 .unwrap();
7460
7461 let mut dst_rgb = TensorImage::new(640, 640, RGB_INT8, Some(TensorMemory::Dma)).unwrap();
7463 gl.convert(&src_dma, &mut dst_rgb, Rotation::None, Flip::None, crop)
7464 .unwrap();
7465
7466 let rgba_data = dst_rgba.tensor().map().unwrap();
7467 let expected_rgb = uint8_to_int8(&rgba_to_rgb(rgba_data.as_slice()));
7468 let gl_data = dst_rgb.tensor().map().unwrap();
7469 assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
7470 }
7471
7472 #[test]
7475 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7476 fn test_opengl_rgb_no_letterbox_correctness() {
7477 if !is_dma_available() {
7478 return;
7479 }
7480 let src_dma = load_raw_image(
7481 1920,
7482 1080,
7483 YUYV,
7484 Some(TensorMemory::Dma),
7485 include_bytes!("../../../testdata/camera1080p.yuyv"),
7486 )
7487 .unwrap();
7488
7489 let mut gl = GLProcessorThreaded::new(None).unwrap();
7490
7491 let mut dst_rgba = TensorImage::new(1920, 1080, RGBA, Some(TensorMemory::Dma)).unwrap();
7493 gl.convert(
7494 &src_dma,
7495 &mut dst_rgba,
7496 Rotation::None,
7497 Flip::None,
7498 Crop::no_crop(),
7499 )
7500 .unwrap();
7501
7502 let mut dst_rgb = TensorImage::new(1920, 1080, RGB, Some(TensorMemory::Dma)).unwrap();
7504 gl.convert(
7505 &src_dma,
7506 &mut dst_rgb,
7507 Rotation::None,
7508 Flip::None,
7509 Crop::no_crop(),
7510 )
7511 .unwrap();
7512
7513 let rgba_data = dst_rgba.tensor().map().unwrap();
7514 let expected_rgb = rgba_to_rgb(rgba_data.as_slice());
7515 let gl_data = dst_rgb.tensor().map().unwrap();
7516 assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
7517 }
7518
7519 #[test]
7527 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7528 fn test_probe_rgb_direct_support() {
7529 if !is_dma_available() {
7530 eprintln!("SKIPPED: {} - DMA not available", function!());
7531 return;
7532 }
7533 let gl = match GLProcessorST::new(None) {
7534 Ok(gl) => gl,
7535 Err(e) => {
7536 eprintln!("SKIPPED: {} - GL not available: {e}", function!());
7537 return;
7538 }
7539 };
7540 eprintln!(
7542 "support_rgb_direct = {} (probe completed without crash)",
7543 gl.support_rgb_direct
7544 );
7545 }
7546
7547 #[test]
7550 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7551 fn test_opengl_rgb_direct_matches_two_pass() {
7552 if !is_dma_available() {
7553 eprintln!("SKIPPED: {} - DMA not available", function!());
7554 return;
7555 }
7556 let mut gl = match GLProcessorST::new(None) {
7557 Ok(gl) => gl,
7558 Err(e) => {
7559 eprintln!("SKIPPED: {} - GL not available: {e}", function!());
7560 return;
7561 }
7562 };
7563 if !gl.support_rgb_direct {
7564 eprintln!("SKIPPED: {} - GPU does not support direct RGB", function!());
7565 return;
7566 }
7567
7568 let src = TensorImage::new(640, 480, RGBA, Some(TensorMemory::Dma)).unwrap();
7572 {
7573 let mut map = src.tensor().map().unwrap();
7574 for (i, byte) in map.as_mut_slice().iter_mut().enumerate() {
7575 *byte = (i % 251) as u8; }
7577 }
7578
7579 let crop = crate::Crop {
7580 src_rect: None,
7581 dst_rect: None,
7582 dst_color: None,
7583 };
7584
7585 let mut dst_direct = TensorImage::new(320, 320, RGB, Some(TensorMemory::Dma)).unwrap();
7587 gl.convert(&src, &mut dst_direct, Rotation::None, Flip::None, crop)
7588 .unwrap();
7589
7590 gl.support_rgb_direct = false;
7592 let mut dst_twop = TensorImage::new(320, 320, RGB, Some(TensorMemory::Dma)).unwrap();
7593 gl.convert(&src, &mut dst_twop, Rotation::None, Flip::None, crop)
7594 .unwrap();
7595 gl.support_rgb_direct = true;
7596
7597 let map_direct = dst_direct.tensor().map().unwrap();
7599 let map_twop = dst_twop.tensor().map().unwrap();
7600 let mut max_diff = 0i32;
7602 for (a, b) in map_direct.as_slice().iter().zip(map_twop.as_slice().iter()) {
7603 let diff = (*a as i32 - *b as i32).abs();
7604 max_diff = max_diff.max(diff);
7605 }
7606 eprintln!("RGB direct vs two-pass max pixel diff: {max_diff}");
7607 assert!(max_diff <= 1, "Pixel mismatch > 1: max_diff={max_diff}");
7608 }
7609
7610 #[test]
7615 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7616 fn test_opengl_nv12_to_bgra() {
7617 if !is_dma_available() {
7618 eprintln!("SKIPPED: test_opengl_nv12_to_bgra - DMA not available");
7619 return;
7620 }
7621
7622 let src = load_raw_image(
7623 1280,
7624 720,
7625 NV12,
7626 Some(TensorMemory::Dma),
7627 include_bytes!("../../../testdata/camera720p.nv12"),
7628 )
7629 .unwrap();
7630
7631 let mut gl = GLProcessorThreaded::new(None).unwrap();
7632
7633 let mut rgba_dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
7635 gl.convert(
7636 &src,
7637 &mut rgba_dst,
7638 Rotation::None,
7639 Flip::None,
7640 Crop::no_crop(),
7641 )
7642 .unwrap();
7643
7644 let mut bgra_dst = TensorImage::new(1280, 720, BGRA, Some(TensorMemory::Dma)).unwrap();
7646 gl.convert(
7647 &src,
7648 &mut bgra_dst,
7649 Rotation::None,
7650 Flip::None,
7651 Crop::no_crop(),
7652 )
7653 .unwrap();
7654
7655 let bgra_map = bgra_dst.tensor().map().unwrap();
7657 let rgba_map = rgba_dst.tensor().map().unwrap();
7658 let bgra_buf = bgra_map.as_slice();
7659 let rgba_buf = rgba_map.as_slice();
7660
7661 assert_eq!(bgra_buf.len(), rgba_buf.len());
7662 let mut max_diff = 0i32;
7663 for (bc, rc) in bgra_buf.chunks_exact(4).zip(rgba_buf.chunks_exact(4)) {
7664 max_diff = max_diff.max((bc[0] as i32 - rc[2] as i32).abs()); max_diff = max_diff.max((bc[1] as i32 - rc[1] as i32).abs()); max_diff = max_diff.max((bc[2] as i32 - rc[0] as i32).abs()); max_diff = max_diff.max((bc[3] as i32 - rc[3] as i32).abs()); }
7669 eprintln!("NV12→BGRA vs NV12→RGBA max channel diff: {max_diff}");
7670 assert!(
7671 max_diff <= 1,
7672 "BGRA/RGBA channel mismatch > 1: max_diff={max_diff}"
7673 );
7674 }
7675
7676 #[test]
7678 #[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
7679 fn test_opengl_yuyv_to_bgra() {
7680 if !is_dma_available() {
7681 eprintln!("SKIPPED: test_opengl_yuyv_to_bgra - DMA not available");
7682 return;
7683 }
7684
7685 let src = load_raw_image(
7686 1280,
7687 720,
7688 YUYV,
7689 Some(TensorMemory::Dma),
7690 include_bytes!("../../../testdata/camera720p.yuyv"),
7691 )
7692 .unwrap();
7693
7694 let mut gl = GLProcessorThreaded::new(None).unwrap();
7695
7696 let mut rgba_dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
7697 gl.convert(
7698 &src,
7699 &mut rgba_dst,
7700 Rotation::None,
7701 Flip::None,
7702 Crop::no_crop(),
7703 )
7704 .unwrap();
7705
7706 let mut bgra_dst = TensorImage::new(1280, 720, BGRA, Some(TensorMemory::Dma)).unwrap();
7707 gl.convert(
7708 &src,
7709 &mut bgra_dst,
7710 Rotation::None,
7711 Flip::None,
7712 Crop::no_crop(),
7713 )
7714 .unwrap();
7715
7716 let bgra_map = bgra_dst.tensor().map().unwrap();
7717 let rgba_map = rgba_dst.tensor().map().unwrap();
7718 let bgra_buf = bgra_map.as_slice();
7719 let rgba_buf = rgba_map.as_slice();
7720
7721 let mut max_diff = 0i32;
7722 for (bc, rc) in bgra_buf.chunks_exact(4).zip(rgba_buf.chunks_exact(4)) {
7723 max_diff = max_diff.max((bc[0] as i32 - rc[2] as i32).abs());
7724 max_diff = max_diff.max((bc[1] as i32 - rc[1] as i32).abs());
7725 max_diff = max_diff.max((bc[2] as i32 - rc[0] as i32).abs());
7726 max_diff = max_diff.max((bc[3] as i32 - rc[3] as i32).abs());
7727 }
7728 eprintln!("YUYV→BGRA vs YUYV→RGBA max channel diff: {max_diff}");
7729 assert!(
7730 max_diff <= 1,
7731 "BGRA/RGBA channel mismatch > 1: max_diff={max_diff}"
7732 );
7733 }
7734
7735 #[test]
7738 fn test_draw_masks_bgra() {
7739 use edgefirst_decoder::Segmentation;
7740
7741 if !is_opengl_available() {
7742 eprintln!("SKIPPED: test_draw_masks_bgra - OpenGL not available");
7743 return;
7744 }
7745
7746 let seg_bytes = include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec();
7747
7748 let make_seg = || {
7750 let mut s = Array3::from_shape_vec((2, 160, 160), seg_bytes.clone()).unwrap();
7751 s.swap_axes(0, 1);
7752 s.swap_axes(1, 2);
7753 let s = s.as_standard_layout().to_owned();
7754 Segmentation {
7755 segmentation: s,
7756 xmin: 0.0,
7757 ymin: 0.0,
7758 xmax: 1.0,
7759 ymax: 1.0,
7760 }
7761 };
7762
7763 let mut gl = GLProcessorThreaded::new(None).unwrap();
7764
7765 let mut rgba_img = TensorImage::load(
7767 include_bytes!("../../../testdata/giraffe.jpg"),
7768 Some(RGBA),
7769 None,
7770 )
7771 .unwrap();
7772 gl.draw_masks(&mut rgba_img, &[], &[make_seg()]).unwrap();
7773
7774 let rgba_src = TensorImage::load(
7776 include_bytes!("../../../testdata/giraffe.jpg"),
7777 Some(RGBA),
7778 None,
7779 )
7780 .unwrap();
7781 let mut bgra_img =
7782 TensorImage::new(rgba_src.width(), rgba_src.height(), BGRA, None).unwrap();
7783 gl.convert(
7784 &rgba_src,
7785 &mut bgra_img,
7786 Rotation::None,
7787 Flip::None,
7788 Crop::no_crop(),
7789 )
7790 .unwrap();
7791 gl.draw_masks(&mut bgra_img, &[], &[make_seg()]).unwrap();
7792
7793 let rgba_map = rgba_img.tensor().map().unwrap();
7795 let bgra_map = bgra_img.tensor().map().unwrap();
7796 let rgba_buf = rgba_map.as_slice();
7797 let bgra_buf = bgra_map.as_slice();
7798 assert_eq!(rgba_buf.len(), bgra_buf.len());
7799
7800 let mut max_diff = 0i32;
7801 for (rc, bc) in rgba_buf.chunks_exact(4).zip(bgra_buf.chunks_exact(4)) {
7802 max_diff = max_diff.max((rc[0] as i32 - bc[2] as i32).abs()); max_diff = max_diff.max((rc[1] as i32 - bc[1] as i32).abs()); max_diff = max_diff.max((rc[2] as i32 - bc[0] as i32).abs()); max_diff = max_diff.max((rc[3] as i32 - bc[3] as i32).abs()); }
7807 eprintln!("draw_masks BGRA vs RGBA max channel diff: {max_diff}");
7808 assert!(
7809 max_diff <= 1,
7810 "draw_masks BGRA/RGBA channel mismatch > 1: max_diff={max_diff}"
7811 );
7812 }
7813
7814 #[test]
7817 fn test_draw_masks_bgra_mem() {
7818 use edgefirst_decoder::DetectBox;
7819
7820 if !is_opengl_available() {
7821 eprintln!("SKIPPED: test_draw_masks_bgra_mem - OpenGL not available");
7822 return;
7823 }
7824
7825 let detect = DetectBox {
7826 bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
7827 score: 0.99,
7828 label: 0,
7829 };
7830 let colors = [[255, 255, 0, 233], [128, 128, 255, 100]];
7831
7832 let mut gl = GLProcessorThreaded::new(None).unwrap();
7833 gl.set_class_colors(&colors).unwrap();
7834
7835 let mut rgba_img = TensorImage::load(
7837 include_bytes!("../../../testdata/giraffe.jpg"),
7838 Some(RGBA),
7839 Some(edgefirst_tensor::TensorMemory::Mem),
7840 )
7841 .unwrap();
7842 gl.draw_masks(&mut rgba_img, &[detect], &[]).unwrap();
7843
7844 let rgba_src = TensorImage::load(
7846 include_bytes!("../../../testdata/giraffe.jpg"),
7847 Some(RGBA),
7848 Some(edgefirst_tensor::TensorMemory::Mem),
7849 )
7850 .unwrap();
7851 let mut bgra_img = TensorImage::new(
7852 rgba_src.width(),
7853 rgba_src.height(),
7854 BGRA,
7855 Some(edgefirst_tensor::TensorMemory::Mem),
7856 )
7857 .unwrap();
7858 gl.convert(
7859 &rgba_src,
7860 &mut bgra_img,
7861 Rotation::None,
7862 Flip::None,
7863 Crop::no_crop(),
7864 )
7865 .unwrap();
7866 gl.draw_masks(&mut bgra_img, &[detect], &[]).unwrap();
7867
7868 let rgba_map = rgba_img.tensor().map().unwrap();
7870 let bgra_map = bgra_img.tensor().map().unwrap();
7871 let rgba_buf = rgba_map.as_slice();
7872 let bgra_buf = bgra_map.as_slice();
7873
7874 let mut max_diff = 0i32;
7875 for (rc, bc) in rgba_buf.chunks_exact(4).zip(bgra_buf.chunks_exact(4)) {
7876 max_diff = max_diff.max((rc[0] as i32 - bc[2] as i32).abs());
7877 max_diff = max_diff.max((rc[1] as i32 - bc[1] as i32).abs());
7878 max_diff = max_diff.max((rc[2] as i32 - bc[0] as i32).abs());
7879 max_diff = max_diff.max((rc[3] as i32 - bc[3] as i32).abs());
7880 }
7881 eprintln!("draw_masks_mem BGRA vs RGBA max channel diff: {max_diff}");
7882 assert!(
7883 max_diff <= 1,
7884 "draw_masks_mem BGRA/RGBA channel mismatch > 1: max_diff={max_diff}"
7885 );
7886 }
7887
7888 #[test]
7893 fn test_gl_mask_render_smoke() {
7894 if !is_opengl_available() {
7895 eprintln!("SKIPPED: {} - OpenGL not available", function!());
7896 return;
7897 }
7898
7899 let mut gl = GLProcessorThreaded::new(None).unwrap();
7900 let mut image = TensorImage::new(64, 64, RGBA, None).unwrap();
7901
7902 let result = gl.draw_masks(&mut image, &[], &[]);
7904 assert!(
7905 result.is_ok(),
7906 "GL mask render with empty data should succeed: {result:?}"
7907 );
7908
7909 assert_eq!(image.width(), 64);
7911 assert_eq!(image.height(), 64);
7912 }
7913
7914 #[test]
7915 fn test_gl_pbo_destination_smoke() {
7916 if !is_opengl_available() {
7917 eprintln!("SKIPPED: {} - OpenGL not available", function!());
7918 return;
7919 }
7920
7921 let gl = GLProcessorThreaded::new(None).unwrap();
7922 let result = gl.create_pbo_image(64, 64, RGBA);
7923 match result {
7924 Ok(pbo_img) => {
7925 assert_eq!(pbo_img.width(), 64);
7926 assert_eq!(pbo_img.height(), 64);
7927 assert_eq!(pbo_img.fourcc(), RGBA);
7928 }
7929 Err(e) => {
7930 eprintln!("SKIPPED: {} - PBO not supported: {e:?}", function!());
7932 }
7933 }
7934 }
7935}