1#![cfg(target_os = "linux")]
5#![cfg(feature = "opengl")]
6
7use edgefirst_decoder::{DetectBox, ProtoData, ProtoTensor, Segmentation};
8use edgefirst_tensor::{TensorMemory, TensorTrait};
9use four_char_code::FourCharCode;
10use gbm::{
11 drm::{buffer::DrmFourcc, control::Device as DrmControlDevice, Device as DrmDevice},
12 AsRaw, Device,
13};
14use khronos_egl::{self as egl, Attrib, Display, Dynamic, Instance, EGL1_4};
15use log::{debug, error};
16use std::{
17 collections::BTreeSet,
18 ffi::{c_char, c_void, CStr, CString},
19 mem::ManuallyDrop,
20 os::fd::AsRawFd,
21 ptr::{null, null_mut, NonNull},
22 rc::Rc,
23 str::FromStr,
24 sync::OnceLock,
25 thread::JoinHandle,
26 time::Instant,
27};
28use tokio::sync::mpsc::{Sender, WeakSender};
29
/// Expands to the name of the function in which the macro is invoked.
///
/// A nested item `f` is declared at the call site; its type name ends in
/// `::f`, so removing those three characters and keeping the text after the
/// last remaining `:` yields the enclosing function's name.
macro_rules! function {
    () => {{
        fn f() {}
        fn type_name_of<T>(_: T) -> &'static str {
            std::any::type_name::<T>()
        }
        let full_path = type_name_of(f);
        // Strip the trailing "::f" contributed by the marker item.
        let enclosing = &full_path[..full_path.len() - 3];

        match enclosing.rfind(':') {
            Some(idx) => &enclosing[idx + 1..],
            None => enclosing,
        }
    }};
}
45
46use crate::{
47 fourcc_is_int8, fourcc_is_packed_rgb, CPUProcessor, Crop, Error, Flip, ImageProcessorTrait,
48 MaskRegion, Rect, Rotation, TensorImage, TensorImageRef, DEFAULT_COLORS, GREY, NV12,
49 PLANAR_RGB, PLANAR_RGBA, PLANAR_RGB_INT8, RGB, RGBA, RGB_INT8, VYUY, YUYV,
50};
51
/// Selects which EGL display acquisition path is used.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum EglDisplayKind {
    /// Display obtained from a GBM device (`GlContext::egl_get_gbm_display`).
    Gbm,
    /// Display obtained from an enumerated EGL device
    /// (`GlContext::egl_get_platform_display_from_device`).
    PlatformDevice,
    /// The EGL default display (`GlContext::egl_get_default_display`).
    Default,
}

impl std::fmt::Display for EglDisplayKind {
    /// Writes the short, human-readable label of the display kind.
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let label = match self {
            EglDisplayKind::Gbm => "GBM",
            EglDisplayKind::PlatformDevice => "PlatformDevice",
            EglDisplayKind::Default => "Default",
        };
        f.write_str(label)
    }
}
99
/// One usable EGL display as reported by `probe_egl_displays`.
#[derive(Debug, Clone)]
pub struct EglDisplayInfo {
    /// Acquisition path through which the display was obtained.
    pub kind: EglDisplayKind,
    /// Human-readable description of the display.
    pub description: String,
}
109
110static EGL_LIB: OnceLock<&'static libloading::Library> = OnceLock::new();
114
115fn get_egl_lib() -> Result<&'static libloading::Library, crate::Error> {
116 if let Some(egl) = EGL_LIB.get() {
117 Ok(egl)
118 } else {
119 let egl = unsafe { libloading::Library::new("libEGL.so.1")? };
120 let egl: &'static libloading::Library = Box::leak(Box::new(egl));
122 Ok(EGL_LIB.get_or_init(|| egl))
123 }
124}
125
/// Dynamically loaded EGL entry points (EGL 1.4 required) backed by the leaked library handle.
type Egl = Instance<Dynamic<&'static libloading::Library, EGL1_4>>;
127
128fn probe_display_extensions(egl: &Egl, display: egl::Display) -> bool {
134 let Ok(ext_str) = egl.query_string(Some(display), egl::EXTENSIONS) else {
135 return false;
136 };
137 let exts = ext_str.to_string_lossy();
138
139 let required = ["EGL_KHR_surfaceless_context", "EGL_KHR_no_config_context"];
140
141 for r in &required {
142 if !exts.contains(r) {
143 log::debug!("Display missing required extension: {r}");
144 return false;
145 }
146 }
147
148 egl.bind_api(egl::OPENGL_ES_API).is_ok()
149}
150
151pub fn probe_egl_displays() -> Result<Vec<EglDisplayInfo>, Error> {
166 let egl: Egl = unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? };
167
168 let mut results = Vec::new();
169
170 if let Ok(display_type) = GlContext::egl_get_platform_display_from_device(&egl) {
172 let display = display_type.as_display();
173 if egl.initialize(display).is_ok() {
174 if probe_display_extensions(&egl, display) {
175 results.push(EglDisplayInfo {
176 kind: EglDisplayKind::PlatformDevice,
177 description: "EGL platform device via EGL_EXT_device_enumeration".to_string(),
178 });
179 }
180 let _ = egl.terminate(display);
181 }
182 }
183
184 if let Ok(display_type) = GlContext::egl_get_gbm_display(&egl) {
186 let display = display_type.as_display();
187 if egl.initialize(display).is_ok() {
188 if probe_display_extensions(&egl, display) {
189 results.push(EglDisplayInfo {
190 kind: EglDisplayKind::Gbm,
191 description: "GBM via /dev/dri/renderD128".to_string(),
192 });
193 }
194 let _ = egl.terminate(display);
195 }
196 }
197
198 if let Ok(display_type) = GlContext::egl_get_default_display(&egl) {
200 let display = display_type.as_display();
201 if egl.initialize(display).is_ok() {
202 if probe_display_extensions(&egl, display) {
203 results.push(EglDisplayInfo {
204 kind: EglDisplayKind::Default,
205 description: "EGL default display".to_string(),
206 });
207 }
208 let _ = egl.terminate(display);
209 }
210 }
211
212 Ok(results)
213}
214
/// Strategy used to move pixel data between tensors and GL.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(crate) enum TransferBackend {
    /// Chosen by `GlContext::try_initialize_egl` when the DMA-buf support
    /// check succeeds.
    DmaBuf,

    /// Pixel-buffer-object based transfer.
    Pbo,

    /// Chosen by `GlContext::try_initialize_egl` when the DMA-buf support
    /// check fails.
    Sync,
}

impl TransferBackend {
    /// Returns `true` only for [`TransferBackend::DmaBuf`].
    pub(crate) fn is_dma(self) -> bool {
        matches!(self, TransferBackend::DmaBuf)
    }

    /// Returns `true` only for [`TransferBackend::Pbo`].
    #[allow(dead_code)]
    pub(crate) fn is_pbo(self) -> bool {
        matches!(self, TransferBackend::Pbo)
    }
}
248
/// An initialized EGL display together with the context created on it.
pub(crate) struct GlContext {
    /// Transfer strategy selected when the context was created.
    pub(crate) transfer_backend: TransferBackend,
    /// Display the context belongs to (for GBM this also owns the device).
    pub(crate) display: EglDisplayType,
    /// Context handle returned by `create_context`.
    pub(crate) ctx: egl::Context,
    /// EGL entry points. Held in `ManuallyDrop`, so the `Rc` is not released
    /// automatically when the struct is dropped.
    pub(crate) egl: ManuallyDrop<Rc<Egl>>,
}
259
260pub(crate) enum EglDisplayType {
261 Default(egl::Display),
262 Gbm(egl::Display, #[allow(dead_code)] Device<Card>),
263 PlatformDisplay(egl::Display),
264}
265
266impl EglDisplayType {
267 fn as_display(&self) -> egl::Display {
268 match self {
269 EglDisplayType::Default(disp) => *disp,
270 EglDisplayType::Gbm(disp, _) => *disp,
271 EglDisplayType::PlatformDisplay(disp) => *disp,
272 }
273 }
274}
275
276impl GlContext {
277 pub(crate) fn new(kind: Option<EglDisplayKind>) -> Result<GlContext, crate::Error> {
278 let egl: Rc<Egl> =
280 Rc::new(unsafe { Instance::<Dynamic<_, EGL1_4>>::load_required_from(get_egl_lib()?)? });
281
282 if let Some(kind) = kind {
283 let display_fn = match kind {
285 EglDisplayKind::Gbm => Self::egl_get_gbm_display as fn(&Egl) -> _,
286 EglDisplayKind::PlatformDevice => Self::egl_get_platform_display_from_device,
287 EglDisplayKind::Default => Self::egl_get_default_display,
288 };
289 return Self::try_initialize_egl(egl, display_fn).map_err(|e| {
290 log::debug!("Failed to initialize EGL with {kind} display: {e:?}");
291 e
292 });
293 }
294
295 if let Ok(headless) =
297 Self::try_initialize_egl(egl.clone(), Self::egl_get_platform_display_from_device)
298 {
299 return Ok(headless);
300 } else {
301 log::debug!("Didn't initialize EGL with platform display from device enumeration");
302 }
303
304 if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_gbm_display) {
306 return Ok(headless);
307 } else {
308 log::debug!("Didn't initialize EGL with GBM Display");
309 }
310
311 if let Ok(headless) = Self::try_initialize_egl(egl.clone(), Self::egl_get_default_display) {
313 return Ok(headless);
314 } else {
315 log::debug!("Didn't initialize EGL with Default Display");
316 }
317
318 Err(Error::OpenGl(
319 "Could not initialize EGL with any known method".to_string(),
320 ))
321 }
322
323 fn try_initialize_egl(
324 egl: Rc<Egl>,
325 display_fn: impl Fn(&Egl) -> Result<EglDisplayType, crate::Error>,
326 ) -> Result<GlContext, crate::Error> {
327 let display = display_fn(&egl)?;
328 log::debug!("egl initialize with display: {:x?}", display.as_display());
329 egl.initialize(display.as_display())?;
330
331 let ext_str = egl.query_string(Some(display.as_display()), egl::EXTENSIONS)?;
333 let exts = ext_str.to_string_lossy();
334
335 if !exts.contains("EGL_KHR_surfaceless_context") {
336 return Err(crate::Error::GLVersion(
337 "EGL display does not support EGL_KHR_surfaceless_context".to_string(),
338 ));
339 }
340
341 if !exts.contains("EGL_KHR_no_config_context") {
342 return Err(crate::Error::GLVersion(
343 "EGL display does not support EGL_KHR_no_config_context".to_string(),
344 ));
345 }
346
347 egl.bind_api(egl::OPENGL_ES_API)?;
348
349 let context_attributes = [egl::CONTEXT_MAJOR_VERSION, 3, egl::NONE, egl::NONE];
353 let ctx = egl.create_context(
354 display.as_display(),
355 egl_ext::NO_CONFIG_KHR,
356 None,
357 &context_attributes,
358 )?;
359 debug!("ctx: {ctx:?}");
360
361 egl.make_current(display.as_display(), None, None, Some(ctx))?;
364
365 let has_dma_extensions = Self::egl_check_support_dma(&egl).is_ok();
366 let transfer_backend = if has_dma_extensions {
367 TransferBackend::DmaBuf
368 } else {
369 TransferBackend::Sync
370 };
371 Ok(GlContext {
372 display,
373 ctx,
374 egl: ManuallyDrop::new(egl),
375 transfer_backend,
376 })
377 }
378
379 fn egl_get_default_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
380 if let Some(display) = unsafe { egl.get_display(egl::DEFAULT_DISPLAY) } {
382 debug!("default display: {display:?}");
383 return Ok(EglDisplayType::Default(display));
384 }
385
386 Err(Error::OpenGl(
387 "Could not obtain EGL Default Display".to_string(),
388 ))
389 }
390
391 fn egl_get_gbm_display(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
392 let gbm = Device::new(Card::open_global()?)?;
394
395 debug!("gbm: {gbm:?}");
396 let display = Self::egl_get_platform_display_with_fallback(
397 egl,
398 egl_ext::PLATFORM_GBM_KHR,
399 gbm.as_raw() as *mut c_void,
400 &[egl::ATTRIB_NONE],
401 )?;
402
403 Ok(EglDisplayType::Gbm(display, gbm))
404 }
405
406 fn egl_get_platform_display_from_device(egl: &Egl) -> Result<EglDisplayType, crate::Error> {
407 let extensions = egl.query_string(None, egl::EXTENSIONS)?;
408 let extensions = extensions.to_string_lossy();
409 log::debug!("EGL Extensions: {}", extensions);
410
411 if !extensions.contains("EGL_EXT_device_enumeration") {
412 return Err(Error::GLVersion(
413 "EGL doesn't supported EGL_EXT_device_enumeration extension".to_string(),
414 ));
415 }
416
417 type EGLDeviceEXT = *mut c_void;
418 let devices = if let Some(ext) = egl.get_proc_address("eglQueryDevicesEXT") {
419 let func: unsafe extern "system" fn(
420 max_devices: egl::Int,
421 devices: *mut EGLDeviceEXT,
422 num_devices: *mut egl::Int,
423 ) -> *const c_char = unsafe { std::mem::transmute(ext) };
424 let mut devices = [std::ptr::null_mut(); 10];
425 let mut num_devices = 0;
426 unsafe { func(devices.len() as i32, devices.as_mut_ptr(), &mut num_devices) };
427 for i in 0..num_devices {
428 log::debug!("EGL device: {:?}", devices[i as usize]);
429 }
430 devices[0..num_devices as usize].to_vec()
431 } else {
432 return Err(Error::GLVersion(
433 "EGL doesn't supported eglQueryDevicesEXT function".to_string(),
434 ));
435 };
436
437 if !extensions.contains("EGL_EXT_platform_device") {
438 return Err(Error::GLVersion(
439 "EGL doesn't supported EGL_EXT_platform_device extension".to_string(),
440 ));
441 }
442
443 if devices.is_empty() {
444 return Err(Error::GLVersion(
445 "EGL_EXT_device_enumeration returned 0 devices".to_string(),
446 ));
447 }
448 let disp = Self::egl_get_platform_display_with_fallback(
449 egl,
450 egl_ext::PLATFORM_DEVICE_EXT,
451 devices[0],
452 &[egl::ATTRIB_NONE],
453 )?;
454 Ok(EglDisplayType::PlatformDisplay(disp))
455 }
456
457 fn egl_check_support_dma(egl: &Egl) -> Result<(), crate::Error> {
458 let extensions = egl.query_string(None, egl::EXTENSIONS)?;
459 let extensions = extensions.to_string_lossy();
460 log::debug!("EGL Extensions: {}", extensions);
461
462 if egl.upcast::<egl::EGL1_5>().is_some() {
463 return Ok(());
464 }
465
466 if !extensions.contains("EGL_EXT_image_dma_buf_import") {
467 return Err(crate::Error::GLVersion(
468 "EGL does not support EGL_EXT_image_dma_buf_import extension".to_string(),
469 ));
470 }
471
472 if egl.get_proc_address("eglCreateImageKHR").is_none() {
473 return Err(crate::Error::GLVersion(
474 "EGL does not support eglCreateImageKHR function".to_string(),
475 ));
476 }
477
478 if egl.get_proc_address("eglDestroyImageKHR").is_none() {
479 return Err(crate::Error::GLVersion(
480 "EGL does not support eglDestroyImageKHR function".to_string(),
481 ));
482 }
483 Ok(())
484 }
485
486 fn egl_get_platform_display_with_fallback(
487 egl: &Egl,
488 platform: egl::Enum,
489 native_display: *mut c_void,
490 attrib_list: &[Attrib],
491 ) -> Result<Display, Error> {
492 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
493 unsafe { egl.get_platform_display(platform, native_display, attrib_list) }
494 .map_err(|e| e.into())
495 } else if let Some(ext) = egl.get_proc_address("eglGetPlatformDisplayEXT") {
496 let func: unsafe extern "system" fn(
497 platform: egl::Enum,
498 native_display: *mut c_void,
499 attrib_list: *const Attrib,
500 ) -> egl::EGLDisplay = unsafe { std::mem::transmute(ext) };
501 let disp = unsafe { func(platform, native_display, attrib_list.as_ptr()) };
502 if disp != egl::NO_DISPLAY {
503 Ok(unsafe { Display::from_ptr(disp) })
504 } else {
505 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
506 "EGL failed but no error was reported".to_owned(),
507 )))
508 }
509 } else {
510 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
511 provided: egl.version(),
512 required: khronos_egl::Version::EGL1_5,
513 }))
514 }
515 }
516
517 fn egl_create_image_with_fallback(
518 egl: &Egl,
519 display: Display,
520 ctx: egl::Context,
521 target: egl::Enum,
522 buffer: egl::ClientBuffer,
523 attrib_list: &[Attrib],
524 ) -> Result<egl::Image, Error> {
525 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
526 egl.create_image(display, ctx, target, buffer, attrib_list)
527 .map_err(|e| e.into())
528 } else if let Some(ext) = egl.get_proc_address("eglCreateImageKHR") {
529 log::trace!("eglCreateImageKHR addr: {:?}", ext);
530 let func: unsafe extern "system" fn(
531 display: egl::EGLDisplay,
532 ctx: egl::EGLContext,
533 target: egl::Enum,
534 buffer: egl::EGLClientBuffer,
535 attrib_list: *const egl::Int,
536 ) -> egl::EGLImage = unsafe { std::mem::transmute(ext) };
537 let new_attrib_list = attrib_list
538 .iter()
539 .map(|x| *x as egl::Int)
540 .collect::<Vec<_>>();
541
542 let image = unsafe {
543 func(
544 display.as_ptr(),
545 ctx.as_ptr(),
546 target,
547 buffer.as_ptr(),
548 new_attrib_list.as_ptr(),
549 )
550 };
551 if image != egl::NO_IMAGE {
552 Ok(unsafe { egl::Image::from_ptr(image) })
553 } else {
554 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
555 "EGL failed but no error was reported".to_owned(),
556 )))
557 }
558 } else {
559 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
560 provided: egl.version(),
561 required: khronos_egl::Version::EGL1_5,
562 }))
563 }
564 }
565
566 fn egl_destroy_image_with_fallback(
567 egl: &Egl,
568 display: Display,
569 image: egl::Image,
570 ) -> Result<(), Error> {
571 if let Some(egl) = egl.upcast::<egl::EGL1_5>() {
572 egl.destroy_image(display, image).map_err(|e| e.into())
573 } else if let Some(ext) = egl.get_proc_address("eglDestroyImageKHR") {
574 let func: unsafe extern "system" fn(
575 display: egl::EGLDisplay,
576 image: egl::EGLImage,
577 ) -> egl::Boolean = unsafe { std::mem::transmute(ext) };
578 let res = unsafe { func(display.as_ptr(), image.as_ptr()) };
579 if res == egl::TRUE {
580 Ok(())
581 } else {
582 Err(egl.get_error().map(|e| e.into()).unwrap_or(Error::Internal(
583 "EGL failed but no error was reported".to_owned(),
584 )))
585 }
586 } else {
587 Err(Error::EGLLoad(egl::LoadError::InvalidVersion {
588 provided: egl.version(),
589 required: khronos_egl::Version::EGL1_5,
590 }))
591 }
592 }
593}
594
// Best-effort teardown of the EGL context and display.
impl Drop for GlContext {
    fn drop(&mut self) {
        // Swap in a silent panic hook so that a panic raised by the teardown
        // calls below prints nothing; the previous hook is restored after.
        let prev_hook = std::panic::take_hook();
        std::panic::set_hook(Box::new(|_| {}));
        // Both panics and EGL error results from the teardown are discarded.
        let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
            // Unbind the context from the current thread first.
            let _ = self
                .egl
                .make_current(self.display.as_display(), None, None, None);

            let _ = self
                .egl
                .destroy_context(self.display.as_display(), self.ctx);

            let _ = self.egl.terminate(self.display.as_display());
        }));
        std::panic::set_hook(prev_hook);

        // `self.egl` is `ManuallyDrop` and is not dropped here.
        // NOTE(review): presumably deliberate, to avoid releasing the EGL
        // instance at teardown — confirm.
    }
}
626
627#[derive(Debug)]
628pub(crate) struct Card(std::fs::File);
630
631impl std::os::unix::io::AsFd for Card {
634 fn as_fd(&self) -> std::os::unix::io::BorrowedFd<'_> {
635 self.0.as_fd()
636 }
637}
638
639impl DrmDevice for Card {}
641impl DrmControlDevice for Card {}
642
643impl Card {
645 pub fn open(path: &str) -> Result<Self, crate::Error> {
646 let mut options = std::fs::OpenOptions::new();
647 options.read(true);
648 options.write(true);
649 let c = options.open(path);
650 match c {
651 Ok(c) => Ok(Card(c)),
652 Err(e) if e.kind() == std::io::ErrorKind::NotFound => {
653 Err(Error::NotFound(format!("File not found: {path}")))
654 }
655 Err(e) => Err(e.into()),
656 }
657 }
658
659 pub fn open_global() -> Result<Self, crate::Error> {
660 let targets = ["/dev/dri/renderD128", "/dev/dri/card0", "/dev/dri/card1"];
661 let e = Self::open(targets[0]);
662 if let Ok(t) = e {
663 return Ok(t);
664 }
665 for t in &targets[1..] {
666 if let Ok(t) = Self::open(t) {
667 return Ok(t);
668 }
669 }
670 e
671 }
672}
673
/// A rectangle described by its four edges.
///
/// NOTE(review): units and coordinate space (pixels vs. normalized) are not
/// visible from this definition — confirm at the use sites.
#[derive(Debug, Clone, Copy)]
struct RegionOfInterest {
    /// Left edge.
    left: f32,
    /// Top edge.
    top: f32,
    /// Right edge.
    right: f32,
    /// Bottom edge.
    bottom: f32,
}
681
/// Requests sent from `GLProcessorThreaded` to its GL worker thread.
///
/// Every variant except `PboDelete` carries a oneshot sender on which the
/// worker reports the outcome.
#[allow(clippy::type_complexity)]
enum GLProcessorMessage {
    /// Convert a source image into a destination image:
    /// (src, dst, rotation, flip, crop, reply).
    ImageConvert(
        SendablePtr<TensorImage>,
        SendablePtr<TensorImage>,
        Rotation,
        Flip,
        Crop,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Replace the class colour table: (colors, reply).
    SetColors(
        Vec<[u8; 4]>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Draw detections and segmentations onto an image:
    /// (dst, detections, segmentations, reply).
    DrawMasks(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        SendablePtr<Segmentation>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Draw masks from prototype data: (dst, detections, proto data, reply).
    DrawMasksProto(
        SendablePtr<TensorImage>,
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Select the int8 interpolation mode: (mode, reply).
    SetInt8Interpolation(
        Int8InterpolationMode,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Decode masks into an atlas:
    /// (detections, proto data, output width, output height, reply).
    DecodeMasksAtlas(
        SendablePtr<DetectBox>,
        Box<ProtoData>,
        usize,
        usize,
        tokio::sync::oneshot::Sender<Result<(Vec<u8>, Vec<MaskRegion>), Error>>,
    ),
    /// Allocate a pixel-pack buffer: (size in bytes, reply with buffer id).
    PboCreate(
        usize,
        tokio::sync::oneshot::Sender<Result<u32, Error>>,
    ),
    /// Map a buffer: (buffer id, size in bytes, reply with the mapping).
    PboMap(
        u32,
        usize,
        tokio::sync::oneshot::Sender<Result<edgefirst_tensor::PboMapping, Error>>,
    ),
    /// Unmap a buffer: (buffer id, reply).
    PboUnmap(
        u32,
        tokio::sync::oneshot::Sender<Result<(), Error>>,
    ),
    /// Delete a buffer by id; no reply is sent.
    PboDelete(u32),
}
734
/// PBO operations that forward each request to the GL worker thread.
struct GlPboOps {
    /// Weak handle to the worker's channel; upgraded per call, so holding a
    /// `GlPboOps` does not by itself keep the channel open.
    sender: WeakSender<GLProcessorMessage>,
}
744
745unsafe impl edgefirst_tensor::PboOps for GlPboOps {
750 fn map_buffer(
751 &self,
752 buffer_id: u32,
753 size: usize,
754 ) -> edgefirst_tensor::Result<edgefirst_tensor::PboMapping> {
755 let sender = self
756 .sender
757 .upgrade()
758 .ok_or(edgefirst_tensor::Error::PboDisconnected)?;
759 let (tx, rx) = tokio::sync::oneshot::channel();
760 sender
761 .blocking_send(GLProcessorMessage::PboMap(buffer_id, size, tx))
762 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?;
763 rx.blocking_recv()
764 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?
765 .map_err(|e| {
766 edgefirst_tensor::Error::NotImplemented(format!("GL PBO map failed: {e:?}"))
767 })
768 }
769
770 fn unmap_buffer(&self, buffer_id: u32) -> edgefirst_tensor::Result<()> {
771 let sender = self
772 .sender
773 .upgrade()
774 .ok_or(edgefirst_tensor::Error::PboDisconnected)?;
775 let (tx, rx) = tokio::sync::oneshot::channel();
776 sender
777 .blocking_send(GLProcessorMessage::PboUnmap(buffer_id, tx))
778 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?;
779 rx.blocking_recv()
780 .map_err(|_| edgefirst_tensor::Error::PboDisconnected)?
781 .map_err(|e| {
782 edgefirst_tensor::Error::NotImplemented(format!("GL PBO unmap failed: {e:?}"))
783 })
784 }
785
786 fn delete_buffer(&self, buffer_id: u32) {
787 if let Some(sender) = self.sender.upgrade() {
788 let _ = sender.blocking_send(GLProcessorMessage::PboDelete(buffer_id));
789 }
790 }
791}
792
/// Image processor that runs all GL work on a dedicated worker thread and
/// talks to it through a message channel.
#[derive(Debug)]
pub struct GLProcessorThreaded {
    /// Join handle of the worker thread; taken and joined on drop.
    handle: Option<JoinHandle<()>>,

    /// Channel to the worker thread; taken (closing the channel) on drop.
    sender: Option<Sender<GLProcessorMessage>>,
    /// Transfer backend reported by the worker once its context was created.
    transfer_backend: TransferBackend,
}

// NOTE(review): these assert thread-safety manually; the fields shown here
// are a join handle, a channel sender and a `Copy` enum — confirm the impls
// are still required.
unsafe impl Send for GLProcessorThreaded {}
unsafe impl Sync for GLProcessorThreaded {}
809
/// A raw pointer plus element count that can be moved to the GL thread.
///
/// The sender creates it from a reference or slice and then blocks until the
/// worker replies, which is what keeps the pointee alive while it is used.
struct SendablePtr<T: Send> {
    /// Pointer to the first element.
    ptr: NonNull<T>,
    /// Number of elements reachable from `ptr` (1 for a single value).
    len: usize,
}

// The wrapper only forwards the pointer; soundness depends on the sender
// keeping the pointee alive and unaliased until the worker has replied.
unsafe impl<T> Send for SendablePtr<T> where T: Send {}
816
impl GLProcessorThreaded {
    /// Spawns the GL worker thread and waits until it has created its
    /// context.
    ///
    /// `kind` is forwarded to `GLProcessorST::new`. The worker then serves
    /// `GLProcessorMessage` requests until the channel is closed.
    ///
    /// # Errors
    /// Returns the context-creation error reported by the worker, or
    /// `Error::Internal` when the worker ends without reporting anything.
    pub fn new(kind: Option<EglDisplayKind>) -> Result<Self, Error> {
        // Capacity 1: a sender waits while an earlier message is still queued.
        let (send, mut recv) = tokio::sync::mpsc::channel::<GLProcessorMessage>(1);

        // One-shot used by the worker to report context creation.
        let (create_ctx_send, create_ctx_recv) = tokio::sync::oneshot::channel();

        let func = move || {
            // The GL state is created on, and never leaves, this thread.
            let mut gl_converter = match GLProcessorST::new(kind) {
                Ok(gl) => gl,
                Err(e) => {
                    let _ = create_ctx_send.send(Err(e));
                    return;
                }
            };
            let _ = create_ctx_send.send(Ok(gl_converter.gl_context.transfer_backend));
            // Runs until every sender has been dropped.
            while let Some(msg) = recv.blocking_recv() {
                match msg {
                    GLProcessorMessage::ImageConvert(src, mut dst, rotation, flip, crop, resp) => {
                        // SAFETY: the pointers were built from live references
                        // by the requesting thread, which blocks on `resp`.
                        let src = unsafe { src.ptr.as_ref() };
                        let dst = unsafe { dst.ptr.as_mut() };
                        let res = gl_converter.convert(src, dst, rotation, flip, crop);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::DrawMasks(mut dst, det, seg, resp) => {
                        // SAFETY: as above; `len` is the length of the slice
                        // each pointer was taken from.
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let seg = unsafe { std::slice::from_raw_parts(seg.ptr.as_ptr(), seg.len) };
                        let res = gl_converter.draw_masks(dst, det, seg);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::DrawMasksProto(mut dst, det, proto_data, resp) => {
                        // SAFETY: as above.
                        let dst = unsafe { dst.ptr.as_mut() };
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.draw_masks_proto(dst, det, &proto_data);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::SetColors(colors, resp) => {
                        let res = gl_converter.set_class_colors(&colors);
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::SetInt8Interpolation(mode, resp) => {
                        gl_converter.set_int8_interpolation_mode(mode);
                        let _ = resp.send(Ok(()));
                    }
                    GLProcessorMessage::DecodeMasksAtlas(
                        det,
                        proto_data,
                        output_width,
                        output_height,
                        resp,
                    ) => {
                        // SAFETY: as above.
                        let det = unsafe { std::slice::from_raw_parts(det.ptr.as_ptr(), det.len) };
                        let res = gl_converter.decode_masks_atlas(
                            det,
                            &proto_data,
                            output_width,
                            output_height,
                        );
                        let _ = resp.send(res);
                    }
                    GLProcessorMessage::PboCreate(size, resp) => {
                        // Allocate `size` bytes of uninitialised storage for a
                        // new pixel-pack buffer, then unbind it again.
                        let result = unsafe {
                            let mut id: u32 = 0;
                            gls::gl::GenBuffers(1, &mut id);
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, id);
                            gls::gl::BufferData(
                                gls::gl::PIXEL_PACK_BUFFER,
                                size as isize,
                                std::ptr::null(),
                                gls::gl::STREAM_COPY,
                            );
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            match check_gl_error("PboCreate", 0) {
                                Ok(()) => Ok(id),
                                Err(e) => {
                                    // Do not leak the buffer name on failure.
                                    gls::gl::DeleteBuffers(1, &id);
                                    Err(e)
                                }
                            }
                        };
                        let _ = resp.send(result);
                    }
                    GLProcessorMessage::PboMap(buffer_id, size, resp) => {
                        // Map the first `size` bytes for reading and writing.
                        // The binding is reset right away; the returned
                        // pointer is handed to the requester.
                        let result = unsafe {
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
                            let ptr = gls::gl::MapBufferRange(
                                gls::gl::PIXEL_PACK_BUFFER,
                                0,
                                size as isize,
                                gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
                            );
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            if ptr.is_null() {
                                Err(crate::Error::OpenGl(
                                    "glMapBufferRange returned null".to_string(),
                                ))
                            } else {
                                Ok(edgefirst_tensor::PboMapping {
                                    ptr: ptr as *mut u8,
                                    size,
                                })
                            }
                        };
                        let _ = resp.send(result);
                    }
                    GLProcessorMessage::PboUnmap(buffer_id, resp) => {
                        let result = unsafe {
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, buffer_id);
                            let ok = gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
                            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                            // A FALSE return from glUnmapBuffer is reported as
                            // corrupted buffer contents.
                            if ok == gls::gl::FALSE {
                                Err(Error::OpenGl(
                                    "PBO data was corrupted during mapping".into(),
                                ))
                            } else {
                                check_gl_error("PboUnmap", 0)
                            }
                        };
                        let _ = resp.send(result);
                    }
                    // Fire-and-forget: no reply channel for deletions.
                    GLProcessorMessage::PboDelete(buffer_id) => unsafe {
                        gls::gl::DeleteBuffers(1, &buffer_id);
                    },
                }
            }
        };

        let handle = std::thread::spawn(func);

        // Block until the worker reports whether the context could be
        // created.
        let transfer_backend = match create_ctx_recv.blocking_recv() {
            Ok(Err(e)) => return Err(e),
            Err(_) => {
                return Err(Error::Internal(
                    "GL converter error messaging closed without update".to_string(),
                ));
            }
            Ok(Ok(tb)) => tb,
        };

        Ok(Self {
            handle: Some(handle),
            sender: Some(send),
            transfer_backend,
        })
    }
}
972
973impl ImageProcessorTrait for GLProcessorThreaded {
974 fn convert(
975 &mut self,
976 src: &TensorImage,
977 dst: &mut TensorImage,
978 rotation: crate::Rotation,
979 flip: Flip,
980 crop: Crop,
981 ) -> crate::Result<()> {
982 crop.check_crop(src, dst)?;
983 if !GLProcessorST::check_src_format_supported(self.transfer_backend, src) {
984 return Err(crate::Error::NotSupported(format!(
985 "Opengl doesn't support {} source texture",
986 src.fourcc().display()
987 )));
988 }
989
990 if !GLProcessorST::check_dst_format_supported(self.transfer_backend, dst) {
991 return Err(crate::Error::NotSupported(format!(
992 "Opengl doesn't support {} destination texture",
993 dst.fourcc().display()
994 )));
995 }
996
997 let (err_send, err_recv) = tokio::sync::oneshot::channel();
998 self.sender
999 .as_ref()
1000 .unwrap()
1001 .blocking_send(GLProcessorMessage::ImageConvert(
1002 SendablePtr {
1003 ptr: src.into(),
1004 len: 1,
1005 },
1006 SendablePtr {
1007 ptr: dst.into(),
1008 len: 1,
1009 },
1010 rotation,
1011 flip,
1012 crop,
1013 err_send,
1014 ))
1015 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1016 err_recv.blocking_recv().map_err(|_| {
1017 Error::Internal("GL converter error messaging closed without update".to_string())
1018 })?
1019 }
1020
1021 fn convert_ref(
1022 &mut self,
1023 src: &TensorImage,
1024 dst: &mut TensorImageRef<'_>,
1025 rotation: Rotation,
1026 flip: Flip,
1027 crop: Crop,
1028 ) -> crate::Result<()> {
1029 let mut cpu = CPUProcessor::new();
1031 cpu.convert_ref(src, dst, rotation, flip, crop)
1032 }
1033
1034 fn draw_masks(
1035 &mut self,
1036 dst: &mut TensorImage,
1037 detect: &[crate::DetectBox],
1038 segmentation: &[crate::Segmentation],
1039 ) -> crate::Result<()> {
1040 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1041 self.sender
1042 .as_ref()
1043 .unwrap()
1044 .blocking_send(GLProcessorMessage::DrawMasks(
1045 SendablePtr {
1046 ptr: dst.into(),
1047 len: 1,
1048 },
1049 SendablePtr {
1050 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1051 len: detect.len(),
1052 },
1053 SendablePtr {
1054 ptr: NonNull::new(segmentation.as_ptr() as *mut Segmentation).unwrap(),
1055 len: segmentation.len(),
1056 },
1057 err_send,
1058 ))
1059 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1060 err_recv.blocking_recv().map_err(|_| {
1061 Error::Internal("GL converter error messaging closed without update".to_string())
1062 })?
1063 }
1064
1065 fn draw_masks_proto(
1066 &mut self,
1067 dst: &mut TensorImage,
1068 detect: &[DetectBox],
1069 proto_data: &ProtoData,
1070 ) -> crate::Result<()> {
1071 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1072 self.sender
1073 .as_ref()
1074 .unwrap()
1075 .blocking_send(GLProcessorMessage::DrawMasksProto(
1076 SendablePtr {
1077 ptr: NonNull::new(dst as *mut TensorImage).unwrap(),
1078 len: 1,
1079 },
1080 SendablePtr {
1081 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1082 len: detect.len(),
1083 },
1084 Box::new(proto_data.clone()),
1085 err_send,
1086 ))
1087 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1088 err_recv.blocking_recv().map_err(|_| {
1089 Error::Internal("GL converter error messaging closed without update".to_string())
1090 })?
1091 }
1092
    /// Trait entry point for atlas decoding; forwards unchanged to the
    /// inherent `GLProcessorThreaded::decode_masks_atlas`.
    fn decode_masks_atlas(
        &mut self,
        detect: &[DetectBox],
        proto_data: ProtoData,
        output_width: usize,
        output_height: usize,
    ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
        // The explicit type path selects the inherent method of the same
        // name rather than this trait method.
        GLProcessorThreaded::decode_masks_atlas(
            self,
            detect,
            proto_data,
            output_width,
            output_height,
        )
    }
1108
1109 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> Result<(), crate::Error> {
1110 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1111 self.sender
1112 .as_ref()
1113 .unwrap()
1114 .blocking_send(GLProcessorMessage::SetColors(colors.to_vec(), err_send))
1115 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1116 err_recv.blocking_recv().map_err(|_| {
1117 Error::Internal("GL converter error messaging closed without update".to_string())
1118 })?
1119 }
1120}
1121
1122impl GLProcessorThreaded {
1123 pub fn set_int8_interpolation_mode(
1125 &mut self,
1126 mode: Int8InterpolationMode,
1127 ) -> Result<(), crate::Error> {
1128 let (err_send, err_recv) = tokio::sync::oneshot::channel();
1129 self.sender
1130 .as_ref()
1131 .unwrap()
1132 .blocking_send(GLProcessorMessage::SetInt8Interpolation(mode, err_send))
1133 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1134 err_recv.blocking_recv().map_err(|_| {
1135 Error::Internal("GL converter error messaging closed without update".to_string())
1136 })?
1137 }
1138
1139 pub fn decode_masks_atlas(
1145 &mut self,
1146 detect: &[DetectBox],
1147 proto_data: ProtoData,
1148 output_width: usize,
1149 output_height: usize,
1150 ) -> Result<(Vec<u8>, Vec<MaskRegion>), crate::Error> {
1151 let (resp_send, resp_recv) = tokio::sync::oneshot::channel();
1152 self.sender
1153 .as_ref()
1154 .unwrap()
1155 .blocking_send(GLProcessorMessage::DecodeMasksAtlas(
1156 SendablePtr {
1157 ptr: NonNull::new(detect.as_ptr() as *mut DetectBox).unwrap(),
1158 len: detect.len(),
1159 },
1160 Box::new(proto_data),
1161 output_width,
1162 output_height,
1163 resp_send,
1164 ))
1165 .map_err(|_| Error::Internal("GL converter thread exited".to_string()))?;
1166 resp_recv.blocking_recv().map_err(|_| {
1167 Error::Internal("GL converter error messaging closed without update".to_string())
1168 })?
1169 }
1170
1171 pub fn create_pbo_image(
1173 &self,
1174 width: usize,
1175 height: usize,
1176 fourcc: four_char_code::FourCharCode,
1177 ) -> Result<crate::TensorImage, Error> {
1178 let sender = self
1179 .sender
1180 .as_ref()
1181 .ok_or(Error::OpenGl("GL processor is shutting down".to_string()))?;
1182
1183 let channels = crate::fourcc_channels(fourcc)?;
1184 let size = width * height * channels;
1185 if size == 0 {
1186 return Err(Error::OpenGl("Invalid image dimensions".to_string()));
1187 }
1188
1189 let (tx, rx) = tokio::sync::oneshot::channel();
1191 sender
1192 .blocking_send(GLProcessorMessage::PboCreate(size, tx))
1193 .map_err(|_| Error::OpenGl("GL thread channel closed".to_string()))?;
1194 let buffer_id = rx
1195 .blocking_recv()
1196 .map_err(|_| Error::OpenGl("GL thread did not respond".to_string()))??;
1197
1198 let ops: std::sync::Arc<dyn edgefirst_tensor::PboOps> = std::sync::Arc::new(GlPboOps {
1199 sender: sender.downgrade(),
1200 });
1201
1202 let shape = if crate::fourcc_planar(fourcc)? {
1203 vec![channels, height, width]
1204 } else {
1205 vec![height, width, channels]
1206 };
1207
1208 let pbo_tensor =
1209 edgefirst_tensor::PboTensor::<u8>::from_pbo(buffer_id, size, &shape, None, ops)
1210 .map_err(|e| Error::OpenGl(format!("PBO tensor creation failed: {e:?}")))?;
1211 let tensor = edgefirst_tensor::Tensor::Pbo(pbo_tensor);
1212 crate::TensorImage::from_tensor(tensor, fourcc)
1213 .map_err(|e| Error::OpenGl(format!("Failed to wrap PBO tensor as image: {e:?}")))
1214 }
1215
    /// Returns the transfer backend reported by the GL worker at start-up.
    #[allow(dead_code)]
    pub(crate) fn transfer_backend(&self) -> TransferBackend {
        self.transfer_backend
    }
1221}
1222
1223impl Drop for GLProcessorThreaded {
1224 fn drop(&mut self) {
1225 drop(self.sender.take());
1226 let _ = self.handle.take().and_then(|h| h.join().ok());
1227 }
1228}
1229
/// Interpolation strategy used for int8 output; selected through
/// `GLProcessorThreaded::set_int8_interpolation_mode`.
///
/// NOTE(review): the exact behaviour of each mode is implemented elsewhere —
/// confirm before relying on these one-line summaries.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Int8InterpolationMode {
    /// Presumably nearest-neighbour sampling.
    Nearest,
    /// Presumably bilinear sampling.
    Bilinear,
    /// Presumably a two-pass approach.
    TwoPass,
}
1240
/// Distinguishes the two roles a cached image can have.
///
/// NOTE(review): presumably selects between a source-side and a
/// destination-side `EglImageCache` — confirm at the use sites.
#[derive(Debug)]
enum CacheKind {
    /// Source role.
    Src,
    /// Destination role.
    Dst,
}
1247
/// One entry of an `EglImageCache`.
struct CachedEglImage {
    /// The cached EGL image.
    egl_image: EglImage,
    /// Liveness token: `EglImageCache::sweep` removes the entry once this can
    /// no longer be upgraded.
    guard: std::sync::Weak<()>,
    /// Optional GL renderbuffer name, deleted when the entry is removed.
    renderbuffer: Option<u32>,
    /// Timestamp of the last use; the smallest value is evicted first.
    last_used: u64,
}
1258
/// Cache of EGL images with LRU eviction and sweeping of dead entries.
struct EglImageCache {
    /// Cached images keyed by a `u64` id.
    /// NOTE(review): what the key identifies (buffer id, fd, ...) is decided
    /// by code outside this view — confirm.
    entries: std::collections::HashMap<u64, CachedEglImage>,
    /// Capacity passed to `new`; also used to pre-size `entries`.
    /// NOTE(review): enforcement of this limit is not visible here.
    capacity: usize,
    /// Hit counter, reported in the debug log when the cache is dropped.
    hits: u64,
    /// Miss counter, reported in the debug log when the cache is dropped.
    misses: u64,
    /// Monotonic counter incremented by `next_timestamp`.
    access_counter: u64,
}
1272
1273impl EglImageCache {
1274 fn new(capacity: usize) -> Self {
1275 Self {
1276 entries: std::collections::HashMap::with_capacity(capacity),
1277 capacity,
1278 hits: 0,
1279 misses: 0,
1280 access_counter: 0,
1281 }
1282 }
1283
1284 fn next_timestamp(&mut self) -> u64 {
1286 self.access_counter += 1;
1287 self.access_counter
1288 }
1289
1290 fn evict_lru(&mut self) {
1292 if let Some((&evict_id, _)) = self.entries.iter().min_by_key(|(_, entry)| entry.last_used) {
1293 if let Some(evicted) = self.entries.remove(&evict_id) {
1294 if let Some(rbo) = evicted.renderbuffer {
1295 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1296 }
1297 }
1298 }
1299 }
1300
1301 fn sweep(&mut self) {
1303 let before = self.entries.len();
1304 self.entries.retain(|_id, entry| {
1305 let alive = entry.guard.upgrade().is_some();
1306 if !alive {
1307 if let Some(rbo) = entry.renderbuffer {
1308 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1309 }
1310 }
1311 alive
1312 });
1313 let swept = before - self.entries.len();
1314 if swept > 0 {
1315 log::debug!("EglImageCache: swept {swept} dead entries");
1316 }
1317 }
1318}
1319
1320impl Drop for EglImageCache {
1321 fn drop(&mut self) {
1322 for entry in self.entries.values() {
1323 if let Some(rbo) = entry.renderbuffer {
1324 unsafe { gls::gl::DeleteRenderbuffers(1, &rbo) };
1325 }
1326 }
1327 log::debug!(
1328 "EglImageCache stats: {} hits, {} misses, {} entries remaining",
1329 self.hits,
1330 self.misses,
1331 self.entries.len()
1332 );
1333 }
1334}
1335
/// Single-threaded OpenGL image processor.
///
/// Owns the GL context together with every shader program, texture, buffer
/// and cache created by [`GLProcessorST::new`]. Fields are dropped in
/// declaration order, so `gl_context` (declared last) outlives the GL objects
/// declared before it.
pub struct GLProcessorST {
    // --- Textures (all created with `Texture::new()` in `new`) ---
    camera_eglimage_texture: Texture,
    camera_normal_texture: Texture,
    render_texture: Texture,
    segmentation_texture: Texture,
    // --- Mask / segmentation rendering programs ---
    /// Built from `generate_segmentation_shader()`.
    segmentation_program: GlProgram,
    /// Built from `generate_instanced_segmentation_shader()`.
    instanced_segmentation_program: GlProgram,
    /// Bound as a `TEXTURE_2D_ARRAY` holding the proto planes in
    /// `decode_masks_atlas`.
    proto_texture: Texture,
    /// Built from `generate_proto_segmentation_shader()`.
    proto_segmentation_program: GlProgram,
    /// Built from `generate_proto_segmentation_shader_int8_nearest()`.
    proto_segmentation_int8_nearest_program: GlProgram,
    /// Built from `generate_proto_segmentation_shader_int8_bilinear()`.
    proto_segmentation_int8_bilinear_program: GlProgram,
    /// Built from `generate_proto_dequant_shader_int8()`.
    proto_dequant_int8_program: GlProgram,
    /// Built from `generate_proto_segmentation_shader_f32()`.
    proto_segmentation_f32_program: GlProgram,
    /// Built from `generate_color_shader()`; `set_class_colors` loads it with
    /// fully opaque colors (alpha forced to 1.0).
    color_program: GlProgram,
    /// Result of `gl_check_support()`; when true, `decode_masks_atlas` enables
    /// `LINEAR` filtering on the f32 proto texture.
    has_float_linear: bool,
    /// Sampling mode for quantized protos; initialised to `Bilinear`.
    int8_interpolation_mode: Int8InterpolationMode,
    /// NOTE(review): presumably the target of the int8 dequantization pass —
    /// its use is not visible here.
    proto_dequant_texture: Texture,
    /// Mask-logit programs selected by `decode_masks_atlas`.
    proto_mask_logit_int8_bilinear_program: GlProgram,
    proto_mask_logit_int8_nearest_program: GlProgram,
    proto_mask_logit_f32_program: GlProgram,
    /// Framebuffer name for mask rendering; 0 until `ensure_mask_fbo` creates it.
    mask_fbo: u32,
    /// R8 texture attached to `mask_fbo`; 0 until `ensure_mask_fbo` creates it.
    mask_fbo_texture: u32,
    /// Width of the current `mask_fbo_texture` allocation.
    mask_fbo_width: usize,
    /// Height of the current `mask_fbo_texture` allocation.
    mask_fbo_height: usize,
    /// Pixel-pack buffer used to read the mask atlas back; 0 until
    /// `ensure_mask_atlas_size` creates it.
    mask_atlas_pbo: u32,
    // --- Geometry buffers ---
    /// Created as `Buffer::new(0, 3, 100)`; holds quad vertex positions.
    vertex_buffer: Buffer,
    /// Created as `Buffer::new(1, 2, 100)`; holds quad texture coordinates.
    texture_buffer: Buffer,
    /// NOTE(review): presumably the framebuffer used by the convert paths —
    /// confirm.
    convert_fbo: FrameBuffer,
    /// EGL image cache for source images, created with capacity 8.
    src_egl_cache: EglImageCache,
    /// EGL image cache for destination images, created with capacity 8.
    dst_egl_cache: EglImageCache,
    // NOTE(review): the three `packed_rgb_*` fields presumably back an
    // intermediate render target for packed RGB output — confirm.
    packed_rgb_intermediate_tex: Texture,
    packed_rgb_fbo: FrameBuffer,
    /// Initialised to `(0, 0)`.
    packed_rgb_intermediate_size: (usize, usize),
    // --- Conversion programs ---
    /// Built from `generate_texture_fragment_shader()`.
    texture_program: GlProgram,
    /// Built from `generate_texture_fragment_shader_yuv()`.
    texture_program_yuv: GlProgram,
    /// Built from `generate_planar_rgb_shader()`.
    texture_program_planar: GlProgram,
    /// Built from `generate_planar_rgb_int8_shader()`.
    texture_program_planar_int8: GlProgram,
    /// Built from `generate_packed_rgba8_shader_2d()`.
    packed_rgba8_program_2d: GlProgram,
    /// Built from `generate_packed_rgba8_int8_shader_2d()`.
    packed_rgba8_int8_program_2d: GlProgram,
    /// Built from `generate_texture_int8_shader()`.
    texture_int8_program: GlProgram,
    /// Built from `generate_texture_int8_shader_yuv()`.
    texture_int8_program_yuv: GlProgram,
    /// Result of `probe_rgb_direct_support()`; forced back to `false` when the
    /// DMA-buf roundtrip verification fails in `new`.
    support_rgb_direct: bool,
    /// The EGL/GL context; declared last so it is dropped after the GL objects
    /// above.
    gl_context: GlContext,
}
1403
1404impl Drop for GLProcessorST {
1405 fn drop(&mut self) {
1406 unsafe {
1407 {
1408 if self.mask_fbo != 0 {
1409 gls::gl::DeleteFramebuffers(1, &self.mask_fbo);
1410 }
1411 if self.mask_fbo_texture != 0 {
1412 gls::gl::DeleteTextures(1, &self.mask_fbo_texture);
1413 }
1414 if self.mask_atlas_pbo != 0 {
1415 gls::gl::DeleteBuffers(1, &self.mask_atlas_pbo);
1416 }
1417 }
1418 }
1419 }
1420}
1421
1422impl ImageProcessorTrait for GLProcessorST {
1423 fn convert(
1424 &mut self,
1425 src: &TensorImage,
1426 dst: &mut TensorImage,
1427 rotation: crate::Rotation,
1428 flip: Flip,
1429 crop: Crop,
1430 ) -> crate::Result<()> {
1431 crop.check_crop(src, dst)?;
1432 if !Self::check_src_format_supported(self.gl_context.transfer_backend, src) {
1433 return Err(crate::Error::NotSupported(format!(
1434 "Opengl doesn't support {} source texture",
1435 src.fourcc().display()
1436 )));
1437 }
1438
1439 if !Self::check_dst_format_supported(self.gl_context.transfer_backend, dst) {
1440 return Err(crate::Error::NotSupported(format!(
1441 "Opengl doesn't support {} destination texture",
1442 dst.fourcc().display()
1443 )));
1444 }
1445 log::debug!(
1446 "dst tensor: {:?} src tensor :{:?}",
1447 dst.tensor().memory(),
1448 src.tensor().memory()
1449 );
1450 check_gl_error(function!(), line!())?;
1451 if self.gl_context.transfer_backend.is_dma() && dst.tensor().memory() == TensorMemory::Dma {
1452 let res = self.convert_dest_dma(dst, src, rotation, flip, crop);
1454 return res;
1455 }
1456 if src.tensor().memory() == TensorMemory::Pbo && dst.tensor().memory() == TensorMemory::Pbo
1459 {
1460 return self.convert_pbo_to_pbo(dst, src, rotation, flip, crop);
1461 }
1462 if dst.tensor().memory() == TensorMemory::Pbo {
1467 return self.convert_any_to_pbo(dst, src, rotation, flip, crop);
1468 }
1469 if src.tensor().memory() == TensorMemory::Pbo {
1473 return self.convert_pbo_to_mem(dst, src, rotation, flip, crop);
1474 }
1475 let start = Instant::now();
1476 let res = self.convert_dest_non_dma(dst, src, rotation, flip, crop);
1477 log::debug!("convert_dest_non_dma takes {:?}", start.elapsed());
1478 res
1479 }
1480
1481 fn convert_ref(
1482 &mut self,
1483 src: &TensorImage,
1484 dst: &mut TensorImageRef<'_>,
1485 rotation: Rotation,
1486 flip: Flip,
1487 crop: Crop,
1488 ) -> crate::Result<()> {
1489 let mut cpu = CPUProcessor::new();
1491 cpu.convert_ref(src, dst, rotation, flip, crop)
1492 }
1493
1494 fn draw_masks(
1495 &mut self,
1496 dst: &mut TensorImage,
1497 detect: &[DetectBox],
1498 segmentation: &[Segmentation],
1499 ) -> Result<(), crate::Error> {
1500 use crate::FunctionTimer;
1501
1502 let _timer = FunctionTimer::new("GLProcessorST::draw_masks");
1503 if !matches!(dst.fourcc(), RGBA | RGB) {
1504 return Err(crate::Error::NotSupported(
1505 "Opengl image rendering only supports RGBA or RGB images".to_string(),
1506 ));
1507 }
1508
1509 let is_dma = match dst.tensor.memory() {
1510 edgefirst_tensor::TensorMemory::Dma if self.setup_renderbuffer_dma(dst).is_ok() => true,
1511 _ => {
1512 self.setup_renderbuffer_non_dma(
1514 dst,
1515 Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1516 )?;
1517 false
1518 }
1519 };
1520
1521 gls::enable(gls::gl::BLEND);
1522 gls::blend_func_separate(
1523 gls::gl::SRC_ALPHA,
1524 gls::gl::ONE_MINUS_SRC_ALPHA,
1525 gls::gl::ZERO,
1526 gls::gl::ONE,
1527 );
1528
1529 self.render_box(dst, detect)?;
1530 self.render_segmentation(detect, segmentation)?;
1531
1532 gls::finish();
1533 if !is_dma {
1534 let mut dst_map = dst.tensor().map()?;
1535 let format = match dst.fourcc() {
1536 RGB => gls::gl::RGB,
1537 RGBA => gls::gl::RGBA,
1538 _ => unreachable!(),
1539 };
1540 unsafe {
1541 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1542 gls::gl::ReadnPixels(
1543 0,
1544 0,
1545 dst.width() as i32,
1546 dst.height() as i32,
1547 format,
1548 gls::gl::UNSIGNED_BYTE,
1549 dst.tensor.len() as i32,
1550 dst_map.as_mut_ptr() as *mut c_void,
1551 );
1552 }
1553 }
1554
1555 Ok(())
1556 }
1557
1558 fn draw_masks_proto(
1559 &mut self,
1560 dst: &mut TensorImage,
1561 detect: &[DetectBox],
1562 proto_data: &ProtoData,
1563 ) -> crate::Result<()> {
1564 use crate::FunctionTimer;
1565
1566 let _timer = FunctionTimer::new("GLProcessorST::draw_masks_proto");
1567 if !matches!(dst.fourcc(), RGBA | RGB) {
1568 return Err(crate::Error::NotSupported(
1569 "Opengl image rendering only supports RGBA or RGB images".to_string(),
1570 ));
1571 }
1572
1573 let is_dma = match dst.tensor.memory() {
1574 edgefirst_tensor::TensorMemory::Dma if self.setup_renderbuffer_dma(dst).is_ok() => true,
1575 _ => {
1576 self.setup_renderbuffer_non_dma(
1577 dst,
1578 Crop::new().with_dst_rect(Some(Rect::new(0, 0, 0, 0))),
1579 )?;
1580 false
1581 }
1582 };
1583
1584 gls::enable(gls::gl::BLEND);
1585 gls::blend_func_separate(
1586 gls::gl::SRC_ALPHA,
1587 gls::gl::ONE_MINUS_SRC_ALPHA,
1588 gls::gl::ZERO,
1589 gls::gl::ONE,
1590 );
1591
1592 self.render_box(dst, detect)?;
1593 self.render_proto_segmentation(detect, proto_data)?;
1594
1595 gls::finish();
1596 if !is_dma {
1597 let mut dst_map = dst.tensor().map()?;
1598 let format = match dst.fourcc() {
1599 RGB => gls::gl::RGB,
1600 RGBA => gls::gl::RGBA,
1601 _ => unreachable!(),
1602 };
1603 unsafe {
1604 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
1605 gls::gl::ReadnPixels(
1606 0,
1607 0,
1608 dst.width() as i32,
1609 dst.height() as i32,
1610 format,
1611 gls::gl::UNSIGNED_BYTE,
1612 dst.tensor.len() as i32,
1613 dst_map.as_mut_ptr() as *mut c_void,
1614 );
1615 }
1616 }
1617
1618 Ok(())
1619 }
1620
    /// Trait entry point for atlas mask decoding.
    ///
    /// Takes `proto_data` by value and forwards it by reference to the
    /// inherent `GLProcessorST::decode_masks_atlas`. The fully qualified call
    /// resolves to the inherent method (inherent associated functions take
    /// precedence over trait methods), so this does not recurse.
    fn decode_masks_atlas(
        &mut self,
        detect: &[DetectBox],
        proto_data: ProtoData,
        output_width: usize,
        output_height: usize,
    ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
        GLProcessorST::decode_masks_atlas(self, detect, &proto_data, output_width, output_height)
    }
1630
1631 fn set_class_colors(&mut self, colors: &[[u8; 4]]) -> crate::Result<()> {
1632 if colors.is_empty() {
1633 return Ok(());
1634 }
1635 let mut colors_f32 = colors
1636 .iter()
1637 .map(|c| {
1638 [
1639 c[0] as f32 / 255.0,
1640 c[1] as f32 / 255.0,
1641 c[2] as f32 / 255.0,
1642 c[3] as f32 / 255.0,
1643 ]
1644 })
1645 .take(20)
1646 .collect::<Vec<[f32; 4]>>();
1647
1648 self.segmentation_program
1649 .load_uniform_4fv(c"colors", &colors_f32)?;
1650 self.instanced_segmentation_program
1651 .load_uniform_4fv(c"colors", &colors_f32)?;
1652 self.proto_segmentation_program
1653 .load_uniform_4fv(c"colors", &colors_f32)?;
1654 self.proto_segmentation_int8_nearest_program
1655 .load_uniform_4fv(c"colors", &colors_f32)?;
1656 self.proto_segmentation_int8_bilinear_program
1657 .load_uniform_4fv(c"colors", &colors_f32)?;
1658 self.proto_segmentation_f32_program
1659 .load_uniform_4fv(c"colors", &colors_f32)?;
1660
1661 colors_f32.iter_mut().for_each(|c| {
1662 c[3] = 1.0; });
1664 self.color_program
1665 .load_uniform_4fv(c"colors", &colors_f32)?;
1666
1667 Ok(())
1668 }
1669}
1670
1671impl GLProcessorST {
    /// Creates the GL context, compiles every shader program, allocates the
    /// textures and buffers, and probes the available transfer paths.
    ///
    /// `kind` is forwarded to `GlContext::new` to choose the EGL display.
    ///
    /// After construction the transfer backend may be adjusted:
    /// * a DMA backend that fails `verify_dma_buf_roundtrip` is downgraded to
    ///   `TransferBackend::Pbo` (and `support_rgb_direct` is cleared);
    /// * a `TransferBackend::Sync` backend is upgraded to `TransferBackend::Pbo`.
    ///
    /// # Errors
    /// Propagates context creation, GL capability check, shader compilation,
    /// uniform upload and GL error-check failures.
    pub fn new(kind: Option<EglDisplayKind>) -> Result<GLProcessorST, crate::Error> {
        let gl_context = GlContext::new(kind)?;
        // Resolve GL entry points through EGL; missing symbols become null.
        gls::load_with(|s| {
            gl_context
                .egl
                .get_proc_address(s)
                .map_or(std::ptr::null(), |p| p as *const _)
        });

        let has_float_linear = Self::gl_check_support()?;

        unsafe {
            // Byte-granular row alignment for both readback (PACK) and upload
            // (UNPACK), so rows are tightly packed.
            gls::gl::PixelStorei(gls::gl::PACK_ALIGNMENT, 1);
            gls::gl::PixelStorei(gls::gl::UNPACK_ALIGNMENT, 1);
        }

        // --- Conversion programs ---
        let texture_program_planar =
            GlProgram::new(generate_vertex_shader(), generate_planar_rgb_shader())?;

        let texture_program =
            GlProgram::new(generate_vertex_shader(), generate_texture_fragment_shader())?;

        let texture_program_yuv = GlProgram::new(
            generate_vertex_shader(),
            generate_texture_fragment_shader_yuv(),
        )?;

        // --- Segmentation programs; each starts with the default palette ---
        let segmentation_program =
            GlProgram::new(generate_vertex_shader(), generate_segmentation_shader())?;
        segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;
        let instanced_segmentation_program = GlProgram::new(
            generate_vertex_shader(),
            generate_instanced_segmentation_shader(),
        )?;
        instanced_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader(),
        )?;
        proto_segmentation_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_int8_nearest_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_int8_nearest(),
        )?;
        proto_segmentation_int8_nearest_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let proto_segmentation_int8_bilinear_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_int8_bilinear(),
        )?;
        proto_segmentation_int8_bilinear_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        // The dequantization program receives no color palette.
        let proto_dequant_int8_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_dequant_shader_int8(),
        )?;

        let proto_segmentation_f32_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_segmentation_shader_f32(),
        )?;
        proto_segmentation_f32_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        let color_program = GlProgram::new(generate_vertex_shader(), generate_color_shader())?;
        color_program.load_uniform_4fv(c"colors", &DEFAULT_COLORS)?;

        // --- Mask-logit programs used by `decode_masks_atlas` (no palette) ---
        let proto_mask_logit_int8_nearest_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_int8_nearest(),
        )?;
        let proto_mask_logit_int8_bilinear_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_int8_bilinear(),
        )?;
        let proto_mask_logit_f32_program = GlProgram::new(
            generate_vertex_shader(),
            generate_proto_mask_logit_shader_f32(),
        )?;

        // --- Int8 and packed-output conversion programs ---
        let texture_program_planar_int8 =
            GlProgram::new(generate_vertex_shader(), generate_planar_rgb_int8_shader())?;

        let packed_rgba8_program_2d =
            GlProgram::new(generate_vertex_shader(), generate_packed_rgba8_shader_2d())?;
        let packed_rgba8_int8_program_2d = GlProgram::new(
            generate_vertex_shader(),
            generate_packed_rgba8_int8_shader_2d(),
        )?;

        let texture_int8_program =
            GlProgram::new(generate_vertex_shader(), generate_texture_int8_shader())?;
        let texture_int8_program_yuv =
            GlProgram::new(generate_vertex_shader(), generate_texture_int8_shader_yuv())?;

        // --- Textures and geometry buffers ---
        let camera_eglimage_texture = Texture::new();
        let camera_normal_texture = Texture::new();
        let render_texture = Texture::new();
        let segmentation_texture = Texture::new();
        let proto_texture = Texture::new();
        let proto_dequant_texture = Texture::new();
        // NOTE(review): the arguments presumably mean (attribute index,
        // components per vertex, capacity) — confirm against `Buffer::new`.
        let vertex_buffer = Buffer::new(0, 3, 100);
        let texture_buffer = Buffer::new(1, 2, 100);

        let mut converter = GLProcessorST {
            gl_context,
            texture_program,
            texture_program_yuv,
            texture_program_planar,
            texture_program_planar_int8,
            packed_rgba8_program_2d,
            packed_rgba8_int8_program_2d,
            texture_int8_program,
            texture_int8_program_yuv,
            // Placeholder; the real value is probed right after construction.
            support_rgb_direct: false,
            camera_eglimage_texture,
            camera_normal_texture,
            segmentation_texture,
            proto_texture,
            proto_segmentation_int8_nearest_program,
            proto_segmentation_int8_bilinear_program,
            proto_dequant_int8_program,
            proto_segmentation_f32_program,
            has_float_linear,
            int8_interpolation_mode: Int8InterpolationMode::Bilinear,
            proto_dequant_texture,
            proto_mask_logit_int8_bilinear_program,
            proto_mask_logit_int8_nearest_program,
            proto_mask_logit_f32_program,
            // Mask FBO, texture and PBO are created lazily; 0 means "not yet".
            mask_fbo: 0,
            mask_fbo_texture: 0,
            mask_fbo_width: 0,
            mask_fbo_height: 0,
            mask_atlas_pbo: 0,
            vertex_buffer,
            texture_buffer,
            convert_fbo: FrameBuffer::new(),
            src_egl_cache: EglImageCache::new(8),
            dst_egl_cache: EglImageCache::new(8),
            packed_rgb_intermediate_tex: Texture::new(),
            packed_rgb_fbo: FrameBuffer::new(),
            packed_rgb_intermediate_size: (0, 0),
            render_texture,
            segmentation_program,
            instanced_segmentation_program,
            proto_segmentation_program,
            color_program,
        };
        // Surface any GL error raised during setup before probing.
        check_gl_error(function!(), line!())?;

        converter.support_rgb_direct = converter.probe_rgb_direct_support();

        // A DMA backend is only kept if an actual render roundtrip produces
        // the expected pixels.
        if converter.gl_context.transfer_backend.is_dma() && !converter.verify_dma_buf_roundtrip() {
            log::info!("DMA-buf verification failed — falling back to PBO transfers");
            converter.gl_context.transfer_backend = TransferBackend::Pbo;
            converter.support_rgb_direct = false;
        }

        if converter.gl_context.transfer_backend == TransferBackend::Sync {
            log::info!("Upgrading transfer backend from Sync to Pbo (GL context available)");
            converter.gl_context.transfer_backend = TransferBackend::Pbo;
        }

        log::debug!(
            "GLConverter created (transfer={:?}, rgb_direct={})",
            converter.gl_context.transfer_backend,
            converter.support_rgb_direct
        );
        Ok(converter)
    }
1856
1857 fn probe_rgb_direct_support(&self) -> bool {
1861 if !self.gl_context.transfer_backend.is_dma() {
1862 log::debug!("probe_rgb_direct: no DMA support");
1863 return false;
1864 }
1865
1866 if self
1868 .gl_context
1869 .egl
1870 .get_proc_address("glEGLImageTargetRenderbufferStorageOES")
1871 .is_none()
1872 {
1873 log::debug!("probe_rgb_direct: glEGLImageTargetRenderbufferStorageOES not available");
1874 return false;
1875 }
1876
1877 let test_img = match TensorImage::new(64, 64, RGB, Some(TensorMemory::Dma)) {
1879 Ok(img) => img,
1880 Err(e) => {
1881 log::debug!("probe_rgb_direct: failed to allocate test DMA buffer: {e}");
1882 return false;
1883 }
1884 };
1885
1886 let egl_image =
1888 match self.create_egl_image_with_dims(&test_img, 64, 64, DrmFourcc::Bgr888, 3) {
1889 Ok(img) => img,
1890 Err(e) => {
1891 log::debug!("probe_rgb_direct: EGLImage creation failed: {e}");
1892 return false;
1893 }
1894 };
1895
1896 let result = unsafe {
1898 let mut rbo = 0u32;
1899 gls::gl::GenRenderbuffers(1, &mut rbo);
1900 gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, rbo);
1901 gls::gl::EGLImageTargetRenderbufferStorageOES(
1902 gls::gl::RENDERBUFFER,
1903 egl_image.egl_image.as_ptr(),
1904 );
1905
1906 let gl_err = gls::gl::GetError();
1907 if gl_err != gls::gl::NO_ERROR {
1908 log::debug!(
1909 "probe_rgb_direct: EGLImageTargetRenderbufferStorageOES failed: {gl_err:#X}"
1910 );
1911 gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, 0);
1912 gls::gl::DeleteRenderbuffers(1, &rbo);
1913 return false;
1914 }
1915
1916 let mut fbo = 0u32;
1917 gls::gl::GenFramebuffers(1, &mut fbo);
1918 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, fbo);
1919 gls::gl::FramebufferRenderbuffer(
1920 gls::gl::FRAMEBUFFER,
1921 gls::gl::COLOR_ATTACHMENT0,
1922 gls::gl::RENDERBUFFER,
1923 rbo,
1924 );
1925
1926 let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
1927 let complete = status == gls::gl::FRAMEBUFFER_COMPLETE;
1928
1929 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0);
1931 gls::gl::DeleteFramebuffers(1, &fbo);
1932 gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, 0);
1933 gls::gl::DeleteRenderbuffers(1, &rbo);
1934
1935 complete
1936 };
1937 log::info!("probe_rgb_direct: BGR888 renderbuffer FBO support = {result}");
1940 result
1941 }
1942
1943 fn verify_dma_buf_roundtrip(&mut self) -> bool {
1952 let src = match TensorImage::new(64, 64, RGBA, Some(TensorMemory::Dma)) {
1954 Ok(img) => img,
1955 Err(e) => {
1956 log::info!("verify_dma_buf_roundtrip: failed to allocate DMA source: {e}");
1957 return false;
1958 }
1959 };
1960
1961 {
1962 let mut map = match src.tensor().map() {
1963 Ok(m) => m,
1964 Err(e) => {
1965 log::info!("verify_dma_buf_roundtrip: failed to map DMA source: {e}");
1966 return false;
1967 }
1968 };
1969 for pixel in map.chunks_exact_mut(4) {
1970 pixel[0] = 255; pixel[1] = 0; pixel[2] = 0; pixel[3] = 255; }
1975 }
1976
1977 let mut dst = match TensorImage::new(64, 64, RGBA, Some(TensorMemory::Dma)) {
1979 Ok(img) => img,
1980 Err(e) => {
1981 log::info!("verify_dma_buf_roundtrip: failed to allocate DMA destination: {e}");
1982 return false;
1983 }
1984 };
1985
1986 if let Err(e) =
1988 self.convert_dest_dma(&mut dst, &src, Rotation::None, Flip::None, Crop::no_crop())
1989 {
1990 log::info!("verify_dma_buf_roundtrip: convert_dest_dma failed: {e}");
1991 return false;
1992 }
1993
1994 let map = match dst.tensor().map() {
1996 Ok(m) => m,
1997 Err(e) => {
1998 log::info!("verify_dma_buf_roundtrip: failed to map DMA destination: {e}");
1999 return false;
2000 }
2001 };
2002
2003 let offset = (32 * 64 + 32) * 4;
2004 if map.len() < offset + 4 {
2005 log::info!("verify_dma_buf_roundtrip: destination buffer too small");
2006 return false;
2007 }
2008
2009 let r = map[offset];
2010 let g = map[offset + 1];
2011 let b = map[offset + 2];
2012 let a = map[offset + 3];
2013
2014 let pass = r > 250 && g < 5 && b < 5 && a > 250;
2015
2016 if pass {
2017 log::info!("verify_dma_buf_roundtrip: PASSED (center pixel RGBA={r},{g},{b},{a})");
2018 } else {
2019 log::info!(
2020 "verify_dma_buf_roundtrip: FAILED (center pixel RGBA={r},{g},{b},{a}, \
2021 expected ~255,0,0,255)"
2022 );
2023 }
2024
2025 pass
2026 }
2027
2028 fn compute_atlas_regions(
2033 detect: &[DetectBox],
2034 output_width: usize,
2035 output_height: usize,
2036 padding: usize,
2037 ) -> (Vec<MaskRegion>, usize) {
2038 let ow = output_width as i32;
2039 let oh = output_height as i32;
2040 let owf = output_width as f32;
2041 let ohf = output_height as f32;
2042 let pad = padding as i32;
2043
2044 let mut regions = Vec::with_capacity(detect.len());
2045 let mut atlas_y = 0usize;
2046 for det in detect.iter() {
2047 let bbox_x = (det.bbox.xmin * owf).round() as i32;
2048 let bbox_y = (det.bbox.ymin * ohf).round() as i32;
2049 let bbox_w = ((det.bbox.xmax - det.bbox.xmin) * owf).round() as i32;
2050 let bbox_h = ((det.bbox.ymax - det.bbox.ymin) * ohf).round() as i32;
2051 let bbox_x = bbox_x.max(0).min(ow);
2052 let bbox_y = bbox_y.max(0).min(oh);
2053 let bbox_w = bbox_w.max(1).min(ow - bbox_x);
2054 let bbox_h = bbox_h.max(1).min(oh - bbox_y);
2055
2056 let padded_x = (bbox_x - pad).max(0);
2057 let padded_y = (bbox_y - pad).max(0);
2058 let padded_w = ((bbox_x + bbox_w + pad).min(ow) - padded_x).max(1);
2059 let padded_h = ((bbox_y + bbox_h + pad).min(oh) - padded_y).max(1);
2060
2061 regions.push(MaskRegion {
2062 atlas_y_offset: atlas_y,
2063 padded_x: padded_x as usize,
2064 padded_y: padded_y as usize,
2065 padded_w: padded_w as usize,
2066 padded_h: padded_h as usize,
2067 bbox_x: bbox_x as usize,
2068 bbox_y: bbox_y as usize,
2069 bbox_w: bbox_w as usize,
2070 bbox_h: bbox_h as usize,
2071 });
2072 atlas_y += padded_h as usize;
2073 }
2074 (regions, atlas_y)
2075 }
2076
2077 pub fn set_int8_interpolation_mode(&mut self, mode: Int8InterpolationMode) {
2079 self.int8_interpolation_mode = mode;
2080 log::debug!("Int8 interpolation mode set to {:?}", mode);
2081 }
2082
2083 fn ensure_mask_fbo(&mut self, width: usize, height: usize) -> crate::Result<()> {
2086 if self.mask_fbo_width == width && self.mask_fbo_height == height && self.mask_fbo != 0 {
2087 return Ok(());
2088 }
2089
2090 if self.mask_fbo == 0 {
2092 unsafe {
2093 gls::gl::GenFramebuffers(1, &mut self.mask_fbo);
2094 }
2095 }
2096 if self.mask_fbo_texture == 0 {
2098 unsafe {
2099 gls::gl::GenTextures(1, &mut self.mask_fbo_texture);
2100 }
2101 }
2102
2103 unsafe {
2105 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.mask_fbo_texture);
2106 gls::gl::TexImage2D(
2107 gls::gl::TEXTURE_2D,
2108 0,
2109 gls::gl::R8 as i32,
2110 width as i32,
2111 height as i32,
2112 0,
2113 gls::gl::RED,
2114 gls::gl::UNSIGNED_BYTE,
2115 std::ptr::null(),
2116 );
2117 gls::gl::TexParameteri(
2118 gls::gl::TEXTURE_2D,
2119 gls::gl::TEXTURE_MIN_FILTER,
2120 gls::gl::NEAREST as i32,
2121 );
2122 gls::gl::TexParameteri(
2123 gls::gl::TEXTURE_2D,
2124 gls::gl::TEXTURE_MAG_FILTER,
2125 gls::gl::NEAREST as i32,
2126 );
2127 }
2128
2129 unsafe {
2131 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
2132 gls::gl::FramebufferTexture2D(
2133 gls::gl::FRAMEBUFFER,
2134 gls::gl::COLOR_ATTACHMENT0,
2135 gls::gl::TEXTURE_2D,
2136 self.mask_fbo_texture,
2137 0,
2138 );
2139 let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
2140 if status != gls::gl::FRAMEBUFFER_COMPLETE {
2141 return Err(crate::Error::OpenGl(format!(
2142 "Mask FBO incomplete: status=0x{status:X}"
2143 )));
2144 }
2145 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0);
2146 }
2147
2148 self.mask_fbo_width = width;
2149 self.mask_fbo_height = height;
2150 log::debug!("Mask FBO allocated at {width}x{height}");
2151 Ok(())
2152 }
2153
2154 fn ensure_mask_atlas_size(&mut self, width: usize, atlas_height: usize) -> crate::Result<()> {
2158 if self.mask_fbo_width == width
2159 && self.mask_fbo_height >= atlas_height
2160 && self.mask_fbo != 0
2161 && self.mask_atlas_pbo != 0
2162 {
2163 return Ok(());
2164 }
2165 self.ensure_mask_fbo(width, atlas_height)?;
2166 let pbo_size = width * atlas_height;
2167 unsafe {
2168 if self.mask_atlas_pbo == 0 {
2169 gls::gl::GenBuffers(1, &mut self.mask_atlas_pbo);
2170 }
2171 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, self.mask_atlas_pbo);
2172 gls::gl::BufferData(
2173 gls::gl::PIXEL_PACK_BUFFER,
2174 pbo_size as isize,
2175 std::ptr::null(),
2176 gls::gl::DYNAMIC_READ,
2177 );
2178 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2179 }
2180 Ok(())
2181 }
2182
2183 pub fn decode_masks_atlas(
2191 &mut self,
2192 detect: &[DetectBox],
2193 proto_data: &ProtoData,
2194 output_width: usize,
2195 output_height: usize,
2196 ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
2197 use crate::FunctionTimer;
2198
2199 let _timer = FunctionTimer::new("GLProcessorST::decode_masks_atlas");
2200
2201 if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
2202 return Ok((Vec::new(), Vec::new()));
2203 }
2204
2205 let padding = 4usize;
2206
2207 let (height, width, num_protos) = proto_data.protos.dim();
2208 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
2209
2210 let (regions, compact_atlas_height) =
2212 Self::compute_atlas_regions(detect, output_width, output_height, padding);
2213
2214 let (saved_fbo, saved_viewport) = unsafe {
2216 let mut fbo: i32 = 0;
2217 gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
2218 let mut vp = [0i32; 4];
2219 gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
2220 (fbo as u32, vp)
2221 };
2222
2223 self.ensure_mask_atlas_size(output_width, compact_atlas_height)?;
2225
2226 gls::active_texture(gls::gl::TEXTURE0);
2228 gls::bind_texture(texture_target, self.proto_texture.id);
2229 gls::tex_parameteri(
2230 texture_target,
2231 gls::gl::TEXTURE_MIN_FILTER,
2232 gls::gl::NEAREST as i32,
2233 );
2234 gls::tex_parameteri(
2235 texture_target,
2236 gls::gl::TEXTURE_MAG_FILTER,
2237 gls::gl::NEAREST as i32,
2238 );
2239 gls::tex_parameteri(
2240 texture_target,
2241 gls::gl::TEXTURE_WRAP_S,
2242 gls::gl::CLAMP_TO_EDGE as i32,
2243 );
2244 gls::tex_parameteri(
2245 texture_target,
2246 gls::gl::TEXTURE_WRAP_T,
2247 gls::gl::CLAMP_TO_EDGE as i32,
2248 );
2249
2250 let atlas_result = match &proto_data.protos {
2251 ProtoTensor::Quantized {
2252 protos,
2253 quantization,
2254 } => {
2255 let mut tex_data = vec![0i8; height * width * num_protos];
2256 for k in 0..num_protos {
2257 for y in 0..height {
2258 for x in 0..width {
2259 tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
2260 }
2261 }
2262 }
2263 gls::tex_image3d(
2264 texture_target,
2265 0,
2266 gls::gl::R8I as i32,
2267 width as i32,
2268 height as i32,
2269 num_protos as i32,
2270 0,
2271 gls::gl::RED_INTEGER,
2272 gls::gl::BYTE,
2273 Some(&tex_data),
2274 );
2275
2276 let proto_scale = quantization.scale;
2277 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
2278
2279 let program = match self.int8_interpolation_mode {
2280 Int8InterpolationMode::Nearest => &self.proto_mask_logit_int8_nearest_program,
2281 _ => &self.proto_mask_logit_int8_bilinear_program,
2282 };
2283 gls::use_program(program.id);
2284 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
2285 program.load_uniform_1f(c"proto_scale", proto_scale)?;
2286
2287 self.render_mask_atlas_compact(
2288 program,
2289 regions,
2290 &proto_data.mask_coefficients,
2291 output_width,
2292 output_height,
2293 Some(proto_scaled_zp),
2294 )
2295 }
2296 ProtoTensor::Float(protos_f32) => {
2297 let mut tex_data = vec![0.0f32; height * width * num_protos];
2298 for k in 0..num_protos {
2299 for y in 0..height {
2300 for x in 0..width {
2301 tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
2302 }
2303 }
2304 }
2305 gls::tex_image3d(
2306 texture_target,
2307 0,
2308 gls::gl::R32F as i32,
2309 width as i32,
2310 height as i32,
2311 num_protos as i32,
2312 0,
2313 gls::gl::RED,
2314 gls::gl::FLOAT,
2315 Some(&tex_data),
2316 );
2317 if self.has_float_linear {
2318 gls::tex_parameteri(
2319 texture_target,
2320 gls::gl::TEXTURE_MIN_FILTER,
2321 gls::gl::LINEAR as i32,
2322 );
2323 gls::tex_parameteri(
2324 texture_target,
2325 gls::gl::TEXTURE_MAG_FILTER,
2326 gls::gl::LINEAR as i32,
2327 );
2328 }
2329
2330 let program = &self.proto_mask_logit_f32_program;
2331 gls::use_program(program.id);
2332 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
2333
2334 self.render_mask_atlas_compact(
2335 program,
2336 regions,
2337 &proto_data.mask_coefficients,
2338 output_width,
2339 output_height,
2340 None,
2341 )
2342 }
2343 };
2344
2345 unsafe {
2347 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
2348 gls::gl::Viewport(
2349 saved_viewport[0],
2350 saved_viewport[1],
2351 saved_viewport[2],
2352 saved_viewport[3],
2353 );
2354 }
2355
2356 let (atlas_pixels, regions) = atlas_result?;
2357 Ok((atlas_pixels, regions))
2358 }
2359
    /// Draws one quad per region into the mask FBO and reads the atlas back
    /// through the pixel-pack buffer.
    ///
    /// * `program` — mask-logit program; the caller has already made it current.
    /// * `regions` — atlas layout; returned unchanged alongside the pixels.
    /// * `mask_coefficients` — one coefficient vector per region (paired by
    ///   position; extra entries on either side are ignored by `zip`).
    /// * `output_width` / `output_height` — size of the output image the
    ///   regions were computed for; the atlas is `output_width` wide.
    /// * `proto_scaled_zp` — `Some` for quantized protos; used to upload the
    ///   `coeff_sum_x_szp` uniform per region.
    ///
    /// Returns the atlas as `output_width * atlas_height` bytes plus `regions`.
    /// This function changes the framebuffer binding, viewport and blend state
    /// and does not restore them.
    ///
    /// # Errors
    /// Propagates uniform upload failures and returns `Error::OpenGl` when the
    /// PBO cannot be mapped.
    #[allow(clippy::too_many_arguments)]
    fn render_mask_atlas_compact(
        &self,
        program: &GlProgram,
        regions: Vec<MaskRegion>,
        mask_coefficients: &[Vec<f32>],
        output_width: usize,
        output_height: usize,
        proto_scaled_zp: Option<f32>,
    ) -> crate::Result<(Vec<u8>, Vec<MaskRegion>)> {
        if regions.is_empty() {
            return Ok((Vec::new(), Vec::new()));
        }

        let owf = output_width as f32;
        let ohf = output_height as f32;

        // Regions are stacked vertically, so the last one ends at the atlas height.
        let atlas_height = regions.last().map_or(0, |r| r.atlas_y_offset + r.padded_h);
        let ahf = atlas_height as f32;

        unsafe {
            // Render into the mask FBO with blending off so each quad
            // overwrites the cleared background.
            gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.mask_fbo);
            gls::gl::Viewport(0, 0, output_width as i32, atlas_height as i32);
            gls::gl::Disable(gls::gl::BLEND);
            gls::gl::ClearColor(0.0, 0.0, 0.0, 0.0);
            gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
        }

        // Only the first coefficient vector is inspected for the warning.
        if let Some(first_coeff) = mask_coefficients.first() {
            if first_coeff.len() > 32 {
                log::warn!(
                    "render_mask_atlas_compact: {} mask coefficients exceeds shader \
                     limit of 32 — coefficients will be truncated",
                    first_coeff.len()
                );
            }
        }

        for (region, coeff) in regions.iter().zip(mask_coefficients.iter()) {
            // Pack up to 32 coefficients into 8 vec4s; unused slots stay 0.
            let mut packed_coeff = [[0.0f32; 4]; 8];
            for (j, val) in coeff.iter().enumerate().take(32) {
                packed_coeff[j / 4][j % 4] = *val;
            }
            program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;

            if let Some(szp) = proto_scaled_zp {
                // Quantized path: coefficient sum times the scaled zero point.
                let coeff_sum: f32 = coeff.iter().take(32).sum();
                program.load_uniform_1f(c"coeff_sum_x_szp", coeff_sum * szp)?;
            }

            // Destination quad in normalised device coordinates (-1..1):
            // X relative to the output width, Y relative to the atlas height.
            let dst_left = region.padded_x as f32 / owf * 2.0 - 1.0;
            let dst_right = (region.padded_x + region.padded_w) as f32 / owf * 2.0 - 1.0;
            let dst_bottom = region.atlas_y_offset as f32 / ahf * 2.0 - 1.0;
            let dst_top = (region.atlas_y_offset + region.padded_h) as f32 / ahf * 2.0 - 1.0;

            // Source texture coordinates (0..1) of the padded box in the output image.
            let src_left = region.padded_x as f32 / owf;
            let src_right = (region.padded_x + region.padded_w) as f32 / owf;
            let src_bottom = region.padded_y as f32 / ohf;
            let src_top = (region.padded_y + region.padded_h) as f32 / ohf;

            unsafe {
                // Four corner positions (x, y, z).
                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
                gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
                let verts: [f32; 12] = [
                    dst_left, dst_top, 0.0, dst_right, dst_top, 0.0, dst_right, dst_bottom, 0.0,
                    dst_left, dst_bottom, 0.0,
                ];
                gls::gl::BufferSubData(
                    gls::gl::ARRAY_BUFFER,
                    0,
                    (size_of::<f32>() * 12) as isize,
                    verts.as_ptr() as *const c_void,
                );

                // Matching texture coordinates, in the same corner order.
                gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
                gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
                let tc: [f32; 8] = [
                    src_left, src_top, src_right, src_top, src_right, src_bottom, src_left,
                    src_bottom,
                ];
                gls::gl::BufferSubData(
                    gls::gl::ARRAY_BUFFER,
                    0,
                    (size_of::<f32>() * 8) as isize,
                    tc.as_ptr() as *const c_void,
                );

                // Draw the quad as a triangle fan; indices come from client memory.
                let idx: [u32; 4] = [0, 1, 2, 3];
                gls::gl::DrawElements(
                    gls::gl::TRIANGLE_FAN,
                    4,
                    gls::gl::UNSIGNED_INT,
                    idx.as_ptr() as *const c_void,
                );
            }
        }

        // One byte per atlas pixel (single RED channel, unsigned bytes).
        let atlas_bytes = output_width * atlas_height;
        let mut pixels = vec![0u8; atlas_bytes];

        unsafe {
            // With a pixel-pack buffer bound, the null pointer passed to
            // `ReadnPixels` is an offset into that buffer.
            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, self.mask_atlas_pbo);
            gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
            gls::gl::ReadnPixels(
                0,
                0,
                output_width as i32,
                atlas_height as i32,
                gls::gl::RED,
                gls::gl::UNSIGNED_BYTE,
                atlas_bytes as i32,
                std::ptr::null_mut(),
            );
            // Wait for the transfer to complete before mapping the buffer.
            gls::gl::Finish();

            let ptr = gls::gl::MapBufferRange(
                gls::gl::PIXEL_PACK_BUFFER,
                0,
                atlas_bytes as isize,
                gls::gl::MAP_READ_BIT,
            );
            if ptr.is_null() {
                // Leave no PBO bound on the failure path.
                gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
                return Err(crate::Error::OpenGl(
                    "Failed to map compact atlas PBO for readback".to_string(),
                ));
            }
            // `pixels` was allocated with exactly `atlas_bytes` bytes, the same
            // length that was mapped.
            std::ptr::copy_nonoverlapping(ptr as *const u8, pixels.as_mut_ptr(), atlas_bytes);
            gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
            gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
        }

        Ok((pixels, regions))
    }
2509
2510 fn check_src_format_supported(backend: TransferBackend, img: &TensorImage) -> bool {
2511 if backend.is_dma() && img.tensor().memory() == TensorMemory::Dma {
2512 matches!(img.fourcc(), RGBA | GREY | YUYV | NV12)
2516 } else {
2517 matches!(img.fourcc(), RGB | RGBA | GREY)
2518 }
2519 }
2520
2521 fn check_dst_format_supported(backend: TransferBackend, img: &TensorImage) -> bool {
2522 if backend.is_dma() && img.tensor().memory() == TensorMemory::Dma {
2523 matches!(
2524 img.fourcc(),
2525 RGBA | GREY | PLANAR_RGB | RGB | RGB_INT8 | PLANAR_RGB_INT8
2526 )
2527 } else {
2528 matches!(img.fourcc(), RGB | RGBA | GREY | RGB_INT8)
2529 }
2530 }
2531
2532 fn gl_check_support() -> Result<bool, crate::Error> {
2535 if let Ok(version) = gls::get_string(gls::gl::SHADING_LANGUAGE_VERSION) {
2536 log::debug!("GL Shading Language Version: {version:?}");
2537 } else {
2538 log::warn!("Could not get GL Shading Language Version");
2539 }
2540
2541 let extensions = unsafe {
2542 let str = gls::gl::GetString(gls::gl::EXTENSIONS);
2543 if str.is_null() {
2544 return Err(crate::Error::GLVersion(
2545 "GL returned no supported extensions".to_string(),
2546 ));
2547 }
2548 CStr::from_ptr(str as *const c_char)
2549 .to_string_lossy()
2550 .to_string()
2551 };
2552 log::debug!("GL Extensions: {extensions}");
2553 let required_ext = ["GL_OES_EGL_image_external_essl3"];
2554 let extensions = extensions.split_ascii_whitespace().collect::<BTreeSet<_>>();
2555 for required in required_ext {
2556 if !extensions.contains(required) {
2557 return Err(crate::Error::GLVersion(format!(
2558 "GL does not support {required} extension",
2559 )));
2560 }
2561 }
2562
2563 let has_float_linear = extensions.contains("GL_OES_texture_float_linear");
2564 log::debug!("GL_OES_texture_float_linear: {has_float_linear}");
2565
2566 Ok(has_float_linear)
2567 }
2568
2569 fn setup_renderbuffer_dma(&mut self, dst: &TensorImage) -> crate::Result<()> {
2570 self.convert_fbo.bind();
2571
2572 let (width, height) = if matches!(dst.fourcc(), PLANAR_RGB | PLANAR_RGB_INT8) {
2573 let width = dst.width();
2574 let height = dst.height() * 3;
2575 (width as i32, height as i32)
2576 } else {
2577 (dst.width() as i32, dst.height() as i32)
2578 };
2579 let dest_egl = self.get_or_create_egl_image(CacheKind::Dst, dst)?;
2580 unsafe {
2581 gls::gl::UseProgram(self.texture_program_yuv.id);
2582 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2583 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2584 gls::gl::TexParameteri(
2585 gls::gl::TEXTURE_2D,
2586 gls::gl::TEXTURE_MIN_FILTER,
2587 gls::gl::LINEAR as i32,
2588 );
2589 gls::gl::TexParameteri(
2590 gls::gl::TEXTURE_2D,
2591 gls::gl::TEXTURE_MAG_FILTER,
2592 gls::gl::LINEAR as i32,
2593 );
2594 gls::gl::EGLImageTargetTexture2DOES(gls::gl::TEXTURE_2D, dest_egl.as_ptr());
2595 gls::gl::FramebufferTexture2D(
2596 gls::gl::FRAMEBUFFER,
2597 gls::gl::COLOR_ATTACHMENT0,
2598 gls::gl::TEXTURE_2D,
2599 self.render_texture.id,
2600 0,
2601 );
2602 check_gl_error(function!(), line!())?;
2603 gls::gl::Viewport(0, 0, width, height);
2604 }
2605 Ok(())
2606 }
2607
2608 fn convert_dest_dma(
2609 &mut self,
2610 dst: &mut TensorImage,
2611 src: &TensorImage,
2612 rotation: crate::Rotation,
2613 flip: Flip,
2614 crop: Crop,
2615 ) -> crate::Result<()> {
2616 assert!(self.gl_context.transfer_backend.is_dma());
2617 if fourcc_is_packed_rgb(dst.fourcc()) {
2618 if self.support_rgb_direct {
2619 self.convert_to_rgb_direct(src, dst, rotation, flip, crop)
2620 } else {
2621 Err(crate::Error::NotSupported(
2624 "OpenGL two-pass packed RGB disabled (no direct RGB support)".into(),
2625 ))
2626 }
2627 } else if dst.is_planar() {
2628 self.setup_renderbuffer_dma(dst)?;
2629 self.convert_to_planar(src, dst, rotation, flip, crop)
2630 } else {
2631 self.setup_renderbuffer_dma(dst)?;
2632 self.convert_to(src, dst, rotation, flip, crop)
2633 }
2634 }
2635
2636 fn setup_renderbuffer_non_dma(&mut self, dst: &TensorImage, crop: Crop) -> crate::Result<()> {
2637 debug_assert!(matches!(
2638 dst.fourcc(),
2639 RGB | RGBA | GREY | PLANAR_RGB | RGB_INT8
2640 ));
2641 let (width, height) = if dst.is_planar() {
2642 let width = dst.width() / 4;
2643 let height = match dst.fourcc() {
2644 RGBA => dst.height() * 4,
2645 RGB => dst.height() * 3,
2646 GREY => dst.height(),
2647 _ => unreachable!(),
2648 };
2649 (width as i32, height as i32)
2650 } else {
2651 (dst.width() as i32, dst.height() as i32)
2652 };
2653
2654 let format = if dst.is_planar() {
2655 gls::gl::RED
2656 } else {
2657 match dst.fourcc() {
2658 RGB | RGB_INT8 => gls::gl::RGB,
2659 RGBA => gls::gl::RGBA,
2660 GREY => gls::gl::RED,
2661 _ => unreachable!(),
2662 }
2663 };
2664
2665 let start = Instant::now();
2666 self.convert_fbo.bind();
2667
2668 let map;
2669
2670 let pixels = if crop.dst_rect.is_none_or(|crop| {
2671 crop.top == 0
2672 && crop.left == 0
2673 && crop.height == dst.height()
2674 && crop.width == dst.width()
2675 }) {
2676 std::ptr::null()
2677 } else {
2678 map = dst.tensor().map()?;
2679 map.as_ptr() as *const c_void
2680 };
2681 unsafe {
2682 gls::gl::UseProgram(self.texture_program.id);
2683 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
2684 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2685 gls::gl::TexParameteri(
2686 gls::gl::TEXTURE_2D,
2687 gls::gl::TEXTURE_MIN_FILTER,
2688 gls::gl::LINEAR as i32,
2689 );
2690 gls::gl::TexParameteri(
2691 gls::gl::TEXTURE_2D,
2692 gls::gl::TEXTURE_MAG_FILTER,
2693 gls::gl::LINEAR as i32,
2694 );
2695
2696 gls::gl::TexImage2D(
2697 gls::gl::TEXTURE_2D,
2698 0,
2699 format as i32,
2700 width,
2701 height,
2702 0,
2703 format,
2704 gls::gl::UNSIGNED_BYTE,
2705 pixels,
2706 );
2707 check_gl_error(function!(), line!())?;
2708 gls::gl::FramebufferTexture2D(
2709 gls::gl::FRAMEBUFFER,
2710 gls::gl::COLOR_ATTACHMENT0,
2711 gls::gl::TEXTURE_2D,
2712 self.render_texture.id,
2713 0,
2714 );
2715 check_gl_error(function!(), line!())?;
2716 gls::gl::Viewport(0, 0, width, height);
2717 }
2718 log::debug!("Set up framebuffer takes {:?}", start.elapsed());
2719 Ok(())
2720 }
2721
2722 fn convert_dest_non_dma(
2723 &mut self,
2724 dst: &mut TensorImage,
2725 src: &TensorImage,
2726 rotation: crate::Rotation,
2727 flip: Flip,
2728 crop: Crop,
2729 ) -> crate::Result<()> {
2730 self.setup_renderbuffer_non_dma(dst, crop)?;
2731 let start = Instant::now();
2732 if dst.is_planar() {
2733 self.convert_to_planar(src, dst, rotation, flip, crop)?;
2734 } else {
2735 self.convert_to(src, dst, rotation, flip, crop)?;
2736 }
2737 log::debug!("Draw to framebuffer takes {:?}", start.elapsed());
2738 let start = Instant::now();
2739 let dest_format = match dst.fourcc() {
2740 RGB | RGB_INT8 => gls::gl::RGB,
2741 RGBA => gls::gl::RGBA,
2742 GREY => gls::gl::RED,
2743 _ => unreachable!(),
2744 };
2745
2746 unsafe {
2747 let mut dst_map = dst.tensor().map()?;
2748 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
2749 gls::gl::ReadnPixels(
2750 0,
2751 0,
2752 dst.width() as i32,
2753 dst.height() as i32,
2754 dest_format,
2755 gls::gl::UNSIGNED_BYTE,
2756 dst.tensor.len() as i32,
2757 dst_map.as_mut_ptr() as *mut c_void,
2758 );
2759 if fourcc_is_int8(dst.fourcc()) {
2761 for byte in dst_map.iter_mut() {
2762 *byte ^= 0x80;
2763 }
2764 }
2765 }
2766 log::debug!("Read from framebuffer takes {:?}", start.elapsed());
2767 Ok(())
2768 }
2769
2770 fn convert_pbo_to_pbo(
2776 &mut self,
2777 dst: &mut TensorImage,
2778 src: &TensorImage,
2779 rotation: crate::Rotation,
2780 flip: Flip,
2781 crop: Crop,
2782 ) -> crate::Result<()> {
2783 let (src_buffer_id, dst_buffer_id) = {
2785 let src_pbo = match &src.tensor {
2786 edgefirst_tensor::Tensor::Pbo(p) => p,
2787 _ => {
2788 return Err(crate::Error::OpenGl(
2789 "convert_pbo_to_pbo: src is not a PBO tensor".to_string(),
2790 ))
2791 }
2792 };
2793 let dst_pbo = match &dst.tensor {
2794 edgefirst_tensor::Tensor::Pbo(p) => p,
2795 _ => {
2796 return Err(crate::Error::OpenGl(
2797 "convert_pbo_to_pbo: dst is not a PBO tensor".to_string(),
2798 ))
2799 }
2800 };
2801
2802 if src_pbo.is_mapped() || dst_pbo.is_mapped() {
2803 return Err(crate::Error::OpenGl(
2804 "Cannot convert PBO tensors while they are mapped".to_string(),
2805 ));
2806 }
2807
2808 (src_pbo.buffer_id(), dst_pbo.buffer_id())
2809 };
2810
2811 self.setup_renderbuffer_non_dma(dst, crop)?;
2813
2814 let start = Instant::now();
2821 self.draw_src_texture_from_pbo(src, src_buffer_id, dst, rotation, flip, crop)?;
2822 log::debug!("PBO render takes {:?}", start.elapsed());
2823
2824 let start_read = Instant::now();
2826 let dest_format = match dst.fourcc() {
2827 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
2828 crate::RGBA => gls::gl::RGBA,
2829 crate::GREY => gls::gl::RED,
2830 _ => {
2831 return Err(crate::Error::NotSupported(format!(
2832 "PBO readback not supported for {}",
2833 dst.fourcc().display()
2834 )))
2835 }
2836 };
2837
2838 unsafe {
2839 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
2841 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
2842 gls::gl::ReadnPixels(
2843 0,
2844 0,
2845 dst.width() as i32,
2846 dst.height() as i32,
2847 dest_format,
2848 gls::gl::UNSIGNED_BYTE,
2849 dst.tensor.len() as i32,
2850 std::ptr::null_mut(), );
2852 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2853 gls::gl::Finish();
2854 }
2855
2856 check_gl_error(function!(), line!())?;
2857
2858 if fourcc_is_int8(dst.fourcc()) {
2861 unsafe {
2862 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
2863 let ptr = gls::gl::MapBufferRange(
2864 gls::gl::PIXEL_PACK_BUFFER,
2865 0,
2866 dst.tensor.len() as isize,
2867 gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
2868 );
2869 if !ptr.is_null() {
2870 let slice = std::slice::from_raw_parts_mut(ptr as *mut u8, dst.tensor.len());
2871 for byte in slice.iter_mut() {
2872 *byte ^= 0x80;
2873 }
2874 gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
2875 }
2876 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
2877 }
2878 check_gl_error(function!(), line!())?;
2879 }
2880
2881 log::debug!("PBO readback takes {:?}", start_read.elapsed());
2882 Ok(())
2883 }
2884
2885 fn draw_src_texture_from_pbo(
2891 &mut self,
2892 src: &TensorImage,
2893 src_buffer_id: u32,
2894 dst: &TensorImage,
2895 rotation: crate::Rotation,
2896 flip: Flip,
2897 crop: Crop,
2898 ) -> Result<(), Error> {
2899 let texture_target = gls::gl::TEXTURE_2D;
2900 let texture_format = match src.fourcc() {
2901 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
2902 crate::RGBA => gls::gl::RGBA,
2903 crate::GREY => gls::gl::RED,
2904 _ => {
2905 return Err(Error::NotSupported(format!(
2906 "PBO upload not supported for {:?}",
2907 src.fourcc()
2908 )));
2909 }
2910 };
2911
2912 let has_crop = crop.dst_rect.is_some_and(|x| {
2913 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
2914 });
2915
2916 let src_roi = if let Some(crop) = crop.src_rect {
2918 RegionOfInterest {
2919 left: crop.left as f32 / src.width() as f32,
2920 top: (crop.top + crop.height) as f32 / src.height() as f32,
2921 right: (crop.left + crop.width) as f32 / src.width() as f32,
2922 bottom: crop.top as f32 / src.height() as f32,
2923 }
2924 } else {
2925 RegionOfInterest {
2926 left: 0.,
2927 top: 1.,
2928 right: 1.,
2929 bottom: 0.,
2930 }
2931 };
2932
2933 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
2934 let mut dst_roi = if let Some(crop) = crop.dst_rect {
2935 RegionOfInterest {
2936 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
2937 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
2938 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
2939 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
2940 }
2941 } else {
2942 RegionOfInterest {
2943 left: -1.,
2944 top: 1.,
2945 right: 1.,
2946 bottom: -1.,
2947 }
2948 };
2949
2950 let rotation_offset = match rotation {
2951 crate::Rotation::None => 0,
2952 crate::Rotation::Clockwise90 => 1,
2953 crate::Rotation::Rotate180 => 2,
2954 crate::Rotation::CounterClockwise90 => 3,
2955 };
2956
2957 unsafe {
2958 if has_crop {
2959 if let Some(dst_color) = crop.dst_color {
2960 gls::gl::ClearColor(
2961 dst_color[0] as f32 / 255.0,
2962 dst_color[1] as f32 / 255.0,
2963 dst_color[2] as f32 / 255.0,
2964 dst_color[3] as f32 / 255.0,
2965 );
2966 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
2967 }
2968 }
2969
2970 gls::gl::UseProgram(self.texture_program.id);
2971 gls::gl::BindTexture(texture_target, self.camera_normal_texture.id);
2972 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
2973 gls::gl::TexParameteri(
2974 texture_target,
2975 gls::gl::TEXTURE_MIN_FILTER,
2976 gls::gl::LINEAR as i32,
2977 );
2978 gls::gl::TexParameteri(
2979 texture_target,
2980 gls::gl::TEXTURE_MAG_FILTER,
2981 gls::gl::LINEAR as i32,
2982 );
2983 if src.fourcc() == crate::GREY {
2984 for swizzle in [
2985 gls::gl::TEXTURE_SWIZZLE_R,
2986 gls::gl::TEXTURE_SWIZZLE_G,
2987 gls::gl::TEXTURE_SWIZZLE_B,
2988 ] {
2989 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
2990 }
2991 } else {
2992 for (swizzle, src_component) in [
2993 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
2994 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
2995 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
2996 ] {
2997 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src_component as i32);
2998 }
2999 }
3000
3001 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, src_buffer_id);
3003 gls::gl::TexImage2D(
3004 texture_target,
3005 0,
3006 texture_format as i32,
3007 src.width() as i32,
3008 src.height() as i32,
3009 0,
3010 texture_format,
3011 gls::gl::UNSIGNED_BYTE,
3012 std::ptr::null(), );
3014 gls::gl::BindBuffer(gls::gl::PIXEL_UNPACK_BUFFER, 0);
3015
3016 self.camera_normal_texture.width = 0;
3018
3019 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3020 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3021
3022 match flip {
3023 crate::Flip::None => {}
3024 crate::Flip::Vertical => {
3025 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
3026 }
3027 crate::Flip::Horizontal => {
3028 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
3029 }
3030 }
3031
3032 let camera_vertices: [f32; 12] = [
3033 dst_roi.left,
3034 dst_roi.top,
3035 0., dst_roi.right,
3037 dst_roi.top,
3038 0., dst_roi.right,
3040 dst_roi.bottom,
3041 0., dst_roi.left,
3043 dst_roi.bottom,
3044 0., ];
3046 gls::gl::BufferData(
3047 gls::gl::ARRAY_BUFFER,
3048 (camera_vertices.len() * std::mem::size_of::<f32>()) as isize,
3049 camera_vertices.as_ptr() as *const c_void,
3050 gls::gl::STATIC_DRAW,
3051 );
3052 gls::gl::VertexAttribPointer(
3053 self.vertex_buffer.buffer_index,
3054 3,
3055 gls::gl::FLOAT,
3056 gls::gl::FALSE,
3057 0,
3058 std::ptr::null(),
3059 );
3060
3061 let texture_coords: [[f32; 8]; 4] = [
3062 [
3063 src_roi.left,
3064 src_roi.top,
3065 src_roi.right,
3066 src_roi.top,
3067 src_roi.right,
3068 src_roi.bottom,
3069 src_roi.left,
3070 src_roi.bottom,
3071 ],
3072 [
3073 src_roi.left,
3074 src_roi.bottom,
3075 src_roi.left,
3076 src_roi.top,
3077 src_roi.right,
3078 src_roi.top,
3079 src_roi.right,
3080 src_roi.bottom,
3081 ],
3082 [
3083 src_roi.right,
3084 src_roi.bottom,
3085 src_roi.left,
3086 src_roi.bottom,
3087 src_roi.left,
3088 src_roi.top,
3089 src_roi.right,
3090 src_roi.top,
3091 ],
3092 [
3093 src_roi.right,
3094 src_roi.top,
3095 src_roi.right,
3096 src_roi.bottom,
3097 src_roi.left,
3098 src_roi.bottom,
3099 src_roi.left,
3100 src_roi.top,
3101 ],
3102 ];
3103 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3104 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3105 gls::gl::BufferData(
3106 gls::gl::ARRAY_BUFFER,
3107 (texture_coords[0].len() * std::mem::size_of::<f32>()) as isize,
3108 texture_coords[rotation_offset].as_ptr() as *const c_void,
3109 gls::gl::STATIC_DRAW,
3110 );
3111 gls::gl::VertexAttribPointer(
3112 self.texture_buffer.buffer_index,
3113 2,
3114 gls::gl::FLOAT,
3115 gls::gl::FALSE,
3116 0,
3117 std::ptr::null(),
3118 );
3119 gls::gl::DrawArrays(gls::gl::TRIANGLE_FAN, 0, 4);
3120 gls::gl::DisableVertexAttribArray(self.vertex_buffer.buffer_index);
3121 gls::gl::DisableVertexAttribArray(self.texture_buffer.buffer_index);
3122
3123 gls::gl::Finish();
3124 }
3125
3126 check_gl_error(function!(), line!())?;
3127 Ok(())
3128 }
3129
3130 fn convert_any_to_pbo(
3134 &mut self,
3135 dst: &mut TensorImage,
3136 src: &TensorImage,
3137 rotation: crate::Rotation,
3138 flip: Flip,
3139 crop: Crop,
3140 ) -> crate::Result<()> {
3141 let dst_buffer_id = match &dst.tensor {
3142 edgefirst_tensor::Tensor::Pbo(p) => {
3143 if p.is_mapped() {
3144 return Err(crate::Error::OpenGl(
3145 "Cannot convert to a mapped PBO tensor".to_string(),
3146 ));
3147 }
3148 p.buffer_id()
3149 }
3150 _ => {
3151 return Err(crate::Error::OpenGl(
3152 "convert_any_to_pbo: dst is not a PBO tensor".to_string(),
3153 ))
3154 }
3155 };
3156
3157 self.setup_renderbuffer_non_dma(dst, crop)?;
3158 let start = Instant::now();
3159 if dst.is_planar() {
3160 self.convert_to_planar(src, dst, rotation, flip, crop)?;
3161 } else {
3162 self.convert_to(src, dst, rotation, flip, crop)?;
3163 }
3164 log::debug!("any-to-PBO render takes {:?}", start.elapsed());
3165
3166 let start_read = Instant::now();
3168 let dest_format = match dst.fourcc() {
3169 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3170 crate::RGBA => gls::gl::RGBA,
3171 crate::GREY => gls::gl::RED,
3172 _ => {
3173 return Err(crate::Error::NotSupported(format!(
3174 "PBO readback not supported for {}",
3175 dst.fourcc().display()
3176 )))
3177 }
3178 };
3179 unsafe {
3180 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3181 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
3182 gls::gl::ReadnPixels(
3183 0,
3184 0,
3185 dst.width() as i32,
3186 dst.height() as i32,
3187 dest_format,
3188 gls::gl::UNSIGNED_BYTE,
3189 dst.tensor.len() as i32,
3190 std::ptr::null_mut(),
3191 );
3192 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3193 gls::gl::Finish();
3194 }
3195 check_gl_error(function!(), line!())?;
3196
3197 if fourcc_is_int8(dst.fourcc()) {
3198 unsafe {
3199 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, dst_buffer_id);
3200 let ptr = gls::gl::MapBufferRange(
3201 gls::gl::PIXEL_PACK_BUFFER,
3202 0,
3203 dst.tensor.len() as isize,
3204 gls::gl::MAP_READ_BIT | gls::gl::MAP_WRITE_BIT,
3205 );
3206 if !ptr.is_null() {
3207 let slice = std::slice::from_raw_parts_mut(ptr as *mut u8, dst.tensor.len());
3208 for byte in slice.iter_mut() {
3209 *byte ^= 0x80;
3210 }
3211 gls::gl::UnmapBuffer(gls::gl::PIXEL_PACK_BUFFER);
3212 }
3213 gls::gl::BindBuffer(gls::gl::PIXEL_PACK_BUFFER, 0);
3214 }
3215 check_gl_error(function!(), line!())?;
3216 }
3217
3218 log::debug!("any-to-PBO readback takes {:?}", start_read.elapsed());
3219 Ok(())
3220 }
3221
3222 fn convert_pbo_to_mem(
3226 &mut self,
3227 dst: &mut TensorImage,
3228 src: &TensorImage,
3229 rotation: crate::Rotation,
3230 flip: Flip,
3231 crop: Crop,
3232 ) -> crate::Result<()> {
3233 let src_buffer_id = match &src.tensor {
3234 edgefirst_tensor::Tensor::Pbo(p) => {
3235 if p.is_mapped() {
3236 return Err(crate::Error::OpenGl(
3237 "Cannot convert from a mapped PBO tensor".to_string(),
3238 ));
3239 }
3240 p.buffer_id()
3241 }
3242 _ => {
3243 return Err(crate::Error::OpenGl(
3244 "convert_pbo_to_mem: src is not a PBO tensor".to_string(),
3245 ))
3246 }
3247 };
3248
3249 self.setup_renderbuffer_non_dma(dst, crop)?;
3250 let start = Instant::now();
3251 self.draw_src_texture_from_pbo(src, src_buffer_id, dst, rotation, flip, crop)?;
3252 log::debug!("PBO-to-mem render takes {:?}", start.elapsed());
3253
3254 let start = Instant::now();
3256 let dest_format = match dst.fourcc() {
3257 crate::RGB | crate::RGB_INT8 => gls::gl::RGB,
3258 crate::RGBA => gls::gl::RGBA,
3259 crate::GREY => gls::gl::RED,
3260 _ => unreachable!(),
3261 };
3262 unsafe {
3263 let mut dst_map = dst.tensor().map()?;
3264 gls::gl::ReadBuffer(gls::gl::COLOR_ATTACHMENT0);
3265 gls::gl::ReadnPixels(
3266 0,
3267 0,
3268 dst.width() as i32,
3269 dst.height() as i32,
3270 dest_format,
3271 gls::gl::UNSIGNED_BYTE,
3272 dst.tensor.len() as i32,
3273 dst_map.as_mut_ptr() as *mut c_void,
3274 );
3275 if fourcc_is_int8(dst.fourcc()) {
3276 for byte in dst_map.iter_mut() {
3277 *byte ^= 0x80;
3278 }
3279 }
3280 }
3281 log::debug!("PBO-to-mem readback takes {:?}", start.elapsed());
3282 Ok(())
3283 }
3284
3285 fn convert_to(
3286 &mut self,
3287 src: &TensorImage,
3288 dst: &TensorImage,
3289 rotation: crate::Rotation,
3290 flip: Flip,
3291 crop: Crop,
3292 ) -> Result<(), crate::Error> {
3293 check_gl_error(function!(), line!())?;
3294
3295 let has_crop = crop.dst_rect.is_some_and(|x| {
3296 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
3297 });
3298 if has_crop {
3299 if let Some(dst_color) = crop.dst_color {
3300 unsafe {
3301 gls::gl::ClearColor(
3302 dst_color[0] as f32 / 255.0,
3303 dst_color[1] as f32 / 255.0,
3304 dst_color[2] as f32 / 255.0,
3305 dst_color[3] as f32 / 255.0,
3306 );
3307 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3308 };
3309 }
3310 }
3311
3312 let src_roi = if let Some(crop) = crop.src_rect {
3314 RegionOfInterest {
3315 left: crop.left as f32 / src.width() as f32,
3316 top: (crop.top + crop.height) as f32 / src.height() as f32,
3317 right: (crop.left + crop.width) as f32 / src.width() as f32,
3318 bottom: crop.top as f32 / src.height() as f32,
3319 }
3320 } else {
3321 RegionOfInterest {
3322 left: 0.,
3323 top: 1.,
3324 right: 1.,
3325 bottom: 0.,
3326 }
3327 };
3328
3329 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3331 let dst_roi = if let Some(crop) = crop.dst_rect {
3332 RegionOfInterest {
3333 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
3334 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
3335 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
3336 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
3337 }
3338 } else {
3339 RegionOfInterest {
3340 left: -1.,
3341 top: 1.,
3342 right: 1.,
3343 bottom: -1.,
3344 }
3345 };
3346 let rotation_offset = match rotation {
3347 crate::Rotation::None => 0,
3348 crate::Rotation::Clockwise90 => 1,
3349 crate::Rotation::Rotate180 => 2,
3350 crate::Rotation::CounterClockwise90 => 3,
3351 };
3352 if self.gl_context.transfer_backend.is_dma() && src.tensor().memory() == TensorMemory::Dma {
3353 match self.get_or_create_egl_image(CacheKind::Src, src) {
3354 Ok(src_egl) => self.draw_camera_texture_eglimage(
3355 src,
3356 src_egl,
3357 src_roi,
3358 dst_roi,
3359 rotation_offset,
3360 flip,
3361 )?,
3362 Err(e) => {
3363 log::warn!("EGL image creation failed for {:?}: {:?}", src.fourcc(), e);
3364 let start = Instant::now();
3365 self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
3366 log::debug!("draw_src_texture takes {:?}", start.elapsed());
3367 }
3368 }
3369 } else {
3370 let start = Instant::now();
3371 self.draw_src_texture(src, src_roi, dst_roi, rotation_offset, flip)?;
3372 log::debug!("draw_src_texture takes {:?}", start.elapsed());
3373 }
3374
3375 let start = Instant::now();
3376 unsafe { gls::gl::Finish() };
3377 log::debug!("gl_Finish takes {:?}", start.elapsed());
3378 check_gl_error(function!(), line!())?;
3379 Ok(())
3380 }
3381
3382 fn convert_to_planar(
3383 &mut self,
3384 src: &TensorImage,
3385 dst: &TensorImage,
3386 rotation: crate::Rotation,
3387 flip: Flip,
3388 crop: Crop,
3389 ) -> Result<(), crate::Error> {
3390 let alpha = match dst.fourcc() {
3413 PLANAR_RGB | PLANAR_RGB_INT8 => false,
3414 PLANAR_RGBA => true,
3415 _ => {
3416 return Err(crate::Error::NotSupported(
3417 "Destination format must be PLANAR_RGB, PLANAR_RGB_INT8, or PLANAR_RGBA"
3418 .to_string(),
3419 ));
3420 }
3421 };
3422 let is_int8 = fourcc_is_int8(dst.fourcc());
3423
3424 let src_roi = if let Some(crop) = crop.src_rect {
3426 RegionOfInterest {
3427 left: crop.left as f32 / src.width() as f32,
3428 top: (crop.top + crop.height) as f32 / src.height() as f32,
3429 right: (crop.left + crop.width) as f32 / src.width() as f32,
3430 bottom: crop.top as f32 / src.height() as f32,
3431 }
3432 } else {
3433 RegionOfInterest {
3434 left: 0.,
3435 top: 1.,
3436 right: 1.,
3437 bottom: 0.,
3438 }
3439 };
3440
3441 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
3443 let dst_roi = if let Some(crop) = crop.dst_rect {
3444 RegionOfInterest {
3445 left: cvt_screen_coord(crop.left as f32 / dst.width() as f32),
3446 top: cvt_screen_coord((crop.top + crop.height) as f32 / dst.height() as f32),
3447 right: cvt_screen_coord((crop.left + crop.width) as f32 / dst.width() as f32),
3448 bottom: cvt_screen_coord(crop.top as f32 / dst.height() as f32),
3449 }
3450 } else {
3451 RegionOfInterest {
3452 left: -1.,
3453 top: 1.,
3454 right: 1.,
3455 bottom: -1.,
3456 }
3457 };
3458 let rotation_offset = match rotation {
3459 crate::Rotation::None => 0,
3460 crate::Rotation::Clockwise90 => 1,
3461 crate::Rotation::Rotate180 => 2,
3462 crate::Rotation::CounterClockwise90 => 3,
3463 };
3464
3465 let has_crop = crop.dst_rect.is_some_and(|x| {
3466 x.left != 0 || x.top != 0 || x.width != dst.width() || x.height != dst.height()
3467 });
3468 if has_crop {
3469 if let Some(dst_color) = crop.dst_color {
3470 self.clear_rect_planar(
3471 dst.width(),
3472 dst.height(),
3473 dst_roi,
3474 [
3475 dst_color[0] as f32 / 255.0,
3476 dst_color[1] as f32 / 255.0,
3477 dst_color[2] as f32 / 255.0,
3478 dst_color[3] as f32 / 255.0,
3479 ],
3480 alpha,
3481 )?;
3482 }
3483 }
3484
3485 let src_egl = self.get_or_create_egl_image(CacheKind::Src, src)?;
3486
3487 self.draw_camera_texture_to_rgb_planar(
3488 src_egl,
3489 src_roi,
3490 dst_roi,
3491 rotation_offset,
3492 flip,
3493 alpha,
3494 is_int8,
3495 )?;
3496 unsafe { gls::gl::Finish() };
3497
3498 Ok(())
3499 }
3500
3501 fn convert_to_packed_rgb(
3511 &mut self,
3512 src: &TensorImage,
3513 dst: &mut TensorImage,
3514 rotation: crate::Rotation,
3515 flip: Flip,
3516 crop: Crop,
3517 ) -> crate::Result<()> {
3518 let dst_w = dst.width();
3519 let dst_h = dst.height();
3520 let is_int8 = fourcc_is_int8(dst.fourcc());
3521
3522 if !(dst_w * 3).is_multiple_of(4) {
3524 return Err(crate::Error::NotSupported(format!(
3525 "Packed RGB requires width*3 divisible by 4, got width={dst_w}"
3526 )));
3527 }
3528
3529 let render_w = dst_w * 3 / 4;
3530 let render_h = dst_h;
3531
3532 log::debug!(
3533 "convert_to_packed_rgb: {dst_w}x{dst_h} -> {render_w}x{render_h} two-pass int8={is_int8}",
3534 );
3535
3536 self.ensure_packed_rgb_intermediate(dst_w, dst_h)?;
3538 self.packed_rgb_fbo.bind();
3539 unsafe {
3540 gls::gl::FramebufferTexture2D(
3541 gls::gl::FRAMEBUFFER,
3542 gls::gl::COLOR_ATTACHMENT0,
3543 gls::gl::TEXTURE_2D,
3544 self.packed_rgb_intermediate_tex.id,
3545 0,
3546 );
3547 check_gl_error(function!(), line!())?;
3548 gls::gl::Viewport(0, 0, dst_w as i32, dst_h as i32);
3549 }
3550 self.convert_to(src, dst, rotation, flip, crop)?;
3554
3555 self.convert_fbo.bind();
3557 let dest_egl =
3558 self.get_or_create_egl_image_rgb(dst, render_w, render_h, DrmFourcc::Abgr8888, 4)?;
3559 unsafe {
3560 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
3561 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.render_texture.id);
3562 gls::gl::TexParameteri(
3563 gls::gl::TEXTURE_2D,
3564 gls::gl::TEXTURE_MIN_FILTER,
3565 gls::gl::NEAREST as i32,
3566 );
3567 gls::gl::TexParameteri(
3568 gls::gl::TEXTURE_2D,
3569 gls::gl::TEXTURE_MAG_FILTER,
3570 gls::gl::NEAREST as i32,
3571 );
3572 gls::gl::EGLImageTargetTexture2DOES(gls::gl::TEXTURE_2D, dest_egl.as_ptr());
3573 gls::gl::FramebufferTexture2D(
3574 gls::gl::FRAMEBUFFER,
3575 gls::gl::COLOR_ATTACHMENT0,
3576 gls::gl::TEXTURE_2D,
3577 self.render_texture.id,
3578 0,
3579 );
3580 check_gl_error(function!(), line!())?;
3581 gls::gl::Viewport(0, 0, render_w as i32, render_h as i32);
3582 }
3583
3584 let program = if is_int8 {
3586 &self.packed_rgba8_int8_program_2d
3587 } else {
3588 &self.packed_rgba8_program_2d
3589 };
3590 unsafe {
3591 gls::gl::UseProgram(program.id);
3592 gls::gl::ActiveTexture(gls::gl::TEXTURE1);
3593 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.packed_rgb_intermediate_tex.id);
3594 gls::gl::TexParameteri(
3595 gls::gl::TEXTURE_2D,
3596 gls::gl::TEXTURE_MIN_FILTER,
3597 gls::gl::NEAREST as i32,
3598 );
3599 gls::gl::TexParameteri(
3600 gls::gl::TEXTURE_2D,
3601 gls::gl::TEXTURE_MAG_FILTER,
3602 gls::gl::NEAREST as i32,
3603 );
3604 }
3605
3606 unsafe {
3608 let loc_tex = gls::gl::GetUniformLocation(program.id, c"tex".as_ptr());
3609 gls::gl::Uniform1i(loc_tex, 1);
3610 }
3611
3612 self.draw_fullscreen_quad()?;
3614
3615 unsafe { gls::gl::Finish() };
3616 Ok(())
3617 }
3618
3619 fn convert_to_rgb_direct(
3622 &mut self,
3623 src: &TensorImage,
3624 dst: &mut TensorImage,
3625 rotation: crate::Rotation,
3626 flip: Flip,
3627 crop: Crop,
3628 ) -> crate::Result<()> {
3629 let is_int8 = fourcc_is_int8(dst.fourcc());
3630
3631 log::debug!(
3632 "convert_to_rgb_direct: {}x{} single-pass int8={is_int8}",
3633 dst.width(),
3634 dst.height(),
3635 );
3636
3637 let (rbo, width, height) = self.get_or_create_rgb_direct_rbo(dst)?;
3639
3640 self.convert_fbo.bind();
3642 unsafe {
3643 gls::gl::FramebufferRenderbuffer(
3644 gls::gl::FRAMEBUFFER,
3645 gls::gl::COLOR_ATTACHMENT0,
3646 gls::gl::RENDERBUFFER,
3647 rbo,
3648 );
3649 check_gl_error(function!(), line!())?;
3650
3651 let status = gls::gl::CheckFramebufferStatus(gls::gl::FRAMEBUFFER);
3652 if status != gls::gl::FRAMEBUFFER_COMPLETE {
3653 log::warn!("convert_to_rgb_direct: FBO incomplete (0x{status:x}), falling back");
3654 return self.convert_to_packed_rgb(src, dst, rotation, flip, crop);
3655 }
3656
3657 gls::gl::Viewport(0, 0, width, height);
3658 }
3659
3660 let crop = if is_int8 {
3662 std::mem::swap(&mut self.texture_program, &mut self.texture_int8_program);
3663 std::mem::swap(
3664 &mut self.texture_program_yuv,
3665 &mut self.texture_int8_program_yuv,
3666 );
3667 let mut crop = crop;
3670 if let Some(ref mut color) = crop.dst_color {
3671 color[0] ^= 0x80;
3672 color[1] ^= 0x80;
3673 color[2] ^= 0x80;
3674 }
3675 crop
3676 } else {
3677 crop
3678 };
3679
3680 let result = self.convert_to(src, dst, rotation, flip, crop);
3681
3682 if is_int8 {
3684 std::mem::swap(&mut self.texture_program, &mut self.texture_int8_program);
3685 std::mem::swap(
3686 &mut self.texture_program_yuv,
3687 &mut self.texture_int8_program_yuv,
3688 );
3689 }
3690
3691 result
3692 }
3693
3694 fn ensure_packed_rgb_intermediate(&mut self, width: usize, height: usize) -> crate::Result<()> {
3696 if self.packed_rgb_intermediate_size == (width, height) {
3697 return Ok(());
3698 }
3699 unsafe {
3700 gls::gl::BindTexture(gls::gl::TEXTURE_2D, self.packed_rgb_intermediate_tex.id);
3701 gls::gl::TexParameteri(
3702 gls::gl::TEXTURE_2D,
3703 gls::gl::TEXTURE_MIN_FILTER,
3704 gls::gl::NEAREST as i32,
3705 );
3706 gls::gl::TexParameteri(
3707 gls::gl::TEXTURE_2D,
3708 gls::gl::TEXTURE_MAG_FILTER,
3709 gls::gl::NEAREST as i32,
3710 );
3711 gls::gl::TexImage2D(
3712 gls::gl::TEXTURE_2D,
3713 0,
3714 gls::gl::RGBA as i32,
3715 width as i32,
3716 height as i32,
3717 0,
3718 gls::gl::RGBA,
3719 gls::gl::UNSIGNED_BYTE,
3720 std::ptr::null(),
3721 );
3722 check_gl_error(function!(), line!())?;
3723 }
3724 self.packed_rgb_intermediate_size = (width, height);
3725 Ok(())
3726 }
3727
3728 fn draw_fullscreen_quad(&self) -> Result<(), crate::Error> {
3731 unsafe {
3732 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3733 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3734
3735 let vertices: [f32; 12] = [
3736 -1.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0, -1.0, 0.0, -1.0, -1.0, 0.0, ];
3741 gls::gl::BufferSubData(
3742 gls::gl::ARRAY_BUFFER,
3743 0,
3744 (size_of::<f32>() * vertices.len()) as isize,
3745 vertices.as_ptr() as *const c_void,
3746 );
3747
3748 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3749 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3750
3751 let tex_coords: [f32; 8] = [0.0, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.0];
3754 gls::gl::BufferSubData(
3755 gls::gl::ARRAY_BUFFER,
3756 0,
3757 (size_of::<f32>() * tex_coords.len()) as isize,
3758 tex_coords.as_ptr() as *const c_void,
3759 );
3760
3761 let indices: [u32; 4] = [0, 1, 2, 3];
3762 gls::gl::DrawElements(
3763 gls::gl::TRIANGLE_FAN,
3764 indices.len() as i32,
3765 gls::gl::UNSIGNED_INT,
3766 indices.as_ptr() as *const c_void,
3767 );
3768 }
3769 check_gl_error(function!(), line!())?;
3770 Ok(())
3771 }
3772
3773 fn clear_rect_planar(
3774 &self,
3775 width: usize,
3776 height: usize,
3777 dst_roi: RegionOfInterest,
3778 color: [f32; 4],
3779 alpha: bool,
3780 ) -> Result<(), Error> {
3781 if !alpha && color[0] == color[1] && color[1] == color[2] {
3782 unsafe {
3783 gls::gl::ClearColor(color[0], color[0], color[0], 1.0);
3784 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3785 };
3786 }
3787
3788 let split = if alpha { 4 } else { 3 };
3789
3790 unsafe {
3791 gls::gl::Enable(gls::gl::SCISSOR_TEST);
3792 let x = (((dst_roi.left + 1.0) / 2.0) * width as f32).round() as i32;
3793 let y = (((dst_roi.bottom + 1.0) / 2.0) * height as f32).round() as i32;
3794 let width = (((dst_roi.right - dst_roi.left) / 2.0) * width as f32).round() as i32;
3795 let height = (((dst_roi.top - dst_roi.bottom) / 2.0) * height as f32 / split as f32)
3796 .round() as i32;
3797 for (i, c) in color.iter().enumerate().take(split) {
3798 gls::gl::Scissor(x, y + i as i32 * height, width, height);
3799 gls::gl::ClearColor(*c, *c, *c, 1.0);
3800 gls::gl::Clear(gls::gl::COLOR_BUFFER_BIT);
3801 }
3802 gls::gl::Disable(gls::gl::SCISSOR_TEST);
3803 }
3804 Ok(())
3805 }
3806
3807 #[allow(clippy::too_many_arguments)]
3808 fn draw_camera_texture_to_rgb_planar(
3809 &self,
3810 egl_img: egl::Image,
3811 src_roi: RegionOfInterest,
3812 mut dst_roi: RegionOfInterest,
3813 rotation_offset: usize,
3814 flip: Flip,
3815 alpha: bool,
3816 int8: bool,
3817 ) -> Result<(), Error> {
3818 let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
3819 match flip {
3820 Flip::None => {}
3821 Flip::Vertical => {
3822 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
3823 }
3824 Flip::Horizontal => {
3825 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
3826 }
3827 }
3828 unsafe {
3829 let program = if int8 {
3830 &self.texture_program_planar_int8
3831 } else {
3832 &self.texture_program_planar
3833 };
3834 gls::gl::UseProgram(program.id);
3835 gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
3836 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
3837 gls::gl::TexParameteri(
3838 texture_target,
3839 gls::gl::TEXTURE_MIN_FILTER,
3840 gls::gl::LINEAR as i32,
3841 );
3842 gls::gl::TexParameteri(
3843 texture_target,
3844 gls::gl::TEXTURE_MAG_FILTER,
3845 gls::gl::LINEAR as i32,
3846 );
3847 gls::gl::TexParameteri(
3848 texture_target,
3849 gls::gl::TEXTURE_WRAP_S,
3850 gls::gl::CLAMP_TO_EDGE as i32,
3851 );
3852
3853 gls::gl::TexParameteri(
3854 texture_target,
3855 gls::gl::TEXTURE_WRAP_T,
3856 gls::gl::CLAMP_TO_EDGE as i32,
3857 );
3858
3859 gls::egl_image_target_texture_2d_oes(texture_target, egl_img.as_ptr());
3860 check_gl_error(function!(), line!())?;
3861 let y_centers = if alpha {
3862 vec![-3.0 / 4.0, -1.0 / 4.0, 1.0 / 4.0, 3.0 / 4.0]
3863 } else {
3864 vec![-2.0 / 3.0, 0.0, 2.0 / 3.0]
3865 };
3866 let swizzles = [gls::gl::RED, gls::gl::GREEN, gls::gl::BLUE, gls::gl::ALPHA];
3867 for (i, y_center) in y_centers.iter().enumerate() {
3869 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
3870 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
3871 let camera_vertices: [f32; 12] = [
3872 dst_roi.left,
3873 dst_roi.top / 3.0 + y_center,
3874 0., dst_roi.right,
3876 dst_roi.top / 3.0 + y_center,
3877 0., dst_roi.right,
3879 dst_roi.bottom / 3.0 + y_center,
3880 0., dst_roi.left,
3882 dst_roi.bottom / 3.0 + y_center,
3883 0., ];
3885 gls::gl::BufferData(
3886 gls::gl::ARRAY_BUFFER,
3887 (size_of::<f32>() * camera_vertices.len()) as isize,
3888 camera_vertices.as_ptr() as *const c_void,
3889 gls::gl::DYNAMIC_DRAW,
3890 );
3891
3892 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
3893 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
3894 let texture_vertices: [f32; 16] = [
3895 src_roi.left,
3896 src_roi.top,
3897 src_roi.right,
3898 src_roi.top,
3899 src_roi.right,
3900 src_roi.bottom,
3901 src_roi.left,
3902 src_roi.bottom,
3903 src_roi.left,
3904 src_roi.top,
3905 src_roi.right,
3906 src_roi.top,
3907 src_roi.right,
3908 src_roi.bottom,
3909 src_roi.left,
3910 src_roi.bottom,
3911 ];
3912
3913 gls::gl::BufferData(
3914 gls::gl::ARRAY_BUFFER,
3915 (size_of::<f32>() * 8) as isize,
3916 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
3917 gls::gl::DYNAMIC_DRAW,
3918 );
3919 let vertices_index: [u32; 4] = [0, 1, 2, 3];
3920 gls::gl::TexParameteri(
3924 texture_target,
3925 gls::gl::TEXTURE_SWIZZLE_R,
3926 swizzles[i] as i32,
3927 );
3928
3929 gls::gl::DrawElements(
3930 gls::gl::TRIANGLE_FAN,
3931 vertices_index.len() as i32,
3932 gls::gl::UNSIGNED_INT,
3933 vertices_index.as_ptr() as *const c_void,
3934 );
3935 }
3936 check_gl_error(function!(), line!())?;
3937 }
3938 Ok(())
3939 }
3940
3941 fn draw_src_texture(
3942 &mut self,
3943 src: &TensorImage,
3944 src_roi: RegionOfInterest,
3945 mut dst_roi: RegionOfInterest,
3946 rotation_offset: usize,
3947 flip: Flip,
3948 ) -> Result<(), Error> {
3949 let texture_target = gls::gl::TEXTURE_2D;
3950 let texture_format = match src.fourcc() {
3951 RGB => gls::gl::RGB,
3952 RGBA => gls::gl::RGBA,
3953 GREY => gls::gl::RED,
3954 _ => {
3955 return Err(Error::NotSupported(format!(
3956 "draw_src_texture does not support {:?} (use DMA-BUF path for YUV)",
3957 src.fourcc()
3958 )));
3959 }
3960 };
3961 unsafe {
3962 gls::gl::UseProgram(self.texture_program.id);
3963 gls::gl::BindTexture(texture_target, self.camera_normal_texture.id);
3964 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
3965 gls::gl::TexParameteri(
3966 texture_target,
3967 gls::gl::TEXTURE_MIN_FILTER,
3968 gls::gl::LINEAR as i32,
3969 );
3970 gls::gl::TexParameteri(
3971 texture_target,
3972 gls::gl::TEXTURE_MAG_FILTER,
3973 gls::gl::LINEAR as i32,
3974 );
3975 if src.fourcc() == GREY {
3976 for swizzle in [
3977 gls::gl::TEXTURE_SWIZZLE_R,
3978 gls::gl::TEXTURE_SWIZZLE_G,
3979 gls::gl::TEXTURE_SWIZZLE_B,
3980 ] {
3981 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
3982 }
3983 } else {
3984 for (swizzle, src) in [
3985 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
3986 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
3987 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
3988 ] {
3989 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
3990 }
3991 }
3992 self.camera_normal_texture.update_texture(
3993 texture_target,
3994 src.width(),
3995 src.height(),
3996 texture_format,
3997 &src.tensor().map()?,
3998 );
3999
4000 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4001 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4002
4003 match flip {
4004 Flip::None => {}
4005 Flip::Vertical => {
4006 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
4007 }
4008 Flip::Horizontal => {
4009 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
4010 }
4011 }
4012
4013 let camera_vertices: [f32; 12] = [
4014 dst_roi.left,
4015 dst_roi.top,
4016 0., dst_roi.right,
4018 dst_roi.top,
4019 0., dst_roi.right,
4021 dst_roi.bottom,
4022 0., dst_roi.left,
4024 dst_roi.bottom,
4025 0., ];
4027 gls::gl::BufferData(
4028 gls::gl::ARRAY_BUFFER,
4029 (size_of::<f32>() * camera_vertices.len()) as isize,
4030 camera_vertices.as_ptr() as *const c_void,
4031 gls::gl::DYNAMIC_DRAW,
4032 );
4033 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4034 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4035 let texture_vertices: [f32; 16] = [
4036 src_roi.left,
4037 src_roi.top,
4038 src_roi.right,
4039 src_roi.top,
4040 src_roi.right,
4041 src_roi.bottom,
4042 src_roi.left,
4043 src_roi.bottom,
4044 src_roi.left,
4045 src_roi.top,
4046 src_roi.right,
4047 src_roi.top,
4048 src_roi.right,
4049 src_roi.bottom,
4050 src_roi.left,
4051 src_roi.bottom,
4052 ];
4053
4054 gls::gl::BufferData(
4055 gls::gl::ARRAY_BUFFER,
4056 (size_of::<f32>() * 8) as isize,
4057 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
4058 gls::gl::DYNAMIC_DRAW,
4059 );
4060 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4061 gls::gl::DrawElements(
4062 gls::gl::TRIANGLE_FAN,
4063 vertices_index.len() as i32,
4064 gls::gl::UNSIGNED_INT,
4065 vertices_index.as_ptr() as *const c_void,
4066 );
4067 check_gl_error(function!(), line!())?;
4068
4069 Ok(())
4070 }
4071 }
4072
4073 fn draw_camera_texture_eglimage(
4074 &self,
4075 src: &TensorImage,
4076 egl_img: egl::Image,
4077 src_roi: RegionOfInterest,
4078 mut dst_roi: RegionOfInterest,
4079 rotation_offset: usize,
4080 flip: Flip,
4081 ) -> Result<(), Error> {
4082 let texture_target = gls::gl::TEXTURE_EXTERNAL_OES;
4084 unsafe {
4085 gls::gl::UseProgram(self.texture_program_yuv.id);
4086 gls::gl::BindTexture(texture_target, self.camera_eglimage_texture.id);
4087 gls::gl::ActiveTexture(gls::gl::TEXTURE0);
4088 gls::gl::TexParameteri(
4089 texture_target,
4090 gls::gl::TEXTURE_MIN_FILTER,
4091 gls::gl::LINEAR as i32,
4092 );
4093 gls::gl::TexParameteri(
4094 texture_target,
4095 gls::gl::TEXTURE_MAG_FILTER,
4096 gls::gl::LINEAR as i32,
4097 );
4098
4099 if src.fourcc() == GREY {
4100 for swizzle in [
4101 gls::gl::TEXTURE_SWIZZLE_R,
4102 gls::gl::TEXTURE_SWIZZLE_G,
4103 gls::gl::TEXTURE_SWIZZLE_B,
4104 ] {
4105 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, gls::gl::RED as i32);
4106 }
4107 } else {
4108 for (swizzle, src) in [
4109 (gls::gl::TEXTURE_SWIZZLE_R, gls::gl::RED),
4110 (gls::gl::TEXTURE_SWIZZLE_G, gls::gl::GREEN),
4111 (gls::gl::TEXTURE_SWIZZLE_B, gls::gl::BLUE),
4112 ] {
4113 gls::gl::TexParameteri(gls::gl::TEXTURE_2D, swizzle, src as i32);
4114 }
4115 }
4116
4117 gls::egl_image_target_texture_2d_oes(texture_target, egl_img.as_ptr());
4118 check_gl_error(function!(), line!())?;
4119 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4120 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4121
4122 match flip {
4123 Flip::None => {}
4124 Flip::Vertical => {
4125 std::mem::swap(&mut dst_roi.top, &mut dst_roi.bottom);
4126 }
4127 Flip::Horizontal => {
4128 std::mem::swap(&mut dst_roi.left, &mut dst_roi.right);
4129 }
4130 }
4131
4132 let camera_vertices: [f32; 12] = [
4133 dst_roi.left,
4134 dst_roi.top,
4135 0., dst_roi.right,
4137 dst_roi.top,
4138 0., dst_roi.right,
4140 dst_roi.bottom,
4141 0., dst_roi.left,
4143 dst_roi.bottom,
4144 0., ];
4146 gls::gl::BufferSubData(
4147 gls::gl::ARRAY_BUFFER,
4148 0,
4149 (size_of::<f32>() * camera_vertices.len()) as isize,
4150 camera_vertices.as_ptr() as *const c_void,
4151 );
4152
4153 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4154 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4155
4156 let texture_vertices: [f32; 16] = [
4157 src_roi.left,
4158 src_roi.top,
4159 src_roi.right,
4160 src_roi.top,
4161 src_roi.right,
4162 src_roi.bottom,
4163 src_roi.left,
4164 src_roi.bottom,
4165 src_roi.left,
4166 src_roi.top,
4167 src_roi.right,
4168 src_roi.top,
4169 src_roi.right,
4170 src_roi.bottom,
4171 src_roi.left,
4172 src_roi.bottom,
4173 ];
4174 gls::gl::BufferSubData(
4175 gls::gl::ARRAY_BUFFER,
4176 0,
4177 (size_of::<f32>() * 8) as isize,
4178 (texture_vertices[(rotation_offset * 2)..]).as_ptr() as *const c_void,
4179 );
4180
4181 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4182 gls::gl::DrawElements(
4183 gls::gl::TRIANGLE_FAN,
4184 vertices_index.len() as i32,
4185 gls::gl::UNSIGNED_INT,
4186 vertices_index.as_ptr() as *const c_void,
4187 );
4188 }
4189 check_gl_error(function!(), line!())?;
4190 Ok(())
4191 }
4192
4193 fn create_image_from_dma2(&self, src: &TensorImage) -> Result<EglImage, crate::Error> {
4194 let width;
4195 let height;
4196 let format;
4197 let channels;
4198
4199 if src.fourcc() == NV12 {
4201 if !src.width().is_multiple_of(4) {
4202 return Err(Error::NotSupported(
4203 "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
4204 .to_string(),
4205 ));
4206 }
4207 width = src.width();
4208 height = src.height();
4209 format = fourcc_to_drm(NV12)?;
4210 channels = 1; } else if src.is_planar() {
4212 if !src.width().is_multiple_of(16) {
4213 return Err(Error::NotSupported(
4214 "OpenGL Planar RGB EGLImage doesn't support image widths which are not multiples of 16"
4215 .to_string(),
4216 ));
4217 }
4218 match src.fourcc() {
4219 PLANAR_RGB | PLANAR_RGB_INT8 => {
4220 format = DrmFourcc::R8;
4221 width = src.width();
4222 height = src.height() * 3;
4223 channels = 1;
4224 }
4225 fourcc => {
4226 return Err(crate::Error::NotSupported(format!(
4227 "Unsupported Planar FourCC {fourcc:?}"
4228 )));
4229 }
4230 };
4231 } else {
4232 if !src.width().is_multiple_of(4) {
4233 return Err(Error::NotSupported(
4234 "OpenGL EGLImage doesn't support image widths which are not multiples of 4"
4235 .to_string(),
4236 ));
4237 }
4238 width = src.width();
4239 height = src.height();
4240 format = fourcc_to_drm(src.fourcc())?;
4241 channels = src.channels();
4242 }
4243
4244 let fd = match &src.tensor {
4245 edgefirst_tensor::Tensor::Dma(dma_tensor) => dma_tensor.fd.as_raw_fd(),
4246 edgefirst_tensor::Tensor::Shm(_) => {
4247 return Err(Error::NotImplemented(
4248 "OpenGL EGLImage doesn't support SHM".to_string(),
4249 ));
4250 }
4251 edgefirst_tensor::Tensor::Mem(_) => {
4252 return Err(Error::NotImplemented(
4253 "OpenGL EGLImage doesn't support MEM".to_string(),
4254 ));
4255 }
4256 edgefirst_tensor::Tensor::Pbo(_) => {
4257 return Err(Error::NotImplemented(
4258 "OpenGL EGLImage doesn't support PBO".to_string(),
4259 ));
4260 }
4261 };
4262
4263 let plane0_pitch = if src.fourcc() == NV12 {
4266 width
4267 } else {
4268 width * channels
4269 };
4270
4271 let mut egl_img_attr = vec![
4272 egl_ext::LINUX_DRM_FOURCC as Attrib,
4273 format as Attrib,
4274 khronos_egl::WIDTH as Attrib,
4275 width as Attrib,
4276 khronos_egl::HEIGHT as Attrib,
4277 height as Attrib,
4278 egl_ext::DMA_BUF_PLANE0_PITCH as Attrib,
4279 plane0_pitch as Attrib,
4280 egl_ext::DMA_BUF_PLANE0_OFFSET as Attrib,
4281 0 as Attrib,
4282 egl_ext::DMA_BUF_PLANE0_FD as Attrib,
4283 fd as Attrib,
4284 egl::IMAGE_PRESERVED as Attrib,
4285 egl::TRUE as Attrib,
4286 ];
4287
4288 if src.fourcc() == NV12 {
4290 let uv_offset = width * height; egl_img_attr.append(&mut vec![
4292 egl_ext::DMA_BUF_PLANE1_FD as Attrib,
4293 fd as Attrib,
4294 egl_ext::DMA_BUF_PLANE1_OFFSET as Attrib,
4295 uv_offset as Attrib,
4296 egl_ext::DMA_BUF_PLANE1_PITCH as Attrib,
4297 width as Attrib, ]);
4299 }
4300
4301 if matches!(src.fourcc(), YUYV | VYUY | NV12) {
4302 egl_img_attr.append(&mut vec![
4303 egl_ext::YUV_COLOR_SPACE_HINT as Attrib,
4304 egl_ext::ITU_REC709 as Attrib,
4305 egl_ext::SAMPLE_RANGE_HINT as Attrib,
4306 egl_ext::YUV_NARROW_RANGE as Attrib,
4307 ]);
4308 }
4309
4310 egl_img_attr.push(khronos_egl::NONE as Attrib);
4311
4312 match self.new_egl_image_owned(egl_ext::LINUX_DMA_BUF, &egl_img_attr) {
4313 Ok(v) => Ok(v),
4314 Err(e) => Err(e),
4315 }
4316 }
4317
4318 fn new_egl_image_owned(
4319 &'_ self,
4320 target: egl::Enum,
4321 attrib_list: &[Attrib],
4322 ) -> Result<EglImage, Error> {
4323 let image = GlContext::egl_create_image_with_fallback(
4324 &self.gl_context.egl,
4325 self.gl_context.display.as_display(),
4326 unsafe { egl::Context::from_ptr(egl::NO_CONTEXT) },
4327 target,
4328 unsafe { egl::ClientBuffer::from_ptr(null_mut()) },
4329 attrib_list,
4330 )?;
4331 Ok(EglImage {
4332 egl_image: image,
4333 display: self.gl_context.display.as_display(),
4334 egl: Rc::clone(&self.gl_context.egl),
4335 })
4336 }
4337
4338 fn get_or_create_egl_image(
4344 &mut self,
4345 cache: CacheKind,
4346 img: &TensorImage,
4347 ) -> Result<egl::Image, crate::Error> {
4348 let id = img.buffer_identity().id();
4349
4350 match cache {
4352 CacheKind::Src => self.src_egl_cache.sweep(),
4353 CacheKind::Dst => self.dst_egl_cache.sweep(),
4354 }
4355
4356 {
4357 let egl_cache = match cache {
4358 CacheKind::Src => &mut self.src_egl_cache,
4359 CacheKind::Dst => &mut self.dst_egl_cache,
4360 };
4361 let ts = egl_cache.next_timestamp();
4362 if let Some(cached) = egl_cache.entries.get_mut(&id) {
4363 egl_cache.hits += 1;
4364 cached.last_used = ts;
4365 log::trace!("EglImageCache {:?} hit: id={id:#x}", cache);
4366 return Ok(cached.egl_image.egl_image);
4367 }
4368 egl_cache.misses += 1;
4369 log::trace!("EglImageCache {:?} miss: id={id:#x}", cache);
4370 if egl_cache.entries.len() >= egl_cache.capacity {
4372 egl_cache.evict_lru();
4373 }
4374 }
4375
4376 let egl_image_obj = self.create_image_from_dma2(img)?;
4377 let handle = egl_image_obj.egl_image;
4378 let guard = img.buffer_identity().weak();
4379 let egl_cache = match cache {
4380 CacheKind::Src => &mut self.src_egl_cache,
4381 CacheKind::Dst => &mut self.dst_egl_cache,
4382 };
4383 let ts = egl_cache.next_timestamp();
4384 egl_cache.entries.insert(
4385 id,
4386 CachedEglImage {
4387 egl_image: egl_image_obj,
4388 guard,
4389 renderbuffer: None,
4390 last_used: ts,
4391 },
4392 );
4393 Ok(handle)
4394 }
4395
4396 fn create_egl_image_with_dims(
4400 &self,
4401 img: &TensorImage,
4402 width: usize,
4403 height: usize,
4404 drm_format: DrmFourcc,
4405 bpp: usize,
4406 ) -> Result<EglImage, crate::Error> {
4407 let fd = match &img.tensor {
4408 edgefirst_tensor::Tensor::Dma(dma_tensor) => dma_tensor.fd.as_raw_fd(),
4409 _ => {
4410 return Err(Error::NotImplemented(
4411 "create_egl_image_with_dims requires DMA tensor".to_string(),
4412 ));
4413 }
4414 };
4415
4416 let pitch = width * bpp;
4417 let egl_img_attr = vec![
4418 egl_ext::LINUX_DRM_FOURCC as Attrib,
4419 drm_format as u32 as Attrib,
4420 khronos_egl::WIDTH as Attrib,
4421 width as Attrib,
4422 khronos_egl::HEIGHT as Attrib,
4423 height as Attrib,
4424 egl_ext::DMA_BUF_PLANE0_PITCH as Attrib,
4425 pitch as Attrib,
4426 egl_ext::DMA_BUF_PLANE0_OFFSET as Attrib,
4427 0 as Attrib,
4428 egl_ext::DMA_BUF_PLANE0_FD as Attrib,
4429 fd as Attrib,
4430 egl::IMAGE_PRESERVED as Attrib,
4431 egl::TRUE as Attrib,
4432 khronos_egl::NONE as Attrib,
4433 ];
4434
4435 self.new_egl_image_owned(egl_ext::LINUX_DMA_BUF, &egl_img_attr)
4436 }
4437
4438 fn get_or_create_egl_image_rgb(
4441 &mut self,
4442 img: &TensorImage,
4443 width: usize,
4444 height: usize,
4445 drm_format: DrmFourcc,
4446 bpp: usize,
4447 ) -> Result<egl::Image, crate::Error> {
4448 let id = img.buffer_identity().id();
4449 self.dst_egl_cache.sweep();
4450
4451 let ts = self.dst_egl_cache.next_timestamp();
4452 if let Some(cached) = self.dst_egl_cache.entries.get_mut(&id) {
4453 self.dst_egl_cache.hits += 1;
4454 cached.last_used = ts;
4455 log::trace!("EglImageCache dst (RGB) hit: id={id:#x}");
4456 return Ok(cached.egl_image.egl_image);
4457 }
4458 self.dst_egl_cache.misses += 1;
4459 log::trace!("EglImageCache dst (RGB) miss: id={id:#x}");
4460
4461 if self.dst_egl_cache.entries.len() >= self.dst_egl_cache.capacity {
4462 self.dst_egl_cache.evict_lru();
4463 }
4464
4465 let egl_image_obj = self.create_egl_image_with_dims(img, width, height, drm_format, bpp)?;
4466 let handle = egl_image_obj.egl_image;
4467 let guard = img.buffer_identity().weak();
4468 let ts = self.dst_egl_cache.next_timestamp();
4469 self.dst_egl_cache.entries.insert(
4470 id,
4471 CachedEglImage {
4472 egl_image: egl_image_obj,
4473 guard,
4474 renderbuffer: None,
4475 last_used: ts,
4476 },
4477 );
4478 Ok(handle)
4479 }
4480
4481 fn get_or_create_rgb_direct_rbo(
4485 &mut self,
4486 dst: &TensorImage,
4487 ) -> crate::Result<(u32, i32, i32)> {
4488 let id = dst.buffer_identity().id();
4489 let width = dst.width() as i32;
4490 let height = dst.height() as i32;
4491
4492 self.dst_egl_cache.sweep();
4493
4494 let ts = self.dst_egl_cache.next_timestamp();
4496 if let Some(cached) = self.dst_egl_cache.entries.get_mut(&id) {
4497 if let Some(rbo) = cached.renderbuffer {
4498 self.dst_egl_cache.hits += 1;
4499 cached.last_used = ts;
4500 log::trace!("EglImageCache dst (rgb_direct) hit: id={id:#x}");
4501 return Ok((rbo, width, height));
4502 }
4503 }
4504 self.dst_egl_cache.misses += 1;
4505 log::trace!("EglImageCache dst (rgb_direct) miss: id={id:#x}");
4506
4507 if self.dst_egl_cache.entries.len() >= self.dst_egl_cache.capacity {
4509 self.dst_egl_cache.evict_lru();
4510 }
4511
4512 let egl_image_obj =
4514 self.create_egl_image_with_dims(dst, dst.width(), dst.height(), DrmFourcc::Bgr888, 3)?;
4515
4516 let rbo = unsafe {
4518 let mut rbo = 0u32;
4519 gls::gl::GenRenderbuffers(1, &mut rbo);
4520 gls::gl::BindRenderbuffer(gls::gl::RENDERBUFFER, rbo);
4521 gls::gl::EGLImageTargetRenderbufferStorageOES(
4522 gls::gl::RENDERBUFFER,
4523 egl_image_obj.egl_image.as_ptr(),
4524 );
4525 if let Err(e) = check_gl_error(function!(), line!()) {
4526 gls::gl::DeleteRenderbuffers(1, &rbo);
4527 return Err(e);
4528 }
4529 rbo
4530 };
4531
4532 let guard = dst.buffer_identity().weak();
4534 let ts = self.dst_egl_cache.next_timestamp();
4535 self.dst_egl_cache.entries.insert(
4536 id,
4537 CachedEglImage {
4538 egl_image: egl_image_obj,
4539 guard,
4540 renderbuffer: Some(rbo),
4541 last_used: ts,
4542 },
4543 );
4544
4545 Ok((rbo, width, height))
4546 }
4547
4548 fn reshape_segmentation_to_rgba(&self, segmentation: &[u8], shape: [usize; 3]) -> Vec<u8> {
4550 let [height, width, classes] = shape;
4551
4552 let n_layer_stride = height * width * 4;
4553 let n_row_stride = width * 4;
4554 let n_col_stride = 4;
4555 let row_stride = width * classes;
4556 let col_stride = classes;
4557
4558 let mut new_segmentation = vec![0u8; n_layer_stride * classes.div_ceil(4)];
4559
4560 for i in 0..height {
4561 for j in 0..width {
4562 for k in 0..classes.div_ceil(4) * 4 {
4563 if k >= classes {
4564 new_segmentation[n_layer_stride * (k / 4)
4565 + i * n_row_stride
4566 + j * n_col_stride
4567 + k % 4] = 0;
4568 } else {
4569 new_segmentation[n_layer_stride * (k / 4)
4570 + i * n_row_stride
4571 + j * n_col_stride
4572 + k % 4] = segmentation[i * row_stride + j * col_stride + k];
4573 }
4574 }
4575 }
4576 }
4577
4578 new_segmentation
4579 }
4580
4581 fn render_modelpack_segmentation(
4582 &mut self,
4583 dst_roi: RegionOfInterest,
4584 segmentation: &[u8],
4585 shape: [usize; 3],
4586 ) -> Result<(), crate::Error> {
4587 log::debug!("start render_segmentation_to_image");
4588
4589 let new_segmentation = self.reshape_segmentation_to_rgba(segmentation, shape);
4592
4593 let [height, width, classes] = shape;
4594
4595 let format = gls::gl::RGBA;
4596 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
4597 self.segmentation_program
4598 .load_uniform_1i(c"background_index", shape[2] as i32 - 1)?;
4599
4600 gls::use_program(self.segmentation_program.id);
4601
4602 gls::bind_texture(texture_target, self.segmentation_texture.id);
4603 gls::active_texture(gls::gl::TEXTURE0);
4604 gls::tex_parameteri(
4605 texture_target,
4606 gls::gl::TEXTURE_MIN_FILTER,
4607 gls::gl::LINEAR as i32,
4608 );
4609 gls::tex_parameteri(
4610 texture_target,
4611 gls::gl::TEXTURE_MAG_FILTER,
4612 gls::gl::LINEAR as i32,
4613 );
4614 gls::tex_parameteri(
4615 texture_target,
4616 gls::gl::TEXTURE_WRAP_S,
4617 gls::gl::CLAMP_TO_EDGE as i32,
4618 );
4619
4620 gls::tex_parameteri(
4621 texture_target,
4622 gls::gl::TEXTURE_WRAP_T,
4623 gls::gl::CLAMP_TO_EDGE as i32,
4624 );
4625
4626 gls::tex_image3d(
4627 texture_target,
4628 0,
4629 format as i32,
4630 width as i32,
4631 height as i32,
4632 classes.div_ceil(4) as i32,
4633 0,
4634 format,
4635 gls::gl::UNSIGNED_BYTE,
4636 Some(&new_segmentation),
4637 );
4638
4639 let src_roi = RegionOfInterest {
4640 left: 0.,
4641 top: 1.,
4642 right: 1.,
4643 bottom: 0.,
4644 };
4645
4646 unsafe {
4647 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4648 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4649
4650 let camera_vertices: [f32; 12] = [
4651 dst_roi.left,
4652 dst_roi.top,
4653 0., dst_roi.right,
4655 dst_roi.top,
4656 0., dst_roi.right,
4658 dst_roi.bottom,
4659 0., dst_roi.left,
4661 dst_roi.bottom,
4662 0., ];
4664 gls::gl::BufferSubData(
4665 gls::gl::ARRAY_BUFFER,
4666 0,
4667 (size_of::<f32>() * camera_vertices.len()) as isize,
4668 camera_vertices.as_ptr() as *const c_void,
4669 );
4670
4671 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4672 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4673
4674 let texture_vertices: [f32; 8] = [
4675 src_roi.left,
4676 src_roi.top,
4677 src_roi.right,
4678 src_roi.top,
4679 src_roi.right,
4680 src_roi.bottom,
4681 src_roi.left,
4682 src_roi.bottom,
4683 ];
4684 gls::gl::BufferSubData(
4685 gls::gl::ARRAY_BUFFER,
4686 0,
4687 (size_of::<f32>() * 8) as isize,
4688 (texture_vertices[0..]).as_ptr() as *const c_void,
4689 );
4690
4691 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4692 gls::gl::DrawElements(
4693 gls::gl::TRIANGLE_FAN,
4694 vertices_index.len() as i32,
4695 gls::gl::UNSIGNED_INT,
4696 vertices_index.as_ptr() as *const c_void,
4697 );
4698 }
4699
4700 Ok(())
4701 }
4702
4703 fn render_yolo_segmentation(
4704 &mut self,
4705 dst_roi: RegionOfInterest,
4706 segmentation: &[u8],
4707 shape: [usize; 2],
4708 class: usize,
4709 ) -> Result<(), crate::Error> {
4710 log::debug!("start render_yolo_segmentation");
4711
4712 let [height, width] = shape;
4713
4714 let format = gls::gl::RED;
4715 let texture_target = gls::gl::TEXTURE_2D;
4716 gls::use_program(self.instanced_segmentation_program.id);
4717 self.instanced_segmentation_program
4718 .load_uniform_1i(c"class_index", class as i32)?;
4719 gls::bind_texture(texture_target, self.segmentation_texture.id);
4720 gls::active_texture(gls::gl::TEXTURE0);
4721 gls::tex_parameteri(
4722 texture_target,
4723 gls::gl::TEXTURE_MIN_FILTER,
4724 gls::gl::LINEAR as i32,
4725 );
4726 gls::tex_parameteri(
4727 texture_target,
4728 gls::gl::TEXTURE_MAG_FILTER,
4729 gls::gl::LINEAR as i32,
4730 );
4731 gls::tex_parameteri(
4732 texture_target,
4733 gls::gl::TEXTURE_WRAP_S,
4734 gls::gl::CLAMP_TO_EDGE as i32,
4735 );
4736
4737 gls::tex_parameteri(
4738 texture_target,
4739 gls::gl::TEXTURE_WRAP_T,
4740 gls::gl::CLAMP_TO_EDGE as i32,
4741 );
4742
4743 gls::tex_image2d(
4744 texture_target,
4745 0,
4746 format as i32,
4747 width as i32,
4748 height as i32,
4749 0,
4750 format,
4751 gls::gl::UNSIGNED_BYTE,
4752 Some(segmentation),
4753 );
4754
4755 let src_roi = RegionOfInterest {
4756 left: 0.,
4757 top: 1.,
4758 right: 1.,
4759 bottom: 0.,
4760 };
4761
4762 unsafe {
4763 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4764 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4765
4766 let camera_vertices: [f32; 12] = [
4767 dst_roi.left,
4768 dst_roi.top,
4769 0., dst_roi.right,
4771 dst_roi.top,
4772 0., dst_roi.right,
4774 dst_roi.bottom,
4775 0., dst_roi.left,
4777 dst_roi.bottom,
4778 0., ];
4780 gls::gl::BufferSubData(
4781 gls::gl::ARRAY_BUFFER,
4782 0,
4783 (size_of::<f32>() * camera_vertices.len()) as isize,
4784 camera_vertices.as_ptr() as *const c_void,
4785 );
4786
4787 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4788 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4789
4790 let texture_vertices: [f32; 8] = [
4791 src_roi.left,
4792 src_roi.top,
4793 src_roi.right,
4794 src_roi.top,
4795 src_roi.right,
4796 src_roi.bottom,
4797 src_roi.left,
4798 src_roi.bottom,
4799 ];
4800 gls::gl::BufferSubData(
4801 gls::gl::ARRAY_BUFFER,
4802 0,
4803 (size_of::<f32>() * 8) as isize,
4804 (texture_vertices).as_ptr() as *const c_void,
4805 );
4806
4807 let vertices_index: [u32; 4] = [0, 1, 2, 3];
4808 gls::gl::DrawElements(
4809 gls::gl::TRIANGLE_FAN,
4810 vertices_index.len() as i32,
4811 gls::gl::UNSIGNED_INT,
4812 vertices_index.as_ptr() as *const c_void,
4813 );
4814 gls::gl::Finish();
4815 }
4816
4817 Ok(())
4818 }
4819
4820 fn repack_protos_to_rgba_f16(protos: &ndarray::Array3<f32>) -> (Vec<u8>, usize) {
4825 let (height, width, num_protos) = protos.dim();
4826 let num_layers = num_protos.div_ceil(4);
4827 let layer_stride = height * width * 4;
4829 let mut buf = vec![0u16; layer_stride * num_layers];
4830
4831 for y in 0..height {
4832 for x in 0..width {
4833 for k in 0..num_layers * 4 {
4834 let val = if k < num_protos {
4835 half::f16::from_f32(protos[[y, x, k]])
4836 } else {
4837 half::f16::ZERO
4838 };
4839 let layer = k / 4;
4840 let channel = k % 4;
4841 buf[layer * layer_stride + y * width * 4 + x * 4 + channel] = val.to_bits();
4842 }
4843 }
4844 }
4845
4846 let byte_buf = unsafe {
4848 std::slice::from_raw_parts(buf.as_ptr() as *const u8, buf.len() * 2).to_vec()
4849 };
4850 (byte_buf, num_layers)
4851 }
4852
4853 fn render_proto_segmentation(
4860 &mut self,
4861 detect: &[DetectBox],
4862 proto_data: &ProtoData,
4863 ) -> crate::Result<()> {
4864 if detect.is_empty() || proto_data.mask_coefficients.is_empty() {
4865 return Ok(());
4866 }
4867
4868 let (height, width, num_protos) = proto_data.protos.dim();
4869 let texture_target = gls::gl::TEXTURE_2D_ARRAY;
4870
4871 match &proto_data.protos {
4872 ProtoTensor::Quantized {
4873 protos,
4874 quantization,
4875 } => {
4876 self.render_proto_segmentation_int8(
4877 detect,
4878 &proto_data.mask_coefficients,
4879 protos,
4880 quantization,
4881 height,
4882 width,
4883 num_protos,
4884 texture_target,
4885 )?;
4886 }
4887 ProtoTensor::Float(protos_f32) => {
4888 if self.has_float_linear {
4889 self.render_proto_segmentation_f32(
4890 detect,
4891 &proto_data.mask_coefficients,
4892 protos_f32,
4893 height,
4894 width,
4895 num_protos,
4896 texture_target,
4897 )?;
4898 } else {
4899 self.render_proto_segmentation_f16(
4901 detect,
4902 &proto_data.mask_coefficients,
4903 protos_f32,
4904 height,
4905 width,
4906 num_protos,
4907 texture_target,
4908 )?;
4909 }
4910 }
4911 }
4912
4913 unsafe { gls::gl::Finish() };
4914 Ok(())
4915 }
4916
4917 fn render_proto_detection_quads(
4920 &self,
4921 program: &GlProgram,
4922 detect: &[DetectBox],
4923 mask_coefficients: &[Vec<f32>],
4924 ) -> crate::Result<()> {
4925 let cvt_screen_coord = |normalized: f32| normalized * 2.0 - 1.0;
4926
4927 for (det, coeff) in detect.iter().zip(mask_coefficients.iter()) {
4928 let mut packed_coeff = [[0.0f32; 4]; 8];
4929 for (i, val) in coeff.iter().enumerate().take(32) {
4930 packed_coeff[i / 4][i % 4] = *val;
4931 }
4932
4933 program.load_uniform_4fv(c"mask_coeff", &packed_coeff)?;
4934 program.load_uniform_1i(c"class_index", det.label as i32)?;
4935
4936 let dst_roi = RegionOfInterest {
4937 left: cvt_screen_coord(det.bbox.xmin),
4938 top: cvt_screen_coord(det.bbox.ymax),
4939 right: cvt_screen_coord(det.bbox.xmax),
4940 bottom: cvt_screen_coord(det.bbox.ymin),
4941 };
4942
4943 let src_roi = RegionOfInterest {
4951 left: det.bbox.xmin,
4952 top: det.bbox.ymax,
4953 right: det.bbox.xmax,
4954 bottom: det.bbox.ymin,
4955 };
4956
4957 unsafe {
4958 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
4959 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
4960
4961 let camera_vertices: [f32; 12] = [
4962 dst_roi.left,
4963 dst_roi.top,
4964 0.,
4965 dst_roi.right,
4966 dst_roi.top,
4967 0.,
4968 dst_roi.right,
4969 dst_roi.bottom,
4970 0.,
4971 dst_roi.left,
4972 dst_roi.bottom,
4973 0.,
4974 ];
4975 gls::gl::BufferSubData(
4976 gls::gl::ARRAY_BUFFER,
4977 0,
4978 (size_of::<f32>() * camera_vertices.len()) as isize,
4979 camera_vertices.as_ptr() as *const c_void,
4980 );
4981
4982 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
4983 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
4984
4985 let texture_vertices: [f32; 8] = [
4986 src_roi.left,
4987 src_roi.top,
4988 src_roi.right,
4989 src_roi.top,
4990 src_roi.right,
4991 src_roi.bottom,
4992 src_roi.left,
4993 src_roi.bottom,
4994 ];
4995 gls::gl::BufferSubData(
4996 gls::gl::ARRAY_BUFFER,
4997 0,
4998 (size_of::<f32>() * 8) as isize,
4999 texture_vertices.as_ptr() as *const c_void,
5000 );
5001
5002 let vertices_index: [u32; 4] = [0, 1, 2, 3];
5003 gls::gl::DrawElements(
5004 gls::gl::TRIANGLE_FAN,
5005 vertices_index.len() as i32,
5006 gls::gl::UNSIGNED_INT,
5007 vertices_index.as_ptr() as *const c_void,
5008 );
5009 }
5010 }
5011 Ok(())
5012 }
5013
5014 #[allow(clippy::too_many_arguments)]
5017 fn render_proto_segmentation_int8(
5018 &mut self,
5019 detect: &[DetectBox],
5020 mask_coefficients: &[Vec<f32>],
5021 protos: &ndarray::Array3<i8>,
5022 quantization: &edgefirst_decoder::Quantization,
5023 height: usize,
5024 width: usize,
5025 num_protos: usize,
5026 texture_target: u32,
5027 ) -> crate::Result<()> {
5028 gls::bind_texture(texture_target, self.proto_texture.id);
5030 gls::active_texture(gls::gl::TEXTURE0);
5031 gls::tex_parameteri(
5032 texture_target,
5033 gls::gl::TEXTURE_MIN_FILTER,
5034 gls::gl::NEAREST as i32,
5035 );
5036 gls::tex_parameteri(
5037 texture_target,
5038 gls::gl::TEXTURE_MAG_FILTER,
5039 gls::gl::NEAREST as i32,
5040 );
5041 gls::tex_parameteri(
5042 texture_target,
5043 gls::gl::TEXTURE_WRAP_S,
5044 gls::gl::CLAMP_TO_EDGE as i32,
5045 );
5046 gls::tex_parameteri(
5047 texture_target,
5048 gls::gl::TEXTURE_WRAP_T,
5049 gls::gl::CLAMP_TO_EDGE as i32,
5050 );
5051
5052 let mut tex_data = vec![0i8; height * width * num_protos];
5055 for k in 0..num_protos {
5056 for y in 0..height {
5057 for x in 0..width {
5058 tex_data[k * height * width + y * width + x] = protos[[y, x, k]];
5059 }
5060 }
5061 }
5062
5063 gls::tex_image3d(
5064 texture_target,
5065 0,
5066 gls::gl::R8I as i32,
5067 width as i32,
5068 height as i32,
5069 num_protos as i32,
5070 0,
5071 gls::gl::RED_INTEGER,
5072 gls::gl::BYTE,
5073 Some(&tex_data),
5074 );
5075
5076 let proto_scale = quantization.scale;
5077 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
5078
5079 match self.int8_interpolation_mode {
5080 Int8InterpolationMode::Nearest => {
5081 let program = &self.proto_segmentation_int8_nearest_program;
5082 gls::use_program(program.id);
5083 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5084 program.load_uniform_1f(c"proto_scale", proto_scale)?;
5085 program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5086 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5087 }
5088 Int8InterpolationMode::Bilinear => {
5089 let program = &self.proto_segmentation_int8_bilinear_program;
5090 gls::use_program(program.id);
5091 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5092 program.load_uniform_1f(c"proto_scale", proto_scale)?;
5093 program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5094 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5095 }
5096 Int8InterpolationMode::TwoPass => {
5097 self.render_proto_int8_two_pass(
5098 detect,
5099 mask_coefficients,
5100 quantization,
5101 height,
5102 width,
5103 num_protos,
5104 texture_target,
5105 )?;
5106 }
5107 }
5108
5109 Ok(())
5110 }
5111
5112 #[allow(clippy::too_many_arguments)]
5115 fn render_proto_int8_two_pass(
5116 &self,
5117 detect: &[DetectBox],
5118 mask_coefficients: &[Vec<f32>],
5119 quantization: &edgefirst_decoder::Quantization,
5120 height: usize,
5121 width: usize,
5122 num_protos: usize,
5123 texture_target: u32,
5124 ) -> crate::Result<()> {
5125 let num_layers = num_protos.div_ceil(4);
5126
5127 let (saved_fbo, saved_viewport) = unsafe {
5129 let mut fbo: i32 = 0;
5130 gls::gl::GetIntegerv(gls::gl::FRAMEBUFFER_BINDING, &mut fbo);
5131 let mut vp = [0i32; 4];
5132 gls::gl::GetIntegerv(gls::gl::VIEWPORT, vp.as_mut_ptr());
5133 (fbo as u32, vp)
5134 };
5135
5136 let dequant_fbo = FrameBuffer::new();
5138 gls::bind_texture(texture_target, self.proto_dequant_texture.id);
5139 gls::tex_image3d::<u8>(
5140 texture_target,
5141 0,
5142 gls::gl::RGBA16F as i32,
5143 width as i32,
5144 height as i32,
5145 num_layers as i32,
5146 0,
5147 gls::gl::RGBA,
5148 gls::gl::HALF_FLOAT,
5149 None,
5150 );
5151 gls::tex_parameteri(
5152 texture_target,
5153 gls::gl::TEXTURE_MIN_FILTER,
5154 gls::gl::LINEAR as i32,
5155 );
5156 gls::tex_parameteri(
5157 texture_target,
5158 gls::gl::TEXTURE_MAG_FILTER,
5159 gls::gl::LINEAR as i32,
5160 );
5161 gls::tex_parameteri(
5162 texture_target,
5163 gls::gl::TEXTURE_WRAP_S,
5164 gls::gl::CLAMP_TO_EDGE as i32,
5165 );
5166 gls::tex_parameteri(
5167 texture_target,
5168 gls::gl::TEXTURE_WRAP_T,
5169 gls::gl::CLAMP_TO_EDGE as i32,
5170 );
5171
5172 let proto_scale = quantization.scale;
5173 let proto_scaled_zp = -(quantization.zero_point as f32) * quantization.scale;
5174
5175 let dequant_program = &self.proto_dequant_int8_program;
5176 gls::use_program(dequant_program.id);
5177 dequant_program.load_uniform_1f(c"proto_scale", proto_scale)?;
5178 dequant_program.load_uniform_1f(c"proto_scaled_zp", proto_scaled_zp)?;
5179
5180 gls::active_texture(gls::gl::TEXTURE0);
5182 gls::bind_texture(texture_target, self.proto_texture.id);
5183
5184 for layer in 0..num_layers {
5186 dequant_fbo.bind();
5187 unsafe {
5188 gls::gl::FramebufferTextureLayer(
5189 gls::gl::FRAMEBUFFER,
5190 gls::gl::COLOR_ATTACHMENT0,
5191 self.proto_dequant_texture.id,
5192 0,
5193 layer as i32,
5194 );
5195 gls::gl::Viewport(0, 0, width as i32, height as i32);
5196 }
5197 dequant_program.load_uniform_1i(c"base_layer", (layer * 4) as i32)?;
5198
5199 unsafe {
5201 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
5202 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
5203 let verts: [f32; 12] = [
5204 -1.0, -1.0, 0.0, 1.0, -1.0, 0.0, 1.0, 1.0, 0.0, -1.0, 1.0, 0.0,
5205 ];
5206 gls::gl::BufferSubData(
5207 gls::gl::ARRAY_BUFFER,
5208 0,
5209 (size_of::<f32>() * 12) as isize,
5210 verts.as_ptr() as *const c_void,
5211 );
5212 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.texture_buffer.id);
5213 gls::gl::EnableVertexAttribArray(self.texture_buffer.buffer_index);
5214 let tc: [f32; 8] = [0.0, 0.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0];
5215 gls::gl::BufferSubData(
5216 gls::gl::ARRAY_BUFFER,
5217 0,
5218 (size_of::<f32>() * 8) as isize,
5219 tc.as_ptr() as *const c_void,
5220 );
5221 let idx: [u32; 4] = [0, 1, 2, 3];
5222 gls::gl::DrawElements(
5223 gls::gl::TRIANGLE_FAN,
5224 4,
5225 gls::gl::UNSIGNED_INT,
5226 idx.as_ptr() as *const c_void,
5227 );
5228 }
5229 }
5230
5231 drop(dequant_fbo);
5233 unsafe {
5234 gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, saved_fbo);
5235 gls::gl::Viewport(
5236 saved_viewport[0],
5237 saved_viewport[1],
5238 saved_viewport[2],
5239 saved_viewport[3],
5240 );
5241 }
5242
5243 let program = &self.proto_segmentation_program;
5245 gls::use_program(program.id);
5246 gls::active_texture(gls::gl::TEXTURE0);
5247 gls::bind_texture(texture_target, self.proto_dequant_texture.id);
5248 program.load_uniform_1i(c"num_layers", num_layers as i32)?;
5249 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5250
5251 Ok(())
5252 }
5253
5254 #[allow(clippy::too_many_arguments)]
5256 fn render_proto_segmentation_f32(
5257 &self,
5258 detect: &[DetectBox],
5259 mask_coefficients: &[Vec<f32>],
5260 protos_f32: &ndarray::Array3<f32>,
5261 height: usize,
5262 width: usize,
5263 num_protos: usize,
5264 texture_target: u32,
5265 ) -> crate::Result<()> {
5266 let program = &self.proto_segmentation_f32_program;
5267 gls::use_program(program.id);
5268 gls::bind_texture(texture_target, self.proto_texture.id);
5269 gls::active_texture(gls::gl::TEXTURE0);
5270 gls::tex_parameteri(
5271 texture_target,
5272 gls::gl::TEXTURE_MIN_FILTER,
5273 gls::gl::LINEAR as i32,
5274 );
5275 gls::tex_parameteri(
5276 texture_target,
5277 gls::gl::TEXTURE_MAG_FILTER,
5278 gls::gl::LINEAR as i32,
5279 );
5280 gls::tex_parameteri(
5281 texture_target,
5282 gls::gl::TEXTURE_WRAP_S,
5283 gls::gl::CLAMP_TO_EDGE as i32,
5284 );
5285 gls::tex_parameteri(
5286 texture_target,
5287 gls::gl::TEXTURE_WRAP_T,
5288 gls::gl::CLAMP_TO_EDGE as i32,
5289 );
5290
5291 let mut tex_data = vec![0.0f32; height * width * num_protos];
5293 for k in 0..num_protos {
5294 for y in 0..height {
5295 for x in 0..width {
5296 tex_data[k * height * width + y * width + x] = protos_f32[[y, x, k]];
5297 }
5298 }
5299 }
5300
5301 gls::tex_image3d(
5302 texture_target,
5303 0,
5304 gls::gl::R32F as i32,
5305 width as i32,
5306 height as i32,
5307 num_protos as i32,
5308 0,
5309 gls::gl::RED,
5310 gls::gl::FLOAT,
5311 Some(&tex_data),
5312 );
5313
5314 program.load_uniform_1i(c"num_protos", num_protos as i32)?;
5315 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5316
5317 Ok(())
5318 }
5319
5320 #[allow(clippy::too_many_arguments)]
5324 fn render_proto_segmentation_f16(
5325 &self,
5326 detect: &[DetectBox],
5327 mask_coefficients: &[Vec<f32>],
5328 protos_f32: &ndarray::Array3<f32>,
5329 height: usize,
5330 width: usize,
5331 num_protos: usize,
5332 texture_target: u32,
5333 ) -> crate::Result<()> {
5334 let num_layers = num_protos.div_ceil(4);
5335 let (tex_data, _) = Self::repack_protos_to_rgba_f16(protos_f32);
5336
5337 let program = &self.proto_segmentation_program;
5338 gls::use_program(program.id);
5339 gls::bind_texture(texture_target, self.proto_texture.id);
5340 gls::active_texture(gls::gl::TEXTURE0);
5341 gls::tex_parameteri(
5342 texture_target,
5343 gls::gl::TEXTURE_MIN_FILTER,
5344 gls::gl::LINEAR as i32,
5345 );
5346 gls::tex_parameteri(
5347 texture_target,
5348 gls::gl::TEXTURE_MAG_FILTER,
5349 gls::gl::LINEAR as i32,
5350 );
5351 gls::tex_parameteri(
5352 texture_target,
5353 gls::gl::TEXTURE_WRAP_S,
5354 gls::gl::CLAMP_TO_EDGE as i32,
5355 );
5356 gls::tex_parameteri(
5357 texture_target,
5358 gls::gl::TEXTURE_WRAP_T,
5359 gls::gl::CLAMP_TO_EDGE as i32,
5360 );
5361
5362 gls::tex_image3d(
5363 texture_target,
5364 0,
5365 gls::gl::RGBA16F as i32,
5366 width as i32,
5367 height as i32,
5368 num_layers as i32,
5369 0,
5370 gls::gl::RGBA,
5371 gls::gl::HALF_FLOAT,
5372 Some(&tex_data),
5373 );
5374
5375 program.load_uniform_1i(c"num_layers", num_layers as i32)?;
5376 self.render_proto_detection_quads(program, detect, mask_coefficients)?;
5377
5378 Ok(())
5379 }
5380
5381 fn render_segmentation(
5382 &mut self,
5383 detect: &[DetectBox],
5384 segmentation: &[Segmentation],
5385 ) -> crate::Result<()> {
5386 if segmentation.is_empty() {
5387 return Ok(());
5388 }
5389
5390 let is_modelpack = segmentation[0].segmentation.shape()[2] > 1;
5391 let cvt_screen_coord = |normalized| normalized * 2.0 - 1.0;
5393 if is_modelpack {
5394 let seg = &segmentation[0];
5395 let dst_roi = RegionOfInterest {
5396 left: cvt_screen_coord(seg.xmin),
5397 top: cvt_screen_coord(seg.ymax),
5398 right: cvt_screen_coord(seg.xmax),
5399 bottom: cvt_screen_coord(seg.ymin),
5400 };
5401 let segment = seg.segmentation.as_standard_layout();
5402 let slice = segment.as_slice().ok_or(Error::Internal(
5403 "Cannot get slice of segmentation".to_owned(),
5404 ))?;
5405
5406 self.render_modelpack_segmentation(
5407 dst_roi,
5408 slice,
5409 [
5410 seg.segmentation.shape()[0],
5411 seg.segmentation.shape()[1],
5412 seg.segmentation.shape()[2],
5413 ],
5414 )?;
5415 } else {
5416 for (seg, det) in segmentation.iter().zip(detect) {
5417 let dst_roi = RegionOfInterest {
5418 left: cvt_screen_coord(seg.xmin),
5419 top: cvt_screen_coord(seg.ymax),
5420 right: cvt_screen_coord(seg.xmax),
5421 bottom: cvt_screen_coord(seg.ymin),
5422 };
5423
5424 let segment = seg.segmentation.as_standard_layout();
5425 let slice = segment.as_slice().ok_or(Error::Internal(
5426 "Cannot get slice of segmentation".to_owned(),
5427 ))?;
5428
5429 self.render_yolo_segmentation(
5430 dst_roi,
5431 slice,
5432 [seg.segmentation.shape()[0], seg.segmentation.shape()[1]],
5433 det.label,
5434 )?;
5435 }
5436 }
5437
5438 gls::disable(gls::gl::BLEND);
5439 Ok(())
5440 }
5441
5442 fn render_box(&mut self, dst: &TensorImage, detect: &[DetectBox]) -> Result<(), Error> {
5443 unsafe {
5444 gls::gl::UseProgram(self.color_program.id);
5445 let rescale = |x: f32| x * 2.0 - 1.0;
5446 let thickness = 3.0;
5447 for d in detect {
5448 self.color_program
5449 .load_uniform_1i(c"class_index", d.label as i32)?;
5450 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, self.vertex_buffer.id);
5451 gls::gl::EnableVertexAttribArray(self.vertex_buffer.buffer_index);
5452 let bbox: [f32; 4] = d.bbox.into();
5453 let outer_box = [
5454 bbox[0] - thickness / dst.width() as f32,
5455 bbox[1] - thickness / dst.height() as f32,
5456 bbox[2] + thickness / dst.width() as f32,
5457 bbox[3] + thickness / dst.height() as f32,
5458 ];
5459 let camera_vertices: [f32; 24] = [
5460 rescale(bbox[0]),
5461 rescale(bbox[3]),
5462 0., rescale(bbox[2]),
5464 rescale(bbox[3]),
5465 0., rescale(bbox[2]),
5467 rescale(bbox[1]),
5468 0., rescale(bbox[0]),
5470 rescale(bbox[1]),
5471 0., rescale(outer_box[0]),
5473 rescale(outer_box[3]),
5474 0., rescale(outer_box[2]),
5476 rescale(outer_box[3]),
5477 0., rescale(outer_box[2]),
5479 rescale(outer_box[1]),
5480 0., rescale(outer_box[0]),
5482 rescale(outer_box[1]),
5483 0., ];
5485 gls::gl::BufferData(
5486 gls::gl::ARRAY_BUFFER,
5487 (size_of::<f32>() * camera_vertices.len()) as isize,
5488 camera_vertices.as_ptr() as *const c_void,
5489 gls::gl::DYNAMIC_DRAW,
5490 );
5491
5492 let vertices_index: [u32; 10] = [0, 1, 5, 2, 6, 3, 7, 0, 4, 5];
5493 gls::gl::DrawElements(
5494 gls::gl::TRIANGLE_STRIP,
5495 vertices_index.len() as i32,
5496 gls::gl::UNSIGNED_INT,
5497 vertices_index.as_ptr() as *const c_void,
5498 );
5499 }
5500 }
5501 check_gl_error(function!(), line!())?;
5502 Ok(())
5503 }
5504}
5505struct EglImage {
5506 egl_image: egl::Image,
5507 egl: Rc<Egl>,
5508 display: egl::Display,
5509}
5510
5511impl Drop for EglImage {
5512 fn drop(&mut self) {
5513 if self.egl_image.as_ptr() == egl::NO_IMAGE {
5514 return;
5515 }
5516
5517 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5518 let e =
5519 GlContext::egl_destroy_image_with_fallback(&self.egl, self.display, self.egl_image);
5520 if let Err(e) = e {
5521 error!("Could not destroy EGL image: {e:?}");
5522 }
5523 }));
5524 }
5525}
5526
5527struct Texture {
5528 id: u32,
5529 target: gls::gl::types::GLenum,
5530 width: usize,
5531 height: usize,
5532 format: gls::gl::types::GLenum,
5533}
5534
5535impl Default for Texture {
5536 fn default() -> Self {
5537 Self::new()
5538 }
5539}
5540
5541impl Texture {
5542 fn new() -> Self {
5543 let mut id = 0;
5544 unsafe { gls::gl::GenTextures(1, &raw mut id) };
5545 Self {
5546 id,
5547 target: 0,
5548 width: 0,
5549 height: 0,
5550 format: 0,
5551 }
5552 }
5553
5554 fn update_texture(
5555 &mut self,
5556 target: gls::gl::types::GLenum,
5557 width: usize,
5558 height: usize,
5559 format: gls::gl::types::GLenum,
5560 data: &[u8],
5561 ) {
5562 if target != self.target
5563 || width != self.width
5564 || height != self.height
5565 || format != self.format
5566 {
5567 unsafe {
5568 gls::gl::TexImage2D(
5569 target,
5570 0,
5571 format as i32,
5572 width as i32,
5573 height as i32,
5574 0,
5575 format,
5576 gls::gl::UNSIGNED_BYTE,
5577 data.as_ptr() as *const c_void,
5578 );
5579 }
5580 self.target = target;
5581 self.format = format;
5582 self.width = width;
5583 self.height = height;
5584 } else {
5585 unsafe {
5586 gls::gl::TexSubImage2D(
5587 target,
5588 0,
5589 0,
5590 0,
5591 width as i32,
5592 height as i32,
5593 format,
5594 gls::gl::UNSIGNED_BYTE,
5595 data.as_ptr() as *const c_void,
5596 );
5597 }
5598 }
5599 }
5600}
5601
5602impl Drop for Texture {
5603 fn drop(&mut self) {
5604 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5605 gls::gl::DeleteTextures(1, &raw mut self.id)
5606 }));
5607 }
5608}
5609
5610struct Buffer {
5611 id: u32,
5612 buffer_index: u32,
5613}
5614
5615impl Buffer {
5616 fn new(buffer_index: u32, size_per_point: usize, max_points: usize) -> Buffer {
5617 let mut id = 0;
5618 unsafe {
5619 gls::gl::EnableVertexAttribArray(buffer_index);
5620 gls::gl::GenBuffers(1, &raw mut id);
5621 gls::gl::BindBuffer(gls::gl::ARRAY_BUFFER, id);
5622 gls::gl::VertexAttribPointer(
5623 buffer_index,
5624 size_per_point as i32,
5625 gls::gl::FLOAT,
5626 gls::gl::FALSE,
5627 0,
5628 null(),
5629 );
5630 gls::gl::BufferData(
5631 gls::gl::ARRAY_BUFFER,
5632 (size_of::<f32>() * size_per_point * max_points) as isize,
5633 null(),
5634 gls::gl::DYNAMIC_DRAW,
5635 );
5636 }
5637
5638 Buffer { id, buffer_index }
5639 }
5640}
5641
5642impl Drop for Buffer {
5643 fn drop(&mut self) {
5644 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5645 gls::gl::DeleteBuffers(1, &raw mut self.id)
5646 }));
5647 }
5648}
5649
5650struct FrameBuffer {
5651 id: u32,
5652}
5653
5654impl FrameBuffer {
5655 fn new() -> FrameBuffer {
5656 let mut id = 0;
5657 unsafe {
5658 gls::gl::GenFramebuffers(1, &raw mut id);
5659 }
5660
5661 FrameBuffer { id }
5662 }
5663
5664 fn bind(&self) {
5665 unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, self.id) };
5666 }
5667
5668 fn unbind(&self) {
5669 unsafe { gls::gl::BindFramebuffer(gls::gl::FRAMEBUFFER, 0) };
5670 }
5671}
5672
5673impl Drop for FrameBuffer {
5674 fn drop(&mut self) {
5675 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| {
5676 self.unbind();
5677 unsafe {
5678 gls::gl::DeleteFramebuffers(1, &raw mut self.id);
5679 }
5680 }));
5681 }
5682}
5683
5684pub struct GlProgram {
5685 id: u32,
5686 vertex_id: u32,
5687 fragment_id: u32,
5688}
5689
5690impl GlProgram {
5691 fn new(vertex_shader: &str, fragment_shader: &str) -> Result<Self, crate::Error> {
5692 let id = unsafe { gls::gl::CreateProgram() };
5693 let vertex_id = unsafe { gls::gl::CreateShader(gls::gl::VERTEX_SHADER) };
5694 if compile_shader_from_str(vertex_id, vertex_shader, "shader_vert").is_err() {
5695 log::debug!("Vertex shader source:\n{}", vertex_shader);
5696 return Err(crate::Error::OpenGl(format!(
5697 "Shader compile error: {vertex_shader}"
5698 )));
5699 }
5700 unsafe {
5701 gls::gl::AttachShader(id, vertex_id);
5702 }
5703
5704 let fragment_id = unsafe { gls::gl::CreateShader(gls::gl::FRAGMENT_SHADER) };
5705 if compile_shader_from_str(fragment_id, fragment_shader, "shader_frag").is_err() {
5706 log::debug!("Fragment shader source:\n{}", fragment_shader);
5707 return Err(crate::Error::OpenGl(format!(
5708 "Shader compile error: {fragment_shader}"
5709 )));
5710 }
5711
5712 unsafe {
5713 gls::gl::AttachShader(id, fragment_id);
5714 gls::gl::LinkProgram(id);
5715 gls::gl::UseProgram(id);
5716 }
5717
5718 Ok(Self {
5719 id,
5720 vertex_id,
5721 fragment_id,
5722 })
5723 }
5724
5725 #[allow(dead_code)]
5726 fn load_uniform_1f(&self, name: &CStr, value: f32) -> Result<(), crate::Error> {
5727 unsafe {
5728 gls::gl::UseProgram(self.id);
5729 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5730 gls::gl::Uniform1f(location, value);
5731 }
5732 Ok(())
5733 }
5734
5735 #[allow(dead_code)]
5736 fn load_uniform_1i(&self, name: &CStr, value: i32) -> Result<(), crate::Error> {
5737 unsafe {
5738 gls::gl::UseProgram(self.id);
5739 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5740 gls::gl::Uniform1i(location, value);
5741 }
5742 Ok(())
5743 }
5744
5745 fn load_uniform_4fv(&self, name: &CStr, value: &[[f32; 4]]) -> Result<(), crate::Error> {
5746 unsafe {
5747 gls::gl::UseProgram(self.id);
5748 let location = gls::gl::GetUniformLocation(self.id, name.as_ptr());
5749 if location == -1 {
5750 return Err(crate::Error::OpenGl(format!(
5751 "Could not find uniform location for '{}'",
5752 name.to_string_lossy().into_owned()
5753 )));
5754 }
5755 gls::gl::Uniform4fv(location, value.len() as i32, value.as_flattened().as_ptr());
5756 }
5757 check_gl_error(function!(), line!())?;
5758 Ok(())
5759 }
5760}
5761
5762impl Drop for GlProgram {
5763 fn drop(&mut self) {
5764 let _ = std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| unsafe {
5765 gls::gl::DeleteProgram(self.id);
5766 gls::gl::DeleteShader(self.fragment_id);
5767 gls::gl::DeleteShader(self.vertex_id);
5768 }));
5769 }
5770}
5771
5772fn compile_shader_from_str(shader: u32, shader_source: &str, shader_name: &str) -> Result<(), ()> {
5773 let src = match CString::from_str(shader_source) {
5774 Ok(v) => v,
5775 Err(_) => return Err(()),
5776 };
5777 let src_ptr = src.as_ptr();
5778 unsafe {
5779 gls::gl::ShaderSource(shader, 1, &raw const src_ptr, null());
5780 gls::gl::CompileShader(shader);
5781 let mut is_compiled = 0;
5782 gls::gl::GetShaderiv(shader, gls::gl::COMPILE_STATUS, &raw mut is_compiled);
5783 if is_compiled == 0 {
5784 let mut max_length = 0;
5785 gls::gl::GetShaderiv(shader, gls::gl::INFO_LOG_LENGTH, &raw mut max_length);
5786 let mut error_log: Vec<u8> = vec![0; max_length as usize];
5787 gls::gl::GetShaderInfoLog(
5788 shader,
5789 max_length,
5790 &raw mut max_length,
5791 error_log.as_mut_ptr() as *mut c_char,
5792 );
5793 error!(
5794 "Shader '{}' failed: {:?}\n",
5795 shader_name,
5796 CString::from_vec_with_nul(error_log)
5797 .unwrap()
5798 .into_string()
5799 .unwrap()
5800 );
5801 gls::gl::DeleteShader(shader);
5802 return Err(());
5803 }
5804 Ok(())
5805 }
5806}
5807
5808fn check_gl_error(name: &str, line: u32) -> Result<(), Error> {
5809 unsafe {
5810 let err = gls::gl::GetError();
5811 if err != gls::gl::NO_ERROR {
5812 error!("GL Error: {name}:{line}: {err:#X}");
5813 return Err(Error::OpenGl(format!("{err:#X}")));
5815 }
5816 }
5817 Ok(())
5818}
5819
5820fn fourcc_to_drm(fourcc: FourCharCode) -> Result<DrmFourcc, Error> {
5821 match fourcc {
5822 RGBA => Ok(DrmFourcc::Abgr8888),
5823 YUYV => Ok(DrmFourcc::Yuyv),
5824 VYUY => Ok(DrmFourcc::Vyuy),
5825 RGB | RGB_INT8 => Ok(DrmFourcc::Bgr888),
5826 GREY => Ok(DrmFourcc::R8),
5827 NV12 => Ok(DrmFourcc::Nv12),
5828 PLANAR_RGB | PLANAR_RGB_INT8 => Ok(DrmFourcc::R8),
5829 _ => Err(Error::NotSupported(format!(
5830 "FourCC {fourcc:?} has no DRM format mapping"
5831 ))),
5832 }
5833}
5834
5835mod egl_ext {
5836 #![allow(dead_code)]
5837 pub(crate) const LINUX_DMA_BUF: u32 = 0x3270;
5838 pub(crate) const LINUX_DRM_FOURCC: u32 = 0x3271;
5839 pub(crate) const DMA_BUF_PLANE0_FD: u32 = 0x3272;
5840 pub(crate) const DMA_BUF_PLANE0_OFFSET: u32 = 0x3273;
5841 pub(crate) const DMA_BUF_PLANE0_PITCH: u32 = 0x3274;
5842 pub(crate) const DMA_BUF_PLANE1_FD: u32 = 0x3275;
5843 pub(crate) const DMA_BUF_PLANE1_OFFSET: u32 = 0x3276;
5844 pub(crate) const DMA_BUF_PLANE1_PITCH: u32 = 0x3277;
5845 pub(crate) const DMA_BUF_PLANE2_FD: u32 = 0x3278;
5846 pub(crate) const DMA_BUF_PLANE2_OFFSET: u32 = 0x3279;
5847 pub(crate) const DMA_BUF_PLANE2_PITCH: u32 = 0x327A;
5848 pub(crate) const YUV_COLOR_SPACE_HINT: u32 = 0x327B;
5849 pub(crate) const SAMPLE_RANGE_HINT: u32 = 0x327C;
5850 pub(crate) const YUV_CHROMA_HORIZONTAL_SITING_HINT: u32 = 0x327D;
5851 pub(crate) const YUV_CHROMA_VERTICAL_SITING_HINT: u32 = 0x327E;
5852
5853 pub(crate) const ITU_REC601: u32 = 0x327F;
5854 pub(crate) const ITU_REC709: u32 = 0x3280;
5855 pub(crate) const ITU_REC2020: u32 = 0x3281;
5856
5857 pub(crate) const YUV_FULL_RANGE: u32 = 0x3282;
5858 pub(crate) const YUV_NARROW_RANGE: u32 = 0x3283;
5859
5860 pub(crate) const YUV_CHROMA_SITING_0: u32 = 0x3284;
5861 pub(crate) const YUV_CHROMA_SITING_0_5: u32 = 0x3285;
5862
5863 pub(crate) const PLATFORM_GBM_KHR: u32 = 0x31D7;
5864
5865 pub(crate) const PLATFORM_DEVICE_EXT: u32 = 0x313F;
5866
5867 pub(crate) const NO_CONFIG_KHR: khronos_egl::Config =
5874 unsafe { std::mem::transmute(std::ptr::null_mut::<std::ffi::c_void>()) };
5875}
5876
/// Returns the GLSL ES 3.00 vertex shader source: forwards the position as
/// `fragPos` and the texture coordinate as `tc`, and emits the position
/// unchanged as `gl_Position`.
fn generate_vertex_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
layout(location = 0) in vec3 pos;
layout(location = 1) in vec2 texCoord;

out vec3 fragPos;
out vec2 tc;

void main() {
    fragPos = pos;
    tc = texCoord;

    gl_Position = vec4(pos, 1.0);
}
";
    SOURCE
}
5895
/// Returns the fragment shader source that outputs the `sampler2D` texel at `tc`.
fn generate_texture_fragment_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es

precision mediump float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    SOURCE
}
5912
/// Returns the fragment shader source that outputs the texel at `tc` from an
/// external (`samplerExternalOES`) texture.
fn generate_texture_fragment_shader_yuv() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    SOURCE
}
5929
/// Returns the fragment shader source used for planar RGB output: it outputs
/// the texel at `tc` from an external (`samplerExternalOES`) texture.
fn generate_planar_rgb_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

void main(){
    color = texture(tex, tc);
}
";
    SOURCE
}
5946
/// Returns the fragment shader source used for planar RGB int8 output: it
/// samples an external texture and shifts each RGB channel by 128 modulo 256
/// on the 8-bit scale, leaving alpha untouched.
fn generate_planar_rgb_int8_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

vec3 int8_bias(vec3 v) {
    vec3 q = floor(v * 255.0 + 0.5);
    return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
    vec4 c = texture(tex, tc);
    color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
5972
/// Returns the fragment shader source that samples a `sampler2D` texture and
/// shifts each RGB channel by 128 modulo 256 on the 8-bit scale, leaving
/// alpha untouched.
fn generate_texture_int8_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

// XOR 0x80 bias: quantize to uint8, add 128 mod 256, normalize back.
// This matches the CPU `byte ^ 0x80` operation exactly.
vec3 int8_bias(vec3 v) {
    vec3 q = floor(v * 255.0 + 0.5);
    return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
    vec4 c = texture(tex, tc);
    color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
5999
/// Returns the fragment shader source that samples an external
/// (`samplerExternalOES`) texture and shifts each RGB channel by 128 modulo
/// 256 on the 8-bit scale, leaving alpha untouched.
fn generate_texture_int8_shader_yuv() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;

out vec4 color;

vec3 int8_bias(vec3 v) {
    vec3 q = floor(v * 255.0 + 0.5);
    return mod(q + 128.0, 256.0) / 255.0;
}

void main(){
    vec4 c = texture(tex, tc);
    color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
6025
/// Returns the fragment shader source for multi-class segmentation.
///
/// The shader scans every layer of a `sampler2DArray` (four classes per
/// layer), picks the class with the largest value, discards fragments whose
/// class equals `background_index`, and otherwise writes
/// `colors[class % 20]`.
fn generate_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
precision mediump sampler2DArray;

uniform sampler2DArray tex;
uniform vec4 colors[20];
uniform int background_index;

in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;

float max_arg(const in vec4 args, out int argmax) {
    if (args[0] >= args[1] && args[0] >= args[2] && args[0] >= args[3]) {
        argmax = 0;
        return args[0];
    }
    if (args[1] >= args[0] && args[1] >= args[2] && args[1] >= args[3]) {
        argmax = 1;
        return args[1];
    }
    if (args[2] >= args[0] && args[2] >= args[1] && args[2] >= args[3]) {
        argmax = 2;
        return args[2];
    }
    argmax = 3;
    return args[3];
}

void main() {
    mediump int layers = textureSize(tex, 0).z;
    float max_all = -4.0;
    int max_ind = 0;
    for (int i = 0; i < layers; i++) {
        vec4 d = texture(tex, vec3(tc, i));
        int max_ind_ = 0;
        float max_ = max_arg(d, max_ind_);
        if (max_ <= max_all) { continue; }
        max_all = max_;
        max_ind = i*4 + max_ind_;
    }
    if (max_ind == background_index) {
        discard;
    }
    max_ind = max_ind % 20;
    color = colors[max_ind];
}
";
    SOURCE
}
6081
/// Returns the fragment shader source for a single instance mask: fragments
/// whose `mask0` red channel is below 0.5 are discarded, the rest are written
/// as `colors[class_index % 20]`.
fn generate_instanced_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
uniform sampler2D mask0;
uniform vec4 colors[20];
uniform int class_index;
in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;

out vec4 color;
void main() {
    float r0 = texture(mask0, tc).r;
    int arg = int(r0>=0.5);
    if (arg == 0) {
        discard;
    }
    color = colors[class_index % 20];
}
";
    SOURCE
}
6104
/// Returns the fragment shader source for float prototype masks.
///
/// The shader accumulates `dot(mask_coeff[i], texel)` over `num_layers`
/// layers of an RGBA texture array, applies a sigmoid, discards fragments
/// below 0.5 and writes `colors[class_index % 20]` for the rest.
fn generate_proto_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;  // ceil(num_protos/4) layers, RGBA = 4 channels per layer
uniform vec4 mask_coeff[8];        // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_layers;

in vec2 tc;
out vec4 color;

void main() {
    float acc = 0.0;
    for (int i = 0; i < num_layers; i++) {
        // texture() returns bilinearly interpolated proto values (GL_LINEAR)
        acc += dot(mask_coeff[i], texture(proto_tex, vec3(tc, float(i))));
    }
    float mask = 1.0 / (1.0 + exp(-acc));  // sigmoid
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
";
    SOURCE
}
6132
/// Returns the fragment shader source for int8 prototype masks with
/// nearest-texel lookup.
///
/// Each prototype is one layer of an integer texture array. The shader
/// fetches the texel under `tc`, dequantizes it as
/// `raw * proto_scale + proto_scaled_zp`, accumulates the coefficient-weighted
/// sum over `num_protos`, applies a sigmoid and discards fragments below 0.5.
fn generate_proto_segmentation_shader_int8_nearest() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;  // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8];         // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp;      // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
    ivec3 tex_size = textureSize(proto_tex, 0);
    int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
    int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

    float acc = 0.0;
    for (int k = 0; k < num_protos; k++) {
        float raw = float(texelFetch(proto_tex, ivec3(ix, iy, k), 0).r);
        float val = raw * proto_scale + proto_scaled_zp;
        acc += mask_coeff[k / 4][k % 4] * val;
    }
    float mask = 1.0 / (1.0 + exp(-acc));
    if (mask < 0.5) discard;
    color = colors[class_index % 20];
}
";
    SOURCE
}
6175
/// Returns the GLSL ES 3.00 fragment shader that colours one instance mask
/// from an integer prototype texture array with bilinear interpolation done
/// in the shader.
///
/// The shader computes a continuous texel position (`tc * size - 0.5`), takes
/// the four surrounding texels (clamped to the texture bounds), and blends
/// their raw values with the fractional weights for each of `num_protos`
/// layers. The blended value is dequantized as
/// `interp * proto_scale + proto_scaled_zp`, weighted by the matching
/// `mask_coeff` component, and accumulated. A sigmoid is applied; fragments
/// below 0.5 are discarded and the rest are written as
/// `colors[class_index % 20]`.
fn generate_proto_segmentation_shader_int8_bilinear() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 // Compute continuous position (matching GL_LINEAR convention: center at +0.5)
 vec2 pos = tc * vec2(tex_size.xy) - 0.5;
 vec2 f = fract(pos);
 ivec2 p0 = ivec2(floor(pos));
 ivec2 p1 = p0 + 1;
 // Clamp to texture bounds
 p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
 p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

 float w00 = (1.0 - f.x) * (1.0 - f.y);
 float w10 = f.x * (1.0 - f.y);
 float w01 = (1.0 - f.x) * f.y;
 float w11 = f.x * f.y;

 float acc = 0.0;
 for (int k = 0; k < num_protos; k++) {
 float r00 = float(texelFetch(proto_tex, ivec3(p0.x, p0.y, k), 0).r);
 float r10 = float(texelFetch(proto_tex, ivec3(p1.x, p0.y, k), 0).r);
 float r01 = float(texelFetch(proto_tex, ivec3(p0.x, p1.y, k), 0).r);
 float r11 = float(texelFetch(proto_tex, ivec3(p1.x, p1.y, k), 0).r);
 float interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
 float val = interp * proto_scale + proto_scaled_zp;
 acc += mask_coeff[k / 4][k % 4] * val;
 }
 float mask = 1.0 / (1.0 + exp(-acc));
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6232
/// Returns the GLSL ES 3.00 fragment shader that dequantizes four consecutive
/// integer prototype layers into one RGBA float output.
///
/// For the nearest texel under `tc`, the shader reads layers `base_layer`
/// through `base_layer + 3` of `proto_tex` and writes
/// `raw * proto_scale + proto_scaled_zp` for each into the matching component
/// of `color`.
///
/// NOTE(review): the layer index is not clamped here, so the caller presumably
/// guarantees that `base_layer + 3` is a valid layer — confirm at the call
/// site.
fn generate_proto_dequant_shader_int8() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex; // 32 layers of R8I (1 proto per layer)
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale
uniform int base_layer; // first proto index for this output layer (0, 4, 8, ...)

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
 int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

 vec4 result;
 for (int c = 0; c < 4; c++) {
 int layer = base_layer + c;
 float raw = float(texelFetch(proto_tex, ivec3(ix, iy, layer), 0).r);
 result[c] = raw * proto_scale + proto_scaled_zp;
 }
 color = result;
}
"
}
6269
/// Returns the GLSL ES 3.00 fragment shader that colours one instance mask
/// from a float prototype texture array holding one proto per layer.
///
/// For each of `num_protos` layers the shader samples the red channel of
/// `proto_tex` with `texture()`, multiplies it by component `k % 4` of
/// `mask_coeff[k / 4]`, and accumulates. A sigmoid is applied; fragments below
/// 0.5 are discarded and the rest are written as `colors[class_index % 20]`.
fn generate_proto_segmentation_shader_f32() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
 float acc = 0.0;
 for (int k = 0; k < num_protos; k++) {
 // texture() returns bilinearly interpolated proto value (GL_LINEAR on R32F)
 float val = texture(proto_tex, vec3(tc, float(k))).r;
 acc += mask_coeff[k / 4][k % 4] * val;
 }
 float mask = 1.0 / (1.0 + exp(-acc));
 if (mask < 0.5) discard;
 color = colors[class_index % 20];
}
"
}
6304
/// Returns the GLSL ES 3.00 fragment shader that writes a binary mask (in the
/// red channel) from an integer prototype texture array using nearest-texel
/// lookup.
///
/// Protos are processed in groups of four: each group gathers four raw texel
/// values into a `vec4` and takes its dot product with `mask_coeff[i]`. Layer
/// indices past the end are clamped to `num_protos - 1`. The logit is
/// `acc * proto_scale + coeff_sum_x_szp`; the output is
/// `vec4(1, 0, 0, 1)` when the logit is positive and `vec4(0, 0, 0, 1)`
/// otherwise. No fragment is discarded.
///
/// NOTE(review): when `num_protos` is not a multiple of 4 the clamped lanes
/// re-read the last layer, so the corresponding `mask_coeff` components are
/// presumably zero-padded by the caller — confirm.
fn generate_proto_mask_logit_shader_int8_nearest() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float coeff_sum_x_szp;

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
 int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);

 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 vec4 raw = vec4(
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 1, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 2, num_protos - 1)), 0).r),
 float(texelFetch(proto_tex, ivec3(ix, iy, min(base + 3, num_protos - 1)), 0).r)
 );
 acc += dot(mask_coeff[i], raw);
 }
 float logit = acc * proto_scale + coeff_sum_x_szp;
 float mask = logit > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6349
/// Returns the GLSL ES 3.00 fragment shader that writes a binary mask (in the
/// red channel) from an integer prototype texture array with bilinear
/// interpolation done in the shader.
///
/// The four texels surrounding the continuous position `tc * size - 0.5` are
/// clamped to the texture bounds and blended with the fractional weights.
/// Protos are processed in groups of four layers (indices clamped to
/// `num_protos - 1`); each blended `vec4` is dotted with `mask_coeff[i]` and
/// accumulated. The logit is `acc * proto_scale + coeff_sum_x_szp`; the output
/// is `vec4(1, 0, 0, 1)` when the logit is positive and `vec4(0, 0, 0, 1)`
/// otherwise. No fragment is discarded.
///
/// NOTE(review): as the clamped lanes re-read the last layer, the unused
/// `mask_coeff` components are presumably zero-padded by the caller — confirm.
fn generate_proto_mask_logit_shader_int8_bilinear() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;

uniform isampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;
uniform float proto_scale;
uniform float coeff_sum_x_szp;

in vec2 tc;
out vec4 color;

void main() {
 ivec3 tex_size = textureSize(proto_tex, 0);
 vec2 pos = tc * vec2(tex_size.xy) - 0.5;
 vec2 f = fract(pos);
 ivec2 p0 = ivec2(floor(pos));
 ivec2 p1 = p0 + 1;
 p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
 p1 = clamp(p1, ivec2(0), tex_size.xy - 1);

 float w00 = (1.0 - f.x) * (1.0 - f.y);
 float w10 = f.x * (1.0 - f.y);
 float w01 = (1.0 - f.x) * f.y;
 float w11 = f.x * f.y;

 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 int l0 = min(base, num_protos - 1);
 int l1 = min(base + 1, num_protos - 1);
 int l2 = min(base + 2, num_protos - 1);
 int l3 = min(base + 3, num_protos - 1);
 vec4 r00 = vec4(
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p0.y, l3), 0).r)
 );
 vec4 r10 = vec4(
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p0.y, l3), 0).r)
 );
 vec4 r01 = vec4(
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p0.x, p1.y, l3), 0).r)
 );
 vec4 r11 = vec4(
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l0), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l1), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l2), 0).r),
 float(texelFetch(proto_tex, ivec3(p1.x, p1.y, l3), 0).r)
 );
 vec4 interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
 acc += dot(mask_coeff[i], interp);
 }
 float logit = acc * proto_scale + coeff_sum_x_szp;
 float mask = logit > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6425
/// Returns the GLSL ES 3.00 fragment shader that writes a binary mask (in the
/// red channel) from a float prototype texture array.
///
/// Protos are processed in groups of four: the red channel of four layers
/// (indices clamped to `num_protos - 1`) is sampled with `texture()`, gathered
/// into a `vec4`, and dotted with `mask_coeff[i]`. The output is
/// `vec4(1, 0, 0, 1)` when the accumulated value is positive and
/// `vec4(0, 0, 0, 1)` otherwise. No fragment is discarded.
///
/// NOTE(review): as the clamped lanes re-read the last layer, the unused
/// `mask_coeff` components are presumably zero-padded by the caller — confirm.
fn generate_proto_mask_logit_shader_f32() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp sampler2DArray;

uniform sampler2DArray proto_tex;
uniform vec4 mask_coeff[8];
uniform int num_protos;

in vec2 tc;
out vec4 color;

void main() {
 int groups = (num_protos + 3) / 4;
 float acc = 0.0;
 for (int i = 0; i < groups; i++) {
 int base = i * 4;
 vec4 val = vec4(
 texture(proto_tex, vec3(tc, float(min(base, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 1, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 2, num_protos - 1)))).r,
 texture(proto_tex, vec3(tc, float(min(base + 3, num_protos - 1)))).r
 );
 acc += dot(mask_coeff[i], val);
 }
 float mask = acc > 0.0 ? 1.0 : 0.0;
 color = vec4(mask, 0.0, 0.0, 1.0);
}
"
}
6461
/// Returns the GLSL ES 3.00 fragment shader that fills every fragment with a
/// flat class colour, `colors[class_index % 20]`.
fn generate_color_shader() -> &'static str {
    "\
#version 300 es
precision mediump float;
uniform vec4 colors[20];
uniform int class_index;

out vec4 color;
void main() {
 int index = class_index % 20;
 color = colors[index];
}
"
}
6476
/// Returns the GLSL ES 3.00 fragment shader that repacks the RGB channels of
/// a 2D texture into a tightly packed byte stream carried by RGBA output
/// pixels.
///
/// Output column `out_x` holds stream bytes `4 * out_x ..= 4 * out_x + 3`,
/// where the stream has three bytes (R, G, B) per source texel. The shader
/// fetches source texels `base / 3` and `(base + 3) / 3` from row `out_y` and
/// uses the phase `base - px0 * 3` (0, 1 or 2) to choose which channels fill
/// the four output components.
fn generate_packed_rgba8_shader_2d() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;
void main() {
 // gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
 // integer pixel index on all GPUs (Vivante, Mali, Adreno).
 int out_x = int(floor(gl_FragCoord.x));
 int out_y = int(floor(gl_FragCoord.y));
 int base = out_x * 4;
 // 4 consecutive byte indices map to at most 2 source pixels
 int px0 = base / 3;
 int px1 = (base + 3) / 3;
 vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
 vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
 // Extract channels based on phase (base % 3)
 int phase = base - px0 * 3;
 if (phase == 0) {
 color = vec4(s0.r, s0.g, s0.b, s1.r);
 } else if (phase == 1) {
 color = vec4(s0.g, s0.b, s1.r, s1.g);
 } else {
 color = vec4(s0.b, s1.r, s1.g, s1.b);
 }
}
"
}
6514
/// Returns the GLSL ES 3.00 fragment shader that repacks the RGB channels of
/// a 2D texture into a tightly packed byte stream and applies an int8 bias to
/// every byte.
///
/// Channel selection works on a three-bytes-per-texel stream: output column
/// `out_x` holds stream bytes `4 * out_x ..= 4 * out_x + 3`, taken from source
/// texels `base / 3` and `(base + 3) / 3` according to the phase
/// `base - px0 * 3`. Before being written, each component goes through
/// `int8_bias`, which rounds it to a 0..255 integer, adds 128 modulo 256, and
/// rescales by 1/255.
fn generate_packed_rgba8_int8_shader_2d() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;

vec4 int8_bias(vec4 v) {
 vec4 q = floor(v * 255.0 + 0.5);
 return mod(q + 128.0, 256.0) / 255.0;
}

void main() {
 // gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
 // integer pixel index on all GPUs (Vivante, Mali, Adreno).
 int out_x = int(floor(gl_FragCoord.x));
 int out_y = int(floor(gl_FragCoord.y));
 int base = out_x * 4;
 // 4 consecutive byte indices map to at most 2 source pixels
 int px0 = base / 3;
 int px1 = (base + 3) / 3;
 vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
 vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
 // Extract channels based on phase (base % 3), then apply int8 bias
 int phase = base - px0 * 3;
 if (phase == 0) {
 color = int8_bias(vec4(s0.r, s0.g, s0.b, s1.r));
 } else if (phase == 1) {
 color = int8_bias(vec4(s0.g, s0.b, s1.r, s1.g));
 } else {
 color = int8_bias(vec4(s0.b, s1.r, s1.g, s1.b));
 }
}
"
}
6556
6557#[cfg(test)]
6558#[cfg(feature = "opengl")]
6559mod gl_tests {
6560 use super::*;
6561 use crate::{TensorImage, RGBA};
6562 #[cfg(feature = "dma_test_formats")]
6563 use crate::{NV12, YUYV};
6564 use edgefirst_tensor::TensorTrait;
6565 #[cfg(feature = "dma_test_formats")]
6566 use edgefirst_tensor::{is_dma_available, TensorMapTrait, TensorMemory};
6567 use image::buffer::ConvertBuffer;
6568 use ndarray::Array3;
6569
/// Smoke test: draws a full-frame segmentation over an RGBA image loaded with
/// the default memory type and expects `draw_masks` to succeed.
///
/// Prints a `SKIPPED` line and returns when OpenGL is unavailable.
#[test]
fn test_segmentation() {
    use edgefirst_decoder::Segmentation;

    if !is_opengl_available() {
        eprintln!("SKIPPED: {} - OpenGL not available", function!());
        return;
    }

    let mut image = TensorImage::load(
        include_bytes!("../../../testdata/giraffe.jpg"),
        Some(RGBA),
        None,
    )
    .unwrap();

    // The fixture is shaped (2, 160, 160); the two axis swaps turn it into
    // (160, 160, 2) before it is copied into standard (contiguous) layout.
    let raw_scores = include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec();
    let mut scores = Array3::from_shape_vec((2, 160, 160), raw_scores).unwrap();
    scores.swap_axes(0, 1);
    scores.swap_axes(1, 2);

    let full_frame = Segmentation {
        segmentation: scores.as_standard_layout().to_owned(),
        xmin: 0.0,
        ymin: 0.0,
        xmax: 1.0,
        ymax: 1.0,
    };

    let mut renderer = GLProcessorThreaded::new(None).unwrap();
    renderer
        .draw_masks(&mut image, &[], &[full_frame])
        .unwrap();
}
6606
/// Smoke test: same as the full-frame segmentation test, but the target image
/// is explicitly allocated with `TensorMemory::Mem`.
///
/// Prints a `SKIPPED` line and returns when OpenGL is unavailable.
#[test]
fn test_segmentation_mem() {
    use edgefirst_decoder::Segmentation;

    if !is_opengl_available() {
        eprintln!("SKIPPED: {} - OpenGL not available", function!());
        return;
    }

    let mut image = TensorImage::load(
        include_bytes!("../../../testdata/giraffe.jpg"),
        Some(RGBA),
        Some(edgefirst_tensor::TensorMemory::Mem),
    )
    .unwrap();

    // The fixture is shaped (2, 160, 160); the two axis swaps turn it into
    // (160, 160, 2) before it is copied into standard (contiguous) layout.
    let raw_scores = include_bytes!("../../../testdata/modelpack_seg_2x160x160.bin").to_vec();
    let mut scores = Array3::from_shape_vec((2, 160, 160), raw_scores).unwrap();
    scores.swap_axes(0, 1);
    scores.swap_axes(1, 2);

    let full_frame = Segmentation {
        segmentation: scores.as_standard_layout().to_owned(),
        xmin: 0.0,
        ymin: 0.0,
        xmax: 1.0,
        ymax: 1.0,
    };

    let mut renderer = GLProcessorThreaded::new(None).unwrap();
    renderer
        .draw_masks(&mut image, &[], &[full_frame])
        .unwrap();
}
6643
/// Renders one detection box plus its cropped (76, 55, 1) instance mask with
/// custom class colours and compares the result against the stored reference
/// image at a 0.99 similarity threshold.
///
/// Prints a `SKIPPED` line and returns when OpenGL is unavailable.
#[test]
fn test_segmentation_yolo() {
    use edgefirst_decoder::Segmentation;
    use ndarray::Array3;

    if !is_opengl_available() {
        eprintln!("SKIPPED: {} - OpenGL not available", function!());
        return;
    }

    let mut image = TensorImage::load(
        include_bytes!("../../../testdata/giraffe.jpg"),
        Some(RGBA),
        None,
    )
    .unwrap();

    let mask_bytes = include_bytes!("../../../testdata/yolov8_seg_crop_76x55.bin").to_vec();
    let instance_mask = Segmentation {
        segmentation: Array3::from_shape_vec((76, 55, 1), mask_bytes).unwrap(),
        xmin: 0.59375,
        ymin: 0.25,
        xmax: 0.9375,
        ymax: 0.725,
    };

    // The box uses the same normalised rectangle as the mask above.
    let detection = DetectBox {
        bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
        score: 0.99,
        label: 1,
    };

    let mut renderer = GLProcessorThreaded::new(None).unwrap();
    let palette = [[255, 255, 0, 233], [128, 128, 255, 100]];
    renderer.set_class_colors(&palette).unwrap();
    renderer
        .draw_masks(&mut image, &[detection], &[instance_mask])
        .unwrap();

    let expected = TensorImage::load(
        include_bytes!("../../../testdata/output_render_gl.jpg"),
        Some(RGBA),
        None,
    )
    .unwrap();

    compare_images(&image, &expected, 0.99, function!());
}
6696
/// Smoke test: draws a single detection box (no masks) with custom class
/// colours and expects `draw_masks` to succeed.
///
/// Prints a `SKIPPED` line and returns when OpenGL is unavailable.
#[test]
fn test_boxes() {
    use edgefirst_decoder::DetectBox;

    if !is_opengl_available() {
        eprintln!("SKIPPED: {} - OpenGL not available", function!());
        return;
    }

    let mut image = TensorImage::load(
        include_bytes!("../../../testdata/giraffe.jpg"),
        Some(RGBA),
        None,
    )
    .unwrap();

    let detection = DetectBox {
        bbox: [0.59375, 0.25, 0.9375, 0.725].into(),
        score: 0.99,
        label: 0,
    };

    let mut renderer = GLProcessorThreaded::new(None).unwrap();
    let palette = [[255, 255, 0, 233], [128, 128, 255, 100]];
    renderer.set_class_colors(&palette).unwrap();
    renderer.draw_masks(&mut image, &[detection], &[]).unwrap();
}
6724
6725 static GL_AVAILABLE: std::sync::OnceLock<bool> = std::sync::OnceLock::new();
6726 fn is_opengl_available() -> bool {
6728 #[cfg(all(target_os = "linux", feature = "opengl"))]
6729 {
6730 *GL_AVAILABLE.get_or_init(|| GLProcessorThreaded::new(None).is_ok())
6731 }
6732
6733 #[cfg(not(all(target_os = "linux", feature = "opengl")))]
6734 {
6735 false
6736 }
6737 }
6738
6739 fn compare_images(img1: &TensorImage, img2: &TensorImage, threshold: f64, name: &str) {
6740 assert_eq!(img1.height(), img2.height(), "Heights differ");
6741 assert_eq!(img1.width(), img2.width(), "Widths differ");
6742 assert_eq!(img1.fourcc(), img2.fourcc(), "FourCC differ");
6743 assert!(
6744 matches!(img1.fourcc(), RGB | RGBA | GREY | PLANAR_RGB),
6745 "FourCC must be RGB or RGBA for comparison"
6746 );
6747
6748 let image1 = match img1.fourcc() {
6749 RGB => image::RgbImage::from_vec(
6750 img1.width() as u32,
6751 img1.height() as u32,
6752 img1.tensor().map().unwrap().to_vec(),
6753 )
6754 .unwrap(),
6755 RGBA => image::RgbaImage::from_vec(
6756 img1.width() as u32,
6757 img1.height() as u32,
6758 img1.tensor().map().unwrap().to_vec(),
6759 )
6760 .unwrap()
6761 .convert(),
6762 GREY => image::GrayImage::from_vec(
6763 img1.width() as u32,
6764 img1.height() as u32,
6765 img1.tensor().map().unwrap().to_vec(),
6766 )
6767 .unwrap()
6768 .convert(),
6769 PLANAR_RGB => image::GrayImage::from_vec(
6770 img1.width() as u32,
6771 (img1.height() * 3) as u32,
6772 img1.tensor().map().unwrap().to_vec(),
6773 )
6774 .unwrap()
6775 .convert(),
6776 _ => return,
6777 };
6778
6779 let image2 = match img2.fourcc() {
6780 RGB => image::RgbImage::from_vec(
6781 img2.width() as u32,
6782 img2.height() as u32,
6783 img2.tensor().map().unwrap().to_vec(),
6784 )
6785 .unwrap(),
6786 RGBA => image::RgbaImage::from_vec(
6787 img2.width() as u32,
6788 img2.height() as u32,
6789 img2.tensor().map().unwrap().to_vec(),
6790 )
6791 .unwrap()
6792 .convert(),
6793 GREY => image::GrayImage::from_vec(
6794 img2.width() as u32,
6795 img2.height() as u32,
6796 img2.tensor().map().unwrap().to_vec(),
6797 )
6798 .unwrap()
6799 .convert(),
6800 PLANAR_RGB => image::GrayImage::from_vec(
6801 img2.width() as u32,
6802 (img2.height() * 3) as u32,
6803 img2.tensor().map().unwrap().to_vec(),
6804 )
6805 .unwrap()
6806 .convert(),
6807 _ => return,
6808 };
6809
6810 let similarity = image_compare::rgb_similarity_structure(
6811 &image_compare::Algorithm::RootMeanSquared,
6812 &image1,
6813 &image2,
6814 )
6815 .expect("Image Comparison failed");
6816 if similarity.score < threshold {
6817 similarity
6820 .image
6821 .to_color_map()
6822 .save(format!("{name}.png"))
6823 .unwrap();
6824 panic!(
6825 "{name}: converted image and target image have similarity score too low: {} < {}",
6826 similarity.score, threshold
6827 )
6828 }
6829 }
6830
6831 #[cfg(feature = "dma_test_formats")]
6838 fn load_raw_image(
6839 width: usize,
6840 height: usize,
6841 fourcc: FourCharCode,
6842 memory: Option<TensorMemory>,
6843 bytes: &[u8],
6844 ) -> Result<TensorImage, crate::Error> {
6845 let img = TensorImage::new(width, height, fourcc, memory)?;
6846 let mut map = img.tensor().map()?;
6847 map.as_mut_slice()[..bytes.len()].copy_from_slice(bytes);
6848 Ok(img)
6849 }
6850
/// Converts a 1280x720 NV12 frame held in DMA memory to RGBA on the GPU and
/// compares the result with the stored RGBA reference frame at a 0.98
/// similarity threshold.
///
/// Prints a `SKIPPED` line and returns when DMA buffers are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_nv12_to_rgba_reference() {
    if !is_dma_available() {
        // Report the skip so it is not mistaken for a pass in the test log.
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let src = load_raw_image(
        1280,
        720,
        NV12,
        Some(TensorMemory::Dma),
        include_bytes!("../../../testdata/camera720p.nv12"),
    )
    .unwrap();

    let reference = load_raw_image(
        1280,
        720,
        RGBA,
        None,
        include_bytes!("../../../testdata/camera720p.rgba"),
    )
    .unwrap();

    let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
    let mut gl = GLProcessorThreaded::new(None).unwrap();
    gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
        .unwrap();

    // Copy the GPU output into an image with the default memory type before
    // handing it to the comparison helper.
    let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
    cpu_dst
        .tensor()
        .map()
        .unwrap()
        .as_mut_slice()
        .copy_from_slice(dst.tensor().map().unwrap().as_slice());

    compare_images(&reference, &cpu_dst, 0.98, "opengl_nv12_to_rgba_reference");
}
6895
/// Converts a 1280x720 YUYV frame held in DMA memory to RGBA on the GPU and
/// compares the result with the stored RGBA reference frame at a 0.98
/// similarity threshold.
///
/// Prints a `SKIPPED` line and returns when DMA buffers are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_yuyv_to_rgba_reference() {
    if !is_dma_available() {
        // Report the skip so it is not mistaken for a pass in the test log.
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let src = load_raw_image(
        1280,
        720,
        YUYV,
        Some(TensorMemory::Dma),
        include_bytes!("../../../testdata/camera720p.yuyv"),
    )
    .unwrap();

    let reference = load_raw_image(
        1280,
        720,
        RGBA,
        None,
        include_bytes!("../../../testdata/camera720p.rgba"),
    )
    .unwrap();

    let mut dst = TensorImage::new(1280, 720, RGBA, Some(TensorMemory::Dma)).unwrap();
    let mut gl = GLProcessorThreaded::new(None).unwrap();
    gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop())
        .unwrap();

    // Copy the GPU output into an image with the default memory type before
    // handing it to the comparison helper.
    let cpu_dst = TensorImage::new(1280, 720, RGBA, None).unwrap();
    cpu_dst
        .tensor()
        .map()
        .unwrap()
        .as_mut_slice()
        .copy_from_slice(dst.tensor().map().unwrap().as_slice());

    compare_images(&reference, &cpu_dst, 0.98, "opengl_yuyv_to_rgba_reference");
}
6940
/// Checks that `probe_egl_displays` lists display kinds in strictly
/// increasing priority order: `PlatformDevice`, then `Gbm`, then `Default`,
/// with no kind appearing twice.
///
/// Prints a `SKIPPED` line and returns when probing fails or finds nothing.
#[test]
fn test_probe_egl_displays() {
    let displays = match probe_egl_displays() {
        Ok(d) => d,
        Err(e) => {
            eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
            return;
        }
    };

    if displays.is_empty() {
        eprintln!("SKIPPED: {} - No EGL displays available", function!());
        return;
    }

    let kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();
    eprintln!("Probed EGL displays: {kinds:?}");
    for d in &displays {
        eprintln!(" {:?}: {}", d.kind, d.description);
    }

    // Lower rank means the kind must appear earlier in the probe result.
    let priority = |k: &EglDisplayKind| match k {
        EglDisplayKind::PlatformDevice => 0,
        EglDisplayKind::Gbm => 1,
        EglDisplayKind::Default => 2,
    };
    for w in kinds.windows(2) {
        // The message names the offending pair in the order it was listed.
        assert!(
            priority(&w[0]) < priority(&w[1]),
            "Display ordering violated: {:?} is followed by {:?}",
            w[0],
            w[1],
        );
    }
}
6989
/// For every display kind reported by `probe_egl_displays`, creates a
/// processor with that kind forced and runs one RGBA conversion to a 320x240
/// target, failing if either step returns an error.
///
/// Prints a `SKIPPED` line and returns when probing fails or finds nothing.
#[test]
fn test_override_each_display_kind() {
    let displays = match probe_egl_displays() {
        Err(e) => {
            eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
            return;
        }
        Ok(found) => found,
    };

    if displays.is_empty() {
        eprintln!("SKIPPED: {} - No EGL displays available", function!());
        return;
    }

    for display in &displays {
        let kind = display.kind;
        eprintln!("Testing override: {:?} ({})", kind, display.description);

        let mut gl = match GLProcessorThreaded::new(Some(kind)) {
            Ok(gl) => gl,
            Err(e) => panic!("GLProcessorThreaded::new(Some({:?})) failed: {e:?}", kind),
        };

        let src = TensorImage::load(
            include_bytes!("../../../testdata/zidane.jpg"),
            Some(RGBA),
            None,
        )
        .unwrap();
        let mut dst = TensorImage::new(320, 240, RGBA, None).unwrap();

        if let Err(e) = gl.convert(&src, &mut dst, Rotation::None, Flip::None, Crop::no_crop()) {
            panic!("convert() with {:?} display failed: {e:?}", kind);
        }
        eprintln!(" {:?} display: convert OK", kind);
    }
}
7036
/// Picks the first display kind that `probe_egl_displays` did not report and
/// checks that forcing it makes `GLProcessorThreaded::new` return an error.
///
/// Prints a `SKIPPED` line and returns when probing fails or when all three
/// kinds are available.
#[test]
fn test_override_unavailable_display_errors() {
    let displays = match probe_egl_displays() {
        Err(e) => {
            eprintln!("SKIPPED: {} - EGL not available: {e:?}", function!());
            return;
        }
        Ok(found) => found,
    };
    let available_kinds: Vec<_> = displays.iter().map(|d| d.kind).collect();

    let candidates = [
        EglDisplayKind::PlatformDevice,
        EglDisplayKind::Gbm,
        EglDisplayKind::Default,
    ];
    let missing = candidates
        .into_iter()
        .find(|k| !available_kinds.contains(k));

    let Some(kind) = missing else {
        eprintln!(
            "SKIPPED: {} - All three display kinds are available",
            function!()
        );
        return;
    };

    eprintln!("Testing override with unavailable kind: {kind:?}");
    let result = GLProcessorThreaded::new(Some(kind));
    assert!(
        result.is_err(),
        "Expected error for unavailable display kind {kind:?}, got Ok"
    );
    eprintln!(" Correctly returned error: {:?}", result.unwrap_err());
}
7075
/// Creates a processor with automatic display selection (`None`) and runs one
/// RGBA conversion to a 320x240 target, expecting both steps to succeed.
///
/// Prints a `SKIPPED` line and returns when OpenGL is unavailable.
#[test]
fn test_auto_detect_display() {
    if !is_opengl_available() {
        eprintln!("SKIPPED: {} - OpenGL not available", function!());
        return;
    }

    let mut processor = GLProcessorThreaded::new(None).expect("auto-detect should succeed");

    let input = TensorImage::load(
        include_bytes!("../../../testdata/zidane.jpg"),
        Some(RGBA),
        None,
    )
    .unwrap();
    let mut output = TensorImage::new(320, 240, RGBA, None).unwrap();

    processor
        .convert(
            &input,
            &mut output,
            Rotation::None,
            Flip::None,
            Crop::no_crop(),
        )
        .expect("auto-detect convert should succeed");
}
7096
/// Documents the packed-RGB width rule with concrete numbers: a row of
/// `width * 3` bytes is a whole number of 4-byte groups for widths 640, 320
/// and 1280, and is not for widths 322 and 333.
#[test]
fn test_packed_rgb_width_constraint() {
    for aligned_width in [640usize, 320, 1280] {
        assert_eq!((aligned_width * 3) % 4, 0);
    }

    for unaligned_width in [322usize, 333] {
        assert_ne!((unaligned_width * 3) % 4, 0);
    }
}
7108
7109 #[cfg(feature = "dma_test_formats")]
/// Asserts that two byte buffers have the same length and that no pair of
/// corresponding bytes differs by more than `tolerance`.
///
/// # Panics
///
/// Panics with "Buffer size mismatch" when the lengths differ, or with a
/// "Pixel mismatch" message giving the number of offending bytes, the largest
/// difference seen anywhere in the buffers, and the index of the first
/// offending byte.
fn assert_pixels_match(expected: &[u8], actual: &[u8], tolerance: u8) {
    assert_eq!(expected.len(), actual.len(), "Buffer size mismatch");

    let mut max_diff = 0u8;
    let mut diff_count = 0usize;
    let mut first_offender: Option<usize> = None;

    let deltas = expected
        .iter()
        .zip(actual)
        .map(|(&want, &got)| want.abs_diff(got));
    for (idx, delta) in deltas.enumerate() {
        // The maximum is tracked over every byte, not only the offenders.
        max_diff = max_diff.max(delta);
        if delta > tolerance {
            diff_count += 1;
            first_offender.get_or_insert(idx);
        }
    }

    assert!(
        diff_count == 0,
        "Pixel mismatch: {diff_count} bytes differ (max_diff={max_diff}, first at index {})",
        first_offender.unwrap_or(0)
    );
}
7141
7142 #[cfg(feature = "dma_test_formats")]
7144 fn letterbox_crop(src_w: usize, src_h: usize, dst_w: usize, dst_h: usize) -> Crop {
7145 let src_aspect = src_w as f64 / src_h as f64;
7146 let dst_aspect = dst_w as f64 / dst_h as f64;
7147 let (new_w, new_h) = if src_aspect > dst_aspect {
7148 let new_h = (dst_w as f64 / src_aspect).round() as usize;
7149 (dst_w, new_h)
7150 } else {
7151 let new_w = (dst_h as f64 * src_aspect).round() as usize;
7152 (new_w, dst_h)
7153 };
7154 let left = (dst_w - new_w) / 2;
7155 let top = (dst_h - new_h) / 2;
7156 Crop::new()
7157 .with_dst_rect(Some(crate::Rect::new(left, top, new_w, new_h)))
7158 .with_dst_color(Some([114, 114, 114, 255]))
7159 }
7160
7161 #[cfg(feature = "dma_test_formats")]
/// Drops every fourth byte (the alpha channel) from a packed RGBA buffer and
/// returns the remaining bytes as packed RGB.
///
/// # Panics
///
/// Panics if the input length is not a multiple of 4.
fn rgba_to_rgb(rgba: &[u8]) -> Vec<u8> {
    assert_eq!(
        rgba.len() % 4,
        0,
        "RGBA buffer length must be divisible by 4"
    );
    let pixel_count = rgba.len() / 4;
    let mut packed = Vec::with_capacity(pixel_count * 3);
    rgba.chunks_exact(4)
        .for_each(|px| packed.extend_from_slice(&px[..3]));
    packed
}
7177
7178 #[cfg(feature = "dma_test_formats")]
/// Returns a copy of `data` with the top bit of every byte flipped, which is
/// the same as adding 128 modulo 256 to each byte.
fn uint8_to_int8(data: &[u8]) -> Vec<u8> {
    let mut biased = Vec::with_capacity(data.len());
    for &byte in data {
        biased.push(byte.wrapping_add(0x80));
    }
    biased
}
7183
/// Checks the GPU RGB output against the GPU RGBA output for a letterboxed
/// 1920x1080 YUYV -> 640x640 conversion: the RGB bytes must equal the RGBA
/// bytes with alpha removed, within a per-byte tolerance of 1.
///
/// Prints a `SKIPPED` line and returns when DMA buffers are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_rgb_correctness() {
    if !is_dma_available() {
        // Report the skip so it is not mistaken for a pass in the test log.
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let src_dma = load_raw_image(
        1920,
        1080,
        YUYV,
        Some(TensorMemory::Dma),
        include_bytes!("../../../testdata/camera1080p.yuyv"),
    )
    .unwrap();

    let crop = letterbox_crop(1920, 1080, 640, 640);
    let mut gl = GLProcessorThreaded::new(None).unwrap();

    // Reference: the same conversion with an RGBA destination.
    let mut dst_rgba = TensorImage::new(640, 640, RGBA, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src_dma, &mut dst_rgba, Rotation::None, Flip::None, crop)
        .unwrap();

    // Under test: the conversion with a packed RGB destination.
    let mut dst_rgb = TensorImage::new(640, 640, RGB, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src_dma, &mut dst_rgb, Rotation::None, Flip::None, crop)
        .unwrap();

    let rgba_data = dst_rgba.tensor().map().unwrap();
    let expected_rgb = rgba_to_rgb(rgba_data.as_slice());
    let gl_data = dst_rgb.tensor().map().unwrap();
    assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
}
7219
/// Checks the GPU `RGB_INT8` output against the GPU RGBA output for a
/// letterboxed 1920x1080 YUYV -> 640x640 conversion: the output bytes must
/// equal the RGBA bytes with alpha removed and the top bit of every byte
/// flipped, within a per-byte tolerance of 1.
///
/// The test uses `GLProcessorST` directly and clears `support_rgb_direct`
/// before converting.
/// NOTE(review): presumably this forces the non-direct packed RGB path —
/// confirm against the processor implementation.
///
/// Prints a `SKIPPED` line and returns when DMA buffers or GL are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_rgb_int8_correctness() {
    if !is_dma_available() {
        // Report the skip so it is not mistaken for a pass in the test log.
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let src_dma = load_raw_image(
        1920,
        1080,
        YUYV,
        Some(TensorMemory::Dma),
        include_bytes!("../../../testdata/camera1080p.yuyv"),
    )
    .unwrap();

    let crop = letterbox_crop(1920, 1080, 640, 640);
    let mut gl = match GLProcessorST::new(None) {
        Ok(gl) => gl,
        Err(e) => {
            eprintln!("SKIPPED: {} - GL not available: {e}", function!());
            return;
        }
    };
    gl.support_rgb_direct = false;

    // Reference: the same conversion with an RGBA destination.
    let mut dst_rgba = TensorImage::new(640, 640, RGBA, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src_dma, &mut dst_rgba, Rotation::None, Flip::None, crop)
        .unwrap();

    // Under test: the conversion with a packed int8 RGB destination.
    let mut dst_rgb = TensorImage::new(640, 640, RGB_INT8, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src_dma, &mut dst_rgb, Rotation::None, Flip::None, crop)
        .unwrap();

    let rgba_data = dst_rgba.tensor().map().unwrap();
    let expected_rgb = uint8_to_int8(&rgba_to_rgb(rgba_data.as_slice()));
    let gl_data = dst_rgb.tensor().map().unwrap();
    assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
}
7267
/// Checks the GPU RGB output against the GPU RGBA output for a same-size
/// 1920x1080 YUYV conversion without any crop: the RGB bytes must equal the
/// RGBA bytes with alpha removed, within a per-byte tolerance of 1.
///
/// Prints a `SKIPPED` line and returns when DMA buffers are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_rgb_no_letterbox_correctness() {
    if !is_dma_available() {
        // Report the skip so it is not mistaken for a pass in the test log.
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let src_dma = load_raw_image(
        1920,
        1080,
        YUYV,
        Some(TensorMemory::Dma),
        include_bytes!("../../../testdata/camera1080p.yuyv"),
    )
    .unwrap();

    let mut gl = GLProcessorThreaded::new(None).unwrap();

    // Reference: the same conversion with an RGBA destination.
    let mut dst_rgba = TensorImage::new(1920, 1080, RGBA, Some(TensorMemory::Dma)).unwrap();
    gl.convert(
        &src_dma,
        &mut dst_rgba,
        Rotation::None,
        Flip::None,
        Crop::no_crop(),
    )
    .unwrap();

    // Under test: the conversion with a packed RGB destination.
    let mut dst_rgb = TensorImage::new(1920, 1080, RGB, Some(TensorMemory::Dma)).unwrap();
    gl.convert(
        &src_dma,
        &mut dst_rgb,
        Rotation::None,
        Flip::None,
        Crop::no_crop(),
    )
    .unwrap();

    let rgba_data = dst_rgba.tensor().map().unwrap();
    let expected_rgb = rgba_to_rgb(rgba_data.as_slice());
    let gl_data = dst_rgb.tensor().map().unwrap();
    assert_pixels_match(&expected_rgb, gl_data.as_slice(), 1);
}
7314
/// Creates a `GLProcessorST` and prints the value of its
/// `support_rgb_direct` flag; the test passes as long as construction does
/// not crash.
///
/// Prints a `SKIPPED` line and returns when DMA buffers or GL are unavailable.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_probe_rgb_direct_support() {
    if !is_dma_available() {
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }

    let processor = match GLProcessorST::new(None) {
        Err(e) => {
            eprintln!("SKIPPED: {} - GL not available: {e}", function!());
            return;
        }
        Ok(created) => created,
    };

    let direct = processor.support_rgb_direct;
    eprintln!(
        "support_rgb_direct = {} (probe completed without crash)",
        direct
    );
}
7342
/// Converts the same synthetic 640x480 RGBA image to 320x320 RGB twice — once
/// with `support_rgb_direct` left enabled and once with it cleared — and
/// requires the two outputs to have equal length and to differ by at most 1
/// per byte.
///
/// Prints a `SKIPPED` line and returns when DMA buffers or GL are
/// unavailable, or when the processor reports no direct RGB support.
#[test]
#[cfg(all(target_os = "linux", feature = "dma_test_formats"))]
fn test_opengl_rgb_direct_matches_two_pass() {
    if !is_dma_available() {
        eprintln!("SKIPPED: {} - DMA not available", function!());
        return;
    }
    let mut gl = match GLProcessorST::new(None) {
        Ok(gl) => gl,
        Err(e) => {
            eprintln!("SKIPPED: {} - GL not available: {e}", function!());
            return;
        }
    };
    if !gl.support_rgb_direct {
        eprintln!("SKIPPED: {} - GPU does not support direct RGB", function!());
        return;
    }

    // Fill the source with a repeating byte ramp of period 251.
    let src = TensorImage::new(640, 480, RGBA, Some(TensorMemory::Dma)).unwrap();
    {
        let mut map = src.tensor().map().unwrap();
        for (i, byte) in map.as_mut_slice().iter_mut().enumerate() {
            *byte = (i % 251) as u8;
        }
    }

    let crop = crate::Crop {
        src_rect: None,
        dst_rect: None,
        dst_color: None,
    };

    // First pass: flag left as probed (true, checked above).
    let mut dst_direct = TensorImage::new(320, 320, RGB, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src, &mut dst_direct, Rotation::None, Flip::None, crop)
        .unwrap();

    // Second pass: flag cleared for the duration of one conversion.
    gl.support_rgb_direct = false;
    let mut dst_twop = TensorImage::new(320, 320, RGB, Some(TensorMemory::Dma)).unwrap();
    gl.convert(&src, &mut dst_twop, Rotation::None, Flip::None, crop)
        .unwrap();
    gl.support_rgb_direct = true;

    let map_direct = dst_direct.tensor().map().unwrap();
    let map_twop = dst_twop.tensor().map().unwrap();
    let direct_bytes = map_direct.as_slice();
    let twop_bytes = map_twop.as_slice();

    // `zip` stops at the shorter slice, so a size mismatch has to be caught
    // explicitly or the trailing bytes would never be compared.
    assert_eq!(
        direct_bytes.len(),
        twop_bytes.len(),
        "RGB direct and two-pass outputs differ in size"
    );

    let mut max_diff = 0i32;
    for (a, b) in direct_bytes.iter().zip(twop_bytes.iter()) {
        let diff = (*a as i32 - *b as i32).abs();
        max_diff = max_diff.max(diff);
    }
    eprintln!("RGB direct vs two-pass max pixel diff: {max_diff}");
    assert!(max_diff <= 1, "Pixel mismatch > 1: max_diff={max_diff}");
}
7405}