use crate::Error;
use edgefirst_tensor::PixelFormat;
use gbm::drm::buffer::DrmFourcc;
use log::error;
use std::ffi::{c_char, CString};
use std::ptr::null;
use std::str::FromStr;
pub(super) fn compile_shader_from_str(
shader: u32,
shader_source: &str,
shader_name: &str,
) -> Result<(), ()> {
let src = match CString::from_str(shader_source) {
Ok(v) => v,
Err(_) => return Err(()),
};
let src_ptr = src.as_ptr();
unsafe {
gls::gl::ShaderSource(shader, 1, &raw const src_ptr, null());
gls::gl::CompileShader(shader);
let mut is_compiled = 0;
gls::gl::GetShaderiv(shader, gls::gl::COMPILE_STATUS, &raw mut is_compiled);
if is_compiled == 0 {
let mut max_length = 0;
gls::gl::GetShaderiv(shader, gls::gl::INFO_LOG_LENGTH, &raw mut max_length);
let mut error_log: Vec<u8> = vec![0; max_length as usize];
gls::gl::GetShaderInfoLog(
shader,
max_length,
&raw mut max_length,
error_log.as_mut_ptr() as *mut c_char,
);
let msg = CString::from_vec_with_nul(error_log)
.ok()
.and_then(|c| c.into_string().ok())
.unwrap_or_else(|| "<non-UTF8 shader log>".to_string());
error!("Shader '{}' failed: {:?}\n", shader_name, msg);
gls::gl::DeleteShader(shader);
return Err(());
}
Ok(())
}
}
pub(super) fn check_gl_error(name: &str, line: u32) -> Result<(), Error> {
unsafe {
let err = gls::gl::GetError();
if err != gls::gl::NO_ERROR {
error!("GL Error: {name}:{line}: {err:#X}");
return Err(Error::OpenGl(format!("{err:#X}")));
}
}
Ok(())
}
pub(super) fn pixel_format_to_drm(fmt: PixelFormat) -> Result<DrmFourcc, Error> {
match fmt {
PixelFormat::Rgba => Ok(DrmFourcc::Abgr8888),
PixelFormat::Bgra => Ok(DrmFourcc::Argb8888),
PixelFormat::Yuyv => Ok(DrmFourcc::Yuyv),
PixelFormat::Vyuy => Ok(DrmFourcc::Vyuy),
PixelFormat::Rgb => Ok(DrmFourcc::Bgr888),
PixelFormat::Grey => Ok(DrmFourcc::R8),
PixelFormat::Nv12 => Ok(DrmFourcc::Nv12),
PixelFormat::PlanarRgb => Ok(DrmFourcc::R8),
_ => Err(Error::NotSupported(format!(
"PixelFormat {fmt:?} has no DRM format mapping"
))),
}
}
/// Returns the GLSL ES 3.00 vertex shader source.
///
/// The shader reads `pos` (location 0) and `texCoord` (location 1), forwards
/// them unchanged as `fragPos` and `tc`, and writes `pos` to `gl_Position`
/// with `w = 1.0`.
pub(super) fn generate_vertex_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
layout(location = 0) in vec3 pos;
layout(location = 1) in vec2 texCoord;
out vec3 fragPos;
out vec2 tc;
void main() {
fragPos = pos;
tc = texCoord;
gl_Position = vec4(pos, 1.0);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that copies a texture.
///
/// The output colour is the `sampler2D` uniform `tex` sampled at `tc`, with no
/// further processing.
pub(super) fn generate_texture_fragment_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
void main(){
color = texture(tex, tc);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that copies an external
/// texture.
///
/// The shader requires `GL_OES_EGL_image_external_essl3` and outputs the
/// `samplerExternalOES` uniform `tex` sampled at `tc`.
/// NOTE(review): the YUV-to-RGB conversion is presumably performed by the
/// external sampler itself — confirm against the target drivers.
pub(super) fn generate_texture_fragment_shader_yuv() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
void main(){
color = texture(tex, tc);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB with
/// an external texture.
///
/// The shader requires `GL_OES_EGL_image_external_essl3` and outputs the
/// `samplerExternalOES` uniform `tex` sampled at `tc`, unmodified.
pub(super) fn generate_planar_rgb_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision mediump float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
void main(){
color = texture(tex, tc);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that copies a `sampler2D`
/// texture while applying the int8 bias to the RGB channels.
///
/// Each RGB channel is rounded to a 0..255 value, offset by 128 modulo 256 and
/// normalised back by dividing by 255; alpha is passed through unchanged.
pub(super) fn generate_texture_int8_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
// XOR 0x80 bias: quantize to uint8, add 128 mod 256, normalize back.
// This matches the CPU `byte ^ 0x80` operation exactly.
vec3 int8_bias(vec3 v) {
vec3 q = floor(v * 255.0 + 0.5);
return mod(q + 128.0, 256.0) / 255.0;
}
void main(){
vec4 c = texture(tex, tc);
color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that copies an external
/// texture while applying the int8 bias to the RGB channels.
///
/// The shader requires `GL_OES_EGL_image_external_essl3`. Each RGB channel is
/// rounded to a 0..255 value, offset by 128 modulo 256 and divided by 255;
/// alpha is passed through unchanged.
pub(super) fn generate_texture_int8_shader_yuv() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
vec3 int8_bias(vec3 v) {
vec3 q = floor(v * 255.0 + 0.5);
return mod(q + 128.0, 256.0) / 255.0;
}
void main(){
vec4 c = texture(tex, tc);
color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB with an
/// external texture and int8 bias.
///
/// The shader requires `GL_OES_EGL_image_external_essl3`, samples the
/// `samplerExternalOES` uniform `tex` at `tc`, applies `int8_bias` to the RGB
/// channels and keeps alpha as sampled.
pub(super) fn generate_planar_rgb_int8_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
#extension GL_OES_EGL_image_external_essl3 : require
precision highp float;
uniform samplerExternalOES tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
vec3 int8_bias(vec3 v) {
vec3 q = floor(v * 255.0 + 0.5);
return mod(q + 128.0, 256.0) / 255.0;
}
void main(){
vec4 c = texture(tex, tc);
color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB with a
/// regular 2D texture.
///
/// The output colour is the `sampler2D` uniform `tex` sampled at `tc`; no
/// extension is required.
pub(super) fn generate_planar_rgb_shader_2d() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
void main(){
color = texture(tex, tc);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source used for planar RGB with a
/// regular 2D texture and int8 bias.
///
/// The shader samples the `sampler2D` uniform `tex` at `tc`, applies
/// `int8_bias` (round to 0..255, add 128 modulo 256, divide by 255) to the RGB
/// channels and keeps alpha as sampled.
pub(super) fn generate_planar_rgb_int8_shader_2d() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
uniform sampler2D tex;
in vec3 fragPos;
in vec2 tc;
out vec4 color;
vec3 int8_bias(vec3 v) {
vec3 q = floor(v * 255.0 + 0.5);
return mod(q + 128.0, 256.0) / 255.0;
}
void main(){
vec4 c = texture(tex, tc);
color = vec4(int8_bias(c.rgb), c.a);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source for argmax segmentation
/// overlays.
///
/// For every layer of the `sampler2DArray` uniform `tex` the shader finds the
/// largest of the four channels and tracks the overall winner as
/// `layer * 4 + channel`. Fragments whose winner equals `background_index` are
/// discarded; otherwise the colour is `colors[winner % 20]` with its alpha
/// multiplied by `opacity`.
pub(super) fn generate_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
precision mediump sampler2DArray;
uniform sampler2DArray tex;
uniform vec4 colors[20];
uniform int background_index;
uniform float opacity;
in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;
out vec4 color;
float max_arg(const in vec4 args, out int argmax) {
if (args[0] >= args[1] && args[0] >= args[2] && args[0] >= args[3]) {
argmax = 0;
return args[0];
}
if (args[1] >= args[0] && args[1] >= args[2] && args[1] >= args[3]) {
argmax = 1;
return args[1];
}
if (args[2] >= args[0] && args[2] >= args[1] && args[2] >= args[3]) {
argmax = 2;
return args[2];
}
argmax = 3;
return args[3];
}
void main() {
mediump int layers = textureSize(tex, 0).z;
float max_all = -4.0;
int max_ind = 0;
for (int i = 0; i < layers; i++) {
vec4 d = texture(tex, vec3(tc, i));
int max_ind_ = 0;
float max_ = max_arg(d, max_ind_);
if (max_ <= max_all) { continue; }
max_all = max_;
max_ind = i*4 + max_ind_;
}
if (max_ind == background_index) {
discard;
}
max_ind = max_ind % 20;
vec4 c = colors[max_ind];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source for drawing one instance
/// mask.
///
/// The red channel of the `sampler2D` uniform `mask0` is passed through
/// `smoothstep(0.5, 0.65, ..)`. Fragments with a result of zero or less are
/// discarded; the rest take `colors[class_index % 20]` with alpha scaled by
/// both the smoothstep result and `opacity`.
pub(super) fn generate_instanced_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
uniform sampler2D mask0;
uniform vec4 colors[20];
uniform int class_index;
uniform float opacity;
in vec3 fragPos;
in vec2 tc;
in vec4 fragColor;
out vec4 color;
void main() {
float r0 = texture(mask0, tc).r;
float edge = smoothstep(0.5, 0.65, r0);
if (edge <= 0.0) {
discard;
}
vec4 c = colors[class_index % 20];
color = vec4(c.rgb, c.a * edge * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that evaluates a proto
/// mask from RGBA-packed prototypes.
///
/// For each of `num_layers` layers the shader adds the dot product of
/// `mask_coeff[i]` with the `proto_tex` sample at `tc`, applies a sigmoid and
/// discards fragments below 0.5. Surviving fragments take
/// `colors[class_index % 20]` with alpha multiplied by `opacity`.
pub(super) fn generate_proto_segmentation_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp sampler2DArray;
uniform sampler2DArray proto_tex; // ceil(num_protos/4) layers, RGBA = 4 channels per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_layers;
uniform float opacity;
in vec2 tc;
out vec4 color;
void main() {
float acc = 0.0;
for (int i = 0; i < num_layers; i++) {
// texture() returns bilinearly interpolated proto values (GL_LINEAR)
acc += dot(mask_coeff[i], texture(proto_tex, vec3(tc, float(i))));
}
float mask = 1.0 / (1.0 + exp(-acc)); // sigmoid
if (mask < 0.5) discard;
vec4 c = colors[class_index % 20];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that evaluates a proto
/// mask from an integer texture array using the nearest texel.
///
/// `tc` is converted to an integer texel position clamped to the texture
/// size. Each of the `num_protos` layers is fetched with `texelFetch`,
/// dequantised as `raw * proto_scale + proto_scaled_zp`, and weighted by
/// `mask_coeff[k / 4][k % 4]`. The sum goes through a sigmoid; fragments below
/// 0.5 are discarded and the rest take `colors[class_index % 20]` with alpha
/// multiplied by `opacity`.
pub(super) fn generate_proto_segmentation_shader_int8_nearest() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;
uniform isampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale
uniform float opacity;
in vec2 tc;
out vec4 color;
void main() {
ivec3 tex_size = textureSize(proto_tex, 0);
int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);
float acc = 0.0;
for (int k = 0; k < num_protos; k++) {
float raw = float(texelFetch(proto_tex, ivec3(ix, iy, k), 0).r);
float val = raw * proto_scale + proto_scaled_zp;
acc += mask_coeff[k / 4][k % 4] * val;
}
float mask = 1.0 / (1.0 + exp(-acc));
if (mask < 0.5) discard;
vec4 c = colors[class_index % 20];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that evaluates a proto
/// mask from an integer texture array with manual bilinear interpolation.
///
/// The shader computes the four texels surrounding `tc` (clamped to the
/// texture bounds) and their bilinear weights. For each of `num_protos`
/// layers it fetches the four texels, blends them, dequantises the result as
/// `interp * proto_scale + proto_scaled_zp` and weights it by
/// `mask_coeff[k / 4][k % 4]`. The sum goes through a sigmoid; fragments below
/// 0.5 are discarded and the rest take `colors[class_index % 20]` with alpha
/// multiplied by `opacity`.
pub(super) fn generate_proto_segmentation_shader_int8_bilinear() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;
uniform isampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale
uniform float opacity;
in vec2 tc;
out vec4 color;
void main() {
ivec3 tex_size = textureSize(proto_tex, 0);
// Compute continuous position (matching GL_LINEAR convention: center at +0.5)
vec2 pos = tc * vec2(tex_size.xy) - 0.5;
vec2 f = fract(pos);
ivec2 p0 = ivec2(floor(pos));
ivec2 p1 = p0 + 1;
// Clamp to texture bounds
p0 = clamp(p0, ivec2(0), tex_size.xy - 1);
p1 = clamp(p1, ivec2(0), tex_size.xy - 1);
float w00 = (1.0 - f.x) * (1.0 - f.y);
float w10 = f.x * (1.0 - f.y);
float w01 = (1.0 - f.x) * f.y;
float w11 = f.x * f.y;
float acc = 0.0;
for (int k = 0; k < num_protos; k++) {
float r00 = float(texelFetch(proto_tex, ivec3(p0.x, p0.y, k), 0).r);
float r10 = float(texelFetch(proto_tex, ivec3(p1.x, p0.y, k), 0).r);
float r01 = float(texelFetch(proto_tex, ivec3(p0.x, p1.y, k), 0).r);
float r11 = float(texelFetch(proto_tex, ivec3(p1.x, p1.y, k), 0).r);
float interp = r00 * w00 + r10 * w10 + r01 * w01 + r11 * w11;
float val = interp * proto_scale + proto_scaled_zp;
acc += mask_coeff[k / 4][k % 4] * val;
}
float mask = 1.0 / (1.0 + exp(-acc));
if (mask < 0.5) discard;
vec4 c = colors[class_index % 20];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that dequantises four
/// integer proto layers into one RGBA output.
///
/// `tc` is converted to an integer texel position clamped to the texture
/// size. Output channel `c` (0..3) is read from layer `base_layer + c` of
/// `proto_tex` and dequantised as `raw * proto_scale + proto_scaled_zp`.
///
/// Layers outside the texture array are written as `0.0` instead of being
/// fetched, because `texelFetch` with out-of-range coordinates yields
/// undefined values. This can happen when the number of protos is not a
/// multiple of four.
pub(super) fn generate_proto_dequant_shader_int8() -> &'static str {
    "\
#version 300 es
precision highp float;
precision highp int;
precision highp isampler2DArray;
uniform isampler2DArray proto_tex; // 32 layers of R8I (1 proto per layer)
uniform float proto_scale;
uniform float proto_scaled_zp; // -zero_point * scale
uniform int base_layer; // first proto index for this output layer (0, 4, 8, ...)
in vec2 tc;
out vec4 color;
void main() {
ivec3 tex_size = textureSize(proto_tex, 0);
int ix = clamp(int(tc.x * float(tex_size.x)), 0, tex_size.x - 1);
int iy = clamp(int(tc.y * float(tex_size.y)), 0, tex_size.y - 1);
vec4 result = vec4(0.0);
for (int c = 0; c < 4; c++) {
int layer = base_layer + c;
// texelFetch is undefined outside the array; leave missing layers at 0.0
if (layer < 0 || layer >= tex_size.z) continue;
float raw = float(texelFetch(proto_tex, ivec3(ix, iy, layer), 0).r);
result[c] = raw * proto_scale + proto_scaled_zp;
}
color = result;
}
"
}
/// Returns the GLSL ES 3.00 fragment shader source that evaluates a proto
/// mask from a float texture array with one proto per layer.
///
/// For each of `num_protos` layers the red channel of `proto_tex` sampled at
/// `tc` is weighted by `mask_coeff[k / 4][k % 4]`. The sum goes through a
/// sigmoid; fragments below 0.5 are discarded and the rest take
/// `colors[class_index % 20]` with alpha multiplied by `opacity`.
pub(super) fn generate_proto_segmentation_shader_f32() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp sampler2DArray;
uniform sampler2DArray proto_tex; // 32 layers, R channel = 1 proto per layer
uniform vec4 mask_coeff[8]; // 32 coefficients packed as 8 vec4s
uniform vec4 colors[20];
uniform int class_index;
uniform int num_protos;
uniform float opacity;
in vec2 tc;
out vec4 color;
void main() {
float acc = 0.0;
for (int k = 0; k < num_protos; k++) {
// texture() returns bilinearly interpolated proto value (GL_LINEAR on R32F)
float val = texture(proto_tex, vec3(tc, float(k))).r;
acc += mask_coeff[k / 4][k % 4] * val;
}
float mask = 1.0 / (1.0 + exp(-acc));
if (mask < 0.5) discard;
vec4 c = colors[class_index % 20];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that fills with a flat
/// class colour.
///
/// Every fragment takes `colors[class_index % 20]` with alpha multiplied by
/// `opacity`; no texture is sampled.
pub(super) fn generate_color_shader() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision mediump float;
uniform vec4 colors[20];
uniform int class_index;
uniform float opacity;
out vec4 color;
void main() {
int index = class_index % 20;
vec4 c = colors[index];
color = vec4(c.rgb, c.a * opacity);
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that packs three-channel
/// source pixels into four-channel output pixels.
///
/// Each output pixel at integer column `out_x` carries four consecutive
/// source bytes starting at byte index `out_x * 4` of the same row. The
/// shader fetches the one or two source texels that contain those bytes and
/// selects the channels according to the starting byte's position within its
/// source pixel (`phase` 0, 1 or 2).
pub(super) fn generate_packed_rgba8_shader_2d() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;
void main() {
// gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
// integer pixel index on all GPUs (Vivante, Mali, Adreno).
int out_x = int(floor(gl_FragCoord.x));
int out_y = int(floor(gl_FragCoord.y));
int base = out_x * 4;
// 4 consecutive byte indices map to at most 2 source pixels
int px0 = base / 3;
int px1 = (base + 3) / 3;
vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
// Extract channels based on phase (base % 3)
int phase = base - px0 * 3;
if (phase == 0) {
color = vec4(s0.r, s0.g, s0.b, s1.r);
} else if (phase == 1) {
color = vec4(s0.g, s0.b, s1.r, s1.g);
} else {
color = vec4(s0.b, s1.r, s1.g, s1.b);
}
}
";
    SOURCE
}
/// Returns the GLSL ES 3.00 fragment shader source that packs three-channel
/// source pixels into four-channel output pixels and applies the int8 bias.
///
/// Each output pixel at integer column `out_x` carries four consecutive
/// source bytes starting at byte index `out_x * 4` of the same row. The
/// selected channels are passed through `int8_bias` (round to 0..255, add 128
/// modulo 256, divide by 255), which here covers all four output components.
pub(super) fn generate_packed_rgba8_int8_shader_2d() -> &'static str {
    const SOURCE: &str = "\
#version 300 es
precision highp float;
precision highp int;
uniform sampler2D tex;
out vec4 color;
vec4 int8_bias(vec4 v) {
vec4 q = floor(v * 255.0 + 0.5);
return mod(q + 128.0, 256.0) / 255.0;
}
void main() {
// gl_FragCoord is at pixel center (n+0.5). Use floor() for robust
// integer pixel index on all GPUs (Vivante, Mali, Adreno).
int out_x = int(floor(gl_FragCoord.x));
int out_y = int(floor(gl_FragCoord.y));
int base = out_x * 4;
// 4 consecutive byte indices map to at most 2 source pixels
int px0 = base / 3;
int px1 = (base + 3) / 3;
vec4 s0 = texelFetch(tex, ivec2(px0, out_y), 0);
vec4 s1 = (px1 != px0) ? texelFetch(tex, ivec2(px1, out_y), 0) : s0;
// Extract channels based on phase (base % 3), then apply int8 bias
int phase = base - px0 * 3;
if (phase == 0) {
color = int8_bias(vec4(s0.r, s0.g, s0.b, s1.r));
} else if (phase == 1) {
color = int8_bias(vec4(s0.g, s0.b, s1.r, s1.g));
} else {
color = int8_bias(vec4(s0.b, s1.r, s1.g, s1.b));
}
}
";
    SOURCE
}
/// Returns the GLSL ES 3.10 compute shader source that unpacks byte-packed
/// proto data into an integer image array.
///
/// The shader runs in 16x16 work groups, one invocation per `(x, y)` position
/// inside `width` x `height`. For each of `num_protos` values it reads the
/// byte at offset `(y * width + x) * num_protos + k` from the `int` array in
/// the storage buffer at binding 0, sign-extends it from 8 bits, and stores
/// it in layer `k` of the `r32i` image at binding 0.
pub(super) fn generate_proto_repack_compute_shader() -> &'static str {
    const SOURCE: &str = "\
#version 310 es
layout(local_size_x = 16, local_size_y = 16, local_size_z = 1) in;
layout(std430, binding = 0) readonly buffer ProtoSSBO {
int packed_data[];
};
layout(r32i, binding = 0) writeonly uniform highp iimage2DArray dst_tex;
uniform int width;
uniform int height;
uniform int num_protos;
void main() {
int x = int(gl_GlobalInvocationID.x);
int y = int(gl_GlobalInvocationID.y);
if (x >= width || y >= height) return;
int base = (y * width + x) * num_protos;
for (int k = 0; k < num_protos; k++) {
int byte_offset = base + k;
int word_idx = byte_offset >> 2;
int byte_idx = byte_offset & 3;
int word = packed_data[word_idx];
int val = (word >> (byte_idx * 8)) & 0xFF;
if (val >= 128) val -= 256;
imageStore(dst_tex, ivec3(x, y, k), ivec4(val, 0, 0, 0));
}
}
";
    SOURCE
}