static ALWAYS_INLINE HalfRGBA8 packRGBA8(I32 a, I32 b) {
#if USE_SSE2
return _mm_packs_epi32(a, b);
#elif USE_NEON
return vcombine_u16(vqmovun_s32(a), vqmovun_s32(b));
#else
return CONVERT(combine(a, b), HalfRGBA8);
#endif
}
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(const vec4& v,
float scale = 255.0f) {
ivec4 i = round_pixel(v, scale);
HalfRGBA8 xz = packRGBA8(i.z, i.x);
HalfRGBA8 yw = packRGBA8(i.y, i.w);
HalfRGBA8 xyzwl = zipLow(xz, yw);
HalfRGBA8 xyzwh = zipHigh(xz, yw);
HalfRGBA8 lo = zip2Low(xyzwl, xyzwh);
HalfRGBA8 hi = zip2High(xyzwl, xyzwh);
return combine(lo, hi);
}
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(Float alpha,
float scale = 255.0f) {
I32 i = round_pixel(alpha, scale);
HalfRGBA8 c = packRGBA8(i, i);
c = zipLow(c, c);
return zip(c, c);
}
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(float alpha,
float scale = 255.0f) {
I32 i = round_pixel(alpha, scale);
return repeat2(packRGBA8(i, i));
}
UNUSED static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(const vec4_scalar& v,
float scale = 255.0f) {
I32 i = round_pixel((Float){v.z, v.y, v.x, v.w}, scale);
return repeat2(packRGBA8(i, i));
}
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8() {
return pack_pixels_RGBA8(fragment_shader->gl_FragColor);
}
static ALWAYS_INLINE WideRGBA8 pack_pixels_RGBA8(WideRGBA32F v,
float scale = 255.0f) {
ivec4 i = round_pixel(bit_cast<vec4>(v), scale);
return combine(packRGBA8(i.x, i.y), packRGBA8(i.z, i.w));
}
static ALWAYS_INLINE WideR8 packR8(I32 a) {
#if USE_SSE2
return lowHalf(bit_cast<V8<uint16_t>>(_mm_packs_epi32(a, a)));
#elif USE_NEON
return vqmovun_s32(a);
#else
return CONVERT(a, WideR8);
#endif
}
static ALWAYS_INLINE WideR8 pack_pixels_R8(Float c, float scale = 255.0f) {
return packR8(round_pixel(c, scale));
}
static ALWAYS_INLINE WideR8 pack_pixels_R8() {
return pack_pixels_R8(fragment_shader->gl_FragColor.x);
}
template <typename V, typename P>
static ALWAYS_INLINE V partial_load_span(const P* src, int span) {
return bit_cast<V>(
(span >= 2
? combine(unaligned_load<V2<P>>(src),
V2<P>{span > 2 ? unaligned_load<P>(src + 2) : P(0), 0})
: V4<P>{unaligned_load<P>(src), 0, 0, 0}));
}
template <typename V, typename P>
static ALWAYS_INLINE void partial_store_span(P* dst, V src, int span) {
auto pixels = bit_cast<V4<P>>(src);
if (span >= 2) {
unaligned_store(dst, lowHalf(pixels));
if (span > 2) {
unaligned_store(dst + 2, pixels.z);
}
} else {
unaligned_store(dst, pixels.x);
}
}
template <typename V, typename P>
static ALWAYS_INLINE V load_span(const P* src, int span) {
if (span >= 4) {
return unaligned_load<V, P>(src);
} else {
return partial_load_span<V, P>(src, span);
}
}
template <typename V, typename P>
static ALWAYS_INLINE void store_span(P* dst, V src, int span) {
if (span >= 4) {
unaligned_store<V, P>(dst, src);
} else {
partial_store_span<V, P>(dst, src, span);
}
}
template <typename T>
static ALWAYS_INLINE T muldiv256(T x, T y) {
return (x * y) >> 8;
}
template <typename T>
static ALWAYS_INLINE T muldiv255(T x, T y) {
return (x * y + x) >> 8;
}
template <typename V>
static ALWAYS_INLINE WideRGBA8 pack_span(uint32_t*, const V& v,
float scale = 255.0f) {
return pack_pixels_RGBA8(v, scale);
}
template <typename C>
static ALWAYS_INLINE WideR8 pack_span(uint8_t*, C c, float scale = 255.0f) {
return pack_pixels_R8(c, scale);
}
struct NoColor {};
template <typename P>
static ALWAYS_INLINE P applyColor(P src, NoColor) {
return src;
}
struct InvertColor {};
template <typename P>
static ALWAYS_INLINE P applyColor(P src, InvertColor) {
return 255 - src;
}
template <typename P>
static ALWAYS_INLINE P applyColor(P src, P color) {
return muldiv255(color, src);
}
static ALWAYS_INLINE WideRGBA8 applyColor(PackedRGBA8 src, WideRGBA8 color) {
return applyColor(unpack(src), color);
}
template <typename P, typename C>
static ALWAYS_INLINE auto packColor(P* buf, C color) {
return pack_span(buf, color, 255.0f);
}
template <typename P>
static ALWAYS_INLINE NoColor packColor(UNUSED P* buf, NoColor noColor) {
return noColor;
}
template <typename P>
static ALWAYS_INLINE InvertColor packColor(UNUSED P* buf,
InvertColor invertColor) {
return invertColor;
}
template <typename P, typename C>
static ALWAYS_INLINE auto packColor(C color) {
return packColor((P*)0, color);
}
template <typename T>
static ALWAYS_INLINE T addlow(T x, T y) {
typedef VectorType<uint8_t, sizeof(T)> bytes;
return bit_cast<T>(bit_cast<bytes>(x) + bit_cast<bytes>(y));
}
template <typename T>
static ALWAYS_INLINE T alphas(T c) {
return SHUFFLE(c, c, 3, 3, 3, 3, 7, 7, 7, 7, 11, 11, 11, 11, 15, 15, 15, 15);
}
template <typename T>
static ALWAYS_INLINE T set_alphas(T c, T a) {
return SHUFFLE(c, a, 0, 1, 2, 19, 4, 5, 6, 23, 8, 9, 10, 27, 12, 13, 14, 31);
}
static ALWAYS_INLINE HalfRGBA8 if_then_else(V8<int16_t> c, HalfRGBA8 t,
HalfRGBA8 e) {
return bit_cast<HalfRGBA8>((c & t) | (~c & e));
}
template <typename T, typename C, int N>
static ALWAYS_INLINE VectorType<T, N> if_then_else(VectorType<C, N> c,
VectorType<T, N> t,
VectorType<T, N> e) {
return combine(if_then_else(lowHalf(c), lowHalf(t), lowHalf(e)),
if_then_else(highHalf(c), highHalf(t), highHalf(e)));
}
static ALWAYS_INLINE HalfRGBA8 min(HalfRGBA8 x, HalfRGBA8 y) {
#if USE_SSE2
return bit_cast<HalfRGBA8>(
_mm_min_epi16(bit_cast<V8<int16_t>>(x), bit_cast<V8<int16_t>>(y)));
#elif USE_NEON
return vminq_u16(x, y);
#else
return if_then_else(x < y, x, y);
#endif
}
template <typename T, int N>
static ALWAYS_INLINE VectorType<T, N> min(VectorType<T, N> x,
VectorType<T, N> y) {
return combine(min(lowHalf(x), lowHalf(y)), min(highHalf(x), highHalf(y)));
}
static ALWAYS_INLINE HalfRGBA8 max(HalfRGBA8 x, HalfRGBA8 y) {
#if USE_SSE2
return bit_cast<HalfRGBA8>(
_mm_max_epi16(bit_cast<V8<int16_t>>(x), bit_cast<V8<int16_t>>(y)));
#elif USE_NEON
return vmaxq_u16(x, y);
#else
return if_then_else(x > y, x, y);
#endif
}
template <typename T, int N>
static ALWAYS_INLINE VectorType<T, N> max(VectorType<T, N> x,
VectorType<T, N> y) {
return combine(max(lowHalf(x), lowHalf(y)), max(highHalf(x), highHalf(y)));
}
template <typename T, int N>
static ALWAYS_INLINE VectorType<T, N> recip(VectorType<T, N> v) {
return combine(recip(lowHalf(v)), recip(highHalf(v)));
}
template <typename V>
static ALWAYS_INLINE V recip_or(V v, float f) {
return if_then_else(v != V(0.0f), recip(v), V(f));
}
template <typename T, int N>
static ALWAYS_INLINE VectorType<T, N> inversesqrt(VectorType<T, N> v) {
return combine(inversesqrt(lowHalf(v)), inversesqrt(highHalf(v)));
}
static ALWAYS_INLINE WideRGBA32F unpremultiply(WideRGBA32F v) {
Float a = recip_or((Float){v[3], v[7], v[11], v[15]}, 0.0f);
return v * a.xxxxyyyyzzzzwwww;
}
static ALWAYS_INLINE vec4 unpack(PackedRGBA32F c) {
return bit_cast<vec4>(
SHUFFLE(c, c, 2, 6, 10, 14, 1, 5, 9, 13, 0, 4, 8, 12, 3, 7, 11, 15));
}
static ALWAYS_INLINE Float lumv3(vec3 v) {
return v.x * 0.30f + v.y * 0.59f + v.z * 0.11f;
}
static ALWAYS_INLINE Float minv3(vec3 v) { return min(min(v.x, v.y), v.z); }
static ALWAYS_INLINE Float maxv3(vec3 v) { return max(max(v.x, v.y), v.z); }
static inline vec3 clip_color(vec3 v, Float lum, Float alpha) {
Float mincol = max(-minv3(v), lum);
Float maxcol = max(maxv3(v), alpha - lum);
return lum + v * (lum * (alpha - lum) * recip_or(mincol * maxcol, 0.0f));
}
static inline vec3 set_lum(vec3 base, vec3 ref, Float alpha) {
return clip_color(base - lumv3(base), lumv3(ref), alpha);
}
static inline vec3 set_lum_sat(vec3 base, vec3 sref, vec3 lref, Float alpha) {
vec3 diff = base - minv3(base);
Float sbase = maxv3(diff);
Float ssat = maxv3(sref) - minv3(sref);
return set_lum(diff * ssat * recip_or(sbase, 0.0f), lref, alpha);
}
enum SWGLClipFlag {
SWGL_CLIP_FLAG_MASK = 1 << 0,
SWGL_CLIP_FLAG_AA = 1 << 1,
SWGL_CLIP_FLAG_BLEND_OVERRIDE = 1 << 2,
};
static int swgl_ClipFlags = 0;
static BlendKey swgl_BlendOverride = BLEND_KEY_NONE;
static WideRGBA8 swgl_BlendColorRGBA8 = {0};
static WideRGBA8 swgl_BlendAlphaRGBA8 = {0};
static void* swgl_SpanBuf = nullptr;
static uint8_t* swgl_ClipMaskBuf = nullptr;
static ALWAYS_INLINE WideR8 expand_mask(UNUSED uint8_t* buf, WideR8 mask) {
return mask;
}
static ALWAYS_INLINE WideRGBA8 expand_mask(UNUSED uint32_t* buf, WideR8 mask) {
WideRG8 maskRG = zip(mask, mask);
return zip(maskRG, maskRG);
}
template <typename P>
static ALWAYS_INLINE uint8_t* get_clip_mask(P* buf) {
return &swgl_ClipMaskBuf[buf - (P*)swgl_SpanBuf];
}
template <typename P>
static ALWAYS_INLINE auto load_clip_mask(P* buf, int span)
-> decltype(expand_mask(buf, 0)) {
return expand_mask(buf,
unpack(load_span<PackedR8>(get_clip_mask(buf), span)));
}
static ALWAYS_INLINE void override_clip_mask() {
blend_key = BlendKey(blend_key - MASK_BLEND_KEY_NONE);
}
static ALWAYS_INLINE void restore_clip_mask() {
blend_key = BlendKey(MASK_BLEND_KEY_NONE + blend_key);
}
static const uint8_t* swgl_OpaqueStart = nullptr;
static uint32_t swgl_OpaqueSize = 0;
static Float swgl_LeftAADist = 0.0f;
static Float swgl_RightAADist = 0.0f;
static Float swgl_AASlope = 0.0f;
template <typename P>
static ALWAYS_INLINE int get_aa_opaque_start(P* buf) {
return max(int((P*)swgl_OpaqueStart - buf), 0);
}
template <typename P>
static ALWAYS_INLINE int get_aa_opaque_size(P* buf) {
return max(int((P*)&swgl_OpaqueStart[swgl_OpaqueSize] - buf), 0);
}
static ALWAYS_INLINE void override_aa() {
blend_key = BlendKey(blend_key - AA_BLEND_KEY_NONE);
}
static ALWAYS_INLINE void restore_aa() {
blend_key = BlendKey(AA_BLEND_KEY_NONE + blend_key);
}
static PREFER_INLINE WideRGBA8 blend_pixels(uint32_t* buf, PackedRGBA8 pdst,
WideRGBA8 src, int span = 4) {
WideRGBA8 dst = unpack(pdst);
const WideRGBA8 RGB_MASK = {0xFFFF, 0xFFFF, 0xFFFF, 0, 0xFFFF, 0xFFFF,
0xFFFF, 0, 0xFFFF, 0xFFFF, 0xFFFF, 0,
0xFFFF, 0xFFFF, 0xFFFF, 0};
const WideRGBA8 ALPHA_MASK = {0, 0, 0, 0xFFFF, 0, 0, 0, 0xFFFF,
0, 0, 0, 0xFFFF, 0, 0, 0, 0xFFFF};
const WideRGBA8 ALPHA_OPAQUE = {0, 0, 0, 255, 0, 0, 0, 255,
0, 0, 0, 255, 0, 0, 0, 255};
#define DO_AA(format, body) \
do { \
int offset = int((const uint8_t*)buf - swgl_OpaqueStart); \
if (uint32_t(offset) >= swgl_OpaqueSize) { \
Float delta = swgl_AASlope * float(offset); \
Float dist = clamp(min(swgl_LeftAADist + delta.x, \
swgl_RightAADist + delta.y), \
0.0f, 256.0f); \
auto aa = pack_pixels_##format(dist, 1.0f); \
body; \
} \
} while (0)
#define BLEND_CASE_KEY(key) \
case AA_##key: \
DO_AA(RGBA8, src = muldiv256(src, aa)); \
goto key; \
case AA_MASK_##key: \
DO_AA(RGBA8, src = muldiv256(src, aa)); \
FALLTHROUGH; \
case MASK_##key: \
src = muldiv255(src, load_clip_mask(buf, span)); \
FALLTHROUGH; \
case key: key
#define BLEND_CASE(...) BLEND_CASE_KEY(BLEND_KEY(__VA_ARGS__))
switch (blend_key) {
BLEND_CASE(GL_ONE, GL_ZERO):
return src;
BLEND_CASE(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA, GL_ONE,
GL_ONE_MINUS_SRC_ALPHA):
return addlow(dst, muldiv255(alphas(src), (src | ALPHA_OPAQUE) - dst));
BLEND_CASE(GL_ONE, GL_ONE_MINUS_SRC_ALPHA):
return src + dst - muldiv255(dst, alphas(src));
BLEND_CASE(GL_ZERO, GL_ONE_MINUS_SRC_COLOR):
return dst - muldiv255(dst, src);
BLEND_CASE(GL_ZERO, GL_ONE_MINUS_SRC_COLOR, GL_ZERO, GL_ONE):
return dst - (muldiv255(dst, src) & RGB_MASK);
BLEND_CASE(GL_ZERO, GL_ONE_MINUS_SRC_ALPHA):
return dst - muldiv255(dst, alphas(src));
BLEND_CASE(GL_ZERO, GL_SRC_COLOR):
return muldiv255(src, dst);
BLEND_CASE(GL_ONE, GL_ONE):
return src + dst;
BLEND_CASE(GL_ONE, GL_ONE, GL_ONE, GL_ONE_MINUS_SRC_ALPHA):
return src + dst - (muldiv255(dst, src) & ALPHA_MASK);
BLEND_CASE(GL_ONE_MINUS_DST_ALPHA, GL_ONE, GL_ZERO, GL_ONE):
return dst + ((src - muldiv255(src, alphas(dst))) & RGB_MASK);
BLEND_CASE(GL_CONSTANT_COLOR, GL_ONE_MINUS_SRC_COLOR):
return addlow(
dst, muldiv255(src, repeat2(ctx->blendcolor) - dst));
case BLEND_KEY(GL_ONE, GL_ONE_MINUS_SRC1_COLOR): {
WideRGBA8 secondary =
applyColor(dst,
packColor<uint32_t>(fragment_shader->gl_SecondaryFragColor));
return src + dst - secondary;
}
case MASK_BLEND_KEY(GL_ONE, GL_ONE_MINUS_SRC1_COLOR): {
WideRGBA8 secondary =
applyColor(dst,
packColor<uint32_t>(fragment_shader->gl_SecondaryFragColor));
WideRGBA8 mask = load_clip_mask(buf, span);
return muldiv255(src, mask) + dst - muldiv255(secondary, mask);
}
case AA_BLEND_KEY(GL_ONE, GL_ONE_MINUS_SRC1_COLOR): {
WideRGBA8 secondary =
applyColor(dst,
packColor<uint32_t>(fragment_shader->gl_SecondaryFragColor));
DO_AA(RGBA8, {
src = muldiv256(src, aa);
secondary = muldiv256(secondary, aa);
});
return src + dst - secondary;
}
case AA_MASK_BLEND_KEY(GL_ONE, GL_ONE_MINUS_SRC1_COLOR): {
WideRGBA8 secondary =
applyColor(dst,
packColor<uint32_t>(fragment_shader->gl_SecondaryFragColor));
WideRGBA8 mask = load_clip_mask(buf, span);
DO_AA(RGBA8, mask = muldiv256(mask, aa));
return muldiv255(src, mask) + dst - muldiv255(secondary, mask);
}
BLEND_CASE(GL_MIN):
return min(src, dst);
BLEND_CASE(GL_MAX):
return max(src, dst);
BLEND_CASE(GL_MULTIPLY_KHR): {
WideRGBA8 diff = muldiv255(alphas(src) - (src & RGB_MASK),
alphas(dst) - (dst & RGB_MASK));
return src + dst + (diff & RGB_MASK) - alphas(diff);
}
BLEND_CASE(GL_SCREEN_KHR):
return src + dst - muldiv255(src, dst);
BLEND_CASE(GL_OVERLAY_KHR): {
WideRGBA8 srcA = alphas(src);
WideRGBA8 dstA = alphas(dst);
WideRGBA8 diff = muldiv255(src, dst) + muldiv255(srcA - src, dstA - dst);
return src + dst +
if_then_else(dst * 2 <= dstA, (diff & RGB_MASK) - alphas(diff),
-diff);
}
BLEND_CASE(GL_DARKEN_KHR):
return src + dst -
max(muldiv255(src, alphas(dst)), muldiv255(dst, alphas(src)));
BLEND_CASE(GL_LIGHTEN_KHR):
return src + dst -
min(muldiv255(src, alphas(dst)), muldiv255(dst, alphas(src)));
BLEND_CASE(GL_COLORDODGE_KHR): {
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
WideRGBA32F srcA = alphas(srcF);
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
WideRGBA32F dstA = alphas(dstF);
return pack_pixels_RGBA8(
srcA * set_alphas(
min(dstA, dstF * srcA * recip_or(srcA - srcF, 255.0f)),
dstF) +
srcF * (255.0f - dstA) + dstF * (255.0f - srcA),
1.0f / 255.0f);
}
BLEND_CASE(GL_COLORBURN_KHR): {
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
WideRGBA32F srcA = alphas(srcF);
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
WideRGBA32F dstA = alphas(dstF);
return pack_pixels_RGBA8(
srcA * set_alphas((dstA - min(dstA, (dstA - dstF) * srcA *
recip_or(srcF, 255.0f))),
dstF) +
srcF * (255.0f - dstA) + dstF * (255.0f - srcA),
1.0f / 255.0f);
}
BLEND_CASE(GL_HARDLIGHT_KHR): {
WideRGBA8 srcA = alphas(src);
WideRGBA8 dstA = alphas(dst);
WideRGBA8 diff = muldiv255(src, dst) + muldiv255(srcA - src, dstA - dst);
return src + dst +
if_then_else(src * 2 <= srcA, (diff & RGB_MASK) - alphas(diff),
-diff);
}
BLEND_CASE(GL_SOFTLIGHT_KHR): {
WideRGBA32F srcF = CONVERT(src, WideRGBA32F);
WideRGBA32F srcA = alphas(srcF);
WideRGBA32F dstF = CONVERT(dst, WideRGBA32F);
WideRGBA32F dstA = alphas(dstF);
WideRGBA32F dstU = unpremultiply(dstF);
WideRGBA32F scale = srcF + srcF - srcA;
return pack_pixels_RGBA8(
dstF * (255.0f +
set_alphas(
scale *
if_then_else(scale < 0.0f, 1.0f - dstU,
min((16.0f * dstU - 12.0f) * dstU + 3.0f,
inversesqrt(dstU) - 1.0f)),
WideRGBA32F(0.0f))) +
srcF * (255.0f - dstA),
1.0f / 255.0f);
}
BLEND_CASE(GL_DIFFERENCE_KHR): {
WideRGBA8 diff =
min(muldiv255(dst, alphas(src)), muldiv255(src, alphas(dst)));
return src + dst - diff - (diff & RGB_MASK);
}
BLEND_CASE(GL_EXCLUSION_KHR): {
WideRGBA8 diff = muldiv255(src, dst);
return src + dst - diff - (diff & RGB_MASK);
}
#define DO_HSL(rgb) \
do { \
vec4 srcV = unpack(CONVERT(src, PackedRGBA32F)); \
vec4 dstV = unpack(CONVERT(dst, PackedRGBA32F)); \
Float srcA = srcV.w * (1.0f / 255.0f); \
Float dstA = dstV.w * (1.0f / 255.0f); \
Float srcDstA = srcV.w * dstA; \
vec3 srcC = vec3(srcV) * dstA; \
vec3 dstC = vec3(dstV) * srcA; \
return pack_pixels_RGBA8(vec4(rgb + vec3(srcV) - srcC + vec3(dstV) - dstC, \
srcV.w + dstV.w - srcDstA), \
1.0f); \
} while (0)
BLEND_CASE(GL_HSL_HUE_KHR):
DO_HSL(set_lum_sat(srcC, dstC, dstC, srcDstA));
BLEND_CASE(GL_HSL_SATURATION_KHR):
DO_HSL(set_lum_sat(dstC, srcC, dstC, srcDstA));
BLEND_CASE(GL_HSL_COLOR_KHR):
DO_HSL(set_lum(srcC, dstC, srcDstA));
BLEND_CASE(GL_HSL_LUMINOSITY_KHR):
DO_HSL(set_lum(dstC, srcC, srcDstA));
BLEND_CASE(SWGL_BLEND_DROP_SHADOW): {
WideRGBA8 color = applyColor(alphas(src), swgl_BlendColorRGBA8);
return color + dst - muldiv255(dst, alphas(color));
}
BLEND_CASE(SWGL_BLEND_SUBPIXEL_TEXT):
return applyColor(src, swgl_BlendColorRGBA8) + dst -
muldiv255(dst, applyColor(src, swgl_BlendAlphaRGBA8));
default:
UNREACHABLE;
}
#undef BLEND_CASE
#undef BLEND_CASE_KEY
}
static PREFER_INLINE WideR8 blend_pixels(uint8_t* buf, WideR8 dst, WideR8 src,
int span = 4) {
#define BLEND_CASE_KEY(key) \
case AA_##key: \
DO_AA(R8, src = muldiv256(src, aa)); \
goto key; \
case AA_MASK_##key: \
DO_AA(R8, src = muldiv256(src, aa)); \
FALLTHROUGH; \
case MASK_##key: \
src = muldiv255(src, load_clip_mask(buf, span)); \
FALLTHROUGH; \
case key: key
#define BLEND_CASE(...) BLEND_CASE_KEY(BLEND_KEY(__VA_ARGS__))
switch (blend_key) {
BLEND_CASE(GL_ONE, GL_ZERO):
return src;
BLEND_CASE(GL_ZERO, GL_SRC_COLOR):
return muldiv255(src, dst);
BLEND_CASE(GL_ONE, GL_ONE):
return src + dst;
default:
UNREACHABLE;
}
#undef BLEND_CASE
#undef BLEND_CASE_KEY
}
static ALWAYS_INLINE void commit_span(uint32_t* buf, WideRGBA8 r) {
unaligned_store(buf, pack(r));
}
static ALWAYS_INLINE void commit_span(uint32_t* buf, WideRGBA8 r, int len) {
partial_store_span(buf, pack(r), len);
}
static ALWAYS_INLINE WideRGBA8 blend_span(uint32_t* buf, WideRGBA8 r) {
return blend_pixels(buf, unaligned_load<PackedRGBA8>(buf), r);
}
static ALWAYS_INLINE WideRGBA8 blend_span(uint32_t* buf, WideRGBA8 r, int len) {
return blend_pixels(buf, partial_load_span<PackedRGBA8>(buf, len), r, len);
}
static ALWAYS_INLINE void commit_span(uint32_t* buf, PackedRGBA8 r) {
unaligned_store(buf, r);
}
static ALWAYS_INLINE void commit_span(uint32_t* buf, PackedRGBA8 r, int len) {
partial_store_span(buf, r, len);
}
static ALWAYS_INLINE PackedRGBA8 blend_span(uint32_t* buf, PackedRGBA8 r) {
return pack(blend_span(buf, unpack(r)));
}
static ALWAYS_INLINE PackedRGBA8 blend_span(uint32_t* buf, PackedRGBA8 r,
int len) {
return pack(blend_span(buf, unpack(r), len));
}
static ALWAYS_INLINE void commit_span(uint8_t* buf, WideR8 r) {
unaligned_store(buf, pack(r));
}
static ALWAYS_INLINE void commit_span(uint8_t* buf, WideR8 r, int len) {
partial_store_span(buf, pack(r), len);
}
static ALWAYS_INLINE WideR8 blend_span(uint8_t* buf, WideR8 r) {
return blend_pixels(buf, unpack(unaligned_load<PackedR8>(buf)), r);
}
static ALWAYS_INLINE WideR8 blend_span(uint8_t* buf, WideR8 r, int len) {
return blend_pixels(buf, unpack(partial_load_span<PackedR8>(buf, len)), r,
len);
}
static ALWAYS_INLINE void commit_span(uint8_t* buf, PackedR8 r) {
unaligned_store(buf, r);
}
static ALWAYS_INLINE void commit_span(uint8_t* buf, PackedR8 r, int len) {
partial_store_span(buf, r, len);
}
static ALWAYS_INLINE PackedR8 blend_span(uint8_t* buf, PackedR8 r) {
return pack(blend_span(buf, unpack(r)));
}
static ALWAYS_INLINE PackedR8 blend_span(uint8_t* buf, PackedR8 r, int len) {
return pack(blend_span(buf, unpack(r), len));
}
template <bool BLEND, typename P, typename R>
static ALWAYS_INLINE void commit_blend_span(P* buf, R r) {
if (BLEND) {
commit_span(buf, blend_span(buf, r));
} else {
commit_span(buf, r);
}
}
template <bool BLEND, typename P, typename R>
static ALWAYS_INLINE void commit_blend_span(P* buf, R r, int len) {
if (BLEND) {
commit_span(buf, blend_span(buf, r, len), len);
} else {
commit_span(buf, r, len);
}
}
template <typename P, typename R>
static ALWAYS_INLINE void commit_blend_solid_span(P* buf, R r, int len) {
for (P* end = &buf[len & ~3]; buf < end; buf += 4) {
commit_span(buf, blend_span(buf, r));
}
len &= 3;
if (len > 0) {
partial_store_span(buf, pack(blend_span(buf, r, len)), len);
}
}
template <bool BLEND>
static void commit_solid_span(uint32_t* buf, WideRGBA8 r, int len) {
commit_blend_solid_span(buf, r, len);
}
template <>
ALWAYS_INLINE void commit_solid_span<false>(uint32_t* buf, WideRGBA8 r,
int len) {
fill_n(buf, len, bit_cast<U32>(pack(r)).x);
}
template <bool BLEND>
static void commit_solid_span(uint8_t* buf, WideR8 r, int len) {
commit_blend_solid_span(buf, r, len);
}
template <>
ALWAYS_INLINE void commit_solid_span<false>(uint8_t* buf, WideR8 r, int len) {
PackedR8 p = pack(r);
if (uintptr_t(buf) & 3) {
int align = 4 - (uintptr_t(buf) & 3);
align = min(align, len);
partial_store_span(buf, p, align);
buf += align;
len -= align;
}
fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(p));
buf += len & ~3;
len &= 3;
if (len > 0) {
partial_store_span(buf, p, len);
}
}