constexpr uint32_t MAX_DEPTH_VALUE = 0xFFFFFF;
constexpr uint32_t MAX_DEPTH_RUN = 255 & ~3;
struct DepthRun {
#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
uint32_t depth : 24;
uint32_t count : 8;
#else
uint32_t count : 8;
uint32_t depth : 24;
#endif
DepthRun() = default;
DepthRun(uint32_t depth, uint8_t count) : depth(depth), count(count) {}
bool is_flat() const { return !count; }
template <int FUNC>
ALWAYS_INLINE bool compare(uint32_t src) const {
switch (FUNC) {
case GL_LEQUAL:
return src <= depth;
case GL_LESS:
return src < depth;
case GL_ALWAYS:
return true;
default:
assert(false);
return false;
}
}
};
static ALWAYS_INLINE void set_depth_runs(DepthRun* runs, uint32_t depth,
uint32_t width) {
for (; width >= MAX_DEPTH_RUN;
runs += MAX_DEPTH_RUN, width -= MAX_DEPTH_RUN) {
*runs = DepthRun(depth, MAX_DEPTH_RUN);
}
if (width > 0) {
*runs = DepthRun(depth, width);
}
}
struct DepthCursor {
DepthRun* cur = nullptr;
DepthRun* start = nullptr;
DepthRun* end = nullptr;
DepthCursor() = default;
DepthCursor(DepthRun* runs, int num_runs, int span_offset, int span_count)
: cur(runs), start(&runs[span_offset]), end(start + span_count) {
assert(!runs->is_flat());
DepthRun* end_runs = &runs[num_runs];
if (end > end_runs) {
end = end_runs;
}
if (start >= end_runs) {
cur = end_runs;
start = end_runs;
return;
}
for (;;) {
assert(cur < end);
DepthRun* next = cur + cur->count;
if (start < next) {
break;
}
cur = next;
}
}
bool valid() const {
return cur >= end || (cur <= start && start < cur + cur->count);
}
template <int FUNC>
int skip_failed(uint32_t val) {
assert(valid());
DepthRun* prev = start;
while (cur < end) {
if (cur->compare<FUNC>(val)) {
return start - prev;
}
cur += cur->count;
start = cur;
}
return -1;
}
ALWAYS_INLINE int skip_failed(uint32_t val, GLenum func) {
switch (func) {
case GL_LEQUAL:
return skip_failed<GL_LEQUAL>(val);
case GL_LESS:
return skip_failed<GL_LESS>(val);
default:
assert(false);
return -1;
}
}
template <int FUNC, bool MASK>
int check_passed(uint32_t val) {
assert(valid());
DepthRun* prev = cur;
while (cur < end) {
if (!cur->compare<FUNC>(val)) {
break;
}
DepthRun* next = cur + cur->count;
if (next > end) {
if (MASK) {
*end = DepthRun(cur->depth, next - end);
}
next = end;
}
cur = next;
}
if (cur <= start) {
return 0;
}
int passed = cur - start;
if (MASK) {
if (prev < start) {
prev->count = start - prev;
}
set_depth_runs(start, val, passed);
}
start = cur;
return passed;
}
template <bool MASK>
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func) {
switch (func) {
case GL_LEQUAL:
return check_passed<GL_LEQUAL, MASK>(val);
case GL_LESS:
return check_passed<GL_LESS, MASK>(val);
default:
assert(false);
return 0;
}
}
ALWAYS_INLINE int check_passed(uint32_t val, GLenum func, bool mask) {
return mask ? check_passed<true>(val, func)
: check_passed<false>(val, func);
}
ALWAYS_INLINE void fill(uint32_t depth) {
check_passed<GL_ALWAYS, true>(depth);
}
};
void Texture::init_depth_runs(uint32_t depth) {
if (!buf) return;
DepthRun* runs = (DepthRun*)buf;
for (int y = 0; y < height; y++) {
set_depth_runs(runs, depth, width);
runs += stride() / sizeof(DepthRun);
}
set_cleared(true);
}
static ALWAYS_INLINE void fill_flat_depth(DepthRun* dst, size_t n,
uint32_t depth) {
fill_n((uint32_t*)dst, n, depth);
}
void Texture::fill_depth_runs(uint32_t depth, const IntRect& scissor) {
if (!buf) return;
assert(cleared());
IntRect bb = bounds().intersection(scissor - offset);
DepthRun* runs = (DepthRun*)sample_ptr(0, bb.y0);
for (int rows = bb.height(); rows > 0; rows--) {
if (bb.width() >= width) {
set_depth_runs(runs, depth, width);
} else if (runs->is_flat()) {
fill_flat_depth(&runs[bb.x0], bb.width(), depth);
} else {
DepthCursor(runs, width, bb.x0, bb.width()).fill(depth);
}
runs += stride() / sizeof(DepthRun);
}
}
using ZMask = I32;
#if USE_SSE2
# define ZMASK_NONE_PASSED 0xFFFF
# define ZMASK_ALL_PASSED 0
static inline uint32_t zmask_code(ZMask mask) {
return _mm_movemask_epi8(mask);
}
#else
# define ZMASK_NONE_PASSED 0xFFFFFFFFU
# define ZMASK_ALL_PASSED 0
static inline uint32_t zmask_code(ZMask mask) {
return bit_cast<uint32_t>(CONVERT(mask, U8));
}
#endif
template <bool DISCARD>
static ALWAYS_INLINE bool check_depth(I32 src, DepthRun* zbuf, ZMask& outmask,
int span = 4) {
I32 dest = unaligned_load<I32>(zbuf);
ZMask mask = ctx->depthfunc == GL_LEQUAL
?
ZMask(src > dest)
:
ZMask(src >= dest);
mask |= ZMask(span) < ZMask{1, 2, 3, 4};
if (zmask_code(mask) == ZMASK_NONE_PASSED) {
return false;
}
if (!DISCARD && ctx->depthmask) {
unaligned_store(zbuf, (mask & dest) | (~mask & src));
}
outmask = mask;
return true;
}
static ALWAYS_INLINE I32 packDepth() {
return cast(fragment_shader->gl_FragCoord.z * MAX_DEPTH_VALUE);
}
static ALWAYS_INLINE void discard_depth(I32 src, DepthRun* zbuf, I32 mask) {
if (ctx->depthmask) {
I32 dest = unaligned_load<I32>(zbuf);
mask |= fragment_shader->swgl_IsPixelDiscarded;
unaligned_store(zbuf, (mask & dest) | (~mask & src));
}
}
static ALWAYS_INLINE void mask_output(uint32_t* buf, ZMask zmask,
int span = 4) {
WideRGBA8 r = pack_pixels_RGBA8();
PackedRGBA8 dst = load_span<PackedRGBA8>(buf, span);
if (blend_key) r = blend_pixels(buf, dst, r, span);
PackedRGBA8 mask = bit_cast<PackedRGBA8>(zmask);
store_span(buf, (mask & dst) | (~mask & pack(r)), span);
}
template <bool DISCARD>
static ALWAYS_INLINE void discard_output(uint32_t* buf, int span = 4) {
mask_output(buf, fragment_shader->swgl_IsPixelDiscarded, span);
}
template <>
ALWAYS_INLINE void discard_output<false>(uint32_t* buf, int span) {
WideRGBA8 r = pack_pixels_RGBA8();
if (blend_key)
r = blend_pixels(buf, load_span<PackedRGBA8>(buf, span), r, span);
store_span(buf, pack(r), span);
}
static ALWAYS_INLINE void mask_output(uint8_t* buf, ZMask zmask, int span = 4) {
WideR8 r = pack_pixels_R8();
WideR8 dst = unpack(load_span<PackedR8>(buf, span));
if (blend_key) r = blend_pixels(buf, dst, r, span);
WideR8 mask = packR8(zmask);
store_span(buf, pack((mask & dst) | (~mask & r)), span);
}
template <bool DISCARD>
static ALWAYS_INLINE void discard_output(uint8_t* buf, int span = 4) {
mask_output(buf, fragment_shader->swgl_IsPixelDiscarded, span);
}
template <>
ALWAYS_INLINE void discard_output<false>(uint8_t* buf, int span) {
WideR8 r = pack_pixels_R8();
if (blend_key)
r = blend_pixels(buf, unpack(load_span<PackedR8>(buf, span)), r, span);
store_span(buf, pack(r), span);
}
struct ClipRect {
float x0;
float y0;
float x1;
float y1;
explicit ClipRect(const IntRect& i)
: x0(i.x0), y0(i.y0), x1(i.x1), y1(i.y1) {}
explicit ClipRect(const Texture& t) : ClipRect(ctx->apply_scissor(t)) {
if (ctx->blend) {
blend_key = ctx->blend_key;
if (swgl_ClipFlags) {
if (swgl_ClipFlags & SWGL_CLIP_FLAG_BLEND_OVERRIDE) {
blend_key = swgl_BlendOverride;
}
if (swgl_ClipFlags & SWGL_CLIP_FLAG_MASK) {
assert(swgl_ClipMask->format == TextureFormat::R8);
swgl_ClipMaskBounds.intersect(IntRect{0, 0, int(swgl_ClipMask->width),
int(swgl_ClipMask->height)});
swgl_ClipMaskOffset += ctx->viewport.origin() - t.offset;
swgl_ClipMaskBounds.offset(swgl_ClipMaskOffset);
intersect(swgl_ClipMaskBounds);
restore_clip_mask();
}
if (swgl_ClipFlags & SWGL_CLIP_FLAG_AA) {
restore_aa();
}
}
} else {
blend_key = BLEND_KEY_NONE;
swgl_ClipFlags = 0;
}
}
FloatRange x_range() const { return {x0, x1}; }
void intersect(const IntRect& c) {
x0 = max(x0, float(c.x0));
y0 = max(y0, float(c.y0));
x1 = min(x1, float(c.x1));
y1 = min(y1, float(c.y1));
}
template <typename P>
void set_clip_mask(int x, int y, P* buf) const {
if (swgl_ClipFlags & SWGL_CLIP_FLAG_MASK) {
swgl_SpanBuf = buf;
swgl_ClipMaskBuf = (uint8_t*)swgl_ClipMask->buf +
(y - swgl_ClipMaskOffset.y) * swgl_ClipMask->stride +
(x - swgl_ClipMaskOffset.x);
}
}
template <typename P>
bool overlaps(int nump, const P* p) const {
int sides = 0;
for (int i = 0; i < nump; i++) {
sides |= p[i].x < x1 ? (p[i].x > x0 ? 1 | 2 : 1) : 2;
sides |= p[i].y < y1 ? (p[i].y > y0 ? 4 | 8 : 4) : 8;
}
return sides == 0xF;
}
};
template <typename E>
static ALWAYS_INLINE FloatRange x_intercepts(const E& e) {
float rad = 0.5f * abs(e.x_slope());
return {e.cur_x() - rad, e.cur_x() + rad};
}
template <typename E>
static ALWAYS_INLINE IntRange aa_edge(const E& e, const FloatRange& bounds) {
return e.edgeMask ? bounds.clip(x_intercepts(e)).round_out()
: bounds.clip({e.cur_x(), e.cur_x()}).round();
}
template <typename E>
static ALWAYS_INLINE FloatRange aa_dist(const E& e, float dir) {
if (e.edgeMask) {
float dx = (dir * 256.0f) * inversesqrt(1.0f + e.x_slope() * e.x_slope());
return {128.0f + dx * (e.cur_x() - 0.5f), -dx};
} else {
return {256.0f, 0.0f};
}
}
template <typename P, typename E>
static ALWAYS_INLINE IntRange aa_span(P* buf, const E& left, const E& right,
const FloatRange& bounds) {
if (!(swgl_ClipFlags & SWGL_CLIP_FLAG_AA)) {
return bounds.clip({left.cur_x(), right.cur_x()}).round();
}
IntRange leftAA = aa_edge(left, bounds);
FloatRange leftDist = aa_dist(left, -1.0f);
IntRange rightAA = aa_edge(right, bounds);
FloatRange rightDist = aa_dist(right, 1.0f);
swgl_OpaqueStart = (const uint8_t*)(buf + leftAA.end);
swgl_OpaqueSize = max(rightAA.start - leftAA.end - 3, 0) * sizeof(P);
Float offset = cast(leftAA.end + (I32){0, 1, 2, 3});
swgl_LeftAADist = leftDist.start + offset * leftDist.end;
swgl_RightAADist = rightDist.start + offset * rightDist.end;
swgl_AASlope =
(Float){leftDist.end, rightDist.end, 0.0f, 0.0f} / float(sizeof(P));
return {leftAA.start, rightAA.end};
}
template <typename E>
static ALWAYS_INLINE IntRange clip_distance_range(const E& left,
const E& right) {
Float leftClip = get_clip_distances(left.interp);
Float rightClip = get_clip_distances(right.interp);
Float clipStep = (rightClip - leftClip) / (right.cur_x() - left.cur_x());
Float clipDist =
clamp(left.cur_x() - leftClip * recip(clipStep), 0.0f, 1.0e6f);
Float start = if_then_else(clipStep > 0.0f, clipDist,
if_then_else(leftClip < 0.0f, 1.0e6f, 0.0f));
Float end = if_then_else(clipStep < 0.0f, clipDist,
if_then_else(rightClip >= 0.0f, 1.0e6f, 0.0f));
start = max(start, start.zwxy);
end = min(end, end.zwxy);
return FloatRange{max(start.x, start.y), min(end.x, end.y)}.round();
}
static void flatten_depth_runs(DepthRun* runs, size_t width) {
if (runs->is_flat()) {
return;
}
while (width > 0) {
size_t n = runs->count;
fill_flat_depth(runs, n, runs->depth);
runs += n;
width -= n;
}
}
template <typename P>
static ALWAYS_INLINE void draw_depth_span(uint32_t z, P* buf,
DepthCursor& cursor) {
for (;;) {
int span = cursor.check_passed(z, ctx->depthfunc, ctx->depthmask);
if (span <= 0) {
break;
}
if (span >= 4) {
if (fragment_shader->has_draw_span(buf)) {
int drawn = fragment_shader->draw_span(buf, span & ~3);
buf += drawn;
span -= drawn;
}
while (span >= 4) {
fragment_shader->run();
discard_output<false>(buf);
buf += 4;
span -= 4;
}
}
if (span > 0) {
fragment_shader->run();
discard_output<false>(buf, span);
buf += span;
}
int skip = cursor.skip_failed(z, ctx->depthfunc);
if (skip <= 0) {
break;
}
fragment_shader->skip(skip - (span > 0 ? 4 - span : 0));
buf += skip;
}
}
template <bool DISCARD, bool W, typename P, typename Z>
static ALWAYS_INLINE void draw_span(P* buf, DepthRun* depth, int span, Z z) {
if (depth) {
for (; span >= 4; span -= 4, buf += 4, depth += 4) {
I32 zsrc = z();
ZMask zmask;
if (check_depth<DISCARD>(zsrc, depth, zmask)) {
fragment_shader->run<W>();
mask_output(buf, zmask);
if (DISCARD) discard_depth(zsrc, depth, zmask);
} else {
fragment_shader->skip<W>();
}
}
if (span > 0) {
I32 zsrc = z();
ZMask zmask;
if (check_depth<DISCARD>(zsrc, depth, zmask, span)) {
fragment_shader->run<W>();
mask_output(buf, zmask, span);
if (DISCARD) discard_depth(zsrc, depth, zmask);
}
}
} else {
for (; span >= 4; span -= 4, buf += 4) {
fragment_shader->run<W>();
discard_output<DISCARD>(buf);
}
if (span > 0) {
fragment_shader->run<W>();
discard_output<DISCARD>(buf, span);
}
}
}
template <typename P>
static inline void prepare_row(Texture& colortex, int y, int startx, int endx,
bool use_discard, DepthRun* depth,
uint32_t z = 0, DepthCursor* cursor = nullptr) {
assert(colortex.delay_clear > 0);
uint32_t& mask = colortex.cleared_rows[y / 32];
if ((mask & (1 << (y & 31))) == 0) {
mask |= 1 << (y & 31);
colortex.delay_clear--;
if (blend_key || use_discard) {
force_clear_row<P>(colortex, y);
} else if (depth) {
if (depth->is_flat() || !cursor) {
force_clear_row<P>(colortex, y);
} else {
int passed =
DepthCursor(*cursor).check_passed<false>(z, ctx->depthfunc);
if (startx > 0 || startx + passed < colortex.width) {
force_clear_row<P>(colortex, y, startx, startx + passed);
}
}
} else if (startx > 0 || endx < colortex.width) {
force_clear_row<P>(colortex, y, startx, endx);
}
}
}
template <typename T>
static ALWAYS_INLINE auto perpDot(T a, T b) {
return a.x * b.y - a.y * b.x;
}
template <typename T>
static ALWAYS_INLINE bool checkIfEdgesFlipped(T l0, T l1, T r0, T r1) {
return l0.x > r0.x || (l0.x == r0.x && perpDot(l1 - l0, r1 - r0) > 0.0f);
}
template <typename P>
static inline void draw_quad_spans(int nump, Point2D p[4], uint32_t z,
Interpolants interp_outs[4],
Texture& colortex, Texture& depthtex,
const ClipRect& clipRect) {
assert(nump == 3 || nump == 4);
Point2D l0, r0, l1, r1;
int l0i, r0i, l1i, r1i;
{
int top = nump > 3 && p[3].y < p[2].y
? (p[0].y < p[1].y ? (p[0].y < p[3].y ? 0 : 3)
: (p[1].y < p[3].y ? 1 : 3))
: (p[0].y < p[1].y ? (p[0].y < p[2].y ? 0 : 2)
: (p[1].y < p[2].y ? 1 : 2));
#define NEXT_POINT(idx) \
({ \
int cur = (idx) + 1; \
cur < nump ? cur : 0; \
})
#define PREV_POINT(idx) \
({ \
int cur = (idx) - 1; \
cur >= 0 ? cur : nump - 1; \
})
int next = NEXT_POINT(top);
int prev = PREV_POINT(top);
if (p[top].y == p[next].y) {
l0i = next;
l1i = NEXT_POINT(next);
r0i = top;
r1i = prev;
} else if (p[top].y == p[prev].y) {
l0i = top;
l1i = next;
r0i = prev;
r1i = PREV_POINT(prev);
} else {
l0i = r0i = top;
l1i = next;
r1i = prev;
}
l0 = p[l0i]; r0 = p[r0i]; l1 = p[l1i]; r1 = p[r1i]; }
struct Edge {
float yScale;
float xSlope;
float x;
Interpolants interpSlope;
Interpolants interp;
bool edgeMask;
Edge(float y, const Point2D& p0, const Point2D& p1, const Interpolants& i0,
const Interpolants& i1, int edgeIndex)
: yScale(1.0f / max(p1.y - p0.y, 1.0f / 256)),
xSlope((p1.x - p0.x) * yScale),
x(p0.x + (y - p0.y) * xSlope),
interpSlope((i1 - i0) * yScale),
interp(i0 + (y - p0.y) * interpSlope),
edgeMask((swgl_AAEdgeMask >> edgeIndex) & 1) {}
void nextRow() {
x += xSlope;
interp += interpSlope;
}
float cur_x() const { return x; }
float x_slope() const { return xSlope; }
};
assert(l0.y == r0.y);
float aaRound = swgl_ClipFlags & SWGL_CLIP_FLAG_AA ? 0.0f : 0.5f;
float y = floor(max(min(l0.y, clipRect.y1), clipRect.y0) + aaRound) + 0.5f;
Edge left(y, l0, l1, interp_outs[l0i], interp_outs[l1i], l1i);
Edge right(y, r0, r1, interp_outs[r0i], interp_outs[r1i], r0i);
bool flipped = checkIfEdgesFlipped(l0, l1, r0, r1);
if (flipped) swap(left, right);
P* fbuf = (P*)colortex.sample_ptr(0, int(y));
DepthRun* fdepth = depthtex.buf != nullptr
? (DepthRun*)depthtex.sample_ptr(0, int(y))
: nullptr;
float checkY = min(min(l1.y, r1.y), clipRect.y1);
FloatRange clipSpan =
clipRect.x_range().clip(x_range(l0, l1).merge(x_range(r0, r1)));
for (;;) {
if (y > checkY) {
if (y > clipRect.y1) break;
#define STEP_EDGE(y, e0i, e0, e1i, e1, STEP_POINT, end) \
do { \
\
e0i = e1i; \
e0 = e1; \
\
e1i = STEP_POINT(e1i); \
e1 = p[e1i]; \
\
if (e0i == end) return; \
\
} while (y > e1.y)
if (y > l1.y) {
STEP_EDGE(y, l0i, l0, l1i, l1, NEXT_POINT, r1i);
(flipped ? right : left) =
Edge(y, l0, l1, interp_outs[l0i], interp_outs[l1i], l1i);
}
if (y > r1.y) {
STEP_EDGE(y, r0i, r0, r1i, r1, PREV_POINT, l1i);
(flipped ? left : right) =
Edge(y, r0, r1, interp_outs[r0i], interp_outs[r1i], r0i);
}
clipSpan =
clipRect.x_range().clip(x_range(l0, l1).merge(x_range(r0, r1)));
checkY = min(ceil(min(l1.y, r1.y) - aaRound), clipRect.y1);
}
IntRange span = aa_span(fbuf, left, right, clipSpan);
if (span.len() > 0) {
if (vertex_shader->use_clip_distance()) {
span = span.intersect(clip_distance_range(left, right));
if (span.len() <= 0) goto next_span;
}
ctx->shaded_rows++;
ctx->shaded_pixels += span.len();
P* buf = fbuf + span.start;
DepthRun* depth =
depthtex.buf != nullptr && depthtex.cleared() ? fdepth : nullptr;
DepthCursor cursor;
bool use_discard = fragment_shader->use_discard();
if (use_discard) {
if (depth) {
if (!depth->is_flat()) {
flatten_depth_runs(depth, depthtex.width);
}
depth += span.start;
}
} else if (depth) {
if (!depth->is_flat()) {
cursor = DepthCursor(depth, depthtex.width, span.start, span.len());
int skipped = cursor.skip_failed(z, ctx->depthfunc);
if (skipped < 0) {
goto next_span;
}
buf += skipped;
span.start += skipped;
} else {
depth += span.start;
}
}
if (colortex.delay_clear) {
prepare_row<P>(colortex, int(y), span.start, span.end, use_discard,
depth, z, &cursor);
}
fragment_shader->gl_FragCoord.x = init_interp(span.start + 0.5f, 1);
fragment_shader->gl_FragCoord.y = y;
{
float stepScale = 1.0f / (right.x - left.x);
if (!isfinite(stepScale)) stepScale = 0.0f;
Interpolants step = (right.interp - left.interp) * stepScale;
Interpolants o = left.interp + step * (span.start + 0.5f - left.x);
fragment_shader->init_span(&o, &step);
}
clipRect.set_clip_mask(span.start, y, buf);
if (!use_discard) {
if (depth) {
if (!depth->is_flat()) {
draw_depth_span(z, buf, cursor);
goto next_span;
}
} else {
if (span.len() >= 4 && fragment_shader->has_draw_span(buf)) {
int drawn = fragment_shader->draw_span(buf, span.len() & ~3);
buf += drawn;
span.start += drawn;
}
}
draw_span<false, false>(buf, depth, span.len(), [=] { return z; });
} else {
draw_span<true, false>(buf, depth, span.len(), [=] { return z; });
}
}
next_span:
y++;
left.nextRow();
right.nextRow();
fbuf += colortex.stride() / sizeof(P);
fdepth += depthtex.stride() / sizeof(DepthRun);
}
}
template <typename P>
static inline void draw_perspective_spans(int nump, Point3D* p,
Interpolants* interp_outs,
Texture& colortex, Texture& depthtex,
const ClipRect& clipRect) {
Point3D l0, r0, l1, r1;
int l0i, r0i, l1i, r1i;
{
int top = 0;
for (int i = 1; i < nump; i++) {
if (p[i].y < p[top].y) {
top = i;
}
}
l0i = top;
for (int i = top + 1; i < nump && p[i].y == p[top].y; i++) {
l0i = i;
}
if (l0i == nump - 1) {
for (int i = 0; i <= top && p[i].y == p[top].y; i++) {
l0i = i;
}
}
r0i = top;
for (int i = top - 1; i >= 0 && p[i].y == p[top].y; i--) {
r0i = i;
}
if (r0i == 0) {
for (int i = nump - 1; i >= top && p[i].y == p[top].y; i--) {
r0i = i;
}
}
l1i = NEXT_POINT(l0i);
r1i = PREV_POINT(r0i);
l0 = p[l0i]; r0 = p[r0i]; l1 = p[l1i]; r1 = p[r1i]; }
struct Edge {
float yScale;
Point3D pSlope;
Point3D p;
Interpolants interpSlope;
Interpolants interp;
bool edgeMask;
Edge(float y, const Point3D& p0, const Point3D& p1, const Interpolants& i0,
const Interpolants& i1, int edgeIndex)
: yScale(1.0f / max(p1.y - p0.y, 1.0f / 256)),
pSlope((p1 - p0) * yScale),
p(p0 + (y - p0.y) * pSlope),
interpSlope((i1 * p1.w - i0 * p0.w) * yScale),
interp(i0 * p0.w + (y - p0.y) * interpSlope),
edgeMask((swgl_AAEdgeMask >> edgeIndex) & 1) {}
float x() const { return p.x; }
vec2_scalar zw() const { return {p.z, p.w}; }
void nextRow() {
p += pSlope;
interp += interpSlope;
}
float cur_x() const { return p.x; }
float x_slope() const { return pSlope.x; }
};
assert(l0.y == r0.y);
float aaRound = swgl_ClipFlags & SWGL_CLIP_FLAG_AA ? 0.0f : 0.5f;
float y = floor(max(min(l0.y, clipRect.y1), clipRect.y0) + aaRound) + 0.5f;
Edge left(y, l0, l1, interp_outs[l0i], interp_outs[l1i], l1i);
Edge right(y, r0, r1, interp_outs[r0i], interp_outs[r1i], r0i);
bool flipped = checkIfEdgesFlipped(l0, l1, r0, r1);
if (flipped) swap(left, right);
P* fbuf = (P*)colortex.sample_ptr(0, int(y));
DepthRun* fdepth = depthtex.buf != nullptr
? (DepthRun*)depthtex.sample_ptr(0, int(y))
: nullptr;
float checkY = min(min(l1.y, r1.y), clipRect.y1);
FloatRange clipSpan =
clipRect.x_range().clip(x_range(l0, l1).merge(x_range(r0, r1)));
for (;;) {
if (y > checkY) {
if (y > clipRect.y1) break;
if (y > l1.y) {
STEP_EDGE(y, l0i, l0, l1i, l1, NEXT_POINT, r1i);
(flipped ? right : left) =
Edge(y, l0, l1, interp_outs[l0i], interp_outs[l1i], l1i);
}
if (y > r1.y) {
STEP_EDGE(y, r0i, r0, r1i, r1, PREV_POINT, l1i);
(flipped ? left : right) =
Edge(y, r0, r1, interp_outs[r0i], interp_outs[r1i], r0i);
}
clipSpan =
clipRect.x_range().clip(x_range(l0, l1).merge(x_range(r0, r1)));
checkY = min(ceil(min(l1.y, r1.y) - aaRound), clipRect.y1);
}
IntRange span = aa_span(fbuf, left, right, clipSpan);
if (span.len() > 0) {
if (vertex_shader->use_clip_distance()) {
span = span.intersect(clip_distance_range(left, right));
if (span.len() <= 0) goto next_span;
}
ctx->shaded_rows++;
ctx->shaded_pixels += span.len();
P* buf = fbuf + span.start;
DepthRun* depth =
depthtex.buf != nullptr && depthtex.cleared() ? fdepth : nullptr;
bool use_discard = fragment_shader->use_discard();
if (depth) {
if (!depth->is_flat()) {
flatten_depth_runs(depth, depthtex.width);
}
depth += span.start;
}
if (colortex.delay_clear) {
prepare_row<P>(colortex, int(y), span.start, span.end, use_discard,
depth);
}
fragment_shader->gl_FragCoord.x = init_interp(span.start + 0.5f, 1);
fragment_shader->gl_FragCoord.y = y;
{
float stepScale = 1.0f / (right.x() - left.x());
if (!isfinite(stepScale)) stepScale = 0.0f;
vec2_scalar stepZW = (right.zw() - left.zw()) * stepScale;
vec2_scalar zw = left.zw() + stepZW * (span.start + 0.5f - left.x());
fragment_shader->gl_FragCoord.z = init_interp(zw.x, stepZW.x);
fragment_shader->gl_FragCoord.w = init_interp(zw.y, stepZW.y);
fragment_shader->swgl_StepZW = stepZW;
Interpolants step = (right.interp - left.interp) * stepScale;
Interpolants o = left.interp + step * (span.start + 0.5f - left.x());
fragment_shader->init_span<true>(&o, &step);
}
clipRect.set_clip_mask(span.start, y, buf);
if (!use_discard) {
draw_span<false, true>(buf, depth, span.len(), packDepth);
} else {
draw_span<true, true>(buf, depth, span.len(), packDepth);
}
}
next_span:
y++;
left.nextRow();
right.nextRow();
fbuf += colortex.stride() / sizeof(P);
fdepth += depthtex.stride() / sizeof(DepthRun);
}
}
template <XYZW AXIS>
static int clip_side(int nump, Point3D* p, Interpolants* interp, Point3D* outP,
Interpolants* outInterp, int& outEdgeMask) {
enum SIDE { POSITIVE = 1, NEGATIVE = 2 };
int numClip = 0;
int edgeMask = outEdgeMask;
Point3D prev = p[nump - 1];
Interpolants prevInterp = interp[nump - 1];
float prevCoord = prev.select(AXIS);
int prevMask = (prevCoord < -prev.w ? NEGATIVE : 0) |
(prevCoord > prev.w ? POSITIVE : 0);
outEdgeMask = 0;
for (int i = 0; i < nump; i++, edgeMask >>= 1) {
Point3D cur = p[i];
Interpolants curInterp = interp[i];
float curCoord = cur.select(AXIS);
int curMask =
(curCoord < -cur.w ? NEGATIVE : 0) | (curCoord > cur.w ? POSITIVE : 0);
if (!(curMask & prevMask)) {
if (prevMask) {
if (numClip >= nump + 2) {
assert(false);
return 0;
}
float prevSide =
(prevMask & NEGATIVE) && (!(prevMask & POSITIVE) ||
prevCoord * (cur.w - prev.w) <
prev.w * (curCoord - prevCoord))
? -1
: 1;
float prevDist = prevCoord - prevSide * prev.w;
float curDist = curCoord - prevSide * cur.w;
float k = prevDist / (prevDist - curDist);
Point3D clipped = prev + (cur - prev) * k;
if (prevSide * clipped.select(AXIS) > clipped.w) {
k = nextafterf(k, 1.0f);
clipped = prev + (cur - prev) * k;
}
outP[numClip] = clipped;
outInterp[numClip] = prevInterp + (curInterp - prevInterp) * k;
numClip++;
}
if (curMask) {
if (numClip >= nump + 2) {
assert(false);
return 0;
}
float curSide =
(curMask & POSITIVE) && (!(curMask & NEGATIVE) ||
prevCoord * (cur.w - prev.w) <
prev.w * (curCoord - prevCoord))
? 1
: -1;
float prevDist = prevCoord - curSide * prev.w;
float curDist = curCoord - curSide * cur.w;
float k = prevDist / (prevDist - curDist);
Point3D clipped = prev + (cur - prev) * k;
if (curSide * clipped.select(AXIS) > clipped.w) {
k = nextafterf(k, 0.0f);
clipped = prev + (cur - prev) * k;
}
outP[numClip] = clipped;
outInterp[numClip] = prevInterp + (curInterp - prevInterp) * k;
outEdgeMask |= (edgeMask & 1) << numClip;
numClip++;
}
}
if (!curMask) {
if (numClip >= nump + 2) {
assert(false);
return 0;
}
outP[numClip] = cur;
outInterp[numClip] = curInterp;
outEdgeMask |= (edgeMask & 1) << numClip;
numClip++;
}
prev = cur;
prevInterp = curInterp;
prevCoord = curCoord;
prevMask = curMask;
}
return numClip;
}
static inline void draw_perspective_clipped(int nump, Point3D* p_clip,
Interpolants* interp_clip,
Texture& colortex,
Texture& depthtex) {
ClipRect clipRect(colortex);
if (!clipRect.overlaps(nump, p_clip)) {
return;
}
if (colortex.internal_format == GL_RGBA8) {
draw_perspective_spans<uint32_t>(nump, p_clip, interp_clip, colortex,
depthtex, clipRect);
} else if (colortex.internal_format == GL_R8) {
draw_perspective_spans<uint8_t>(nump, p_clip, interp_clip, colortex,
depthtex, clipRect);
} else {
assert(false);
}
}
static void draw_perspective(int nump, Interpolants interp_outs[4],
Texture& colortex, Texture& depthtex) {
assert(nump >= 3);
vec4 pos = vertex_shader->gl_Position;
vec3_scalar scale =
vec3_scalar(ctx->viewport.width(), ctx->viewport.height(), 1) * 0.5f;
vec3_scalar offset =
make_vec3(make_vec2(ctx->viewport.origin() - colortex.offset), 0.0f) +
scale;
if (test_all(pos.z > -pos.w && pos.z < pos.w)) {
Float w = 1.0f / pos.w;
vec3 screen = pos.sel(X, Y, Z) * w * scale + offset;
Point3D p[4] = {{screen.x.x, screen.y.x, screen.z.x, w.x},
{screen.x.y, screen.y.y, screen.z.y, w.y},
{screen.x.z, screen.y.z, screen.z.z, w.z},
{screen.x.w, screen.y.w, screen.z.w, w.w}};
draw_perspective_clipped(nump, p, interp_outs, colortex, depthtex);
} else {
Point3D p[4] = {{pos.x.x, pos.y.x, pos.z.x, pos.w.x},
{pos.x.y, pos.y.y, pos.z.y, pos.w.y},
{pos.x.z, pos.y.z, pos.z.z, pos.w.z},
{pos.x.w, pos.y.w, pos.z.w, pos.w.w}};
Point3D p_clip[4 + 6];
Interpolants interp_clip[4 + 6];
nump = clip_side<Z>(nump, p, interp_outs, p_clip, interp_clip,
swgl_AAEdgeMask);
if (nump < 3) {
return;
}
for (int i = 0; i < nump; i++) {
if (p_clip[i].w <= 0.0f) {
Point3D p_tmp[4 + 6];
Interpolants interp_tmp[4 + 6];
nump = clip_side<X>(nump, p_clip, interp_clip, p_tmp, interp_tmp,
swgl_AAEdgeMask);
if (nump < 3) return;
nump = clip_side<Y>(nump, p_tmp, interp_tmp, p_clip, interp_clip,
swgl_AAEdgeMask);
if (nump < 3) return;
break;
}
}
for (int i = 0; i < nump; i++) {
float w = 1.0f / p_clip[i].w;
p_clip[i] = isfinite(w)
? Point3D(p_clip[i].sel(X, Y, Z) * w * scale + offset, w)
: Point3D(0.0f);
}
draw_perspective_clipped(nump, p_clip, interp_clip, colortex, depthtex);
}
}
static void draw_quad(int nump, Texture& colortex, Texture& depthtex) {
Interpolants interp_outs[4];
swgl_ClipFlags = 0;
vertex_shader->run_primitive((char*)interp_outs, sizeof(Interpolants));
vec4 pos = vertex_shader->gl_Position;
if (test_any(pos.w != pos.w.x)) {
draw_perspective(nump, interp_outs, colortex, depthtex);
return;
}
float w = 1.0f / pos.w.x;
if (!isfinite(w)) w = 0.0f;
vec2 screen = (pos.sel(X, Y) * w + 1) * 0.5f *
vec2_scalar(ctx->viewport.width(), ctx->viewport.height()) +
make_vec2(ctx->viewport.origin() - colortex.offset);
Point2D p[4] = {{screen.x.x, screen.y.x},
{screen.x.y, screen.y.y},
{screen.x.z, screen.y.z},
{screen.x.w, screen.y.w}};
ClipRect clipRect(colortex);
if (!clipRect.overlaps(nump, p)) {
return;
}
float screenZ = (pos.z.x * w + 1) * 0.5f;
if (screenZ < 0 || screenZ > 1) {
return;
}
uint32_t z = uint32_t(MAX_DEPTH_VALUE * screenZ);
fragment_shader->gl_FragCoord.z = screenZ;
fragment_shader->gl_FragCoord.w = w;
if (nump == 2) {
if (int(p[0].y + 0.5f) == int(p[1].y + 0.5f)) {
p[2].y = 1 + int(p[1].y + 0.5f);
p[3].y = p[2].y;
if (int(p[0].x + 0.5f) == int(p[1].x + 0.5f)) {
p[1].x += 1.0f;
p[2].x += 1.0f;
}
} else {
p[2].x += 1.0f;
p[3].x += 1.0f;
}
nump = 4;
}
if (colortex.internal_format == GL_RGBA8) {
draw_quad_spans<uint32_t>(nump, p, z, interp_outs, colortex, depthtex,
clipRect);
} else if (colortex.internal_format == GL_R8) {
draw_quad_spans<uint8_t>(nump, p, z, interp_outs, colortex, depthtex,
clipRect);
} else {
assert(false);
}
}
template <typename INDEX>
static inline void draw_elements(GLsizei count, GLsizei instancecount,
size_t offset, VertexArray& v,
Texture& colortex, Texture& depthtex) {
Buffer& indices_buf = ctx->buffers[v.element_array_buffer_binding];
if (!indices_buf.buf || offset >= indices_buf.size) {
return;
}
assert((offset & (sizeof(INDEX) - 1)) == 0);
INDEX* indices = (INDEX*)(indices_buf.buf + offset);
count = min(count, (GLsizei)((indices_buf.size - offset) / sizeof(INDEX)));
if (count == 6 && indices[1] == indices[0] + 1 &&
indices[2] == indices[0] + 2 && indices[5] == indices[0] + 3) {
assert(indices[3] == indices[0] + 2 && indices[4] == indices[0] + 1);
vertex_shader->load_attribs(v.attribs, indices[0], 0, 4);
draw_quad(4, colortex, depthtex);
for (GLsizei instance = 1; instance < instancecount; instance++) {
vertex_shader->load_attribs(v.attribs, indices[0], instance, 0);
draw_quad(4, colortex, depthtex);
}
} else {
for (GLsizei instance = 0; instance < instancecount; instance++) {
for (GLsizei i = 0; i + 3 <= count; i += 3) {
if (indices[i + 1] != indices[i] + 1 ||
indices[i + 2] != indices[i] + 2) {
continue;
}
if (i + 6 <= count && indices[i + 5] == indices[i] + 3) {
assert(indices[i + 3] == indices[i] + 2 &&
indices[i + 4] == indices[i] + 1);
vertex_shader->load_attribs(v.attribs, indices[i], instance, 4);
draw_quad(4, colortex, depthtex);
i += 3;
} else {
vertex_shader->load_attribs(v.attribs, indices[i], instance, 3);
draw_quad(3, colortex, depthtex);
}
}
}
}
}