#ifndef X_MATH
#define X_MATH
#include <stdint.h>
#ifdef X_MATH_SSE
#include <xmmintrin.h>
#endif
static inline const float h(const uint32_t x) { return *(float*)&x; }
inline float x_trunc(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(_mm_round_ss(_mm_set_ss(x), _mm_set_ss(x),
_MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC));
#else
int i = (int)x;
return (float)i;
#endif }
inline float x_floor(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(_mm_floor_ss(_mm_set_ss(x), _mm_set_ss(x)));
#else
uint32_t u = *(uint32_t*)&x;
uint32_t r = u >> 31;
x -= (float)r;
u = *(uint32_t*)&x;
u -= r;
x = *(float*)&u;
return x_trunc(x);
#endif }
inline float x_ceil(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(_mm_ceil_ss(_mm_set_ss(x), _mm_set_ss(x)));
#else
uint32_t u = *(uint32_t*)&x;
uint32_t r = 1 - (u >> 31);
x += (float)r;
u = *(uint32_t*)&x;
u -= r;
x = *(float*)&u;
return x_trunc(x);
#endif }
inline float x_round(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(
_mm_round_ss(_mm_set_ss(x), _mm_set_ss(x),
_MM_FROUND_TO_NEAREST_INT | _MM_FROUND_NO_EXC));
#else
x += h(0x3effffff);
uint32_t u = *(uint32_t*)&x;
u >>= 31;
x -= (float)u;
return x_trunc(x);
#endif }
inline float x_mod(float x, float e) { return x - e * x_floor(x / e); }
inline float x_fract(float x) { return x - x_floor(x); }
inline float x_abs(float x) {
uint32_t u = *(uint32_t*)&x;
u &= 0x7fffffff;
return *(float*)&u;
}
inline float x_sign(float x) {
uint32_t u = *(uint32_t*)&x;
u &= 0x80000000;
u |= 0x3f800000;
return *(float*)&u;
}
inline float x_cos(float x) {
x = x_abs(x_mod(x, h(0x40c90fdb)) - h(0x40490fdb)) - h(0x3fc90fdb);
x += (h(0xbc96e670) * x) * (x * x);
x += (h(0xbe17b083) * x) * (x * x);
return x;
}
inline float x_sin(float x) { return x_cos(x - h(0x3fc90fdb)); }
inline float x_sqrt(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(_mm_sqrt_ss(_mm_set_ss(x)));
#else
uint32_t i = *(uint32_t*)&x;
i = (i + 0x3f769e5c) >> 1;
float s = *(float*)&i;
s = 0.5f * (s + x / s);
#ifdef X_MATH_ACC
s = 0.5f * (s + x / s);
#endif
return s;
#endif }
inline float x_rsqrt(float x) {
#ifdef X_MATH_SSE
return _mm_cvtss_f32(_mm_rsqrt_ss(_mm_set_ss(x)));
#else
uint32_t i = *(uint32_t*)&x;
float y = x * 0.5f;
i = 0x5f3759df - (i >> 1);
x = *(float*)&i;
x *= 1.5f - (y * x * x);
#ifdef X_MATH_ACC
x *= 1.5f - (y * x * x);
#endif
return x;
#endif }
inline float x_min(float a, float b) { return a < b ? a : b; }
inline float x_max(float a, float b) { return a > b ? a : b; }
inline float x_clamp(float x, float min, float max) {
return x_min(x_max(x, min), max);
}
inline float x_atan2(float y, float x) {
uint32_t nx = *(uint32_t*)&x;
nx >>= 31;
uint32_t ny = *(uint32_t*)&y;
ny &= 0x80000000;
x = x_abs(x);
y = x_abs(y);
const uint32_t p = y > x;
y = x_min(x, y) / x_max(x, y);
union {
float r;
uint32_t u;
} z = {.r = y * y};
z.r =
((h(0xbd3e7316) * z.r + h(0x3e232344)) * z.r - h(0x3ea7be2c)) * z.r * y +
y;
const uint32_t d = p * 0x3fc90fdb;
z.r -= *(float*)&d;
z.u ^= (p ^ nx) << 31;
nx *= 0x40490fdb;
z.r += *(float*)&nx;
z.u = (z.u & 0x7fffffff) | ny;
return z.r;
}
inline float x_asin(float x) {
float s = x_sign(x);
x = x_abs(x);
float z = 1.0f - x_sqrt(1.0f - x * x);
float a = x - 0.35f;
x = h(0x3f490fdb) * (x + z + 0.12f * z * z) + h(0x3d07ae14) -
h(0x3e98a3d7) * a * a;
return s * x;
}
inline float x_acos(float x) { return x_asin(-x) + h(0x3fc90fdb); }
inline float x_exp2(float x) {
#ifdef X_MATH_ACC
const int32_t n = (int32_t)(x * h(0x4b000000));
int32_t l = n & 0xff800000;
x = (float)(n - l);
x = (h(0x27aca418) * x + h(0x33a85ada)) * x + h(0x3f803884);
l += *(int32_t*)&x;
return *(float*)&l;
#else
uint32_t v = (uint32_t)(h(0x4b000000) * (x + h(0x42fde2a9)));
return *(float*)&v;
#endif }
inline float x_exp(float x) { return x_exp2(x * h(0x3fb8aa3b)); }
inline float x_sinh(float x) {
float a = h(0x3fb8aa3b) * x - 1.0f;
float b = h(0xbfb8aa3b) * x - 1.0f;
return x_exp2(a) - x_exp2(b);
}
inline float x_cosh(float x) {
float a = h(0x3fb8aa3b) * x - 1.0f;
float b = h(0xbfb8aa3b) * x - 1.0f;
return x_exp2(a) + x_exp2(b);
}
inline float x_tanh(float x) {
float s = x_sign(x);
x = x_abs(x);
if (x < 1.0f) {
float z = 0.07f * x * x;
x += (z * x + h(0xc08db6db)) * z;
} else {
x = (1.05f * x - 0.1f) * x + 1.09f;
x = 1.0f - 1.0f / (x * x);
}
return s * x;
}
inline float x_tan(float x) {
x *= h(0x3ea2f983);
x = 2.0f * (x - x_round(x));
float y = 1.0f - x * x;
return x * (h(0xbc994764) * y + h(0x3ea1b529) + h(0x3fa30738) / y);
}
inline float x_log2(float x) {
#ifdef X_MATH_ACC
uint32_t a = *(uint32_t*)&x;
int32_t c = (a >> 23) & 0xff;
uint32_t d = a & 0x7fffff;
a = (a >> 22) & 1;
c += a - 127;
d |= (a ^ 0x7f) << 23;
x = *(float*)&d;
x -= 1.0f;
float f = (float)c;
return f + x * (x * h(0xbf213248) + h(0x3fbbc593));
#else
const uint32_t y = *(uint32_t*)&x;
float z = (float)y;
z *= h(0x34000000);
return z - h(0x42fde2a9);
#endif }
#endif