#pragma once
#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma float_control(push)
#pragma float_control(precise, on)
#endif
inline bool XM_CALLCONV XMMatrixIsNaN(FXMMATRIX M) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
size_t i = 16;
auto pWork = reinterpret_cast<const uint32_t*>(&M.m[0][0]);
do {
uint32_t uTest = pWork[0];
uTest &= 0x7FFFFFFFU;
uTest -= 0x7F800001U;
if (uTest < 0x007FFFFFU)
{
break; }
++pWork; } while (--i);
return (i != 0); #elif defined(_XM_ARM_NEON_INTRINSICS_)
float32x4_t vX = M.r[0];
float32x4_t vY = M.r[1];
float32x4_t vZ = M.r[2];
float32x4_t vW = M.r[3];
uint32x4_t xmask = vmvnq_u32(vceqq_f32(vX, vX));
uint32x4_t ymask = vmvnq_u32(vceqq_f32(vY, vY));
uint32x4_t zmask = vmvnq_u32(vceqq_f32(vZ, vZ));
uint32x4_t wmask = vmvnq_u32(vceqq_f32(vW, vW));
xmask = vorrq_u32(xmask, zmask);
ymask = vorrq_u32(ymask, wmask);
xmask = vorrq_u32(xmask, ymask);
uint8x8x2_t vTemp = vzip_u8(
vget_low_u8(vreinterpretq_u8_u32(xmask)),
vget_high_u8(vreinterpretq_u8_u32(xmask)));
uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);
return (r != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vX = M.r[0];
XMVECTOR vY = M.r[1];
XMVECTOR vZ = M.r[2];
XMVECTOR vW = M.r[3];
vX = _mm_cmpneq_ps(vX, vX);
vY = _mm_cmpneq_ps(vY, vY);
vZ = _mm_cmpneq_ps(vZ, vZ);
vW = _mm_cmpneq_ps(vW, vW);
vX = _mm_or_ps(vX, vZ);
vY = _mm_or_ps(vY, vW);
vX = _mm_or_ps(vX, vY);
return (_mm_movemask_ps(vX) != 0);
#else
#endif
}
#if !defined(_XM_NO_INTRINSICS_) && defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#pragma float_control(pop)
#endif
inline bool XM_CALLCONV XMMatrixIsInfinite(FXMMATRIX M) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
size_t i = 16;
auto pWork = reinterpret_cast<const uint32_t*>(&M.m[0][0]);
do {
uint32_t uTest = pWork[0];
uTest &= 0x7FFFFFFFU;
if (uTest == 0x7F800000U)
{
break; }
++pWork; } while (--i);
return (i != 0); #elif defined(_XM_ARM_NEON_INTRINSICS_)
float32x4_t vX = M.r[0];
float32x4_t vY = M.r[1];
float32x4_t vZ = M.r[2];
float32x4_t vW = M.r[3];
vX = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vX), g_XMAbsMask));
vY = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vY), g_XMAbsMask));
vZ = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vZ), g_XMAbsMask));
vW = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(vW), g_XMAbsMask));
uint32x4_t xmask = vceqq_f32(vX, g_XMInfinity);
uint32x4_t ymask = vceqq_f32(vY, g_XMInfinity);
uint32x4_t zmask = vceqq_f32(vZ, g_XMInfinity);
uint32x4_t wmask = vceqq_f32(vW, g_XMInfinity);
xmask = vorrq_u32(xmask, zmask);
ymask = vorrq_u32(ymask, wmask);
xmask = vorrq_u32(xmask, ymask);
uint8x8x2_t vTemp = vzip_u8(
vget_low_u8(vreinterpretq_u8_u32(xmask)),
vget_high_u8(vreinterpretq_u8_u32(xmask)));
uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);
return (r != 0);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp1 = _mm_and_ps(M.r[0], g_XMAbsMask);
XMVECTOR vTemp2 = _mm_and_ps(M.r[1], g_XMAbsMask);
XMVECTOR vTemp3 = _mm_and_ps(M.r[2], g_XMAbsMask);
XMVECTOR vTemp4 = _mm_and_ps(M.r[3], g_XMAbsMask);
vTemp1 = _mm_cmpeq_ps(vTemp1, g_XMInfinity);
vTemp2 = _mm_cmpeq_ps(vTemp2, g_XMInfinity);
vTemp3 = _mm_cmpeq_ps(vTemp3, g_XMInfinity);
vTemp4 = _mm_cmpeq_ps(vTemp4, g_XMInfinity);
vTemp1 = _mm_or_ps(vTemp1, vTemp2);
vTemp3 = _mm_or_ps(vTemp3, vTemp4);
vTemp1 = _mm_or_ps(vTemp1, vTemp3);
return (_mm_movemask_ps(vTemp1) != 0);
#endif
}
inline bool XM_CALLCONV XMMatrixIsIdentity(FXMMATRIX M) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
auto pWork = reinterpret_cast<const uint32_t*>(&M.m[0][0]);
uint32_t uOne = pWork[0] ^ 0x3F800000U;
uint32_t uZero = pWork[1];
uZero |= pWork[2];
uZero |= pWork[3];
uZero |= pWork[4];
uOne |= pWork[5] ^ 0x3F800000U;
uZero |= pWork[6];
uZero |= pWork[7];
uZero |= pWork[8];
uZero |= pWork[9];
uOne |= pWork[10] ^ 0x3F800000U;
uZero |= pWork[11];
uZero |= pWork[12];
uZero |= pWork[13];
uZero |= pWork[14];
uOne |= pWork[15] ^ 0x3F800000U;
uZero &= 0x7FFFFFFF; uOne |= uZero;
return (uOne == 0);
#elif defined(_XM_ARM_NEON_INTRINSICS_)
uint32x4_t xmask = vceqq_f32(M.r[0], g_XMIdentityR0);
uint32x4_t ymask = vceqq_f32(M.r[1], g_XMIdentityR1);
uint32x4_t zmask = vceqq_f32(M.r[2], g_XMIdentityR2);
uint32x4_t wmask = vceqq_f32(M.r[3], g_XMIdentityR3);
xmask = vandq_u32(xmask, zmask);
ymask = vandq_u32(ymask, wmask);
xmask = vandq_u32(xmask, ymask);
uint8x8x2_t vTemp = vzip_u8(vget_low_u8(vreinterpretq_u8_u32(xmask)), vget_high_u8(vreinterpretq_u8_u32(xmask)));
uint16x4x2_t vTemp2 = vzip_u16(vreinterpret_u16_u8(vTemp.val[0]), vreinterpret_u16_u8(vTemp.val[1]));
uint32_t r = vget_lane_u32(vreinterpret_u32_u16(vTemp2.val[1]), 1);
return (r == 0xFFFFFFFFU);
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp1 = _mm_cmpeq_ps(M.r[0], g_XMIdentityR0);
XMVECTOR vTemp2 = _mm_cmpeq_ps(M.r[1], g_XMIdentityR1);
XMVECTOR vTemp3 = _mm_cmpeq_ps(M.r[2], g_XMIdentityR2);
XMVECTOR vTemp4 = _mm_cmpeq_ps(M.r[3], g_XMIdentityR3);
vTemp1 = _mm_and_ps(vTemp1, vTemp2);
vTemp3 = _mm_and_ps(vTemp3, vTemp4);
vTemp1 = _mm_and_ps(vTemp1, vTemp3);
return (_mm_movemask_ps(vTemp1) == 0x0f);
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixMultiply
(
FXMMATRIX M1,
CXMMATRIX M2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX mResult;
float x = M1.m[0][0];
float y = M1.m[0][1];
float z = M1.m[0][2];
float w = M1.m[0][3];
mResult.m[0][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w);
mResult.m[0][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w);
mResult.m[0][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w);
mResult.m[0][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w);
x = M1.m[1][0];
y = M1.m[1][1];
z = M1.m[1][2];
w = M1.m[1][3];
mResult.m[1][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w);
mResult.m[1][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w);
mResult.m[1][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w);
mResult.m[1][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w);
x = M1.m[2][0];
y = M1.m[2][1];
z = M1.m[2][2];
w = M1.m[2][3];
mResult.m[2][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w);
mResult.m[2][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w);
mResult.m[2][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w);
mResult.m[2][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w);
x = M1.m[3][0];
y = M1.m[3][1];
z = M1.m[3][2];
w = M1.m[3][3];
mResult.m[3][0] = (M2.m[0][0] * x) + (M2.m[1][0] * y) + (M2.m[2][0] * z) + (M2.m[3][0] * w);
mResult.m[3][1] = (M2.m[0][1] * x) + (M2.m[1][1] * y) + (M2.m[2][1] * z) + (M2.m[3][1] * w);
mResult.m[3][2] = (M2.m[0][2] * x) + (M2.m[1][2] * y) + (M2.m[2][2] * z) + (M2.m[3][2] * w);
mResult.m[3][3] = (M2.m[0][3] * x) + (M2.m[1][3] * y) + (M2.m[2][3] * z) + (M2.m[3][3] * w);
return mResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX mResult;
float32x2_t VL = vget_low_f32(M1.r[0]);
float32x2_t VH = vget_high_f32(M1.r[0]);
float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0);
float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1);
float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
mResult.r[0] = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[1]);
VH = vget_high_f32(M1.r[1]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
mResult.r[1] = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[2]);
VH = vget_high_f32(M1.r[2]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
mResult.r[2] = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[3]);
VH = vget_high_f32(M1.r[3]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
mResult.r[3] = vaddq_f32(vZ, vW);
return mResult;
#elif defined(_XM_AVX2_INTRINSICS_)
__m256 t0 = _mm256_castps128_ps256(M1.r[0]);
t0 = _mm256_insertf128_ps(t0, M1.r[1], 1);
__m256 t1 = _mm256_castps128_ps256(M1.r[2]);
t1 = _mm256_insertf128_ps(t1, M1.r[3], 1);
__m256 u0 = _mm256_castps128_ps256(M2.r[0]);
u0 = _mm256_insertf128_ps(u0, M2.r[1], 1);
__m256 u1 = _mm256_castps128_ps256(M2.r[2]);
u1 = _mm256_insertf128_ps(u1, M2.r[3], 1);
__m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0));
__m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0));
__m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00);
__m256 c0 = _mm256_mul_ps(a0, b0);
__m256 c1 = _mm256_mul_ps(a1, b0);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1));
b0 = _mm256_permute2f128_ps(u0, u0, 0x11);
__m256 c2 = _mm256_fmadd_ps(a0, b0, c0);
__m256 c3 = _mm256_fmadd_ps(a1, b0, c1);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2));
__m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00);
__m256 c4 = _mm256_mul_ps(a0, b1);
__m256 c5 = _mm256_mul_ps(a1, b1);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3));
b1 = _mm256_permute2f128_ps(u1, u1, 0x11);
__m256 c6 = _mm256_fmadd_ps(a0, b1, c4);
__m256 c7 = _mm256_fmadd_ps(a1, b1, c5);
t0 = _mm256_add_ps(c2, c6);
t1 = _mm256_add_ps(c3, c7);
XMMATRIX mResult;
mResult.r[0] = _mm256_castps256_ps128(t0);
mResult.r[1] = _mm256_extractf128_ps(t0, 1);
mResult.r[2] = _mm256_castps256_ps128(t1);
mResult.r[3] = _mm256_extractf128_ps(t1, 1);
return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX mResult;
#if defined(_XM_AVX_INTRINSICS_)
XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
#else
XMVECTOR vW = M1.r[0];
XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
mResult.r[0] = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
#else
vW = M1.r[1];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
mResult.r[1] = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
#else
vW = M1.r[2];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
mResult.r[2] = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
#else
vW = M1.r[3];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
mResult.r[3] = vX;
return mResult;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixMultiplyTranspose
(
FXMMATRIX M1,
CXMMATRIX M2
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX mResult;
float x = M2.m[0][0];
float y = M2.m[1][0];
float z = M2.m[2][0];
float w = M2.m[3][0];
mResult.m[0][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w);
mResult.m[0][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w);
mResult.m[0][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w);
mResult.m[0][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w);
x = M2.m[0][1];
y = M2.m[1][1];
z = M2.m[2][1];
w = M2.m[3][1];
mResult.m[1][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w);
mResult.m[1][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w);
mResult.m[1][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w);
mResult.m[1][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w);
x = M2.m[0][2];
y = M2.m[1][2];
z = M2.m[2][2];
w = M2.m[3][2];
mResult.m[2][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w);
mResult.m[2][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w);
mResult.m[2][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w);
mResult.m[2][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w);
x = M2.m[0][3];
y = M2.m[1][3];
z = M2.m[2][3];
w = M2.m[3][3];
mResult.m[3][0] = (M1.m[0][0] * x) + (M1.m[0][1] * y) + (M1.m[0][2] * z) + (M1.m[0][3] * w);
mResult.m[3][1] = (M1.m[1][0] * x) + (M1.m[1][1] * y) + (M1.m[1][2] * z) + (M1.m[1][3] * w);
mResult.m[3][2] = (M1.m[2][0] * x) + (M1.m[2][1] * y) + (M1.m[2][2] * z) + (M1.m[2][3] * w);
mResult.m[3][3] = (M1.m[3][0] * x) + (M1.m[3][1] * y) + (M1.m[3][2] * z) + (M1.m[3][3] * w);
return mResult;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float32x2_t VL = vget_low_f32(M1.r[0]);
float32x2_t VH = vget_high_f32(M1.r[0]);
float32x4_t vX = vmulq_lane_f32(M2.r[0], VL, 0);
float32x4_t vY = vmulq_lane_f32(M2.r[1], VL, 1);
float32x4_t vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
float32x4_t vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
float32x4_t r0 = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[1]);
VH = vget_high_f32(M1.r[1]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
float32x4_t r1 = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[2]);
VH = vget_high_f32(M1.r[2]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
float32x4_t r2 = vaddq_f32(vZ, vW);
VL = vget_low_f32(M1.r[3]);
VH = vget_high_f32(M1.r[3]);
vX = vmulq_lane_f32(M2.r[0], VL, 0);
vY = vmulq_lane_f32(M2.r[1], VL, 1);
vZ = vmlaq_lane_f32(vX, M2.r[2], VH, 0);
vW = vmlaq_lane_f32(vY, M2.r[3], VH, 1);
float32x4_t r3 = vaddq_f32(vZ, vW);
float32x4x2_t P0 = vzipq_f32(r0, r2);
float32x4x2_t P1 = vzipq_f32(r1, r3);
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
XMMATRIX mResult;
mResult.r[0] = T0.val[0];
mResult.r[1] = T0.val[1];
mResult.r[2] = T1.val[0];
mResult.r[3] = T1.val[1];
return mResult;
#elif defined(_XM_AVX2_INTRINSICS_)
__m256 t0 = _mm256_castps128_ps256(M1.r[0]);
t0 = _mm256_insertf128_ps(t0, M1.r[1], 1);
__m256 t1 = _mm256_castps128_ps256(M1.r[2]);
t1 = _mm256_insertf128_ps(t1, M1.r[3], 1);
__m256 u0 = _mm256_castps128_ps256(M2.r[0]);
u0 = _mm256_insertf128_ps(u0, M2.r[1], 1);
__m256 u1 = _mm256_castps128_ps256(M2.r[2]);
u1 = _mm256_insertf128_ps(u1, M2.r[3], 1);
__m256 a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(0, 0, 0, 0));
__m256 a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(0, 0, 0, 0));
__m256 b0 = _mm256_permute2f128_ps(u0, u0, 0x00);
__m256 c0 = _mm256_mul_ps(a0, b0);
__m256 c1 = _mm256_mul_ps(a1, b0);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(1, 1, 1, 1));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(1, 1, 1, 1));
b0 = _mm256_permute2f128_ps(u0, u0, 0x11);
__m256 c2 = _mm256_fmadd_ps(a0, b0, c0);
__m256 c3 = _mm256_fmadd_ps(a1, b0, c1);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(2, 2, 2, 2));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(2, 2, 2, 2));
__m256 b1 = _mm256_permute2f128_ps(u1, u1, 0x00);
__m256 c4 = _mm256_mul_ps(a0, b1);
__m256 c5 = _mm256_mul_ps(a1, b1);
a0 = _mm256_shuffle_ps(t0, t0, _MM_SHUFFLE(3, 3, 3, 3));
a1 = _mm256_shuffle_ps(t1, t1, _MM_SHUFFLE(3, 3, 3, 3));
b1 = _mm256_permute2f128_ps(u1, u1, 0x11);
__m256 c6 = _mm256_fmadd_ps(a0, b1, c4);
__m256 c7 = _mm256_fmadd_ps(a1, b1, c5);
t0 = _mm256_add_ps(c2, c6);
t1 = _mm256_add_ps(c3, c7);
__m256 vTemp = _mm256_unpacklo_ps(t0, t1);
__m256 vTemp2 = _mm256_unpackhi_ps(t0, t1);
__m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20);
__m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31);
vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4);
vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4);
t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20);
t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31);
XMMATRIX mResult;
mResult.r[0] = _mm256_castps256_ps128(t0);
mResult.r[1] = _mm256_extractf128_ps(t0, 1);
mResult.r[2] = _mm256_castps256_ps128(t1);
mResult.r[3] = _mm256_extractf128_ps(t1, 1);
return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
#if defined(_XM_AVX_INTRINSICS_)
XMVECTOR vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 0);
XMVECTOR vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 1);
XMVECTOR vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 2);
XMVECTOR vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[0]) + 3);
#else
XMVECTOR vW = M1.r[0];
XMVECTOR vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
XMVECTOR vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
XMVECTOR vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
XMVECTOR r0 = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[1]) + 3);
#else
vW = M1.r[1];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
XMVECTOR r1 = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[2]) + 3);
#else
vW = M1.r[2];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
XMVECTOR r2 = vX;
#if defined(_XM_AVX_INTRINSICS_)
vX = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 0);
vY = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 1);
vZ = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 2);
vW = _mm_broadcast_ss(reinterpret_cast<const float*>(&M1.r[3]) + 3);
#else
vW = M1.r[3];
vX = XM_PERMUTE_PS(vW, _MM_SHUFFLE(0, 0, 0, 0));
vY = XM_PERMUTE_PS(vW, _MM_SHUFFLE(1, 1, 1, 1));
vZ = XM_PERMUTE_PS(vW, _MM_SHUFFLE(2, 2, 2, 2));
vW = XM_PERMUTE_PS(vW, _MM_SHUFFLE(3, 3, 3, 3));
#endif
vX = _mm_mul_ps(vX, M2.r[0]);
vY = _mm_mul_ps(vY, M2.r[1]);
vZ = _mm_mul_ps(vZ, M2.r[2]);
vW = _mm_mul_ps(vW, M2.r[3]);
vX = _mm_add_ps(vX, vZ);
vY = _mm_add_ps(vY, vW);
vX = _mm_add_ps(vX, vY);
XMVECTOR r3 = vX;
XMVECTOR vTemp1 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp3 = _mm_shuffle_ps(r0, r1, _MM_SHUFFLE(3, 2, 3, 2));
XMVECTOR vTemp2 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp4 = _mm_shuffle_ps(r2, r3, _MM_SHUFFLE(3, 2, 3, 2));
XMMATRIX mResult;
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0));
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1));
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0));
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1));
return mResult;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixTranspose(FXMMATRIX M) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX P;
P.r[0] = XMVectorMergeXY(M.r[0], M.r[2]); P.r[1] = XMVectorMergeXY(M.r[1], M.r[3]); P.r[2] = XMVectorMergeZW(M.r[0], M.r[2]); P.r[3] = XMVectorMergeZW(M.r[1], M.r[3]);
XMMATRIX MT;
MT.r[0] = XMVectorMergeXY(P.r[0], P.r[1]); MT.r[1] = XMVectorMergeZW(P.r[0], P.r[1]); MT.r[2] = XMVectorMergeXY(P.r[2], P.r[3]); MT.r[3] = XMVectorMergeZW(P.r[2], P.r[3]); return MT;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float32x4x2_t P0 = vzipq_f32(M.r[0], M.r[2]);
float32x4x2_t P1 = vzipq_f32(M.r[1], M.r[3]);
float32x4x2_t T0 = vzipq_f32(P0.val[0], P1.val[0]);
float32x4x2_t T1 = vzipq_f32(P0.val[1], P1.val[1]);
XMMATRIX mResult;
mResult.r[0] = T0.val[0];
mResult.r[1] = T0.val[1];
mResult.r[2] = T1.val[0];
mResult.r[3] = T1.val[1];
return mResult;
#elif defined(_XM_AVX2_INTRINSICS_)
__m256 t0 = _mm256_castps128_ps256(M.r[0]);
t0 = _mm256_insertf128_ps(t0, M.r[1], 1);
__m256 t1 = _mm256_castps128_ps256(M.r[2]);
t1 = _mm256_insertf128_ps(t1, M.r[3], 1);
__m256 vTemp = _mm256_unpacklo_ps(t0, t1);
__m256 vTemp2 = _mm256_unpackhi_ps(t0, t1);
__m256 vTemp3 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20);
__m256 vTemp4 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31);
vTemp = _mm256_unpacklo_ps(vTemp3, vTemp4);
vTemp2 = _mm256_unpackhi_ps(vTemp3, vTemp4);
t0 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x20);
t1 = _mm256_permute2f128_ps(vTemp, vTemp2, 0x31);
XMMATRIX mResult;
mResult.r[0] = _mm256_castps256_ps128(t0);
mResult.r[1] = _mm256_extractf128_ps(t0, 1);
mResult.r[2] = _mm256_castps256_ps128(t1);
mResult.r[3] = _mm256_extractf128_ps(t1, 1);
return mResult;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));
XMMATRIX mResult;
mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0));
mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1));
mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0));
mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1));
return mResult;
#endif
}
_Use_decl_annotations_
inline XMMATRIX XM_CALLCONV XMMatrixInverse
(
XMVECTOR* pDeterminant,
FXMMATRIX M
) noexcept
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX MT = XMMatrixTranspose(M);
XMVECTOR V0[4], V1[4];
V0[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[2]);
V1[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[3]);
V0[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[0]);
V1[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[1]);
V0[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[2], MT.r[0]);
V1[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[3], MT.r[1]);
XMVECTOR D0 = XMVectorMultiply(V0[0], V1[0]);
XMVECTOR D1 = XMVectorMultiply(V0[1], V1[1]);
XMVECTOR D2 = XMVectorMultiply(V0[2], V1[2]);
V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[2]);
V1[0] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[3]);
V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W>(MT.r[0]);
V1[1] = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(MT.r[1]);
V0[2] = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_1W>(MT.r[2], MT.r[0]);
V1[2] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_0Z, XM_PERMUTE_1X, XM_PERMUTE_1Z>(MT.r[3], MT.r[1]);
D0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], D0);
D1 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], D1);
D2 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], D2);
V0[0] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[1]);
V1[0] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D0, D2);
V0[1] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[0]);
V1[1] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D0, D2);
V0[2] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y>(MT.r[3]);
V1[2] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0W, XM_PERMUTE_0X>(D1, D2);
V0[3] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_X>(MT.r[2]);
V1[3] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_1W, XM_PERMUTE_0Y, XM_PERMUTE_0Z>(D1, D2);
XMVECTOR C0 = XMVectorMultiply(V0[0], V1[0]);
XMVECTOR C2 = XMVectorMultiply(V0[1], V1[1]);
XMVECTOR C4 = XMVectorMultiply(V0[2], V1[2]);
XMVECTOR C6 = XMVectorMultiply(V0[3], V1[3]);
V0[0] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[1]);
V1[0] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(D0, D2);
V0[1] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[0]);
V1[1] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0X>(D0, D2);
V0[2] = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y, XM_SWIZZLE_Z>(MT.r[3]);
V1[2] = XMVectorPermute<XM_PERMUTE_0W, XM_PERMUTE_0X, XM_PERMUTE_0Y, XM_PERMUTE_1Z>(D1, D2);
V0[3] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_Z, XM_SWIZZLE_W, XM_SWIZZLE_Y>(MT.r[2]);
V1[3] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(D1, D2);
C0 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
C4 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
V0[0] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[1]);
V1[0] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1X, XM_PERMUTE_0Z>(D0, D2);
V0[1] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[0]);
V1[1] = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1X>(D0, D2);
V0[2] = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_W, XM_SWIZZLE_X>(MT.r[3]);
V1[2] = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(D1, D2);
V0[3] = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_W, XM_SWIZZLE_X, XM_SWIZZLE_Z>(MT.r[2]);
V1[3] = XMVectorPermute<XM_PERMUTE_1W, XM_PERMUTE_0X, XM_PERMUTE_0W, XM_PERMUTE_1Z>(D1, D2);
XMVECTOR C1 = XMVectorNegativeMultiplySubtract(V0[0], V1[0], C0);
C0 = XMVectorMultiplyAdd(V0[0], V1[0], C0);
XMVECTOR C3 = XMVectorMultiplyAdd(V0[1], V1[1], C2);
C2 = XMVectorNegativeMultiplySubtract(V0[1], V1[1], C2);
XMVECTOR C5 = XMVectorNegativeMultiplySubtract(V0[2], V1[2], C4);
C4 = XMVectorMultiplyAdd(V0[2], V1[2], C4);
XMVECTOR C7 = XMVectorMultiplyAdd(V0[3], V1[3], C6);
C6 = XMVectorNegativeMultiplySubtract(V0[3], V1[3], C6);
XMMATRIX R;
R.r[0] = XMVectorSelect(C0, C1, g_XMSelect0101.v);
R.r[1] = XMVectorSelect(C2, C3, g_XMSelect0101.v);
R.r[2] = XMVectorSelect(C4, C5, g_XMSelect0101.v);
R.r[3] = XMVectorSelect(C6, C7, g_XMSelect0101.v);
XMVECTOR Determinant = XMVector4Dot(R.r[0], MT.r[0]);
if (pDeterminant != nullptr)
*pDeterminant = Determinant;
XMVECTOR Reciprocal = XMVectorReciprocal(Determinant);
XMMATRIX Result;
Result.r[0] = XMVectorMultiply(R.r[0], Reciprocal);
Result.r[1] = XMVectorMultiply(R.r[1], Reciprocal);
Result.r[2] = XMVectorMultiply(R.r[2], Reciprocal);
Result.r[3] = XMVectorMultiply(R.r[3], Reciprocal);
return Result;
#elif defined(_XM_SSE_INTRINSICS_)
XMVECTOR vTemp1 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp3 = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
XMVECTOR vTemp2 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
XMVECTOR vTemp4 = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));
XMMATRIX MT;
MT.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0));
MT.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1));
MT.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0));
MT.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1));
XMVECTOR V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 1, 0, 0));
XMVECTOR V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(3, 2, 3, 2));
XMVECTOR V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 1, 0, 0));
XMVECTOR V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(3, 2, 3, 2));
XMVECTOR V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(2, 0, 2, 0));
XMVECTOR V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(3, 1, 3, 1));
XMVECTOR D0 = _mm_mul_ps(V00, V10);
XMVECTOR D1 = _mm_mul_ps(V01, V11);
XMVECTOR D2 = _mm_mul_ps(V02, V12);
V00 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(3, 2, 3, 2));
V10 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 1, 0, 0));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(3, 2, 3, 2));
V11 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 1, 0, 0));
V02 = _mm_shuffle_ps(MT.r[2], MT.r[0], _MM_SHUFFLE(3, 1, 3, 1));
V12 = _mm_shuffle_ps(MT.r[3], MT.r[1], _MM_SHUFFLE(2, 0, 2, 0));
D0 = XM_FNMADD_PS(V00, V10, D0);
D1 = XM_FNMADD_PS(V01, V11, D1);
D2 = XM_FNMADD_PS(V02, V12, D2);
V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 1, 3, 1));
V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(1, 0, 2, 1));
V10 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(0, 3, 0, 2));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(0, 1, 0, 2));
V11 = _mm_shuffle_ps(V11, D0, _MM_SHUFFLE(2, 1, 2, 1));
XMVECTOR V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 3, 3, 1));
V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(1, 0, 2, 1));
V12 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(0, 3, 0, 2));
XMVECTOR V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(0, 1, 0, 2));
V13 = _mm_shuffle_ps(V13, D1, _MM_SHUFFLE(2, 1, 2, 1));
XMVECTOR C0 = _mm_mul_ps(V00, V10);
XMVECTOR C2 = _mm_mul_ps(V01, V11);
XMVECTOR C4 = _mm_mul_ps(V02, V12);
XMVECTOR C6 = _mm_mul_ps(V03, V13);
V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(0, 0, 1, 0));
V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(2, 1, 3, 2));
V10 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(2, 1, 0, 3));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(1, 3, 2, 3));
V11 = _mm_shuffle_ps(D0, V11, _MM_SHUFFLE(0, 2, 1, 2));
V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(2, 2, 1, 0));
V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(2, 1, 3, 2));
V12 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(2, 1, 0, 3));
V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(1, 3, 2, 3));
V13 = _mm_shuffle_ps(D1, V13, _MM_SHUFFLE(0, 2, 1, 2));
C0 = XM_FNMADD_PS(V00, V10, C0);
C2 = XM_FNMADD_PS(V01, V11, C2);
C4 = XM_FNMADD_PS(V02, V12, C4);
C6 = XM_FNMADD_PS(V03, V13, C6);
V00 = XM_PERMUTE_PS(MT.r[1], _MM_SHUFFLE(0, 3, 0, 3));
V10 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 2, 2));
V10 = XM_PERMUTE_PS(V10, _MM_SHUFFLE(0, 2, 3, 0));
V01 = XM_PERMUTE_PS(MT.r[0], _MM_SHUFFLE(2, 0, 3, 1));
V11 = _mm_shuffle_ps(D0, D2, _MM_SHUFFLE(1, 0, 3, 0));
V11 = XM_PERMUTE_PS(V11, _MM_SHUFFLE(2, 1, 0, 3));
V02 = XM_PERMUTE_PS(MT.r[3], _MM_SHUFFLE(0, 3, 0, 3));
V12 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 2, 2));
V12 = XM_PERMUTE_PS(V12, _MM_SHUFFLE(0, 2, 3, 0));
V03 = XM_PERMUTE_PS(MT.r[2], _MM_SHUFFLE(2, 0, 3, 1));
V13 = _mm_shuffle_ps(D1, D2, _MM_SHUFFLE(3, 2, 3, 0));
V13 = XM_PERMUTE_PS(V13, _MM_SHUFFLE(2, 1, 0, 3));
V00 = _mm_mul_ps(V00, V10);
V01 = _mm_mul_ps(V01, V11);
V02 = _mm_mul_ps(V02, V12);
V03 = _mm_mul_ps(V03, V13);
XMVECTOR C1 = _mm_sub_ps(C0, V00);
C0 = _mm_add_ps(C0, V00);
XMVECTOR C3 = _mm_add_ps(C2, V01);
C2 = _mm_sub_ps(C2, V01);
XMVECTOR C5 = _mm_sub_ps(C4, V02);
C4 = _mm_add_ps(C4, V02);
XMVECTOR C7 = _mm_add_ps(C6, V03);
C6 = _mm_sub_ps(C6, V03);
C0 = _mm_shuffle_ps(C0, C1, _MM_SHUFFLE(3, 1, 2, 0));
C2 = _mm_shuffle_ps(C2, C3, _MM_SHUFFLE(3, 1, 2, 0));
C4 = _mm_shuffle_ps(C4, C5, _MM_SHUFFLE(3, 1, 2, 0));
C6 = _mm_shuffle_ps(C6, C7, _MM_SHUFFLE(3, 1, 2, 0));
C0 = XM_PERMUTE_PS(C0, _MM_SHUFFLE(3, 1, 2, 0));
C2 = XM_PERMUTE_PS(C2, _MM_SHUFFLE(3, 1, 2, 0));
C4 = XM_PERMUTE_PS(C4, _MM_SHUFFLE(3, 1, 2, 0));
C6 = XM_PERMUTE_PS(C6, _MM_SHUFFLE(3, 1, 2, 0));
XMVECTOR vTemp = XMVector4Dot(C0, MT.r[0]);
if (pDeterminant != nullptr)
*pDeterminant = vTemp;
vTemp = _mm_div_ps(g_XMOne, vTemp);
XMMATRIX mResult;
mResult.r[0] = _mm_mul_ps(C0, vTemp);
mResult.r[1] = _mm_mul_ps(C2, vTemp);
mResult.r[2] = _mm_mul_ps(C4, vTemp);
mResult.r[3] = _mm_mul_ps(C6, vTemp);
return mResult;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixVectorTensorProduct
(
FXMVECTOR V1,
FXMVECTOR V2
) noexcept
{
XMMATRIX mResult;
mResult.r[0] = XMVectorMultiply(XMVectorSwizzle<0, 0, 0, 0>(V1), V2);
mResult.r[1] = XMVectorMultiply(XMVectorSwizzle<1, 1, 1, 1>(V1), V2);
mResult.r[2] = XMVectorMultiply(XMVectorSwizzle<2, 2, 2, 2>(V1), V2);
mResult.r[3] = XMVectorMultiply(XMVectorSwizzle<3, 3, 3, 3>(V1), V2);
return mResult;
}
inline XMVECTOR XM_CALLCONV XMMatrixDeterminant(FXMMATRIX M) noexcept
{
static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, 1.0f, -1.0f } } };
XMVECTOR V0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
XMVECTOR V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[2]);
XMVECTOR V3 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
XMVECTOR V4 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
XMVECTOR V5 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[3]);
XMVECTOR P0 = XMVectorMultiply(V0, V1);
XMVECTOR P1 = XMVectorMultiply(V2, V3);
XMVECTOR P2 = XMVectorMultiply(V4, V5);
V0 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[2]);
V1 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
V2 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
V3 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[3]);
V4 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[2]);
V5 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[3]);
P0 = XMVectorNegativeMultiplySubtract(V0, V1, P0);
P1 = XMVectorNegativeMultiplySubtract(V2, V3, P1);
P2 = XMVectorNegativeMultiplySubtract(V4, V5, P2);
V0 = XMVectorSwizzle<XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_W, XM_SWIZZLE_Z>(M.r[1]);
V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Y>(M.r[1]);
V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_X>(M.r[1]);
XMVECTOR S = XMVectorMultiply(M.r[0], Sign.v);
XMVECTOR R = XMVectorMultiply(V0, P0);
R = XMVectorNegativeMultiplySubtract(V1, P1, R);
R = XMVectorMultiplyAdd(V2, P2, R);
return XMVector4Dot(S, R);
}
#define XM3RANKDECOMPOSE(a, b, c, x, y, z) \
if((x) < (y)) \
{ \
if((y) < (z)) \
{ \
(a) = 2; \
(b) = 1; \
(c) = 0; \
} \
else \
{ \
(a) = 1; \
\
if((x) < (z)) \
{ \
(b) = 2; \
(c) = 0; \
} \
else \
{ \
(b) = 0; \
(c) = 2; \
} \
} \
} \
else \
{ \
if((x) < (z)) \
{ \
(a) = 2; \
(b) = 0; \
(c) = 1; \
} \
else \
{ \
(a) = 0; \
\
if((y) < (z)) \
{ \
(b) = 2; \
(c) = 1; \
} \
else \
{ \
(b) = 1; \
(c) = 2; \
} \
} \
}
#define XM3_DECOMP_EPSILON 0.0001f
_Use_decl_annotations_
inline bool XM_CALLCONV XMMatrixDecompose
(
XMVECTOR* outScale,
XMVECTOR* outRotQuat,
XMVECTOR* outTrans,
FXMMATRIX M
) noexcept
{
static const XMVECTOR* pvCanonicalBasis[3] = {
&g_XMIdentityR0.v,
&g_XMIdentityR1.v,
&g_XMIdentityR2.v
};
assert(outScale != nullptr);
assert(outRotQuat != nullptr);
assert(outTrans != nullptr);
outTrans[0] = M.r[3];
XMVECTOR* ppvBasis[3];
XMMATRIX matTemp;
ppvBasis[0] = &matTemp.r[0];
ppvBasis[1] = &matTemp.r[1];
ppvBasis[2] = &matTemp.r[2];
matTemp.r[0] = M.r[0];
matTemp.r[1] = M.r[1];
matTemp.r[2] = M.r[2];
matTemp.r[3] = g_XMIdentityR3.v;
auto pfScales = reinterpret_cast<float*>(outScale);
size_t a, b, c;
XMVectorGetXPtr(&pfScales[0], XMVector3Length(ppvBasis[0][0]));
XMVectorGetXPtr(&pfScales[1], XMVector3Length(ppvBasis[1][0]));
XMVectorGetXPtr(&pfScales[2], XMVector3Length(ppvBasis[2][0]));
pfScales[3] = 0.f;
XM3RANKDECOMPOSE(a, b, c, pfScales[0], pfScales[1], pfScales[2])
if (pfScales[a] < XM3_DECOMP_EPSILON)
{
ppvBasis[a][0] = pvCanonicalBasis[a][0];
}
ppvBasis[a][0] = XMVector3Normalize(ppvBasis[a][0]);
if (pfScales[b] < XM3_DECOMP_EPSILON)
{
size_t aa, bb, cc;
float fAbsX, fAbsY, fAbsZ;
fAbsX = fabsf(XMVectorGetX(ppvBasis[a][0]));
fAbsY = fabsf(XMVectorGetY(ppvBasis[a][0]));
fAbsZ = fabsf(XMVectorGetZ(ppvBasis[a][0]));
XM3RANKDECOMPOSE(aa, bb, cc, fAbsX, fAbsY, fAbsZ)
ppvBasis[b][0] = XMVector3Cross(ppvBasis[a][0], pvCanonicalBasis[cc][0]);
}
ppvBasis[b][0] = XMVector3Normalize(ppvBasis[b][0]);
if (pfScales[c] < XM3_DECOMP_EPSILON)
{
ppvBasis[c][0] = XMVector3Cross(ppvBasis[a][0], ppvBasis[b][0]);
}
ppvBasis[c][0] = XMVector3Normalize(ppvBasis[c][0]);
float fDet = XMVectorGetX(XMMatrixDeterminant(matTemp));
if (fDet < 0.0f)
{
pfScales[a] = -pfScales[a];
ppvBasis[a][0] = XMVectorNegate(ppvBasis[a][0]);
fDet = -fDet;
}
fDet -= 1.0f;
fDet *= fDet;
if (XM3_DECOMP_EPSILON < fDet)
{
return false;
}
outRotQuat[0] = XMQuaternionRotationMatrix(matTemp);
return true;
}
#undef XM3_DECOMP_EPSILON
#undef XM3RANKDECOMPOSE
inline XMMATRIX XM_CALLCONV XMMatrixIdentity() noexcept
{
XMMATRIX M;
M.r[0] = g_XMIdentityR0.v;
M.r[1] = g_XMIdentityR1.v;
M.r[2] = g_XMIdentityR2.v;
M.r[3] = g_XMIdentityR3.v;
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixSet
(
float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33
) noexcept
{
XMMATRIX M;
#if defined(_XM_NO_INTRINSICS_)
M.m[0][0] = m00; M.m[0][1] = m01; M.m[0][2] = m02; M.m[0][3] = m03;
M.m[1][0] = m10; M.m[1][1] = m11; M.m[1][2] = m12; M.m[1][3] = m13;
M.m[2][0] = m20; M.m[2][1] = m21; M.m[2][2] = m22; M.m[2][3] = m23;
M.m[3][0] = m30; M.m[3][1] = m31; M.m[3][2] = m32; M.m[3][3] = m33;
#else
M.r[0] = XMVectorSet(m00, m01, m02, m03);
M.r[1] = XMVectorSet(m10, m11, m12, m13);
M.r[2] = XMVectorSet(m20, m21, m22, m23);
M.r[3] = XMVectorSet(m30, m31, m32, m33);
#endif
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixTranslation
(
float OffsetX,
float OffsetY,
float OffsetZ
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX M;
M.m[0][0] = 1.0f;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = 1.0f;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = 1.0f;
M.m[2][3] = 0.0f;
M.m[3][0] = OffsetX;
M.m[3][1] = OffsetY;
M.m[3][2] = OffsetZ;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX M;
M.r[0] = g_XMIdentityR0.v;
M.r[1] = g_XMIdentityR1.v;
M.r[2] = g_XMIdentityR2.v;
M.r[3] = XMVectorSet(OffsetX, OffsetY, OffsetZ, 1.f);
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixTranslationFromVector(FXMVECTOR Offset) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX M;
M.m[0][0] = 1.0f;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = 1.0f;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = 1.0f;
M.m[2][3] = 0.0f;
M.m[3][0] = Offset.vector4_f32[0];
M.m[3][1] = Offset.vector4_f32[1];
M.m[3][2] = Offset.vector4_f32[2];
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_SSE_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX M;
M.r[0] = g_XMIdentityR0.v;
M.r[1] = g_XMIdentityR1.v;
M.r[2] = g_XMIdentityR2.v;
M.r[3] = XMVectorSelect(g_XMIdentityR3.v, Offset, g_XMSelect1110.v);
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixScaling
(
float ScaleX,
float ScaleY,
float ScaleZ
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX M;
M.m[0][0] = ScaleX;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = ScaleY;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = ScaleZ;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
const XMVECTOR Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(ScaleX, Zero, 0);
M.r[1] = vsetq_lane_f32(ScaleY, Zero, 1);
M.r[2] = vsetq_lane_f32(ScaleZ, Zero, 2);
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
M.r[0] = _mm_set_ps(0, 0, 0, ScaleX);
M.r[1] = _mm_set_ps(0, 0, ScaleY, 0);
M.r[2] = _mm_set_ps(0, ScaleZ, 0, 0);
M.r[3] = g_XMIdentityR3.v;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixScalingFromVector(FXMVECTOR Scale) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMMATRIX M;
M.m[0][0] = Scale.vector4_f32[0];
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = Scale.vector4_f32[1];
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = Scale.vector4_f32[2];
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
XMMATRIX M;
M.r[0] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskX));
M.r[1] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskY));
M.r[2] = vreinterpretq_f32_u32(vandq_u32(vreinterpretq_u32_f32(Scale), g_XMMaskZ));
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
M.r[0] = _mm_and_ps(Scale, g_XMMaskX);
M.r[1] = _mm_and_ps(Scale, g_XMMaskY);
M.r[2] = _mm_and_ps(Scale, g_XMMaskZ);
M.r[3] = g_XMIdentityR3.v;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationX(float Angle) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
XMMATRIX M;
M.m[0][0] = 1.0f;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = fCosAngle;
M.m[1][2] = fSinAngle;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = -fSinAngle;
M.m[2][2] = fCosAngle;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
const float32x4_t Zero = vdupq_n_f32(0);
float32x4_t T1 = vsetq_lane_f32(fCosAngle, Zero, 1);
T1 = vsetq_lane_f32(fSinAngle, T1, 2);
float32x4_t T2 = vsetq_lane_f32(-fSinAngle, Zero, 1);
T2 = vsetq_lane_f32(fCosAngle, T2, 2);
XMMATRIX M;
M.r[0] = g_XMIdentityR0.v;
M.r[1] = T1;
M.r[2] = T2;
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float SinAngle;
float CosAngle;
XMScalarSinCos(&SinAngle, &CosAngle, Angle);
XMVECTOR vSin = _mm_set_ss(SinAngle);
XMVECTOR vCos = _mm_set_ss(CosAngle);
vCos = _mm_shuffle_ps(vCos, vSin, _MM_SHUFFLE(3, 0, 0, 3));
XMMATRIX M;
M.r[0] = g_XMIdentityR0;
M.r[1] = vCos;
vCos = XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 1, 2, 0));
vCos = _mm_mul_ps(vCos, g_XMNegateY);
M.r[2] = vCos;
M.r[3] = g_XMIdentityR3;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationY(float Angle) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
XMMATRIX M;
M.m[0][0] = fCosAngle;
M.m[0][1] = 0.0f;
M.m[0][2] = -fSinAngle;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = 1.0f;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = fSinAngle;
M.m[2][1] = 0.0f;
M.m[2][2] = fCosAngle;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
const float32x4_t Zero = vdupq_n_f32(0);
float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0);
T0 = vsetq_lane_f32(-fSinAngle, T0, 2);
float32x4_t T2 = vsetq_lane_f32(fSinAngle, Zero, 0);
T2 = vsetq_lane_f32(fCosAngle, T2, 2);
XMMATRIX M;
M.r[0] = T0;
M.r[1] = g_XMIdentityR1.v;
M.r[2] = T2;
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float SinAngle;
float CosAngle;
XMScalarSinCos(&SinAngle, &CosAngle, Angle);
XMVECTOR vSin = _mm_set_ss(SinAngle);
XMVECTOR vCos = _mm_set_ss(CosAngle);
vSin = _mm_shuffle_ps(vSin, vCos, _MM_SHUFFLE(3, 0, 3, 0));
XMMATRIX M;
M.r[2] = vSin;
M.r[1] = g_XMIdentityR1;
vSin = XM_PERMUTE_PS(vSin, _MM_SHUFFLE(3, 0, 1, 2));
vSin = _mm_mul_ps(vSin, g_XMNegateZ);
M.r[0] = vSin;
M.r[3] = g_XMIdentityR3;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationZ(float Angle) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
XMMATRIX M;
M.m[0][0] = fCosAngle;
M.m[0][1] = fSinAngle;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = -fSinAngle;
M.m[1][1] = fCosAngle;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = 1.0f;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
const float32x4_t Zero = vdupq_n_f32(0);
float32x4_t T0 = vsetq_lane_f32(fCosAngle, Zero, 0);
T0 = vsetq_lane_f32(fSinAngle, T0, 1);
float32x4_t T1 = vsetq_lane_f32(-fSinAngle, Zero, 0);
T1 = vsetq_lane_f32(fCosAngle, T1, 1);
XMMATRIX M;
M.r[0] = T0;
M.r[1] = T1;
M.r[2] = g_XMIdentityR2.v;
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float SinAngle;
float CosAngle;
XMScalarSinCos(&SinAngle, &CosAngle, Angle);
XMVECTOR vSin = _mm_set_ss(SinAngle);
XMVECTOR vCos = _mm_set_ss(CosAngle);
vCos = _mm_unpacklo_ps(vCos, vSin);
XMMATRIX M;
M.r[0] = vCos;
vCos = XM_PERMUTE_PS(vCos, _MM_SHUFFLE(3, 2, 0, 1));
vCos = _mm_mul_ps(vCos, g_XMNegateX);
M.r[1] = vCos;
M.r[2] = g_XMIdentityR2;
M.r[3] = g_XMIdentityR3;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYaw
(
float Pitch,
float Yaw,
float Roll
) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float cp = cosf(Pitch);
float sp = sinf(Pitch);
float cy = cosf(Yaw);
float sy = sinf(Yaw);
float cr = cosf(Roll);
float sr = sinf(Roll);
XMMATRIX M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#else
XMVECTOR Angles = XMVectorSet(Pitch, Yaw, Roll, 0.0f);
return XMMatrixRotationRollPitchYawFromVector(Angles);
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationRollPitchYawFromVector
(
FXMVECTOR Angles ) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float cp = cosf(Angles.vector4_f32[0]);
float sp = sinf(Angles.vector4_f32[0]);
float cy = cosf(Angles.vector4_f32[1]);
float sy = sinf(Angles.vector4_f32[1]);
float cr = cosf(Angles.vector4_f32[2]);
float sr = sinf(Angles.vector4_f32[2]);
XMMATRIX M;
M.m[0][0] = cr * cy + sr * sp * sy;
M.m[0][1] = sr * cp;
M.m[0][2] = sr * sp * cy - cr * sy;
M.m[0][3] = 0.0f;
M.m[1][0] = cr * sp * sy - sr * cy;
M.m[1][1] = cr * cp;
M.m[1][2] = sr * sy + cr * sp * cy;
M.m[1][3] = 0.0f;
M.m[2][0] = cp * sy;
M.m[2][1] = -sp;
M.m[2][2] = cp * cy;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = 0.0f;
M.m[3][3] = 1.0f;
return M;
#else
static const XMVECTORF32 Sign = { { { 1.0f, -1.0f, -1.0f, 1.0f } } };
XMVECTOR SinAngles, CosAngles;
XMVectorSinCos(&SinAngles, &CosAngles, Angles);
XMVECTOR P0 = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_0Z, XM_PERMUTE_1Z, XM_PERMUTE_1X>(SinAngles, CosAngles);
XMVECTOR Y0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1X, XM_PERMUTE_1Y>(SinAngles, CosAngles);
XMVECTOR P1 = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z>(SinAngles, CosAngles);
XMVECTOR Y1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_1Y, XM_PERMUTE_0Y, XM_PERMUTE_0Y>(SinAngles, CosAngles);
XMVECTOR P2 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Z, XM_PERMUTE_0Z, XM_PERMUTE_1Z>(SinAngles, CosAngles);
XMVECTOR P3 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0Y, XM_PERMUTE_1Y, XM_PERMUTE_1Y>(SinAngles, CosAngles);
XMVECTOR Y2 = XMVectorSplatX(SinAngles);
XMVECTOR NS = XMVectorNegate(SinAngles);
XMVECTOR Q0 = XMVectorMultiply(P0, Y0);
XMVECTOR Q1 = XMVectorMultiply(P1, Sign.v);
Q1 = XMVectorMultiply(Q1, Y1);
XMVECTOR Q2 = XMVectorMultiply(P2, Y2);
Q2 = XMVectorMultiplyAdd(Q2, P3, Q1);
XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1Z, XM_PERMUTE_0W>(Q0, Q2);
XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_1W, XM_PERMUTE_0W>(Q0, Q2);
XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_0W, XM_PERMUTE_0W>(Q0, NS);
XMMATRIX M;
M.r[0] = XMVectorSelect(g_XMZero, V0, g_XMSelect1110.v);
M.r[1] = XMVectorSelect(g_XMZero, V1, g_XMSelect1110.v);
M.r[2] = XMVectorSelect(g_XMZero, V2, g_XMSelect1110.v);
M.r[3] = g_XMIdentityR3;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationNormal
(
FXMVECTOR NormalAxis,
float Angle
) noexcept
{
#if defined(_XM_NO_INTRINSICS_) || defined(_XM_ARM_NEON_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
XMVECTOR A = XMVectorSet(fSinAngle, fCosAngle, 1.0f - fCosAngle, 0.0f);
XMVECTOR C2 = XMVectorSplatZ(A);
XMVECTOR C1 = XMVectorSplatY(A);
XMVECTOR C0 = XMVectorSplatX(A);
XMVECTOR N0 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(NormalAxis);
XMVECTOR N1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(NormalAxis);
XMVECTOR V0 = XMVectorMultiply(C2, N0);
V0 = XMVectorMultiply(V0, N1);
XMVECTOR R0 = XMVectorMultiply(C2, NormalAxis);
R0 = XMVectorMultiplyAdd(R0, NormalAxis, C1);
XMVECTOR R1 = XMVectorMultiplyAdd(C0, NormalAxis, V0);
XMVECTOR R2 = XMVectorNegativeMultiplySubtract(C0, NormalAxis, V0);
V0 = XMVectorSelect(A, R0, g_XMSelect1110.v);
XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_1Y, XM_PERMUTE_1Z, XM_PERMUTE_0X>(R1, R2);
XMVECTOR V2 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_0Y, XM_PERMUTE_1X>(R1, R2);
XMMATRIX M;
M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(V0, V1);
M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(V0, V1);
M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(V0, V2);
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float fSinAngle;
float fCosAngle;
XMScalarSinCos(&fSinAngle, &fCosAngle, Angle);
XMVECTOR C2 = _mm_set_ps1(1.0f - fCosAngle);
XMVECTOR C1 = _mm_set_ps1(fCosAngle);
XMVECTOR C0 = _mm_set_ps1(fSinAngle);
XMVECTOR N0 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 0, 2, 1));
XMVECTOR N1 = XM_PERMUTE_PS(NormalAxis, _MM_SHUFFLE(3, 1, 0, 2));
XMVECTOR V0 = _mm_mul_ps(C2, N0);
V0 = _mm_mul_ps(V0, N1);
XMVECTOR R0 = _mm_mul_ps(C2, NormalAxis);
R0 = _mm_mul_ps(R0, NormalAxis);
R0 = _mm_add_ps(R0, C1);
XMVECTOR R1 = _mm_mul_ps(C0, NormalAxis);
R1 = _mm_add_ps(R1, V0);
XMVECTOR R2 = _mm_mul_ps(C0, NormalAxis);
R2 = _mm_sub_ps(V0, R2);
V0 = _mm_and_ps(R0, g_XMMask3);
XMVECTOR V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 1, 2, 0));
V1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(0, 3, 2, 1));
XMVECTOR V2 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(0, 0, 1, 1));
V2 = XM_PERMUTE_PS(V2, _MM_SHUFFLE(2, 0, 2, 0));
R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(1, 0, 3, 0));
R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 2, 0));
XMMATRIX M;
M.r[0] = R2;
R2 = _mm_shuffle_ps(V0, V1, _MM_SHUFFLE(3, 2, 3, 1));
R2 = XM_PERMUTE_PS(R2, _MM_SHUFFLE(1, 3, 0, 2));
M.r[1] = R2;
V2 = _mm_shuffle_ps(V2, V0, _MM_SHUFFLE(3, 2, 1, 0));
M.r[2] = V2;
M.r[3] = g_XMIdentityR3.v;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationAxis
(
FXMVECTOR Axis,
float Angle
) noexcept
{
assert(!XMVector3Equal(Axis, XMVectorZero()));
assert(!XMVector3IsInfinite(Axis));
XMVECTOR Normal = XMVector3Normalize(Axis);
return XMMatrixRotationNormal(Normal, Angle);
}
inline XMMATRIX XM_CALLCONV XMMatrixRotationQuaternion(FXMVECTOR Quaternion) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
float qx = Quaternion.vector4_f32[0];
float qxx = qx * qx;
float qy = Quaternion.vector4_f32[1];
float qyy = qy * qy;
float qz = Quaternion.vector4_f32[2];
float qzz = qz * qz;
float qw = Quaternion.vector4_f32[3];
XMMATRIX M;
M.m[0][0] = 1.f - 2.f * qyy - 2.f * qzz;
M.m[0][1] = 2.f * qx * qy + 2.f * qz * qw;
M.m[0][2] = 2.f * qx * qz - 2.f * qy * qw;
M.m[0][3] = 0.f;
M.m[1][0] = 2.f * qx * qy - 2.f * qz * qw;
M.m[1][1] = 1.f - 2.f * qxx - 2.f * qzz;
M.m[1][2] = 2.f * qy * qz + 2.f * qx * qw;
M.m[1][3] = 0.f;
M.m[2][0] = 2.f * qx * qz + 2.f * qy * qw;
M.m[2][1] = 2.f * qy * qz - 2.f * qx * qw;
M.m[2][2] = 1.f - 2.f * qxx - 2.f * qyy;
M.m[2][3] = 0.f;
M.m[3][0] = 0.f;
M.m[3][1] = 0.f;
M.m[3][2] = 0.f;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
XMVECTOR Q0 = XMVectorAdd(Quaternion, Quaternion);
XMVECTOR Q1 = XMVectorMultiply(Quaternion, Q0);
XMVECTOR V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_0X, XM_PERMUTE_0X, XM_PERMUTE_1W>(Q1, Constant1110.v);
XMVECTOR V1 = XMVectorPermute<XM_PERMUTE_0Z, XM_PERMUTE_0Z, XM_PERMUTE_0Y, XM_PERMUTE_1W>(Q1, Constant1110.v);
XMVECTOR R0 = XMVectorSubtract(Constant1110, V0);
R0 = XMVectorSubtract(R0, V1);
V0 = XMVectorSwizzle<XM_SWIZZLE_X, XM_SWIZZLE_X, XM_SWIZZLE_Y, XM_SWIZZLE_W>(Quaternion);
V1 = XMVectorSwizzle<XM_SWIZZLE_Z, XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_W>(Q0);
V0 = XMVectorMultiply(V0, V1);
V1 = XMVectorSplatW(Quaternion);
XMVECTOR V2 = XMVectorSwizzle<XM_SWIZZLE_Y, XM_SWIZZLE_Z, XM_SWIZZLE_X, XM_SWIZZLE_W>(Q0);
V1 = XMVectorMultiply(V1, V2);
XMVECTOR R1 = XMVectorAdd(V0, V1);
XMVECTOR R2 = XMVectorSubtract(V0, V1);
V0 = XMVectorPermute<XM_PERMUTE_0Y, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z>(R1, R2);
V1 = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1Z, XM_PERMUTE_0X, XM_PERMUTE_1Z>(R1, R2);
XMMATRIX M;
M.r[0] = XMVectorPermute<XM_PERMUTE_0X, XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0W>(R0, V0);
M.r[1] = XMVectorPermute<XM_PERMUTE_1Z, XM_PERMUTE_0Y, XM_PERMUTE_1W, XM_PERMUTE_0W>(R0, V0);
M.r[2] = XMVectorPermute<XM_PERMUTE_1X, XM_PERMUTE_1Y, XM_PERMUTE_0Z, XM_PERMUTE_0W>(R0, V1);
M.r[3] = g_XMIdentityR3.v;
return M;
#elif defined(_XM_SSE_INTRINSICS_)
static const XMVECTORF32 Constant1110 = { { { 1.0f, 1.0f, 1.0f, 0.0f } } };
XMVECTOR Q0 = _mm_add_ps(Quaternion, Quaternion);
XMVECTOR Q1 = _mm_mul_ps(Quaternion, Q0);
XMVECTOR V0 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 0, 0, 1));
V0 = _mm_and_ps(V0, g_XMMask3);
XMVECTOR V1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(3, 1, 2, 2));
V1 = _mm_and_ps(V1, g_XMMask3);
XMVECTOR R0 = _mm_sub_ps(Constant1110, V0);
R0 = _mm_sub_ps(R0, V1);
V0 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 1, 0, 0));
V1 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 2, 1, 2));
V0 = _mm_mul_ps(V0, V1);
V1 = XM_PERMUTE_PS(Quaternion, _MM_SHUFFLE(3, 3, 3, 3));
XMVECTOR V2 = XM_PERMUTE_PS(Q0, _MM_SHUFFLE(3, 0, 2, 1));
V1 = _mm_mul_ps(V1, V2);
XMVECTOR R1 = _mm_add_ps(V0, V1);
XMVECTOR R2 = _mm_sub_ps(V0, V1);
V0 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(1, 0, 2, 1));
V0 = XM_PERMUTE_PS(V0, _MM_SHUFFLE(1, 3, 2, 0));
V1 = _mm_shuffle_ps(R1, R2, _MM_SHUFFLE(2, 2, 0, 0));
V1 = XM_PERMUTE_PS(V1, _MM_SHUFFLE(2, 0, 2, 0));
Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(1, 0, 3, 0));
Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 2, 0));
XMMATRIX M;
M.r[0] = Q1;
Q1 = _mm_shuffle_ps(R0, V0, _MM_SHUFFLE(3, 2, 3, 1));
Q1 = XM_PERMUTE_PS(Q1, _MM_SHUFFLE(1, 3, 0, 2));
M.r[1] = Q1;
Q1 = _mm_shuffle_ps(V1, R0, _MM_SHUFFLE(3, 2, 1, 0));
M.r[2] = Q1;
M.r[3] = g_XMIdentityR3;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixTransformation2D
(
FXMVECTOR ScalingOrigin,
float ScalingOrientation,
FXMVECTOR Scaling,
FXMVECTOR RotationOrigin,
float Rotation,
GXMVECTOR Translation
) noexcept
{
XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1100.v, ScalingOrigin, g_XMSelect1100.v);
XMVECTOR NegScalingOrigin = XMVectorNegate(VScalingOrigin);
XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
XMMATRIX MScalingOrientation = XMMatrixRotationZ(ScalingOrientation);
XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v);
XMMATRIX M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
M = XMMatrixMultiply(M, MScaling);
M = XMMatrixMultiply(M, MScalingOrientation);
M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
M = XMMatrixMultiply(M, MRotation);
M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
M.r[3] = XMVectorAdd(M.r[3], VTranslation);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixTransformation
(
FXMVECTOR ScalingOrigin,
FXMVECTOR ScalingOrientationQuaternion,
FXMVECTOR Scaling,
GXMVECTOR RotationOrigin,
HXMVECTOR RotationQuaternion,
HXMVECTOR Translation
) noexcept
{
XMVECTOR VScalingOrigin = XMVectorSelect(g_XMSelect1110.v, ScalingOrigin, g_XMSelect1110.v);
XMVECTOR NegScalingOrigin = XMVectorNegate(ScalingOrigin);
XMMATRIX MScalingOriginI = XMMatrixTranslationFromVector(NegScalingOrigin);
XMMATRIX MScalingOrientation = XMMatrixRotationQuaternion(ScalingOrientationQuaternion);
XMMATRIX MScalingOrientationT = XMMatrixTranspose(MScalingOrientation);
XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
XMMATRIX M;
M = XMMatrixMultiply(MScalingOriginI, MScalingOrientationT);
M = XMMatrixMultiply(M, MScaling);
M = XMMatrixMultiply(M, MScalingOrientation);
M.r[3] = XMVectorAdd(M.r[3], VScalingOrigin);
M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
M = XMMatrixMultiply(M, MRotation);
M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
M.r[3] = XMVectorAdd(M.r[3], VTranslation);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation2D
(
FXMVECTOR Scaling,
FXMVECTOR RotationOrigin,
float Rotation,
FXMVECTOR Translation
) noexcept
{
XMVECTOR VScaling = XMVectorSelect(g_XMOne.v, Scaling, g_XMSelect1100.v);
XMMATRIX MScaling = XMMatrixScalingFromVector(VScaling);
XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1100.v, RotationOrigin, g_XMSelect1100.v);
XMMATRIX MRotation = XMMatrixRotationZ(Rotation);
XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1100.v, Translation, g_XMSelect1100.v);
XMMATRIX M;
M = MScaling;
M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
M = XMMatrixMultiply(M, MRotation);
M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
M.r[3] = XMVectorAdd(M.r[3], VTranslation);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixAffineTransformation
(
FXMVECTOR Scaling,
FXMVECTOR RotationOrigin,
FXMVECTOR RotationQuaternion,
GXMVECTOR Translation
) noexcept
{
XMMATRIX MScaling = XMMatrixScalingFromVector(Scaling);
XMVECTOR VRotationOrigin = XMVectorSelect(g_XMSelect1110.v, RotationOrigin, g_XMSelect1110.v);
XMMATRIX MRotation = XMMatrixRotationQuaternion(RotationQuaternion);
XMVECTOR VTranslation = XMVectorSelect(g_XMSelect1110.v, Translation, g_XMSelect1110.v);
XMMATRIX M;
M = MScaling;
M.r[3] = XMVectorSubtract(M.r[3], VRotationOrigin);
M = XMMatrixMultiply(M, MRotation);
M.r[3] = XMVectorAdd(M.r[3], VRotationOrigin);
M.r[3] = XMVectorAdd(M.r[3], VTranslation);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixReflect(FXMVECTOR ReflectionPlane) noexcept
{
assert(!XMVector3Equal(ReflectionPlane, XMVectorZero()));
assert(!XMPlaneIsInfinite(ReflectionPlane));
static const XMVECTORF32 NegativeTwo = { { { -2.0f, -2.0f, -2.0f, 0.0f } } };
XMVECTOR P = XMPlaneNormalize(ReflectionPlane);
XMVECTOR S = XMVectorMultiply(P, NegativeTwo);
XMVECTOR A = XMVectorSplatX(P);
XMVECTOR B = XMVectorSplatY(P);
XMVECTOR C = XMVectorSplatZ(P);
XMVECTOR D = XMVectorSplatW(P);
XMMATRIX M;
M.r[0] = XMVectorMultiplyAdd(A, S, g_XMIdentityR0.v);
M.r[1] = XMVectorMultiplyAdd(B, S, g_XMIdentityR1.v);
M.r[2] = XMVectorMultiplyAdd(C, S, g_XMIdentityR2.v);
M.r[3] = XMVectorMultiplyAdd(D, S, g_XMIdentityR3.v);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixShadow
(
FXMVECTOR ShadowPlane,
FXMVECTOR LightPosition
) noexcept
{
static const XMVECTORU32 Select0001 = { { { XM_SELECT_0, XM_SELECT_0, XM_SELECT_0, XM_SELECT_1 } } };
assert(!XMVector3Equal(ShadowPlane, XMVectorZero()));
assert(!XMPlaneIsInfinite(ShadowPlane));
XMVECTOR P = XMPlaneNormalize(ShadowPlane);
XMVECTOR Dot = XMPlaneDot(P, LightPosition);
P = XMVectorNegate(P);
XMVECTOR D = XMVectorSplatW(P);
XMVECTOR C = XMVectorSplatZ(P);
XMVECTOR B = XMVectorSplatY(P);
XMVECTOR A = XMVectorSplatX(P);
Dot = XMVectorSelect(Select0001.v, Dot, Select0001.v);
XMMATRIX M;
M.r[3] = XMVectorMultiplyAdd(D, LightPosition, Dot);
Dot = XMVectorRotateLeft(Dot, 1);
M.r[2] = XMVectorMultiplyAdd(C, LightPosition, Dot);
Dot = XMVectorRotateLeft(Dot, 1);
M.r[1] = XMVectorMultiplyAdd(B, LightPosition, Dot);
Dot = XMVectorRotateLeft(Dot, 1);
M.r[0] = XMVectorMultiplyAdd(A, LightPosition, Dot);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixLookAtLH
(
FXMVECTOR EyePosition,
FXMVECTOR FocusPosition,
FXMVECTOR UpDirection
) noexcept
{
XMVECTOR EyeDirection = XMVectorSubtract(FocusPosition, EyePosition);
return XMMatrixLookToLH(EyePosition, EyeDirection, UpDirection);
}
inline XMMATRIX XM_CALLCONV XMMatrixLookAtRH
(
FXMVECTOR EyePosition,
FXMVECTOR FocusPosition,
FXMVECTOR UpDirection
) noexcept
{
XMVECTOR NegEyeDirection = XMVectorSubtract(EyePosition, FocusPosition);
return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
inline XMMATRIX XM_CALLCONV XMMatrixLookToLH
(
FXMVECTOR EyePosition,
FXMVECTOR EyeDirection,
FXMVECTOR UpDirection
) noexcept
{
assert(!XMVector3Equal(EyeDirection, XMVectorZero()));
assert(!XMVector3IsInfinite(EyeDirection));
assert(!XMVector3Equal(UpDirection, XMVectorZero()));
assert(!XMVector3IsInfinite(UpDirection));
XMVECTOR R2 = XMVector3Normalize(EyeDirection);
XMVECTOR R0 = XMVector3Cross(UpDirection, R2);
R0 = XMVector3Normalize(R0);
XMVECTOR R1 = XMVector3Cross(R2, R0);
XMVECTOR NegEyePosition = XMVectorNegate(EyePosition);
XMVECTOR D0 = XMVector3Dot(R0, NegEyePosition);
XMVECTOR D1 = XMVector3Dot(R1, NegEyePosition);
XMVECTOR D2 = XMVector3Dot(R2, NegEyePosition);
XMMATRIX M;
M.r[0] = XMVectorSelect(D0, R0, g_XMSelect1110.v);
M.r[1] = XMVectorSelect(D1, R1, g_XMSelect1110.v);
M.r[2] = XMVectorSelect(D2, R2, g_XMSelect1110.v);
M.r[3] = g_XMIdentityR3.v;
M = XMMatrixTranspose(M);
return M;
}
inline XMMATRIX XM_CALLCONV XMMatrixLookToRH
(
FXMVECTOR EyePosition,
FXMVECTOR EyeDirection,
FXMVECTOR UpDirection
) noexcept
{
XMVECTOR NegEyeDirection = XMVectorNegate(EyeDirection);
return XMMatrixLookToLH(EyePosition, NegEyeDirection, UpDirection);
}
#ifdef _PREFAST_
#pragma prefast(push)
#pragma prefast(disable:28931, "PREfast noise: Esp:1266")
#endif
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveLH
(
float ViewWidth,
float ViewHeight,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (FarZ - NearZ);
XMMATRIX M;
M.m[0][0] = TwoNearZ / ViewWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = TwoNearZ / ViewHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = -fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (FarZ - NearZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2);
M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (FarZ - NearZ);
XMVECTOR rMem = {
TwoNearZ / ViewWidth,
TwoNearZ / ViewHeight,
fRange,
-fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_setzero_ps();
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveRH
(
float ViewWidth,
float ViewHeight,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (NearZ - FarZ);
XMMATRIX M;
M.m[0][0] = TwoNearZ / ViewWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = TwoNearZ / ViewHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = -1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (NearZ - FarZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(TwoNearZ / ViewWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(TwoNearZ / ViewHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2);
M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float TwoNearZ = NearZ + NearZ;
float fRange = FarZ / (NearZ - FarZ);
XMVECTOR rMem = {
TwoNearZ / ViewWidth,
TwoNearZ / ViewHeight,
fRange,
fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_setzero_ps();
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovLH
(
float FovAngleY,
float AspectRatio,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float Height = CosFov / SinFov;
float Width = Height / AspectRatio;
float fRange = FarZ / (FarZ - NearZ);
XMMATRIX M;
M.m[0][0] = Width;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = Height;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = -fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float fRange = FarZ / (FarZ - NearZ);
float Height = CosFov / SinFov;
float Width = Height / AspectRatio;
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(Width, Zero, 0);
M.r[1] = vsetq_lane_f32(Height, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, g_XMIdentityR3.v, 2);
M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float fRange = FarZ / (FarZ - NearZ);
float Height = CosFov / SinFov;
XMVECTOR rMem = {
Height / AspectRatio,
Height,
fRange,
-fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
XMMATRIX M;
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vTemp = _mm_setzero_ps();
vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveFovRH
(
float FovAngleY,
float AspectRatio,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(FovAngleY, 0.0f, 0.00001f * 2.0f));
assert(!XMScalarNearEqual(AspectRatio, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float Height = CosFov / SinFov;
float Width = Height / AspectRatio;
float fRange = FarZ / (NearZ - FarZ);
XMMATRIX M;
M.m[0][0] = Width;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = Height;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = -1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float fRange = FarZ / (NearZ - FarZ);
float Height = CosFov / SinFov;
float Width = Height / AspectRatio;
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(Width, Zero, 0);
M.r[1] = vsetq_lane_f32(Height, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, g_XMNegIdentityR3.v, 2);
M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
float SinFov;
float CosFov;
XMScalarSinCos(&SinFov, &CosFov, 0.5f * FovAngleY);
float fRange = FarZ / (NearZ - FarZ);
float Height = CosFov / SinFov;
XMVECTOR rMem = {
Height / AspectRatio,
Height,
fRange,
fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
XMMATRIX M;
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vTemp = _mm_setzero_ps();
vValues = _mm_shuffle_ps(vValues, g_XMNegIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterLH
(
float ViewLeft,
float ViewRight,
float ViewBottom,
float ViewTop,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (FarZ - NearZ);
XMMATRIX M;
M.m[0][0] = TwoNearZ * ReciprocalWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = TwoNearZ * ReciprocalHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
M.m[2][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
M.m[2][2] = fRange;
M.m[2][3] = 1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = -fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (FarZ - NearZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1);
M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
1.0f);
M.r[3] = vsetq_lane_f32(-fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (FarZ - NearZ);
XMVECTOR rMem = {
TwoNearZ * ReciprocalWidth,
TwoNearZ * ReciprocalHeight,
-fRange * NearZ,
0
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
M.r[2] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
1.0f);
vValues = _mm_and_ps(vValues, g_XMMaskZ);
M.r[3] = vValues;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixPerspectiveOffCenterRH
(
float ViewLeft,
float ViewRight,
float ViewBottom,
float ViewTop,
float NearZ,
float FarZ
) noexcept
{
assert(NearZ > 0.f && FarZ > 0.f);
assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (NearZ - FarZ);
XMMATRIX M;
M.m[0][0] = TwoNearZ * ReciprocalWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = TwoNearZ * ReciprocalHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = (ViewLeft + ViewRight) * ReciprocalWidth;
M.m[2][1] = (ViewTop + ViewBottom) * ReciprocalHeight;
M.m[2][2] = fRange;
M.m[2][3] = -1.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = fRange * NearZ;
M.m[3][3] = 0.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (NearZ - FarZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(TwoNearZ * ReciprocalWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(TwoNearZ * ReciprocalHeight, Zero, 1);
M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
-1.0f);
M.r[3] = vsetq_lane_f32(fRange * NearZ, Zero, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float TwoNearZ = NearZ + NearZ;
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = FarZ / (NearZ - FarZ);
XMVECTOR rMem = {
TwoNearZ * ReciprocalWidth,
TwoNearZ * ReciprocalHeight,
fRange * NearZ,
0
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
M.r[2] = XMVectorSet((ViewLeft + ViewRight) * ReciprocalWidth,
(ViewTop + ViewBottom) * ReciprocalHeight,
fRange,
-1.0f);
vValues = _mm_and_ps(vValues, g_XMMaskZ);
M.r[3] = vValues;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicLH
(
float ViewWidth,
float ViewHeight,
float NearZ,
float FarZ
) noexcept
{
assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float fRange = 1.0f / (FarZ - NearZ);
XMMATRIX M;
M.m[0][0] = 2.0f / ViewWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = 2.0f / ViewHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = -fRange * NearZ;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float fRange = 1.0f / (FarZ - NearZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, Zero, 2);
M.r[3] = vsetq_lane_f32(-fRange * NearZ, g_XMIdentityR3.v, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float fRange = 1.0f / (FarZ - NearZ);
XMVECTOR rMem = {
2.0f / ViewWidth,
2.0f / ViewHeight,
fRange,
-fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vTemp = _mm_setzero_ps();
vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicRH
(
float ViewWidth,
float ViewHeight,
float NearZ,
float FarZ
) noexcept
{
assert(!XMScalarNearEqual(ViewWidth, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(ViewHeight, 0.0f, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float fRange = 1.0f / (NearZ - FarZ);
XMMATRIX M;
M.m[0][0] = 2.0f / ViewWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = 2.0f / ViewHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 0.0f;
M.m[3][0] = 0.0f;
M.m[3][1] = 0.0f;
M.m[3][2] = fRange * NearZ;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float fRange = 1.0f / (NearZ - FarZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(2.0f / ViewWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(2.0f / ViewHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, Zero, 2);
M.r[3] = vsetq_lane_f32(fRange * NearZ, g_XMIdentityR3.v, 2);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float fRange = 1.0f / (NearZ - FarZ);
XMVECTOR rMem = {
2.0f / ViewWidth,
2.0f / ViewHeight,
fRange,
fRange * NearZ
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
M.r[1] = vTemp;
vTemp = _mm_setzero_ps();
vValues = _mm_shuffle_ps(vValues, g_XMIdentityR3, _MM_SHUFFLE(3, 2, 3, 2));
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(2, 0, 0, 0));
M.r[2] = vTemp;
vTemp = _mm_shuffle_ps(vTemp, vValues, _MM_SHUFFLE(3, 1, 0, 0));
M.r[3] = vTemp;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterLH
(
float ViewLeft,
float ViewRight,
float ViewBottom,
float ViewTop,
float NearZ,
float FarZ
) noexcept
{
assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (FarZ - NearZ);
XMMATRIX M;
M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 0.0f;
M.m[3][0] = -(ViewLeft + ViewRight) * ReciprocalWidth;
M.m[3][1] = -(ViewTop + ViewBottom) * ReciprocalHeight;
M.m[3][2] = -fRange * NearZ;
M.m[3][3] = 1.0f;
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (FarZ - NearZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(ReciprocalWidth + ReciprocalWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, Zero, 2);
M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
-fRange * NearZ,
1.0f);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (FarZ - NearZ);
XMVECTOR rMem = {
fReciprocalWidth,
fReciprocalHeight,
fRange,
1.0f
};
XMVECTOR rMem2 = {
-(ViewLeft + ViewRight),
-(ViewTop + ViewBottom),
-NearZ,
1.0f
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
vTemp = _mm_add_ss(vTemp, vTemp);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
vTemp = _mm_add_ps(vTemp, vTemp);
M.r[1] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskZ);
M.r[2] = vTemp;
vValues = _mm_mul_ps(vValues, rMem2);
M.r[3] = vValues;
return M;
#endif
}
inline XMMATRIX XM_CALLCONV XMMatrixOrthographicOffCenterRH
(
float ViewLeft,
float ViewRight,
float ViewBottom,
float ViewTop,
float NearZ,
float FarZ
) noexcept
{
assert(!XMScalarNearEqual(ViewRight, ViewLeft, 0.00001f));
assert(!XMScalarNearEqual(ViewTop, ViewBottom, 0.00001f));
assert(!XMScalarNearEqual(FarZ, NearZ, 0.00001f));
#if defined(_XM_NO_INTRINSICS_)
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (NearZ - FarZ);
XMMATRIX M;
M.m[0][0] = ReciprocalWidth + ReciprocalWidth;
M.m[0][1] = 0.0f;
M.m[0][2] = 0.0f;
M.m[0][3] = 0.0f;
M.m[1][0] = 0.0f;
M.m[1][1] = ReciprocalHeight + ReciprocalHeight;
M.m[1][2] = 0.0f;
M.m[1][3] = 0.0f;
M.m[2][0] = 0.0f;
M.m[2][1] = 0.0f;
M.m[2][2] = fRange;
M.m[2][3] = 0.0f;
M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
fRange * NearZ,
1.0f);
return M;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
float ReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float ReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (NearZ - FarZ);
const float32x4_t Zero = vdupq_n_f32(0);
XMMATRIX M;
M.r[0] = vsetq_lane_f32(ReciprocalWidth + ReciprocalWidth, Zero, 0);
M.r[1] = vsetq_lane_f32(ReciprocalHeight + ReciprocalHeight, Zero, 1);
M.r[2] = vsetq_lane_f32(fRange, Zero, 2);
M.r[3] = XMVectorSet(-(ViewLeft + ViewRight) * ReciprocalWidth,
-(ViewTop + ViewBottom) * ReciprocalHeight,
fRange * NearZ,
1.0f);
return M;
#elif defined(_XM_SSE_INTRINSICS_)
XMMATRIX M;
float fReciprocalWidth = 1.0f / (ViewRight - ViewLeft);
float fReciprocalHeight = 1.0f / (ViewTop - ViewBottom);
float fRange = 1.0f / (NearZ - FarZ);
XMVECTOR rMem = {
fReciprocalWidth,
fReciprocalHeight,
fRange,
1.0f
};
XMVECTOR rMem2 = {
-(ViewLeft + ViewRight),
-(ViewTop + ViewBottom),
NearZ,
1.0f
};
XMVECTOR vValues = rMem;
XMVECTOR vTemp = _mm_setzero_ps();
vTemp = _mm_move_ss(vTemp, vValues);
vTemp = _mm_add_ss(vTemp, vTemp);
M.r[0] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskY);
vTemp = _mm_add_ps(vTemp, vTemp);
M.r[1] = vTemp;
vTemp = vValues;
vTemp = _mm_and_ps(vTemp, g_XMMaskZ);
M.r[2] = vTemp;
vValues = _mm_mul_ps(vValues, rMem2);
M.r[3] = vValues;
return M;
#endif
}
#ifdef _PREFAST_
#pragma prefast(pop)
#endif
inline XMMATRIX::XMMATRIX
(
float m00, float m01, float m02, float m03,
float m10, float m11, float m12, float m13,
float m20, float m21, float m22, float m23,
float m30, float m31, float m32, float m33
) noexcept
{
r[0] = XMVectorSet(m00, m01, m02, m03);
r[1] = XMVectorSet(m10, m11, m12, m13);
r[2] = XMVectorSet(m20, m21, m22, m23);
r[3] = XMVectorSet(m30, m31, m32, m33);
}
_Use_decl_annotations_
inline XMMATRIX::XMMATRIX(const float* pArray) noexcept
{
assert(pArray != nullptr);
r[0] = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray));
r[1] = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray + 4));
r[2] = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray + 8));
r[3] = XMLoadFloat4(reinterpret_cast<const XMFLOAT4*>(pArray + 12));
}
inline XMMATRIX XMMATRIX::operator- () const noexcept
{
XMMATRIX R;
R.r[0] = XMVectorNegate(r[0]);
R.r[1] = XMVectorNegate(r[1]);
R.r[2] = XMVectorNegate(r[2]);
R.r[3] = XMVectorNegate(r[3]);
return R;
}
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator+= (FXMMATRIX M) noexcept
{
r[0] = XMVectorAdd(r[0], M.r[0]);
r[1] = XMVectorAdd(r[1], M.r[1]);
r[2] = XMVectorAdd(r[2], M.r[2]);
r[3] = XMVectorAdd(r[3], M.r[3]);
return *this;
}
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator-= (FXMMATRIX M) noexcept
{
r[0] = XMVectorSubtract(r[0], M.r[0]);
r[1] = XMVectorSubtract(r[1], M.r[1]);
r[2] = XMVectorSubtract(r[2], M.r[2]);
r[3] = XMVectorSubtract(r[3], M.r[3]);
return *this;
}
inline XMMATRIX& XM_CALLCONV XMMATRIX::operator*=(FXMMATRIX M) noexcept
{
*this = XMMatrixMultiply(*this, M);
return *this;
}
inline XMMATRIX& XMMATRIX::operator*= (float S) noexcept
{
r[0] = XMVectorScale(r[0], S);
r[1] = XMVectorScale(r[1], S);
r[2] = XMVectorScale(r[2], S);
r[3] = XMVectorScale(r[3], S);
return *this;
}
inline XMMATRIX& XMMATRIX::operator/= (float S) noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vS = XMVectorReplicate(S);
r[0] = XMVectorDivide(r[0], vS);
r[1] = XMVectorDivide(r[1], vS);
r[2] = XMVectorDivide(r[2], vS);
r[3] = XMVectorDivide(r[3], vS);
return *this;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
float32x4_t vS = vdupq_n_f32(S);
r[0] = vdivq_f32(r[0], vS);
r[1] = vdivq_f32(r[1], vS);
r[2] = vdivq_f32(r[2], vS);
r[3] = vdivq_f32(r[3], vS);
#else
float32x2_t vS = vdup_n_f32(S);
float32x2_t R0 = vrecpe_f32(vS);
float32x2_t S0 = vrecps_f32(R0, vS);
R0 = vmul_f32(S0, R0);
S0 = vrecps_f32(R0, vS);
R0 = vmul_f32(S0, R0);
float32x4_t Reciprocal = vcombine_f32(R0, R0);
r[0] = vmulq_f32(r[0], Reciprocal);
r[1] = vmulq_f32(r[1], Reciprocal);
r[2] = vmulq_f32(r[2], Reciprocal);
r[3] = vmulq_f32(r[3], Reciprocal);
#endif
return *this;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 vS = _mm_set_ps1(S);
r[0] = _mm_div_ps(r[0], vS);
r[1] = _mm_div_ps(r[1], vS);
r[2] = _mm_div_ps(r[2], vS);
r[3] = _mm_div_ps(r[3], vS);
return *this;
#endif
}
inline XMMATRIX XM_CALLCONV XMMATRIX::operator+ (FXMMATRIX M) const noexcept
{
XMMATRIX R;
R.r[0] = XMVectorAdd(r[0], M.r[0]);
R.r[1] = XMVectorAdd(r[1], M.r[1]);
R.r[2] = XMVectorAdd(r[2], M.r[2]);
R.r[3] = XMVectorAdd(r[3], M.r[3]);
return R;
}
inline XMMATRIX XM_CALLCONV XMMATRIX::operator- (FXMMATRIX M) const noexcept
{
XMMATRIX R;
R.r[0] = XMVectorSubtract(r[0], M.r[0]);
R.r[1] = XMVectorSubtract(r[1], M.r[1]);
R.r[2] = XMVectorSubtract(r[2], M.r[2]);
R.r[3] = XMVectorSubtract(r[3], M.r[3]);
return R;
}
inline XMMATRIX XM_CALLCONV XMMATRIX::operator*(FXMMATRIX M) const noexcept
{
return XMMatrixMultiply(*this, M);
}
inline XMMATRIX XMMATRIX::operator* (float S) const noexcept
{
XMMATRIX R;
R.r[0] = XMVectorScale(r[0], S);
R.r[1] = XMVectorScale(r[1], S);
R.r[2] = XMVectorScale(r[2], S);
R.r[3] = XMVectorScale(r[3], S);
return R;
}
inline XMMATRIX XMMATRIX::operator/ (float S) const noexcept
{
#if defined(_XM_NO_INTRINSICS_)
XMVECTOR vS = XMVectorReplicate(S);
XMMATRIX R;
R.r[0] = XMVectorDivide(r[0], vS);
R.r[1] = XMVectorDivide(r[1], vS);
R.r[2] = XMVectorDivide(r[2], vS);
R.r[3] = XMVectorDivide(r[3], vS);
return R;
#elif defined(_XM_ARM_NEON_INTRINSICS_)
#if defined(_M_ARM64) || defined(_M_HYBRID_X86_ARM64) || defined(_M_ARM64EC) || __aarch64__
float32x4_t vS = vdupq_n_f32(S);
XMMATRIX R;
R.r[0] = vdivq_f32(r[0], vS);
R.r[1] = vdivq_f32(r[1], vS);
R.r[2] = vdivq_f32(r[2], vS);
R.r[3] = vdivq_f32(r[3], vS);
#else
float32x2_t vS = vdup_n_f32(S);
float32x2_t R0 = vrecpe_f32(vS);
float32x2_t S0 = vrecps_f32(R0, vS);
R0 = vmul_f32(S0, R0);
S0 = vrecps_f32(R0, vS);
R0 = vmul_f32(S0, R0);
float32x4_t Reciprocal = vcombine_f32(R0, R0);
XMMATRIX R;
R.r[0] = vmulq_f32(r[0], Reciprocal);
R.r[1] = vmulq_f32(r[1], Reciprocal);
R.r[2] = vmulq_f32(r[2], Reciprocal);
R.r[3] = vmulq_f32(r[3], Reciprocal);
#endif
return R;
#elif defined(_XM_SSE_INTRINSICS_)
__m128 vS = _mm_set_ps1(S);
XMMATRIX R;
R.r[0] = _mm_div_ps(r[0], vS);
R.r[1] = _mm_div_ps(r[1], vS);
R.r[2] = _mm_div_ps(r[2], vS);
R.r[3] = _mm_div_ps(r[3], vS);
return R;
#endif
}
inline XMMATRIX XM_CALLCONV operator*
(
float S,
FXMMATRIX M
) noexcept
{
XMMATRIX R;
R.r[0] = XMVectorScale(M.r[0], S);
R.r[1] = XMVectorScale(M.r[1], S);
R.r[2] = XMVectorScale(M.r[2], S);
R.r[3] = XMVectorScale(M.r[3], S);
return R;
}
_Use_decl_annotations_
inline XMFLOAT3X3::XMFLOAT3X3(const float* pArray) noexcept
{
assert(pArray != nullptr);
for (size_t Row = 0; Row < 3; Row++)
{
for (size_t Column = 0; Column < 3; Column++)
{
m[Row][Column] = pArray[Row * 3 + Column];
}
}
}
_Use_decl_annotations_
inline XMFLOAT4X3::XMFLOAT4X3(const float* pArray) noexcept
{
assert(pArray != nullptr);
m[0][0] = pArray[0];
m[0][1] = pArray[1];
m[0][2] = pArray[2];
m[1][0] = pArray[3];
m[1][1] = pArray[4];
m[1][2] = pArray[5];
m[2][0] = pArray[6];
m[2][1] = pArray[7];
m[2][2] = pArray[8];
m[3][0] = pArray[9];
m[3][1] = pArray[10];
m[3][2] = pArray[11];
}
_Use_decl_annotations_
inline XMFLOAT3X4::XMFLOAT3X4(const float* pArray) noexcept
{
assert(pArray != nullptr);
m[0][0] = pArray[0];
m[0][1] = pArray[1];
m[0][2] = pArray[2];
m[0][3] = pArray[3];
m[1][0] = pArray[4];
m[1][1] = pArray[5];
m[1][2] = pArray[6];
m[1][3] = pArray[7];
m[2][0] = pArray[8];
m[2][1] = pArray[9];
m[2][2] = pArray[10];
m[2][3] = pArray[11];
}
_Use_decl_annotations_
inline XMFLOAT4X4::XMFLOAT4X4(const float* pArray) noexcept
{
assert(pArray != nullptr);
m[0][0] = pArray[0];
m[0][1] = pArray[1];
m[0][2] = pArray[2];
m[0][3] = pArray[3];
m[1][0] = pArray[4];
m[1][1] = pArray[5];
m[1][2] = pArray[6];
m[1][3] = pArray[7];
m[2][0] = pArray[8];
m[2][1] = pArray[9];
m[2][2] = pArray[10];
m[2][3] = pArray[11];
m[3][0] = pArray[12];
m[3][1] = pArray[13];
m[3][2] = pArray[14];
m[3][3] = pArray[15];
}