directx_math/
convert.rs

1
2#[allow(unused_imports)]
3use std::mem;
4
5use crate::*;
6
7/// Converts an XMVECTOR with int32_t components to an XMVECTOR with float components and applies a uniform bias.
8///
9/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMConvertVectorIntToFloat>
10#[inline]
11pub fn XMConvertVectorIntToFloat(VInt: FXMVECTOR, DivExponent: u32) -> XMVECTOR {
12    debug_assert!(DivExponent < 32);
13
14    #[cfg(_XM_NO_INTRINSICS_)]
15    unsafe {
16        let fScale = 1.0 / (1 >> DivExponent) as f32;
17        let mut Result: XMVECTOR = crate::undefined();
18        for ElementIndex in 0..4 {
19            let iTemp = VInt.vector4_u32[ElementIndex];
20            Result.vector4_f32[ElementIndex] = iTemp as f32 * fScale;
21        }
22        return Result;
23    }
24
25    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
26    {
27        unimplemented!()
28    }
29
30    #[cfg(_XM_SSE_INTRINSICS_)]
31    unsafe {
32        // Convert to floats
33        let mut vResult = _mm_cvtepi32_ps(_mm_castps_si128(VInt));
34        // Convert DivExponent into 1.0f/(1<<DivExponent)
35        let uScale = 0x3F800000u32 - (DivExponent << 23);
36        // Splat the scalar value
37        let vScale = _mm_set1_epi32(uScale as i32);
38        vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(vScale));
39        return vResult;
40    }
41}
42
43/// Converts an XMVECTOR with float components to an XMVECTOR with int32_t components and applies a uniform bias.
44///
45/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMConvertVectorFloatToInt>
46#[inline]
47pub fn XMConvertVectorFloatToInt(VFloat: FXMVECTOR, MulExponent: u32) -> XMVECTOR {
48    debug_assert!(MulExponent < 32);
49
50    #[cfg(_XM_NO_INTRINSICS_)]
51    unsafe {
52        let fScale = (1u32 << MulExponent) as f32;
53        let mut Result: XMVECTOR = crate::undefined();
54        for ElementIndex in 0..4 {
55            let iResult: i32;
56            let fTemp: f32 = VFloat.vector4_f32[ElementIndex] * fScale;
57            if (fTemp <= -(65536.0 * 32768.0))
58            {
59                iResult = (-0x7FFFFFFF) - 1;
60            }
61            else if (fTemp > (65536.0 * 32768.0) - 128.0)
62            {
63                iResult = 0x7FFFFFFF;
64            }
65            else {
66                iResult = (fTemp as i32);
67            }
68            Result.vector4_u32[ElementIndex] = (iResult as u32);
69        }
70        return Result;
71    }
72
73    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
74    {
75        unimplemented!()
76    }
77
78    #[cfg(_XM_SSE_INTRINSICS_)]
79    unsafe {
80        let mut vResult: XMVECTOR = _mm_set_ps1((1u32 << MulExponent) as f32);
81        vResult = _mm_mul_ps(vResult, VFloat);
82        // In case of positive overflow, detect it
83        let mut vOverflow: XMVECTOR = _mm_cmpgt_ps(vResult, *g_XMMaxInt);
84        // Float to int conversion
85        let vResulti: __m128i = _mm_cvttps_epi32(vResult);
86        // If there was positive overflow, set to 0x7FFFFFFF
87        vResult = _mm_and_ps(vOverflow, *g_XMAbsMask);
88        vOverflow = _mm_andnot_ps(vOverflow, _mm_castsi128_ps(vResulti));
89        vOverflow = _mm_or_ps(vOverflow, vResult);
90        return vOverflow;
91    }
92}
93
94
95/// Converts an XMVECTOR with uint32_t components to an XMVECTOR with float components and applies a uniform bias.
96///
97/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMConvertVectorUIntToFloat>
98#[inline]
99pub fn XMConvertVectorUIntToFloat(
100    VUInt: FXMVECTOR,
101    DivExponent: u32
102) -> XMVECTOR
103{
104    debug_assert!(DivExponent  < 32);
105
106    #[cfg(_XM_NO_INTRINSICS_)]
107    unsafe {
108        let fScale = 1.0 / (1u32 << DivExponent) as f32;
109        let mut Result: XMVECTOR = crate::undefined();
110        for ElementIndex in 0..4 {
111            Result.vector4_f32[ElementIndex] = (VUInt.vector4_u32[ElementIndex] as f32) * fScale;
112        }
113        return Result;
114    }
115
116    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
117    {
118        unimplemented!()
119    }
120
121    #[cfg(_XM_SSE_INTRINSICS_)]
122    unsafe {
123        // For the values that are higher than 0x7FFFFFFF, a fixup is needed
124        // Determine which ones need the fix.
125        let mut vMask: XMVECTOR = _mm_and_ps(VUInt, *g_XMNegativeZero);
126        // Force all values positive
127        let mut vResult: XMVECTOR = _mm_xor_ps(VUInt, vMask);
128        // Convert to floats
129        vResult = _mm_cvtepi32_ps(_mm_castps_si128(vResult));
130        // Convert 0x80000000 -> 0xFFFFFFFF
131        let mut iMask: __m128i = _mm_srai_epi32(_mm_castps_si128(vMask), 31);
132        // For only the ones that are too big, add the fixup
133        vMask = _mm_and_ps(_mm_castsi128_ps(iMask), *g_XMFixUnsigned);
134        vResult = _mm_add_ps(vResult, vMask);
135        // Convert DivExponent into 1.0f/(1<<DivExponent)
136        let uScale: u32 = 0x3F800000u32 - (DivExponent << 23);
137        // Splat
138        iMask = _mm_set1_epi32(uScale as i32);
139        vResult = _mm_mul_ps(vResult, _mm_castsi128_ps(iMask));
140        return vResult;
141    }
142}
143
144
145/// Converts an XMVECTOR with uint32_t components to an XMVECTOR with float components and applies a uniform bias.
146///
147/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMConvertVectorFloatToUInt>
148#[inline]
149pub fn XMConvertVectorFloatToUInt(
150    VFloat: FXMVECTOR,
151    MulExponent: u32
152) -> XMVECTOR
153{
154    debug_assert!(MulExponent  < 32);
155
156    #[cfg(_XM_NO_INTRINSICS_)]
157    unsafe {
158        let fScale = (1u32 << MulExponent) as f32;
159        let mut Result: XMVECTOR = crate::undefined();
160        for ElementIndex in 0..4 {
161            let uResult: u32;
162            let fTemp: f32 = VFloat.vector4_f32[ElementIndex] * fScale;
163            if (fTemp <= 0.0)
164            {
165                uResult = 0;
166            }
167            else if (fTemp >= (65536.0 * 65536.0))
168            {
169                uResult = 0xFFFFFFFFu32;
170            }
171            else {
172                uResult = (fTemp as u32);
173            }
174            Result.vector4_u32[ElementIndex] = uResult;
175        }
176        return Result;
177    }
178
179    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
180    {
181        unimplemented!()
182    }
183
184    #[cfg(_XM_SSE_INTRINSICS_)]
185    unsafe {
186        let mut vResult: XMVECTOR = _mm_set_ps1((1u32 << MulExponent) as f32);
187        vResult = _mm_mul_ps(vResult, VFloat);
188        // Clamp to >=0
189        vResult = _mm_max_ps(vResult, *g_XMZero);
190        // Any numbers that are too big, set to 0xFFFFFFFFU
191        let vOverflow: XMVECTOR = _mm_cmpgt_ps(vResult, *g_XMMaxUInt);
192        let mut vValue: XMVECTOR = *g_XMUnsignedFix;
193        // Too large for a signed integer?
194        let mut vMask: XMVECTOR = _mm_cmpge_ps(vResult, vValue);
195        // Zero for number's lower than 0x80000000, 32768.0f*65536.0f otherwise
196        vValue = _mm_and_ps(vValue, vMask);
197        // Perform fixup only on numbers too large (Keeps low bit precision)
198        vResult = _mm_sub_ps(vResult, vValue);
199        let vResulti: __m128i = _mm_cvttps_epi32(vResult);
200        // Convert from signed to unsigned pnly if greater than 0x80000000
201        vMask = _mm_and_ps(vMask, *g_XMNegativeZero);
202        vResult = _mm_xor_ps(_mm_castsi128_ps(vResulti), vMask);
203        // On those that are too large, set to 0xFFFFFFFF
204        vResult = _mm_or_ps(vResult, vOverflow);
205        return vResult;
206    }
207}
208
209// TODO: XMLoadInt
210// TODO: XMLoadFloat
211
212/// Loads data into the x and y components of an XMVECTOR.
213///
214/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadInt2>
215#[inline]
216pub fn XMLoadInt2(
217    pSource: &[u32; 2],
218) -> FXMVECTOR
219{
220    #[cfg(_XM_NO_INTRINSICS_)]
221    unsafe {
222        let mut V: XMVECTOR = crate::undefined();
223        V.vector4_u32[0] = pSource[0];
224        V.vector4_u32[1] = pSource[1];
225        V.vector4_u32[2] = 0;
226        V.vector4_u32[3] = 0;
227        return V;
228    }
229
230    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
231    {
232        unimplemented!()
233    }
234
235    #[cfg(_XM_SSE_INTRINSICS_)]
236    unsafe {
237        return _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
238    }
239}
240
/// Checks that `XMLoadInt2` copies both source values into x and y and
/// zero-fills z and w.
#[test]
fn test_XMLoadInt2() {
    // A value with the top bit set must round-trip bit-for-bit.
    let a = XMLoadInt2(&[-1i32 as u32, 1 as u32]);
    let b = XMVectorSetInt(-1i32 as u32, 1 as u32, 0, 0);
    assert_eq!(-1, XMVectorGetIntX(a) as i32);
    assert_eq!( 1, XMVectorGetIntY(a) as i32);
    assert_eq!( 0, XMVectorGetIntZ(a) as i32);
    assert_eq!( 0, XMVectorGetIntW(a) as i32);
    assert!(XMVector2EqualInt(a, b));

    // Inspect the loaded vector `c`, not the hand-built expectation `d`.
    let c = XMLoadInt2(&[1, 2]);
    let d = XMVectorSetInt(1, 2 as u32, 0, 0);
    assert_eq!(1, XMVectorGetIntX(c));
    assert_eq!(2, XMVectorGetIntY(c));
    assert!(XMVector2EqualInt(c, d));
}
255
256// TODO: XMLoadInt2A
257
258/// Loads data into the x, y, and z components of an XMVECTOR, without type checking.
259///
260/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadInt3>
261#[inline]
262pub fn XMLoadFloat2(
263    pSource: &XMFLOAT2,
264) -> FXMVECTOR
265{
266    #[cfg(_XM_NO_INTRINSICS_)]
267    unsafe {
268        let mut V: XMVECTOR = crate::undefined();
269        V.vector4_f32[0] = pSource.x;
270        V.vector4_f32[1] = pSource.y;
271        V.vector4_f32[2] = 0.0;
272        V.vector4_f32[3] = 0.0;
273        return V;
274    }
275
276    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
277    {
278        unimplemented!()
279    }
280
281    #[cfg(_XM_SSE_INTRINSICS_)]
282    unsafe {
283        return _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
284    }
285}
286
287// TODO: XMLoadFloat2A
288// TODO: XMLoadSInt2
289// TODO: XMLoadUInt2
290
291/// Loads data into the x, y, and z components of an XMVECTOR, without type checking.
292///
293/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadInt3>
294#[inline]
295pub fn XMLoadInt3(
296    pSource: &[u32; 3],
297) -> FXMVECTOR
298{
299    #[cfg(_XM_NO_INTRINSICS_)]
300    unsafe {
301        let mut V: XMVECTOR = crate::undefined();
302        V.vector4_u32[0] = pSource[0];
303        V.vector4_u32[1] = pSource[1];
304        V.vector4_u32[2] = pSource[2];
305        V.vector4_u32[3] = 0;
306        return V;
307    }
308
309    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
310    {
311        unimplemented!()
312    }
313
314    #[cfg(_XM_SSE4_INTRINSICS_)]
315    unsafe {
316        let xy: __m128 = _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
317        let z: __m128 = _mm_load_ss(mem::transmute::<_, *const f32>(&pSource[2]));
318        return _mm_insert_ps(xy, z, 0x20);
319    }
320
321    #[cfg(all(_XM_SSE_INTRINSICS_, not(_XM_SSE4_INTRINSICS_)))]
322    unsafe {
323        let xy: __m128 = _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
324        let z: __m128 = _mm_load_ss(mem::transmute::<_, *const f32>(&pSource[2]));
325        return _mm_movelh_ps(xy, z);
326    }
327}
328
/// Checks that `XMLoadInt3` copies the three source values into x, y and z
/// and zero-fills w.
#[test]
fn test_XMLoadInt3() {
    // A value with the top bit set must round-trip bit-for-bit.
    let a = XMLoadInt3(&[-1i32 as u32, 0 as u32, 1 as u32]);
    let b = XMVectorSetInt(-1i32 as u32, 0 as u32, 1, 0);
    assert_eq!(-1, XMVectorGetIntX(a) as i32);
    assert_eq!( 0, XMVectorGetIntY(a) as i32);
    assert_eq!( 1, XMVectorGetIntZ(a) as i32);
    assert_eq!( 0, XMVectorGetIntW(a) as i32);
    assert!(XMVector3EqualInt(a, b));

    // Inspect the loaded vector `c`, not the hand-built expectation `d`.
    let c = XMLoadInt3(&[1, 2, 3]);
    let d = XMVectorSetInt(1, 2 as u32, 3, 0);
    assert_eq!(1, XMVectorGetIntX(c));
    assert_eq!(2, XMVectorGetIntY(c));
    assert_eq!(3, XMVectorGetIntZ(c));
    assert!(XMVector3EqualInt(c, d));
}
345
346// TODO: XMLoadInt3A
347
348/// Loads an XMFLOAT3 into an XMVECTOR.
349///
350/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat3>
351#[inline]
352pub fn XMLoadFloat3(
353    pSource: &XMFLOAT3,
354) -> XMVECTOR
355{
356    #[cfg(_XM_NO_INTRINSICS_)]
357    unsafe {
358        let mut V: XMVECTOR = crate::undefined();
359        V.vector4_f32[0] = pSource.x;
360        V.vector4_f32[1] = pSource.y;
361        V.vector4_f32[2] = pSource.z;
362        V.vector4_f32[3] = 0.0;
363        return V;
364    }
365
366    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
367    {
368        unimplemented!()
369    }
370
371    #[cfg(_XM_SSE4_INTRINSICS_)]
372    unsafe {
373        let xy: __m128 = _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
374        let z: __m128 = _mm_load_ss(&pSource.z);
375        return _mm_insert_ps(xy, z, 0x20);
376    }
377
378    #[cfg(all(_XM_SSE_INTRINSICS_, not(_XM_SSE4_INTRINSICS_)))]
379    unsafe {
380        let xy: __m128 = _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
381        let z: __m128 = _mm_load_ss(&pSource.z);
382        return _mm_movelh_ps(xy, z);
383    }
384}
385
/// Checks that `XMLoadFloat3` copies x, y and z and zero-fills w.
#[test]
fn test_XMLoadFloat3() {
    let source = XMFLOAT3 { x: 1.0, y: 2.0, z: 3.0 };
    let loaded = XMLoadFloat3(&source);
    assert_eq!(1.0, XMVectorGetX(loaded));
    assert_eq!(2.0, XMVectorGetY(loaded));
    assert_eq!(3.0, XMVectorGetZ(loaded));
    assert_eq!(0.0, XMVectorGetW(loaded));
}
394
395/// Loads an XMFLOAT3 into an aligned XMVECTOR.
396///
397/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat3A>
398#[inline]
399pub fn XMLoadFloat3A(
400    pSource: &Align16<XMFLOAT3>,
401) -> XMVECTOR
402{
403    #[cfg(_XM_NO_INTRINSICS_)]
404    unsafe {
405        let mut V: XMVECTOR = crate::undefined();
406        V.vector4_f32[0] = pSource.x;
407        V.vector4_f32[1] = pSource.y;
408        V.vector4_f32[2] = pSource.z;
409        V.vector4_f32[3] = 0.0;
410        return V;
411    }
412
413    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
414    {
415        unimplemented!()
416    }
417
418    #[cfg(all(_XM_SSE_INTRINSICS_))]
419    unsafe {
420         // Reads an extra float which is zero'd
421        let V: __m128 = _mm_load_ps(&pSource.x);
422        return _mm_and_ps(V, g_XMMask3.v);
423    }
424}
425
/// Checks that `XMLoadFloat3A` copies x, y and z and zero-fills w.
#[test]
fn test_XMLoadFloat3A() {
    let source = Align16(XMFLOAT3 { x: 1.0, y: 2.0, z: 3.0 });
    let loaded = XMLoadFloat3A(&source);
    assert_eq!(1.0, XMVectorGetX(loaded));
    assert_eq!(2.0, XMVectorGetY(loaded));
    assert_eq!(3.0, XMVectorGetZ(loaded));
    assert_eq!(0.0, XMVectorGetW(loaded));
}
434
435// TODO: XMLoadSInt3
436
437// /// Loads signed integer data into the x, y, and z components of an XMVECTOR.
438// ///
439// /// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadSInt3>
440// #[inline]
441// pub fn XMLoadSInt3(
442//     pSource: &XMINT3,
443// ) -> FXMVECTOR
444// {
445//     #[cfg(_XM_NO_INTRINSICS_)]
446//     unsafe {
447//         let mut V: XMVECTOR = crate::undefined();
448//         V.vector4_f32[0] = (pSource.x as f32);
449//         V.vector4_f32[1] = (pSource.y as f32);
450//         V.vector4_f32[2] = (pSource.z as f32);
451//         V.vector4_f32[3] = 0.0;
452//         return V;
453//     }
454
455//     #[cfg(_XM_ARM_NEON_INTRINSICS_)]
456//     {
457//         unimplemented!()
458//     }
459
460//     #[cfg(_XM_SSE_INTRINSICS_)]
461//     unsafe {
462//         let xy: __m128 = _mm_castpd_ps(_mm_load_sd(mem::transmute::<_, *const f64>(pSource)));
463//         let z: __m128 = _mm_load_ss(mem::transmute::<_, *const f32>(&pSource.z));
464//         let V: __m128 = _mm_movelh_ps(xy, z);
465//         return _mm_cvtepi32_ps(_mm_castps_si128(V));
466//     }
467// }
468
469// #[test]
470// fn test_XMLoadSInt3() {
471//     let a = XMLoadSInt3(&XMINT3 { x: 1, y: 2, z: 3 });
472//     assert_eq!(1.0, XMVectorGetX(a));
473//     assert_eq!(2.0, XMVectorGetY(a));
474//     assert_eq!(3.0, XMVectorGetZ(a));
475//     assert_eq!(0.0, XMVectorGetW(a));
476
477//     let a = XMLoadSInt3(&XMINT3 { x: -1, y: -2, z: -3 });
478//     assert_eq!(-1.0, XMVectorGetX(a));
479//     assert_eq!(-2.0, XMVectorGetY(a));
480//     assert_eq!(-3.0, XMVectorGetZ(a));
481//     assert_eq!(0.0, XMVectorGetW(a));
482// }
483
484
485// TODO: XMLoadUInt3
486
487/// Loads data into an XMVECTOR, without type checking.
488///
489/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadInt4>
490#[inline]
491pub fn XMLoadInt4(
492    pSource: &[u32; 4],
493) -> FXMVECTOR
494{
495    #[cfg(_XM_NO_INTRINSICS_)]
496    unsafe {
497        let mut V: XMVECTOR = crate::undefined();
498        V.vector4_u32[0] = pSource[0];
499        V.vector4_u32[1] = pSource[1];
500        V.vector4_u32[2] = pSource[2];
501        V.vector4_u32[3] = pSource[3];
502        return V;
503    }
504
505    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
506    {
507        unimplemented!()
508    }
509
510    #[cfg(all(_XM_SSE_INTRINSICS_))]
511    unsafe {
512        let V: __m128i = _mm_loadu_si128(mem::transmute::<_, *const __m128i>(pSource));
513        return _mm_castsi128_ps(V);
514    }
515}
516
/// Checks that `XMLoadInt4` copies all four source values into the vector.
#[test]
fn test_XMLoadInt4() {
    // A value with the top bit set must round-trip bit-for-bit.
    let a = XMLoadInt4(&[-1i32 as u32, 0 as u32, 1 as u32, 2 as u32]);
    let b = XMVectorSetInt(-1i32 as u32, 0 as u32, 1, 2);
    assert_eq!(-1, XMVectorGetIntX(a) as i32);
    assert_eq!( 0, XMVectorGetIntY(a) as i32);
    assert_eq!( 1, XMVectorGetIntZ(a) as i32);
    assert_eq!( 2, XMVectorGetIntW(a) as i32);
    assert!(XMVector4EqualInt(a, b));

    // Inspect the loaded vector `c`, not the hand-built expectation `d`.
    let c = XMLoadInt4(&[1, 2, 3, 4]);
    let d = XMVectorSetInt(1, 2 as u32, 3, 4);
    assert_eq!(1, XMVectorGetIntX(c));
    assert_eq!(2, XMVectorGetIntY(c));
    assert_eq!(3, XMVectorGetIntZ(c));
    assert_eq!(4, XMVectorGetIntW(c));
    assert!(XMVector4EqualInt(c, d));
}
535
536// TODO: XMLoadInt4A
537
538/// Loads an XMFLOAT4 into an XMVECTOR.
539///
540/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat4>
541#[inline]
542pub fn XMLoadFloat4(
543    pSource: &XMFLOAT4,
544) -> XMVECTOR
545{
546    #[cfg(_XM_NO_INTRINSICS_)]
547    unsafe {
548        let mut V: XMVECTOR = crate::undefined();
549        V.vector4_f32[0] = pSource.x;
550        V.vector4_f32[1] = pSource.y;
551        V.vector4_f32[2] = pSource.z;
552        V.vector4_f32[3] = pSource.w;
553        return V;
554    }
555
556    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
557    {
558        unimplemented!()
559    }
560
561    #[cfg(_XM_SSE_INTRINSICS_)]
562    unsafe {
563        return _mm_loadu_ps(&pSource.x);
564    }
565}
566
/// Checks that `XMLoadFloat4` copies all four components.
#[test]
fn test_XMLoadFloat4() {
    let source = XMFLOAT4 { x: 1.0, y: 2.0, z: 3.0, w: 4.0 };
    let loaded = XMLoadFloat4(&source);
    assert_eq!(1.0, XMVectorGetX(loaded));
    assert_eq!(2.0, XMVectorGetY(loaded));
    assert_eq!(3.0, XMVectorGetZ(loaded));
    assert_eq!(4.0, XMVectorGetW(loaded));
}
575
576/// Loads an XMFLOAT4 into an aligned XMVECTOR.
577///
578/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat4A>
579#[inline]
580pub fn XMLoadFloat4A(
581    pSource: &Align16<XMFLOAT4>,
582) -> XMVECTOR
583{
584    #[cfg(_XM_NO_INTRINSICS_)]
585    unsafe {
586        let mut V: XMVECTOR = crate::undefined();
587        V.vector4_f32[0] = pSource.x;
588        V.vector4_f32[1] = pSource.y;
589        V.vector4_f32[2] = pSource.z;
590        V.vector4_f32[3] = pSource.w;
591        return V;
592    }
593
594    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
595    {
596        unimplemented!()
597    }
598
599    #[cfg(_XM_SSE_INTRINSICS_)]
600    unsafe {
601        return _mm_load_ps(&pSource.x);
602    }
603}
604
/// Checks that `XMLoadFloat4A` copies all four components.
#[test]
fn test_XMLoadFloat4A() {
    let source = Align16(XMFLOAT4 { x: 1.0, y: 2.0, z: 3.0, w: 4.0 });
    let loaded = XMLoadFloat4A(&source);
    assert_eq!(1.0, XMVectorGetX(loaded));
    assert_eq!(2.0, XMVectorGetY(loaded));
    assert_eq!(3.0, XMVectorGetZ(loaded));
    assert_eq!(4.0, XMVectorGetW(loaded));
}
613
614// TODO: XMLoadSInt4
615// TODO: XMLoadUInt4
616
617/// Loads an XMFLOAT3X3 into an MATRIX.
618///
619/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat3x3>
620#[inline]
621pub fn XMLoadFloat3x3(
622    pSource: &XMFLOAT3X3,
623) -> XMMATRIX
624{
625    #[cfg(_XM_NO_INTRINSICS_)]
626    unsafe {
627        let mut M: XMMATRIX  = crate::undefined();
628        M.r[0].vector4_f32[0] = pSource.m[0][0];
629        M.r[0].vector4_f32[1] = pSource.m[0][1];
630        M.r[0].vector4_f32[2] = pSource.m[0][2];
631        M.r[0].vector4_f32[3] = 0.0;
632
633        M.r[1].vector4_f32[0] = pSource.m[1][0];
634        M.r[1].vector4_f32[1] = pSource.m[1][1];
635        M.r[1].vector4_f32[2] = pSource.m[1][2];
636        M.r[1].vector4_f32[3] = 0.0;
637
638        M.r[2].vector4_f32[0] = pSource.m[2][0];
639        M.r[2].vector4_f32[1] = pSource.m[2][1];
640        M.r[2].vector4_f32[2] = pSource.m[2][2];
641        M.r[2].vector4_f32[3] = 0.0;
642
643        M.r[3].vector4_f32[0] = 0.0;
644        M.r[3].vector4_f32[1] = 0.0;
645        M.r[3].vector4_f32[2] = 0.0;
646        M.r[3].vector4_f32[3] = 1.0;
647        return M;
648    }
649
650    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
651    {
652        unimplemented!()
653    }
654
655    #[cfg(_XM_SSE_INTRINSICS_)]
656    unsafe {
657        let Z: __m128 = _mm_setzero_ps();
658
659        let V1: __m128 = _mm_loadu_ps(&pSource.m[0][0]);
660        let V2: __m128 = _mm_loadu_ps(&pSource.m[1][1]);
661        let V3: __m128 = _mm_load_ss(&pSource.m[2][2]);
662
663        let T1: __m128 = _mm_unpackhi_ps(V1, Z);
664        let T2: __m128 = _mm_unpacklo_ps(V2, Z);
665        let T3: __m128 = _mm_shuffle_ps(V3, T2, _MM_SHUFFLE(0, 1, 0, 0));
666        let T4: __m128 = _mm_movehl_ps(T2, T3);
667        let T5: __m128 = _mm_movehl_ps(Z, T1);
668
669        let mut M: XMMATRIX  = crate::undefined();
670        M.r[0] = _mm_movelh_ps(V1, T1);
671        M.r[1] = _mm_add_ps(T4, T5);
672        M.r[2] = _mm_shuffle_ps(V2, V3, _MM_SHUFFLE(1, 0, 3, 2));
673        M.r[3] = *g_XMIdentityR3;
674        return M;
675    }
676}
677
678/// Loads an XMFLOAT4X3 into an XMMATRIX.
679///
680/// ## Parameters
681///
682/// `pSource` Address of the XMFLOAT4X3 structure to load. This parameter must point to cached memory.
683///
684/// ## Return value
685///
686/// Returns an XMMATRIX loaded with the data from the pSource parameter.
687/// This function performs a partial load of the returned XMMATRIX. See Getting Started for more information.
688///
689/// ## Remarks
690///
691/// XMFLOAT4X3 is a row-major form of the matrix. This function cannot be used to read column-major data
692/// since it assumes the last column is `0 0 0 1`.
693///
694/// The members of the XMFLOAT4X3 structure (_11, _12, _13, and so on) are loaded into the corresponding
695/// members of the XMMATRIX. The remaining members of the returned XMMATRIX are `0.0`, except for _44, which
696/// is `1.0`.
697///
698/// ## Example
699///
700/// ```rust
701/// # use directx_math::*;
702/// let data = XMFLOAT4X3::from([
703///     [10.0, 11.0, 12.0],
704///     [13.0, 14.0, 15.0],
705///     [16.0, 17.0, 18.0],
706///     [19.0, 20.0, 21.0],
707/// ]);
708///
709/// let m = XMLoadFloat4x3(&data);
710/// let m: [[f32; 4]; 4] = XMMatrix(m).into();
711/// assert_eq!(m, [
712///     [10.0, 11.0, 12.0, 0.0],
713///     [13.0, 14.0, 15.0, 0.0],
714///     [16.0, 17.0, 18.0, 0.0],
715///     [19.0, 20.0, 21.0, 1.0],
716/// ]);
717///
718/// ```
719///
720/// ## Reference
721///
722/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat4x3>
723#[inline]
724pub fn XMLoadFloat4x3(
725    pSource: &XMFLOAT4X3,
726) -> XMMATRIX
727{
728    #[cfg(_XM_NO_INTRINSICS_)]
729    unsafe {
730        let mut M: XMMATRIX = crate::undefined();
731        M.r[0].vector4_f32[0] = pSource.m[0][0];
732        M.r[0].vector4_f32[1] = pSource.m[0][1];
733        M.r[0].vector4_f32[2] = pSource.m[0][2];
734        M.r[0].vector4_f32[3] = 0.0;
735
736        M.r[1].vector4_f32[0] = pSource.m[1][0];
737        M.r[1].vector4_f32[1] = pSource.m[1][1];
738        M.r[1].vector4_f32[2] = pSource.m[1][2];
739        M.r[1].vector4_f32[3] = 0.0;
740
741        M.r[2].vector4_f32[0] = pSource.m[2][0];
742        M.r[2].vector4_f32[1] = pSource.m[2][1];
743        M.r[2].vector4_f32[2] = pSource.m[2][2];
744        M.r[2].vector4_f32[3] = 0.0;
745
746        M.r[3].vector4_f32[0] = pSource.m[3][0];
747        M.r[3].vector4_f32[1] = pSource.m[3][1];
748        M.r[3].vector4_f32[2] = pSource.m[3][2];
749        M.r[3].vector4_f32[3] = 1.0;
750        return M;
751    }
752
753    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
754    {
755        unimplemented!()
756    }
757
758    #[cfg(_XM_SSE_INTRINSICS_)]
759    unsafe {
760        // Use unaligned load instructions to
761        // load the 12 floats
762        // vTemp1 = x1,y1,z1,x2
763        let mut vTemp1: XMVECTOR = _mm_loadu_ps(&pSource.m[0][0]);
764        // vTemp2 = y2,z2,x3,y3
765        let mut vTemp2: XMVECTOR = _mm_loadu_ps(&pSource.m[1][1]);
766        // vTemp4 = z3,x4,y4,z4
767        let vTemp4: XMVECTOR = _mm_loadu_ps(&pSource.m[2][2]);
768        // vTemp3 = x3,y3,z3,z3
769        let mut vTemp3: XMVECTOR = _mm_shuffle_ps(vTemp2, vTemp4, _MM_SHUFFLE(0, 0, 3, 2));
770        // vTemp2 = y2,z2,x2,x2
771        vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(3, 3, 1, 0));
772        // vTemp2 = x2,y2,z2,z2
773        vTemp2 = XM_PERMUTE_PS!(vTemp2, _MM_SHUFFLE(1, 1, 0, 2));
774        // vTemp1 = x1,y1,z1,0
775        vTemp1 = _mm_and_ps(vTemp1, g_XMMask3.v);
776        // vTemp2 = x2,y2,z2,0
777        vTemp2 = _mm_and_ps(vTemp2, g_XMMask3.v);
778        // vTemp3 = x3,y3,z3,0
779        vTemp3 = _mm_and_ps(vTemp3, g_XMMask3.v);
780        // vTemp4i = x4,y4,z4,0
781        let mut vTemp4i: __m128i = _mm_srli_si128(_mm_castps_si128(vTemp4), 32 / 8);
782        // vTemp4i = x4,y4,z4,1.0f
783        vTemp4i = _mm_or_si128(vTemp4i, g_XMIdentityR3.m128i());
784        let M = XMMATRIX { r: [
785            vTemp1,
786            vTemp2,
787            vTemp3,
788            _mm_castsi128_ps(vTemp4i)
789        ]};
790        return M;
791    }
792}
793// TODO: XMLoadFloat4x3A
794
795/// Loads an XMFLOAT3X4 into an XMMATRIX.
796///
797/// ## Parameters
798///
799/// `pSource` Type: const XMFLOAT3X4 *Pointer to the constant XMFLOAT3X4 structure to load. This argument must point
800/// to cached memory.
801///
802/// ## Return value
803///
804/// Type: XMMATRIX
805/// An XMMATRIX loaded with the data from the pSource argument.
806/// This function performs a partial load of the returned XMMATRIX. For more info, see Getting started (DirectXMath).
807///
808/// ## Remarks
809///
810/// XMFLOAT3X4 is a row-major form of the matrix. XMLoadFloat3x4 could be used to read column-major data,
811/// but that would then need to be transposed with XMMatrixTranspose before use in other XMMATRIX functions.
812///
813/// ## Example
814///
815/// ```rust
816/// # use directx_math::*;
817/// let data = XMFLOAT3X4::from([
818///     [10.0, 13.0, 16.0, 19.0],
819///     [11.0, 14.0, 17.0, 20.0],
820///     [12.0, 15.0, 18.0, 21.0],
821/// ]);
822///
823/// let m = XMLoadFloat3x4(&data);
824/// let m: [[f32; 4]; 4] = XMMatrix(m).into();
825/// assert_eq!(m, [
826///     [10.0, 11.0, 12.0, 0.0],
827///     [13.0, 14.0, 15.0, 0.0],
828///     [16.0, 17.0, 18.0, 0.0],
829///     [19.0, 20.0, 21.0, 1.0],
830/// ]);
831///
832/// ```
833///
834/// ## Reference
835///
836/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat3x4>
837#[inline]
838pub fn XMLoadFloat3x4(
839    pSource: &XMFLOAT3X4,
840) -> XMMATRIX
841{
842    #[cfg(_XM_NO_INTRINSICS_)]
843    unsafe {
844        let mut M: XMMATRIX = crate::undefined();
845        M.r[0].vector4_f32[0] = pSource.m[0][0];
846        M.r[0].vector4_f32[1] = pSource.m[1][0];
847        M.r[0].vector4_f32[2] = pSource.m[2][0];
848        M.r[0].vector4_f32[3] = 0.0;
849
850        M.r[1].vector4_f32[0] = pSource.m[0][1];
851        M.r[1].vector4_f32[1] = pSource.m[1][1];
852        M.r[1].vector4_f32[2] = pSource.m[2][1];
853        M.r[1].vector4_f32[3] = 0.0;
854
855        M.r[2].vector4_f32[0] = pSource.m[0][2];
856        M.r[2].vector4_f32[1] = pSource.m[1][2];
857        M.r[2].vector4_f32[2] = pSource.m[2][2];
858        M.r[2].vector4_f32[3] = 0.0;
859
860        M.r[3].vector4_f32[0] = pSource.m[0][3];
861        M.r[3].vector4_f32[1] = pSource.m[1][3];
862        M.r[3].vector4_f32[2] = pSource.m[2][3];
863        M.r[3].vector4_f32[3] = 1.0;
864        return M;
865    }
866
867    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
868    {
869        unimplemented!()
870    }
871
872    #[cfg(_XM_SSE_INTRINSICS_)]
873    unsafe {
874        let mut M: XMMATRIX = crate::undefined();
875        M.r[0] = _mm_loadu_ps(&pSource.m[0][0]);
876        M.r[1] = _mm_loadu_ps(&pSource.m[1][0]);
877        M.r[2] = _mm_loadu_ps(&pSource.m[2][0]);
878        M.r[3] = g_XMIdentityR3.v;
879
880        // x.x,x.y,y.x,y.y
881        let vTemp1: XMVECTOR = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
882        // x.z,x.w,y.z,y.w
883        let vTemp3: XMVECTOR = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
884        // z.x,z.y,w.x,w.y
885        let vTemp2: XMVECTOR = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
886        // z.z,z.w,w.z,w.w
887        let vTemp4: XMVECTOR = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));
888
889        let mut mResult: XMMATRIX = crate::undefined();
890
891        // x.x,y.x,z.x,w.x
892        mResult.r[0] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0));
893        // x.y,y.y,z.y,w.y
894        mResult.r[1] = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1));
895        // x.z,y.z,z.z,w.z
896        mResult.r[2] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0));
897        // x.w,y.w,z.w,w.w
898        mResult.r[3] = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(3, 1, 3, 1));
899        return mResult;
900    }
901}
902
903// TODO: XMLoadFloat3x4A
904
905/// Loads an XMFLOAT4X4 into an XMMATRIX.
906///
907/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat4x4>
908#[inline]
909pub fn XMLoadFloat4x4(
910    pSource: &XMFLOAT4X4,
911) -> XMMATRIX
912{
913    #[cfg(_XM_NO_INTRINSICS_)]
914    unsafe {
915        let mut M: XMMATRIX  = crate::undefined();
916        M.r[0].vector4_f32[0] = pSource.m[0][0];
917        M.r[0].vector4_f32[1] = pSource.m[0][1];
918        M.r[0].vector4_f32[2] = pSource.m[0][2];
919        M.r[0].vector4_f32[3] = pSource.m[0][3];
920
921        M.r[1].vector4_f32[0] = pSource.m[1][0];
922        M.r[1].vector4_f32[1] = pSource.m[1][1];
923        M.r[1].vector4_f32[2] = pSource.m[1][2];
924        M.r[1].vector4_f32[3] = pSource.m[1][3];
925
926        M.r[2].vector4_f32[0] = pSource.m[2][0];
927        M.r[2].vector4_f32[1] = pSource.m[2][1];
928        M.r[2].vector4_f32[2] = pSource.m[2][2];
929        M.r[2].vector4_f32[3] = pSource.m[2][3];
930
931        M.r[3].vector4_f32[0] = pSource.m[3][0];
932        M.r[3].vector4_f32[1] = pSource.m[3][1];
933        M.r[3].vector4_f32[2] = pSource.m[3][2];
934        M.r[3].vector4_f32[3] = pSource.m[3][3];
935        return M;
936    }
937
938    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
939    {
940        unimplemented!()
941    }
942
943    #[cfg(_XM_SSE_INTRINSICS_)]
944    unsafe {
945        let mut M: XMMATRIX  = crate::undefined();
946        M.r[0] = _mm_loadu_ps(&pSource.m[0][0]); // _11
947        M.r[1] = _mm_loadu_ps(&pSource.m[1][0]); // _21
948        M.r[2] = _mm_loadu_ps(&pSource.m[2][0]); // _31
949        M.r[3] = _mm_loadu_ps(&pSource.m[3][0]); // _41
950        return M;
951    }
952}
953
954/// Loads an XMFLOAT4X4 into an aligned XMMATRIX.
955///
956/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMLoadFloat4x4A>
957#[inline]
958pub fn XMLoadFloat4x4A(
959    pSource: &Align16<XMFLOAT4X4>,
960) -> XMMATRIX
961{
962    #[cfg(_XM_NO_INTRINSICS_)]
963    unsafe {
964        let mut M: XMMATRIX  = crate::undefined();
965        M.r[0].vector4_f32[0] = pSource.m[0][0];
966        M.r[0].vector4_f32[1] = pSource.m[0][1];
967        M.r[0].vector4_f32[2] = pSource.m[0][2];
968        M.r[0].vector4_f32[3] = pSource.m[0][3];
969
970        M.r[1].vector4_f32[0] = pSource.m[1][0];
971        M.r[1].vector4_f32[1] = pSource.m[1][1];
972        M.r[1].vector4_f32[2] = pSource.m[1][2];
973        M.r[1].vector4_f32[3] = pSource.m[1][3];
974
975        M.r[2].vector4_f32[0] = pSource.m[2][0];
976        M.r[2].vector4_f32[1] = pSource.m[2][1];
977        M.r[2].vector4_f32[2] = pSource.m[2][2];
978        M.r[2].vector4_f32[3] = pSource.m[2][3];
979
980        M.r[3].vector4_f32[0] = pSource.m[3][0];
981        M.r[3].vector4_f32[1] = pSource.m[3][1];
982        M.r[3].vector4_f32[2] = pSource.m[3][2];
983        M.r[3].vector4_f32[3] = pSource.m[3][3];
984        return M;
985    }
986
987    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
988    {
989        unimplemented!()
990    }
991
992    #[cfg(_XM_SSE_INTRINSICS_)]
993    unsafe {
994        let mut M: XMMATRIX  = crate::undefined();
995        M.r[0] = _mm_load_ps(&pSource.m[0][0]); // _11
996        M.r[1] = _mm_load_ps(&pSource.m[1][0]); // _21
997        M.r[2] = _mm_load_ps(&pSource.m[2][0]); // _31
998        M.r[3] = _mm_load_ps(&pSource.m[3][0]); // _41
999        return M;
1000    }
1001}
1002
// TODO: XMStoreInt
1005
1006/// Stores an XMVECTOR in a float.
1007///
1008/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat>
1009#[inline]
1010pub fn XMStoreFloat(
1011    pDestination: &mut f32,
1012    V: FXMVECTOR,
1013)
1014{
1015    #[cfg(_XM_NO_INTRINSICS_)]
1016    {
1017        *pDestination = XMVectorGetX(V);
1018    }
1019
1020    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1021    {
1022        unimplemented!()
1023    }
1024
1025    #[cfg(all(_XM_SSE_INTRINSICS_))]
1026    unsafe {
1027        _mm_store_ss(pDestination, V);
1028    }
1029}
1030
1031/// Stores an XMVECTOR in a 2-element uint32_t array.
1032///
1033/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreInt2>
1034#[inline]
1035pub fn XMStoreInt2(
1036    pDestination: &mut [u32; 2],
1037    V: FXMVECTOR,
1038)
1039{
1040    #[cfg(_XM_NO_INTRINSICS_)]
1041    unsafe {
1042        pDestination[0] = V.vector4_u32[0];
1043        pDestination[1] = V.vector4_u32[1];
1044    }
1045
1046    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1047    {
1048        unimplemented!()
1049    }
1050
1051    #[cfg(all(_XM_SSE_INTRINSICS_))]
1052    unsafe {
1053        _mm_store_sd(mem::transmute::<_, *mut f64>(pDestination), _mm_castps_pd(V));
1054    }
1055}
1056
#[test]
fn test_XMStoreInt2() {
    // Plain small values.
    let mut a: [u32; 2] = [0, 0];
    XMStoreInt2(&mut a, XMVectorSetInt(1, 2, 0, 0));
    assert_eq!(1, a[0]);
    assert_eq!(2, a[1]);

    // A lane with all bits set. The destination starts zeroed (not pre-loaded
    // with the expected values) so the assertions fail if nothing is stored.
    let mut b: [u32; 2] = [0, 0];
    XMStoreInt2(&mut b, XMVectorSetInt(-1i32 as u32, 1, 0, 0));
    assert_eq!(-1, b[0] as i32);
    assert_eq!( 1, b[1] as i32);
}
1069
1070// TODO: XMStoreInt2A
1071
1072/// Stores an XMVECTOR in an XMFLOAT2.
1073///
1074/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat2>
1075#[inline]
1076pub fn XMStoreFloat2(
1077    pDestination: &mut XMFLOAT2,
1078    V: FXMVECTOR,
1079)
1080{
1081    #[cfg(_XM_NO_INTRINSICS_)]
1082    unsafe {
1083        pDestination.x = V.vector4_f32[0];
1084        pDestination.y = V.vector4_f32[1];
1085    }
1086
1087    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1088    {
1089        unimplemented!()
1090    }
1091
1092    #[cfg(all(_XM_SSE_INTRINSICS_))]
1093    unsafe {
1094        let pDestination: *mut XMFLOAT2 = mem::transmute(pDestination);
1095        _mm_store_sd(mem::transmute::<_, *mut f64>(pDestination), _mm_castps_pd(V));
1096    }
1097}
1098
#[test]
fn test_XMStoreFloat2() {
    // Store into a zeroed destination and compare both components at once.
    let mut dest = XMFLOAT2 { x: 0.0, y: 0.0 };
    let v = XMVectorSet(1.0, 2.0, 0.0, 0.0);
    XMStoreFloat2(&mut dest, v);
    assert_eq!([1.0, 2.0], [dest.x, dest.y]);
}
1106
1107// TODO: XMStoreFloat2A
1108// TODO: XMStoreSInt2
1109// TODO: XMStoreUInt2
1110
1111/// Stores an XMVECTOR in a 3-element uint32_t array.
1112///
1113/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreInt3>
1114#[inline]
1115pub fn XMStoreInt3(
1116    pDestination: &mut [u32; 3],
1117    V: FXMVECTOR,
1118)
1119{
1120    #[cfg(_XM_NO_INTRINSICS_)]
1121    unsafe {
1122        pDestination[0] = V.vector4_u32[0];
1123        pDestination[1] = V.vector4_u32[1];
1124        pDestination[2] = V.vector4_u32[2];
1125    }
1126
1127    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1128    {
1129        unimplemented!()
1130    }
1131
1132    #[cfg(all(_XM_SSE_INTRINSICS_))]
1133    unsafe {
1134        _mm_store_sd(mem::transmute::<_, *mut f64>(&mut *pDestination), _mm_castps_pd(V));
1135        let z: __m128 = XM_PERMUTE_PS!(V, _MM_SHUFFLE(2, 2, 2, 2));
1136        _mm_store_ss(mem::transmute::<_, *mut f32>(&mut pDestination[2]), z);
1137    }
1138}
1139
#[test]
fn test_XMStoreInt3() {
    // Plain small values.
    let mut a: [u32; 3] = [0, 0, 0];
    XMStoreInt3(&mut a, XMVectorSetInt(1, 2, 3, 0));
    assert_eq!(1, a[0]);
    assert_eq!(2, a[1]);
    assert_eq!(3, a[2]);

    // A lane with all bits set. The destination starts zeroed (not pre-loaded
    // with the expected values) so the assertions fail if nothing is stored.
    let mut b: [u32; 3] = [0, 0, 0];
    XMStoreInt3(&mut b, XMVectorSetInt(-1i32 as u32, 1, 2, 0));
    assert_eq!(-1, b[0] as i32);
    assert_eq!( 1, b[1] as i32);
    assert_eq!( 2, b[2] as i32);
}
1154
1155// TODO: XMStoreInt3A
1156
1157/// Stores an XMVECTOR in an XMFLOAT3.
1158///
1159/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat3>
1160#[inline]
1161pub fn XMStoreFloat3(
1162    pDestination: &mut XMFLOAT3,
1163    V: FXMVECTOR,
1164)
1165{
1166    #[cfg(_XM_NO_INTRINSICS_)]
1167    unsafe {
1168        pDestination.x = V.vector4_f32[0];
1169        pDestination.y = V.vector4_f32[1];
1170        pDestination.z = V.vector4_f32[2];
1171    }
1172
1173    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1174    {
1175        unimplemented!()
1176    }
1177
1178    #[cfg(_XM_SSE4_INTRINSICS_)]
1179    unsafe {
1180        *mem::transmute::<_, *mut i32>(&mut pDestination.x) = _mm_extract_ps(V, 0);
1181        *mem::transmute::<_, *mut i32>(&mut pDestination.y) = _mm_extract_ps(V, 1);
1182        *mem::transmute::<_, *mut i32>(&mut pDestination.z) = _mm_extract_ps(V, 2);
1183    }
1184
1185    #[cfg(all(_XM_SSE_INTRINSICS_, not(_XM_SSE4_INTRINSICS_)))]
1186    unsafe {
1187        let pDestination: *mut XMFLOAT3 = mem::transmute(pDestination);
1188        _mm_store_sd(mem::transmute::<_, *mut f64>(pDestination), _mm_castps_pd(V));
1189        let z: __m128 = XM_PERMUTE_PS!(V, _MM_SHUFFLE(2, 2, 2, 2));
1190        _mm_store_ss(&mut (*pDestination).z, z);
1191    }
1192}
1193
#[test]
fn test_XMStoreFloat3() {
    // Store into a zeroed destination and compare all three components at once.
    let mut dest = XMFLOAT3 { x: 0.0, y: 0.0, z: 0.0 };
    let v = XMVectorSet(1.0, 2.0, 3.0, 0.0);
    XMStoreFloat3(&mut dest, v);
    assert_eq!([1.0, 2.0, 3.0], [dest.x, dest.y, dest.z]);
}
1202
1203/// Stores an XMVECTOR in an aligned XMFLOAT3.
1204///
1205/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat3A>
1206#[inline]
1207pub fn XMStoreFloat3A(
1208    pDestination: &mut Align16<XMFLOAT3>,
1209    V: FXMVECTOR,
1210)
1211{
1212    #[cfg(_XM_NO_INTRINSICS_)]
1213    unsafe {
1214        pDestination.x = V.vector4_f32[0];
1215        pDestination.y = V.vector4_f32[1];
1216        pDestination.z = V.vector4_f32[2];
1217    }
1218
1219    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1220    {
1221        unimplemented!()
1222    }
1223
1224    #[cfg(_XM_SSE4_INTRINSICS_)]
1225    unsafe {
1226        _mm_store_sd(mem::transmute(&mut pDestination.x), _mm_castps_pd(V));
1227        *mem::transmute::<_, *mut i32>(&mut pDestination.z) = _mm_extract_ps(V, 2);
1228    }
1229
1230    #[cfg(all(_XM_SSE_INTRINSICS_, not(_XM_SSE4_INTRINSICS_)))]
1231    unsafe {
1232        let pDestination: *mut XMFLOAT3 = mem::transmute(pDestination);
1233        _mm_store_sd(mem::transmute::<_, *mut f64>(pDestination), _mm_castps_pd(V));
1234        let z: __m128 = _mm_movehl_ps(V, V);
1235        _mm_store_ss(&mut (*pDestination).z, z);
1236    }
1237}
1238
#[test]
fn test_XMStoreFloat3A() {
    // Store into a zeroed aligned destination and compare all three components at once.
    let mut dest = Align16(XMFLOAT3 { x: 0.0, y: 0.0, z: 0.0 });
    let v = XMVectorSet(1.0, 2.0, 3.0, 0.0);
    XMStoreFloat3A(&mut dest, v);
    assert_eq!([1.0, 2.0, 3.0], [dest.x, dest.y, dest.z]);
}
1247
1248// TODO: XMStoreSInt3
1249// TODO: XMStoreUInt3
1250
1251/// Stores an XMVECTOR in a 4-element uint32_t array.
1252///
1253/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreInt4>
1254#[inline]
1255pub fn XMStoreInt4(
1256    pDestination: &mut [u32; 4],
1257    V: FXMVECTOR,
1258)
1259{
1260    #[cfg(_XM_NO_INTRINSICS_)]
1261    unsafe {
1262        pDestination[0] = V.vector4_u32[0];
1263        pDestination[1] = V.vector4_u32[1];
1264        pDestination[2] = V.vector4_u32[2];
1265        pDestination[3] = V.vector4_u32[3];
1266    }
1267
1268    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1269    {
1270        unimplemented!()
1271    }
1272
1273    #[cfg(all(_XM_SSE_INTRINSICS_))]
1274    unsafe {
1275        _mm_storeu_si128(mem::transmute::<_, *mut __m128i>(pDestination), _mm_castps_si128(V));
1276    }
1277}
1278
#[test]
fn test_XMStoreInt4() {
    // Plain small values.
    let mut a: [u32; 4] = [0, 0, 0, 0];
    XMStoreInt4(&mut a, XMVectorSetInt(1, 2, 3, 4));
    assert_eq!(1, a[0]);
    assert_eq!(2, a[1]);
    assert_eq!(3, a[2]);
    assert_eq!(4, a[3]);

    // A lane with all bits set. The destination starts zeroed (not pre-loaded
    // with the expected values) so the assertions fail if nothing is stored.
    let mut b: [u32; 4] = [0, 0, 0, 0];
    XMStoreInt4(&mut b, XMVectorSetInt(-1i32 as u32, 1, 2, 3));
    assert_eq!(-1, b[0] as i32);
    assert_eq!( 1, b[1] as i32);
    assert_eq!( 2, b[2] as i32);
    assert_eq!( 3, b[3] as i32);
}
1295
1296// TODO: XMStoreInt4A
1297
1298/// Stores an XMVECTOR in an XMFLOAT4.
1299///
1300/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat4>
1301#[inline]
1302pub fn XMStoreFloat4(
1303    pDestination: &mut XMFLOAT4,
1304    V: FXMVECTOR,
1305)
1306{
1307    #[cfg(_XM_NO_INTRINSICS_)]
1308    unsafe {
1309        pDestination.x = V.vector4_f32[0];
1310        pDestination.y = V.vector4_f32[1];
1311        pDestination.z = V.vector4_f32[2];
1312        pDestination.w = V.vector4_f32[3];
1313    }
1314
1315    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1316    {
1317        unimplemented!()
1318    }
1319
1320    #[cfg(_XM_SSE_INTRINSICS_)]
1321    unsafe {
1322        _mm_storeu_ps(&mut pDestination.x, V);
1323    }
1324}
1325
#[test]
fn test_XMStoreFloat4() {
    // Store into a zeroed destination and compare all four components at once.
    let mut dest = XMFLOAT4 { x: 0.0, y: 0.0, z: 0.0, w: 0.0 };
    let v = XMVectorSet(1.0, 2.0, 3.0, 4.0);
    XMStoreFloat4(&mut dest, v);
    assert_eq!([1.0, 2.0, 3.0, 4.0], [dest.x, dest.y, dest.z, dest.w]);
}
1335
1336/// Stores an XMVECTOR in an aligned XMFLOAT4.
1337///
1338/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat4A>
1339#[inline]
1340pub fn XMStoreFloat4A(
1341    pDestination: &mut Align16<XMFLOAT4>,
1342    V: FXMVECTOR,
1343)
1344{
1345    #[cfg(_XM_NO_INTRINSICS_)]
1346    unsafe {
1347        pDestination.x = V.vector4_f32[0];
1348        pDestination.y = V.vector4_f32[1];
1349        pDestination.z = V.vector4_f32[2];
1350        pDestination.w = V.vector4_f32[3];
1351    }
1352
1353    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1354    {
1355        unimplemented!()
1356    }
1357
1358    #[cfg(_XM_SSE_INTRINSICS_)]
1359    unsafe {
1360        _mm_store_ps(&mut pDestination.x, V);
1361    }
1362}
1363
#[test]
fn test_XMStoreFloat4A() {
    let mut a = Align16(XMFLOAT4 { x: 0.0, y: 0.0, z: 0.0, w: 0.0 });
    // Exercise the aligned variant itself; the previous version called
    // `XMStoreFloat4`, leaving `XMStoreFloat4A` untested.
    XMStoreFloat4A(&mut a, XMVectorSet(1.0, 2.0, 3.0, 4.0));
    assert_eq!(1.0, a.x);
    assert_eq!(2.0, a.y);
    assert_eq!(3.0, a.z);
    assert_eq!(4.0, a.w);
}
1373
1374// TODO: XMStoreSInt4
1375// TODO: XMStoreUInt4
1376
1377/// Stores an XMMATRIX in an XMFLOAT3X3.
1378///
1379/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat3x3>
1380#[inline]
1381pub fn XMStoreFloat3x3(
1382    pDestination: &mut XMFLOAT3X3,
1383    M: FXMMATRIX,
1384)
1385{
1386    #[cfg(_XM_NO_INTRINSICS_)]
1387    unsafe {
1388        pDestination.m[0][0] = M.r[0].vector4_f32[0];
1389        pDestination.m[0][1] = M.r[0].vector4_f32[1];
1390        pDestination.m[0][2] = M.r[0].vector4_f32[2];
1391
1392        pDestination.m[1][0] = M.r[1].vector4_f32[0];
1393        pDestination.m[1][1] = M.r[1].vector4_f32[1];
1394        pDestination.m[1][2] = M.r[1].vector4_f32[2];
1395
1396        pDestination.m[2][0] = M.r[2].vector4_f32[0];
1397        pDestination.m[2][1] = M.r[2].vector4_f32[1];
1398        pDestination.m[2][2] = M.r[2].vector4_f32[2];
1399    }
1400
1401    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1402    {
1403        unimplemented!()
1404    }
1405
1406    #[cfg(_XM_SSE_INTRINSICS_)]
1407    unsafe {
1408        let mut vTemp1: XMVECTOR = M.r[0];
1409        let mut vTemp2: XMVECTOR = M.r[1];
1410        let mut vTemp3: XMVECTOR = M.r[2];
1411        let vWork: XMVECTOR = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 0, 2, 2));
1412        vTemp1 = _mm_shuffle_ps(vTemp1, vWork, _MM_SHUFFLE(2, 0, 1, 0));
1413        _mm_storeu_ps(&mut pDestination.m[0][0], vTemp1);
1414        vTemp2 = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1));
1415        _mm_storeu_ps(&mut pDestination.m[1][1], vTemp2);
1416        vTemp3 = XM_PERMUTE_PS!(vTemp3, _MM_SHUFFLE(2, 2, 2, 2));
1417        _mm_store_ss(&mut pDestination.m[2][2], vTemp3);
1418    }
1419}
1420
1421/// Stores an XMMATRIX in an XMFLOAT4X3.
1422///
1423/// ## Parameters
1424///
1425/// `pDestination` Address at which to store the data.
1426///
1427/// `M` Matrix containing the data to store.
1428///
1429/// ## Return value
1430///
1431/// None.
1432///
1433/// ## Remarks
1434///
1435/// XMFLOAT4X3 is a row-major matrix form. This function cannot be used to write out column-major data since
1436/// it assumes the last column is `0` 0 `0` 1.
1437///
1438/// This function takes a matrix and writes the components out to twelve single-precision floating-point
1439/// values at the given address. The most significant component of the first row vector is written to the
1440/// first four bytes of the address, followed by the second most significant component of the first row,
1441/// followed by the third most significant component of the first row. The most significant three components
1442/// of the second row are then written out in a like manner to memory beginning at byte 12, followed by
1443/// the third row to memory beginning at byte 24, and finally the fourth row to memory beginning at byte
1444/// 36.
1445///
1446/// The following pseudocode demonstrates the operation of the function.
1447///
1448/// ```text
1449/// pDestination->_11 = M[0].x; // 4 bytes to address (uint8_t*)pDestination
1450/// pDestination->_12 = M[0].y; // 4 bytes to address (uint8_t*)pDestination + 4
1451/// pDestination->_13 = M[0].z; // 4 bytes to address (uint8_t*)pDestination + 8
1452///
1453/// pDestination->_21 = M[1].x; // 4 bytes to address (uint8_t*)pDestination + 12
1454/// pDestination->_22 = M[1].y; // 4 bytes to address (uint8_t*)pDestination + 16
1455/// pDestination->_23 = M[1].z; // 4 bytes to address (uint8_t*)pDestination + 20
1456///
1457/// pDestination->_31 = M[2].x; // 4 bytes to address (uint8_t*)pDestination + 24
1458/// pDestination->_32 = M[2].y; // 4 bytes to address (uint8_t*)pDestination + 28
1459/// pDestination->_33 = M[2].z; // 4 bytes to address (uint8_t*)pDestination + 32
1460///
1461/// pDestination->_41 = M[3].x; // 4 bytes to address (uint8_t*)pDestination + 36
1462/// pDestination->_42 = M[3].y; // 4 bytes to address (uint8_t*)pDestination + 40
1463/// pDestination->_43 = M[3].z; // 4 bytes to address (uint8_t*)pDestination + 44
1464/// ```
1465///
1466/// ## Example
1467///
1468/// ```
1469/// # use directx_math::*;
1470/// let m = XMMatrix::from(&[
1471///     [10.0, 11.0, 12.0, 0.0],
1472///     [13.0, 14.0, 15.0, 0.0],
1473///     [16.0, 17.0, 18.0, 0.0],
1474///     [19.0, 20.0, 21.0, 1.0],
1475/// ]);
1476///
1477/// let mut data = XMFLOAT4X3::default();
1478/// XMStoreFloat4x3(&mut data, *m);
1479///
1480/// let data: &[[f32; 3]; 4] = data.as_ref();
1481/// assert_eq!(data, &[
1482///     [10.0, 11.0, 12.0],
1483///     [13.0, 14.0, 15.0],
1484///     [16.0, 17.0, 18.0],
1485///     [19.0, 20.0, 21.0],
1486/// ]);
1487/// ```
1488///
1489/// ## Reference
1490///
1491/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat4x3>
1492#[inline]
1493pub fn XMStoreFloat4x3(
1494    pDestination: &mut XMFLOAT4X3,
1495    M: FXMMATRIX,
1496)
1497{
1498    #[cfg(_XM_NO_INTRINSICS_)]
1499    unsafe {
1500        pDestination.m[0][0] = M.r[0].vector4_f32[0];
1501        pDestination.m[0][1] = M.r[0].vector4_f32[1];
1502        pDestination.m[0][2] = M.r[0].vector4_f32[2];
1503
1504        pDestination.m[1][0] = M.r[1].vector4_f32[0];
1505        pDestination.m[1][1] = M.r[1].vector4_f32[1];
1506        pDestination.m[1][2] = M.r[1].vector4_f32[2];
1507
1508        pDestination.m[2][0] = M.r[2].vector4_f32[0];
1509        pDestination.m[2][1] = M.r[2].vector4_f32[1];
1510        pDestination.m[2][2] = M.r[2].vector4_f32[2];
1511
1512        pDestination.m[3][0] = M.r[3].vector4_f32[0];
1513        pDestination.m[3][1] = M.r[3].vector4_f32[1];
1514        pDestination.m[3][2] = M.r[3].vector4_f32[2];
1515    }
1516
1517    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1518    {
1519        unimplemented!()
1520    }
1521
1522    #[cfg(_XM_SSE_INTRINSICS_)]
1523    unsafe {
1524        let mut vTemp1: XMVECTOR = M.r[0];
1525        let mut vTemp2: XMVECTOR = M.r[1];
1526        let mut vTemp3: XMVECTOR = M.r[2];
1527        let vTemp4: XMVECTOR = M.r[3];
1528        let vTemp2x: XMVECTOR = _mm_shuffle_ps(vTemp2, vTemp3, _MM_SHUFFLE(1, 0, 2, 1));
1529        vTemp2 = _mm_shuffle_ps(vTemp2, vTemp1, _MM_SHUFFLE(2, 2, 0, 0));
1530        vTemp1 = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(0, 2, 1, 0));
1531        vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(0, 0, 2, 2));
1532        vTemp3 = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 1, 2, 0));
1533        _mm_storeu_ps(&mut pDestination.m[0][0], vTemp1);
1534        _mm_storeu_ps(&mut pDestination.m[1][1], vTemp2x);
1535        _mm_storeu_ps(&mut pDestination.m[2][2], vTemp3);
1536    }
1537}
1538
1539// TODO: XMStoreFloat4x3A
1540
1541/// Stores an XMMATRIX in an XMFLOAT3X4.
1542///
1543/// ## Parameters
1544///
1545/// `pDestination` Type: XMFLOAT3X4 *Pointer to the XMFLOAT3X4 structure in which to store the data.
1546///
1547/// `M` Type: XMMATRIXMatrix containing the data to store.
1548///
1549/// ## Return value
1550///
1551/// None
1552///
1553/// ## Remarks
1554///
1555/// XMFLOAT3X4 is a row-major form of the matrix.
1556///
1557/// To write out column-major data requires that the XMMATRIX be transposed via XMMatrixTranspose before
1558/// calling the store function.
1559///
1560/// ## Example
1561///
1562/// ```rust
1563/// # use directx_math::*;
1564/// let m = XMMatrix::from(&[
1565///     [10.0, 11.0, 12.0, 0.0],
1566///     [13.0, 14.0, 15.0, 0.0],
1567///     [16.0, 17.0, 18.0, 0.0],
1568///     [19.0, 20.0, 21.0, 1.0],
1569/// ]);
1570///
1571/// let mut data = XMFLOAT3X4::default();
1572/// XMStoreFloat3x4(&mut data, *m);
1573///
1574/// let data: &[[f32; 4]; 3] = data.as_ref();
1575/// assert_eq!(data, &[
1576///     [10.0, 13.0, 16.0, 19.0],
1577///     [11.0, 14.0, 17.0, 20.0],
1578///     [12.0, 15.0, 18.0, 21.0],
1579/// ]);
1580///
1581/// ```
1582/// ## Reference
1583///
1584/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat3x4>
1585#[inline]
1586pub fn XMStoreFloat3x4(
1587    pDestination: &mut XMFLOAT3X4,
1588    M: FXMMATRIX,
1589)
1590{
1591    #[cfg(_XM_NO_INTRINSICS_)]
1592    unsafe {
1593        pDestination.m[0][0] = M.r[0].vector4_f32[0];
1594        pDestination.m[0][1] = M.r[1].vector4_f32[0];
1595        pDestination.m[0][2] = M.r[2].vector4_f32[0];
1596        pDestination.m[0][3] = M.r[3].vector4_f32[0];
1597
1598        pDestination.m[1][0] = M.r[0].vector4_f32[1];
1599        pDestination.m[1][1] = M.r[1].vector4_f32[1];
1600        pDestination.m[1][2] = M.r[2].vector4_f32[1];
1601        pDestination.m[1][3] = M.r[3].vector4_f32[1];
1602
1603        pDestination.m[2][0] = M.r[0].vector4_f32[2];
1604        pDestination.m[2][1] = M.r[1].vector4_f32[2];
1605        pDestination.m[2][2] = M.r[2].vector4_f32[2];
1606        pDestination.m[2][3] = M.r[3].vector4_f32[2];
1607    }
1608
1609    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1610    {
1611        unimplemented!()
1612    }
1613
1614    #[cfg(_XM_SSE_INTRINSICS_)]
1615    unsafe {
1616        // x.x,x.y,y.x,y.y
1617        let vTemp1: XMVECTOR = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(1, 0, 1, 0));
1618        // x.z,x.w,y.z,y.w
1619        let vTemp3: XMVECTOR = _mm_shuffle_ps(M.r[0], M.r[1], _MM_SHUFFLE(3, 2, 3, 2));
1620        // z.x,z.y,w.x,w.y
1621        let vTemp2: XMVECTOR = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(1, 0, 1, 0));
1622        // z.z,z.w,w.z,w.w
1623        let vTemp4: XMVECTOR = _mm_shuffle_ps(M.r[2], M.r[3], _MM_SHUFFLE(3, 2, 3, 2));
1624
1625        // x.x,y.x,z.x,w.x
1626        let r0: XMVECTOR = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(2, 0, 2, 0));
1627        // x.y,y.y,z.y,w.y
1628        let r1: XMVECTOR = _mm_shuffle_ps(vTemp1, vTemp2, _MM_SHUFFLE(3, 1, 3, 1));
1629        // x.z,y.z,z.z,w.z
1630        let r2: XMVECTOR = _mm_shuffle_ps(vTemp3, vTemp4, _MM_SHUFFLE(2, 0, 2, 0));
1631
1632        _mm_storeu_ps(&mut pDestination.m[0][0], r0);
1633        _mm_storeu_ps(&mut pDestination.m[1][0], r1);
1634        _mm_storeu_ps(&mut pDestination.m[2][0], r2);
1635    }
1636}
1637// TODO: XMStoreFloat3x4A
1638
1639/// Stores an XMMATRIX in an XMFLOAT4X4.
1640///
1641/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat4x4>
1642#[inline]
1643pub fn XMStoreFloat4x4(
1644    pDestination: &mut XMFLOAT4X4,
1645    M: FXMMATRIX,
1646)
1647{
1648    #[cfg(_XM_NO_INTRINSICS_)]
1649    unsafe {
1650        pDestination.m[0][0] = M.r[0].vector4_f32[0];
1651        pDestination.m[0][1] = M.r[0].vector4_f32[1];
1652        pDestination.m[0][2] = M.r[0].vector4_f32[2];
1653        pDestination.m[0][3] = M.r[0].vector4_f32[3];
1654
1655        pDestination.m[1][0] = M.r[1].vector4_f32[0];
1656        pDestination.m[1][1] = M.r[1].vector4_f32[1];
1657        pDestination.m[1][2] = M.r[1].vector4_f32[2];
1658        pDestination.m[1][3] = M.r[1].vector4_f32[3];
1659
1660        pDestination.m[2][0] = M.r[2].vector4_f32[0];
1661        pDestination.m[2][1] = M.r[2].vector4_f32[1];
1662        pDestination.m[2][2] = M.r[2].vector4_f32[2];
1663        pDestination.m[2][3] = M.r[2].vector4_f32[3];
1664
1665        pDestination.m[3][0] = M.r[3].vector4_f32[0];
1666        pDestination.m[3][1] = M.r[3].vector4_f32[1];
1667        pDestination.m[3][2] = M.r[3].vector4_f32[2];
1668        pDestination.m[3][3] = M.r[3].vector4_f32[3];
1669    }
1670
1671    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1672    {
1673        unimplemented!()
1674    }
1675
1676    #[cfg(_XM_SSE_INTRINSICS_)]
1677    unsafe {
1678        _mm_storeu_ps(&mut pDestination.m[0][0], M.r[0]); // _11
1679        _mm_storeu_ps(&mut pDestination.m[1][0], M.r[1]); // _21
1680        _mm_storeu_ps(&mut pDestination.m[2][0], M.r[2]); // _31
1681        _mm_storeu_ps(&mut pDestination.m[3][0], M.r[3]); // _41
1682    }
1683}
1684
1685/// Stores an XMMATRIX in an aligned XMFLOAT4X4.
1686///
1687/// <https://docs.microsoft.com/en-us/windows/win32/api/directxmath/nf-directxmath-XMStoreFloat4x4A>
1688#[inline]
1689pub fn XMStoreFloat4x4A(
1690    pDestination: &mut Align16<XMFLOAT4X4>,
1691    M: FXMMATRIX,
1692)
1693{
1694    #[cfg(_XM_NO_INTRINSICS_)]
1695    unsafe {
1696        pDestination.m[0][0] = M.r[0].vector4_f32[0];
1697        pDestination.m[0][1] = M.r[0].vector4_f32[1];
1698        pDestination.m[0][2] = M.r[0].vector4_f32[2];
1699        pDestination.m[0][3] = M.r[0].vector4_f32[3];
1700
1701        pDestination.m[1][0] = M.r[1].vector4_f32[0];
1702        pDestination.m[1][1] = M.r[1].vector4_f32[1];
1703        pDestination.m[1][2] = M.r[1].vector4_f32[2];
1704        pDestination.m[1][3] = M.r[1].vector4_f32[3];
1705
1706        pDestination.m[2][0] = M.r[2].vector4_f32[0];
1707        pDestination.m[2][1] = M.r[2].vector4_f32[1];
1708        pDestination.m[2][2] = M.r[2].vector4_f32[2];
1709        pDestination.m[2][3] = M.r[2].vector4_f32[3];
1710
1711        pDestination.m[3][0] = M.r[3].vector4_f32[0];
1712        pDestination.m[3][1] = M.r[3].vector4_f32[1];
1713        pDestination.m[3][2] = M.r[3].vector4_f32[2];
1714        pDestination.m[3][3] = M.r[3].vector4_f32[3];
1715    }
1716
1717    #[cfg(_XM_ARM_NEON_INTRINSICS_)]
1718    {
1719        unimplemented!()
1720    }
1721
1722    #[cfg(_XM_SSE_INTRINSICS_)]
1723    unsafe {
1724        _mm_store_ps(&mut pDestination.m[0][0], M.r[0]); // _11
1725        _mm_store_ps(&mut pDestination.m[1][0], M.r[1]); // _21
1726        _mm_store_ps(&mut pDestination.m[2][0], M.r[2]); // _31
1727        _mm_store_ps(&mut pDestination.m[3][0], M.r[3]); // _41
1728    }
1729}
1730