1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
//! Module for code specific to `u16` base images.
//!
//! The channel order of the pixel value processing functions here is assumed to
//! be `A1_B5_G5_R5`, which matches up with OpenGL's
//! `GL_UNSIGNED_SHORT_1_5_5_5_REV` pixel type.
//!
//! In the future, I will attempt to make this able to unpack, use, and repack
//! other color channel orderings.

#![cfg(target_endian = "little")]
#![allow(dead_code)]
#![allow(unused_macros)]

use super::*;

/// This allows `u16` specific extensions to the `WritableImage` concepts.
/// This allows `u16` specific extensions to the `WritableImage` concepts.
pub trait WritableImageU16Ext: WritableImage<u16> {
  /// This copies the data from the source into the destination any time the
  /// source's alpha bit is set.
  ///
  /// Dispatches to the best available implementation for the current target:
  /// explicit AVX2 or SSE2 on x86/x86_64 (runtime-detected when `std` is
  /// available, compile-time `target_feature` otherwise), or a plain unrolled
  /// loop everywhere else. All implementations apply the same per-pixel rule:
  /// keep the destination pixel unless the source pixel's high (alpha) bit is
  /// set.
  fn blit_rgba16<RI>(&mut self, src: &RI, offset: (isize, isize))
  where
    RI: ReadableImage<u16>,
  {
    #[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
    {
      // We're on x86 or x86_64, so we'll use explicit SIMD versions as
      // appropriate, because the compiler just isn't smart enough to unroll it
      // by hand.
      #[cfg(feature = "std")]
      {
        // With `std` we can probe CPU features at runtime and pick the widest
        // SIMD path the hardware actually supports.
        if is_x86_feature_detected!("avx2") {
          unsafe { blit_rgb16_avx2_explicit(self, src, offset) };
        } else if is_x86_feature_detected!("sse2") {
          unsafe { blit_rgb16_sse2_explicit(self, src, offset) };
        } else {
          // holy cripes how old is your CPU? these were added to x86 in 2001!
          unsafe { blit_rgb16_fully_unrolled_no_intrinsics(self, src, offset) };
        }
      }
      // Without `std` there's no runtime detection, so fall back to whatever
      // the build target statically guarantees. Exactly one of the following
      // three cfg arms is active.
      #[cfg(all(not(feature = "std"), target_feature = "avx2"))]
      {
        unsafe { blit_rgb16_avx2_explicit(self, src, offset) };
      }
      #[cfg(all(not(feature = "std"), not(target_feature = "avx2"), target_feature = "sse2"))]
      {
        unsafe { blit_rgb16_sse2_explicit(self, src, offset) };
      }
      #[cfg(all(not(feature = "std"), not(target_feature = "avx2"), not(target_feature = "sse2")))]
      {
        unsafe { blit_rgb16_fully_unrolled_no_intrinsics(self, src, offset) };
      }
    }
    #[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))]
    {
      // We're NOT on x86 or x86_64, so we just do it using a fully unrolled
      // loop, which is faster than using blit_generic at least.
      // NOTE: this previously called `blit_blend_rectilinear_fully_unrolled_no_intrinsics`,
      // a name that doesn't exist here; the matching fallback is the
      // `blit_rgb16_*` helper used by the other arms.
      unsafe { blit_rgb16_fully_unrolled_no_intrinsics(self, src, offset) };
    }
  }
}

/// Scalar blit fallback: copies a source pixel over the destination pixel
/// whenever the source's alpha bit (bit 15) is set.
///
/// # Safety
///
/// Has no special requirements of its own; it is `unsafe` only to keep a
/// uniform calling convention with the SIMD-accelerated variants.
unsafe fn blit_rgb16_fully_unrolled_no_intrinsics<WI, RI>(dest: &mut WI, src: &RI, offset: (isize, isize))
where
  WI: WritableImage<u16> + ?Sized,
  RI: ReadableImage<u16>,
{
  // Bit 15 set == alpha on. `p & 0x8000 != 0` is the same test as
  // `(p as i16) < 0` for every u16 value.
  dest.blit_generic(src, offset, |s, d| if s & 0x8000 != 0 { s } else { d });
}

#[target_feature(enable = "sse2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
/// SSE2 blit variant: copies a source pixel over the destination pixel
/// whenever the source's alpha bit (bit 15) is set.
///
/// # Safety
///
/// The caller must ensure the running CPU supports SSE2 (guaranteed by
/// runtime detection or a compile-time `target_feature`).
unsafe fn blit_rgb16_sse2_explicit<WI, RI>(dest: &mut WI, src: &RI, offset: (isize, isize))
where
  WI: WritableImage<u16> + ?Sized,
  RI: ReadableImage<u16>,
{
  // TODO: use sse2 for the gba blit; for now this is the scalar rule.
  // Bit 15 set == alpha on, same test as `(s as i16) < 0`.
  dest.blit_generic(src, offset, |s, d| if s & 0x8000 != 0 { s } else { d });
}

#[target_feature(enable = "avx2")]
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
/// AVX2 blit variant: copies a source pixel over the destination pixel
/// whenever the source's alpha bit (bit 15) is set.
///
/// # Safety
///
/// The caller must ensure the running CPU supports AVX2 (guaranteed by
/// runtime detection or a compile-time `target_feature`).
unsafe fn blit_rgb16_avx2_explicit<WI, RI>(dest: &mut WI, src: &RI, offset: (isize, isize))
where
  WI: WritableImage<u16> + ?Sized,
  RI: ReadableImage<u16>,
{
  // TODO: use avx2 for the gba blit; for now this is the scalar rule.
  // Bit 15 set == alpha on, same test as `(s as i16) < 0`.
  dest.blit_generic(src, offset, |s, d| if s & 0x8000 != 0 { s } else { d });
}