sw_composite/
lib.rs

pub mod blend;

const BILINEAR_INTERPOLATION_BITS: u32 = 4;

const A32_SHIFT: u32 = 24;
const R32_SHIFT: u32 = 16;
const G32_SHIFT: u32 = 8;
const B32_SHIFT: u32 = 0;


type Alpha256 = u32;

/// An unpremultiplied color.
#[derive(Clone, Copy, PartialEq, Debug)]
pub struct Color(u32);

impl Color {
    pub fn new(a: u8, r: u8, g: u8, b: u8) -> Color {
        Color(
            ((a as u32) << A32_SHIFT) |
            ((r as u32) << R32_SHIFT) |
            ((g as u32) << G32_SHIFT) |
            ((b as u32) << B32_SHIFT)
        )
    }

    /// Get the alpha component.
    pub fn a(self) -> u8 {
        (self.0 >> A32_SHIFT & 0xFF) as u8
    }

    /// Get the red component.
    pub fn r(self) -> u8 {
        (self.0 >> R32_SHIFT & 0xFF) as u8
    }

    /// Get the green component.
    pub fn g(self) -> u8 {
        (self.0 >> G32_SHIFT & 0xFF) as u8
    }

    /// Get the blue component.
    pub fn b(self) -> u8 {
        (self.0 >> B32_SHIFT & 0xFF) as u8
    }
}

#[cfg(test)]
#[test]
fn test_color_argb() {
    assert_eq!(Color::new(1, 2, 3, 4).a(), 1);
    assert_eq!(Color::new(1, 2, 3, 4).r(), 2);
    assert_eq!(Color::new(1, 2, 3, 4).g(), 3);
    assert_eq!(Color::new(1, 2, 3, 4).b(), 4);
}

#[derive(Clone, Copy)]
pub struct Image<'a> {
    pub width: i32,
    pub height: i32,
    pub data: &'a [u32],
}

/// t is 0..256
#[inline]
pub fn lerp(a: u32, b: u32, t: u32) -> u32 {
    // this method is from http://stereopsis.com/doubleblend.html
    let mask = 0xff00ff;
    let brb = b & 0xff00ff;
    let bag = (b >> 8) & 0xff00ff;

    let arb = a & 0xff00ff;
    let aag = (a >> 8) & 0xff00ff;

    let drb = brb.wrapping_sub(arb);
    let dag = bag.wrapping_sub(aag);

    let drb = drb.wrapping_mul(t) >> 8;
    let dag = dag.wrapping_mul(t) >> 8;

    let rb = arb + drb;
    let ag = aag + dag;
    (rb & mask) | ((ag << 8) & !mask)
}

#[cfg(test)]
#[test]
fn test_lerp() {
    for i in 0..=256 {
        assert_eq!(lerp(0xffffffff, 0xffffffff, i), 0xffffffff);
    }
}
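
// A few extra spot checks for `lerp`, added for documentation. The expected
// values were worked out by hand from the per-channel formula
// `a + (((b - a) * t) >> 8)` that the packed arithmetic above implements:
// t == 0 returns `a`, t == 256 returns `b`, and equal inputs are left unchanged.
#[cfg(test)]
#[test]
fn test_lerp_endpoints() {
    assert_eq!(lerp(0xff0000ff, 0x00ff0000, 0), 0xff0000ff);
    assert_eq!(lerp(0xff0000ff, 0x00ff0000, 256), 0x00ff0000);
    assert_eq!(lerp(0xff0000ff, 0x00ff0000, 128), 0x7f7f007f);
    assert_eq!(lerp(0x12345678, 0x12345678, 77), 0x12345678);
}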

#[derive(Clone, Copy, Debug)]
pub struct GradientStop {
    pub position: f32,
    pub color: Color,
}

pub struct GradientSource {
    matrix: MatrixFixedPoint,
    lut: [u32; 256],
}

pub struct TwoCircleRadialGradientSource {
    matrix: MatrixFixedPoint,
    c1x: f32,
    c1y: f32,
    r1: f32,
    c2x: f32,
    c2y: f32,
    r2: f32,
    lut: [u32; 256],
}

#[derive(Clone, Copy)]
pub enum Spread {
    Pad,
    Reflect,
    Repeat,
}

/// maps `x` to 0..255 according to `spread`
fn apply_spread(x: i32, spread: Spread) -> i32 {
    match spread {
        Spread::Pad => {
            if x > 255 {
                255
            } else if x < 0 {
                0
            } else {
                x
            }
        }
        Spread::Repeat => {
            x & 255
        }
        Spread::Reflect => {
            // a trick from skia to reflect the bits. 256 -> 255
            let sign = (x << 23) >> 31;
            (x ^ sign) & 255
        }
    }
}
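
// Illustrative checks of the three spread modes, added for documentation;
// the expected values follow directly from the arms above (Reflect mirrors
// over a period of 512, so 300 maps to 511 - 300 = 211).
#[cfg(test)]
#[test]
fn test_apply_spread() {
    assert_eq!(apply_spread(300, Spread::Pad), 255);
    assert_eq!(apply_spread(-5, Spread::Pad), 0);
    assert_eq!(apply_spread(300, Spread::Repeat), 44);
    assert_eq!(apply_spread(-1, Spread::Repeat), 255);
    assert_eq!(apply_spread(256, Spread::Reflect), 255);
    assert_eq!(apply_spread(300, Spread::Reflect), 211);
}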

impl GradientSource {
    pub fn radial_gradient_eval(&self, x: u16, y: u16, spread: Spread) -> u32 {
        let p = self.matrix.transform(x, y);
        // p is converted to f32 before squaring, so there's no chance
        // of overflow and it's safe to use sqrt
        let px = p.x as f32;
        let py = p.y as f32;
        let mut distance = (px * px + py * py).sqrt() as i32;
        distance >>= 8;

        self.lut[apply_spread(distance, spread) as usize]
    }

    pub fn linear_gradient_eval(&self, x: u16, y: u16, spread: Spread) -> u32 {
        let p = self.matrix.transform(x, y);
        let lx = p.x >> 8;

        self.lut[apply_spread(lx, spread) as usize]
    }
}

// This is called TwoPointConical in Skia
impl TwoCircleRadialGradientSource {
    pub fn eval(&self, x: u16, y: u16, spread: Spread) -> u32 {
        let p = self.matrix.transform(x, y);
        // XXX: this is slow and bad
        // the derivation is from pixman radial_get_scanline_narrow
        // " Mathematically the gradient can be defined as the family of circles
        //
        //    ((1-t)·c₁ + t·(c₂), (1-t)·r₁ + t·r₂)
        //
        // excluding those circles whose radius would be < 0."
        // i.e. anywhere where r < 0 we return 0 (transparent black).
        let px = p.x as f32 / 65536.;
        let py = p.y as f32 / 65536.;
        let cdx = self.c2x - self.c1x;
        let cdy = self.c2y - self.c1y;
        let pdx = px - self.c1x;
        let pdy = py - self.c1y;
        let dr = self.r2 - self.r1;
        let a = cdx*cdx + cdy*cdy - dr*dr;
        let b = pdx*cdx + pdy*cdy + self.r1*dr;
        let c = pdx*pdx + pdy*pdy - self.r1*self.r1;
        let discr = b*b - a*c;

        let t = if a == 0. {
            let t = 1./2. * (c / b);
            if self.r1 * (1. - t) + t * self.r2 < 0. {
                return 0;
            }
            t
        } else {
            if discr < 0. {
                return 0;
            } else {
                let t1 = (b + discr.sqrt())/a;
                let t2 = (b - discr.sqrt())/a;
                if t1 > t2 {
                    t1
                } else {
                    t2
                }
            }
        };

        self.lut[apply_spread((t * 255.) as i32, spread) as usize]
    }
}

pub struct SweepGradientSource {
    matrix: MatrixFixedPoint,
    t_bias: f32,
    t_scale: f32,
    lut: [u32; 256],
}

fn if_then_else<T>(cond: bool, x: T, y: T) -> T {
    if cond { x } else { y }
}

impl SweepGradientSource {
    // This implementation is taken from Skia
    pub fn eval(&self, x: u16, y: u16, spread: Spread) -> u32 {
        let p = self.matrix.transform(x, y);
        let px = p.x as f32 / 65536.;
        let py = p.y as f32 / 65536.;

        let xabs = px.abs();
        let yabs = py.abs();

        let slope = xabs.min(yabs)/xabs.max(yabs);
        let s = slope * slope;

        // Use a 7th degree polynomial to approximate atan.
        // This was generated using sollya.gforge.inria.fr.
        // A float optimized polynomial was generated using the following command.
        // P1 = fpminimax((1/(2*Pi))*atan(x),[|1,3,5,7|],[|24...|],[2^(-40),1],relative);
        let mut phi = slope
                * (0.15912117063999176025390625     + s
                * (-5.185396969318389892578125e-2   + s
                * (2.476101927459239959716796875e-2 + s
                * (-7.0547382347285747528076171875e-3))));

        phi = if_then_else(xabs < yabs, 1.0/4.0 - phi, phi);
        phi = if_then_else(px < 0.0, 1.0/2.0 - phi, phi);
        phi = if_then_else(py < 0.0, 1.0 - phi, phi);
        phi = if_then_else(phi != phi, 0., phi); // Check for NaN.
        let r = phi;

        let t = r * self.t_scale - self.t_bias;

        self.lut[apply_spread((t * 255.) as i32, spread) as usize]
    }
}

#[derive(Clone, Debug)]
pub struct Gradient {
    pub stops: Vec<GradientStop>
}

impl Gradient {
    pub fn make_source(&self, matrix: &MatrixFixedPoint, alpha: u32) -> Box<GradientSource> {
        let mut source = Box::new(GradientSource { matrix: (*matrix).clone(), lut: [0; 256] });
        self.build_lut(&mut source.lut, alpha_to_alpha256(alpha));
        source
    }

    /// evaluate the gradient for a particular
    /// `t` directly
    #[cfg(test)]
    fn eval(&self, t: f32, spread: Spread) -> Color {
        let t = match spread {
            Spread::Pad => {
                if t > 1. {
                    1.
                } else if t < 0. {
                    0.
                } else {
                    t
                }
            }
            Spread::Repeat => {
                t % 1.
            }
            Spread::Reflect => {
                let k = t % 2.;
                if k > 1. {
                    2. - k
                } else {
                    k
                }
            }
        };

        let mut stop_idx = 0;
        let mut above = &self.stops[stop_idx];
        while stop_idx < self.stops.len()-1 && t > above.position {
            stop_idx += 1;
            above = &self.stops[stop_idx]
        }
        let mut below = above;
        if stop_idx > 0 && below.position > t {
            below = &self.stops[stop_idx-1]
        }
        assert!((t < above.position && t > below.position) ||
            above as *const GradientStop == below as *const GradientStop);

        if above as *const GradientStop == below as *const GradientStop {
            above.color
        } else {
            let diff = above.position - below.position;
            let t = (t - below.position) / diff;
            assert!(t <= 1.);
            Color(lerp(below.color.0, above.color.0, (t * 256. + 0.5) as u32))
        }
    }

    pub fn make_two_circle_source(
        &self,
        c1x: f32,
        c1y: f32,
        r1: f32,
        c2x: f32,
        c2y: f32,
        r2: f32,
        matrix: &MatrixFixedPoint,
        alpha: u32,
    ) -> Box<TwoCircleRadialGradientSource> {
        let mut source = Box::new(TwoCircleRadialGradientSource {
            c1x, c1y, r1, c2x, c2y, r2, matrix: matrix.clone(), lut: [0; 256]
        });
        self.build_lut(&mut source.lut, alpha_to_alpha256(alpha));
        source
    }

    pub fn make_sweep_source(
        &self,
        start_angle: f32,
        end_angle: f32,
        matrix: &MatrixFixedPoint,
        alpha: u32,
    ) -> Box<SweepGradientSource> {
        let t0 = start_angle / 360.;
        let t1 = end_angle / 360.;
        let t_bias = -t0;
        let t_scale = 1. / (t1 - t0);
        let mut source = Box::new(SweepGradientSource {
            t_bias, t_scale, matrix: matrix.clone(), lut: [0; 256]
        });
        self.build_lut(&mut source.lut, alpha_to_alpha256(alpha));
        source
    }

    fn build_lut(&self, lut: &mut [u32; 256], alpha: Alpha256) {
        let mut stop_idx = 0;
        let mut stop = &self.stops[stop_idx];

        let mut last_color = alpha_mul(stop.color.0, alpha);

        let mut next_color = last_color;
        let mut next_pos = (255. * stop.position) as u32;

        let mut i = 0;

        const FIXED_SHIFT: u32 = 8;
        const FIXED_ONE: u32 = 1 << FIXED_SHIFT;
        const FIXED_HALF: u32 = FIXED_ONE >> 1;

        while i < 255 {
            while next_pos <= i {
                stop_idx += 1;
                last_color = next_color;
                if stop_idx >= self.stops.len() {
                    stop = &self.stops[self.stops.len() - 1];
                    next_pos = 255;
                    next_color = alpha_mul(stop.color.0, alpha);
                    break;
                } else {
                    stop = &self.stops[stop_idx];
                }
                next_pos = (255. * stop.position) as u32;
                next_color = alpha_mul(stop.color.0, alpha);
            }
            let inverse = (FIXED_ONE * 256) / (next_pos - i);
            let mut t = 0;
            // XXX: we could avoid doing any multiplications inside this loop
            // by accumulating (next_color - last_color) * inverse; that's what
            // Skia does.
            while i <= next_pos && i < 255 {
                // stops need to be represented in unpremultiplied form, otherwise
                // we lose information that we need when lerping between colors
                lut[i as usize] = premultiply(lerp(last_color, next_color, (t + FIXED_HALF) >> FIXED_SHIFT));
                t += inverse;

                i += 1;
            }
        }
        // we manually assign the last stop to ensure that it ends up in the last spot even
        // if there's a stop very close to the end. This also avoids a divide-by-zero when
        // calculating inverse
        lut[255] = premultiply(alpha_mul(self.stops[self.stops.len() - 1].color.0, alpha));
    }
}
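
// A small end-to-end sketch, added for documentation. With `xx` set to
// FIXED_ONE / 256, `transform` maps pixel x straight to the 0..255 LUT index
// used by `linear_gradient_eval` (after its `>> 8`), so the endpoints of a
// white-to-black gradient come back unchanged.
#[cfg(test)]
#[test]
fn test_linear_gradient_source() {
    let white = Color(0xffffffff);
    let black = Color(0xff000000);
    let g = Gradient {
        stops: vec![
            GradientStop { position: 0., color: white },
            GradientStop { position: 1., color: black },
        ],
    };
    let matrix = MatrixFixedPoint {
        xx: FIXED_ONE / 256, xy: 0, yx: 0, yy: FIXED_ONE, x0: 0, y0: 0,
    };
    let source = g.make_source(&matrix, 255);
    assert_eq!(source.linear_gradient_eval(0, 0, Spread::Pad), white.0);
    assert_eq!(source.linear_gradient_eval(255, 0, Spread::Pad), black.0);
}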

#[cfg(test)]
#[test]
fn test_gradient_eval() {
    let white = Color(0xffffffff);
    let black = Color(0x00000000);

    let g = Gradient{ stops: vec![GradientStop { position: 0.5, color: white }]};
    assert_eq!(g.eval(0., Spread::Pad), white);
    assert_eq!(g.eval(1., Spread::Pad), white);

    let g = Gradient{ stops: vec![GradientStop { position: 0.5, color: white },
                                  GradientStop { position: 1., color: black }]};
    assert_eq!(g.eval(0., Spread::Pad), white);
    assert_eq!(g.eval(1., Spread::Pad), black);
    //assert_eq!(g.eval(0.75, Spread::Pad), black);

    let g = Gradient{ stops: vec![GradientStop { position: 0.5, color: white },
                                  GradientStop { position: 0.5, color: black }]};
    assert_eq!(g.eval(0., Spread::Pad), white);
    assert_eq!(g.eval(1., Spread::Pad), black);
    assert_eq!(g.eval(0.5, Spread::Pad), white);

    let g = Gradient {
        stops: vec![
            GradientStop {
                position: 0.5,
                color: Color::new(255, 255, 255, 255),
            },
            GradientStop {
                position: 1.0,
                color: Color::new(0, 0, 0, 0),
            },
        ],
    };

    assert_eq!(g.eval(-0.1, Spread::Pad), white);
    assert_eq!(g.eval(1., Spread::Pad), black);

    let mut lut = [0; 256];
    g.build_lut(&mut lut, 256);
    assert_eq!(lut[0], white.0);
    assert_eq!(lut[1], white.0);
    assert_eq!(lut[255], black.0);
}

#[cfg(test)]
#[test]
fn test_gradient_lut() {
    let white = Color(0xffffffff);
    let black = Color(0x00000000);

    let g = Gradient {
        stops: vec![
            GradientStop { position: 0.0, color: white },
            GradientStop { position: 0.999999761, color: white },
            GradientStop { position: 1., color: black },
        ]
    };

    let mut lut = [0; 256];
    g.build_lut(&mut lut, 256);
    assert_eq!(lut[255], black.0);
}

#[cfg(test)]
#[test]
fn test_gradient_pos_range() {
    let white = Color(0xffffffff);
    let black = Color(0x00000000);

    let g = Gradient {
        stops: vec![
            GradientStop { position: -1.0, color: white },
            GradientStop { position: 1.2, color: black },
        ]
    };

    let mut lut = [0; 256];
    g.build_lut(&mut lut, 256);
    // Must not panic.
}

pub trait PixelFetch {
    fn get_pixel(bitmap: &Image, x: i32, y: i32) -> u32;
}

pub struct PadFetch;
impl PixelFetch for PadFetch {
    fn get_pixel(bitmap: &Image, mut x: i32, mut y: i32) -> u32 {
        if x < 0 {
            x = 0;
        }
        if x >= bitmap.width {
            x = bitmap.width - 1;
        }

        if y < 0 {
            y = 0;
        }
        if y >= bitmap.height {
            y = bitmap.height - 1;
        }

        bitmap.data[(y * bitmap.width + x) as usize]
    }
}

pub struct RepeatFetch;
impl PixelFetch for RepeatFetch {
    fn get_pixel(bitmap: &Image, mut x: i32, mut y: i32) -> u32 {
        // XXX: This is a very slow approach to repeating.
        // We should instead do the wrapping in the iterator
        x = x % bitmap.width;
        if x < 0 {
            x = x + bitmap.width;
        }

        y = y % bitmap.height;
        if y < 0 {
            y = y + bitmap.height;
        }

        bitmap.data[(y * bitmap.width + x) as usize]
    }
}
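
// A quick illustration of the two edge modes on a 2x1 image, added for
// documentation: PadFetch clamps out-of-range coordinates to the nearest
// edge pixel, while RepeatFetch wraps them around.
#[cfg(test)]
#[test]
fn test_pixel_fetch() {
    let data = [0x11111111, 0x22222222];
    let image = Image { width: 2, height: 1, data: &data };
    assert_eq!(PadFetch::get_pixel(&image, -1, 0), 0x11111111);
    assert_eq!(PadFetch::get_pixel(&image, 5, 3), 0x22222222);
    assert_eq!(RepeatFetch::get_pixel(&image, 2, 0), 0x11111111);
    assert_eq!(RepeatFetch::get_pixel(&image, -1, 0), 0x22222222);
}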

// Inspired by Filter_32_opaque from Skia.
fn bilinear_interpolation(
    tl: u32,
    tr: u32,
    bl: u32,
    br: u32,
    mut distx: u32,
    mut disty: u32,
) -> u32 {
    let distxy;
    let distxiy;
    let distixy;
    let distixiy;
    let mut lo;
    let mut hi;

    distx <<= 4 - BILINEAR_INTERPOLATION_BITS;
    disty <<= 4 - BILINEAR_INTERPOLATION_BITS;

    distxy = distx * disty;
    distxiy = (distx << 4) - distxy; // distx * (16 - disty)
    distixy = (disty << 4) - distxy; // disty * (16 - distx)

    // (16 - distx) * (16 - disty)
    // The intermediate calculation can underflow so we use
    // wrapping arithmetic to let the compiler know that it's ok
    distixiy = (16u32 * 16)
        .wrapping_sub(disty << 4)
        .wrapping_sub(distx << 4)
        .wrapping_add(distxy);

    lo = (tl & 0xff00ff) * distixiy;
    hi = ((tl >> 8) & 0xff00ff) * distixiy;

    lo += (tr & 0xff00ff) * distxiy;
    hi += ((tr >> 8) & 0xff00ff) * distxiy;

    lo += (bl & 0xff00ff) * distixy;
    hi += ((bl >> 8) & 0xff00ff) * distixy;

    lo += (br & 0xff00ff) * distxy;
    hi += ((br >> 8) & 0xff00ff) * distxy;

    ((lo >> 8) & 0xff00ff) | (hi & !0xff00ff)
}

// Inspired by Filter_32_alpha from Skia.
fn bilinear_interpolation_alpha(
    tl: u32,
    tr: u32,
    bl: u32,
    br: u32,
    mut distx: u32,
    mut disty: u32,
    alpha: Alpha256,
) -> u32 {
    let distxy;
    let distxiy;
    let distixy;
    let distixiy;
    let mut lo;
    let mut hi;

    distx <<= 4 - BILINEAR_INTERPOLATION_BITS;
    disty <<= 4 - BILINEAR_INTERPOLATION_BITS;

    distxy = distx * disty;
    distxiy = (distx << 4) - distxy; // distx * (16 - disty)
    distixy = (disty << 4) - distxy; // disty * (16 - distx)
    // (16 - distx) * (16 - disty)
    // The intermediate calculation can underflow so we use
    // wrapping arithmetic to let the compiler know that it's ok
    distixiy = (16u32 * 16)
        .wrapping_sub(disty << 4)
        .wrapping_sub(distx << 4)
        .wrapping_add(distxy);

    lo = (tl & 0xff00ff) * distixiy;
    hi = ((tl >> 8) & 0xff00ff) * distixiy;

    lo += (tr & 0xff00ff) * distxiy;
    hi += ((tr >> 8) & 0xff00ff) * distxiy;

    lo += (bl & 0xff00ff) * distixy;
    hi += ((bl >> 8) & 0xff00ff) * distixy;

    lo += (br & 0xff00ff) * distxy;
    hi += ((br >> 8) & 0xff00ff) * distxy;

    lo = ((lo >> 8) & 0xff00ff) * alpha;
    hi = ((hi >> 8) & 0xff00ff) * alpha;

    ((lo >> 8) & 0xff00ff) | (hi & !0xff00ff)
}

const FIXED_FRACTION_BITS: u32 = 16;
pub const FIXED_ONE: i32 = 1 << FIXED_FRACTION_BITS;
const FIXED_HALF: i32 = FIXED_ONE >> 1;

type Fixed = i32;

fn bilinear_weight(x: Fixed) -> u32 {
    // discard the unneeded bits of precision
    let reduced = x >> (FIXED_FRACTION_BITS - BILINEAR_INTERPOLATION_BITS);
    // extract the remaining fraction
    let fraction = reduced & ((1 << BILINEAR_INTERPOLATION_BITS) - 1);
    fraction as u32
}

fn fixed_to_int(x: Fixed) -> i32 {
    x >> FIXED_FRACTION_BITS
}

// there are various tricks that can be used to make this faster;
// let's just do the simplest thing for now
pub fn float_to_fixed(x: f32) -> Fixed {
    ((x * (1 << FIXED_FRACTION_BITS) as f32) + 0.5) as i32
}
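
// Spot checks for the fixed-point helpers, added for documentation: 16.16
// fixed point stores 1.0 as 65536, `fixed_to_int` floors, and
// `bilinear_weight` keeps the top BILINEAR_INTERPOLATION_BITS of the fraction.
#[cfg(test)]
#[test]
fn test_fixed_point_helpers() {
    assert_eq!(float_to_fixed(1.0), FIXED_ONE);
    assert_eq!(float_to_fixed(0.5), FIXED_HALF);
    assert_eq!(fixed_to_int(float_to_fixed(2.75)), 2);
    assert_eq!(fixed_to_int(float_to_fixed(2.75) + FIXED_HALF), 3);
    assert_eq!(bilinear_weight(FIXED_HALF), 8);
}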

pub fn fetch_bilinear<Fetch: PixelFetch>(image: &Image, x: Fixed, y: Fixed) -> u32 {
    let dist_x = bilinear_weight(x);
    let dist_y = bilinear_weight(y);

    let x1 = fixed_to_int(x);
    let y1 = fixed_to_int(y);
    let x2 = x1 + 1;
    let y2 = y1 + 1;

    let tl = Fetch::get_pixel(image, x1, y1);
    let tr = Fetch::get_pixel(image, x2, y1);
    let bl = Fetch::get_pixel(image, x1, y2);
    let br = Fetch::get_pixel(image, x2, y2);

    bilinear_interpolation(tl, tr, bl, br, dist_x, dist_y)
}

pub fn fetch_bilinear_alpha<Fetch: PixelFetch>(image: &Image, x: Fixed, y: Fixed, alpha: Alpha256) -> u32 {
    let dist_x = bilinear_weight(x);
    let dist_y = bilinear_weight(y);

    let x1 = fixed_to_int(x);
    let y1 = fixed_to_int(y);
    let x2 = x1 + 1;
    let y2 = y1 + 1;

    let tl = Fetch::get_pixel(image, x1, y1);
    let tr = Fetch::get_pixel(image, x2, y1);
    let bl = Fetch::get_pixel(image, x1, y2);
    let br = Fetch::get_pixel(image, x2, y2);

    bilinear_interpolation_alpha(tl, tr, bl, br, dist_x, dist_y, alpha)
}
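
// A 2x2 sketch, added for documentation: at an integer coordinate the fetch
// reproduces the source pixel exactly, and sampling half a pixel in from the
// top-left corner weights all four premultiplied pixels equally.
#[cfg(test)]
#[test]
fn test_fetch_bilinear() {
    let data = [0xff000000, 0xff404040, 0xff404040, 0xff808080];
    let image = Image { width: 2, height: 2, data: &data };
    assert_eq!(fetch_bilinear::<PadFetch>(&image, 0, 0), 0xff000000);
    let mid = fetch_bilinear::<PadFetch>(&image, FIXED_HALF, FIXED_HALF);
    assert_eq!(mid, 0xff404040);
}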

pub fn fetch_nearest<Fetch: PixelFetch>(image: &Image, x: Fixed, y: Fixed) -> u32 {
    Fetch::get_pixel(image, fixed_to_int(x + FIXED_HALF), fixed_to_int(y + FIXED_HALF))
}

pub fn fetch_nearest_alpha<Fetch: PixelFetch>(image: &Image, x: Fixed, y: Fixed, alpha: Alpha256) -> u32 {
    alpha_mul(Fetch::get_pixel(image, fixed_to_int(x + FIXED_HALF), fixed_to_int(y + FIXED_HALF)), alpha)
}

#[derive(Clone, Copy, PartialEq, Debug)]
pub struct PointFixedPoint {
    pub x: Fixed,
    pub y: Fixed,
}

#[derive(Clone, Debug)]
pub struct MatrixFixedPoint {
    pub xx: Fixed,
    pub xy: Fixed,
    pub yx: Fixed,
    pub yy: Fixed,
    pub x0: Fixed,
    pub y0: Fixed,
}

impl MatrixFixedPoint {
    pub fn transform(&self, x: u16, y: u16) -> PointFixedPoint {
        let x = x as i32;
        let y = y as i32;
        // When taking integer parameters we can use a regular multiply instead of a fixed one.
        //
        // We're also using wrapping arithmetic to prevent overflow panics in debug builds.
        // As a result, very large values wrap around and give incorrect results
        // instead of panicking.
        PointFixedPoint {
            x: x.wrapping_mul(self.xx).wrapping_add(self.xy.wrapping_mul(y)).wrapping_add(self.x0),
            y: y.wrapping_mul(self.yy).wrapping_add(self.yx.wrapping_mul(x)).wrapping_add(self.y0),
        }
    }
}
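
// Documentation sketch: with xx == yy == FIXED_ONE and everything else zero
// this is the identity transform, so pixel (3, 5) lands at (3 << 16, 5 << 16)
// in 16.16 fixed point.
#[cfg(test)]
#[test]
fn test_identity_transform() {
    let m = MatrixFixedPoint { xx: FIXED_ONE, xy: 0, yx: 0, yy: FIXED_ONE, x0: 0, y0: 0 };
    assert_eq!(m.transform(3, 5), PointFixedPoint { x: 3 << 16, y: 5 << 16 });
}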

#[cfg(test)]
#[test]
fn test_large_matrix() {
    let matrix = MatrixFixedPoint {
        xx: std::i32::MAX, xy: std::i32::MAX, yx: std::i32::MAX,
        yy: std::i32::MAX, x0: std::i32::MAX, y0: std::i32::MAX,
    };
    // `transform()` must not panic
    assert_eq!(
        matrix.transform(std::u16::MAX, std::u16::MAX),
        PointFixedPoint { x: 2147352577, y: 2147352577 }
    );
}

fn premultiply(c: u32) -> u32 {
    // This could be optimized by using SWAR
    let a = get_packed_a32(c);
    let mut r = get_packed_r32(c);
    let mut g = get_packed_g32(c);
    let mut b = get_packed_b32(c);

    if a < 255 {
        r = muldiv255(r, a);
        g = muldiv255(g, a);
        b = muldiv255(b, a);
    }

    pack_argb32(a, r, g, b)
}
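
// Spot checks worked out from muldiv255, added for documentation: 50% alpha
// halves each channel (rounding to nearest), zero alpha clears the color
// channels, and full alpha is a no-op.
#[cfg(test)]
#[test]
fn test_premultiply() {
    assert_eq!(premultiply(0x80ff0000), 0x80800000);
    assert_eq!(premultiply(0x00ffffff), 0x00000000);
    assert_eq!(premultiply(0xff123456), 0xff123456);
}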

#[inline]
fn pack_argb32(a: u32, r: u32, g: u32, b: u32) -> u32 {
    debug_assert!(r <= a);
    debug_assert!(g <= a);
    debug_assert!(b <= a);
    (a << A32_SHIFT) | (r << R32_SHIFT) | (g << G32_SHIFT) | (b << B32_SHIFT)
}

#[inline]
fn get_packed_a32(packed: u32) -> u32 { ((packed) << (24 - A32_SHIFT)) >> 24 }
#[inline]
fn get_packed_r32(packed: u32) -> u32 { ((packed) << (24 - R32_SHIFT)) >> 24 }
#[inline]
fn get_packed_g32(packed: u32) -> u32 { ((packed) << (24 - G32_SHIFT)) >> 24 }
#[inline]
fn get_packed_b32(packed: u32) -> u32 { ((packed) << (24 - B32_SHIFT)) >> 24 }

#[inline]
fn packed_alpha(x: u32) -> u32 {
    x >> A32_SHIFT
}

// this is an approximation of true 'over' that does a division by 256 instead
// of 255. It is the same style of blending that Skia does; it corresponds
// to Skia's SkPMSrcOver.
#[inline]
pub fn over(src: u32, dst: u32) -> u32 {
    let a = packed_alpha(src);
    let a = 256 - a;
    let mask = 0xff00ff;
    let rb = ((dst & 0xff00ff) * a) >> 8;
    let ag = ((dst >> 8) & 0xff00ff) * a;
    src + ((rb & mask) | (ag & !mask))
}

#[cfg(test)]
#[test]
fn test_over() {
    let src = 0x81004000;
    let dst = 0xffffffff;
    assert_eq!(over(src, dst), over_exact(src, dst));
    assert_eq!(over(src, dst), 0xff7ebe7e);
}

// this calculates over() with a division by 255 and
// handles saturation to 255 that is needed for handling
// superluminescent pixels.
#[inline]
pub fn over_exact(src: u32, dst: u32) -> u32 {
    let a = packed_alpha(src);
    let a = 255 - a;
    let mask = 0xff00ff;
    let t = (dst & mask) * a + 0x800080;
    let mut rb = (t + ((t >> 8) & mask)) >> 8;
    rb &= mask;

    rb += src & mask;

    // saturate
    rb |= 0x1000100 - ((rb >> 8) & mask);
    rb &= mask;

    let t = ((dst >> 8) & mask) * a + 0x800080;
    let mut ag = (t + ((t >> 8) & mask)) >> 8;
    ag &= mask;
    ag += (src >> 8) & mask;

    // saturate
    ag |= 0x1000100 - ((ag >> 8) & mask);
    ag &= mask;

    (ag << 8) + rb
}

#[cfg(test)]
#[test]
fn test_over_exact() {
    assert_eq!(over_exact(0xff00ff00, 0xffff0000), 0xff00ff00);
    assert_eq!(over_exact(0x80008000, 0xffff0000), 0xff7f8000);

    // superluminance: over() gives the wrong result while
    // over_exact() saturates.
    assert_eq!(over(0x80ffff00, 0xffff0000), 0x7eff00);
    assert_eq!(over_exact(0x80ffff00, 0xffff0000), 0xffffff00);
}

pub fn alpha_to_alpha256(alpha: u32) -> u32 {
    alpha + 1
}

// Calculates 256 - (value * alpha256) / 255 in range [0,256],
// for [0,255] value and [0,256] alpha256.
#[inline]
fn alpha_mul_inv256(value: u32, alpha256: u32) -> u32 {
    let prod = value * alpha256;
    256 - ((prod + (prod >> 8)) >> 8)
}

// Calculates (value * alpha256) / 255 in range [0,256],
// for [0,255] value and [0,256] alpha256.
fn alpha_mul_256(value: u32, alpha256: u32) -> u32 {
    let prod = value * alpha256;
    (prod + (prod >> 8)) >> 8
}

// Calculates floor(a*b/255 + 0.5)
#[inline]
pub fn muldiv255(a: u32, b: u32) -> u32 {
    // The derivation for this formula can be
    // found in "Three Wrongs Make a Right" by Jim Blinn.
    let tmp = a * b + 128;
    (tmp + (tmp >> 8)) >> 8
}

// Calculates floor(a/255 + 0.5)
pub fn div255(a: u32) -> u32 {
    // The derivation for this formula can be
    // found in "Three Wrongs Make a Right" by Jim Blinn.
    let tmp = a + 128;
    (tmp + (tmp >> 8)) >> 8
}
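
// Worked values, added for documentation: muldiv255 rounds a*b/255 to the
// nearest integer, unlike a plain (a * b) >> 8 which always rounds down.
#[cfg(test)]
#[test]
fn test_muldiv255() {
    assert_eq!(muldiv255(255, 255), 255);
    assert_eq!(muldiv255(128, 128), 64);
    assert_eq!(div255(510), 2);
    assert_eq!(div255(128), 1); // 128/255 rounds up to 1
}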

#[inline]
pub fn alpha_mul(x: u32, a: Alpha256) -> u32 {
    let mask = 0xFF00FF;

    let src_rb = ((x & mask) * a) >> 8;
    let src_ag = ((x >> 8) & mask) * a;

    (src_rb & mask) | (src_ag & !mask)
}
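
// Documentation sketch: an Alpha256 of 256 (from alpha_to_alpha256(255)) is
// the identity scale, while 128 floors each 0xff channel to 0x7f because the
// scale divides by 256 rather than 255.
#[cfg(test)]
#[test]
fn test_alpha_mul() {
    assert_eq!(alpha_mul(0xffffffff, alpha_to_alpha256(255)), 0xffffffff);
    assert_eq!(alpha_mul(0xffffffff, alpha_to_alpha256(127)), 0x7f7f7f7f);
}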

// This approximates the division by 255 using a division by 256.
// It matches the behaviour of SkBlendARGB32 from Skia in 2017.
// The behaviour of SkBlendARGB32 was changed in 2016 by Lee Salzman
// in Skia:40254c2c2dc28a34f96294d5a1ad94a99b0be8a6 to keep more of the
// intermediate precision. This code was then changed to use the alpha setup
// code from the original implementation together with the additional precision
// of the reimplementation; that combined approach avoids getting incorrect
// results when `alpha` is 0 and is slightly faster. However, it suffered from
// overflow and so was switched back to a modified version of the previous one
// that adds 1 to the result.
#[inline]
pub fn over_in(src: u32, dst: u32, alpha: u32) -> u32 {
    let src_alpha = alpha_to_alpha256(alpha);
    let dst_alpha = alpha_mul_inv256(packed_alpha(src), src_alpha);

    let mask = 0xFF00FF;

    let src_rb = (src & mask) * src_alpha;
    let src_ag = ((src >> 8) & mask) * src_alpha;

    let dst_rb = (dst & mask) * dst_alpha;
    let dst_ag = ((dst >> 8) & mask) * dst_alpha;

    // we sum src and dst before reducing to 8 bit to avoid accumulating rounding errors
    (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & !mask)
}

pub fn over_in_legacy_lerp(src: u32, dst: u32, alpha: u32) -> u32 {
    let src_scale = alpha_to_alpha256(alpha);
    let dst_scale = alpha_to_alpha256(255 - alpha_mul(packed_alpha(src), src_scale));
    alpha_mul(src, src_scale) + alpha_mul(dst, dst_scale)
}

#[cfg(target_arch = "x86")]
use std::arch::x86::{self as x86_intrinsics, __m128i};
#[cfg(target_arch = "x86_64")]
use std::arch::x86_64::{self as x86_intrinsics, __m128i};

#[cfg(not(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2")))]
pub fn over_in_row(src: &[u32], dst: &mut [u32], alpha: u32) {
    for (dst, src) in dst.iter_mut().zip(src) {
        *dst = over_in(*src, *dst, alpha);
    }
}

#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
pub fn over_in_row(src: &[u32], dst: &mut [u32], alpha: u32) {
    use x86_intrinsics::{
        _mm_loadu_si128,
        _mm_storeu_si128,
    };

    unsafe {
        let mut len = src.len().min(dst.len());
        let mut src_ptr = src.as_ptr() as *const __m128i;
        let mut dst_ptr = dst.as_mut_ptr() as *mut __m128i;

        while len >= 4 {
            _mm_storeu_si128(dst_ptr, over_in_sse2(_mm_loadu_si128(src_ptr), _mm_loadu_si128(dst_ptr), alpha));
            src_ptr = src_ptr.offset(1);
            dst_ptr = dst_ptr.offset(1);
            len -= 4;
        }
        let mut src_ptr = src_ptr as *const u32;
        let mut dst_ptr = dst_ptr as *mut u32;
        while len >= 1 {
            *dst_ptr = over_in(*src_ptr, *dst_ptr, alpha);
            src_ptr = src_ptr.offset(1);
            dst_ptr = dst_ptr.offset(1);
            len -= 1;
        }
    }
}

// derived from Skia's SkBlendARGB32_SSE2
#[cfg(all(any(target_arch = "x86", target_arch = "x86_64"), target_feature = "sse2"))]
fn over_in_sse2(src: __m128i, dst: __m128i, alpha: u32) -> __m128i {
    use x86_intrinsics::{
        _mm_set1_epi16,
        _mm_set1_epi32,
        _mm_mullo_epi16,
        _mm_add_epi16,
        _mm_sub_epi32,
        _mm_add_epi32,
        _mm_srli_epi16,
        _mm_shufflelo_epi16,
        _mm_shufflehi_epi16,
        _mm_srli_epi32,
        _mm_and_si128,
        _mm_andnot_si128,
        _mm_or_si128,
    };

    #[allow(non_snake_case)]
    pub const fn _MM_SHUFFLE(z: u32, y: u32, x: u32, w: u32) -> i32 {
        ((z << 6) | (y << 4) | (x << 2) | w) as i32
    }

    unsafe {
        let alpha = alpha_to_alpha256(alpha);

        let src_scale = _mm_set1_epi16(alpha as i16);
        // SkAlphaMulInv256(SkGetPackedA32(src), src_scale)
        let mut dst_scale = _mm_srli_epi32(src, 24);
        // High words in dst_scale are 0, so it's safe to multiply with 16-bit src_scale.
        dst_scale = _mm_mullo_epi16(dst_scale, src_scale);
        dst_scale = _mm_add_epi32(dst_scale, _mm_srli_epi32(dst_scale, 8));
        dst_scale = _mm_srli_epi32(dst_scale, 8);
        dst_scale = _mm_sub_epi32(_mm_set1_epi32(0x100), dst_scale);

        // Duplicate scales into 2x16-bit pattern per pixel.
        dst_scale = _mm_shufflelo_epi16(dst_scale, _MM_SHUFFLE(2, 2, 0, 0));
        dst_scale = _mm_shufflehi_epi16(dst_scale, _MM_SHUFFLE(2, 2, 0, 0));

        let mask = _mm_set1_epi32(0x00FF00FF);

        // Unpack the 16x8-bit source/destination into 2 8x16-bit splayed halves.
        let mut src_rb = _mm_and_si128(mask, src);
        let mut src_ag = _mm_srli_epi16(src, 8);
        let mut dst_rb = _mm_and_si128(mask, dst);
        let mut dst_ag = _mm_srli_epi16(dst, 8);

        // Scale them.
        src_rb = _mm_mullo_epi16(src_rb, src_scale);
        src_ag = _mm_mullo_epi16(src_ag, src_scale);
        dst_rb = _mm_mullo_epi16(dst_rb, dst_scale);
        dst_ag = _mm_mullo_epi16(dst_ag, dst_scale);

        // Add the scaled source and destination.
        dst_rb = _mm_add_epi16(src_rb, dst_rb);
        dst_ag = _mm_add_epi16(src_ag, dst_ag);

        // Unsplay the halves back together.
        dst_rb = _mm_srli_epi16(dst_rb, 8);
        dst_ag = _mm_andnot_si128(mask, dst_ag);
        _mm_or_si128(dst_rb, dst_ag)
    }
}

// Similar to over_in but includes an additional clip alpha value
#[inline]
pub fn over_in_in(src: u32, dst: u32, mask: u32, clip: u32) -> u32 {
    let src_alpha = alpha_to_alpha256(mask);
    let src_alpha = alpha_to_alpha256(alpha_mul_256(clip, src_alpha));
    let dst_alpha = alpha_mul_inv256(packed_alpha(src), src_alpha);

    let mask = 0xFF00FF;

    let src_rb = (src & mask) * src_alpha;
    let src_ag = ((src >> 8) & mask) * src_alpha;

    let dst_rb = (dst & mask) * dst_alpha;
    let dst_ag = ((dst >> 8) & mask) * dst_alpha;

    // we sum src and dst before reducing to 8 bit to avoid accumulating rounding errors
    (((src_rb + dst_rb) >> 8) & mask) | ((src_ag + dst_ag) & !mask)
}

#[cfg(test)]
#[test]
fn test_over_in() {
    assert_eq!(over_in(0xff00ff00, 0xffff0000, 0xff), 0xff00ff00);
    let mut dst = [0xffff0000, 0, 0, 0];
    over_in_row(&[0xff00ff00, 0, 0, 0], &mut dst, 0xff);
    assert_eq!(dst[0], 0xff00ff00);
    assert_eq!(over_in_in(0xff00ff00, 0xffff0000, 0xff, 0xff), 0xff00ff00);

    // ensure that blending `color` on top of itself
    // doesn't change the color
    for c in 0..=255 {
        for a in 0..=255 {
            let color = 0xff000000 | c;
            assert_eq!(over_in(color, color, a), color);
            let mut dst = [color, 0, 0, 0];
            over_in_row(&[color, 0, 0, 0], &mut dst, a);
            assert_eq!(dst[0], color);
        }
    }

    // make sure we don't overflow
    for s in 0..=255 {
        for a in 0..=255 {
            let result = over_in(s << 24, 0xff000000, a);
            let mut dst = [0xff000000, 0, 0, 0];
            over_in_row(&[s << 24, 0, 0, 0], &mut dst, a);
            assert_eq!(dst[0], result);
        }
    }

    // test that blending by 0 preserves the destination
    assert_eq!(over_in(0xff000000, 0xff0000ff, 0x0), 0xff0000ff);
    assert_eq!(over_in_legacy_lerp(0xff000000, 0xff0000ff, 0x0), 0xff0000ff);

    // tests that blending is broken with the legacy version
    assert_eq!(over_in_legacy_lerp(0xff2e3338, 0xff2e3338, 127), 0xff2e3238);

    let mut dst = [0xff0000ff, 0, 0, 0];
    over_in_row(&[0xff000000, 0, 0, 0], &mut dst, 0);
    assert_eq!(dst[0], 0xff0000ff);
}

pub fn alpha_lerp(src: u32, dst: u32, mask: u32, clip: u32) -> u32 {
    let alpha = alpha_mul_256(alpha_to_alpha256(mask), clip);
    lerp(src, dst, alpha)
}
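
// Endpoint checks for alpha_lerp, added for documentation: a zero mask leaves
// the source untouched (lerp with t == 0), and a full mask and clip drive t to
// 255, which is as close to `dst` as this approximation gets.
#[cfg(test)]
#[test]
fn test_alpha_lerp() {
    assert_eq!(alpha_lerp(0xff000000, 0x00ff0000, 0, 255), 0xff000000);
    assert_eq!(alpha_lerp(0xff000000, 0x00000000, 255, 255), 0x00000000);
}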