extern crate rgb;
extern crate imgref;
use rgb::*;
use rgb::alt::*;
use imgref::*;
use std::cmp;

/// Chroma subsampling expressed as the scale of each chroma channel relative to luma
/// (i.e. the inverse of the way libjpeg counts the number of samples per channel),
/// e.g. `(2,2)` means 1 chroma sample per 4 luma samples.
///
/// Each tuple is `(horizontal, vertical)`.
#[derive(Copy, Clone, Debug, PartialEq, Eq, Hash)]
pub struct PixelSize {
    /// Scale of the Cb channel, `(horizontal, vertical)`.
    pub cb: (u8, u8),
    /// Scale of the Cr channel, `(horizontal, vertical)`.
    pub cr: (u8, u8),
}

/// Recommended settings for image encoding
#[derive(Copy, Clone, Debug)]
pub struct ChromaEvaluation {
    /// Recommended subsampling, expressed as the size of one chroma pixel in luma pixels
    /// (i.e. the inverse of the way libjpeg counts the number of samples per channel).
    pub subsampling: PixelSize,
    /// Recommended JPEG quality for the chroma channel
    pub chroma_quality: f32,
    /// FYI. Approximate maximum contrast found in the image. The values are kinda arbitrary fudge factors. ~6000 is high sharpness.
    ///
    /// `None` when the image was not analyzed.
    pub sharpness: Option<Sharpness>,
}

/// Cheap integer stand-in for a YCbCr colour, produced by [`RGBToCbQuick::to_cbcr_q`].
///
/// The channels are not on a common scale; the defaulted value has all channels at zero.
#[derive(Debug, Default, Clone, Copy)]
pub struct YCbCrApprox {
    /// Luma-like weighted sum of the colour components.
    y: u16,
    /// Blue-difference chroma, biased so that it is never negative.
    cb: u16,
    /// Red-difference chroma, biased so that it is never negative.
    cr: u16,
}

/// Fast CbCr approximation
///
/// Implemented for pixel types that can be converted, by value, into a
/// [`YCbCrApprox`] for the purpose of comparing chroma between neighbouring pixels.
pub trait RGBToCbQuick {
    /// Converts this pixel into its approximate luma/chroma representation.
    fn to_cbcr_q(self) -> YCbCrApprox;
}

impl RGBToCbQuick for RGB8 {
    /// Integer-only luma/chroma approximation of an 8-bit RGB pixel.
    ///
    /// Both chroma values are shifted by a fixed bias so they can be stored unsigned.
    fn to_cbcr_q(self) -> YCbCrApprox {
        // Biases that lift the signed chroma differences into non-negative territory
        const CB_BIAS: i16 = 3*255;
        const CR_BIAS: i16 = 6*255;

        let (r, g, b) = (self.r as i16, self.g as i16, self.b as i16);

        // 0 ..= 9*255
        let luma = 3 * r + 5 * g + b;
        // -3*255 ..= 3*255. Deliberately a smaller scale than Cr.
        let blue_diff = 3 * b - 2 * g - r;
        // -6*255 ..= 6*255
        let red_diff = 6 * r - 5 * g - b;

        // Negative red difference means mostly-green: halve both chroma values
        // to make the result less sensitive to it.
        let (blue_diff, red_diff) = if red_diff < 0 {
            (blue_diff / 2, red_diff / 2)
        } else {
            (blue_diff, red_diff)
        };

        YCbCrApprox {
            y: luma as u16,
            cb: (blue_diff + CB_BIAS) as u16,
            cr: (red_diff + CR_BIAS) as u16,
        }
    }
}

impl RGBToCbQuick for GRAY8 {
    #[inline] fn to_cbcr_q(self) -> YCbCrApprox {Default::default()}
}

impl RGBToCbQuick for GRAY16 {
    #[inline] fn to_cbcr_q(self) -> YCbCrApprox {Default::default()}
}

impl RGBToCbQuick for GRAYA8 {
    #[inline] fn to_cbcr_q(self) -> YCbCrApprox {Default::default()}
}

impl RGBToCbQuick for GRAYA16 {
    #[inline] fn to_cbcr_q(self) -> YCbCrApprox {Default::default()}
}

impl RGBToCbQuick for RGBA8 {
    #[inline]
    fn to_cbcr_q(self) -> YCbCrApprox {
        self.rgb().to_cbcr_q()
    }
}

impl RGBToCbQuick for BGRA8 {
    #[inline]
    fn to_cbcr_q(self) -> YCbCrApprox {
        RGB8::new(self.r, self.g, self.b).to_cbcr_q()
    }
}

impl RGBToCbQuick for BGR8 {
    #[inline]
    fn to_cbcr_q(self) -> YCbCrApprox {
        RGB8::new(self.r, self.g, self.b).to_cbcr_q()
    }
}

impl RGBToCbQuick for RGB16 {
    #[inline]
    fn to_cbcr_q(self) -> YCbCrApprox {
        self.map(|c| (c>>8) as u8).to_cbcr_q()
    }
}

impl RGBToCbQuick for RGBA16 {
    #[inline]
    fn to_cbcr_q(self) -> YCbCrApprox {
        self.rgb().to_cbcr_q()
    }
}

/// Scores how badly the middle pixel `r1` deviates, in chroma, from a straight
/// gradient between its two neighbours `c0` and `c2`.
///
/// Returns `(cb_score, cr_score)`. Each score is the squared deviation of the
/// middle pixel from the neighbours' average (scaled by 2), weighted by a boost
/// that depends on the luma of the three pixels.
#[inline]
fn gradient_diff<T: Copy + RGBToCbQuick>(c0: YCbCrApprox, r1: T, c2: YCbCrApprox) -> (u32, u32) {
    const Y_MAX: i16 = 9*255;

    let mid = r1.to_cbcr_q();

    // Chroma problems are worse for gray-ish colors; black/white is better.
    // Range is Y_MAX/2 ..= Y_MAX, peaking when the middle pixel has mid-range luma.
    let distance_from_mid_gray = (Y_MAX/2 - mid.y as i16).abs();
    let contrast_boost = (Y_MAX - distance_from_mid_gray) as i32;

    // A luma edge between the neighbours lowers the weight, but never below Y_MAX,
    // so that even hard edges keep some weight.
    let edge = (c0.y as i16 - c2.y as i16).abs() as i32;
    let no_edge_boost = Y_MAX as i32 * 2 - edge;

    // 1.5*Y_MAX ..= 3*Y_MAX, scaled down to 107..=215
    let boost = (no_edge_boost + contrast_boost) as u32 / 32;

    // The neighbours' average is the ideal slope that upscaling could reproduce;
    // `outer_sum - 2*centre` is twice the middle pixel's distance from it.
    let weighted_deviation = |outer_sum: u16, centre: u16| -> u32 {
        let deviation = outer_sum as i32 - 2*centre as i32;
        (deviation*deviation) as u32 * boost / 128
    };

    (
        weighted_deviation(c0.cb + c2.cb, mid.cb),
        weighted_deviation(c0.cr + c2.cr, mid.cr),
    )
}

/// Analyze `img`, and recommend the best chroma subsampling and chroma quality for it.
///
/// `subsampling` is the worst allowed subsampling. If you pass (1,1) for both channels, the
/// image is not analyzed and the output will also be (1,1). If you pass (2,2), then the output
/// may be the same or sharper than that.
///
/// `chroma_quality` is the target image quality, from 0.0 to 100.0. Worse quality allows harsher subsampling.
///
/// # Panics
///
/// Panics if `chroma_quality` is outside `0.0..=100.0`.
pub fn adjust_sampling<T: Copy + RGBToCbQuick>(img: ImgRef<T>, mut subsampling: PixelSize, mut chroma_quality: f32) -> ChromaEvaluation {
    assert!(chroma_quality >= 0.0 && chroma_quality <= 100.0);

    // No gain when chroma can't be smaller
    let too_narrow = img.width() <= 8;
    let too_short = img.height() <= 8;
    if too_narrow {
        subsampling.cb.0 = 1;
        subsampling.cr.0 = 1;
    }
    if too_short {
        subsampling.cb.1 = 1;
        subsampling.cr.1 = 1;
    }

    // Nothing to decide unless at least one axis of one channel may be subsampled
    let PixelSize { cb, cr } = subsampling;
    let may_subsample = [cb.0, cb.1, cr.0, cr.1].iter().any(|&scale| scale >= 2);
    if !may_subsample {
        return ChromaEvaluation {
            subsampling,
            chroma_quality,
            sharpness: None,
        };
    }

    let threshold = (chroma_quality * 2.) as u32;
    let (cb_sh, cr_sh) = image_sharpness(img, 60*threshold);

    let new_cb = adjust_sampling_ch(img, cb, threshold, cb_sh);
    let new_cr = adjust_sampling_ch(img, cr, threshold, cr_sh);

    // Report the sharper of the two channels for every measure
    let combined = Sharpness {
        horiz: cmp::max(cb_sh.horiz, cr_sh.horiz),
        vert: cmp::max(cb_sh.vert, cr_sh.vert),
        peak: cmp::max(cb_sh.peak, cr_sh.peak),
    };

    // Soft images get a lower chroma quality
    let soft_limit = 2*threshold;
    if combined.horiz < soft_limit && combined.vert < soft_limit {
        chroma_quality *= 0.9;
    }

    // Sharper Cr sampling is paid for with a lower chroma quality
    let cr_penalty = match new_cr {
        (2,2) => None,
        (1,1) => Some(0.9),
        _ => Some(0.93),
    };
    if let Some(factor) = cr_penalty {
        chroma_quality *= factor;
    }

    ChromaEvaluation {
        subsampling: PixelSize { cb: new_cb, cr: new_cr },
        chroma_quality,
        sharpness: Some(combined),
    }
}

/// Picks the subsampling for a single chroma channel.
///
/// * `img` is only consulted for its dimensions.
/// * `sampling` is the `(horizontal, vertical)` subsampling requested for this channel.
/// * `threshold` scales every sharpness limit used below.
/// * `sh` is the measured sharpness of this channel.
///
/// Returns the `(horizontal, vertical)` subsampling to use.
fn adjust_sampling_ch<T: Copy + RGBToCbQuick>(img: ImgRef<T>, mut sampling: (u8, u8), threshold: u32, mut sh: Sharpness) -> (u8, u8) {
    // If there's no sharp color, then subsample all the way
    // NOTE(review): this sets the factor to 2 even when the caller asked for 1 on that axis — confirm that is intended.
    if sh.horiz == 0 {
        sampling.0 = 2;
    }
    if sh.vert == 0 {
        sampling.1 = 2;
    }

    // Assume large images don't need sharp color that much (because they'll be displayed scaled down),
    // but tiny images do need sharp color (since they may be enlarged or scrutinized).
    match img.width() {
        0..=100 => sh.horiz *= 2,
        101..=1800 => {},
        _ => sh.horiz /= 2,
    };
    match img.height() {
        0..=100 => sh.vert *= 2,
        101..=1600 => {},
        _ => sh.vert /= 2,
    };

    // Prefer 1,1 and 2,1 over 1,2 due to scaling quality in older libjpeg-turbo
    if sh.horiz > 50*threshold && sh.vert > 50*threshold {
        // Sharp in both directions: no subsampling at all
        (1,1)
    }
    else if (sampling.0 >= 2 && sh.vert > 25*threshold) || sh.vert > 50*threshold {
        // Vertically sharp: keep full vertical resolution. The lower limit applies
        // when the channel is still subsampled horizontally.
        (sampling.0, 1)
    }
    else if sh.horiz > 60*threshold {
        // Horizontally sharp only: keep full horizontal resolution
        (1, sampling.1)
    } else {
        sampling
    }
}

/// Chroma sharpness measurements for an image. The scale of the values is arbitrary.
#[derive(Debug, Clone, Copy)]
pub struct Sharpness {
    /// Sharpness measured between vertically neighbouring pixels.
    pub vert: u32,
    /// Sharpness measured between horizontally neighbouring pixels.
    pub horiz: u32,
    /// Scaled-down value of the single largest difference found in either direction.
    pub peak: u32,
}

/// Measures how sharp the chroma of `img` is, returning `(cb, cr)` sharpness.
///
/// The image is scanned in horizontal fragments. Within a fragment every other row and every
/// other column is sampled with [`gradient_diff`], once horizontally and once vertically, and
/// the scores are averaged over the fragment. The reported `horiz`/`vert` values are the largest
/// fragment averages seen; `peak` is the largest single score, scaled down.
///
/// Scanning stops early once, for both channels, a fragment average exceeds `break_if_exceeds`.
///
/// Images narrower or shorter than 3 pixels are not scanned and yield zero `horiz`/`vert`
/// with a `peak` of 100.
fn image_sharpness<T: Copy + RGBToCbQuick>(img: ImgRef<T>, break_if_exceeds: u32) -> (Sharpness, Sharpness) {
    /// Raises each component of `max` to the matching `sum / pixels` when that is larger.
    fn raise_to_average(max: &mut (u32, u32), sum: (u64, u64), pixels: u64) {
        max.0 = cmp::max(max.0, (sum.0 / pixels) as u32);
        max.1 = cmp::max(max.1, (sum.1 / pixels) as u32);
    }

    if img.height() < 3 || img.width() < 3 {
        let dud = Sharpness{vert:0, horiz:0, peak:100};
        return (dud, dud);
    }
    let img_width = img.width();

    debug_assert!(img.buf.len() > img.stride()*2);
    debug_assert!(img.buf.len() >= img.width()*3);
    // Stop after the image's own rows, even if the backing buffer happens to be longer
    let mut row_iter = img.buf.chunks(img.stride()).take(img.height());
    let mut row0 = row_iter.next().expect("image has at least 3 rows");
    let mut row1 = row_iter.next().expect("image has at least 3 rows");
    let mut row2 = row_iter.next().expect("image has at least 3 rows");

    // (cb, cr) sums for the current fragment. u64, because a sum of u32 scores
    // would overflow usize on 32-bit targets.
    let mut sumh = (0u64, 0u64);
    let mut sumv = (0u64, 0u64);

    // (cb, cr) maxima over all fragments
    let mut max_sumv = (0u32, 0u32);
    let mut max_sumh = (0u32, 0u32);
    let mut max_diff = (0u32, 0u32);
    let mut fragment_height = 0usize;
    let fragment_max_height = if img.height() > 128 {img.height() / 6} else {img.height() / 4}; // NB: scan skips lines
    loop {
        let mut c0 = row0[0].to_cbcr_q();
        for i in 0 .. (img_width - 2)/2 {
            let i = i*2;
            // Horizontal triple is row0[i], row0[i+1], row0[i+2];
            // vertical triple is row0[i], row1[i], row2[i].
            let a0 = c0; let b0 = row0[i+1]; c0 = row0[i+2].to_cbcr_q();
            let a1 = row1[i];
            let a2 = row2[i].to_cbcr_q();

            // it would be nice to do something for colors in subpixel AA
            // But naive division by luma diff doesn't help
            let h = gradient_diff(a0,b0,c0);
            let v = gradient_diff(a0,a1,a2);

            max_diff.0 = cmp::max(max_diff.0, cmp::max(h.0, v.0));
            max_diff.1 = cmp::max(max_diff.1, cmp::max(h.1, v.1));

            sumh.0 += u64::from(h.0);
            sumh.1 += u64::from(h.1);
            sumv.0 += u64::from(v.0);
            sumv.1 += u64::from(v.1);
        }

        fragment_height += 1;
        if fragment_height >= fragment_max_height {
            let pixels = fragment_height as u64 * img_width as u64;
            raise_to_average(&mut max_sumh, sumh, pixels);
            raise_to_average(&mut max_sumv, sumv, pixels);
            // Both channels are already known to be sharp; no point in scanning further
            if (max_sumv.0 > break_if_exceeds || max_sumh.0 > break_if_exceeds) &&
               (max_sumv.1 > break_if_exceeds || max_sumh.1 > break_if_exceeds) {
                break;
            }
            sumh = (0,0);
            sumv = (0,0);
            fragment_height = 0;
        }
        // Advance by two rows: the old bottom row becomes the new top row
        row0 = row2;
        row1 = match row_iter.next() { Some(r) => r, None => break };
        row2 = match row_iter.next() { Some(r) => r, None => break };
    }
    // Count a trailing partial fragment only if it is tall enough
    if fragment_height > 16 {
        let pixels = fragment_height as u64 * img_width as u64;
        raise_to_average(&mut max_sumh, sumh, pixels);
        raise_to_average(&mut max_sumv, sumv, pixels);
    }

    // It would be nice to use information-weighed average
    let max_diff_max: u32 = (6*256*2)*(6*256*2);
    let peak_unit = max_diff_max/100;
    (Sharpness {
        horiz: max_sumh.0,
        vert: max_sumv.0,
        peak: max_diff.0 / peak_unit,
    },
    Sharpness {
        horiz: max_sumh.1,
        vert: max_sumv.1,
        peak: max_diff.1 / peak_unit,
    })
}