cgats 0.2.0 - Docs.rs

pub use deltae::{DEMethod, LabValue, Delta};

use crate::*;

impl Cgats {
    /// Calculate the DeltaE between each value of two CGATS data sets.
    /// Returns an error if either of the two sets do not contain `Lab` data.
    pub fn delta(&self, other: &Self, method: DEMethod) -> Result<Self> {
        self.can_delta(other)?;
        let lab_ref_l = self.get_col_by_field(&LAB_L).ok_or("unable to find reference L* column")?;
        let lab_ref_a = self.get_col_by_field(&LAB_A).ok_or("unable to find reference a* column")?;
        let lab_ref_b = self.get_col_by_field(&LAB_B).ok_or("unable to find reference b* column")?;

        let lab_sam_l = other.get_col_by_field(&LAB_L).ok_or("unable to find sample L* column")?;
        let lab_sam_a = other.get_col_by_field(&LAB_A).ok_or("unable to find sample a* column")?;
        let lab_sam_b = other.get_col_by_field(&LAB_B).ok_or("unable to find sample b* column")?;

        let labify = |lab: ((&DataPoint, &DataPoint), &DataPoint)| {
            LabValue {
                l: lab.0.0.to_float_unchecked(),
                a: lab.0.1.to_float_unchecked(),
                b: lab.1.to_float_unchecked(),
            }
        };

        let lab_ref = lab_ref_l
            .zip(lab_ref_a)
            .zip(lab_ref_b)
            .map(labify);

        let lab_sam = lab_sam_l
            .zip(lab_sam_a)
            .zip(lab_sam_b)
            .map(labify);

        let delta = lab_ref.zip(lab_sam)
            .map(|(lab_ref, lab_sam)| DataPoint::new_float(*lab_ref.delta(lab_sam, method).value()))
            .collect::<Vec<_>>();

        let mut cgats = Cgats::default();
        cgats.data_format.fields = vec![Field::from(method)];
        cgats.data = delta;
        cgats.reindex_sample_id();

        Ok(cgats)
    }

    /// Calculate DeltaE and generate a report
    pub fn de_report(&self, other: &Self, method: DEMethod, upper_pct: f32) -> Result<DeReport> {
        let delta = self.delta(other, method)?;
        let mut values: Vec<f32> = delta.get_col_by_field(&Field::from(method))
            .unwrap()
            .map(DataPoint::to_float_unchecked)
            .collect();
        values.sort_by(|a,b| a.partial_cmp(b).unwrap());
        Ok(DeReport {
            method,
            values,
            upper_pct,
        })
    }

    /// Determine if it is possible to calculate DeltaE between two CGATS data sets.
    pub fn can_delta(&self, other: &Self) -> Result<()> {
        if !self.has_color_type(&ColorType::Lab) || !other.has_color_type(&ColorType::Lab) {
            return err!("data set does not contain Lab data")
        }

        if self.n_rows() != other.n_rows() {
            return err!("data sets do not contain the same number of samples")
        }

        Ok(())
    }
}

impl From<DEMethod> for Field {
    fn from(method: DEMethod) -> Self {
        match method {
            DEMethod::DE2000 => Field::DE_2000,
            DEMethod::DE1976 => Field::DE_1976,
            DEMethod::DE1994G => Field::DE_1994,
            DEMethod::DE1994T => Field::DE_1994T,
            DEMethod::DECMC(a, b) => {
                if a == b {
                    Field::DE_CMC
                } else if a / b == 2.0 {
                    Field::DE_CMC2
                } else {
                    Field::Other(format!("DECMC({a},{b})"))
                }
            }
        }
    }
}

impl TryFrom<Field> for DEMethod {
    type Error = BoxErr;
    fn try_from(field: Field) -> Result<Self> {
        Ok(match field {
            Field::DE_2000 => DEMethod::DE2000,
            Field::DE_1976 => DEMethod::DE1976,
            Field::DE_1994 => DEMethod::DE1994G,
            Field::DE_1994T => DEMethod::DE1994T,
            Field::DE_CMC => DEMethod::DECMC(1.0, 1.0),
            Field::DE_CMC2 => DEMethod::DECMC(2.0, 1.0),
            other => return err!(format!("not a valid DEMethod: '{}'", other)),
        })
    }
}

/// Analysis structure for evaluating DeltaE data sets.
/// Created by using the [`Cgats::de_report`] method.
pub struct DeReport {
    method: DEMethod,
    values: Vec<f32>,
    upper_pct: f32,
}

impl DeReport {
    /// Returns the number of samples in the set
    pub fn n_samples(&self) -> usize {
        self.values.len()
    }

    /// Return the number of samples multiplied by a percentage
     fn n_samples_pct(&self, pct: f32) -> usize {
        n_samples_pct(self.n_samples(), pct)
    }

    /// Returns the DeltaE method used in the calculation
    pub fn method(&self) -> &DEMethod {
        &self.method
    }

    /// Returns the overall report
    fn overall(&self) -> AvgMinMax {
        AvgMinMax {
            name: "OVERALL".to_string(),
            pct: 1.0,
            count: self.n_samples(),
            avg: self.values.iter()
                .partial_avg()
                .expect("DE is empty"),
            min: *self.values.iter().min_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            max: *self.values.iter().max_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            stdev: stdev(self.values.iter()),
        }
    }

    /// Returns best report
    fn best(&self) -> AvgMinMax {
        let take = self.n_samples_pct(self.upper_pct);
        AvgMinMax {
            name: "BEST".to_string(),
            pct: self.upper_pct,
            count: self.n_samples_pct(self.upper_pct),
            avg: self.values.iter()
                .take(take)
                .partial_avg()
                .expect("DE is empty"),
            min: *self.values.iter().take(take).min_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            max: *self.values.iter().take(take).max_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            stdev: stdev(self.values.iter().take(take)),
        }
    }

    /// Returns worst report
    fn worst(&self) -> AvgMinMax {
        let skip = self.n_samples_pct(self.upper_pct);
        AvgMinMax {
            name: "WORST".to_string(),
            pct: 1.0 - self.upper_pct,
            count: self.n_samples_pct(1.0 - self.upper_pct),
            avg: self.values.iter()
                .skip(skip)
                .partial_avg()
                .expect("DE is empty"),
            min: *self.values.iter().skip(skip).min_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            max: *self.values.iter().skip(skip).max_by(|a,b| a.partial_cmp(b).unwrap()).expect("DE is empty"),
            stdev: stdev(self.values.iter().skip(skip)),
        }
    }

    /// Returns the 95th Percentile of DeltaE values in the set
    pub fn efactor(&self) -> f32 {
        *self.values.get(self.n_samples_pct(0.95)).unwrap()
    }

    /// Iterator over the DeltaE values in the set
    pub fn values(&self) -> impl Iterator<Item=&f32> {
        self.values.iter()
    }
}

impl fmt::Display for DeReport {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        writeln!(f, "Number of Samples: {}", self.n_samples())?;
        writeln!(f, "DE Formula: {}", self.method)?;
        writeln!(f, "E-Factor (95th Percentile): {:0.2}\n", self.efactor())?;
        match f.precision() {
            Some(p) => {
                writeln!(f, "{:.p$}", self.overall())?;
                writeln!(f, "{:.p$}", self.best())?;
                writeln!(f, "{:.p$}", self.worst())
            }
            None => {
                writeln!(f, "{}", self.overall())?;
                writeln!(f, "{}", self.best())?;
                writeln!(f, "{}", self.worst())
            }
        }
    }
}

fn n_samples_pct(pop: usize, pct: f32) -> usize {
    (pct.clamp(0.0, 1.0) * pop as f32).round() as usize
}

#[test]
fn percentile() {
    assert_eq!(n_samples_pct(100, 0.95), 95);
    assert_eq!(n_samples_pct(100, 0.05), 5);
    assert_eq!(n_samples_pct(126, 0.90), 113);
    assert_eq!(n_samples_pct(126, 0.10), 13);
    assert_eq!(n_samples_pct(1,   0.95), 1);
    assert_eq!(n_samples_pct(1,   0.50), 1);
    assert_eq!(n_samples_pct(1,   0.49), 0);
    assert_eq!(n_samples_pct(1,   0.05), 0);
    assert_eq!(n_samples_pct(2,   0.49), 1);
    assert_eq!(n_samples_pct(2,   0.50), 1);
    assert_eq!(n_samples_pct(2,   0.51), 1);
}

fn stdev<'a, I: Iterator<Item=&'a f32>>(iter: I) -> f32 {
    let values = iter.copied().collect::<Vec<_>>();
    let len = values.len() as f32;
    let avg = values.iter().sum::<f32>() / len as f32;
    f32::sqrt(values.into_iter().map(|val| (val - avg).powi(2)).sum::<f32>() / len)
}

#[test]
fn stdev_test() {
    let values = [1.0, 3.0, 4.0, 7.0, 8.0];
    assert_eq!(stdev(values.iter()), 2.57682);

    let values = [9.0, 2.0, 5.0, 4.0, 12.0, 7.0, 8.0, 11.0, 9.0, 3.0, 7.0, 4.0, 12.0, 5.0, 4.0, 10.0, 9.0, 6.0, 9.0, 4.0];
    assert_eq!(stdev(values.iter()), 2.9832866);
}

/// Analysis structure
struct AvgMinMax {
    name: String,
    pct: f32,
    count: usize,
    avg: f32,
    min: f32,
    max: f32,
    stdev: f32,
}

impl fmt::Display for AvgMinMax {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let p = f.precision().unwrap_or(4);
        writeln!(f, "{} {:0.0}% - ({} colors)", self.name, self.pct * 100.0, self.count)?;
        writeln!(f, "{:>18}: {:0.p$}", "Average DE", self.avg)?;
        writeln!(f, "{:>18}: {:0.p$}", "Max DE", self.max)?;
        writeln!(f, "{:>18}: {:0.p$}", "Min DE", self.min)?;
        writeln!(f, "{:>18}: {:0.p$}", "StDev DE", self.stdev)
    }
}