// lace 0.9.1
//
// A probabilistic cross-categorization engine
// Documentation
use std::collections::HashSet;
use std::convert::TryInto;
use std::hash::Hash;

use serde::Deserialize;
use serde::Serialize;

use crate::codebook::Codebook;
use crate::error::IndexError;
use crate::index::ColumnIndex;
use crate::interface::oracle::utils;
use crate::Datum;

/// Describes the conditions (or the lack of them) on a conditional
/// distribution.
///
/// The default value is `Given::Nothing`.
#[derive(
    Debug, Default, Clone, PartialEq, PartialOrd, Hash, Serialize, Deserialize,
)]
#[serde(rename_all = "snake_case")]
pub enum Given<Ix: ColumnIndex> {
    /// A list of `(column_id, value)` pairs to condition on. For example,
    /// the pair `(11, Datum::Continuous(2.3))` means that we condition on
    /// column 11 taking the value 2.3.
    Conditions(Vec<(Ix, Datum)>),
    /// No conditioning observations at all
    #[default]
    Nothing,
}

impl<Ix: ColumnIndex> Given<Ix> {
    /// Returns `true` when this is `Given::Nothing`, i.e. there is nothing
    /// to condition on.
    ///
    /// # Example
    ///
    /// ```
    /// # use lace::data::Datum;
    /// # use lace::Given;
    /// let nothing_given = Given::<usize>::Nothing;
    ///
    /// assert!(nothing_given.is_nothing());
    ///
    /// let something_given = Given::Conditions(
    ///     vec![(1, Datum::Categorical(1_u32.into()))]
    /// );
    ///
    /// assert!(!something_given.is_nothing());
    /// ```
    pub fn is_nothing(&self) -> bool {
        match self {
            Self::Nothing => true,
            Self::Conditions(_) => false,
        }
    }

    /// Returns `true` when this is `Given::Conditions`.
    ///
    /// Only the variant is inspected; the contents of the condition list are
    /// not examined.
    ///
    /// # Example
    ///
    /// ```
    /// # use lace::data::Datum;
    /// # use lace::Given;
    /// let nothing_given = Given::<usize>::Nothing;
    ///
    /// assert!(!nothing_given.is_conditions());
    ///
    /// let something_given = Given::Conditions(
    ///     vec![(1, Datum::Categorical(1_u32.into()))]
    /// );
    ///
    /// assert!(something_given.is_conditions());
    /// ```
    pub fn is_conditions(&self) -> bool {
        match self {
            Self::Conditions(_) => true,
            Self::Nothing => false,
        }
    }

    /// Resolves every column index in the conditions to its integer index
    /// and runs each value through `utils::pre_process_datum`.
    ///
    /// `Given::Nothing` is passed through unchanged. The order of the
    /// conditions is preserved.
    ///
    /// # Errors
    ///
    /// Returns the first `IndexError` produced while resolving a column
    /// index against `codebook` or while pre-processing its value.
    pub fn canonical(
        self,
        codebook: &Codebook,
    ) -> Result<Given<usize>, IndexError> {
        let pairs = match self {
            Self::Nothing => return Ok(Given::Nothing),
            Self::Conditions(pairs) => pairs,
        };

        let mut resolved: Vec<(usize, Datum)> = Vec::with_capacity(pairs.len());
        for (col_ix, value) in pairs {
            // Resolve the index first: the datum pre-processing needs the
            // integer column index.
            let ix = col_ix.col_ix(codebook)?;
            let datum = utils::pre_process_datum(value, ix, codebook)?;
            resolved.push((ix, datum));
        }

        Ok(Given::Conditions(resolved))
    }
}

/// Errors that can occur when converting a list of conditions into a
/// [`Given`].
///
/// # Example
///
/// ```
/// # use lace::Given;
/// # use lace::error::IntoGivenError;
/// use std::convert::TryInto;
/// use lace::Datum;
///
/// let conditions_good = vec![
///     (0_usize, Datum::Categorical(0_u32.into())),
///     (1_usize, Datum::Categorical(0_u32.into())),
/// ];
///
/// let given_good: Result<Given<usize>, IntoGivenError> = conditions_good.try_into();
/// assert!(given_good.is_ok());
///
/// // duplicate indices
/// let conditions_bad = vec![
///     (0_usize, Datum::Categorical(0_u32.into())),
///     (0_usize, Datum::Categorical(0_u32.into())),
/// ];
/// let given_bad: Result<Given<usize>, IntoGivenError> = conditions_bad.try_into();
///
/// assert_eq!(
///     given_bad.unwrap_err(),
///     IntoGivenError::DuplicateConditionIndicesError
/// );
/// ```
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum IntoGivenError {
    /// Tried to convert `Some(vec![])` into a `Given`. Use `None` instead.
    EmptyConditionsError,
    /// The same column index appears in more than one condition.
    DuplicateConditionIndicesError,
}

impl<Ix: ColumnIndex + Hash + Eq> TryInto<Given<Ix>> for Vec<(Ix, Datum)> {
    type Error = IntoGivenError;

    fn try_into(mut self) -> Result<Given<Ix>, Self::Error> {
        if self.is_empty() {
            Ok(Given::Nothing)
        } else {
            let mut set: HashSet<Ix> = HashSet::new();
            if self.drain(..).any(|(ix, _)| !set.insert(ix)) {
                Err(IntoGivenError::DuplicateConditionIndicesError)
            } else {
                Ok(Given::Conditions(self))
            }
        }
    }
}

/// Converts an optional list of `(column index, value)` pairs into a
/// [`Given`].
///
/// `None` becomes `Given::Nothing`. A non-empty list is handed to the
/// `Vec<(Ix, Datum)>` conversion, and any error from that conversion is
/// returned as-is.
///
/// # Errors
///
/// Returns `IntoGivenError::EmptyConditionsError` for `Some` of an empty
/// list.
impl<Ix: ColumnIndex + Hash + Eq> TryInto<Given<Ix>>
    for Option<Vec<(Ix, Datum)>>
{
    type Error = IntoGivenError;

    fn try_into(self) -> Result<Given<Ix>, Self::Error> {
        match self {
            None => Ok(Given::Nothing),
            // An explicit-but-empty list is treated as a caller mistake
            // rather than as "no conditions".
            Some(pairs) if pairs.is_empty() => {
                Err(IntoGivenError::EmptyConditionsError)
            }
            Some(pairs) => pairs.try_into(),
        }
    }
}