nbslim 0.1.0

Rust implementation of SIA, SIATEC, COSIATEC, and RecurSIA algorithms for compressing Note Block Studio (.nbs) music files by discovering translational equivalence classes in 2D point sets.
Documentation
use std::fmt;
use std::collections::HashSet;

/// A translational equivalence class (TEC) consisting of a pattern (a set of points)
/// and a set of non‑zero translation vectors that map the pattern onto other
/// occurrences within the same dataset.
///
/// The pattern is stored as a sorted `Vec<(u32, u32)>` to ensure deterministic ordering.
/// Translators are `(i32, i32)` vectors, possibly negative in either dimension.
/// Sub‑TECs are used by recursive compression algorithms (RECURSIA) to encode
/// nested patterns.
#[derive(Debug, Clone, Eq)]
pub struct TranslationalEquivalence {
    /// Sorted list of points `(tick, pitch)` forming the pattern.
    pub pattern: Vec<(u32, u32)>,
    /// Set of non‑zero translation vectors `(Δtick, Δpitch)` that map `pattern`
    /// onto other patterns in the dataset.
    pub translators: HashSet<(i32, i32)>,
    /// Optional sub‑TECs obtained after recursive compression of the pattern.
    pub sub_tecs: Vec<TranslationalEquivalence>,
}

impl PartialEq for TranslationalEquivalence {
    fn eq(&self, other: &Self) -> bool {
        self.pattern == other.pattern && self.translators == other.translators && self.sub_tecs == other.sub_tecs
    }
}
impl TranslationalEquivalence {
    /// Creates a new TEC. The pattern is automatically sorted.
    ///
    /// # Arguments
    /// * `pattern` - The points forming the pattern (will be sorted).
    /// * `translators` - Non‑zero translation vectors.
    /// * `sub_tecs` - Optional recursively compressed sub‑TECs for the pattern.
    pub fn new(
        pattern: Vec<(u32, u32)>,
        translators: HashSet<(i32, i32)>,
        sub_tecs: Option<Vec<TranslationalEquivalence>>,
    ) -> Self {
        let mut pattern_sorted = pattern;
        pattern_sorted.sort();
        TranslationalEquivalence {
            pattern: pattern_sorted,
            translators,
            sub_tecs: sub_tecs.unwrap_or_else(Vec::new),
        }
    }

    /// Covered set = pattern ∪ (pattern + v) for every translator v.
    /// 
    /// Returns the set of all points that belong to any occurrence in this TEC.
    pub fn coverage(&self) -> HashSet<(u32, u32)> {
        let mut sub_cov: HashSet<(u32, u32)> = self.pattern.iter().copied().collect();
        for sub in &self.sub_tecs {
            sub_cov.extend(sub.coverage());
        }

        let mut cov = sub_cov.clone();
        for &(dx, dy) in &self.translators {
            for &(x, y) in &sub_cov {
                cov.insert(((x as i32 + dx) as u32, (y as i32 + dy) as u32));
            }
        }
        cov
    }

    /// Recursive compression ratio = coverage size / total encoding units.
    /// - For a leaf TEC (no sub_tecs), total = |pattern| + |translators|.
    /// - For a non-leaf TEC, total = |translators| + sum(compression units of sub_tecs).
    pub fn compression_ratio(&self) -> f64 {
        fn total_encoding_units(tec: &TranslationalEquivalence) -> usize {
            if tec.sub_tecs.is_empty() {
                tec.pattern.len() + tec.translators.len()
            } else {
                let mut units = tec.sub_tecs.len();
                for sub in &tec.sub_tecs {
                    units += total_encoding_units(sub);
                }
                units
            }
        }

        let cov_len = self.coverage().len();
        let total_units = total_encoding_units(&self);
        return cov_len as f64 / total_units as f64;
    }

    /// Bounding‑box compactness: |pattern| divided by the number of dataset points
    /// that lie inside the axis‑aligned bounding box of the pattern.
    /// - If sub_tecs exist, merge all leaf pattern points from the subtree,
    /// then compute compactness of that merged point set.
    /// - Otherwise, compute compactness from self.pattern directly.
    ///
    /// Higher compactness means the pattern occupies a relatively dense region of
    /// the dataset. This Python‑exposed method clones the input set.
    pub fn compactness(&self, points: &HashSet<(u32, u32)>) -> f64 {
        let pattern_set = self.coverage();
        if pattern_set.is_empty() {
            return 0.0;
        }
        
        let xs: Vec<u32> = pattern_set.iter().map(|p| p.0).collect();
        let ys: Vec<u32> = pattern_set.iter().map(|p| p.1).collect();
        let min_x = *xs.iter().min().unwrap();
        let max_x = *xs.iter().max().unwrap();
        let min_y = *ys.iter().min().unwrap();
        let max_y = *ys.iter().max().unwrap();
        
        let mut count = 0;
        for &(x, y) in points {
            if x >= min_x && x <= max_x && y >= min_y && y <= max_y {
                count += 1;
            }
        }
        if count == 0 {
            0.0
        } else {
            self.pattern.len() as f64 / count as f64
        }
    }

    pub fn summary(&self, indent: usize) -> String {
        let mut lines: Vec<String> = Vec::new();
        let spaces = " ".repeat(indent);
        if self.pattern.is_empty() {
            lines.push(format!("{}translators={:?}", spaces, self.translators))
        } else {
            lines.push(format!("{}pattern={:?}, translators={:?}", spaces, self.pattern, self.translators))
        }
        lines.push(format!("{}  coverage count: {}", spaces, self.coverage().len()));
        lines.push(format!("{}  compression ratio: {}", spaces, self.compression_ratio()));
        if !self.sub_tecs.is_empty() {
            lines.push(format!("{}  sub-tecs:", spaces));
            for sub in &self.sub_tecs {
                let sub_summary = sub.summary(indent + 2);
                lines.push(sub_summary);
            }
        }

        return lines.join("\n");
    }
}

impl fmt::Display for TranslationalEquivalence {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(
            f, "TEC(pattern={:?}, translators={:?}, sub_tecs=[{}])", 
            self.pattern, 
            self.translators, 
            Vec::from_iter(self.sub_tecs.iter().map(|tec| format!("{}", tec))).join(", ")
        )
    }
}


/// A comparable key that implements the multi‑level ordering rules defined in
/// the ISBETTERTEC function from the literature.
///
/// Lower key value corresponds to a **better** TEC according to:
/// 1. Higher compression ratio
/// 2. Higher compactness
/// 3. Larger coverage size
/// 4. Larger pattern size
/// 5. Smaller temporal width (duration)
/// 6. Smaller bounding‑box area
///
/// This struct is used with `sort_by_key` and `min_by_key` to select optimal TECs
/// in algorithms like `COSIATEC` and `SIATECCompress`.
pub(crate) struct SortKey {
    cr: f64,      // f64 do not implement `Eq`
    comp: f64,    // f64 do not implement `Eq`
    cov_len: usize, 
    pat_len: usize, 
    width: u32, 
    area: u64
}

impl SortKey {
    pub fn new(cr: f64, comp: f64, cov_len: usize, pat_len: usize, width: u32, area: u64) -> Self {
        debug_assert!(cr.is_finite(), "compression ratio must be finite");
        debug_assert!(comp.is_finite(), "compactness must be finite");
        Self { cr, comp, cov_len, pat_len, width, area }
    }
}

impl Eq for SortKey {}
impl PartialEq for SortKey {
    fn eq(&self, other: &Self) -> bool {
        self.cr == other.cr && 
        self.comp == other.comp && 
        self.cov_len == other.cov_len && 
        self.pat_len == other.pat_len && 
        self.width == other.width && 
        self.area == other.area
    }
}
impl PartialOrd for SortKey {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        // For descending fields, store negative. 
        match (-self.cr).partial_cmp(&(-other.cr)) {
            Some(core::cmp::Ordering::Equal) => {}
            ord => return ord,
        }
        match (-self.comp).partial_cmp(&(-other.comp)) {
            Some(core::cmp::Ordering::Equal) => {}
            ord => return ord,
        }
        match (-(self.cov_len as i64)).partial_cmp(&(-(other.cov_len as i64))) {
            Some(core::cmp::Ordering::Equal) => {}
            ord => return ord,
        }
        match (-(self.pat_len as i64)).partial_cmp(&(-(other.pat_len as i64))) {
            Some(core::cmp::Ordering::Equal) => {}
            ord => return ord,
        }
        // For ascending fields, store positive.
        match self.width.partial_cmp(&other.width) {
            Some(core::cmp::Ordering::Equal) => {}
            ord => return ord,
        }
        self.area.partial_cmp(&other.area)
    }
}
impl Ord for SortKey {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        // partial_cmp is guaranteed to return Some because all comparands are finite.
        self.partial_cmp(other).unwrap()
    }
}

/// Constructs a `SortKey` for a given TEC with respect to a dataset.
/// The key follows the ISBETTERTEC multi‑level comparison rules and can be used
/// to sort TECs from best to worst.
///
/// # Arguments
/// * `tec` - The TEC to evaluate.
/// * `dataset_points` - The set of points in the dataset (used for compactness calculation).
///
/// # Returns
/// A `SortKey` where lower key indicates a better TEC.
pub fn tec_sort_key(tec: &TranslationalEquivalence, dataset_points: &HashSet<(u32, u32)>) -> SortKey {
    let cr = tec.compression_ratio();
    let comp = tec.compactness(dataset_points);
    let cov_len = tec.coverage().len();
    let pat_len = tec.pattern.len();
    let width = (
        tec.pattern
            .iter()
            .map(|x| x.0 as i32)
            .max().unwrap() - 
        tec.pattern
            .iter()
            .map(|x| x.0 as i32)
            .min().unwrap()
    ) as u32;
    let x_range = width;
    let y_range = (
        tec.pattern
            .iter()
            .map(|x| x.1 as i32)
            .max().unwrap() - 
        tec.pattern
            .iter()
            .map(|x| x.1 as i32)
            .min().unwrap()
    ) as u32;
    let area = x_range as u64 * y_range as u64;
    SortKey::new(cr, comp, cov_len, pat_len, width, area)
}