nbslim 0.1.0

Rust implementation of SIA, SIATEC, COSIATEC, and RecurSIA algorithms for compressing Note Block Studio (.nbs) music files by discovering translational equivalence classes in 2D point sets.
Documentation
use std::collections::{HashMap, HashSet};

use crate::tec::TranslationalEquivalence;


const PITCH_BITS: u32 = 14;
const OFFSET: u32 = 1200;

/// Converts a list of MIDI-like note events into 2D points for compression,
/// along with a mapping from each point back to **all** original note data
/// that share that exact point (i.e., a multi‑set).
///
/// The y-coordinate is encoded as:
///   `(instrument << PITCH_BITS) | (pitch_in_cents + OFFSET)`
/// This packs instrument ID and pitch offset into a single `u32` value.
/// The x-coordinate is simply the tick (time) value.
///
/// # Arguments
/// * `notes` - A slice of tuples representing note events:
///   `(tick, layer, instrument, key, velocity, panning, pitch)`
///
/// # Returns
/// A tuple `(points, mapping)` where:
/// - `points`: `Vec<(u32, u32)>` – the generated 2D points `(tick, encoded_y)`,
///   with possible duplicates.
/// - `mapping`: `HashMap<(u32, u32), Vec<NoteData>>` mapping a point to all
///   original notes that produced it, in the order encountered.
pub fn notes_to_points(
    notes: &Vec<(usize, usize, usize, usize, usize, i64, i64)>,
) -> (
    Vec<(u32, u32)>,
    HashMap<(u32, u32), Vec<(usize, usize, usize, usize, usize, i64, i64)>>,
) {
    let mut points = Vec::new();
    let mut mapping = HashMap::new();

    for &(tick, layer, instrument, key, velocity, panning, pitch) in notes {
        let pitch_cents = ((key * 100) as i64 + pitch) as u32;
        let pitch_off = pitch_cents + OFFSET;
        let encoded_y = ((instrument as u32) << PITCH_BITS) | pitch_off;
        let point = (tick as u32, encoded_y);
        points.push(point);
        mapping
            .entry(point)
            .or_insert_with(Vec::new)
            .push((tick, layer, instrument, key, velocity, panning, pitch));
    }

    (points, mapping)
}

/// Merge all TECs that satisfy the filter into a single TEC containing all their coverage points.
///
/// The merged TEC has no translators (i.e., it is not a true TEC) and is used only for
/// reconstruction. It helps avoid many tiny TECs that would each occupy a small layer block.
pub fn merge_tecs(
    tecs: Vec<TranslationalEquivalence>, 
    filter: fn(&TranslationalEquivalence) -> bool
) -> Vec<TranslationalEquivalence> {
    let mut to_merge = Vec::new();
    let mut to_keep  = Vec::new();
    for tec in tecs {
        if filter(&tec) {
            to_merge.push(tec);
        } else {
            to_keep.push(tec);
        }
    }

    if to_merge.is_empty() {
        return to_keep;
    }

    // Collect all points from the TECs to be merged
    let mut merged_points = HashSet::new();
    for tec in to_merge {
        merged_points.extend(tec.coverage());
    }
    
    if merged_points.is_empty() {
        // No points (shouldn't happen) – just return empty Vec
        return vec![]
    }

    // Sort points to get a deterministic pattern
    
    let mut sorted_points = Vec::from_iter(merged_points);
    sorted_points.sort();
    let merged_tec = TranslationalEquivalence::new(
        sorted_points,
        HashSet::new(), 
        None
    );
    
    to_keep.push(merged_tec);
    to_keep
}


/// Calculate compression statistics for a list of TECs.
/// 
/// Returns a tuple: (original_count, encoded_units, compression_ratio):
pub fn compression_stats(
    tecs: &Vec<TranslationalEquivalence>, 
    original_points: &Vec<(u32, u32)>
) -> (usize, usize, f64) {
    fn _count_units(tecs: &Vec<TranslationalEquivalence>) -> usize {
        let mut units = 0usize;
        for tec in tecs {
            units += tec.translators.len();
            units += if tec.sub_tecs.is_empty() {
                tec.pattern.len()
            } else {
                _count_units(&tec.sub_tecs)
            };
        }

        units
    }

    let original = original_points.len();
    let encoded = _count_units(tecs);
    let ratio = if encoded > 0 {
        original as f64 / encoded as f64
    } else {
        0.0
    };

    (original, encoded, ratio)
}

/// Reconstructs the original set of points from a list of TECs, then maps
/// each point back to its original note data using a pre‑built mapping.
///
/// The reconstruction first computes the coverage (pattern + all translated copies)
/// of each TEC, merges them, and returns the **sorted unique** points.
/// Then, for each such point, it retrieves all notes associated with it
/// from the mapping (in the order they were originally inserted) and flattens
/// them into the result vector.
///
/// **Important:** This function assumes that the total number of times a point
/// appears in the compressed representation is exactly the number of notes
/// stored in the mapping for that point. Because the compression process does
/// not preserve duplicate points, the final note order is deterministic but
/// may not match the original order across different points. For exact order
/// preservation, you must maintain an auxiliary index sequence.
///
/// # Arguments
/// * `tecs` - A vector of `TranslationalEquivalence` objects, typically produced
///            by a compression algorithm.
/// * `mapping` - The mapping from a point to all original notes that produced it,
///               as returned by `notes_to_points`.
///
/// # Returns
/// A `Vec` of original note tuples, in the order: first all notes belonging to
/// the smallest (tick, y) point (sorted), then the next point, etc. Notes that
/// shared the exact same point are returned in the insertion order recorded in
/// the mapping.
pub fn points_to_notes(
    tecs: &Vec<TranslationalEquivalence>,
    mapping: &HashMap<(u32, u32), Vec<(usize, usize, usize, usize, usize, i64, i64)>>,
) -> Vec<(usize, usize, usize, usize, usize, i64, i64)> {
    // 1. Rebuild the unique points covered by all TECs (sorted)
    let mut all_points = HashSet::new();
    for tec in tecs {
        all_points.extend(tec.coverage());
    }

    // 2. For each unique point, collect all original notes from the mapping
    let mut result = Vec::new();
    for point in all_points {
        if let Some(notes_at_point) = mapping.get(&point) {
            result.extend(notes_at_point.iter().cloned());
        }
        // If a point from coverage is not present in mapping (should never happen),
        // we simply skip it – it indicates an inconsistency.
    }
    result.sort();
    result
}