rsomics-tin 0.1.0

//! Transcript Integrity Number (TIN) computation from RNA-seq BAM data.
//!
//! TIN measures coverage uniformity across the mRNA body using Shannon entropy.
//! Given per-base coverage `C_i` at sampled mRNA positions:
//! `p_i = C_i / sum(C_j)`, `H = -sum(p_i * ln(p_i))` (non-zero only), `TIN = 100 * exp(H) / l`
//! where `l` is the total number of sampled positions (including zero-coverage positions).
//!
//! Transcripts with fewer than `min_cov` UNIQUE read-start positions are assigned TIN=0.
//! The sample count `n` is halved while `n >= mRNA_len`; when `mRNA_len <= n`, all bases
//! are used. Exon boundary positions are always added to the position set (tin.py behaviour).
//!
//! ## Origin
//!
//! This crate is an independent Rust reimplementation of `tin.py` based on:
//! - Wang L, Wang S, Li W. "`RSeQC`: quality control of RNA-seq experiments."
//!   Bioinformatics. 2012;28(16):2184-5. doi:10.1093/bioinformatics/bts356
//! - Wang L, Nie J, Sicotte H, et al. "Measure transcript integrity using RNA-seq data."
//!   BMC Bioinformatics. 2016;17:58. doi:10.1186/s12859-016-0922-z
//! - The BED12 format specification
//! - Black-box behaviour testing against `RSeQC` `tin.py` 5.0.4
//!
//! No source code from the GPL-v3 upstream was used as reference during
//! implementation. Test fixtures are independently generated.
//!
//! License: MIT OR Apache-2.0.
//! Upstream credit: `RSeQC` <https://rseqc.sourceforge.net/> (GPL-v3).

#![allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]

use std::collections::HashSet;
use std::fs::File;
use std::path::Path;

use noodles::bam;
use noodles::sam::alignment::record::cigar::op::Kind;
use rsomics_common::{Result, RsomicsError};
use serde::Serialize;

/// One row in the per-transcript output (`.tin.xls`).
#[derive(Debug, Clone)]
pub struct TinOutput {
    pub gene_id: String,
    pub chrom: String,
    pub tx_start: i64,
    pub tx_end: i64,
    /// TIN score in [0, 100]. 0.0 means below minCov or zero coverage.
    pub tin: f64,
}

/// Sample-level summary statistics emitted to `.summary.txt`.
#[derive(Debug, Serialize)]
pub struct TinSummary {
    pub mean: f64,
    pub median: f64,
    pub stdev: f64,
}

/// One transcript parsed from BED12.
#[derive(Debug, Clone)]
struct Transcript {
    gene_id: String,
    chrom: String,
    /// 0-based, half-open `[tx_start, tx_end)`
    tx_start: i64,
    tx_end: i64,
    /// Exon blocks as 0-based half-open `[exon_start, exon_end)` in genomic coordinates.
    exons: Vec<(i64, i64)>,
}

impl Transcript {
    fn mrna_len(&self) -> i64 {
        self.exons.iter().map(|(s, e)| e - s).sum()
    }
}

/// Parse BED12 file into a list of transcripts.
fn parse_bed12(path: &Path) -> Result<Vec<Transcript>> {
    let content = std::fs::read_to_string(path)
        .map_err(|e| RsomicsError::Io(std::io::Error::other(format!("reading BED12: {e}"))))?;

    let mut transcripts = Vec::new();

    for line in content.lines() {
        if line.starts_with('#') || line.starts_with("track") || line.starts_with("browser") {
            continue;
        }
        let fields: Vec<&str> = line.split('\t').collect();
        if fields.len() < 12 {
            continue;
        }

        let chrom = fields[0].to_string();
        let Ok(tx_start) = fields[1].parse::<i64>() else {
            continue;
        };
        let Ok(tx_end) = fields[2].parse::<i64>() else {
            continue;
        };
        let gene_id = fields[3].to_string();

        let Ok(block_count) = fields[9].parse::<usize>() else {
            continue;
        };

        let block_sizes: Vec<i64> = fields[10]
            .trim_end_matches(',')
            .split(',')
            .filter(|s| !s.is_empty())
            .filter_map(|s| s.parse().ok())
            .collect();
        let block_starts: Vec<i64> = fields[11]
            .trim_end_matches(',')
            .split(',')
            .filter(|s| !s.is_empty())
            .filter_map(|s| s.parse().ok())
            .collect();

        if block_sizes.len() != block_count || block_starts.len() != block_count {
            continue;
        }

        let exons: Vec<(i64, i64)> = block_starts
            .iter()
            .zip(block_sizes.iter())
            .map(|(&rel_start, &size)| {
                let abs_start = tx_start + rel_start;
                (abs_start, abs_start + size)
            })
            .collect();

        if exons.is_empty() {
            continue;
        }

        transcripts.push(Transcript {
            gene_id,
            chrom,
            tx_start,
            tx_end,
            exons,
        });
    }

    Ok(transcripts)
}

/// Build the sampled genomic positions for a transcript, matching `tin.py` exactly.
///
/// Positions are 1-based genomic coordinates. Exon boundary positions are always
/// included in addition to the equally-spaced sample. When `mrna_len <= sample_size`,
/// every mRNA base is returned (all-bases mode).
///
/// Returns the sorted, deduplicated position list. Avoids materialising the full
/// per-base mRNA index list by walking exons arithmetically.
fn pick_positions(tx: &Transcript, sample_size: usize) -> Vec<i64> {
    #[allow(clippy::cast_sign_loss)]
    let mrna_len = tx.mrna_len() as usize;
    if mrna_len == 0 {
        return Vec::new();
    }

    // Collect exon boundary positions (1-based).
    let mut exon_bounds: Vec<i64> = Vec::with_capacity(tx.exons.len() * 2);
    for &(ex_start, ex_end) in &tx.exons {
        exon_bounds.push(ex_start + 1);
        exon_bounds.push(ex_end);
    }

    // Generate the equally-spaced sample without building the full per-base list.
    // tin.py: step = int(mRNA_size / sample_size); indx = range(0, len(gene_all_base), step)
    // gene_all_base[i] = genomic position of mRNA base at index i.
    // We compute gene_all_base[i] by walking exons: find which exon contains mRNA index i.
    let chose_bases: Vec<i64> = if mrna_len <= sample_size {
        // All-bases mode: every mRNA position in genomic order.
        let mut v: Vec<i64> = Vec::with_capacity(mrna_len);
        for &(ex_start, ex_end) in &tx.exons {
            for gpos in (ex_start + 1)..=ex_end {
                v.push(gpos);
            }
        }
        v
    } else {
        let step = mrna_len / sample_size;
        // Walk exons to map mRNA index i → genomic 1-based position.
        // Collect indices 0, step, 2*step, ... < mrna_len.
        let mut sampled: Vec<i64> = Vec::with_capacity(mrna_len / step + 1);
        let mut mrna_offset: usize = 0; // mRNA index of the start of the current exon
        let mut next_idx: usize = 0; // next mRNA sample index to collect

        for &(ex_start, ex_end) in &tx.exons {
            #[allow(clippy::cast_sign_loss)]
            let exon_len = (ex_end - ex_start) as usize;
            // Indices for this exon: [mrna_offset, mrna_offset + exon_len)
            // Sample indices in this range: next_idx, next_idx+step, ...
            while next_idx < mrna_offset + exon_len {
                let local = next_idx - mrna_offset; // offset within this exon
                #[allow(clippy::cast_possible_wrap)]
                let gpos = ex_start + 1 + local as i64; // 1-based genomic
                sampled.push(gpos);
                next_idx += step;
            }
            mrna_offset += exon_len;
        }
        sampled
    };

    // Merge exon bounds + sampled bases, deduplicate (first-seen wins), then sort.
    // Mirrors tin.py: sorted(uniqify(exon_bounds + chose_bases)).
    let mut seen: HashSet<i64> = HashSet::with_capacity(exon_bounds.len() + chose_bases.len());
    let mut merged: Vec<i64> = Vec::with_capacity(exon_bounds.len() + chose_bases.len());
    for pos in exon_bounds.iter().chain(chose_bases.iter()) {
        if seen.insert(*pos) {
            merged.push(*pos);
        }
    }
    merged.sort_unstable();
    merged
}

/// Shannon entropy `H = -sum(p_i * ln(p_i))` over non-zero values.
///
/// Returns 0.0 when all values are zero or the list is empty.
fn shannon_entropy(vals: &[f64]) -> f64 {
    let total: f64 = vals.iter().sum();
    if total == 0.0 {
        return 0.0;
    }
    vals.iter()
        .filter(|&&v| v > 0.0)
        .map(|&v| {
            let p = v / total;
            -p * p.ln()
        })
        .sum()
}

/// Compute TIN from a coverage vector and the total position count `l`.
///
/// Only non-zero coverage positions contribute to entropy; `l` is the denominator
/// (total sampled positions including zero-coverage ones), matching `tin.py`.
/// Returns 0.0 when total coverage is zero.
#[must_use]
pub fn compute_tin_score(mrna_cov: &[u32], n: usize) -> f64 {
    let mrna_len = mrna_cov.len();
    if mrna_len == 0 || n == 0 {
        return 0.0;
    }

    let step = mrna_len / n;
    if step == 0 {
        return 0.0;
    }

    let sample: Vec<f64> = (0..n).map(|i| f64::from(mrna_cov[i * step])).collect();
    let total: f64 = sample.iter().sum();
    if total == 0.0 {
        return 0.0;
    }
    let h = shannon_entropy(&sample);
    #[allow(clippy::cast_precision_loss)]
    let l = n as f64;
    100.0 * h.exp() / l
}

/// Choose the number of sample positions: halve `n` while `n >= mrna_len`.
///
/// From `tin.py` help: "if this number is larger than the length of mRNA (L),
/// it will be halved until it's smaller than L."
#[must_use]
pub fn effective_sample_size(mut n: usize, mrna_len: usize) -> usize {
    while n >= mrna_len {
        n /= 2;
        if n == 0 {
            return 0;
        }
    }
    n
}

/// Per-transcript accumulator used during the linear BAM sweep.
struct TxAccum {
    positions: Vec<i64>,         // sorted, 1-based sampled genomic positions
    coverage: Vec<f64>,          // coverage[i] corresponds to positions[i]
    unique_starts: HashSet<i64>, // 0-based read starts within [tx_start, tx_end)
}

#[allow(clippy::too_many_lines)]
/// Compute TIN for all transcripts in a BED12 from a sorted, indexed BAM.
///
/// Uses a single sequential BAM scan rather than per-transcript indexed seeks,
/// avoiding repeated BGZF-decompress cycles when transcripts are dense.
///
/// # Parameters
///
/// - `bam_path`: sorted + indexed BAM (`.bai` index must exist).
/// - `bed_path`: BED12 gene model.
/// - `min_cov`: minimum unique read-start positions for a transcript (below → TIN=0). Default 10.
/// - `sample_size`: initial number of equally-spaced mRNA positions to sample. Default 100.
pub fn compute_tin(
    bam_path: &Path,
    bed_path: &Path,
    min_cov: u64,
    sample_size: usize,
) -> Result<Vec<TinOutput>> {
    let transcripts = parse_bed12(bed_path)?;

    // Group transcripts by chromosome in BAM order.
    // Within each chromosome, sort by tx_start for sweep-line ordering.
    // We preserve the original index so we can emit results in BED12 order.
    let mut tx_by_idx: Vec<(usize, &Transcript)> = transcripts.iter().enumerate().collect();
    tx_by_idx.sort_unstable_by(|a, b| {
        a.1.chrom
            .cmp(&b.1.chrom)
            .then(a.1.tx_start.cmp(&b.1.tx_start))
    });

    // Pre-compute sampled positions for every transcript.
    let positions_vec: Vec<Vec<i64>> = transcripts
        .iter()
        .map(|tx| pick_positions(tx, sample_size))
        .collect();

    // Accumulator: one entry per transcript (in BED12 order, indexed by original index).
    let mut accums: Vec<TxAccum> = positions_vec
        .iter()
        .map(|pos| TxAccum {
            coverage: vec![0.0; pos.len()],
            positions: pos.clone(),
            unique_starts: HashSet::new(),
        })
        .collect();

    // Open BAM for sequential scan (no index needed for the linear pass).
    let file = File::open(bam_path)
        .map_err(|e| RsomicsError::InvalidInput(format!("{}: {e}", bam_path.display())))?;
    let mut reader = bam::io::Reader::new(file);
    let header = reader.read_header().map_err(RsomicsError::Io)?;

    // Build a chromosome → sorted list of (tx_start, tx_end, original_tx_index) mapping.
    // For each BAM record we'll find overlapping transcripts.
    let mut chrom_txs: std::collections::HashMap<String, Vec<(i64, i64, usize)>> =
        std::collections::HashMap::new();
    for (orig_idx, tx) in transcripts.iter().enumerate() {
        chrom_txs
            .entry(tx.chrom.clone())
            .or_default()
            .push((tx.tx_start, tx.tx_end, orig_idx));
    }
    // Sort each chrom's list by tx_start for binary-search lookup.
    for v in chrom_txs.values_mut() {
        v.sort_unstable_by_key(|&(s, _, _)| s);
    }

    let mut record = bam::Record::default();
    loop {
        match reader.read_record(&mut record) {
            Ok(0) => break,
            Ok(_) => {}
            Err(e) => return Err(RsomicsError::Io(e)),
        }

        let flags = record.flags();
        if flags.is_unmapped() || flags.is_secondary() || flags.is_qc_fail() {
            continue;
        }

        let start_1based: i64 = match record.alignment_start() {
            Some(Ok(pos)) => {
                #[allow(clippy::cast_possible_wrap)]
                let v = usize::from(pos) as i64;
                v
            }
            _ => continue,
        };
        let start_0based = start_1based - 1;

        // Determine reference name from the record's reference sequence ID.
        let Some(Ok(ref_id)) = record.reference_sequence_id() else {
            continue;
        };
        let chrom: &str = match header.reference_sequences().get_index(ref_id) {
            Some((name, _)) => match std::str::from_utf8(name.as_ref()) {
                Ok(s) => s,
                Err(_) => continue,
            },
            None => continue,
        };

        let Some(tx_list) = chrom_txs.get(chrom) else {
            continue;
        };

        // Compute read end from CIGAR (reference-consuming length).
        let cigar = record.cigar();
        let read_ref_len: i64 = cigar
            .iter()
            .filter_map(std::result::Result::ok)
            .map(|op| match op.kind() {
                Kind::Match
                | Kind::SequenceMatch
                | Kind::SequenceMismatch
                | Kind::Deletion
                | Kind::Skip => {
                    #[allow(clippy::cast_possible_wrap)]
                    {
                        op.len() as i64
                    }
                }
                _ => 0,
            })
            .sum();
        // Find transcripts overlapping [start_0based, start_0based + read_ref_len) using
        // binary search into the chromosome-sorted transcript list.
        // A transcript overlaps the read if tx_start < search_end AND tx_end > start_0based.
        let search_end = start_0based + read_ref_len; // exclusive 0-based read end
        // First tx whose tx_start < search_end: leftmost tx to consider.
        // We want tx_start < search_end (tx begins before read ends).
        // All tx with tx_end > start_0based AND tx_start < search_end overlap.
        let first_candidate = tx_list.partition_point(|&(s, _, _)| s < start_0based);
        // Walk back to find transcripts that started before start_0based but extend past it.
        // In a sorted list we might miss transcripts that started before but overlap.
        // Simpler: scan all with tx_start < search_end (upper bound), check tx_end > start_0based.
        let upper = tx_list.partition_point(|&(s, _, _)| s < search_end);

        // Determine lower: we need tx_start such that tx could still overlap.
        // Since BAM is sorted and we process records in order, transcripts whose tx_end <=
        // start_0based are already fully processed. However we can't prune easily without
        // tracking "active" set. Use a scan from 0..upper with tx_end > start_0based filter.
        // For typical gene models (not insanely long transcripts) this is fast.
        // Use binary search: find last tx whose tx_end could overlap.
        // Actually just scan 0..upper and skip non-overlapping:
        let lower = if first_candidate > 0 {
            // Look back for transcripts that start before start_0based but may overlap
            let mut lo = first_candidate;
            while lo > 0 && tx_list[lo - 1].1 > start_0based {
                lo -= 1;
            }
            lo
        } else {
            0
        };

        for &(tx_start, tx_end, orig_idx) in &tx_list[lower..upper] {
            if tx_end <= start_0based {
                continue; // no overlap
            }
            // This transcript overlaps the read.
            let accum = &mut accums[orig_idx];
            let positions = &accum.positions;

            if positions.is_empty() {
                continue;
            }

            // min_reads: track unique read starts within [tx_start, tx_end).
            if start_0based >= tx_start && start_0based < tx_end {
                accum.unique_starts.insert(start_0based);
            }

            // Coverage: walk CIGAR match intervals, find sampled positions within each.
            let last_pos = *positions.last().unwrap();
            #[allow(clippy::cast_sign_loss)]
            let mut ref_cursor = start_1based as usize;
            for op_result in cigar.iter() {
                let Ok(op) = op_result else { break };
                let len = op.len();
                match op.kind() {
                    Kind::Match | Kind::SequenceMatch | Kind::SequenceMismatch => {
                        #[allow(clippy::cast_possible_wrap)]
                        let block_start = ref_cursor as i64;
                        #[allow(clippy::cast_possible_wrap)]
                        let block_end = block_start + len as i64;
                        if block_start > last_pos {
                            break;
                        }
                        let lo = positions.partition_point(|&p| p < block_start);
                        let hi = positions.partition_point(|&p| p < block_end);
                        for idx in lo..hi {
                            accum.coverage[idx] += 1.0;
                        }
                        ref_cursor += len;
                    }
                    Kind::Deletion | Kind::Skip => {
                        ref_cursor += len;
                    }
                    Kind::SoftClip | Kind::HardClip | Kind::Pad | Kind::Insertion => {}
                }
            }
        }
    }

    // Compute TIN from accumulated coverage.
    let mut results: Vec<Option<TinOutput>> = (0..transcripts.len()).map(|_| None).collect();
    for (orig_idx, tx) in transcripts.iter().enumerate() {
        let accum = &accums[orig_idx];
        let positions = &accum.positions;

        let tin = if positions.is_empty() || accum.unique_starts.len() as u64 <= min_cov {
            0.0
        } else {
            let h = shannon_entropy(&accum.coverage);
            if h == 0.0 {
                0.0
            } else {
                #[allow(clippy::cast_precision_loss)]
                let l_f = positions.len() as f64;
                100.0 * h.exp() / l_f
            }
        };

        eprint!(" {} transcripts finished", orig_idx + 1);
        results[orig_idx] = Some(TinOutput {
            gene_id: tx.gene_id.clone(),
            chrom: tx.chrom.clone(),
            tx_start: tx.tx_start,
            tx_end: tx.tx_end,
            tin,
        });
    }

    eprintln!();
    Ok(results.into_iter().map(|r| r.unwrap()).collect())
}

/// Compute sample-level summary statistics over non-zero TIN scores.
///
/// Mirrors `tin.py`: only transcripts with TIN > 0 contribute to mean/median/stdev.
/// Standard deviation uses the population formula (divide by N, not N-1).
#[must_use]
pub fn summarise(outputs: &[TinOutput]) -> TinSummary {
    let nonzero: Vec<f64> = outputs
        .iter()
        .filter(|r| r.tin > 0.0)
        .map(|r| r.tin)
        .collect();

    if nonzero.is_empty() {
        return TinSummary {
            mean: 0.0,
            median: 0.0,
            stdev: 0.0,
        };
    }

    #[allow(clippy::cast_precision_loss)]
    let n = nonzero.len() as f64;
    let mean = nonzero.iter().sum::<f64>() / n;

    let mut sorted = nonzero.clone();
    sorted.sort_by(|a, b| a.partial_cmp(b).unwrap());
    let median = if sorted.len() % 2 == 1 {
        sorted[sorted.len() / 2]
    } else {
        let mid = sorted.len() / 2;
        // Average of the two middle values for even-length arrays
        sorted[mid - 1] / 2.0 + sorted[mid] / 2.0
    };

    // Population standard deviation (divides by N, matching numpy default)
    let variance = nonzero.iter().map(|&x| (x - mean).powi(2)).sum::<f64>() / n;
    let stdev = variance.sqrt();

    TinSummary {
        mean,
        median,
        stdev,
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn tin_uniform_100_positions() {
        // All 100 positions have coverage 10 → p_i = 1/100 → H = ln(100) → TIN = 100
        let cov: Vec<u32> = vec![10; 100];
        let tin = compute_tin_score(&cov, 100);
        assert!(
            (tin - 100.0).abs() < 1e-10,
            "expected TIN=100 for uniform, got {tin}"
        );
    }

    #[test]
    fn tin_single_spike() {
        // All coverage at one position → H = 0 → TIN = exp(0)/n * 100 = 1.0
        let mut cov = vec![0u32; 100];
        cov[50] = 20;
        let tin = compute_tin_score(&cov, 100);
        assert!(
            (tin - 1.0).abs() < 1e-10,
            "expected TIN≈1 for spike, got {tin}"
        );
    }

    #[test]
    fn tin_zero_coverage() {
        let cov = vec![0u32; 100];
        let tin = compute_tin_score(&cov, 100);
        assert!(tin == 0.0, "expected 0.0, got {tin}");
    }

    #[test]
    fn effective_sample_size_no_halving() {
        // n=100, mrna_len=1000: 100 < 1000, no change
        assert_eq!(effective_sample_size(100, 1000), 100);
    }

    #[test]
    fn effective_sample_size_halving_once() {
        // n=100, mrna_len=100: 100 >= 100 → halve to 50 → 50 < 100
        assert_eq!(effective_sample_size(100, 100), 50);
    }

    #[test]
    fn effective_sample_size_halving_twice() {
        // n=100, mrna_len=50: 100≥50 → 50≥50 → 25 < 50
        assert_eq!(effective_sample_size(100, 50), 25);
    }

    #[test]
    fn parse_bed12_single_exon() {
        use std::io::Write;
        let mut f = tempfile::NamedTempFile::new().unwrap();
        writeln!(f, "chr1\t0\t1000\ttx1\t0\t+\t0\t1000\t0\t1\t1000,\t0,").unwrap();
        let txs = parse_bed12(f.path()).unwrap();
        assert_eq!(txs.len(), 1);
        assert_eq!(txs[0].mrna_len(), 1000);
        assert_eq!(txs[0].exons, vec![(0, 1000)]);
    }

    #[test]
    fn parse_bed12_multi_exon() {
        use std::io::Write;
        let mut f = tempfile::NamedTempFile::new().unwrap();
        // 3 exons: offsets 0,200,300 sizes 100,150,200 → absolute: [100,200],[300,450],[400,600]
        writeln!(
            f,
            "chr1\t100\t600\ttx2\t0\t+\t100\t600\t0\t3\t100,150,200,\t0,200,300,"
        )
        .unwrap();
        let txs = parse_bed12(f.path()).unwrap();
        assert_eq!(txs[0].mrna_len(), 450);
        assert_eq!(txs[0].exons, vec![(100, 200), (300, 450), (400, 600)]);
    }

    #[test]
    fn pick_positions_single_exon() {
        // tx: chr1:0-1000, one exon [0,1000), mrna_len=1000, sample_size=100
        // step=10, gene_all_base=[1..=1000], indx=[0,10,...,990]
        // chose_bases=[1,11,...,991], exon_bounds=[1,1000]
        // uniqify([1,1000]+[1,11,...,991])=[1,1000,11,...,991] → sorted=[1,11,...,991,1000]
        let tx = Transcript {
            gene_id: "tx".into(),
            chrom: "chr1".into(),
            tx_start: 0,
            tx_end: 1000,
            exons: vec![(0, 1000)],
        };
        let positions = pick_positions(&tx, 100);
        assert_eq!(positions.len(), 101, "expected 101 positions");
        assert_eq!(positions[0], 1, "first position should be 1 (1-based)");
        assert_eq!(
            *positions.last().unwrap(),
            1000,
            "last position should be 1000"
        );
    }

    #[test]
    fn summarise_nonzero_only() {
        let outputs = vec![
            TinOutput {
                gene_id: "a".into(),
                chrom: "chr1".into(),
                tx_start: 0,
                tx_end: 100,
                tin: 90.0,
            },
            TinOutput {
                gene_id: "b".into(),
                chrom: "chr1".into(),
                tx_start: 0,
                tx_end: 100,
                tin: 0.0,
            },
            TinOutput {
                gene_id: "c".into(),
                chrom: "chr1".into(),
                tx_start: 0,
                tx_end: 100,
                tin: 80.0,
            },
        ];
        let s = summarise(&outputs);
        assert!((s.mean - 85.0).abs() < 1e-10, "mean = {}", s.mean);
        assert!((s.median - 85.0).abs() < 1e-10, "median = {}", s.median);
        // pop stdev of [90, 80]: sqrt(((90-85)^2 + (80-85)^2) / 2) = sqrt(25) = 5
        assert!((s.stdev - 5.0).abs() < 1e-10, "stdev = {}", s.stdev);
    }
}