rsomics-tree-tipdist 0.1.0

Patristic tip-to-tip distance matrix from a phylogenetic tree (sum of branch lengths between every pair of tips) — scikit-bio TreeNode.cophenet equivalent, byte-exact TSV
Documentation
use std::fmt::Write as _;

/// Append an f64 formatted as CPython `repr` / numpy `str` does, so the emitted
/// TSV is byte-identical to skbio's DistanceMatrix.write output.
///
/// CPython renders the shortest correctly-rounded digit string, fixed-point
/// when the decimal exponent is in -4..16 and `e`-form otherwise. Rust's `{:e}`
/// already yields the shortest round-tripping digits in one pass; the only
/// residual divergence from CPython is the last digit of an exact halfway tie,
/// which can occur above ~7.5e13 — outside the range any patristic distance
/// reaches. We render the digits with CPython's fixed/scientific layout.
pub fn push_repr(out: &mut String, x: f64) {
    if x.is_nan() {
        out.push_str("nan");
        return;
    }
    if x.is_infinite() {
        out.push_str(if x < 0.0 { "-inf" } else { "inf" });
        return;
    }

    let neg = x.is_sign_negative();
    let mag = x.abs();
    if mag == 0.0 {
        out.push_str(if neg { "-0.0" } else { "0.0" });
        return;
    }

    let mut sci = String::new();
    let _ = write!(sci, "{mag:e}");
    let epos = sci.find('e').unwrap();
    let exp: i32 = sci[epos + 1..].parse().unwrap();
    let mut digits = sci;
    digits.truncate(epos);
    if let Some(dot) = digits.find('.') {
        digits.remove(dot);
    }
    let digits = digits.trim_end_matches('0');
    let digits = if digits.is_empty() { "0" } else { digits };
    let ndigits = digits.len() as i32;

    if neg {
        out.push('-');
    }
    if (-4..16).contains(&exp) {
        fixed(out, digits, ndigits, exp);
    } else {
        scientific(out, digits, exp);
    }
}

fn fixed(out: &mut String, digits: &str, ndigits: i32, exp: i32) {
    if exp >= 0 {
        if exp + 1 >= ndigits {
            out.push_str(digits);
            for _ in 0..(exp + 1 - ndigits) {
                out.push('0');
            }
            out.push_str(".0");
        } else {
            let split = (exp + 1) as usize;
            out.push_str(&digits[..split]);
            out.push('.');
            out.push_str(&digits[split..]);
        }
    } else {
        out.push_str("0.");
        for _ in 0..(-exp - 1) {
            out.push('0');
        }
        out.push_str(digits);
    }
}

fn scientific(out: &mut String, digits: &str, exp: i32) {
    out.push_str(&digits[..1]);
    if digits.len() > 1 {
        out.push('.');
        out.push_str(&digits[1..]);
    }
    out.push('e');
    out.push(if exp < 0 { '-' } else { '+' });
    let _ = write!(out, "{:02}", exp.abs());
}

#[cfg(test)]
fn py_repr(x: f64) -> String {
    let mut s = String::new();
    push_repr(&mut s, x);
    s
}

#[cfg(test)]
mod tests {
    use super::py_repr;

    #[test]
    fn matches_cpython_repr() {
        let cases = [
            (0.0, "0.0"),
            (-0.0, "-0.0"),
            (3.0, "3.0"),
            (7.0, "7.0"),
            (5.1, "5.1"),
            (100.0, "100.0"),
            (0.1, "0.1"),
            (0.0001, "0.0001"),
            (1e-5, "1e-05"),
            (1.234e-5, "1.234e-05"),
            (1e16, "1e+16"),
            (1.5e16, "1.5e+16"),
            (1000000000000000.0, "1000000000000000.0"),
            (0.3333333333333333, "0.3333333333333333"),
            (0.30000000000000004, "0.30000000000000004"),
            (2.0500000000000003, "2.0500000000000003"),
            (-2.5, "-2.5"),
        ];
        for (x, want) in cases {
            assert_eq!(py_repr(x), want, "py_repr({x})");
        }
    }
}