sciforge 0.0.3

A comprehensive scientific computing library in pure Rust with zero dependencies
Documentation
pub fn pwm_score(pwm: &[Vec<f64>], sequence: &[u8]) -> f64 {
    let n = pwm.len().min(sequence.len());
    let mut score = 0.0;
    for i in 0..n {
        let idx = base_to_index(sequence[i]);
        if idx < pwm[i].len() {
            score += pwm[i][idx];
        }
    }
    score
}

pub fn pwm_scan(pwm: &[Vec<f64>], sequence: &[u8], threshold: f64) -> Vec<(usize, f64)> {
    let motif_len = pwm.len();
    let mut hits = Vec::new();
    if sequence.len() < motif_len {
        return hits;
    }
    for i in 0..=sequence.len() - motif_len {
        let score = pwm_score(pwm, &sequence[i..i + motif_len]);
        if score >= threshold {
            hits.push((i, score));
        }
    }
    hits
}

pub fn information_content(pwm: &[Vec<f64>]) -> Vec<f64> {
    pwm.iter()
        .map(|col| {
            let max_bits = 2.0_f64;
            let entropy: f64 = col
                .iter()
                .filter(|&&p| p > 0.0)
                .map(|&p| -p * p.log2())
                .sum();
            max_bits - entropy
        })
        .collect()
}

pub fn total_information(pwm: &[Vec<f64>]) -> f64 {
    information_content(pwm).iter().sum()
}

pub fn consensus_sequence(pwm: &[Vec<f64>]) -> String {
    let bases = [b'A', b'C', b'G', b'T'];
    pwm.iter()
        .map(|col| {
            let max_idx = col
                .iter()
                .enumerate()
                .max_by(|a, b| a.1.partial_cmp(b.1).unwrap_or(std::cmp::Ordering::Equal))
                .map(|(i, _)| i)
                .unwrap_or(0);
            if max_idx < bases.len() {
                bases[max_idx] as char
            } else {
                'N'
            }
        })
        .collect()
}

pub fn frequency_matrix(sequences: &[&[u8]], length: usize) -> Vec<Vec<f64>> {
    let n = sequences.len() as f64;
    let mut matrix = vec![vec![0.0; 4]; length];
    for seq in sequences {
        for (i, &base) in seq.iter().enumerate() {
            if i >= length {
                break;
            }
            let idx = base_to_index(base);
            if idx < 4 {
                matrix[i][idx] += 1.0;
            }
        }
    }
    for col in matrix.iter_mut() {
        for val in col.iter_mut() {
            *val /= n;
        }
    }
    matrix
}

fn base_to_index(base: u8) -> usize {
    match base {
        b'A' | b'a' => 0,
        b'C' | b'c' => 1,
        b'G' | b'g' => 2,
        b'T' | b't' => 3,
        _ => 4,
    }
}