sciforge 0.0.3 - Docs.rs

pub const BLOSUM62: [[i8; 20]; 20] = [
    [
        4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0,
    ],
    [
        -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3,
    ],
    [
        -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3,
    ],
    [
        -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3,
    ],
    [
        0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1,
    ],
    [
        -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2,
    ],
    [
        -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2,
    ],
    [
        0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3,
    ],
    [
        -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3,
    ],
    [
        -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3,
    ],
    [
        -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1,
    ],
    [
        -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2,
    ],
    [
        -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1,
    ],
    [
        -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1,
    ],
    [
        -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2,
    ],
    [
        1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2,
    ],
    [
        0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0,
    ],
    [
        -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3,
    ],
    [
        -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1,
    ],
    [
        0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4,
    ],
];

pub fn aa_to_index(aa: u8) -> Option<usize> {
    match aa {
        b'A' => Some(0),
        b'R' => Some(1),
        b'N' => Some(2),
        b'D' => Some(3),
        b'C' => Some(4),
        b'Q' => Some(5),
        b'E' => Some(6),
        b'G' => Some(7),
        b'H' => Some(8),
        b'I' => Some(9),
        b'L' => Some(10),
        b'K' => Some(11),
        b'M' => Some(12),
        b'F' => Some(13),
        b'P' => Some(14),
        b'S' => Some(15),
        b'T' => Some(16),
        b'W' => Some(17),
        b'Y' => Some(18),
        b'V' => Some(19),
        _ => None,
    }
}

pub fn blosum62_score(a: u8, b: u8) -> i8 {
    match (aa_to_index(a), aa_to_index(b)) {
        (Some(i), Some(j)) => BLOSUM62[i][j],
        _ => 0,
    }
}

pub fn needleman_wunsch(seq_a: &[u8], seq_b: &[u8], gap_penalty: i32) -> (Vec<u8>, Vec<u8>, i32) {
    let m = seq_a.len();
    let n = seq_b.len();
    let mut score = vec![vec![0i32; n + 1]; m + 1];

    for (i, si) in score.iter_mut().enumerate() {
        si[0] = gap_penalty * i as i32;
    }
    for (j, s0j) in score[0].iter_mut().enumerate() {
        *s0j = gap_penalty * j as i32;
    }

    for i in 1..=m {
        for j in 1..=n {
            let match_score =
                score[i - 1][j - 1] + blosum62_score(seq_a[i - 1], seq_b[j - 1]) as i32;
            let delete = score[i - 1][j] + gap_penalty;
            let insert = score[i][j - 1] + gap_penalty;
            score[i][j] = match_score.max(delete).max(insert);
        }
    }

    let mut align_a = Vec::new();
    let mut align_b = Vec::new();
    let mut i = m;
    let mut j = n;
    while i > 0 || j > 0 {
        if i > 0
            && j > 0
            && score[i][j]
                == score[i - 1][j - 1] + blosum62_score(seq_a[i - 1], seq_b[j - 1]) as i32
        {
            align_a.push(seq_a[i - 1]);
            align_b.push(seq_b[j - 1]);
            i -= 1;
            j -= 1;
        } else if i > 0 && score[i][j] == score[i - 1][j] + gap_penalty {
            align_a.push(seq_a[i - 1]);
            align_b.push(b'-');
            i -= 1;
        } else {
            align_a.push(b'-');
            align_b.push(seq_b[j - 1]);
            j -= 1;
        }
    }
    align_a.reverse();
    align_b.reverse();
    (align_a, align_b, score[m][n])
}

pub fn smith_waterman(seq_a: &[u8], seq_b: &[u8], gap_penalty: i32) -> (Vec<u8>, Vec<u8>, i32) {
    let m = seq_a.len();
    let n = seq_b.len();
    let mut score = vec![vec![0i32; n + 1]; m + 1];
    let mut max_score = 0;
    let mut max_i = 0;
    let mut max_j = 0;

    for i in 1..=m {
        for j in 1..=n {
            let match_s = score[i - 1][j - 1] + blosum62_score(seq_a[i - 1], seq_b[j - 1]) as i32;
            let delete = score[i - 1][j] + gap_penalty;
            let insert = score[i][j - 1] + gap_penalty;
            score[i][j] = 0_i32.max(match_s).max(delete).max(insert);
            if score[i][j] > max_score {
                max_score = score[i][j];
                max_i = i;
                max_j = j;
            }
        }
    }

    let mut align_a = Vec::new();
    let mut align_b = Vec::new();
    let mut i = max_i;
    let mut j = max_j;
    while i > 0 && j > 0 && score[i][j] > 0 {
        if score[i][j] == score[i - 1][j - 1] + blosum62_score(seq_a[i - 1], seq_b[j - 1]) as i32 {
            align_a.push(seq_a[i - 1]);
            align_b.push(seq_b[j - 1]);
            i -= 1;
            j -= 1;
        } else if score[i][j] == score[i - 1][j] + gap_penalty {
            align_a.push(seq_a[i - 1]);
            align_b.push(b'-');
            i -= 1;
        } else {
            align_a.push(b'-');
            align_b.push(seq_b[j - 1]);
            j -= 1;
        }
    }
    align_a.reverse();
    align_b.reverse();
    (align_a, align_b, max_score)
}

pub fn alignment_identity(align_a: &[u8], align_b: &[u8]) -> f64 {
    let matches = align_a
        .iter()
        .zip(align_b.iter())
        .filter(|&(&a, &b)| a != b'-' && b != b'-' && a == b)
        .count();
    let total = align_a.len();
    if total == 0 {
        return 0.0;
    }
    matches as f64 / total as f64
}

pub fn affine_gap_needleman_wunsch(
    seq_a: &[u8],
    seq_b: &[u8],
    gap_open: i32,
    gap_extend: i32,
) -> (Vec<u8>, Vec<u8>, i32) {
    let m = seq_a.len();
    let n = seq_b.len();
    let neg_inf = i32::MIN / 2;

    let mut m_mat = vec![vec![neg_inf; n + 1]; m + 1];
    let mut ix = vec![vec![neg_inf; n + 1]; m + 1];
    let mut iy = vec![vec![neg_inf; n + 1]; m + 1];

    m_mat[0][0] = 0;
    for (i, ixi) in ix.iter_mut().enumerate().skip(1) {
        ixi[0] = gap_open + (i as i32 - 1) * gap_extend;
    }
    for (j, iy0j) in iy[0].iter_mut().enumerate().skip(1) {
        *iy0j = gap_open + (j as i32 - 1) * gap_extend;
    }

    for i in 1..=m {
        for j in 1..=n {
            let s = blosum62_score(seq_a[i - 1], seq_b[j - 1]) as i32;
            m_mat[i][j] = s + m_mat[i - 1][j - 1]
                .max(ix[i - 1][j - 1])
                .max(iy[i - 1][j - 1]);
            ix[i][j] = (m_mat[i - 1][j] + gap_open).max(ix[i - 1][j] + gap_extend);
            iy[i][j] = (m_mat[i][j - 1] + gap_open).max(iy[i][j - 1] + gap_extend);
        }
    }

    let final_score = m_mat[m][n].max(ix[m][n]).max(iy[m][n]);
    let mut align_a = Vec::new();
    let mut align_b = Vec::new();
    let mut i = m;
    let mut j = n;
    let mut state = if m_mat[m][n] >= ix[m][n] && m_mat[m][n] >= iy[m][n] {
        0
    } else if ix[m][n] >= iy[m][n] {
        1
    } else {
        2
    };

    while i > 0 || j > 0 {
        match state {
            0 => {
                if i == 0 || j == 0 {
                    break;
                }
                align_a.push(seq_a[i - 1]);
                align_b.push(seq_b[j - 1]);
                let prev = m_mat[i - 1][j - 1]
                    .max(ix[i - 1][j - 1])
                    .max(iy[i - 1][j - 1]);
                if i > 0 && j > 0 && prev == ix[i - 1][j - 1] {
                    state = 1;
                } else if i > 0 && j > 0 && prev == iy[i - 1][j - 1] {
                    state = 2;
                }
                i -= 1;
                j -= 1;
            }
            1 => {
                if i == 0 {
                    break;
                }
                align_a.push(seq_a[i - 1]);
                align_b.push(b'-');
                if ix[i][j] == ix[i - 1][j] + gap_extend {
                    state = 1;
                } else {
                    state = 0;
                }
                i -= 1;
            }
            _ => {
                if j == 0 {
                    break;
                }
                align_a.push(b'-');
                align_b.push(seq_b[j - 1]);
                if iy[i][j] == iy[i][j - 1] + gap_extend {
                    state = 2;
                } else {
                    state = 0;
                }
                j -= 1;
            }
        }
    }
    align_a.reverse();
    align_b.reverse();
    (align_a, align_b, final_score)
}

pub fn multiple_alignment_score(alignment: &[Vec<u8>]) -> i32 {
    if alignment.len() < 2 {
        return 0;
    }
    let cols = alignment.iter().map(|s| s.len()).min().unwrap_or(0);
    let mut total = 0i32;
    for col in 0..cols {
        for i in 0..alignment.len() {
            for j in (i + 1)..alignment.len() {
                let a = alignment[i][col];
                let b = alignment[j][col];
                if a != b'-' && b != b'-' {
                    total += blosum62_score(a, b) as i32;
                }
            }
        }
    }
    total
}

pub fn pairwise_distance(align_a: &[u8], align_b: &[u8]) -> f64 {
    let mut mismatches = 0;
    let mut compared = 0;
    for (&a, &b) in align_a.iter().zip(align_b.iter()) {
        if a != b'-' && b != b'-' {
            compared += 1;
            if a != b {
                mismatches += 1;
            }
        }
    }
    if compared == 0 {
        return 1.0;
    }
    mismatches as f64 / compared as f64
}

pub fn jukes_cantor_distance(p_distance: f64) -> f64 {
    if p_distance >= 0.75 {
        return f64::INFINITY;
    }
    -0.75 * (1.0 - 4.0 * p_distance / 3.0).ln()
}

pub fn gap_fraction(aligned: &[u8]) -> f64 {
    if aligned.is_empty() {
        return 0.0;
    }
    let gaps = aligned.iter().filter(|&&b| b == b'-').count();
    gaps as f64 / aligned.len() as f64
}