pub mod kernel;
pub mod batch;
pub mod bam;
pub mod gpu_dispatcher;
pub mod gpu_kernels;
pub mod cuda_kernels;
pub mod cuda_kernels_rtc;
pub mod cuda_device_context;
pub mod cuda_runtime;
pub mod kernel_compiler;
pub mod kernel_launcher;
pub mod smith_waterman_cuda;
pub mod hmmer3_parser;
pub mod hmm_multiformat;
pub mod simd_viterbi;
pub mod profile_dp;
pub mod gpu_memory;
pub mod cigar_gen;
pub mod gpu_executor;
pub mod gpu_halo_buffer;
pub mod gpu_tiling_strategy;
pub use bam::{BamFile, BamRecord};
pub use gpu_dispatcher::{GpuDispatcher, GpuAvailability, AlignmentStrategy, GpuDeviceInfo};
pub use cuda_device_context::CudaDeviceContext;
pub use cuda_runtime::{GpuRuntime, GpuBuffer};
pub use kernel_compiler::{KernelCompiler, KernelType, CompiledKernel, KernelCache};
pub use kernel_launcher::{SmithWatermanKernel, NeedlemanWunschKernel, KernelExecutionResult};
pub use smith_waterman_cuda::SmithWatermanCudaKernel;
pub use hmmer3_parser::{HmmerModel, HmmerError, KarlinParameters};
pub use hmm_multiformat::{MultiFormatHmmParser, UniversalHmmProfile, HmmFormat, HmmParser};
pub use simd_viterbi::{ViterbiDecoder, ViterbiPath};
pub use profile_dp::{Pssm, ProfileAlignment, align_profiles};
pub use gpu_memory::{GpuMemoryPool, MultiGpuMemory, MemoryAllocation};
pub use cigar_gen::{CigarString, CigarOp, traceback_to_cigar};
pub use gpu_executor::GpuExecutor;
pub use gpu_halo_buffer::{HaloBufferManager, HaloTile, HaloConfig};
pub use gpu_tiling_strategy::{GpuTilingStrategy, TilingProfile, TilingStats};
use crate::error::{Error, Result};
use crate::protein::{Protein, AminoAcid};
use crate::scoring::{ScoringMatrix, AffinePenalty};
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Cigar {
operations: Vec<(u32, CigarOp)>,
}
impl Cigar {
pub fn new() -> Self {
Cigar {
operations: Vec::new(),
}
}
pub fn push(&mut self, count: u32, op: CigarOp) {
if count == 0 {
return;
}
if let Some((last_count, last_op)) = self.operations.last_mut() {
if *last_op == op {
*last_count += count;
return;
}
}
self.operations.push((count, op));
}
pub fn coalesce(&mut self) {
let mut result = Vec::new();
for (count, op) in self.operations.drain(..) {
if let Some((last_count, last_op)) = result.last_mut() {
if *last_op == op {
*last_count += count;
continue;
}
}
result.push((count, op));
}
self.operations = result;
}
pub fn operations(&self) -> &[(u32, CigarOp)] {
&self.operations
}
pub fn query_length(&self) -> u32 {
self.operations
.iter()
.filter_map(|(count, op)| match op {
CigarOp::Match | CigarOp::Insertion | CigarOp::SeqMatch | CigarOp::SeqMismatch => Some(count),
_ => None,
})
.sum::<u32>()
}
pub fn reference_length(&self) -> u32 {
self.operations
.iter()
.filter_map(|(count, op)| match op {
CigarOp::Match | CigarOp::Deletion | CigarOp::Skip | CigarOp::SeqMatch | CigarOp::SeqMismatch => Some(count),
_ => None,
})
.sum::<u32>()
}
}
impl Default for Cigar {
fn default() -> Self {
Self::new()
}
}
impl std::fmt::Display for Cigar {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
for (count, op) in &self.operations {
write!(f, "{}{}", count, op)?;
}
Ok(())
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SamRecord {
pub qname: String,
pub query_seq: String,
pub query_qual: String,
pub rname: String,
pub reference_seq: String,
pub pos: u32,
pub mapq: u8,
pub cigar: String,
pub flag: u16,
pub optional_fields: Vec<String>,
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SamHeader {
pub version: String,
pub sort_order: String,
pub references: Vec<(String, u32)>,
pub program: Option<String>,
}
impl SamHeader {
pub fn new(version: &str) -> Self {
SamHeader {
version: version.to_string(),
sort_order: "unsorted".to_string(),
references: Vec::new(),
program: None,
}
}
pub fn with_sort_order(mut self, order: &str) -> Self {
self.sort_order = order.to_string();
self
}
pub fn add_reference(&mut self, name: &str, length: u32) {
self.references.push((name.to_string(), length));
}
pub fn with_program(mut self, program: &str) -> Self {
self.program = Some(program.to_string());
self
}
pub fn to_header_lines(&self) -> Vec<String> {
let mut lines = Vec::new();
let mut hd = format!("@HD\tVN:{}", self.version);
if self.sort_order != "unsorted" {
hd.push_str(&format!("\tSO:{}", self.sort_order));
}
lines.push(hd);
if let Some(ref prog) = self.program {
lines.push(format!("@PG\tID:omics-simd\tPN:omics-simd\tVN:{}", prog));
}
for (name, length) in &self.references {
lines.push(format!("@SQ\tSN:{}\tLN:{}", name, length));
}
lines
}
}
impl SamRecord {
pub fn new() -> Self {
SamRecord {
qname: "query".to_string(),
query_seq: String::new(),
query_qual: String::new(),
rname: "reference".to_string(),
reference_seq: String::new(),
pos: 0,
mapq: 60,
cigar: String::new(),
flag: 0,
optional_fields: Vec::new(),
}
}
pub fn from_alignment(
result: &AlignmentResult,
query_name: &str,
reference_name: &str,
reference_start: u32,
) -> Self {
let mut record = SamRecord::new();
record.qname = query_name.to_string();
record.rname = reference_name.to_string();
record.pos = reference_start.saturating_add(1); record.cigar = result.cigar.clone();
record.query_seq = result.aligned_seq1.replace('-', "");
record.mapq = 60;
record.optional_fields.push(format!("AS:i:{}", result.score));
record
}
pub fn add_optional_field(&mut self, field: &str) {
self.optional_fields.push(field.to_string());
}
pub fn to_sam_line(&self) -> String {
let mut line = format!(
"{}\t{}\t{}\t{}\t{}\t{}\t",
self.qname,
self.flag,
self.rname,
self.pos,
self.mapq,
self.cigar
);
line.push_str("*\t0\t0\t");
line.push_str(&self.query_seq);
line.push('\t');
if self.query_qual.is_empty() {
line.push('*');
} else {
line.push_str(&self.query_qual);
}
for field in &self.optional_fields {
line.push('\t');
line.push_str(field);
}
line
}
}
impl Default for SamRecord {
fn default() -> Self {
Self::new()
}
}
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct AlignmentResult {
pub score: i32,
pub aligned_seq1: String,
pub aligned_seq2: String,
pub start_pos1: usize, pub start_pos2: usize,
pub end_pos1: usize,
pub end_pos2: usize,
pub cigar: String,
pub soft_clips: (u32, u32),
}
impl AlignmentResult {
pub fn identity(&self) -> f64 {
let matches = self.aligned_seq1
.chars()
.zip(self.aligned_seq2.chars())
.filter(|(a, b)| a == b && a != &'-')
.count();
let total = self.aligned_seq1.chars().filter(|c| c != &'-').count();
if total == 0 {
0.0
} else {
(matches as f64 / total as f64) * 100.0
}
}
pub fn gap_count(&self) -> usize {
self.aligned_seq1
.chars()
.zip(self.aligned_seq2.chars())
.filter(|(a, b)| *a == '-' || *b == '-')
.count()
}
pub fn generate_cigar(&mut self) {
let mut cigar = Cigar::new();
if self.soft_clips.0 > 0 {
cigar.push(self.soft_clips.0, CigarOp::SoftClip);
}
let bytes1 = self.aligned_seq1.as_bytes();
let bytes2 = self.aligned_seq2.as_bytes();
for i in 0..bytes1.len().min(bytes2.len()) {
let a = bytes1[i] as char;
let b = bytes2[i] as char;
match (a, b) {
('-', _) => cigar.push(1, CigarOp::Deletion),
(_, '-') => cigar.push(1, CigarOp::Insertion),
(c1, c2) if c1 == c2 => cigar.push(1, CigarOp::SeqMatch),
_ => cigar.push(1, CigarOp::SeqMismatch),
}
}
if self.soft_clips.1 > 0 {
cigar.push(self.soft_clips.1, CigarOp::SoftClip);
}
cigar.coalesce();
self.cigar = cigar.to_string();
}
}
pub struct SmithWaterman {
matrix: ScoringMatrix,
penalty: AffinePenalty,
use_simd: bool,
bandwidth: Option<usize>, }
impl SmithWaterman {
pub fn new() -> Self {
SmithWaterman {
matrix: ScoringMatrix::default(),
penalty: AffinePenalty::default(),
use_simd: true, bandwidth: None,
}
}
pub fn with_matrix(matrix: ScoringMatrix) -> Self {
SmithWaterman {
matrix,
penalty: AffinePenalty::default(),
use_simd: cfg!(any(target_arch = "x86_64", target_arch = "aarch64")),
bandwidth: None,
}
}
pub fn with_penalty(penalty: AffinePenalty) -> Self {
SmithWaterman {
matrix: ScoringMatrix::default(),
penalty,
use_simd: cfg!(any(target_arch = "x86_64", target_arch = "aarch64")),
bandwidth: None,
}
}
pub fn scalar_only(mut self) -> Self {
self.use_simd = false;
self
}
pub fn with_simd(mut self, use_simd: bool) -> Self {
self.use_simd = use_simd;
self
}
pub fn with_bandwidth(mut self, bandwidth: usize) -> Self {
self.bandwidth = Some(bandwidth);
self
}
pub fn without_bandwidth(mut self) -> Self {
self.bandwidth = None;
self
}
pub fn align(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
if seq1.is_empty() || seq2.is_empty() {
return Err(Error::EmptySequence);
}
if let Some(bandwidth) = self.bandwidth {
return self.align_banded(seq1, seq2, bandwidth);
}
if self.use_simd {
self.align_simd(seq1, seq2)
} else {
self.align_scalar(seq1, seq2)
}
}
fn align_simd(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
#[cfg(target_arch = "x86_64")]
{
let (h, max_i, max_j) = kernel::striped_simd::smith_waterman_striped_avx2(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)?;
self.build_result(seq1, seq2, &h, max_i, max_j)
}
#[cfg(target_arch = "aarch64")]
{
let (h, max_i, max_j) = kernel::neon::smith_waterman_neon(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)
.or_else(|_| self.align_scalar(seq1, seq2))?;
self.build_result(seq1, seq2, &h, max_i, max_j)
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
self.align_scalar(seq1, seq2)
}
}
fn align_scalar(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
let (h, max_i, max_j) = kernel::scalar::smith_waterman_scalar(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)?;
self.build_result(seq1, seq2, &h, max_i, max_j)
}
fn align_banded(&self, seq1: &Protein, seq2: &Protein, bandwidth: usize) -> Result<AlignmentResult> {
let (h, max_i, max_j) = kernel::banded::smith_waterman_banded(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
bandwidth,
)?;
self.build_result(seq1, seq2, &h, max_i, max_j)
}
fn build_result(
&self,
seq1: &Protein,
seq2: &Protein,
h: &[Vec<i32>],
max_i: usize,
max_j: usize,
) -> Result<AlignmentResult> {
let score = h[max_i][max_j];
let seq1_bytes = seq1.sequence();
let seq2_bytes = seq2.sequence();
let (aligned1, aligned2, cigar, start_i, start_j) = self.traceback_sw_with_cigar(h, seq1_bytes, seq2_bytes, max_i, max_j)?;
let result = AlignmentResult {
score,
aligned_seq1: aligned1,
aligned_seq2: aligned2,
start_pos1: start_i, start_pos2: start_j, end_pos1: max_i, end_pos2: max_j, cigar, soft_clips: (start_i as u32, (seq1.len() - max_i) as u32), };
Ok(result)
}
fn traceback_sw_with_cigar(
&self,
h: &[Vec<i32>],
seq1: &[AminoAcid],
seq2: &[AminoAcid],
mut i: usize,
mut j: usize,
) -> Result<(String, String, String, usize, usize)> {
let mut aligned1 = String::new();
let mut aligned2 = String::new();
let mut cigar_ops = Vec::new();
while i > 0 && j > 0 && h[i][j] > 0 {
let match_score = self.matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + self.penalty.extend;
let _left = h[i][j - 1] + self.penalty.extend;
if h[i][j] == diagonal {
let c1 = seq1[i - 1].to_code() as char;
let c2 = seq2[j - 1].to_code() as char;
aligned1.push(c1);
aligned2.push(c2);
let op = if c1 == c2 {
CigarOp::SeqMatch
} else {
CigarOp::SeqMismatch
};
cigar_ops.push((op, 1u32));
i -= 1;
j -= 1;
} else if h[i][j] == up {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
cigar_ops.push((CigarOp::Insertion, 1));
i -= 1;
} else {
aligned1.push('-');
aligned2.push(seq2[j - 1].to_code());
cigar_ops.push((CigarOp::Deletion, 1));
j -= 1;
}
}
let aligned1_str: String = aligned1.chars().rev().collect();
let aligned2_str: String = aligned2.chars().rev().collect();
cigar_ops.reverse();
let mut cigar = Cigar::new();
if !cigar_ops.is_empty() {
let mut current_op = cigar_ops[0].0;
let mut current_len = cigar_ops[0].1;
for &(op, len) in &cigar_ops[1..] {
if op == current_op {
current_len += len;
} else {
cigar.push(current_len, current_op);
current_op = op;
current_len = len;
}
}
cigar.push(current_len, current_op);
}
let cigar_str = cigar.to_string();
Ok((aligned1_str, aligned2_str, cigar_str, i, j))
}
#[allow(dead_code)]
fn traceback_sw(
&self,
h: &[Vec<i32>],
seq1: &[AminoAcid],
seq2: &[AminoAcid],
mut i: usize,
mut j: usize,
) -> Result<(String, String)> {
let mut aligned1 = String::new();
let mut aligned2 = String::new();
while i > 0 && j > 0 && h[i][j] > 0 {
let match_score = self.matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + self.penalty.extend;
let _left = h[i][j - 1] + self.penalty.extend;
if h[i][j] == diagonal {
aligned1.push(seq1[i - 1].to_code());
aligned2.push(seq2[j - 1].to_code());
i -= 1;
j -= 1;
} else if h[i][j] == up {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
i -= 1;
} else {
aligned1.push('-');
aligned2.push(seq2[j - 1].to_code());
j -= 1;
}
}
Ok((aligned1.chars().rev().collect(), aligned2.chars().rev().collect()))
}
}
impl Default for SmithWaterman {
fn default() -> Self {
Self::new()
}
}
pub struct NeedlemanWunsch {
matrix: ScoringMatrix,
penalty: AffinePenalty,
use_simd: bool,
bandwidth: Option<usize>, }
impl NeedlemanWunsch {
pub fn new() -> Self {
NeedlemanWunsch {
matrix: ScoringMatrix::default(),
penalty: AffinePenalty::default(),
use_simd: cfg!(any(target_arch = "x86_64", target_arch = "aarch64")),
bandwidth: None,
}
}
pub fn scalar_only(mut self) -> Self {
self.use_simd = false;
self
}
pub fn with_simd(mut self, use_simd: bool) -> Self {
self.use_simd = use_simd;
self
}
pub fn with_bandwidth(mut self, bandwidth: usize) -> Self {
self.bandwidth = Some(bandwidth);
self
}
pub fn without_bandwidth(mut self) -> Self {
self.bandwidth = None;
self
}
pub fn align(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
if seq1.is_empty() || seq2.is_empty() {
return Err(Error::EmptySequence);
}
if let Some(bandwidth) = self.bandwidth {
return self.align_banded(seq1, seq2, bandwidth);
}
if self.use_simd {
self.align_simd(seq1, seq2)
} else {
self.align_scalar(seq1, seq2)
}
}
fn align_simd(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
#[cfg(target_arch = "x86_64")]
{
let h = kernel::striped_simd::needleman_wunsch_striped_avx2(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)?;
self.build_result(seq1, seq2, &h)
}
#[cfg(target_arch = "aarch64")]
{
let h = kernel::neon::needleman_wunsch_neon(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)
.or_else(|_| self.align_scalar(seq1, seq2).map(|r| vec![vec![r.score]]))?;
self.build_result(seq1, seq2, &h)
}
#[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
{
self.align_scalar(seq1, seq2)
}
}
fn align_scalar(&self, seq1: &Protein, seq2: &Protein) -> Result<AlignmentResult> {
let h = kernel::scalar::needleman_wunsch_scalar(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
)?;
self.build_result(seq1, seq2, &h)
}
fn align_banded(&self, seq1: &Protein, seq2: &Protein, bandwidth: usize) -> Result<AlignmentResult> {
let h = kernel::banded::needleman_wunsch_banded(
seq1.sequence(),
seq2.sequence(),
&self.matrix,
self.penalty.open,
self.penalty.extend,
bandwidth,
)?;
self.build_result(seq1, seq2, &h)
}
fn build_result(
&self,
seq1: &Protein,
seq2: &Protein,
h: &[Vec<i32>],
) -> Result<AlignmentResult> {
let m = seq1.len();
let n = seq2.len();
let score = h[m][n];
let seq1_bytes = seq1.sequence();
let seq2_bytes = seq2.sequence();
let (aligned1, aligned2, cigar) = self.traceback_nw_with_cigar(h, seq1_bytes, seq2_bytes)?;
let result = AlignmentResult {
score,
aligned_seq1: aligned1,
aligned_seq2: aligned2,
start_pos1: 0,
start_pos2: 0,
end_pos1: m,
end_pos2: n,
cigar, soft_clips: (0, 0),
};
Ok(result)
}
fn traceback_nw(
&self,
h: &[Vec<i32>],
seq1: &[AminoAcid],
seq2: &[AminoAcid],
) -> Result<(String, String)> {
let mut i = seq1.len();
let mut j = seq2.len();
let mut aligned1 = String::new();
let mut aligned2 = String::new();
while i > 0 || j > 0 {
if i > 0 && j > 0 {
let match_score = self.matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + self.penalty.extend;
let _left = h[i][j - 1] + self.penalty.extend;
if h[i][j] == diagonal {
aligned1.push(seq1[i - 1].to_code());
aligned2.push(seq2[j - 1].to_code());
i -= 1;
j -= 1;
continue;
} else if h[i][j] == up {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
i -= 1;
continue;
}
}
if j > 0 {
aligned1.push('-');
aligned2.push(seq2[j - 1].to_code());
j -= 1;
} else if i > 0 {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
i -= 1;
}
}
Ok((aligned1.chars().rev().collect(), aligned2.chars().rev().collect()))
}
fn traceback_nw_with_cigar(
&self,
h: &[Vec<i32>],
seq1: &[AminoAcid],
seq2: &[AminoAcid],
) -> Result<(String, String, String)> {
let mut i = seq1.len();
let mut j = seq2.len();
let mut aligned1 = String::new();
let mut aligned2 = String::new();
let mut cigar_ops = Vec::new();
while i > 0 || j > 0 {
if i > 0 && j > 0 {
let match_score = self.matrix.score(seq1[i - 1], seq2[j - 1]);
let diagonal = h[i - 1][j - 1] + match_score;
let up = h[i - 1][j] + self.penalty.extend;
let _left = h[i][j - 1] + self.penalty.extend;
if h[i][j] == diagonal {
aligned1.push(seq1[i - 1].to_code());
aligned2.push(seq2[j - 1].to_code());
let c1 = seq1[i - 1].to_code();
let c2 = seq2[j - 1].to_code();
let op = if c1 == c2 {
CigarOp::SeqMatch
} else {
CigarOp::SeqMismatch
};
cigar_ops.push((op, 1u32));
i -= 1;
j -= 1;
continue;
} else if h[i][j] == up {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
cigar_ops.push((CigarOp::Insertion, 1));
i -= 1;
continue;
}
}
if j > 0 {
aligned1.push('-');
aligned2.push(seq2[j - 1].to_code());
cigar_ops.push((CigarOp::Deletion, 1));
j -= 1;
} else if i > 0 {
aligned1.push(seq1[i - 1].to_code());
aligned2.push('-');
cigar_ops.push((CigarOp::Insertion, 1));
i -= 1;
}
}
let aligned1_str: String = aligned1.chars().rev().collect();
let aligned2_str: String = aligned2.chars().rev().collect();
cigar_ops.reverse();
let mut cigar = Cigar::new();
if !cigar_ops.is_empty() {
let mut current_op = cigar_ops[0].0;
let mut current_len = cigar_ops[0].1;
for &(op, len) in &cigar_ops[1..] {
if op == current_op {
current_len += len;
} else {
cigar.push(current_len, current_op);
current_op = op;
current_len = len;
}
}
cigar.push(current_len, current_op);
}
let cigar_str = cigar.to_string();
Ok((aligned1_str, aligned2_str, cigar_str))
}
}
impl Default for NeedlemanWunsch {
fn default() -> Self {
Self::new()
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cigar_operations() {
let mut cigar = Cigar::new();
cigar.push(5, CigarOp::Match);
cigar.push(2, CigarOp::Insertion);
cigar.push(3, CigarOp::Match);
let cigar_str = cigar.to_string();
assert_eq!(cigar_str, "5M2I3M");
}
#[test]
fn test_cigar_coalesce() {
let mut cigar = Cigar::new();
cigar.push(2, CigarOp::Match);
cigar.push(1, CigarOp::Insertion);
cigar.push(3, CigarOp::Match);
cigar.push(2, CigarOp::Match);
let mut cigar_copy = cigar.clone();
cigar_copy.coalesce();
assert_eq!(cigar_copy.to_string(), "2M1I5M");
}
#[test]
fn test_cigar_lengths() {
let mut cigar = Cigar::new();
cigar.push(5, CigarOp::SeqMatch);
cigar.push(2, CigarOp::Insertion);
cigar.push(3, CigarOp::Deletion);
cigar.push(1, CigarOp::Match);
assert_eq!(cigar.query_length(), 8); assert_eq!(cigar.reference_length(), 9); }
#[test]
fn test_alignment_result_identity() -> Result<()> {
let mut result = AlignmentResult {
score: 10,
aligned_seq1: "AGSG".to_string(),
aligned_seq2: "AGSG".to_string(),
start_pos1: 0,
start_pos2: 0,
end_pos1: 4,
end_pos2: 4,
cigar: String::new(),
soft_clips: (0, 0),
};
assert_eq!(result.identity(), 100.0);
result.aligned_seq1 = "AGSG".to_string();
result.aligned_seq2 = "AASG".to_string();
assert_eq!(result.identity(), 75.0);
Ok(())
}
#[test]
fn test_alignment_cigar_generation() -> Result<()> {
let mut result = AlignmentResult {
score: 15,
aligned_seq1: "AG-SG".to_string(),
aligned_seq2: "AGASG".to_string(),
start_pos1: 0,
start_pos2: 0,
end_pos1: 4,
end_pos2: 5,
cigar: String::new(),
soft_clips: (0, 0),
};
result.generate_cigar();
assert!(!result.cigar.is_empty());
assert!(result.cigar.contains('D') || result.cigar.contains('='));
Ok(())
}
#[test]
fn test_smith_waterman() -> Result<()> {
let sw = SmithWaterman::new();
let seq1 = Protein::from_string("AGSG")?;
let seq2 = Protein::from_string("AGS")?;
let result = sw.align(&seq1, &seq2)?;
assert!(result.score >= 0);
assert!(!result.cigar.is_empty());
Ok(())
}
#[test]
fn test_smith_waterman_with_cigar() -> Result<()> {
let sw = SmithWaterman::new();
let seq1 = Protein::from_string("MGLSD")?;
let seq2 = Protein::from_string("MGLS")?;
let result = sw.align(&seq1, &seq2)?;
assert!(result.score > 0);
assert!(!result.cigar.is_empty());
for (prefix, _src) in result.cigar.split(|c: char| !c.is_numeric()).zip(1..) {
if !prefix.is_empty() {
prefix.parse::<u32>()
.map_err(|e| crate::error::Error::Custom(
format!("Invalid CIGAR numeric count '{}': {}", prefix, e)
))?;
}
}
Ok(())
}
#[test]
fn test_sam_record_creation() {
let record = SamRecord::new();
assert_eq!(record.qname, "query");
assert_eq!(record.rname, "reference");
assert_eq!(record.pos, 0);
assert_eq!(record.mapq, 60);
}
#[test]
fn test_sam_record_from_alignment() -> Result<()> {
let sw = SmithWaterman::new();
let seq1 = Protein::from_string("AGSGDSAF")?;
let seq2 = Protein::from_string("AGSGD")?;
let result = sw.align(&seq1, &seq2)?;
let sam_record = SamRecord::from_alignment(&result, "query1", "ref1", 100);
assert_eq!(sam_record.qname, "query1");
assert_eq!(sam_record.rname, "ref1");
assert_eq!(sam_record.pos, 101); assert!(!sam_record.cigar.is_empty());
assert!(sam_record.optional_fields.iter().any(|f| f.starts_with("AS:i:")));
Ok(())
}
#[test]
fn test_sam_record_to_line() {
let mut record = SamRecord::new();
record.qname = "read1".to_string();
record.rname = "chr1".to_string();
record.pos = 1000;
record.cigar = "5M1D4M".to_string();
record.query_seq = "ACGTACGTAC".to_string();
record.add_optional_field("AS:i:100");
let line = record.to_sam_line();
assert!(line.contains("read1"));
assert!(line.contains("chr1"));
assert!(line.contains("1000"));
assert!(line.contains("5M1D4M"));
assert!(line.contains("ACGTACGTAC"));
assert!(line.contains("AS:i:100"));
}
#[test]
fn test_sam_header_generation() {
let mut header = SamHeader::new("1.6");
header.add_reference("chr1", 248956422);
header.add_reference("chr2", 242193529);
header = header.with_program("omics-simd-0.1.0");
let lines = header.to_header_lines();
assert!(lines.iter().any(|l| l.starts_with("@HD")));
assert!(lines.iter().any(|l| l.contains("VN:1.6")));
assert!(lines.iter().any(|l| l.starts_with("@SQ") && l.contains("chr1")));
assert!(lines.iter().any(|l| l.starts_with("@SQ") && l.contains("chr2")));
assert!(lines.iter().any(|l| l.starts_with("@PG")));
}
}