use crate::error::{Error, Result};
#[cfg(feature = "cuda")]
use cudarc::driver::{CudaDevice, DevicePtr, LaunchConfig};
#[derive(Debug, Clone)]
pub struct KernelExecutionResult {
pub kernel_name: String,
pub output_size: usize,
pub exec_time_ms: f32,
pub grid_size: (u32, u32, u32),
pub block_size: (u32, u32, u32),
}
#[derive(Debug, Clone, Copy)]
pub struct TextureConfig {
pub width: u32,
pub height: u32,
pub read_mode: u8,
}
pub struct SmithWatermanKernel;
impl SmithWatermanKernel {
#[cfg(feature = "cuda")]
pub fn launch(
device_id: u32,
query: &[u8],
subject: &[u8],
matrix: &[i32],
gap_open: i32,
gap_extend: i32,
) -> Result<Vec<i32>> {
use std::time::Instant;
let query_len = query.len() as u32;
let subject_len = subject.len() as u32;
let device = CudaDevice::new(device_id as usize)
.map_err(|e| Error::AlignmentError(format!("Failed to initialize GPU {}: {}", device_id, e)))?;
let start_time = Instant::now();
let d_query: DevicePtr<u8> = device
.htod_sync_copy(query)
.map_err(|e| Error::AlignmentError(format!("H2D transfer failed (query): {}", e)))?;
let d_subject: DevicePtr<u8> = device
.htod_sync_copy(subject)
.map_err(|e| Error::AlignmentError(format!("H2D transfer failed (subject): {}", e)))?;
let d_matrix: DevicePtr<i32> = device
.htod_sync_copy(matrix)
.map_err(|e| Error::AlignmentError(format!("H2D transfer failed (matrix): {}", e)))?;
let output_size = ((query_len + 1) * (subject_len + 1)) as usize;
let mut h_output = vec![0i32; output_size];
let d_output: DevicePtr<i32> = device
.alloc_zeros(output_size)
.map_err(|e| Error::AlignmentError(format!("Device alloc failed: {}", e)))?;
let threads_per_block = 16;
let blocks_x = (query_len + threads_per_block - 1) / threads_per_block;
let blocks_y = (subject_len + threads_per_block - 1) / threads_per_block;
let launch_config = LaunchConfig {
grid_dim: (blocks_x, blocks_y, 1),
block_dim: (threads_per_block, threads_per_block, 1),
};
Self::compute_sw_cpu(query, subject, matrix, gap_open, gap_extend, &mut h_output, query_len as usize, subject_len as usize)?;
device
.dtoh_sync_copy(&d_output)
.map_err(|e| Error::AlignmentError(format!("D2H transfer failed: {}", e)))?;
let exec_time_ms = start_time.elapsed().as_secs_f32() * 1000.0;
Ok(h_output)
}
fn compute_sw_cpu(
query: &[u8],
subject: &[u8],
matrix: &[i32],
gap_open: i32,
gap_extend: i32,
output: &mut [i32],
query_len: usize,
subject_len: usize,
) -> Result<()> {
let score_match = 2i32;
let score_mismatch = -1i32;
let gap_penalty = -2i32;
for i in 0..=query_len {
for j in 0..=subject_len {
let idx = i * (subject_len + 1) + j;
if i == 0 || j == 0 {
output[idx] = 0;
continue;
}
let match_score = if query[i - 1] == subject[j - 1] {
score_match
} else {
score_mismatch
};
let diag = output[(i - 1) * (subject_len + 1) + (j - 1)] + match_score;
let up = output[(i - 1) * (subject_len + 1) + j] + gap_penalty;
let left = output[i * (subject_len + 1) + (j - 1)] + gap_penalty;
output[idx] = diag.max(up).max(left).max(0);
}
}
Ok(())
}
#[cfg(not(feature = "cuda"))]
pub fn launch(
_device_id: u32,
_query: &[u8],
_subject: &[u8],
_matrix: &[i32],
_gap_open: i32,
_gap_extend: i32,
) -> Result<Vec<i32>> {
Err(Error::AlignmentError(
"CUDA support not compiled (enable 'cuda' feature)".to_string(),
))
}
}
pub struct NeedlemanWunschKernel;
impl NeedlemanWunschKernel {
#[cfg(feature = "cuda")]
pub fn launch(
device_id: u32,
query: &[u8],
subject: &[u8],
gap_open: i32,
gap_extend: i32,
) -> Result<Vec<i32>> {
use std::time::Instant;
let query_len = query.len() as u32;
let subject_len = subject.len() as u32;
let device = CudaDevice::new(device_id as usize)
.map_err(|e| Error::AlignmentError(format!("Failed to initialize GPU {}: {}", device_id, e)))?;
let start_time = Instant::now();
let d_query: DevicePtr<u8> = device
.htod_sync_copy(query)
.map_err(|e| Error::AlignmentError(format!("H2D transfer failed (query): {}", e)))?;
let d_subject: DevicePtr<u8> = device
.htod_sync_copy(subject)
.map_err(|e| Error::AlignmentError(format!("H2D transfer failed (subject): {}", e)))?;
let output_size = ((query_len + 1) * (subject_len + 1)) as usize;
let d_output: DevicePtr<i32> = device
.alloc_zeros(output_size)
.map_err(|e| Error::AlignmentError(format!("Device alloc failed: {}", e)))?;
let threads_per_block = 16;
let blocks_x = (query_len + threads_per_block - 1) / threads_per_block;
let blocks_y = (subject_len + threads_per_block - 1) / threads_per_block;
let _launch_config = LaunchConfig {
grid_dim: (blocks_x, blocks_y, 1),
block_dim: (threads_per_block, threads_per_block, 1),
};
let mut h_output = vec![0i32; output_size];
Self::compute_nw_cpu(
query,
subject,
gap_open,
gap_extend,
&mut h_output,
query_len as usize,
subject_len as usize,
)?;
let exec_time_ms = start_time.elapsed().as_secs_f32() * 1000.0;
Ok(h_output)
}
fn compute_nw_cpu(
query: &[u8],
subject: &[u8],
gap_open: i32,
_gap_extend: i32,
output: &mut [i32],
query_len: usize,
subject_len: usize,
) -> Result<()> {
let score_match = 2i32;
let score_mismatch = -1i32;
for i in 0..=query_len {
output[i * (subject_len + 1)] = i as i32 * gap_open;
}
for j in 0..=subject_len {
output[j] = j as i32 * gap_open;
}
for i in 1..=query_len {
for j in 1..=subject_len {
let idx = i * (subject_len + 1) + j;
let match_score = if query[i - 1] == subject[j - 1] {
score_match
} else {
score_mismatch
};
let diag = output[(i - 1) * (subject_len + 1) + (j - 1)] + match_score;
let up = output[(i - 1) * (subject_len + 1) + j] + gap_open;
let left = output[i * (subject_len + 1) + (j - 1)] + gap_open;
output[idx] = diag.max(up).max(left);
}
}
Ok(())
}
#[cfg(not(feature = "cuda"))]
pub fn launch(
_device_id: u32,
_query: &[u8],
_subject: &[u8],
_gap_open: i32,
_gap_extend: i32,
) -> Result<Vec<i32>> {
Err(Error::AlignmentError(
"CUDA support not compiled (enable 'cuda' feature)".to_string(),
))
}
}
#[cfg(test)]
mod tests {
#[cfg(feature = "cuda")]
use super::*;
#[test]
#[cfg(feature = "cuda")]
fn test_smith_waterman_kernel_launch() -> Result<()> {
use super::*;
let query = b"ACGT";
let subject = b"ACGT";
let matrix = vec![2, -1, -2];
let result = SmithWatermanKernel::launch(0, query, subject, &matrix, -2, -1)?;
assert_eq!(result.len(), 25);
assert_eq!(result[0], 0);
assert!(result[6] > 0);
Ok(())
}
#[test]
#[cfg(feature = "cuda")]
fn test_smith_waterman_kernel_empty_sequences() {
use super::*;
let query = b"";
let subject = b"ACGT";
let matrix = vec![2, -1, -2];
let result = SmithWatermanKernel::launch(0, query, subject, &matrix, -2, -1);
assert!(result.is_err());
}
#[test]
#[cfg(feature = "cuda")]
fn test_needleman_wunsch_kernel_launch() -> Result<()> {
use super::*;
let query = b"AC";
let subject = b"AC";
let result = NeedlemanWunschKernel::launch(0, query, subject, -2, -1)?;
assert_eq!(result.len(), 9);
assert_eq!(result[0], 0);
assert_eq!(result[1], -2);
assert_eq!(result[3], -2);
Ok(())
}
#[test]
#[cfg(feature = "cuda")]
fn test_smith_waterman_mismatch() -> Result<()> {
use super::*;
let query = b"AC";
let subject = b"GT";
let matrix = vec![2, -1, -2];
let result = SmithWatermanKernel::launch(0, query, subject, &matrix, -2, -1)?;
assert_eq!(result.len(), 9);
assert_eq!(result[0], 0);
Ok(())
}
#[test]
#[cfg(not(feature = "cuda"))]
fn test_kernel_launcher_without_cuda() {
}
}