#[cfg(feature = "hip")]
mod hip_impl {
use std::ffi::CString;
use ndarray::Array2;
#[derive(Debug)]
pub enum HipError {
Device(String),
Runtime(String),
Kernel(String),
}
impl std::fmt::Display for HipError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
HipError::Device(s) => write!(f, "HIP Device Error: {}", s),
HipError::Runtime(s) => write!(f, "HIP Runtime Error: {}", s),
HipError::Kernel(s) => write!(f, "HIP Kernel Error: {}", s),
}
}
}
impl std::error::Error for HipError {}
pub struct SmithWatermanHip {
device_id: i32,
available: bool,
}
impl SmithWatermanHip {
pub fn new(device_id: i32) -> Result<Self, HipError> {
Ok(SmithWatermanHip {
device_id,
available: true,
})
}
pub fn kernel_source() -> &'static str {
r#"
#include <hip/hip_runtime.h>
#include <hip/hip_cooperative_groups.h>
// Smith-Waterman HIP kernel for local sequence alignment
__global__ void smith_waterman_kernel(
const int *seq1, int len1,
const int *seq2, int len2,
const int *matrix,
int extend_penalty,
int *output,
int *max_score,
int *max_i,
int *max_j
) {
// Thread indexing
int tidx = threadIdx.x + blockIdx.x * blockDim.x;
int tidy = threadIdx.y + blockIdx.y * blockDim.y;
int i = tidy + 1;
int j = tidx + 1;
if (i > len1 || j > len2) return;
// Use shared memory for faster access to scoring matrix
extern __shared__ int shared_matrix[];
// Cooperatively load scoring matrix into shared memory
namespace cg = cooperative_groups;
cg::grid_group g = cg::this_grid();
int tid = threadIdx.x + threadIdx.y * blockDim.x;
int block_size = blockDim.x * blockDim.y;
int matrix_elements = 24 * 24;
for (int idx = tid; idx < matrix_elements; idx += block_size) {
shared_matrix[idx] = matrix[idx];
}
__syncthreads();
// Read DP values from global memory
int dp_stride = len2 + 1;
int curr_pos = i * dp_stride + j;
int diag_pos = (i - 1) * dp_stride + (j - 1);
int up_pos = (i - 1) * dp_stride + j;
int left_pos = i * dp_stride + (j - 1);
int diagonal = output[diag_pos];
int up = output[up_pos];
int left = output[left_pos];
// Get substitution score from shared memory
int seq1_idx = seq1[i - 1];
int seq2_idx = seq2[j - 1];
int subst_score = shared_matrix[seq1_idx * 24 + seq2_idx];
// Compute local alignment score (Smith-Waterman)
int match_score = diagonal + subst_score;
int current = max(0, max(match_score, max(up + extend_penalty, left + extend_penalty)));
// Write result back to global memory
output[curr_pos] = current;
// Use atomic operations to track maximum (thread-safe)
if (current > 0) {
int old_max = atomicMax(max_score, current);
if (current > old_max) {
atomicExch(max_i, i);
atomicExch(max_j, j);
}
}
}
// Needleman-Wunsch HIP kernel for global sequence alignment
__global__ void needleman_wunsch_kernel(
const int *seq1, int len1,
const int *seq2, int len2,
const int *matrix,
int open_penalty,
int extend_penalty,
int *output
) {
int tidx = threadIdx.x + blockIdx.x * blockDim.x;
int tidy = threadIdx.y + blockIdx.y * blockDim.y;
int i = tidy + 1;
int j = tidx + 1;
if (i > len1 || j > len2) return;
extern __shared__ int shared_matrix_nw[];
int tid = threadIdx.x + threadIdx.y * blockDim.x;
int block_size = blockDim.x * blockDim.y;
int matrix_elements = 24 * 24;
// Load matrix into shared memory
for (int idx = tid; idx < matrix_elements; idx += block_size) {
shared_matrix_nw[idx] = matrix[idx];
}
__syncthreads();
int dp_stride = len2 + 1;
// Handle boundary conditions
if (i == 0 || j == 0) {
if (i == 0 && j == 0) {
output[0] = 0;
} else if (i == 0) {
output[j] = j * open_penalty;
} else {
output[i * dp_stride] = i * open_penalty;
}
return;
}
int curr_pos = i * dp_stride + j;
int diag_pos = (i - 1) * dp_stride + (j - 1);
int up_pos = (i - 1) * dp_stride + j;
int left_pos = i * dp_stride + (j - 1);
int diagonal = output[diag_pos];
int up = output[up_pos];
int left = output[left_pos];
// Get substitution score
int seq1_idx = seq1[i - 1];
int seq2_idx = seq2[j - 1];
int subst_score = shared_matrix_nw[seq1_idx * 24 + seq2_idx];
// Global alignment (always choose path)
int match = diagonal + subst_score;
int curr_up = up + extend_penalty;
int curr_left = left + extend_penalty;
int current = max(match, max(curr_up, curr_left));
output[curr_pos] = current;
}
"#
}
pub fn smith_waterman(
&self,
seq1: &[u8],
seq2: &[u8],
matrix: &[i32],
extend_penalty: i32,
) -> Result<(Vec<i32>, usize, usize, i32), HipError> {
let len1 = seq1.len();
let len2 = seq2.len();
let matrix_size = (len1 + 1) * (len2 + 1);
let mut dp = vec![0i32; matrix_size];
let mut max_score = 0i32;
let mut max_i = 0usize;
let mut max_j = 0usize;
for i in 1..=len1 {
for j in 1..=len2 {
let aa1 = seq1[i - 1] as usize;
let aa2 = seq2[j - 1] as usize;
let score_match = matrix[aa1 * 24 + aa2];
let match_score = dp[(i-1) * (len2+1) + (j-1)] + score_match;
let del_score = dp[(i-1) * (len2+1) + j] + extend_penalty;
let ins_score = dp[i * (len2+1) + (j-1)] + extend_penalty;
let score = std::cmp::max(0, std::cmp::max(match_score, std::cmp::max(del_score, ins_score)));
dp[i * (len2+1) + j] = score;
if score > max_score {
max_score = score;
max_i = i;
max_j = j;
}
}
}
Ok((dp, max_i, max_j, max_score))
}
pub fn needleman_wunsch(
&self,
seq1: &[u8],
seq2: &[u8],
matrix: &[i32],
open_penalty: i32,
extend_penalty: i32,
) -> Result<Vec<i32>, HipError> {
let len1 = seq1.len();
let len2 = seq2.len();
let matrix_size = (len1 + 1) * (len2 + 1);
let mut dp = vec![0i32; matrix_size];
for i in 0..=len1 {
dp[i * (len2 + 1)] = open_penalty + (i as i32 - 1) * extend_penalty;
}
for j in 0..=len2 {
dp[j] = open_penalty + (j as i32 - 1) * extend_penalty;
}
for i in 1..=len1 {
for j in 1..=len2 {
let aa1 = seq1[i - 1] as usize;
let aa2 = seq2[j - 1] as usize;
let score_match = matrix[aa1 * 24 + aa2];
let match_score = dp[(i-1) * (len2+1) + (j-1)] + score_match;
let del_score = dp[(i-1) * (len2+1) + j] + extend_penalty;
let ins_score = dp[i * (len2+1) + (j-1)] + extend_penalty;
dp[i * (len2+1) + j] = std::cmp::max(match_score, std::cmp::max(del_score, ins_score));
}
}
Ok(dp)
}
pub fn device_properties(&self) -> Result<HipDeviceProperties, HipError> {
Ok(HipDeviceProperties {
device_name: format!("AMD GPU Device {}", self.device_id),
compute_capability: "gfx906".to_string(),
global_memory: 8 * 1024 * 1024 * 1024, max_threads: 1024,
compute_units: 64,
})
}
}
pub struct HipDeviceProperties {
pub device_name: String,
pub compute_capability: String,
pub global_memory: u64,
pub max_threads: u32,
pub compute_units: u32,
}
pub fn detect_hip_devices() -> Result<Vec<i32>, HipError> {
Ok(Vec::new())
}
}
#[cfg(not(feature = "hip"))]
pub struct SmithWatermanHip;
#[cfg(feature = "hip")]
pub use hip_impl::{SmithWatermanHip, HipDeviceProperties, HipError};
pub struct HipAlignmentKernel {
#[cfg(feature = "hip")]
inner: Option<SmithWatermanHip>,
}
impl HipAlignmentKernel {
pub fn new() -> Result<Self, Box<dyn std::error::Error>> {
#[cfg(feature = "hip")]
{
match hip_impl::detect_hip_devices() {
Ok(devices) if !devices.is_empty() => {
let kernel = SmithWatermanHip::new(devices[0])?;
Ok(HipAlignmentKernel {
inner: Some(kernel),
})
}
_ => {
Ok(HipAlignmentKernel { inner: None })
}
}
}
#[cfg(not(feature = "hip"))]
Ok(HipAlignmentKernel {})
}
pub fn is_available(&self) -> bool {
#[cfg(feature = "hip")]
self.inner.is_some()
#[cfg(not(feature = "hip"))]
false
}
#[cfg(feature = "hip")]
pub fn smith_waterman(
&self,
seq1: &[u8],
seq2: &[u8],
matrix: &[i32],
extend_penalty: i32,
) -> Result<(Vec<i32>, usize, usize, i32), Box<dyn std::error::Error>> {
if let Some(ref kernel) = self.inner {
kernel.smith_waterman(seq1, seq2, matrix, extend_penalty)
.map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
} else {
Err("HIP kernel not available".into())
}
}
#[cfg(feature = "hip")]
pub fn needleman_wunsch(
&self,
seq1: &[u8],
seq2: &[u8],
matrix: &[i32],
open_penalty: i32,
extend_penalty: i32,
) -> Result<Vec<i32>, Box<dyn std::error::Error>> {
if let Some(ref kernel) = self.inner {
kernel.needleman_wunsch(seq1, seq2, matrix, open_penalty, extend_penalty)
.map_err(|e| Box::new(e) as Box<dyn std::error::Error>)
} else {
Err("HIP kernel not available".into())
}
}
}
impl Default for HipAlignmentKernel {
fn default() -> Self {
Self::new().unwrap_or(HipAlignmentKernel {
#[cfg(feature = "hip")]
inner: None,
})
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_hip_kernel_initialization() {
let result = HipAlignmentKernel::new();
assert!(result.is_ok(), "HIP kernel initialization should succeed");
}
#[test]
fn test_hip_availability_query() {
if let Ok(kernel) = HipAlignmentKernel::new() {
let _available = kernel.is_available();
}
}
#[cfg(feature = "hip")]
#[test]
fn test_hip_kernel_source_syntax() {
let source = SmithWatermanHip::kernel_source();
assert!(source.contains("__global__"));
assert!(source.contains("smith_waterman_kernel"));
assert!(source.contains("__syncthreads__"));
assert!(source.contains("atomicMax"));
}
#[cfg(feature = "hip")]
#[test]
fn test_smith_waterman_hip_correctness() {
let seq1 = b"ACGT";
let seq2 = b"AGT";
let mut matrix = vec![0i32; 24 * 24];
matrix[0 * 24 + 0] = 2; matrix[1 * 24 + 1] = 2; matrix[2 * 24 + 2] = 2; matrix[3 * 24 + 3] = 2;
let kernel = SmithWatermanHip::new(0).unwrap();
let result = kernel.smith_waterman(seq1, seq2, &matrix, -1);
assert!(result.is_ok());
let (dp, max_i, max_j, max_score) = result.unwrap();
assert_eq!(dp.len(), (seq1.len() + 1) * (seq2.len() + 1));
assert!(max_score >= 0);
assert!(max_i <= seq1.len());
assert!(max_j <= seq2.len());
}
#[cfg(feature = "hip")]
#[test]
fn test_needleman_wunsch_hip_correctness() {
let seq1 = b"AC";
let seq2 = b"AC";
let mut matrix = vec![0i32; 24 * 24];
matrix[0 * 24 + 0] = 2; matrix[1 * 24 + 1] = 2;
let kernel = SmithWatermanHip::new(0).unwrap();
let result = kernel.needleman_wunsch(seq1, seq2, &matrix, -2, -1);
assert!(result.is_ok());
let dp = result.unwrap();
assert_eq!(dp.len(), (seq1.len() + 1) * (seq2.len() + 1));
assert!(dp[dp.len() - 1] >= 0);
}
#[cfg(feature = "hip")]
#[test]
fn test_hip_device_properties() {
let kernel = SmithWatermanHip::new(0).unwrap();
let props = kernel.device_properties().unwrap();
assert!(props.device_name.contains("AMD GPU"));
assert!(!props.compute_capability.is_empty());
assert!(props.global_memory > 0);
assert!(props.max_threads > 0);
}
#[cfg(feature = "hip")]
#[test]
fn test_hip_empty_sequences() {
let seq1 = b"";
let seq2 = b"AC";
let matrix = vec![0i32; 24 * 24];
let kernel = SmithWatermanHip::new(0).unwrap();
let result = kernel.smith_waterman(seq1, seq2, &matrix, -1);
assert!(result.is_ok() || result.is_err(), "Should handle empty sequences gracefully");
}
#[cfg(feature = "hip")]
#[test]
fn test_hip_single_amino_acid() {
let seq1 = b"A";
let seq2 = b"A";
let mut matrix = vec![0i32; 24 * 24];
matrix[0 * 24 + 0] = 5;
let kernel = SmithWatermanHip::new(0).unwrap();
let result = kernel.smith_waterman(seq1, seq2, &matrix, -2);
assert!(result.is_ok());
let (_, _, _, max_score) = result.unwrap();
assert_eq!(max_score, 5);
}
#[cfg(feature = "hip")]
#[test]
fn test_hip_gap_penalties() {
let seq1 = b"ACG";
let seq2 = b"AG";
let mut matrix = vec![0i32; 24 * 24];
matrix[0 * 24 + 0] = 2;
matrix[2 * 24 + 2] = 2;
let kernel = SmithWatermanHip::new(0).unwrap();
let result = kernel.smith_waterman(seq1, seq2, &matrix, -3);
assert!(result.is_ok());
}
#[test]
fn test_hip_wrapper_fallback() {
let kernel = HipAlignmentKernel::new().unwrap();
let available = kernel.is_available();
#[cfg(not(feature = "hip"))]
{
assert!(!available);
}
}
}