use super::gpu_halo_buffer::{HaloBufferManager, HaloConfig};
use crate::scoring::ScoringMatrix;
use crate::error::Result;
#[derive(Debug, Clone)]
pub struct TilingProfile {
pub gpu_model: String,
pub tile_size: usize,
pub batch_size: usize,
pub shared_memory_limit: usize,
pub double_buffer: bool,
}
impl TilingProfile {
pub fn v100() -> Self {
TilingProfile {
gpu_model: "V100".to_string(),
tile_size: 32,
batch_size: 256,
shared_memory_limit: 96, double_buffer: true,
}
}
pub fn a100() -> Self {
TilingProfile {
gpu_model: "A100".to_string(),
tile_size: 32,
batch_size: 512,
shared_memory_limit: 192, double_buffer: true,
}
}
pub fn rtx3090() -> Self {
TilingProfile {
gpu_model: "RTX3090".to_string(),
tile_size: 16,
batch_size: 128,
shared_memory_limit: 96, double_buffer: false,
}
}
pub fn conservative() -> Self {
TilingProfile {
gpu_model: "Generic".to_string(),
tile_size: 16,
batch_size: 64,
shared_memory_limit: 48, double_buffer: false,
}
}
}
pub struct GpuTilingStrategy {
halo_manager: HaloBufferManager,
profile: TilingProfile,
matrix: ScoringMatrix,
gap_open: i32,
gap_extend: i32,
}
impl GpuTilingStrategy {
pub fn new(
seq1_len: usize,
seq2_len: usize,
matrix: ScoringMatrix,
gap_open: i32,
gap_extend: i32,
profile: TilingProfile,
) -> Result<Self> {
let config = HaloConfig {
tile_width: profile.tile_size,
tile_height: profile.tile_size,
halo_size: 1,
};
let halo_manager = HaloBufferManager::new(seq1_len, seq2_len, config);
Ok(GpuTilingStrategy {
halo_manager,
profile,
matrix,
gap_open,
gap_extend,
})
}
pub fn num_tiles(&self) -> usize {
self.halo_manager.num_tile_rows() * self.halo_manager.num_tile_cols()
}
pub fn gpu_memory_requirement(&self) -> usize {
self.halo_manager.total_gpu_memory()
}
pub fn is_beneficial(&self) -> bool {
self.gpu_memory_requirement() < 512 * 1024 * 1024
}
pub fn tiles_in_order(&self) -> Vec<(usize, usize)> {
let mut tiles = Vec::new();
let num_rows = self.halo_manager.num_tile_rows();
let num_cols = self.halo_manager.num_tile_cols();
for row in 0..num_rows {
for col in 0..num_cols {
tiles.push((row, col));
}
}
tiles
}
pub fn compute_tile(&mut self, tile_row: usize, tile_col: usize) -> Result<()> {
let _tile = self.halo_manager.get_tile(tile_row, tile_col);
Ok(())
}
pub fn get_result(&self) -> Vec<Vec<i32>> {
self.halo_manager.assemble_result()
}
pub fn estimate_time_ms(&self) -> f64 {
let num_cells = (self.halo_manager.seq_len1 + 1) * (self.halo_manager.seq_len2 + 1);
(num_cells as f64) / 1e9 * 1000.0
}
}
#[derive(Debug, Clone)]
pub struct TilingStats {
pub total_tiles: usize,
pub completed_tiles: usize,
pub gpu_memory_bytes: usize,
pub estimated_time_ms: f64,
pub throughput: f64,
}
impl TilingStats {
pub fn from_strategy(strategy: &GpuTilingStrategy) -> Self {
let total_tiles = strategy.num_tiles();
let gpu_memory_bytes = strategy.gpu_memory_requirement();
let estimated_time_ms = strategy.estimate_time_ms();
TilingStats {
total_tiles,
completed_tiles: 0,
gpu_memory_bytes,
estimated_time_ms,
throughput: (total_tiles as f64) / (estimated_time_ms / 1000.0),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use crate::scoring::MatrixType;
#[test]
fn test_tiling_profile_v100() {
let profile = TilingProfile::v100();
assert_eq!(profile.gpu_model, "V100");
assert_eq!(profile.tile_size, 32);
assert_eq!(profile.batch_size, 256);
}
#[test]
fn test_tiling_profile_conservative() {
let profile = TilingProfile::conservative();
assert_eq!(profile.tile_size, 16);
assert_eq!(profile.batch_size, 64);
}
#[test]
fn test_tiling_strategy_creation() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let profile = TilingProfile::conservative();
let strategy = GpuTilingStrategy::new(1000, 1000, matrix, -11, -1, profile)?;
assert!(strategy.num_tiles() > 0);
assert!(strategy.gpu_memory_requirement() > 0);
Ok(())
}
#[test]
fn test_tiles_in_order() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let profile = TilingProfile::conservative();
let strategy = GpuTilingStrategy::new(64, 64, matrix, -11, -1, profile)?;
let tiles = strategy.tiles_in_order();
assert_eq!(tiles.len(), 16);
assert_eq!(tiles[0], (0, 0));
assert_eq!(tiles[1], (0, 1));
assert_eq!(tiles[4], (1, 0));
Ok(())
}
#[test]
fn test_large_sequence_tiling() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let profile = TilingProfile::a100();
let strategy = GpuTilingStrategy::new(100_000, 100_000, matrix, -11, -1, profile)?;
assert!(strategy.num_tiles() > 0);
let num_tiles = strategy.num_tiles();
assert!(
num_tiles > 1000,
"Should have many tiles for large sequences, got {}",
num_tiles
);
let mem_bytes = strategy.gpu_memory_requirement();
let mem_mb = mem_bytes / (1024 * 1024);
assert!(
mem_bytes > 0,
"Memory requirement should be positive"
);
Ok(())
}
#[test]
fn test_tiling_stats() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let profile = TilingProfile::conservative();
let strategy = GpuTilingStrategy::new(1000, 1000, matrix, -11, -1, profile)?;
let stats = TilingStats::from_strategy(&strategy);
assert_eq!(stats.completed_tiles, 0);
assert!(stats.total_tiles > 0);
assert!(stats.gpu_memory_bytes > 0);
assert!(stats.estimated_time_ms > 0.0);
Ok(())
}
#[test]
fn test_memory_requirement_scales_linearly() -> Result<()> {
let matrix = ScoringMatrix::new(MatrixType::Blosum62)?;
let profile = TilingProfile::conservative();
let strategy1 = GpuTilingStrategy::new(1000, 1000, matrix.clone(), -11, -1, profile.clone())?;
let strategy2 = GpuTilingStrategy::new(2000, 2000, matrix, -11, -1, profile)?;
let mem1 = strategy1.gpu_memory_requirement();
let mem2 = strategy2.gpu_memory_requirement();
assert!(mem2 > mem1 * 3 && mem2 < mem1 * 5);
Ok(())
}
}