primer3 0.1.0

Safe Rust bindings to the primer3 primer design library
Documentation
//! Dynamic programming sequence alignment.
//!
//! Wraps the `dpal` (dynamic programming alignment) function from the primer3
//! C library. This performs pairwise DNA sequence alignment with configurable
//! gap penalties, scoring matrices, and alignment modes.
//!
//! # Maximum sequence length
//!
//! Both sequences must be at most 1600 bp (`DPAL_MAX_ALIGN`).
//!
//! # Example
//!
//! ```no_run
//! use primer3::{align, AlignmentArgs, AlignmentMode};
//!
//! let result = align("ACGTACGT", "ACGTACGT", &AlignmentArgs::default()).unwrap();
//! assert!(result.score() > 0.0);
//! ```

use std::ffi::CStr;
use std::sync::Mutex;

use crate::error::{Primer3Error, Result};

/// Serializes access to `dpal()`. `_dpal_generic()` in the upstream
/// `dpal.c` uses function-local `static` scoring and traceback matrices
/// (~40 MB combined), which races under concurrent calls. Attempting to
/// patch them to `_Thread_local` at build time instead overflows Rust's
/// default 2 MB test-thread stack on Linux (glibc puts large static TLS
/// into the thread stack mapping), so we serialize at the Rust boundary
/// — matching the same approach used for `design_primers`.
static ALIGN_MUTEX: Mutex<()> = Mutex::new(());

/// Maximum sequence length for alignment (1600 bp).
pub const MAX_ALIGN_LENGTH: usize = primer3_sys::DPAL_MAX_ALIGN;

/// Alignment mode specifying how sequences are aligned.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AlignmentMode {
    /// Local alignment — find the best-scoring subsequence alignment.
    #[default]
    Local,
    /// Global alignment anchored at the end of the first sequence.
    GlobalEnd,
    /// Arbitrary global alignment, anchored at the end of either sequence.
    Global,
    /// Local alignment that includes the end of the first sequence.
    LocalEnd,
}

impl AlignmentMode {
    fn to_c(self) -> std::os::raw::c_int {
        match self {
            Self::Local => primer3_sys::DPAL_LOCAL,
            Self::GlobalEnd => primer3_sys::DPAL_GLOBAL_END,
            Self::Global => primer3_sys::DPAL_GLOBAL,
            Self::LocalEnd => primer3_sys::DPAL_LOCAL_END,
        }
    }
}

/// Output mode for alignment computation.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Default)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub enum AlignmentOutput {
    /// Score-only mode (fast, no path or structure).
    #[default]
    ScoreOnly,
    /// Compute the alignment path.
    Full,
    /// Compute secondary structure strings.
    Structure,
}

/// Parameters for dynamic programming alignment.
///
/// The default uses the standard nucleotide scoring matrix with local
/// alignment, gap penalty of -100, and max gap size of 3.
#[derive(Debug, Clone, Copy)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
pub struct AlignmentArgs {
    /// Alignment mode (default: `Local`).
    pub mode: AlignmentMode,
    /// Output detail level (default: `ScoreOnly`).
    pub output: AlignmentOutput,
    /// Gap opening penalty (default: -100).
    pub gap: i32,
    /// Gap extension penalty (default: -100).
    pub gap_ext: i32,
    /// Maximum allowable gap size. -1 means unlimited (default: 3).
    pub max_gap: i32,
    /// Stop search when score exceeds this value. 0 means no limit (default: 0).
    pub score_max: i32,
    /// Support IUPAC ambiguity codes in scoring (default: false).
    pub use_ambiguity_codes: bool,
}

impl Default for AlignmentArgs {
    fn default() -> Self {
        Self {
            mode: AlignmentMode::default(),
            output: AlignmentOutput::default(),
            gap: -100,
            gap_ext: -100,
            max_gap: 3,
            score_max: 0,
            use_ambiguity_codes: false,
        }
    }
}

/// Result of a dynamic programming alignment.
#[derive(Debug, Clone)]
pub struct AlignmentResult {
    score: f64,
    path_length: usize,
    align_end_1: i32,
    align_end_2: i32,
    secondary_structure: Option<String>,
}

impl AlignmentResult {
    /// Alignment score. Higher is better.
    pub fn score(&self) -> f64 {
        self.score
    }

    /// Length of the alignment path (0 in score-only mode).
    pub fn path_length(&self) -> usize {
        self.path_length
    }

    /// Last alignment position in the first sequence (-1 if not computed).
    pub fn align_end_1(&self) -> i32 {
        self.align_end_1
    }

    /// Last alignment position in the second sequence (-1 if not computed).
    pub fn align_end_2(&self) -> i32 {
        self.align_end_2
    }

    /// ASCII representation of the secondary structure (only in `Structure` mode).
    pub fn secondary_structure(&self) -> Option<&str> {
        self.secondary_structure.as_deref()
    }
}

/// Performs dynamic programming alignment of two DNA sequences.
///
/// # Errors
///
/// Returns an error if either sequence is empty, exceeds [`MAX_ALIGN_LENGTH`],
/// or the C library reports an alignment error.
///
/// # Example
///
/// ```no_run
/// use primer3::{align, AlignmentArgs};
///
/// let result = align("ACGTACGT", "ACGTACGT", &AlignmentArgs::default()).unwrap();
/// println!("Score = {}", result.score());
/// ```
pub fn align(seq1: &str, seq2: &str, args: &AlignmentArgs) -> Result<AlignmentResult> {
    if seq1.is_empty() || seq2.is_empty() {
        return Err(Primer3Error::InvalidSequence("sequence is empty".into()));
    }
    let max_len = seq1.len().max(seq2.len());
    if max_len > MAX_ALIGN_LENGTH {
        return Err(Primer3Error::SequenceTooLong {
            length: max_len,
            max: MAX_ALIGN_LENGTH,
            operation: "alignment",
        });
    }

    // Allocate dpal_args on heap (~256 KB due to scoring matrix)
    let mut c_args: Box<primer3_sys::dpal_args> = Box::new(unsafe { std::mem::zeroed() });

    // Initialize with default nucleotide scoring and gap penalties
    unsafe {
        primer3_sys::set_dpal_args(c_args.as_mut());
        primer3_sys::dpal_set_default_nt_args(c_args.as_mut());
    }

    if args.use_ambiguity_codes {
        let ok = unsafe { primer3_sys::dpal_set_ambiguity_code_matrix(c_args.as_mut()) };
        if ok == 0 {
            return Err(Primer3Error::Library("failed to set ambiguity code matrix".into()));
        }
    }

    c_args.flag = args.mode.to_c();
    c_args.gap = args.gap;
    c_args.gapl = args.gap_ext;
    c_args.max_gap = args.max_gap;
    c_args.score_max = args.score_max;

    let mode = match args.output {
        AlignmentOutput::ScoreOnly => primer3_sys::DPM_FAST,
        AlignmentOutput::Full => primer3_sys::DPM_GENERAL,
        AlignmentOutput::Structure => primer3_sys::DPM_STRUCT,
    };

    // Null-terminated uppercase byte sequences
    let mut s1: Vec<u8> = seq1.bytes().map(|b| b.to_ascii_uppercase()).collect();
    s1.push(0);
    let mut s2: Vec<u8> = seq2.bytes().map(|b| b.to_ascii_uppercase()).collect();
    s2.push(0);

    // Allocate dpal_results on heap (~12.8 KB due to path array)
    let mut results: Box<primer3_sys::dpal_results> = Box::new(unsafe { std::mem::zeroed() });

    // Serialize access to dpal() — see ALIGN_MUTEX comment above.
    let _lock = ALIGN_MUTEX.lock().unwrap_or_else(std::sync::PoisonError::into_inner);
    unsafe {
        primer3_sys::dpal(s1.as_ptr(), s2.as_ptr(), c_args.as_ref(), mode, results.as_mut());
    }

    // Check for error (DPAL_ERROR_SCORE = INT_MIN)
    if (results.score as i64) == i64::from(i32::MIN) {
        let msg = if results.msg.is_null() {
            "alignment failed".to_string()
        } else {
            unsafe { CStr::from_ptr(results.msg) }.to_string_lossy().into_owned()
        };
        return Err(Primer3Error::Library(msg.into_boxed_str()));
    }

    let secondary_structure = if results.sec_struct.is_null() {
        None
    } else {
        let s = unsafe { CStr::from_ptr(results.sec_struct) }.to_string_lossy().into_owned();
        unsafe { libc::free(results.sec_struct.cast::<std::ffi::c_void>()) };
        Some(s)
    };

    Ok(AlignmentResult {
        score: results.score,
        path_length: results.path_length as usize,
        align_end_1: results.align_end_1,
        align_end_2: results.align_end_2,
        secondary_structure,
    })
}