Skip to main content

gapsmith_align/
blast.rs

1//! BLAST+ aligners: `blastp` (protein-protein) and `tblastn` (protein query vs.
2//! nucleotide database, translated in all 6 frames).
3//!
4//! Shell-out target: `makeblastdb` + `blastp` / `tblastn`. Command template
5//! mirrors `src/gapseq_find.sh:585–589`:
6//!
7//! ```text
8//! makeblastdb -in TARGET -dbtype prot -out orgdb
9//! blastp -db orgdb -query QUERY -qcov_hsp_perc 75 -num_threads N
10//!        -outfmt "6 qseqid pident evalue bitscore qcovs stitle sstart send"
11//! ```
12//!
13//! `tblastn` differs only in `-dbtype nucl` and the search executable.
14
15use crate::error::{io_err, AlignError};
16use crate::hit::Hit;
17use crate::tsv::parse_tsv;
18use crate::{AlignOpts, Aligner};
19use std::io::Cursor;
20use std::path::Path;
21use std::process::Command;
22
23const COLUMNS: &str = "qseqid pident evalue bitscore qcovs stitle sstart send";
24
25pub struct BlastpAligner;
26pub struct TblastnAligner;
27
28impl BlastpAligner {
29    pub fn new() -> Self {
30        Self
31    }
32}
33impl Default for BlastpAligner {
34    fn default() -> Self {
35        Self::new()
36    }
37}
38impl TblastnAligner {
39    pub fn new() -> Self {
40        Self
41    }
42}
43impl Default for TblastnAligner {
44    fn default() -> Self {
45        Self::new()
46    }
47}
48
49fn run_blast(
50    search_tool: &'static str,
51    dbtype: &'static str,
52    query_fasta: &Path,
53    target_fasta: &Path,
54    opts: &AlignOpts,
55) -> Result<Vec<Hit>, AlignError> {
56    require_tool("makeblastdb")?;
57    require_tool(search_tool)?;
58
59    let tmp = tempfile::tempdir().map_err(|e| io_err(Path::new("/tmp"), e))?;
60    let db_prefix = tmp.path().join("orgdb");
61
62    // makeblastdb -in TARGET -dbtype {prot,nucl} -out db_prefix
63    let out = Command::new("makeblastdb")
64        .arg("-in")
65        .arg(target_fasta)
66        .arg("-dbtype")
67        .arg(dbtype)
68        .arg("-out")
69        .arg(&db_prefix)
70        .output()
71        .map_err(|e| io_err(Path::new("makeblastdb"), e))?;
72    if !out.status.success() {
73        return Err(AlignError::ToolFailed {
74            tool: "makeblastdb",
75            status: out.status,
76            stderr: String::from_utf8_lossy(&out.stderr).to_string(),
77        });
78    }
79
80    // blastp / tblastn
81    let mut cmd = Command::new(search_tool);
82    cmd.arg("-db")
83        .arg(&db_prefix)
84        .arg("-query")
85        .arg(query_fasta)
86        .arg("-qcov_hsp_perc")
87        .arg(opts.coverage_pct.to_string())
88        .arg("-num_threads")
89        .arg(opts.threads.to_string())
90        .arg("-outfmt")
91        .arg(format!("6 {COLUMNS}"));
92    if let Some(e) = opts.evalue {
93        cmd.arg("-evalue").arg(e.to_string());
94    }
95    for a in &opts.extra_args {
96        cmd.arg(a);
97    }
98
99    tracing::debug!(?cmd, "running blast");
100    let out = cmd
101        .output()
102        .map_err(|e| io_err(Path::new(search_tool), e))?;
103    if !out.status.success() {
104        return Err(AlignError::ToolFailed {
105            tool: search_tool,
106            status: out.status,
107            stderr: String::from_utf8_lossy(&out.stderr).to_string(),
108        });
109    }
110    if !opts.quiet && !out.stderr.is_empty() {
111        eprintln!("{}", String::from_utf8_lossy(&out.stderr));
112    }
113    parse_tsv(Cursor::new(out.stdout), false)
114}
115
116impl Aligner for BlastpAligner {
117    fn name(&self) -> &'static str {
118        "blastp"
119    }
120    fn align(
121        &self,
122        query_fasta: &Path,
123        target_fasta: &Path,
124        opts: &AlignOpts,
125    ) -> Result<Vec<Hit>, AlignError> {
126        run_blast("blastp", "prot", query_fasta, target_fasta, opts)
127    }
128}
129
130impl Aligner for TblastnAligner {
131    fn name(&self) -> &'static str {
132        "tblastn"
133    }
134    fn align(
135        &self,
136        query_fasta: &Path,
137        target_fasta: &Path,
138        opts: &AlignOpts,
139    ) -> Result<Vec<Hit>, AlignError> {
140        run_blast("tblastn", "nucl", query_fasta, target_fasta, opts)
141    }
142}
143
144fn require_tool(tool: &'static str) -> Result<(), AlignError> {
145    if which(tool).is_some() {
146        Ok(())
147    } else {
148        Err(AlignError::ToolMissing { tool })
149    }
150}
151
152pub(crate) fn which(name: &str) -> Option<std::path::PathBuf> {
153    let path = std::env::var_os("PATH")?;
154    for dir in std::env::split_paths(&path) {
155        let candidate = dir.join(name);
156        if is_executable(&candidate) {
157            return Some(candidate);
158        }
159    }
160    None
161}
162
163#[cfg(unix)]
164pub(crate) fn is_executable(p: &std::path::Path) -> bool {
165    use std::os::unix::fs::PermissionsExt;
166    p.metadata()
167        .map(|m| m.is_file() && m.permissions().mode() & 0o111 != 0)
168        .unwrap_or(false)
169}
170#[cfg(not(unix))]
171pub(crate) fn is_executable(p: &std::path::Path) -> bool {
172    p.is_file()
173}