1use std::fs::File;
2use std::io::prelude::*;
3use std::io::BufReader;
4use std::io::Write;
5use std::path::Path;
6use std::path::PathBuf;
7use std::process::Command;
8
9use ahash::HashMap;
10use ahash::HashMapExt;
11use anyhow::Result;
12use derive_builder::Builder;
13use pyo3::prelude::*;
14use serde::{Deserialize, Serialize};
15use tempfile::tempdir;
16
17use pyo3_stub_gen::derive::*;
18
19pub const MIN_SEQ_SIZE: usize = 20;
20#[gen_stub_pyclass]
28#[pyclass(module = "deepbiop.utils")]
29#[derive(Debug, Default, Builder, Clone, Serialize, Deserialize)]
30pub struct PslAlignment {
31 #[pyo3(get, set)]
32 pub qname: String,
33 #[pyo3(get, set)]
34 pub qsize: usize,
35 #[pyo3(get, set)]
36 pub qstart: usize,
37 #[pyo3(get, set)]
38 pub qend: usize,
39 #[pyo3(get, set)]
40 pub qmatch: usize,
41 #[pyo3(get, set)]
42 pub tname: String,
43 #[pyo3(get, set)]
44 pub tsize: usize,
45 #[pyo3(get, set)]
46 pub tstart: usize,
47 #[pyo3(get, set)]
48 pub tend: usize,
49 #[pyo3(get, set)]
50 pub identity: f32,
51}
52
53pub fn parse_psl_by_qname<P: AsRef<Path>>(file: P) -> Result<HashMap<String, Vec<PslAlignment>>> {
54 let result = parse_psl(file)?;
55 Ok(result.into_iter().fold(HashMap::new(), |mut acc, al| {
56 let qname = al.qname.clone();
57 acc.entry(qname).or_default().push(al);
58 acc
59 }))
60}
61
62pub fn parse_psl<P: AsRef<Path>>(file: P) -> Result<Vec<PslAlignment>> {
63 let file = File::open(file)?;
64 let mut reader = BufReader::new(file);
65 let mut line = String::new();
66
67 for _ in 0..5 {
69 reader.read_line(&mut line)?;
70 line.clear();
71 }
72 let mut alignments = Vec::new();
73
74 while reader.read_line(&mut line)? > 0 {
76 let fields: Vec<&str> = line.split_whitespace().collect();
77
78 let match_: usize = lexical::parse(fields[0])?;
79 let qname = fields[9];
80 let qsize: usize = lexical::parse(fields[10])?;
81 let qstart: usize = lexical::parse(fields[11])?;
82 let qend: usize = lexical::parse(fields[12])?;
83
84 let tname = fields[13];
85 let tsize: usize = lexical::parse(fields[14])?;
86 let tstart: usize = lexical::parse(fields[15])?;
87 let tend: usize = lexical::parse(fields[16])?;
88
89 let identity = match_ as f32 / qsize as f32;
90
91 let al = PslAlignmentBuilder::default()
92 .qname(qname.to_string())
93 .qsize(qsize)
94 .qstart(qstart)
95 .qend(qend)
96 .qmatch(match_)
97 .tname(tname.to_string())
98 .tsize(tsize)
99 .tstart(tstart)
100 .tend(tend)
101 .identity(identity)
102 .build()?;
103 alignments.push(al);
104 line.clear();
105 }
106 Ok(alignments)
107}
108
109pub fn blat_for_seq<P: AsRef<Path> + AsRef<std::ffi::OsStr>>(
110 path: P,
111 blat_cli: P,
112 two_bit: P,
113 output: P,
114) -> Result<Vec<PslAlignment>> {
115 let _output = Command::new(blat_cli)
116 .arg("-stepSize=5")
117 .arg("-repMatch=2253")
118 .arg("-minScore=20")
119 .arg("-minIdentity=0")
120 .arg(two_bit)
121 .arg(path)
122 .arg(&output)
123 .output()?;
124
125 if !_output.status.success() {
126 return Err(anyhow::anyhow!(
127 "blat failed: {}",
128 String::from_utf8_lossy(&_output.stderr)
129 ));
130 }
131 parse_psl(output)
132}
133
134pub fn blat<P: AsRef<Path> + AsRef<std::ffi::OsStr>>(
136 seq: &str,
137 blat_cli: P,
138 two_bit: P,
139 output: Option<&str>,
140) -> Result<Vec<PslAlignment>> {
141 log::debug!("blat_cli: {}", seq);
142
143 let dir = tempdir()?;
145 let file1 = dir.path().join("seq.fa");
146 let mut tmp_file1 = File::create(file1.clone())?;
147 writeln!(tmp_file1, ">seq\n")?;
148 writeln!(tmp_file1, "{}", seq)?;
149
150 let output_file = if let Some(value) = output {
152 PathBuf::from(value)
153 } else {
154 dir.path().join("output.psl")
155 };
156
157 let _output = Command::new(blat_cli)
158 .arg("-stepSize=5")
159 .arg("-repMatch=2253")
160 .arg("-minScore=20")
161 .arg("-minIdentity=0")
162 .arg(two_bit)
163 .arg(file1)
164 .arg(output_file.clone())
165 .output()?;
166
167 if !_output.status.success() || !output_file.exists() {
168 return Err(anyhow::anyhow!(
169 "blat failed: {}",
170 String::from_utf8_lossy(&_output.stderr)
171 ));
172 }
173
174 parse_psl(output_file)
175}
176
177#[cfg(test)]
178mod tests {
179 #[test]
182 fn test_parse_psl() {
183 let _file = "tests/data/sample.psl";
184 }
185}