hmmer_rs/lib.rs
1mod hmm;
2mod hmmsearch;
3mod libhmmer_sys_extras;
4
5use log::*;
6use std::ffi::CStr;
7use std::fmt::Debug;
8
9pub use crate::hmm::*;
10pub use crate::hmmsearch::*;
11
12pub enum Alphabet {
13 Protein,
14 RNA,
15 DNA,
16}
17
18pub struct EaselSequence {
19 // TODO: Implement Drop trait to free this
20 pub c_sq: *mut libhmmer_sys::ESL_SQ,
21}
22
23impl EaselSequence {
24 pub fn new(alphabet: Alphabet) -> Self {
25 let c_alphabet = match alphabet {
26 // *const libhmmer_sys::ESL_ALPHABET
27 Alphabet::Protein => unsafe {
28 libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslAMINO.try_into().unwrap())
29 },
30 Alphabet::RNA => unsafe {
31 libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslRNA.try_into().unwrap())
32 },
33 Alphabet::DNA => unsafe {
34 libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslDNA.try_into().unwrap())
35 },
36 };
37 let c_sq = unsafe { libhmmer_sys::esl_sq_CreateDigital(c_alphabet) };
38 Self { c_sq }
39 }
40
41 /// Replace (or initialise) the sequence data in this object with the given
42 /// sequence. This method is a simplification of the sqascii_ReadSequence()
43 /// C function in easel, in esl_sqio_ascii.c.
44 ///
45 /// The input sequence is assumed to be in the same alphabet as the one used
46 /// to instantiate this struct. It is not NULL terminated.
47 ///
48 /// The seq given here is converted into a newly allocated dsq, so does not
49 /// need to live after this function returns.
50 pub fn replace_sequence(&mut self, seq: &[u8]) -> Result<(), &'static str> {
51 let n = seq.len() as i64;
52
53 unsafe {
54 // free() previous sequence
55 if (*self.c_sq).dsq.is_null() {
56 debug!("Freeing previous dsq pointer");
57 libc::free((*self.c_sq).dsq as *mut libc::c_void);
58 }
59 (*self.c_sq).dsq = libc::malloc(seq.len() + 2) as *mut u8;
60
61 // esl_abc_Digitize(const ESL_ALPHABET *a, const char *seq, ESL_DSQ *dsq)
62 self.digitise_sequence(seq)?;
63
64 (*self.c_sq).n = n;
65
66 // sq->start = 1;
67 (*self.c_sq).start = 1;
68 // sq->end = sq->n;
69 (*self.c_sq).end = n;
70 // sq->C = 0;
71 (*self.c_sq).C = 0;
72 // sq->W = sq->n;
73 (*self.c_sq).W = n;
74 // sq->L = sq->n;
75 (*self.c_sq).L = n;
76 }
77
78 debug!("Replaced sequence, now have {:#?}", self);
79 Ok(())
80 }
81
82 // Reimplementation of libhmmer_sys::esl_abc_Digitize but don't require a
83 // NULL terminated sequence as input. Assumes self.dsq is already allocated.
84 fn digitise_sequence(&mut self, seq: &[u8]) -> Result<(), &'static str> {
85 // let sstatus = libhmmer_sys::esl_abc_Digitize((*self.c_sq).abc, seq.as_ptr() as *const i8, (*self.c_sq).dsq);
86
87 // int status;
88 // int64_t i; /* position in seq */
89 // int64_t j; /* position in dsq */
90 // ESL_DSQ x;
91
92 // status = eslOK;
93 // dsq[0] = eslDSQ_SENTINEL;
94 // for (i = 0, j = 1; seq[i] != '\0'; i++)
95 // {
96 // x = a->inmap[(int) seq[i]];
97 // if (esl_abc_XIsValid(a, x)) dsq[j] = x;
98 // else if (x == eslDSQ_IGNORED) continue;
99 // else {
100 // status = eslEINVAL;
101 // dsq[j] = esl_abc_XGetUnknown(a);
102 // }
103 // j++;
104 // }
105 // dsq[j] = eslDSQ_SENTINEL;
106 // return status;
107
108 // easel/esl_alphabet.h:#define esl_abc_XIsValid(a, x) ((x) < (a)->Kp)
109
110 // Set initial sentinal
111 unsafe {
112 *(*self.c_sq).dsq = libhmmer_sys::eslDSQ_SENTINEL as u8;
113 };
114
115 // Set actual sequence
116 #[allow(non_snake_case)]
117 let Kp: u8 = unsafe { (*(*self.c_sq).abc).Kp.try_into().unwrap() };
118 for (i, s) in seq.iter().enumerate() {
119 let x = unsafe { (*(*self.c_sq).abc).inmap[*s as usize] };
120 if x < Kp {
121 // easel/esl_alphabet.h:#define esl_abc_XGetUnknown(a) ((a)->Kp)
122 unsafe {
123 *(*self.c_sq).dsq.add(i + 1) = x;
124 };
125 } else if x == libhmmer_sys::eslDSQ_IGNORED as u8 {
126 continue;
127 } else {
128 return Err("Invalid character in sequence");
129 }
130 }
131
132 // Set final sentinal
133 unsafe {
134 *((*self.c_sq).dsq.offset(seq.len() as isize + 1)) =
135 libhmmer_sys::eslDSQ_SENTINEL as u8;
136 };
137
138 Ok(())
139 }
140}
141
142impl Debug for EaselSequence {
143 fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144 // name: *mut c_char
145 // acc: *mut c_char
146 // desc: *mut c_char
147 // tax_id: i32
148 // seq: *mut c_char
149 // dsq: *mut ESL_DSQ
150 // ss: *mut c_char
151 // n: i64
152 // start: i64
153 // end: i64
154 // C: i64
155 // W: i64
156 // L: i64
157 // source: *mut c_char
158 // nalloc: c_int
159 // aalloc: c_int
160 // dalloc: c_int
161 // salloc: i64
162 // srcalloc: c_int
163 // idx: i64
164 // roff: off_t
165 // hoff: off_t
166 // doff: off_t
167 // eoff: off_t
168 // xr_tag: *mut *mut c_char
169 // xr: *mut *mut c_char
170 // nxr: c_int
171 // abc: *const ESL_ALPHABET
172 unsafe {
173 f.debug_struct("EaselSequence")
174 .field("c_sq", &self.c_sq)
175 .field("name", &CStr::from_ptr((*self.c_sq).name).to_string_lossy())
176 .field("acc", &CStr::from_ptr((*self.c_sq).acc).to_string_lossy())
177 .field(
178 "dsq",
179 &CStr::from_ptr((*self.c_sq).dsq as *mut i8).to_string_lossy(),
180 )
181 .field("dsq ptr", &(*self.c_sq).dsq)
182 .field(
183 "dsq length",
184 &libhmmer_sys::esl_abc_dsqlen((*self.c_sq).dsq),
185 )
186 .field("tax_id", &(*self.c_sq).tax_id)
187 // .field("seq", &CStr::from_ptr((*self.c_sq).seq).to_string_lossy())
188 // .field("ss", &CStr::from_ptr((*self.c_sq).ss).to_string_lossy())
189 .field("n", &(*self.c_sq).n)
190 .field("start", &(*self.c_sq).start)
191 .field("end", &(*self.c_sq).end)
192 .field("C", &(*self.c_sq).C)
193 .field("W", &(*self.c_sq).W)
194 .field("L", &(*self.c_sq).L)
195 .finish()
196 }
197 }
198}
199
200impl Drop for EaselSequence {
201 fn drop(&mut self) {
202 unsafe {
203 libhmmer_sys::esl_sq_Destroy(self.c_sq);
204 }
205 }
206}