hmmer_rs/
lib.rs

1mod hmm;
2mod hmmsearch;
3mod libhmmer_sys_extras;
4
5use log::*;
6use std::ffi::CStr;
7use std::fmt::Debug;
8
9pub use crate::hmm::*;
10pub use crate::hmmsearch::*;
11
12pub enum Alphabet {
13    Protein,
14    RNA,
15    DNA,
16}
17
18pub struct EaselSequence {
19    // TODO: Implement Drop trait to free this
20    pub c_sq: *mut libhmmer_sys::ESL_SQ,
21}
22
23impl EaselSequence {
24    pub fn new(alphabet: Alphabet) -> Self {
25        let c_alphabet = match alphabet {
26            // *const libhmmer_sys::ESL_ALPHABET
27            Alphabet::Protein => unsafe {
28                libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslAMINO.try_into().unwrap())
29            },
30            Alphabet::RNA => unsafe {
31                libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslRNA.try_into().unwrap())
32            },
33            Alphabet::DNA => unsafe {
34                libhmmer_sys::esl_alphabet_Create(libhmmer_sys::eslDNA.try_into().unwrap())
35            },
36        };
37        let c_sq = unsafe { libhmmer_sys::esl_sq_CreateDigital(c_alphabet) };
38        Self { c_sq }
39    }
40
41    /// Replace (or initialise) the sequence data in this object with the given
42    /// sequence. This method is a simplification of the sqascii_ReadSequence()
43    /// C function in easel, in esl_sqio_ascii.c.
44    ///
45    /// The input sequence is assumed to be in the same alphabet as the one used
46    /// to instantiate this struct. It is not NULL terminated.
47    ///
48    /// The seq given here is converted into a newly allocated dsq, so does not
49    /// need to live after this function returns.
50    pub fn replace_sequence(&mut self, seq: &[u8]) -> Result<(), &'static str> {
51        let n = seq.len() as i64;
52
53        unsafe {
54            // free() previous sequence
55            if (*self.c_sq).dsq.is_null() {
56                debug!("Freeing previous dsq pointer");
57                libc::free((*self.c_sq).dsq as *mut libc::c_void);
58            }
59            (*self.c_sq).dsq = libc::malloc(seq.len() + 2) as *mut u8;
60
61            // esl_abc_Digitize(const ESL_ALPHABET *a, const char *seq, ESL_DSQ *dsq)
62            self.digitise_sequence(seq)?;
63
64            (*self.c_sq).n = n;
65
66            // sq->start = 1;
67            (*self.c_sq).start = 1;
68            // sq->end   = sq->n;
69            (*self.c_sq).end = n;
70            // sq->C     = 0;
71            (*self.c_sq).C = 0;
72            // sq->W     = sq->n;
73            (*self.c_sq).W = n;
74            // sq->L     = sq->n;
75            (*self.c_sq).L = n;
76        }
77
78        debug!("Replaced sequence, now have {:#?}", self);
79        Ok(())
80    }
81
82    // Reimplementation of libhmmer_sys::esl_abc_Digitize but don't require a
83    // NULL terminated sequence as input. Assumes self.dsq is already allocated.
84    fn digitise_sequence(&mut self, seq: &[u8]) -> Result<(), &'static str> {
85        // let sstatus = libhmmer_sys::esl_abc_Digitize((*self.c_sq).abc, seq.as_ptr() as *const i8, (*self.c_sq).dsq);
86
87        // int     status;
88        // int64_t i;			/* position in seq */
89        // int64_t j;			/* position in dsq */
90        // ESL_DSQ x;
91
92        // status = eslOK;
93        // dsq[0] = eslDSQ_SENTINEL;
94        // for (i = 0, j = 1; seq[i] != '\0'; i++)
95        //   {
96        //     x = a->inmap[(int) seq[i]];
97        //     if      (esl_abc_XIsValid(a, x)) dsq[j] = x;
98        //     else if (x == eslDSQ_IGNORED) continue;
99        //     else {
100        //   status   = eslEINVAL;
101        //   dsq[j] = esl_abc_XGetUnknown(a);
102        //     }
103        //     j++;
104        //   }
105        // dsq[j] = eslDSQ_SENTINEL;
106        // return status;
107
108        // easel/esl_alphabet.h:#define esl_abc_XIsValid(a, x)       ((x) < (a)->Kp)
109
110        // Set initial sentinal
111        unsafe {
112            *(*self.c_sq).dsq = libhmmer_sys::eslDSQ_SENTINEL as u8;
113        };
114
115        // Set actual sequence
116        #[allow(non_snake_case)]
117        let Kp: u8 = unsafe { (*(*self.c_sq).abc).Kp.try_into().unwrap() };
118        for (i, s) in seq.iter().enumerate() {
119            let x = unsafe { (*(*self.c_sq).abc).inmap[*s as usize] };
120            if x < Kp {
121                // easel/esl_alphabet.h:#define esl_abc_XGetUnknown(a)       ((a)->Kp)
122                unsafe {
123                    *(*self.c_sq).dsq.add(i + 1) = x;
124                };
125            } else if x == libhmmer_sys::eslDSQ_IGNORED as u8 {
126                continue;
127            } else {
128                return Err("Invalid character in sequence");
129            }
130        }
131
132        // Set final sentinal
133        unsafe {
134            *((*self.c_sq).dsq.offset(seq.len() as isize + 1)) =
135                libhmmer_sys::eslDSQ_SENTINEL as u8;
136        };
137
138        Ok(())
139    }
140}
141
142impl Debug for EaselSequence {
143    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
144        // name: *mut c_char
145        // acc: *mut c_char
146        // desc: *mut c_char
147        // tax_id: i32
148        // seq: *mut c_char
149        // dsq: *mut ESL_DSQ
150        // ss: *mut c_char
151        // n: i64
152        // start: i64
153        // end: i64
154        // C: i64
155        // W: i64
156        // L: i64
157        // source: *mut c_char
158        // nalloc: c_int
159        // aalloc: c_int
160        // dalloc: c_int
161        // salloc: i64
162        // srcalloc: c_int
163        // idx: i64
164        // roff: off_t
165        // hoff: off_t
166        // doff: off_t
167        // eoff: off_t
168        // xr_tag: *mut *mut c_char
169        // xr: *mut *mut c_char
170        // nxr: c_int
171        // abc: *const ESL_ALPHABET
172        unsafe {
173            f.debug_struct("EaselSequence")
174                .field("c_sq", &self.c_sq)
175                .field("name", &CStr::from_ptr((*self.c_sq).name).to_string_lossy())
176                .field("acc", &CStr::from_ptr((*self.c_sq).acc).to_string_lossy())
177                .field(
178                    "dsq",
179                    &CStr::from_ptr((*self.c_sq).dsq as *mut i8).to_string_lossy(),
180                )
181                .field("dsq ptr", &(*self.c_sq).dsq)
182                .field(
183                    "dsq length",
184                    &libhmmer_sys::esl_abc_dsqlen((*self.c_sq).dsq),
185                )
186                .field("tax_id", &(*self.c_sq).tax_id)
187                // .field("seq", &CStr::from_ptr((*self.c_sq).seq).to_string_lossy())
188                // .field("ss", &CStr::from_ptr((*self.c_sq).ss).to_string_lossy())
189                .field("n", &(*self.c_sq).n)
190                .field("start", &(*self.c_sq).start)
191                .field("end", &(*self.c_sq).end)
192                .field("C", &(*self.c_sq).C)
193                .field("W", &(*self.c_sq).W)
194                .field("L", &(*self.c_sq).L)
195                .finish()
196        }
197    }
198}
199
200impl Drop for EaselSequence {
201    fn drop(&mut self) {
202        unsafe {
203            libhmmer_sys::esl_sq_Destroy(self.c_sq);
204        }
205    }
206}