hmm_tblout/
lib.rs

1/*!
2The `hmm_tblout` crate is purely for parsing the output of
3certain HMMER3 programs. The output is generated by passing
4the `--tblout` flag to the HMMER3 program.
5
6# Example
7
8```no_run
9use hmm_tblout;
10
11fn main() -> Result<(), Box<dyn std::error::Error>> {
    // get the command line args, only parse the
    // first one which should be a tblout file
14    let args: Vec<String> = std::env::args().collect();
15    if args.len() < 2 {
16        println!("Usage: print_coordinates <tblout_file>");
17        std::process::exit(1);
18    }
19
20    let reader = hmm_tblout::Reader::from_path(args[1].clone())?;
21
22    for record in reader.into_records() {
23        let r = record?;
24        let tname = r.target_name();
25        let strand = r.strand().unwrap();
26        let alifrom = r.ali_from().unwrap();
27        let alito = r.ali_to().unwrap();
28
29        println!("{}\t{}\t{}\t{}", tname, strand, alifrom, alito);
30    }
31
32    Ok(())
33}
34```
35*/
36
37mod error;
38mod reader;
39mod record;
40mod writer;
41
42// don't want these in the public API.
43use record::{DNARecord, ProteinRecord};
44
45pub use crate::{
46    error::{Error, ErrorKind, Result},
47    reader::{Reader, RecordsIntoIter, RecordsIter},
48    record::{Meta, Program, Record, Strand},
49    writer::Writer,
50};
51
52#[cfg(test)]
53mod tests {
54    use record::Header;
55
56    use super::*;
57
58    fn b(s: &str) -> &[u8] {
59        s.as_bytes()
60    }
61
62    const NHMMER_FILE: &str = "# target name        accession  query name           accession  hmmfrom hmm to  alifrom   ali to  envfrom   env to   sq len strand   E-value  score  bias  description of target
63#------------------- ---------- -------------------- ---------- ------- -------  -------  -------  -------  -------  ------- ------ --------- ------ ----- ---------------------
64SUPER_1              -          TR                   -                1     315 10988331 10987997 10988331 10987995 52766903    -     6.5e-34  124.1   1.2  -
65SUPER_3              -          TR                   -              139     204 17951382 17951472 17951359 17951492 49366223    +        0.74   15.6   3.0  -
66SUPER_4              -          TR                   -               29     130 20542008 20542119 20541989 20542141 47284787    +         1.1   15.1   5.7  -
67SUPER_4              -          TR                   -               29     126 32993383 32993283 32993398 32993263 47284787    -         2.2   14.1   1.0  -
68SUPER_4              -          TR                   -               23     126 14861076 14861174 14861063 14861195 47284787    +         2.4   13.9   3.4  -
69SUPER_5              -          TR                   -               21     126 27641355 27641256 27641369 27641229 46997258    -         2.5   13.8   6.5  -
70SUPER_8              -          TR                   -              158     191 33162494 33162454 33162514 33162446 33878543    -         3.1   13.6   2.0  -
71SUPER_5              -          TR                   -              107     162 46228956 46228885 46228975 46228876 46997258    -         3.1   13.6   0.0  -
72SUPER_1              -          TR                   -               68     173 15697438 15697327 15697464 15697325 52766903    -         4.4   13.1   0.7  -
73SUPER_3              -          TR                   -               24     128 36147201 36147098 36147221 36147078 49366223    -         4.5   13.0   1.9  -
74SUPER_1              -          TR                   -                1      84 36988055 36987966 36988055 36987944 52766903    -         4.7   13.0   2.1  -
75SUPER_6              -          TR                   -               38     122 20863320 20863225 20863338 20863202 43664244    -           5   12.9   2.2  -
76SUPER_8              -          TR                   -                1      79 32349659 32349576 32349659 32349556 33878543    -         5.9   12.6   2.5  -
77SUPER_2              -          TR                   -               22     128 38320620 38320502 38320631 38320480 52745556    -         6.5   12.5   0.0  -
78SUPER_1              -          TR                   -                2      55  1602497  1602446  1602498  1602420 52766903    -         8.6   12.1   7.2  -
79SUPER_2              -          TR                   -                2     100 26735237 26735139 26735238 26735122 52745556    -           9   12.0   1.4  -
80#
81# Program:         nhmmer
82# Version:         3.4 (Aug 2023)
83# Pipeline mode:   SEARCH
84# Query file:      TR.hmm
85# Target file:     /lustre/scratch124/tol/projects/darwin/data/dicots/Misopates_orontium/assembly/curated/daMisOron1.1/daMisOron1.1.primary.fa.gz
86# Option settings: /software/team301/hmmer-3.4/src/nhmmer --tblout Misopates_orontium.tbl --dna --cpu 10 TR.hmm /lustre/scratch124/tol/projects/darwin/data/dicots/Misopates_orontium/assembly/curated/daMisOron1.1/daMisOron1.1.primary.fa.gz
87# Current dir:     /lustre/scratch123/tol/teams/blaxter/users/mb39/ARU/tr_detection
88# Date:            Fri May  3 10:07:36 2024
89# [ok]";
90
91    const JACKHMMER_FILE: &str = "#                                                                 --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
92# target name          accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
93#  ------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
94ENSTSYP00000010994     -          CMIL_MAD||KEN1/4-29  -            9.6e-21   66.2   0.7   9.3e-20   63.1   0.3   2.9   3   0   0   3   3   1   1 -
95ENSMICP00000006968     -          CMIL_MAD||KEN1/4-29  -            1.2e-20   65.9   0.7   9.2e-20   63.1   0.3   2.7   3   0   0   3   3   1   1 -
96ENSOCUP00000005644     -          CMIL_MAD||KEN1/4-29  -            1.3e-20   65.8   1.1   9.6e-20   63.0   0.3   2.9   3   0   0   3   3   2   1 -
97ENSCJAP00000038871     -          CMIL_MAD||KEN1/4-29  -            1.5e-20   65.6   1.0   9.4e-20   63.1   0.3   2.7   3   0   0   3   3   2   1 -
98ENSSBOP00000009355     -          CMIL_MAD||KEN1/4-29  -            2.2e-20   65.1   0.5   9.3e-20   63.1   0.3   2.3   2   0   0   2   2   1   1 -
99ENSHGLP00100017467     -          CMIL_MAD||KEN1/4-29  -            2.2e-20   65.1   0.6   9.4e-20   63.1   0.3   2.3   2   0   0   2   2   1   1 -
100ENSRBIP00000038600     -          CMIL_MAD||KEN1/4-29  -            2.2e-20   65.1   0.5   9.3e-20   63.1   0.3   2.3   2   0   0   2   2   1   1 -
101ENSCCAP00000017737     -          CMIL_MAD||KEN1/4-29  -            2.3e-20   65.0   0.5   9.2e-20   63.1   0.3   2.2   2   0   0   2   2   1   1 -
102ENSAMXP00000012570     -          CMIL_MAD||CDII/858-891 -            6.4e-10   32.2   0.0   2.3e-09   30.4   0.0   2.0   1   0   0   1   1   1   1 -
103ENSLOCP00000015810     -          CMIL_MAD||CDII/858-891 -            1.5e-09   31.0   0.0   4.7e-09   29.4   0.0   2.0   1   0   0   1   1   1   1 -
104ENSXETP00000000565     -          CMIL_MAD||CDII/858-891 -            6.6e-09   28.9   2.7   1.3e-08   28.0   0.4   2.6   2   0   0   2   2   2   1 -
105ENSDARP00000101544     -          CMIL_MAD||CDII/858-891 -            1.3e-05   18.4   0.5   4.5e-05   16.7   0.1   2.2   2   0   0   2   2   1   1 -
106#
107# Program:         jackhmmer
108# Version:         3.1b2 (February 2015)
109# Pipeline mode:   SEARCH
110# Query file:      TromerBuBR1_CMI.fasta
111# Target file:     ../../../Sequences/Human_BUB1B_orthologues_2018_12_05.fa
112# Option settings: jackhmmer --tblout EnsemblBuBR1_CMI_jackhmmer.tblout --domtblout EnsemblBuBR1_CMI_jackhmmer.domtblout --qformat fasta --tformat fasta TromerBuBR1_CMI.fasta ../../../Sequences/Human_BUB1B_orthologues_2018_12_05.fa 
113# Current dir:     /media/axelle/Angel_backup/Dropbox/BuBR1/BuBR1_coevolution/Data/Domain_annotation/BuBR1/CMI
114# Date:            Thu Aug  8 09:53:09 2019
115# [ok]";
116
117    const PHMMER_FILE: &str = "#                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
118# target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
119#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
120HBB_HUMAN            -          MYG_ESCGI            -            2.3e-11   30.3   0.1   2.5e-11   30.2   0.1   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
121HBB_HUMAN            -          MYG_HORSE            -            5.1e-12   32.3   0.1   6.1e-12   32.1   0.1   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
122HBB_HUMAN            -          MYG_PROGU            -            9.3e-13   34.6   0.0   1.1e-12   34.4   0.0   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
123HBB_HUMAN            -          MYG_SAISC            -            7.4e-13   35.1   0.0     8e-13   35.0   0.0   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
124HBB_HUMAN            -          HBB_LARRI            -            1.5e-70  222.2   0.1   1.6e-70  222.0   0.1   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
125HBB_HUMAN            -          HBB1_VAREX           -              7e-67  210.6   0.2   7.7e-67  210.5   0.2   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
126HBB_HUMAN            -          HBB2_XENTR           -            1.2e-52  163.9   0.1   1.4e-52  163.7   0.1   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
127HBB_HUMAN            -          HBBL_RANCA           -            1.5e-57  180.0   0.1   1.7e-57  179.9   0.1   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
128HBB_HUMAN            -          HBB2_TRICR           -            3.4e-46  143.0   0.0   3.7e-46  142.8   0.0   1.0   1   0   0   1   1   1   1 Human beta hemoglobin.
129#
130# Program:         phmmer
131# Version:         3.2 (June 2018)
132# Pipeline mode:   SEARCH
133# Query file:      /var/lib/cwl/stgc72dd3bc-8891-4b83-83e7-215178692be8/globins45.fa
134# Target file:     /var/lib/cwl/stgf46f4fb0-f7ec-4fbf-b608-74ca6d8a20a0/HBB_HUMAN
135# Option settings: phmmer -o globins45.fa.phmmer_matches.out --tblout globins45.fa.phmmer_matches.tblout /var/lib/cwl/stgc72dd3bc-8891-4b83-83e7-215178692be8/globins45.fa /var/lib/cwl/stgf46f4fb0-f7ec-4fbf-b608-74ca6d8a20a0/HBB_HUMAN 
136# Current dir:     /tmp/cwl
137# Date:            Tue Jul 17 13:01:19 2018
138# [ok]";
139
140    const NHMMSCAN_FILE: &str = "# target name        accession  query name           accession  hmmfrom hmm to alifrom  ali to envfrom  env to  modlen strand   E-value  score  bias  description of target
141#------------------- ---------- -------------------- ---------- ------- ------- ------- ------- ------- ------- ------- ------ --------- ------ ----- ---------------------
142CLASSA_ARL           -          ARL-1                -                1     845       1     845       1     846     846    +    1.5e-275  909.7  88.2  -
143CLASSA_BLAZ          -          ARL-1                -                1     828       1     828       1     842     843    +    6.7e-108  355.9  83.7  -
144CLASSA_PC1           -          ARL-1                -                1     825       1     825       1     840     843    +    3.7e-104  343.6  81.8  -
145CLASSA_TLA           -          ARL-1                -                2     809       2     764       1     785     903    +     1.3e-10   34.5  67.0  -
146CLASSA_CEPA          -          ARL-1                -               25     468      13     441       2     461     900    +     1.6e-10   34.1  32.0  -
147CLASSD_LCR           -          ARL-1                -               14     642      26     714       5     735     783    +     1.3e-09   31.0  58.1  -
148CLASSA_PER           -          ARL-1                -               25     882      13     822       2     842     924    +     1.6e-09   30.7  73.1  -
149CLASSA_CFXA          -          ARL-1                -              322     869     256     770     235     791     963    +     2.8e-09   29.7  48.7  -
150#
151# Program:         hmmscan
152# Version:         3.2 (June 2018)
153# Pipeline mode:   SCAN
154# Query file:      example_gene_seqs.fasta
155# Target file:     nARGhmm/Total_95_families_nucleotide.hmm
156# Option settings: nhmmscan --tblout nhmmscan_output -E 1e-6 nARGhmm/Total_95_families_nucleotide.hmm example_gene_seqs.fasta 
157# Current dir:     /Volumes/DATA/Ph.D.Works/BETALACTAMSE_WORK_12-NOV-18/Beta-lactamase_NEW_WORK/Standalone_Version/blacfampred_standalone
158# Date:            Sat Jul 24 16:56:59 2021
159# [ok]";
160
161    const HMMSCAN_FILE: &str = "#                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
162# target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
163#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
164ABC_membrane_2       PF06472.14 11LoS11_3_18_3       -              2e-74  250.4   5.3     2e-74  250.4   5.3   1.4   2   0   0   2   2   2   1 ABC transporter transmembrane region 2
165SbmA_BacA            PF05992.11 11LoS11_3_18_3       -            3.3e-41  141.7   9.4   4.4e-41  141.2   9.4   1.1   1   0   0   1   1   1   1 SbmA/BacA-like family
166ABC_tran             PF00005.26 11LoS11_3_18_3       -              6e-17   62.3   0.0   1.8e-16   60.8   0.0   1.8   1   1   0   1   1   1   1 ABC transporter
167AAA_29               PF13555.5  11LoS11_3_18_3       -            6.1e-06   25.8   0.5   2.1e-05   24.1   0.1   2.1   2   0   0   2   2   2   1 P-loop containing region of AAA domain
168ABC_membrane_2       PF06472.14 11LoS18_3_1_2        -            4.2e-87  292.0   2.1   5.3e-87  291.7   2.1   1.1   1   0   0   1   1   1   1 ABC transporter transmembrane region 2
169HisKA                PF00512.24 11LoS6_2_10_2        -            6.2e-07   29.3   0.3     2e-06   27.7   0.1   2.0   2   0   0   2   2   2   1 His Kinase A (phospho-acceptor) domain
170Esterase_phd         PF10503.8  11LoS7_1_2_1         -            3.9e-16   59.1   0.8   6.9e-16   58.3   0.7   1.5   1   1   0   1   1   1   1 Esterase PHB depolymerase
171Peptidase_S9         PF00326.20 11LoS7_1_2_1         -            3.5e-09   36.3   0.4   1.3e-07   31.3   0.2   2.4   1   1   1   2   2   2   1 Prolyl oligopeptidase family
172Esterase             PF00756.19 11LoS7_1_2_1         -            7.3e-06   25.7   0.0     1e-05   25.2   0.0   1.3   1   0   0   1   1   1   1 Putative esterase
173Abhydrolase_2        PF02230.15 11LoS7_1_2_1         -            1.8e-05   24.6   0.0   0.00012   21.8   0.0   2.0   1   1   0   1   1   1   1 Phospholipase/Carboxylesterase
174Abhydrolase_6        PF12697.6  11LoS7_1_2_1         -            2.1e-05   25.2   2.1     4e-05   24.2   2.1   1.5   1   0   0   1   1   1   1 Alpha/beta hydrolase family
175Ribonuc_L-PSP        PF01042.20 11LoS7_1_2_2         -            1.9e-23   82.6   0.6   2.3e-23   82.4   0.6   1.1   1   0   0   1   1   1   1 Endoribonuclease L-PSP
176ABC_membrane_2       PF06472.14 13LoS28_1_10_2       -            4.2e-87  292.0   2.1   5.3e-87  291.7   2.1   1.1   1   0   0   1   1   1   1 ABC transporter transmembrane region 2
177AAA_29               PF13555.5  CW1_7_2              -            3.2e-05   23.5   0.0   0.00011   21.8   0.0   1.9   1   0   0   1   1   1   1 P-loop containing region of AAA domain
178#
179# Program:         hmmscan
180# Version:         3.1b2 (February 2015)
181# Pipeline mode:   SCAN
182# Query file:      orf.out.txt
183# Target file:     /srv/projects/db/pfam/2017-06-11-Pfam31.0/Pfam-A.hmm
184# Option settings: hmmscan --tblout hmmscan.tblout.txt -E 0.0001 --cpu 4 /srv/projects/db/pfam/2017-06-11-Pfam31.0/Pfam-A.hmm orf.out.txt 
185# Current dir:     /rhome/arahm010/project220/src
186# Date:            Fri Dec 14 01:07:39 2018
187# [ok]";
188
189    const HMMSEARCH_FILE: &str = "#                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----
190# target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target
191#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------
192sp|P29082|SOR_ACIAM  -          SOR                  PF07682.13  1.5e-152  492.8   0.8  1.7e-152  492.6   0.8   1.0   1   0   0   1   1   1   1 Sulfur oxygenase/reductase OS=Acidianus ambivalens OX=2283 GN=sor PE=1 SV=3
193#
194# Program:         hmmsearch
195# Version:         3.2.1 (June 2018)
196# Pipeline mode:   SEARCH
197# Query file:      SOR.hmm
198# Target file:     sor.faa
199# Option settings: hmmsearch --tblout sor.sor.tblout SOR.hmm sor.faa 
200# Current dir:     /Users/arkadiygarber/MagicLamp/hmms/litho
201# Date:            Mon May 24 13:53:50 2021
202# [ok]";
203
204    const CMSCAN_FILE: &str = "#target name         accession query name           accession mdl mdl from   mdl to seq from   seq to strand trunc pass   gc  bias  score   E-value inc description of target
205#------------------- --------- -------------------- --------- --- -------- -------- -------- -------- ------ ----- ---- ---- ----- ------ --------- --- ---------------------
206Intron_gpII          RF00029   u14                  -          cm        1       77     1236     1367      +    no    1 0.52   0.0   50.3   2.1e-11 !   Group II catalytic intron
207Intron_gpII          RF00029   u15                  -          cm        1       77    16059    16120      +    no    1 0.66   0.0   63.3   1.8e-13 !   Group II catalytic intron
208Intron_gpII          RF00029   u16                  -          cm        1       77    52587    52722      +    no    1 0.44   0.0   12.8       0.9 ?   Group II catalytic intron
209#
210# Program:         cmscan
211# Version:         1.1.4 (Dec 2020)
212# Pipeline mode:   SCAN
213# Query file:      ../data/Acaena_ovalifolia.fasta
214# Target file:     ../rfam_introns/group_II_intron.cm
215# Option settings: /software/team301/infernal-1.1.4-linux-intel-gcc/binaries/cmscan --tblout test.tbl ../rfam_introns/group_II_intron.cm ../data/Acaena_ovalifolia.fasta 
216# Current dir:     /lustre/scratch123/tol/teams/blaxter/users/mb39/ARU/mito_structural_variation/annotation/src
217# Date:            Tue Feb 11 14:09:12 2025
218# [ok]";
219
220    #[test]
221    fn test_whole_file() {
222        let reader = Reader::from_reader(b(NHMMER_FILE));
223
224        let first = reader.unwrap().records().next().unwrap().unwrap();
225
226        assert_eq!(first.target_name(), "SUPER_1".to_string())
227    }
228
229    #[test]
230    fn test_meta() {
231        let reader = Reader::from_reader(b(NHMMER_FILE));
232        let r = reader.unwrap();
233        let meta = r.meta();
234        assert_eq!(meta.program(), Program::Nhmmer);
235        assert_eq!(meta.version(), "3.4 (Aug 2023)".to_string());
236        assert_eq!(meta.date(), "Fri May  3 10:07:36 2024".to_string());
237    }
238
239    #[test]
240    fn test_jackhmmer_meta() {
241        let reader = Reader::from_reader(b(JACKHMMER_FILE));
242        let r = reader.unwrap();
243        let meta = r.meta();
244        assert_eq!(meta.program(), Program::Jackhmmer);
245        assert_eq!(meta.version(), "3.1b2 (February 2015)".to_string());
246    }
247
248    #[test]
249    fn test_jackhmmer_records() {
250        let reader = Reader::from_reader(b(JACKHMMER_FILE));
251        let mut r = reader.unwrap();
252        let mut records = r.records();
253
254        // first record
255        let first = records.next().unwrap().unwrap();
256        // and the 10th
257        let tenth = records.nth(9).unwrap().unwrap();
258
259        assert_eq!(first.target_name(), "ENSTSYP00000010994".to_string());
260        assert_eq!(tenth.target_name(), "ENSXETP00000000565".to_string());
261    }
262
263    #[test]
264    fn test_phmmer_records() {
265        let reader = Reader::from_reader(b(PHMMER_FILE));
266        let mut r = reader.unwrap();
267        let mut records = r.records();
268
269        // first record
270        let first = records.next().unwrap().unwrap();
271        // and the 10th
272        let third = records.nth(1).unwrap().unwrap();
273
274        assert_eq!(first.target_name(), "HBB_HUMAN".to_string());
275        assert_eq!(third.target_name(), "HBB_HUMAN".to_string());
276
277        // and another field
278        assert_eq!(first.e_value_full().unwrap(), 2.3e-11);
279        assert_eq!(third.e_value_full().unwrap(), 9.3e-13);
280    }
281
282    // test the writing module now
283
284    #[test]
285    fn test_phmmer_write() {
286        let reader = Reader::from_reader(b(PHMMER_FILE));
287        let mut r = reader.unwrap();
288        let records = r.records();
289        let output = Vec::new();
290        let mut writer = writer::Writer::new(output);
291        for record in records {
292            let rec = record.unwrap();
293            writer.write_record(&rec).unwrap();
294            break;
295        }
296
297        // read first line of output
298        let out = String::from_utf8(writer.into_inner().unwrap()).unwrap();
299
300        assert_eq!(
301            out,
302            "HBB_HUMAN       -          MYG_ESCGI       -            2.30e-11  30.30  0.10   2.50e-11  30.20  0.10  1.00   1   0   0   1   1   1   1 Human beta hemoglobin.\n"
303        );
304    }
305
306    // NOTE: that this test fails, we produce very slightly different output.
307    // it's as far as I can tell, on the floating point precision.
308    #[test]
309    fn in_same_as_out() {
310        let reader = Reader::from_reader(b(PHMMER_FILE));
311        let mut r = reader.unwrap();
312        let header = r.header().clone();
313        let meta = r.meta().clone();
314        let records = r.records();
315        let output = Vec::new();
316        let mut writer = writer::Writer::new(output);
317
318        // wrap output in header
319        writer.write_header(header).unwrap();
320        for record in records {
321            let rec = record.unwrap();
322            writer.write_record(&rec).unwrap();
323        }
324        // and the meta
325        writer.write_meta(meta).unwrap();
326
327        assert_eq!(
328            String::from_utf8(writer.into_inner().unwrap()).unwrap(),
329            PHMMER_FILE
330        );
331    }
332
333    // NOTE: this also fails, again as we produce very slightly different output. Should be fine though.
334    #[test]
335    fn in_same_as_out_n() {
336        let reader = Reader::from_reader(b(NHMMER_FILE));
337        let mut r = reader.unwrap();
338        let header = r.header().clone();
339        let meta = r.meta().clone();
340        let records = r.records();
341        let output = Vec::new();
342        let mut writer = writer::Writer::new(output);
343
344        // wrap output in header
345        writer.write_header(header.clone()).unwrap();
346        for record in records {
347            let rec = record.unwrap();
348            writer.write_record(&rec).unwrap();
349        }
350        // and the meta
351        writer.write_meta(meta).unwrap();
352
353        assert_eq!(
354            String::from_utf8(writer.into_inner().unwrap()).unwrap(),
355            NHMMER_FILE
356        );
357    }
358
359    // test header
360    #[test]
361    fn test_header() {
362        let reader = Reader::from_reader(b(PHMMER_FILE));
363        let r = reader.unwrap();
364        let header = r.header().clone();
365
366        let header_true = vec![
367                "#                                                               --- full sequence ---- --- best 1 domain ---- --- domain number estimation ----\n",
368                "# target name        accession  query name           accession    E-value  score  bias   E-value  score  bias   exp reg clu  ov env dom rep inc description of target\n",
369                "#------------------- ---------- -------------------- ---------- --------- ------ ----- --------- ------ -----   --- --- --- --- --- --- --- --- ---------------------\n",
370            ];
371        let header_true = Header::new(
372            Some(header_true[0].into()),
373            header_true[1].into(),
374            header_true[2].into(),
375        );
376
377        assert_eq!(header.get_protein_only(), header_true.get_protein_only());
378        assert_eq!(header.get_columns(), header_true.get_columns());
379        assert_eq!(header.get_dashes(), header_true.get_dashes());
380    }
381
382    #[test]
383    fn test_cmscan() {
384        let reader = Reader::from_reader(b(CMSCAN_FILE));
385        let mut r = reader.unwrap();
386        let mut records = r.records();
387
388        // first record
389        let first = records.next().unwrap().unwrap();
390
391        assert_eq!(first.target_name(), "Intron_gpII".to_string());
392        assert_eq!(first.target_accession(), "RF00029".to_string());
393        assert_eq!(first.query_name(), "u14".to_string());
394        assert_eq!(first.query_accession(), "-".to_string());
395        assert_eq!(first.mdl_from().unwrap(), 1);
396        assert_eq!(first.mdl_to().unwrap(), 77);
397    }
398}