Skip to main content

ncbitaxonomy/
lib.rs

1#![recursion_limit = "1024"]
2#[macro_use]
3extern crate diesel;
4#[macro_use]
5extern crate diesel_migrations;
6extern crate dotenv;
7extern crate thiserror;
8extern crate indextree;
9extern crate core;
10extern crate seq_io;
11extern crate clap;
12
13/// ncbitaxonomy: a module for working with a local copy of the NCBI taxonomy database
14
15use thiserror::Error;
16use std::io;
17
18#[derive(Error, Debug)]
19pub enum NcbiTaxonomyError {
20    #[error(transparent)]
21    Io(#[from] io::Error),
22    #[error("format error in nodes.dmp in line {0}")]
23    NodeFileFormatError(String),
24    #[error(transparent)]
25    ParseIntError(#[from] ::std::num::ParseIntError)
26}
27
28#[derive(Error, Debug)]
29pub enum ToSqliteError {
30    #[error(transparent)]
31    Diesel(#[from] diesel::result::Error),
32    #[error(transparent)]
33    MigrationError(#[from] diesel_migrations::RunMigrationsError),
34    #[error("Error looking up id {0}")]
35    IdLookupError(String)
36}
37
38use std::collections::{HashMap, HashSet};
39use std::fs::File;
40use std::io::{BufReader,BufRead};
41use indextree::{Arena, NodeId, Traverse};
42pub use indextree::NodeEdge;
43use std::iter::FromIterator;
44
45pub mod models;
46pub mod schema;
47
48use diesel::prelude::*;
49use diesel::sqlite::SqliteConnection;
50use dotenv::dotenv;
51use std::env;
52
53use self::models::*;
54use diesel::expression::dsl::count;
55
56pub const VERSION: &str = env!("CARGO_PKG_VERSION");
57
58fn establish_connection(db_url: Option<&str>) -> SqliteConnection {
59    dotenv().ok();
60
61    let database_url = match db_url {
62        Some(val) => val.to_owned(),
63        None => env::var("DATABASE_URL").expect("DATABASE_URL must be set")
64    };
65
66    SqliteConnection::establish(&database_url)
67        .unwrap_or_else(|_| panic!("Error connecting to {}", database_url))
68}
69
/// Build the set of rank names that are treated as canonical.
///
/// These are the canonical ranks (+ superkingdom) as they appear in the NCBI taxonomy database.
fn get_canonical_ranks() -> HashSet<String> {
    const CANONICAL_RANKS: [&str; 8] = [
        "superkingdom",
        "kingdom",
        "phylum",
        "class",
        "order",
        "family",
        "genus",
        "species",
    ];

    CANONICAL_RANKS.iter().map(|rank| (*rank).to_string()).collect()
}
74
/// Query interface shared by the taxonomy backends in this crate.
pub trait NcbiTaxonomy {
    /// Check whether the taxonomy contains the given taxonomy ID.
    fn contains_id(&self, taxid: i32) -> bool;

    /// Check whether the taxonomy contains a taxon with the given name.
    fn contains_name(&self, name: &str) -> bool;

    /// Check whether the taxon called `name` is a descendant of the taxon called `ancestor_name`.
    fn is_descendant(&self, name: &str, ancestor_name: &str) -> bool;

    /// Check whether the taxon `taxid` is a descendant of the taxon `ancestor_taxid`.
    fn is_descendant_taxid(&self, taxid: i32, ancestor_taxid: i32) -> bool;

    /// Get the name stored for a taxonomy ID, if there is one.
    fn get_name_by_id(&self, taxid: i32) -> Option<String>;

    /// Get the taxonomy ID stored for a name, if there is one.
    fn get_id_by_name(&self, name: &str) -> Option<i32>;

    /// Get the IDs of the named taxon and its parents (up to the root).
    fn get_lineage(&self, name: &str) -> Option<Vec<i32>>;

    /// Find the common ancestor of two taxa given by ID.
    ///
    /// Returns a distance to the common ancestor together with that ancestor's ID.
    /// With `only_canonical` set, only taxa with a canonical rank are taken into account.
    fn get_distance_to_common_ancestor_taxid(
        &self,
        taxid1: i32,
        taxid2: i32,
        only_canonical: bool,
    ) -> Option<(i32, i32)>;

    /// Find the common ancestor of two taxa given by name.
    ///
    /// Returns a distance to the common ancestor together with that ancestor's name.
    /// With `only_canonical` set, only taxa with a canonical rank are taken into account.
    fn get_distance_to_common_ancestor(
        &self,
        name1: &str,
        name2: &str,
        only_canonical: bool,
    ) -> Option<(i32, String)>;
}
86
87#[derive(Debug)]
88pub struct NcbiFileTaxonomy {
89    arena: Arena<i32>,
90    name_to_node: HashMap<String, NodeId>,
91    id_to_node: HashMap<i32, NodeId>,
92    id_to_name: HashMap<i32, String>,
93    id_to_rank: HashMap<i32, String>
94}
95
96impl NcbiFileTaxonomy {
97
98    /// from_ncbi_files
99    ///
100    /// Reads the `nodes.dmp` file and `names.dmp` file from the NCBI Taxonomy database to
101    /// generate a NcbiTaxonomy structure
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// use ncbitaxonomy::*;
107    ///
108    /// let taxonomy = NcbiFileTaxonomy::from_ncbi_files("data/nodes.dmp", "data/names.dmp");
109    /// ```
110    pub fn from_ncbi_files(nodes_filename: &str, names_filename: &str) -> Result<NcbiFileTaxonomy, NcbiTaxonomyError> {
111        let mut child_ids_by_parent_id: HashMap<i32, Vec<i32>> = HashMap::new();
112        let mut id_to_rank = HashMap::new();
113        let nodes_file = File::open(nodes_filename)?;
114        for line_maybe in BufReader::new(nodes_file).lines() {
115            let line = line_maybe?;
116            let mut fields = line.split("\t|\t");
117            let id_str = fields.next().ok_or_else(|| NcbiTaxonomyError::NodeFileFormatError(line.clone()))?;
118            let parent_id_str = fields.next().ok_or_else(|| NcbiTaxonomyError::NodeFileFormatError(line.clone()))?;
119            let rank = fields.next().ok_or_else(|| NcbiTaxonomyError::NodeFileFormatError(line.clone()))?.to_string();
120            let id = id_str.parse::<i32>()?;
121            let parent_id = parent_id_str.parse::<i32>()?;
122            id_to_rank.insert(id, rank);
123            if parent_id != id {  // this happens for the root node
124                // thanks to https://stackoverflow.com/questions/33243784/append-to-vector-as-value-of-hashmap/33243862
125                // for this way to get the existing entry or insert an empty list.
126                child_ids_by_parent_id.entry(parent_id).or_insert_with(Vec::new).push(id);
127            }
128        }
129
130        let mut keys = child_ids_by_parent_id.keys().collect::<Vec<&i32>>();
131        keys.sort_unstable_by(|a, b| a.cmp(b));
132
133        let mut arena: Arena<i32> = Arena::new();
134
135        let mut id_to_node: HashMap<i32, NodeId> = HashMap::new();
136        for id in keys {
137            let node_id = match id_to_node.get(id) {
138                Some(node_id) => *node_id,
139                None => arena.new_node(*id),
140            };
141            id_to_node.insert(*id, node_id);
142            for child in child_ids_by_parent_id.get(&id).expect("ID not found in child_ids_by_parent_id") {
143                let child_node_id = match id_to_node.get(child) {
144                    Some(child_node_id) => *child_node_id,
145                    None => arena.new_node(*child),
146                };
147                id_to_node.insert(*child, child_node_id);
148                assert_ne!(node_id, child_node_id, "child node id same as node: {} (for {} {})", node_id, *id, *child);
149                node_id.append(child_node_id, &mut arena).unwrap();  // might return Failure, in which case we panic!
150            }
151        }
152
153        // now its time to read the names_filename that maps names to IDs
154        let mut name_to_node = HashMap::new();
155        let mut id_to_name = HashMap::new();
156        let name_file = File::open(names_filename)?;
157        for line_maybe in BufReader::new(name_file).lines() {
158            let line = line_maybe?;
159            let fields = line.split("\t|\t").collect::<Vec<&str>>();
160            if fields[3].starts_with("scientific name") {
161                let id_str = fields[0];
162                let id = id_str.parse::<i32>().or_else(|e| Err(NcbiTaxonomyError::ParseIntError(e)))?;
163                let name = if fields[2] != "" { fields[2].to_string() } else { fields[1].to_string() };
164                let node_id = id_to_node.get(&id).expect("ID not found in id_to_node");
165                id_to_name.insert(id, name.clone());
166                name_to_node.insert(name, *node_id);
167            }
168        }
169
170        let tree = NcbiFileTaxonomy { arena, name_to_node, id_to_node, id_to_name, id_to_rank };
171        Ok(tree)
172    }
173
174    pub fn save_to_sqlite(&self, db_url: Option<&str>) -> Result<SqliteConnection, ToSqliteError> {
175        // design of storing a tree in a relational DB inspired by:
176        // https://makandracards.com/makandra/45275-storing-trees-in-databases
177        use schema::taxonomy;
178        let connection = establish_connection(db_url);
179
180        embed_migrations!();
181        embedded_migrations::run(&connection)?;
182
183        connection.transaction::<_, ToSqliteError, _>(|| {
184            for (id, nodeid) in self.id_to_node.iter() {
185                let mut ancestors_vec = nodeid.ancestors(&self.arena).map(|nodeid| self.get_id_by_node(nodeid).unwrap().to_string()).collect::<Vec<String>>();
186                ancestors_vec.reverse();
187                let ancestors_string = ancestors_vec.join("/");
188                let name = match self.id_to_name.get(id) {
189                    Some(val) => val,
190                    None => { return Err(ToSqliteError::IdLookupError(id.to_string())) }
191                };
192
193                let taxon_record = NewTaxon {
194                    id,
195                    ancestry: match ancestors_string  {
196                        v if v == "1" => None,
197                        _ => Some(&ancestors_string[..])
198                    },
199                    name,
200                    rank: match self.id_to_rank.get(id) {
201                        Some(v) => Some(&v[..]),
202                        None => None
203                    }
204
205                };
206                diesel::insert_into(taxonomy::table)
207                    .values(&taxon_record   )
208                    .execute(&connection)?;
209            }
210            Ok(())
211        })?;
212        Ok(connection)
213    }
214
215    /// get_node_by_id
216    ///
217    /// get a NodeId from a numeric NCBI Taxonomy ID
218    pub fn get_node_by_id(&self, id: i32) -> Option<&NodeId> {
219        self.id_to_node.get(&id)
220    }
221
222    /// traversal
223    ///
224    /// traverse the tree nodes (in depth first order) from the node with a given NCBI Taxonomy ID
225    pub fn traversal(&self, from: i32) -> Option<Traverse<i32>> {
226        match self.get_node_by_id(from) {
227            Some(node_id) => Some(node_id.traverse(&self.arena)),
228            None => None
229        }
230    }
231
232    /// get_id_by_node
233    ///
234    /// get the NCBI Taxonomy ID held by the node with a given NodeId
235    pub fn get_id_by_node(&self, node_id: NodeId) -> Option<i32> {
236        match self.arena.get(node_id)  {
237            Some(node) => Some(node.data),
238            None => None
239        }
240    }
241}
242
243impl NcbiTaxonomy for NcbiFileTaxonomy {
244    /// contains_id
245    ///
246    /// check whether the taxonomy contains a (number) ID
247    fn contains_id(&self, id: i32) -> bool {
248        self.id_to_node.contains_key(&id)
249    }
250
251    /// contains_name
252    ///
253    /// check whether the taxonomy contains a node with the specified name
254    ///
255    /// **note:** the name used is what is reported as a the 'scientific name' in the NCBI Taxonomy database.
256    /// synonyms are currently not supported
257    fn contains_name(&self, name: &str) -> bool {
258        self.name_to_node.contains_key(name)
259    }
260
261    /// is_descendant
262    ///
263    /// check if a certain named node is a descendant of another named named
264    fn is_descendant(&self, name: &str, ancestor_name: &str) -> bool {
265        let id = match self.get_id_by_name(name) {
266            Some(id) => id,
267            None => return false
268        };
269        let ancestor_id = match self.get_id_by_name(ancestor_name) {
270            Some(id) => id,
271            None => return false
272        };
273        self.is_descendant_taxid(id, ancestor_id)
274    }
275
276    /// is_descendant_taxid
277    ///
278    /// check if a certain node with taxid is a descendant of another taxid
279    fn is_descendant_taxid(&self, taxid: i32, ancestor_taxid: i32) -> bool {
280        let id = match self.id_to_node.get(&taxid) {
281            Some(id) => id,
282            None => return false
283        };
284        let ancestor_id = match self.id_to_node.get(&ancestor_taxid) {
285            Some(id) => id,
286            None => return false
287        };
288        for node in id.ancestors(&self.arena) {
289            if node == *ancestor_id {
290                return true
291            }
292        }
293        false
294    }
295
296    /// get_name_by_id
297    ///
298    /// get the scientific name associated with a given NCBI Taxonomy ID
299    fn get_name_by_id(&self, id: i32) -> Option<String> {
300        self.id_to_name.get(&id).cloned()
301    }
302
303    fn get_id_by_name(&self, name: &str) -> Option<i32> {
304        match self.name_to_node.get(name) {
305            Some(nodeid) => self.get_id_by_node(*nodeid),
306            None => None
307        }
308    }
309
310    /// get_lineage
311    ///
312    /// get the list of IDs of a taxon and its parents (up to the root)
313    fn get_lineage(&self, name: &str) -> Option<Vec<i32>> {
314        let node_id = match self.name_to_node.get(name)  {
315            Some(val) => val,
316            None => return None
317        };
318
319        Some(node_id.ancestors(&self.arena).map(|node_id| *(&self.get_id_by_node(node_id).unwrap())).collect())
320    }
321
322    /// get_distance_to_common_ancestor_id
323    ///
324    /// get the distance (in steps in the tree) between taxid1 and the common ancestor with taxid2
325    fn get_distance_to_common_ancestor_taxid(&self, taxid1: i32, taxid2: i32, only_canonical: bool) -> Option<(i32, i32)> {
326        let canonical_ranks = get_canonical_ranks();
327        if taxid1 == taxid2 {
328            return Some((0, taxid1))
329        }
330
331        let taxon1 = self.id_to_node.get(&taxid1)?;
332        let taxon2 = self.id_to_node.get(&taxid2)?;
333
334        let mut ancestors_distance1 = HashMap::new();
335        let mut current_distance = 0;
336        let taxid1_rank = self.id_to_rank.get(&taxid1)?;
337        if !only_canonical || canonical_ranks.contains(taxid1_rank) {
338            // we know that taxid1 != taxid2, so either taxid2 is an ancestor of
339            // taxid1 or there is a common ancestor further back. in the first case,
340            // taxid2 will be found in the ancestors of taxid1, so thifs distance will
341            // not be used. in the second case, taxid1 might be found in the ancestors
342            // of taxid2, so this distance will still not be used
343            ancestors_distance1.insert(taxid1, 0);
344        }
345        for node in taxon1.ancestors(&self.arena) {
346            let nodeid = self.get_id_by_node(node)?;
347            let rank = self.id_to_rank.get(&nodeid)?;
348
349            if !only_canonical || canonical_ranks.contains(rank) {
350                current_distance += 1;
351                if nodeid == taxid2 {
352                    // taxid2 is an ancestor of taxid1
353                    return Some((current_distance, taxid2))
354                }
355                ancestors_distance1.insert(nodeid, current_distance);
356            }
357        }
358
359        // if we got here then we did not see taxid2 yet, i.e. taxid2 is not a
360        // direct ancestor of taxid1
361        current_distance = 0;
362        for node in taxon2.ancestors(&self.arena) {
363            let nodeid = self.get_id_by_node(node).unwrap();
364            let rank = self.id_to_rank.get(&nodeid)?;
365            if !only_canonical || canonical_ranks.contains(rank) {
366                current_distance += 1;
367                if ancestors_distance1.contains_key(&nodeid) {
368                    // the distance to te common ancestor is the distance from taxon2
369                    // to an ancestor that is also an ancestor to taxon1
370                    return Some((current_distance, nodeid))
371                }
372            }
373        }
374        None
375    }
376
377    /// get_distance_to_common_ancestor
378    ///
379    /// find the distance in the tree between name1 and name2
380    fn get_distance_to_common_ancestor(&self, name1: &str, name2: &str, only_canonical: bool) -> Option<(i32, String)> {
381        let taxon1 = self.name_to_node.get(name1)?;
382
383        let taxon2 = self.name_to_node.get(name2)?;
384
385        match self.get_distance_to_common_ancestor_taxid(self.get_id_by_node(*taxon1).unwrap(),
386                                                   self.get_id_by_node(*taxon2).unwrap(), only_canonical) {
387            Some((distance, taxid)) => Some((distance, self.get_name_by_id(taxid).unwrap())),
388            None => None
389        }
390    }
391}
392
393pub struct NcbiSqliteTaxonomy {
394    connection: SqliteConnection
395}
396
397impl NcbiSqliteTaxonomy {
398    pub fn new(db_url: Option<&str>) -> Self {
399        NcbiSqliteTaxonomy {
400            connection: establish_connection(db_url)
401        }
402    }
403
404    pub fn from_connection(connection: SqliteConnection) -> Self {
405        NcbiSqliteTaxonomy {
406            connection
407        }
408    }
409
410    fn get_ancestry_for_taxid(&self, taxid: i32) -> Option<String> {
411        use schema::taxonomy::dsl::*;
412
413        let results: Vec<Option<String>> = taxonomy.filter(id.eq(taxid))
414            .select(ancestry)
415            .load(&self.connection)
416            .expect("Error loading taxonomy");
417
418        match results.len() {
419            1 => results[0].clone(),
420            _ => panic!("taxid {} not found in taxonomy", taxid)
421        }
422    }
423
424    fn get_ancestors(&self, taxid: i32) -> Vec<i32> {
425        let ancestry_string = self.get_ancestry_for_taxid(taxid);
426        match ancestry_string {
427            None => vec![], // the root taxon has no ancestry
428            Some(val) => {
429                let mut ancestors: Vec<i32> = val.split('/').map(|id_str| id_str.parse::<i32>().unwrap()).collect();
430                ancestors.reverse();
431                ancestors
432            }
433        }
434    }
435
436    fn get_rank(&self, taxid: i32) -> Option<String> {
437        use schema::taxonomy::dsl::*;
438
439        let results: Vec<Option<String>> = taxonomy.filter(id.eq(taxid))
440            .select(rank)
441            .load(&self.connection)
442            .expect("Error loading taxonomy");
443
444        match results.len() {
445            1 => results[0].clone(),
446            _ => panic!("taxid {} not found in taxonomy", taxid)
447        }
448    }
449}
450
451impl NcbiTaxonomy for NcbiSqliteTaxonomy {
452
453    fn contains_id(&self, taxid: i32) -> bool {
454        use schema::taxonomy::dsl::*;
455
456        let results: Vec<i64> = taxonomy.filter(id.eq(taxid))
457            .select(count(id))
458            .load(&self.connection)
459            .expect("Error loading taxonomy");
460
461        results[0] == 1
462    }
463
464    fn contains_name(&self, name_str: &str) -> bool {
465        use schema::taxonomy::dsl::*;
466
467        let results: Vec<i64> = taxonomy.filter(name.eq(name_str))
468            .select(count(id))
469            .load(&self.connection)
470            .expect("Error loading taxonomy");
471
472        results[0] == 1
473    }
474
475    fn is_descendant(&self, name_str: &str, ancestor: &str) -> bool {
476        let taxid = match self.get_id_by_name(name_str) {
477            Some(val) => val,
478            None => return false
479        };
480
481        let ancestor_taxid = match self.get_id_by_name(ancestor) {
482            Some(val) => val,
483            None => return false
484        };
485
486        self.is_descendant_taxid(taxid, ancestor_taxid)
487    }
488
489    fn is_descendant_taxid(&self, taxid: i32, ancestor_taxid: i32) -> bool {
490        use schema::taxonomy::dsl::*;
491
492        // ancestor pattern is id/id/id so if ancestor_taxid is an ancestor
493        // of taxid, LIKE 'ancestor_taxid/%' OR LIKE '%/ancestor_taxid/%' OR LIKE '%/ancestor_taxid'
494        // will be true
495        let pattern1 = format!("{}/%", ancestor_taxid);
496        let pattern2 = format!("%/{}/%", ancestor_taxid);
497        let pattern3 = format!("%/{}", ancestor_taxid);
498
499        let results: Vec<i64> = taxonomy.filter(
500                id.eq(taxid).and(
501                    ancestry.like(pattern1)
502                        .or(ancestry.like(pattern2))
503                        .or(ancestry.like(pattern3))
504                ))
505            .select(count(id))
506            .load(&self.connection)
507            .expect("Error loading taxonomy");
508
509        match results.len() {
510            1 => true,
511            _ => false
512        }
513    }
514
515    fn get_name_by_id(&self, taxid: i32) -> Option<String> {
516        use schema::taxonomy::dsl::*;
517
518        let results: Vec<String> = taxonomy.filter(id.eq(taxid))
519            .select(name)
520            .load(&self.connection)
521            .expect("Error loading taxonomy");
522
523        match results.len() {
524            1 => Some(results[0].clone()),
525            _ => None
526        }
527    }
528
529    fn get_id_by_name(&self, name_str: &str) -> Option<i32> {
530        use schema::taxonomy::dsl::*;
531
532        let results: Vec<i32> = taxonomy.filter(name.eq(name_str))
533            .select(id)
534            .load(&self.connection)
535            .expect("Error loading taxonomy");
536
537        match results.len() {
538            1 => Some(results[0]),
539            _ => None
540        }
541    }
542
543    fn get_lineage(&self, name: &str) -> Option<Vec<i32>> {
544        let tax_id = match self.get_id_by_name(name)  {
545            Some(val) => val,
546            None => return None
547        };
548        Some(self.get_ancestors(tax_id))
549    }
550
551    fn get_distance_to_common_ancestor_taxid(&self, taxid1: i32, taxid2: i32, only_canonical: bool) -> Option<(i32, i32)> {
552        // canonical ranks (+ superkingdom) as they appear in the NCBI taxonomy database
553        let canonical_ranks = get_canonical_ranks();
554
555        if taxid1 == taxid2 {
556            return Some((0, taxid1))
557        }
558
559        let mut ancestors_distance1 = HashMap::new();
560        let mut current_distance = 0;
561        // TODO: make rank a NON NULL column
562        let taxid1_rank = self.get_rank(taxid1)?;
563        if !only_canonical || canonical_ranks.contains(&taxid1_rank) {
564            // see comment above for why distance is 0
565            ancestors_distance1.insert(taxid1, 0);
566        }
567        for taxid in self.get_ancestors(taxid1) {
568            let current_rank = self.get_rank(taxid)?;
569            if taxid == taxid2 {
570                return Some((current_distance, taxid2))
571            }
572            if !only_canonical || canonical_ranks.contains(&current_rank) {
573                current_distance += 1;
574                ancestors_distance1.insert(taxid, current_distance);
575            }
576        }
577
578        current_distance = 0;
579        for taxid in self.get_ancestors(taxid2) {
580            let current_rank = self.get_rank(taxid)?;
581            if !only_canonical || canonical_ranks.contains(&current_rank) {
582                current_distance += 1;
583                if ancestors_distance1.contains_key(&taxid) {
584                    return Some((current_distance, taxid))
585                }
586            }
587        }
588        None
589    }
590
591    fn get_distance_to_common_ancestor(&self, name1: &str, name2: &str, only_canonical: bool) -> Option<(i32, String)> {
592        let taxid1 = match self.get_id_by_name(name1) {
593            Some(val) => val,
594            None => return None
595        };
596
597        let taxid2 = match self.get_id_by_name(name2) {
598            Some(val) => val,
599            None => return None
600        };
601
602        match self.get_distance_to_common_ancestor_taxid(taxid1, taxid2, only_canonical) {
603            Some((distance, taxid)) => Some((distance, self.get_name_by_id(taxid).unwrap())),
604            None => None
605        }
606    }
607}
608
#[cfg(test)]
mod tests {
    use super::{NcbiFileTaxonomy, NcbiSqliteTaxonomy, NcbiTaxonomy, NodeEdge};

    /// Test fixture: a file-backed taxonomy loaded from the sample dump files.
    pub struct NcbiFileTaxonomyFixture {
        pub taxonomy: NcbiFileTaxonomy,
    }

    impl Default for NcbiFileTaxonomyFixture {
        fn default() -> Self {
            let taxonomy = NcbiFileTaxonomy::from_ncbi_files("data/sample_tree_nodes.dmp", "data/sample_tree_names.dmp").unwrap();
            Self { taxonomy }
        }
    }

    /// Test fixture: a SQLite-backed taxonomy reading the sample database.
    pub struct NcbiSqliteTaxonomyFixture {
        pub taxonomy: NcbiSqliteTaxonomy,
    }

    impl Default for NcbiSqliteTaxonomyFixture {
        fn default() -> Self {
            let taxonomy = NcbiSqliteTaxonomy::new(Some("data/ncbi_taxonomy.sqlite"));
            Self { taxonomy }
        }
    }

    #[test]
    fn contains_id() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert!(fixture.taxonomy.contains_id(504556));
    }

    #[test]
    fn sqlite_contains_id() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert!(fixture.taxonomy.contains_id(504556));
    }

    #[test]
    fn contains_name() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert!(fixture.taxonomy.contains_name("Propionibacterium phage PAS7"));
        assert!(fixture.taxonomy.contains_name("Viruses"));
        assert!(fixture.taxonomy.contains_name("environmental samples <bacteriophages>"));
    }

    #[test]
    fn sqlite_contains_name() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert!(fixture.taxonomy.contains_name("Propionibacterium phage PAS7"));
        assert!(fixture.taxonomy.contains_name("Viruses"));
        assert!(fixture.taxonomy.contains_name("environmental samples <bacteriophages>"));
    }

    #[test]
    fn get_node_by_id() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_node_by_id(999999999), None);
        assert!(fixture.taxonomy.get_node_by_id(504556).is_some());
    }

    #[test]
    fn get_id_by_node() {
        let fixture = NcbiFileTaxonomyFixture::default();
        let node_id = fixture.taxonomy.get_node_by_id(504556).unwrap();
        assert_eq!(fixture.taxonomy.get_id_by_node(*node_id).unwrap(), 504556);
    }

    #[test]
    fn get_name_by_id() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_name_by_id(370556).unwrap(), "Streptococcus phage 9429.1");
    }

    #[test]
    fn sqlite_get_name_by_id() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_name_by_id(370556).unwrap(), "Streptococcus phage 9429.1");
    }

    #[test]
    fn get_lineage() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_lineage("Streptococcus phage 9429.1"), Some(vec![370556, 387088, 12333, 10239, 1]));
    }

    #[test]
    fn get_lineage_sqlite() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_lineage("Streptococcus phage 9429.1"), Some(vec![370556, 387088, 12333, 10239, 1]));
    }

    #[test]
    fn traversal() {
        let fixture = NcbiFileTaxonomyFixture::default();
        let traversal = match fixture.taxonomy.traversal(12333) {
            Some(traversal) => traversal,
            None => panic!("Failed to load traversal from 12333"),
        };

        // every node is entered exactly once, so counting Start edges counts nodes
        let mut counter = 0;
        for node_edge in traversal {
            if let NodeEdge::Start(_) = node_edge {
                counter += 1;
            }
        }
        assert_eq!(counter, 500)
    }

    #[test]
    fn descendants() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert!(fixture.taxonomy.is_descendant("Propionibacterium phage PAS7", "unclassified bacterial viruses"));
    }

    #[test]
    fn sqlite_descendants() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert!(fixture.taxonomy.is_descendant("Propionibacterium phage PAS7", "unclassified bacterial viruses"));
    }

    #[test]
    fn taxid_descendants() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert!(fixture.taxonomy.is_descendant_taxid(504556, 12333));
    }

    #[test]
    fn sqlite_taxid_descendants() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert!(fixture.taxonomy.is_descendant_taxid(504556, 12333));
    }

    #[test]
    fn distance_to_common_ancestor_taxid() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor_taxid(156615, 12340, false), Some((2, 12333)));
    }

    #[test]
    fn distance_to_common_ancestor_taxid_canonical() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor_taxid(156615, 12340, true), Some((2, 10239)));
    }

    #[test]
    fn distance_to_common_ancestor() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor("Cyanophage clone GS2601", "Enterobacteria phage 933J", false),
                   Some((2, "unclassified bacterial viruses".to_string())));
    }

    #[test]
    fn distance_to_common_ancestor_canonical() {
        let fixture = NcbiFileTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor("Cyanophage clone GS2601", "Enterobacteria phage 933J", true),
                   Some((2, "Viruses".to_string())));
    }

    #[test]
    fn sqlite_distance_to_common_ancestor_taxid() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor_taxid(156615, 12340, false), Some((2, 12333)));
    }

    #[test]
    fn sqlite_distance_to_common_ancestor_taxid_canonical() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor_taxid(156615, 12340, true), Some((2, 10239)));
    }

    #[test]
    fn sqlite_distance_to_common_ancestor() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor("Cyanophage clone GS2601", "Enterobacteria phage 933J", false),
                   Some((2, "unclassified bacterial viruses".to_string())));
    }

    #[test]
    fn sqlite_distance_to_common_ancestor_canonical() {
        let fixture = NcbiSqliteTaxonomyFixture::default();
        assert_eq!(fixture.taxonomy.get_distance_to_common_ancestor("Cyanophage clone GS2601", "Enterobacteria phage 933J", true),
                   Some((2, "Viruses".to_string())));
    }
}