checkm/
lib.rs

1extern crate csv;
2extern crate env_logger;
3extern crate log;
4
5use log::*;
6
7use std::collections::BTreeMap;
8
/// Marker type grouping the readers for CheckM v1 tab-separated quality
/// tables. It holds no data; all functionality lives in associated functions.
pub struct CheckM1TabTable {}
10
11impl CheckM1TabTable {
12    pub fn good_quality_genome_names(
13        file_path: &str,
14        min_completeness: f32,
15        max_contamination: f32,
16    ) -> Result<Vec<String>, CheckMReadError> {
17        let mut passes = vec![];
18        let qualities = CheckM1TabTable::read_file_path(file_path)?;
19        for (genome, quality) in qualities.genome_to_quality.iter() {
20            trace!("Genome: {}, Quality: {:?}", genome, quality);
21            if quality.completeness >= min_completeness
22                && quality.contamination <= max_contamination
23            {
24                passes.push(genome.clone())
25            }
26        }
27        debug!(
28            "Read in {} genomes from {}, {} passed the quality thresholds",
29            qualities.genome_to_quality.len(),
30            file_path,
31            passes.len()
32        );
33        Ok(passes)
34    }
35
36    pub fn read_file_path(
37        file_path: &str,
38    ) -> Result<CheckMResult<CheckM1GenomeQuality>, CheckMReadError> {
39        let mut qualities = BTreeMap::new();
40        let rdr = csv::ReaderBuilder::new()
41            .delimiter(b'\t')
42            .has_headers(true)
43            .from_path(std::path::Path::new(file_path));
44        let mut total_seen = 0usize;
45
46        if rdr.is_err() {
47            return Err(CheckMReadError {
48                msg: format!("Failed to parse CheckM v1 genome quality {}", file_path),
49            });
50        }
51
52        for result in rdr.unwrap().records() {
53            let res = result.expect("Parsing error in CheckM tab table file");
54            if res.len() != 14 {
55                return Err(CheckMReadError {
56                    msg: format!(
57                        "Parsing error in CheckM tab table file - didn't find 14 columns in line {:?}",
58                        res
59                    ),
60                });
61            }
62            let completeness: f32 = res[11]
63                .parse::<f32>()
64                .expect("Error parsing completeness in checkm tab table");
65            let contamination: f32 = res[12]
66                .parse::<f32>()
67                .expect("Error parsing contamination in checkm tab table");
68            let strain_heterogeneity: f32 = res[13]
69                .parse::<f32>()
70                .expect("Error parsing contamination in checkm tab table");
71            trace!(
72                "For {}, found completeness {} and contamination {}",
73                &res[0],
74                completeness,
75                contamination
76            );
77            match qualities.insert(
78                res[0].to_string(),
79                CheckM1GenomeQuality {
80                    completeness: completeness / 100.,
81                    contamination: contamination / 100.,
82                    strain_heterogeneity: strain_heterogeneity / 100.,
83                },
84            ) {
85                None => {}
86                Some(_) => {
87                    return Err(CheckMReadError {
88                        msg: format!(
89                            "The genome {} was found multiple times in the checkm file {}",
90                            &res[0], file_path
91                        ),
92                    });
93                }
94            };
95            total_seen += 1;
96        }
97        debug!("Read in {} genomes from {}", total_seen, file_path);
98        Ok(CheckMResult {
99            genome_to_quality: qualities,
100        })
101    }
102}
/// Marker type grouping the readers for CheckM2 tab-separated quality
/// reports. It holds no data; all functionality lives in associated functions.
pub struct CheckM2QualityReport {}
104impl CheckM2QualityReport {
105    pub fn good_quality_genome_names(
106        file_path: &str,
107        min_completeness: f32,
108        max_contamination: f32,
109    ) -> Result<Vec<String>, CheckMReadError> {
110        let mut passes = vec![];
111        let qualities = CheckM2QualityReport::read_file_path(file_path)?;
112        for (genome, quality) in qualities.genome_to_quality.iter() {
113            trace!("Genome: {}, Quality: {:?}", genome, quality);
114            if quality.completeness >= min_completeness
115                && quality.contamination <= max_contamination
116            {
117                passes.push(genome.clone())
118            }
119        }
120        debug!(
121            "Read in {} genomes from {}, {} passed the quality thresholds",
122            qualities.genome_to_quality.len(),
123            file_path,
124            passes.len()
125        );
126        Ok(passes)
127    }
128
129    pub fn read_file_path(
130        file_path: &str,
131    ) -> Result<CheckMResult<CheckM2GenomeQuality>, CheckMReadError> {
132        let mut qualities = BTreeMap::new();
133        let rdr_res = csv::ReaderBuilder::new()
134            .delimiter(b'\t')
135            .has_headers(true)
136            .from_path(std::path::Path::new(file_path));
137        let mut total_seen = 0usize;
138
139        if rdr_res.is_err() {
140            return Err(CheckMReadError {
141                msg: format!("Failed to parse CheckM v2 genome quality {}", file_path),
142            });
143        }
144        let mut rdr = rdr_res.unwrap();
145
146        // The number of columns can change, so just check the first 3 columns have the expected names
147        let headers = rdr.headers().unwrap().clone();
148        let completeness_choice = &headers[1] == "Completeness_General";
149        if &headers[0] != "Name"
150            || (&headers[1] != "Completeness" && !completeness_choice)
151            || &headers[2] != "Contamination"
152            || (&headers[3] != "Completeness_Specific" && completeness_choice)
153            || (&headers[4] != "Completeness_Model_Used" && completeness_choice)
154        {
155            return Err(CheckMReadError {
156                msg: format!(
157                    "Parsing error in CheckM2 qualities file - didn't find expected headers in line {:?}",
158                    headers
159                ),
160            });
161        }
162
163        for result in rdr.records() {
164            let res = result.expect("Parsing error in CheckM qualities file");
165
166            let completeness: f32 = if completeness_choice {
167                match &res[4] {
168                    "Neural Network (Specific Model)" => &res[3],
169                    "Gradient Boost (General Model)" => &res[1],
170                    _ => {
171                        return Err(CheckMReadError {
172                            msg: format!(
173                                "Parsing error in CheckM2 qualities file - didn't find expected model name in line {:?}",
174                                res
175                            ),
176                        });
177                    }
178                }
179            } else {
180                &res[1]
181            }
182            .parse::<f32>()
183            .expect("Error parsing completeness in checkm tab table");
184
185            let contamination: f32 = res[2]
186                .parse::<f32>()
187                .expect("Error parsing contamination in checkm tab table");
188            trace!(
189                "For {}, found completeness {} and contamination {}",
190                &res[0],
191                completeness,
192                contamination
193            );
194            match qualities.insert(
195                res[0].to_string(),
196                CheckM2GenomeQuality {
197                    completeness: completeness / 100.,
198                    contamination: contamination / 100.,
199                },
200            ) {
201                None => {}
202                Some(_) => {
203                    return Err(CheckMReadError {
204                        msg: format!(
205                            "The genome {} was found multiple times in the checkm file {}",
206                            &res[0], file_path
207                        ),
208                    });
209                }
210            };
211            total_seen += 1;
212        }
213        debug!("Read in {} genomes from {}", total_seen, file_path);
214        Ok(CheckMResult {
215            genome_to_quality: qualities,
216        })
217    }
218}
219
/// Quality metrics common to every supported CheckM output format.
///
/// The readers in this file store both values as fractions (the percentage
/// from the input file divided by 100).
pub trait GenomeQuality {
    /// Estimated completeness of the genome.
    fn completeness(&self) -> f32;
    /// Estimated contamination of the genome.
    fn contamination(&self) -> f32;
}

/// Quality of one genome as read from a CheckM v1 tab table.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CheckM1GenomeQuality {
    pub completeness: f32,
    pub contamination: f32,
    pub strain_heterogeneity: f32,
}

impl GenomeQuality for CheckM1GenomeQuality {
    fn completeness(&self) -> f32 {
        self.completeness
    }

    fn contamination(&self) -> f32 {
        self.contamination
    }
}

/// Quality of one genome as read from a CheckM2 quality report.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct CheckM2GenomeQuality {
    pub completeness: f32,
    pub contamination: f32,
}

impl GenomeQuality for CheckM2GenomeQuality {
    fn completeness(&self) -> f32 {
        self.completeness
    }

    fn contamination(&self) -> f32 {
        self.contamination
    }
}

/// Parsed CheckM output: genome name mapped to its quality, kept sorted by
/// name because the backing store is a `BTreeMap`.
pub struct CheckMResult<T: GenomeQuality> {
    pub genome_to_quality: BTreeMap<String, T>,
}
259
260impl<T: GenomeQuality + Copy + std::fmt::Debug> CheckMResult<T> {
261    pub fn order_genomes_by_completeness_minus_4contamination(&self) -> Vec<&String> {
262        let mut genomes_and_qualities: Vec<_> = self.genome_to_quality.iter().collect();
263
264        // sort in reverse order
265        genomes_and_qualities.sort_unstable_by(|(_, q1), (_, q2)| {
266            (q2.completeness() - 4. * q2.contamination())
267                .partial_cmp(&(q1.completeness() - 4. * q1.contamination()))
268                .unwrap()
269        });
270        genomes_and_qualities
271            .into_iter()
272            .map(|(genome, _)| genome)
273            .collect()
274    }
275
276    pub fn order_genomes_by_completeness_minus_5contamination(&self) -> Vec<&String> {
277        let mut genomes_and_qualities: Vec<_> = self.genome_to_quality.iter().collect();
278
279        // sort in reverse order
280        genomes_and_qualities.sort_unstable_by(|(_, q1), (_, q2)| {
281            (q2.completeness() - 5. * q2.contamination())
282                .partial_cmp(&(q1.completeness() - 5. * q1.contamination()))
283                .unwrap()
284        });
285        genomes_and_qualities
286            .into_iter()
287            .map(|(genome, _)| genome)
288            .collect()
289    }
290
291    /// Map paths to FASTA paths to CheckM qualities, and return paths ordered
292    /// by their quality, where quality is completeness - 4*contamination. If
293    /// not None, min_completeness and max_contamination specify thresholds as
294    /// fractions e.g. 0.8 not 80.
295    pub fn order_fasta_paths_by_completeness_minus_4contamination<'a>(
296        &self,
297        genome_fasta_files: &[&'a str],
298        min_completeness: Option<f32>,
299        max_contamination: Option<f32>,
300    ) -> Result<Vec<&'a str>, String> {
301        let mut key_to_order = BTreeMap::new();
302        for (i, key) in self
303            .order_genomes_by_completeness_minus_4contamination()
304            .into_iter()
305            .enumerate()
306        {
307            key_to_order.insert(key.as_str(), i);
308        }
309        self.order_fasta_list(
310            &key_to_order,
311            genome_fasta_files,
312            min_completeness,
313            max_contamination,
314        )
315    }
316
317    /// Map paths to FASTA paths to CheckM qualities, and return paths ordered
318    /// by their quality, where quality is completeness - 5*contamination. If
319    /// not None, min_completeness and max_contamination specify thresholds as
320    /// fractions e.g. 0.8 not 80.
321    pub fn order_fasta_paths_by_completeness_minus_5contamination<'a>(
322        &self,
323        genome_fasta_files: &[&'a str],
324        min_completeness: Option<f32>,
325        max_contamination: Option<f32>,
326    ) -> Result<Vec<&'a str>, String> {
327        let mut key_to_order = BTreeMap::new();
328        for (i, key) in self
329            .order_genomes_by_completeness_minus_5contamination()
330            .into_iter()
331            .enumerate()
332        {
333            key_to_order.insert(key.as_str(), i);
334        }
335        self.order_fasta_list(
336            &key_to_order,
337            genome_fasta_files,
338            min_completeness,
339            max_contamination,
340        )
341    }
342
343    fn order_fasta_list<'a>(
344        &self,
345        key_to_order: &BTreeMap<&str, usize>,
346        genome_fasta_files: &[&'a str],
347        min_completeness: Option<f32>,
348        max_contamination: Option<f32>,
349    ) -> Result<Vec<&'a str>, String> {
350        let mut fasta_and_order: Vec<(&str, usize)> = vec![];
351        for fasta_path in genome_fasta_files.iter() {
352            let checkm_name = std::path::Path::new(fasta_path)
353                .file_stem()
354                .unwrap()
355                .to_str()
356                .unwrap();
357            match key_to_order.get(checkm_name) {
358                Some(rank) => {
359                    if (min_completeness.is_none()
360                        || self.genome_to_quality[checkm_name].completeness()
361                            >= min_completeness.unwrap())
362                        && (max_contamination.is_none()
363                            || self.genome_to_quality[checkm_name].contamination()
364                                <= max_contamination.unwrap())
365                    {
366                        fasta_and_order.push((*fasta_path, *rank))
367                    }
368                }
369                None => {
370                    return Err(format!(
371                        "Unable to find quality for genome fasta file {}",
372                        fasta_path
373                    ));
374                }
375            }
376        }
377        fasta_and_order.sort_unstable_by(|a, b| a.1.cmp(&b.1));
378        Ok(fasta_and_order.into_iter().map(|(g, _)| g).collect())
379    }
380
381    pub fn filter(&self, min_completeness: f32, max_contamination: f32) -> CheckMResult<T> {
382        let mut new = BTreeMap::new();
383
384        for (g, q) in self.genome_to_quality.iter() {
385            if q.completeness() >= min_completeness && q.contamination() <= max_contamination {
386                new.insert(g.clone().to_string(), *q);
387            }
388        }
389        CheckMResult {
390            genome_to_quality: new,
391        }
392    }
393
394    pub fn retrieve_via_fasta_path(&self, fasta_path: &str) -> Result<T, CheckMRetrievalError> {
395        let checkm_name_stem_res = if fasta_path.ends_with(".gz") {
396            let checkm_name_stem_gz_res = std::path::Path::new(fasta_path).file_stem();
397            if checkm_name_stem_gz_res.is_none() {
398                return Err(CheckMRetrievalError {
399                    msg: format!("Unable to find file_stem for gzipped file {}", fasta_path),
400                });
401            }
402            std::path::Path::new(checkm_name_stem_gz_res.unwrap()).file_stem()
403        } else {
404            std::path::Path::new(fasta_path).file_stem()
405        };
406
407        if checkm_name_stem_res.is_none() {
408            return Err(CheckMRetrievalError {
409                msg: format!("Unable to find file_stem for {}", fasta_path),
410            });
411        }
412        let checkm_name_stem = checkm_name_stem_res.unwrap();
413        let checkm_name_res = checkm_name_stem.to_str();
414        if checkm_name_res.is_none() {
415            return Err(CheckMRetrievalError {
416                msg: format!(
417                    "Failed to convert fasta file name to string: {}",
418                    fasta_path
419                ),
420            });
421        }
422        let checkm_name = checkm_name_res.unwrap();
423        debug!(
424            "Retrieving checkm name {}, derived from {}",
425            checkm_name, fasta_path
426        );
427        trace!("{:?}", self.genome_to_quality);
428        match self.genome_to_quality.get(checkm_name) {
429            Some(q) => Ok(*q),
430            None => {
431                // Possibly the checkm file was created with absolute paths. Try
432                // that.
433                let checkm_parent_res = std::path::Path::new(fasta_path).parent();
434                if checkm_parent_res.is_none() {
435                    return Err(CheckMRetrievalError {
436                        msg: format!("Unable to find parent of fasta path {}", fasta_path),
437                    });
438                };
439                let joined = checkm_parent_res.unwrap().join(checkm_name_stem);
440                let checkm_name2 = joined.to_str();
441                if checkm_name2.is_none() {
442                    return Err(CheckMRetrievalError {
443                        msg: format!("Unable to convert name {} to str", checkm_name),
444                    });
445                }
446                debug!(
447                    "Retrieving absolute path checkm name {}, derived from {}",
448                    checkm_name2.unwrap(),
449                    fasta_path
450                );
451                match self.genome_to_quality.get(checkm_name2.unwrap()) {
452                    Some(q) => Ok(*q),
453                    None => Err(CheckMRetrievalError {
454                        msg: format!(
455                            "Unable to find checkm name {} in checkm results",
456                            checkm_name
457                        ),
458                    }),
459                }
460            }
461        }
462    }
463}
/// Error returned when a genome's quality cannot be looked up in a
/// `CheckMResult`. Displays as its message text.
#[derive(Debug, PartialEq)]
pub struct CheckMRetrievalError {
    msg: String,
}

impl std::fmt::Display for CheckMRetrievalError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Emit the message verbatim.
        f.write_str(&self.msg)
    }
}

impl std::error::Error for CheckMRetrievalError {}
476
/// Error returned when a CheckM quality file cannot be read or parsed.
/// Displays as its message text.
#[derive(Debug, PartialEq)]
pub struct CheckMReadError {
    msg: String,
}

impl std::fmt::Display for CheckMReadError {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        // Emit the message verbatim.
        f.write_str(&self.msg)
    }
}

impl std::error::Error for CheckMReadError {}
489
490#[cfg(test)]
491mod test {
492    use super::*;
493
494    fn init() {
495        let _ = env_logger::builder().is_test(true).try_init();
496    }
497
498    #[test]
499    fn test_good_quality_genome_names() {
500        init();
501        assert_eq!(
502            vec![
503                "GUT_GENOME006390.gff",
504                "GUT_GENOME011264.gff",
505                "GUT_GENOME011296.gff",
506                "GUT_GENOME011536.gff"
507            ],
508            CheckM1TabTable::good_quality_genome_names(&"tests/data/checkm.tsv", 0.56, 0.1)
509                .unwrap()
510        )
511    }
512
513    #[test]
514    fn test_checkm2_good_quality_genome_names() {
515        init();
516        assert_eq!(
517            Ok(vec!["UTC-1_IN_HT_bin.22".to_string(),]),
518            CheckM2QualityReport::good_quality_genome_names(
519                &"tests/data/checkm2/quality_report.tsv",
520                0.7,
521                0.1
522            )
523        );
524        let empty: Vec<String> = vec![];
525        assert_eq!(
526            Ok(empty),
527            CheckM2QualityReport::good_quality_genome_names(
528                &"tests/data/checkm2/quality_report.tsv",
529                0.75,
530                0.1
531            )
532        )
533    }
534
535    #[test]
536    fn test_checkm2_14column_format() {
537        init();
538        assert_eq!(
539            Ok(vec!["AAMD-1".to_string(),]),
540            CheckM2QualityReport::good_quality_genome_names(
541                &"tests/data/checkm2/quality_report2.tsv",
542                0.7,
543                0.1
544            )
545        );
546    }
547
548    #[test]
549    fn test_checkm2_13column_format() {
550        init();
551        assert_eq!(
552            Ok(vec!["genome.6".to_string(),]),
553            CheckM2QualityReport::good_quality_genome_names(
554                &"tests/data/checkm2/quality_report3.tsv",
555                0.7,
556                0.1
557            )
558        );
559    }
560
561    #[test]
562    fn test_retrieve() {
563        init();
564        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
565        assert_eq!(
566            Ok(CheckM1GenomeQuality {
567                completeness: 83.38 / 100.,
568                contamination: 0.,
569                strain_heterogeneity: 0.
570            }),
571            checkm.retrieve_via_fasta_path(&"/some/path/GUT_GENOME011264.gff.fna")
572        );
573        assert!(checkm
574            .retrieve_via_fasta_path(&"/some/path/GUT_not_a_genome_GENOME011264.gff.fna")
575            .is_err());
576    }
577
578    #[test]
579    fn test_retrieve_gzip() {
580        init();
581        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
582        assert_eq!(
583            Ok(CheckM1GenomeQuality {
584                completeness: 83.38 / 100.,
585                contamination: 0.,
586                strain_heterogeneity: 0.
587            }),
588            checkm.retrieve_via_fasta_path(&"/some/path/GUT_GENOME011264.gff.fna.gz")
589        );
590        assert!(checkm
591            .retrieve_via_fasta_path(&"/some/path/GUT_not_a_genome_GENOME011264.gff.fna.gz")
592            .is_err());
593    }
594
595    #[test]
596    fn test_retrieve_checkm2() {
597        init();
598        let checkm =
599            CheckM2QualityReport::read_file_path(&"tests/data/checkm2/quality_report.tsv").unwrap();
600        assert_eq!(
601            Ok(CheckM2GenomeQuality {
602                completeness: 70.17 / 100.,
603                contamination: 0.25 / 100.,
604            }),
605            checkm.retrieve_via_fasta_path(&"/some/path/UTC-1_IN_HT_bin.22.fna")
606        );
607        assert!(checkm
608            .retrieve_via_fasta_path(&"/some/path/GUT_not_a_genome_GENOME011264.gff.fna")
609            .is_err());
610    }
611
612    #[test]
613    fn test_retrieve_checkm2_gzip() {
614        init();
615        let checkm =
616            CheckM2QualityReport::read_file_path(&"tests/data/checkm2/quality_report.tsv").unwrap();
617        assert_eq!(
618            Ok(CheckM2GenomeQuality {
619                completeness: 70.17 / 100.,
620                contamination: 0.25 / 100.,
621            }),
622            checkm.retrieve_via_fasta_path(&"/some/path/UTC-1_IN_HT_bin.22.fna.gz")
623        );
624        assert!(checkm
625            .retrieve_via_fasta_path(&"/some/path/GUT_not_a_genome_GENOME011264.gff.fna.gz")
626            .is_err());
627    }
628
629    #[test]
630    fn test_ordering_4times() {
631        init();
632        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
633        assert_eq!(
634            vec![
635                "GUT_GENOME006390.gff",
636                "GUT_GENOME011264.gff",
637                "GUT_GENOME011296.gff",
638                "GUT_GENOME011367.gff",
639                "GUT_GENOME011536.gff"
640            ],
641            checkm.order_genomes_by_completeness_minus_4contamination()
642        );
643        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm2.tsv").unwrap();
644        assert_eq!(
645            vec![
646                "GUT_GENOME006390.gff",
647                "GUT_GENOME011264.gff",
648                "GUT_GENOME011296.gff",
649                "GUT_GENOME011536.gff",
650                "GUT_GENOME011367.gff"
651            ],
652            checkm.order_genomes_by_completeness_minus_4contamination()
653        );
654    }
655
656    #[test]
657    fn test_ordering_5times() {
658        init();
659        // Cheating here a bit - same result as the minus_4times
660        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
661        assert_eq!(
662            vec![
663                "GUT_GENOME006390.gff",
664                "GUT_GENOME011264.gff",
665                "GUT_GENOME011296.gff",
666                "GUT_GENOME011367.gff",
667                "GUT_GENOME011536.gff"
668            ],
669            checkm.order_genomes_by_completeness_minus_5contamination()
670        );
671        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm2.tsv").unwrap();
672        assert_eq!(
673            vec![
674                "GUT_GENOME006390.gff",
675                "GUT_GENOME011264.gff",
676                "GUT_GENOME011296.gff",
677                "GUT_GENOME011536.gff",
678                "GUT_GENOME011367.gff"
679            ],
680            checkm.order_genomes_by_completeness_minus_5contamination()
681        );
682    }
683
684    #[test]
685    fn test_fasta_ordering_4times() {
686        init();
687        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
688        assert_eq!(
689            vec![
690                "/tmp/GUT_GENOME006390.gff.fna",
691                "GUT_GENOME011264.gff.fna",
692                "GUT_GENOME011296.gff.fna",
693                "GUT_GENOME011367.gff.fna",
694                "GUT_GENOME011536.gff.fna"
695            ],
696            checkm
697                .order_fasta_paths_by_completeness_minus_4contamination(
698                    &vec![
699                        "GUT_GENOME011264.gff.fna",
700                        "/tmp/GUT_GENOME006390.gff.fna",
701                        "GUT_GENOME011296.gff.fna",
702                        "GUT_GENOME011367.gff.fna",
703                        "GUT_GENOME011536.gff.fna"
704                    ],
705                    None,
706                    None,
707                )
708                .unwrap()
709        );
710        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm2.tsv").unwrap();
711        assert_eq!(
712            vec![
713                "/tmp/GUT_GENOME006390.gff.fna",
714                "GUT_GENOME011264.gff.fna",
715                "GUT_GENOME011296.gff.fna",
716                "GUT_GENOME011536.gff.fna",
717                "GUT_GENOME011367.gff.fna"
718            ],
719            checkm
720                .order_fasta_paths_by_completeness_minus_4contamination(
721                    &vec![
722                        "GUT_GENOME011264.gff.fna",
723                        "/tmp/GUT_GENOME006390.gff.fna",
724                        "GUT_GENOME011296.gff.fna",
725                        "GUT_GENOME011367.gff.fna",
726                        "GUT_GENOME011536.gff.fna"
727                    ],
728                    None,
729                    None,
730                )
731                .unwrap()
732        );
733    }
734
735    #[test]
736    fn test_fasta_ordering_4times_min_completeness() {
737        init();
738        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm.tsv").unwrap();
739        assert_eq!(
740            vec![
741                "/tmp/GUT_GENOME006390.gff.fna",
742                "GUT_GENOME011264.gff.fna",
743                "GUT_GENOME011296.gff.fna"
744            ],
745            checkm
746                .order_fasta_paths_by_completeness_minus_4contamination(
747                    &vec![
748                        "GUT_GENOME011264.gff.fna",
749                        "/tmp/GUT_GENOME006390.gff.fna",
750                        "GUT_GENOME011296.gff.fna",
751                        "GUT_GENOME011367.gff.fna",
752                        "GUT_GENOME011536.gff.fna"
753                    ],
754                    Some(0.7),
755                    None,
756                )
757                .unwrap()
758        );
759    }
760
761    #[test]
762    fn test_absolute_path_retrieval() {
763        init();
764        let checkm = CheckM1TabTable::read_file_path(&"tests/data/checkm3.tsv").unwrap();
765        assert_eq!(
766            Ok(CheckM1GenomeQuality {
767                completeness: 0.9361,
768                contamination: 0.0037,
769                strain_heterogeneity: 1.0,
770            }),
771            checkm.retrieve_via_fasta_path("/tmp/GUT_GENOME006390.gff.fna")
772        );
773    }
774}