Skip to main content

aardvark_bio/
lib.rs

1
2/*!
3# Aardvark-bio
4Aardvark-bio is the underlying library that supports the Aardvark command line tool.
5The library provides the core functionality for comparing and merging variant calls.
6The `waffle_solver` module contains the main entry point for the compare command (`solve_compare_region`), with example usage below:
7
8## Example compare usage
9```rust
10use aardvark_bio::data_types::compare_region::CompareRegion;
11use aardvark_bio::data_types::coordinates::Coordinates;
12use aardvark_bio::data_types::phase_enums::PhasedZygosity;
13use aardvark_bio::data_types::variants::Variant;
14use aardvark_bio::data_types::summary_metrics::{SummaryGtMetrics, SummaryMetrics};
15use aardvark_bio::data_types::variant_metrics::{VariantMetrics, VariantSource};
16use aardvark_bio::waffle_solver::{CompareConfig, solve_compare_region};
17use rust_lib_reference_genome::reference_genome::ReferenceGenome;
18
19// create a basic reference genome in memory
20let mut reference_genome = ReferenceGenome::empty_reference();
21reference_genome.add_contig(
22    "mock_chr1".to_string(), "ACCGTTACCAGGACTTGACAAACCG"
23).unwrap();
24
25// create a problem to solve; this is a single SNV (C>G at 0-based position 2) inside the compared region
26let coordinates = Coordinates::new("mock_chr1".to_string(), 0, 10); // ACCGTTACCA
27let truth_variants = vec![
28    Variant::new_snv(0, 2, b"C".to_vec(), b"G".to_vec()).unwrap()
29];
30let truth_zygosity = vec![
31    PhasedZygosity::PhasedHet10
32];
33
34// create a query set that is the same as the truth set
35let query_variants = truth_variants.clone();
36let query_zygosity = truth_zygosity.clone();
37
38// put it all into a CompareRegion object for the solver
39let problem = CompareRegion::new(
40    0, coordinates, truth_variants, truth_zygosity, query_variants, query_zygosity
41).unwrap();
42
43// solve the problem
44let compare_config = CompareConfig::default();
45let result = solve_compare_region(&problem, &reference_genome, compare_config, None).unwrap();
46
47// check the results
48assert_eq!(result.total_ed(), 0); // should be exact paths
49let group_metrics = result.group_metrics();
50assert_eq!(*group_metrics.joint_metrics().gt(), SummaryGtMetrics::new(1, 0, 1, 0, 0, 0));
51assert_eq!(*group_metrics.joint_metrics().hap(), SummaryMetrics::new(1, 0, 1, 0));
52assert_eq!(*group_metrics.joint_metrics().basepair(), SummaryMetrics::new(2*1, 0, 2*1, 0));
53assert_eq!(result.truth_variant_data(), &[VariantMetrics::new(VariantSource::Truth, 1, 1).unwrap()]);
54assert_eq!(result.query_variant_data(), &[VariantMetrics::new(VariantSource::Query, 1, 1).unwrap()]);
55
56// check the sequences also
57let sequence_bundle = result.sequence_bundle().unwrap();
58assert_eq!(&sequence_bundle.ref_seq,    "ACCGTTACCA");
59assert_eq!(&sequence_bundle.truth_seq1, "ACGGTTACCA");
60assert_eq!(&sequence_bundle.query_seq1, "ACGGTTACCA");
61assert_eq!(&sequence_bundle.truth_seq2, "ACCGTTACCA");
62assert_eq!(&sequence_bundle.query_seq2, "ACCGTTACCA");
63```
64*/
65
66/// Command-line interface functionality that is specific to the Aardvark tool
67pub mod cli;
68/// Shared data types used across the library (e.g. `compare_region`, `coordinates`, `phase_enums`, `variants`, `summary_metrics`, `variant_metrics`)
69pub mod data_types;
70/// Dynamic WFA implementations that enable fast quantification of the edit distance between two dynamic sequences
71pub mod dwfa;
72/// Tooling for optimizing the exact GT category
73pub mod exact_gt_optimizer;
74/// Core logic for identifying regions to merge
75pub mod merge_solver;
76/// Tooling for parsing input files into meaningful structs / data
77pub mod parsing;
78/// Optimizes the query sequence relative to the truth
79pub mod query_optimizer;
80/// Generic utility functions
81pub mod util;
82/// Entry point for evaluating truth and query variants relative to each other; see `solve_compare_region` and `CompareConfig`
83pub mod waffle_solver;
84/// All output writers
85pub mod writers;