lorikeet_genome/annotator/
variant_annotator_engine.rs

1use hashlink::LinkedHashMap;
2use rust_htslib::bcf::Header;
3
4use crate::annotator::variant_annotation::{Annotation, AnnotationType, VariantAnnotations};
5use crate::genotype::genotype_builder::{AttributeObject, GenotypesContext};
6use crate::model::allele_likelihoods::AlleleLikelihoods;
7use crate::model::byte_array_allele::Allele;
8use crate::model::variant_context::VariantContext;
9
10/**
11 * The class responsible for computing annotations for variants.
12 * Annotations are auto-discovered - ie, any class that extends {@link VariantAnnotation} and
13 * lives in this package is treated as an annotation and the engine will attempt to create instances of it
14 * by calling the non-arg constructor (loading will fail if there is no no-arg constructor).
15 */
16#[derive(Debug)]
17pub struct VariantAnnotationEngine {}
18
19impl VariantAnnotationEngine {
20    /**
21     * Annotates the given variant context - adds all annotations that satisfy the predicate.
22     * NOTE: Due to trait impl limitations. read_likelihoods should be marginalized over the alleles
23     *       in vc BEFORE being passed to this function. GATK HaplotypeCaller Marginalizes inside
24     *       function, but this is not possible with current setup as it we want general functions :)
25     * @param vc the variant context to annotate
26     * @param features context containing the features that overlap the given variant
27     * @param ref the reference context of the variant to annotate or null if there is none
28     * @param readLikelihoods readLikelihoods indexed by sample, allele, and read within sample. May be null
29     * @param addAnnot function that indicates if the given annotation type should be added to the variant
30     *
31     */
32    pub fn annotate_context<A: Allele>(
33        vc: &VariantContext,
34        read_likelihoods: &mut AlleleLikelihoods<A>,
35        add_annotation: Box<dyn Fn(&Annotation) -> bool>,
36    ) -> VariantContext {
37        // annotate genotypes, creating another new VC in the process
38        let mut builder = VariantContext::build_from_vc(vc);
39        // genotype context annotation here
40        builder.genotypes = Self::add_genotype_annotations(&mut builder, read_likelihoods);
41        // debug!(
42        //     "genotypes {:?} empty {}",
43        //     &builder.genotypes,
44        //     builder.genotypes.is_empty()
45        // );
46        let info_annot_map =
47            Self::add_info_annotations(&mut builder, read_likelihoods, add_annotation);
48
49        builder.attributes(info_annot_map);
50
51        return builder;
52    }
53
54    fn add_genotype_annotations<A: Allele>(
55        vc: &mut VariantContext,
56        likelihoods: &mut AlleleLikelihoods<A>,
57    ) -> GenotypesContext {
58        let mut genotypes = GenotypesContext::create(vc.get_n_samples());
59
60        for g_index in 0..vc.genotypes.genotypes().len() {
61            let mut gb = vc.genotypes.genotypes()[g_index].clone();
62            for genotype_annotation in Self::genotype_annotations() {
63                genotype_annotation.annotate(vc, Some(&mut gb), likelihoods);
64            }
65
66            genotypes.add(gb);
67        }
68
69        return genotypes;
70    }
71
72    fn add_info_annotations<A: Allele>(
73        vc: &mut VariantContext,
74        likelihoods: &mut AlleleLikelihoods<A>,
75        add_annotation: Box<dyn Fn(&Annotation) -> bool>,
76    ) -> LinkedHashMap<String, AttributeObject> {
77        let mut info_annot_map = LinkedHashMap::new();
78        for annotation in Self::vc_annotations() {
79            if add_annotation(&annotation) {
80                let annotation_result = annotation.annotate(vc, None, likelihoods);
81                info_annot_map.insert(
82                    annotation_result.get_key().to_string(),
83                    annotation_result.get_object(),
84                );
85            }
86        }
87
88        return info_annot_map;
89    }
90
91    /// Annotations added to the VariantContext
92    pub fn vc_annotations() -> Vec<Annotation> {
93        vec![
94            Annotation::new(VariantAnnotations::Depth, AnnotationType::Info),
95            Annotation::new(VariantAnnotations::QualByDepth, AnnotationType::Info),
96            Annotation::new(VariantAnnotations::MappingQuality, AnnotationType::Info),
97            Annotation::new(VariantAnnotations::BaseQuality, AnnotationType::Info),
98            Annotation::new(VariantAnnotations::Qualified, AnnotationType::Info),
99        ]
100    }
101
102    /// Annotations added to the Genotype of VariantContexts
103    pub fn genotype_annotations() -> Vec<Annotation> {
104        vec![
105            Annotation::new(VariantAnnotations::Depth, AnnotationType::Format),
106            Annotation::new(
107                VariantAnnotations::DepthPerAlleleBySample,
108                AnnotationType::Format,
109            ),
110            Annotation::new(VariantAnnotations::AlleleFraction, AnnotationType::Info),
111            Annotation::new(VariantAnnotations::AlleleCount, AnnotationType::Info),
112        ]
113    }
114
115    /// Annotations that are precalculated or calculated through other annotations
116    pub fn precalculated_annotations() -> Vec<Annotation> {
117        vec![
118            Annotation::new(VariantAnnotations::Genotype, AnnotationType::Format),
119            Annotation::new(VariantAnnotations::GenotypeQuality, AnnotationType::Format),
120            Annotation::new(VariantAnnotations::PhredLikelihoods, AnnotationType::Format),
121            Annotation::new(VariantAnnotations::MLEAC, AnnotationType::Info),
122            Annotation::new(VariantAnnotations::MLEAF, AnnotationType::Info),
123        ]
124    }
125
126    /// Sorted list of annotations. Format field annotations appear first
127    fn all_annotations() -> Vec<Annotation> {
128        let mut annotations = Self::precalculated_annotations();
129        annotations.extend(Self::genotype_annotations());
130        annotations.extend(Self::vc_annotations());
131        annotations.sort();
132        return annotations;
133    }
134
135    fn strain_annotations() -> Vec<Annotation> {
136        vec![
137            Annotation::new(VariantAnnotations::VariantGroup, AnnotationType::Info),
138            Annotation::new(VariantAnnotations::Strain, AnnotationType::Format),
139        ]
140    }
141
142    /// Populates a given VCF header with all possible annotation fields and info
143    pub fn populate_vcf_header(header: &mut Header, strain_info: bool) {
144        for annotation in Self::all_annotations() {
145            header.push_record(annotation.generate_header_record().as_bytes());
146        }
147        if strain_info {
148            for annotation in Self::strain_annotations() {
149                header.push_record(annotation.generate_header_record().as_bytes());
150            }
151        }
152    }
153}