// genomicframe-core 0.2.0
//
// High-performance genomics I/O and interoperability layer
// Documentation
//! Annotated VCF records
//!
//! Provides a wrapper around VcfRecord that enriches it with genomic context
//! from annotation databases (genes, regulatory elements, conservation, etc.).

use crate::formats::vcf::VcfRecord;
use crate::interval::annotation::AnnotationIndex;
use crate::interval::GenomicInterval;

/// A VCF record enriched with genomic annotations.
///
/// Wraps a [`VcfRecord`] together with its [`GenomicInterval`] and the
/// annotation hits gathered so far from external databases (BED files, GFF
/// files, etc.). A freshly constructed value carries no annotations; they are
/// attached on demand, one source per call to `annotate_with`.
#[derive(Debug, Clone)]
pub struct AnnotatedVcfRecord {
    /// The original VCF record.
    pub record: VcfRecord,

    /// Genomic interval representation of `record`, computed once at
    /// construction time so queries do not have to rebuild it.
    ///
    /// NOTE(review): nothing in this type recomputes the interval if `record`
    /// is mutated afterwards through the public field — confirm callers treat
    /// the pair as read-only.
    pub interval: GenomicInterval,

    /// Annotations from external databases.
    ///
    /// Each inner vector represents one annotation source (e.g., genes, exons,
    /// repeats), in the order the sources were applied. An inner vector is
    /// empty when its source produced no hits for this record.
    pub annotations: Vec<Vec<String>>,
}

impl AnnotatedVcfRecord {
    /// Wrap a VCF record, deriving its genomic interval up front.
    ///
    /// The interval comes from the `&VcfRecord` → `GenomicInterval`
    /// conversion; the annotation list starts out empty.
    pub fn new(record: VcfRecord) -> Self {
        Self {
            // Fields are evaluated in written order, so the borrow used for
            // the conversion ends before `record` is moved in below.
            interval: (&record).into(),
            record,
            annotations: Vec::new(),
        }
    }

    /// Query `index` with this record's interval and store the hits.
    ///
    /// Every hit is converted to an owned `String`. Exactly one new source is
    /// appended per call, even when the query yields nothing, so the position
    /// of a source in `annotations` matches the order of the calls.
    pub fn annotate_with(&mut self, index: &AnnotationIndex) {
        let hits = index.query(&self.interval);
        let labels: Vec<String> = hits.into_iter().map(|hit| hit.to_string()).collect();
        self.annotations.push(labels);
    }

    /// Borrow every annotation from every source as one flat list.
    ///
    /// Sources are visited in insertion order and the hits of each source keep
    /// their stored order.
    pub fn all_annotations(&self) -> Vec<&str> {
        self.annotations
            .iter()
            .flatten()
            .map(String::as_str)
            .collect()
    }

    /// Report whether at least one source holds at least one annotation.
    ///
    /// Sources that were applied but produced no hits do not count.
    pub fn is_annotated(&self) -> bool {
        // "Some source is non-empty" is the negation of "every source is empty".
        !self.annotations.iter().all(Vec::is_empty)
    }

    /// Convenience accessor for the first hit of the first source.
    ///
    /// Returns `None` when no source has been applied yet, and also when the
    /// first source is empty — later sources are not consulted.
    pub fn primary_annotation(&self) -> Option<&str> {
        let first_source = self.annotations.first()?;
        first_source.first().map(String::as_str)
    }
}

impl From<VcfRecord> for AnnotatedVcfRecord {
    fn from(record: VcfRecord) -> Self {
        Self::new(record)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the chr1:1000 A>G record that both tests start from.
    fn sample_record() -> VcfRecord {
        VcfRecord {
            chrom: String::from("chr1"),
            pos: 1000,
            id: String::new(),
            reference: String::from("A"),
            alt: vec![String::from("G")],
            qual: Some(30.0),
            filter: String::from("PASS"),
            info: String::new(),
            format: None,
            samples: Vec::new(),
        }
    }

    /// A freshly wrapped record exposes its interval and has no annotations.
    #[test]
    fn test_annotated_vcf_record() {
        let annotated = AnnotatedVcfRecord::new(sample_record());

        let interval = &annotated.interval;
        assert_eq!(interval.chrom, "chr1");
        // The 1-based VCF position 1000 becomes a 0-based start of 999.
        assert_eq!(interval.start, 999);
        assert_eq!(interval.end, 1000);

        assert!(!annotated.is_annotated());
        assert_eq!(annotated.primary_annotation(), None);
    }

    /// The accessors agree with annotation sources pushed directly onto the record.
    #[test]
    fn test_annotation_methods() {
        let mut annotated = AnnotatedVcfRecord::new(sample_record());

        // Fill in two sources by hand instead of querying an index.
        let genes = vec![String::from("GeneA"), String::from("GeneB")];
        let exons = vec![String::from("Exon1")];
        annotated.annotations.push(genes);
        annotated.annotations.push(exons);

        assert!(annotated.is_annotated());
        assert_eq!(annotated.primary_annotation(), Some("GeneA"));

        let flattened = annotated.all_annotations();
        assert_eq!(flattened.len(), 3);
        for expected in ["GeneA", "GeneB", "Exon1"] {
            assert!(flattened.contains(&expected));
        }
    }
}