hpo 0.10.1

Human Phenotype Ontology Similarity
Documentation
//! Compare two versions of the HPO Ontology to each other
//!
//! # Examples
//!
//! ```rust
//! use hpo::Ontology;
//!
//! let old_ontology = Ontology::default();
//! let new_ontology = Ontology::default();
//!
//! let comparison = old_ontology.compare(&new_ontology);
//!
//! for term in comparison.changed_hpo_terms() {
//!     println!("Changed term: {}", term.id());
//! }
//!
//! for term in comparison.added_hpo_terms() {
//!     println!("New term: {}", term.id());
//! }
//! // ...
//! ```
use std::collections::HashSet;
use std::fmt::Display;

use crate::annotations::{Disease, Gene, OmimDisease, OrphaDisease};
use crate::term::HpoGroup;
use crate::{HpoTerm, HpoTermId, Ontology};

#[derive(Debug)]
/// Compares the content of two Ontologies
///
/// This can be used when a new HPO masterdata release is available
/// to check what is changed between the previous and new one.
pub struct Comparison<'a> {
    lhs: &'a Ontology,
    rhs: &'a Ontology,
}

impl<'a> Display for Comparison<'a> {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        write!(
            f,
            "Version\t{}\t{}\nTerms\t{}\t{}\nGenes\t{}\t{}\nOmim Diseases\t{}\t{}\nOrpha Diseases\t{}\t{}",
            self.lhs.hpo_version(),
            self.rhs.hpo_version(),
            self.lhs.len(),
            self.rhs.len(),
            self.lhs.genes().count(),
            self.rhs.genes().count(),
            self.lhs.omim_diseases().count(),
            self.rhs.omim_diseases().count(),
            self.lhs.orpha_diseases().count(),
            self.rhs.orpha_diseases().count()
        )
    }
}

impl<'a> Comparison<'a> {
    /// Constructs a new [`Comparison`] from two [`Ontology`]
    /// The first argument, `lhs`, is considered the `old` or `base` Ontology,
    /// while the second argument, `rhs` is considered the `new` or `changed` one.
    pub fn new(lhs: &'a Ontology, rhs: &'a Ontology) -> Self {
        Self { lhs, rhs }
    }

    /// Returns all [`HpoTerm`]s that are exclusively in the `new` Ontology
    pub fn added_hpo_terms(&self) -> Vec<HpoTerm<'a>> {
        self.rhs
            .hpos()
            .filter(|term| self.lhs.hpo(term.id()).is_none())
            .collect()
    }

    /// Returns all [`HpoTerm`]s that are exclusively in the `old` Ontology
    pub fn removed_hpo_terms(&self) -> Vec<HpoTerm<'a>> {
        self.lhs
            .hpos()
            .filter(|term| self.rhs.hpo(term.id()).is_none())
            .collect()
    }

    /// Returns an [`HpoTermDelta`] struct for every HPO term that is different
    /// between the `old` and `new` Ontology.
    ///
    /// Differences are defined as either:
    /// - Changed name
    /// - Changed direct parents
    /// - Changed obsolete state
    /// - Changed replacement term
    pub fn changed_hpo_terms(&self) -> Vec<HpoTermDelta> {
        self.lhs
            .hpos()
            .filter_map(|term| {
                if let Some(rhs) = self.rhs.hpo(term.id()) {
                    HpoTermDelta::new(term, rhs)
                } else {
                    None
                }
            })
            .collect()
    }

    /// Returns all [`Gene`]s that are exclusively in the `new` Ontology
    pub fn added_genes(&self) -> Vec<&Gene> {
        self.rhs
            .genes()
            .filter(|gene| self.lhs.gene(gene.id()).is_none())
            .collect()
    }

    /// Returns all [`Gene`]s that are exclusively in the `old` Ontology
    pub fn removed_genes(&self) -> Vec<&Gene> {
        self.lhs
            .genes()
            .filter(|gene| self.rhs.gene(gene.id()).is_none())
            .collect()
    }

    /// Returns an [`AnnotationDelta`] struct for every [`Gene`] that is different
    /// between the `old` and `new` Ontology.
    ///
    /// Differences are defined as either:
    /// - Changed name
    /// - Changed direct associated `HpoTerm`s
    pub fn changed_genes(&self) -> Vec<AnnotationDelta> {
        self.lhs
            .genes()
            .filter_map(|gene| {
                if let Some(rhs) = self.rhs.gene(gene.id()) {
                    AnnotationDelta::gene(gene, rhs)
                } else {
                    None
                }
            })
            .collect()
    }

    /// Returns all [`OmimDisease`]s that are exclusively in the `new` Ontology
    pub fn added_omim_diseases(&self) -> Vec<&OmimDisease> {
        self.rhs
            .omim_diseases()
            .filter(|disease| self.lhs.omim_disease(disease.id()).is_none())
            .collect()
    }

    /// Returns all [`OmimDisease`]s that are exclusively in the `old` Ontology
    pub fn removed_omim_diseases(&self) -> Vec<&OmimDisease> {
        self.lhs
            .omim_diseases()
            .filter(|disease| self.rhs.omim_disease(disease.id()).is_none())
            .collect()
    }

    /// Returns an [`AnnotationDelta`] struct for every [`OmimDisease`] that is different
    /// between the `old` and `new` Ontology.
    ///
    /// Differences are defined as either:
    /// - Changed name
    /// - Changed direct associated `HpoTerm`s
    pub fn changed_omim_diseases(&self) -> Vec<AnnotationDelta> {
        self.lhs
            .omim_diseases()
            .filter_map(|disease| {
                if let Some(rhs) = self.rhs.omim_disease(disease.id()) {
                    AnnotationDelta::disease(disease, rhs)
                } else {
                    None
                }
            })
            .collect()
    }

    /// Returns all [`OrphaDisease`]s that are exclusively in the `new` Ontology
    pub fn added_orpha_diseases(&self) -> Vec<&OrphaDisease> {
        self.rhs
            .orpha_diseases()
            .filter(|disease| self.lhs.orpha_disease(disease.id()).is_none())
            .collect()
    }

    /// Returns all [`OrphaDisease`]s that are exclusively in the `old` Ontology
    pub fn removed_orpha_diseases(&self) -> Vec<&OrphaDisease> {
        self.lhs
            .orpha_diseases()
            .filter(|disease| self.rhs.orpha_disease(disease.id()).is_none())
            .collect()
    }

    /// Returns an [`AnnotationDelta`] struct for every [`OrphaDisease`] that is different
    /// between the `old` and `new` Ontology.
    ///
    /// Differences are defined as either:
    /// - Changed name
    /// - Changed direct associated `HpoTerm`s
    pub fn changed_orpha_diseases(&self) -> Vec<AnnotationDelta> {
        self.lhs
            .orpha_diseases()
            .filter_map(|disease| {
                if let Some(rhs) = self.rhs.orpha_disease(disease.id()) {
                    AnnotationDelta::disease(disease, rhs)
                } else {
                    None
                }
            })
            .collect()
    }
}

/// Differences between two [`HpoTerm`]s
pub struct HpoTermDelta {
    term_id: HpoTermId,
    changed_name: (String, String),
    added_parents: Vec<HpoTermId>,
    removed_parents: Vec<HpoTermId>,
    obsolete: (bool, bool),
    replacement: (Option<HpoTermId>, Option<HpoTermId>),
}

impl HpoTermDelta {
    /// Constructs a new [`HpoTermDelta`] by comparing two [`HpoTerm`]s
    ///
    /// Returns `None` if both are identical
    pub fn new(lhs: HpoTerm, rhs: HpoTerm) -> Option<Self> {
        let changed_name = (lhs.name().to_string(), rhs.name().to_string());

        let lhs_parents: HashSet<HpoTermId> = lhs.parents().map(|t| t.id()).collect();
        let rhs_parents: HashSet<HpoTermId> = rhs.parents().map(|t| t.id()).collect();

        let removed_parents: Vec<HpoTermId> =
            lhs_parents.difference(&rhs_parents).copied().collect();
        let added_parents: Vec<HpoTermId> = rhs_parents.difference(&lhs_parents).copied().collect();

        let obsolete = (lhs.is_obsolete(), rhs.is_obsolete());
        let replacement = (
            lhs.replaced_by().map(|t| t.id()),
            rhs.replaced_by().map(|t| t.id()),
        );

        if changed_name.0 != changed_name.1
            || !removed_parents.is_empty()
            || !added_parents.is_empty()
            || obsolete.0 != obsolete.1
            || replacement.0 != replacement.1
        {
            let term_id = lhs.id();
            Some(Self {
                term_id,
                changed_name,
                added_parents,
                removed_parents,
                obsolete,
                replacement,
            })
        } else {
            None
        }
    }

    /// Returns all direct parent [`HpoTermId`]s of the `new` term that
    /// are not parents of the `old` term
    ///
    /// Returns `None` if no such terms exist
    pub fn added_parents(&self) -> Option<&Vec<HpoTermId>> {
        if self.added_parents.is_empty() {
            None
        } else {
            Some(&self.added_parents)
        }
    }

    /// Returns all direct parent [`HpoTermId`]s of the `old` term that
    /// are not parents of the `new` term
    ///
    /// Returns `None` if no such terms exist
    pub fn removed_parents(&self) -> Option<&Vec<HpoTermId>> {
        if self.removed_parents.is_empty() {
            None
        } else {
            Some(&self.removed_parents)
        }
    }

    /// Returns the `old` and `new` name if they are different
    ///
    /// Returns `None` if the name is unchanged
    pub fn changed_name(&self) -> Option<&(String, String)> {
        if self.changed_name.0 == self.changed_name.1 {
            None
        } else {
            Some(&self.changed_name)
        }
    }

    /// Returns the `old` and `new` obsolete states if they are different
    ///
    /// Returns `None` if the obsolete state is unchanged
    pub fn changed_obsolete(&self) -> Option<(bool, bool)> {
        if self.obsolete.0 == self.obsolete.1 {
            None
        } else {
            Some(self.obsolete)
        }
    }

    /// Returns the replacement terms if they exist are different
    ///
    /// Returns `None` if the obsolete state is unchanged
    pub fn changed_replacement(&self) -> Option<(Option<HpoTermId>, Option<HpoTermId>)> {
        if self.replacement.0 == self.replacement.1 {
            None
        } else {
            Some(self.replacement)
        }
    }

    /// Returns the [`HpoTermId`] of the term
    pub fn id(&self) -> &HpoTermId {
        &self.term_id
    }
}

/// Differences between two [`Gene`]s or [`OmimDisease`]s
pub struct AnnotationDelta {
    id: String,
    names: (String, String),
    n_terms: (usize, usize),
    added_terms: Vec<HpoTermId>,
    removed_terms: Vec<HpoTermId>,
}

impl<'a> AnnotationDelta {
    /// Constructs a new [`AnnotationDelta`] by comparing two [`Gene`]s
    ///
    /// Returns `None` if both are identical
    pub fn gene(lhs: &Gene, rhs: &Gene) -> Option<Self> {
        let lhs_terms = lhs.hpo_terms();
        let rhs_terms = rhs.hpo_terms();

        let names = (lhs.name().to_string(), rhs.name().to_string());

        Self::delta(lhs_terms, rhs_terms, names, lhs.id().to_string())
    }

    /// Constructs a new [`AnnotationDelta`] by comparing two [`OmimDisease`]s
    ///
    /// Returns `None` if both are identical
    pub fn disease<D: Disease>(lhs: &D, rhs: &D) -> Option<Self> {
        let lhs_terms = lhs.hpo_terms();
        let rhs_terms = rhs.hpo_terms();

        let names = (lhs.name().to_string(), rhs.name().to_string());

        Self::delta(lhs_terms, rhs_terms, names, lhs.id().to_string())
    }

    /// Calculates the difference between both items and constructs a new
    /// `AnnotationDelta` struct or returns None
    fn delta(
        lhs_terms: &HpoGroup,
        rhs_terms: &HpoGroup,
        names: (String, String),
        id: String,
    ) -> Option<Self> {
        let added_terms: Vec<HpoTermId> = rhs_terms
            .iter()
            .filter(|termid| !lhs_terms.contains(termid))
            .collect();

        let removed_terms: Vec<HpoTermId> = lhs_terms
            .iter()
            .filter(|termid| !rhs_terms.contains(termid))
            .collect();

        if !added_terms.is_empty() || !removed_terms.is_empty() || names.0 != names.1 {
            Some(Self {
                id,
                names,
                n_terms: (lhs_terms.len(), rhs_terms.len()),
                added_terms,
                removed_terms,
            })
        } else {
            None
        }
    }

    /// Returns all directly linked [`HpoTermId`]s of the `new` annotation that
    /// are not linked to the `old` anotation
    ///
    /// Returns `None` if no such terms exist
    pub fn added_terms(&'a self) -> Option<&Vec<HpoTermId>> {
        if self.added_terms.is_empty() {
            None
        } else {
            Some(&self.added_terms)
        }
    }

    /// Returns all directly linked [`HpoTermId`]s of the `old` annotation that
    /// are not linked to the `new` anotation
    ///
    /// Returns `None` if no such terms exist
    pub fn removed_terms(&'a self) -> Option<&Vec<HpoTermId>> {
        if self.removed_terms.is_empty() {
            None
        } else {
            Some(&self.removed_terms)
        }
    }

    /// Returns the `old` and `new` name if they are different
    ///
    /// Returns `None` if the name is unchanged
    pub fn changed_name(&self) -> Option<&(String, String)> {
        if self.names.0 == self.names.1 {
            None
        } else {
            Some(&self.names)
        }
    }

    /// Returns the `String`-formatted ID of the annotation
    pub fn id(&self) -> &str {
        &self.id
    }

    /// Returns the number of terms linked to `old` and `new`
    pub fn n_terms(&self) -> (usize, usize) {
        self.n_terms
    }
}