Struct hpo::stats::Linkage

source ·
pub struct Linkage<'a> { /* private fields */ }
Expand description

Linkage matrices from HpoSets

Create a linkage matrix from a list of HpoSets to use in dendrograms or other hierarchical cluster analyses

Provided algorithms for clustering

  • Linkage::union: Create a new HpoSet for each cluster based on the union of both combined clusters. This method becomes slow with growing input data.
  • Linkage::single: The minimum distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also known as the Nearest Point Algorithm.
  • Linkage::complete: The maximum distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also known as the Farthest Point Algorithm or Voorhees Algorithm.
  • Linkage::average: The mean distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also called the UPGMA algorithm.

§Examples


use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;

// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
    let sim = GroupSimilarity::default();
    combs.map(|comp| {
        1.0 - sim.calculate(comp.0, comp.1)
    }).collect()
}

let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
    ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];


let mut cluster = Linkage::union(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());

Implementations§

source§

impl<'a> Linkage<'a>

source

pub fn union<T, F>(sets: T, distance: F) -> Self
where T: IntoIterator<Item = HpoSet<'a>>, F: Fn(Combinations<'_, HpoSet<'_>>) -> Vec<f32>,

Performs union-based hierarchical clustering of HpoSets

In each iteration, HpoSets are compared to each other based on the provided distance function. Clusters are formed by combining the 2 closest HpoSets into a single set (forming the union).

This method becomes exponentially slower with larger lists of sets, because it merges sets and calculates pairwise similarities for each term in each set.

§Examples

use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;

// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
    let sim = GroupSimilarity::default();
    combs.map(|comp| {
        1.0 - sim.calculate(comp.0, comp.1)
    }).collect()
}

let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
    ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];


let mut cluster = Linkage::union(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());
source

pub fn single<T, F>(sets: T, distance: F) -> Self
where T: IntoIterator<Item = HpoSet<'a>>, F: Fn(Combinations<'_, HpoSet<'_>>) -> Vec<f32>,

Performs single-linkage hierarchical clustering of HpoSets

HpoSets are compared to each other based on the provided distance function. Clusters are formed by using the minimum distance of each encompassing set to the comparison set. This is also known as the Nearest Point Algorithm.

§Examples

use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;

// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
    let sim = GroupSimilarity::default();
    combs.map(|comp| {
        1.0 - sim.calculate(comp.0, comp.1)
    }).collect()
}

let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
    ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
    ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];


let mut cluster = Linkage::single(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());
source

pub fn complete<T, F>(sets: T, distance: F) -> Self
where T: IntoIterator<Item = HpoSet<'a>>, F: Fn(Combinations<'_, HpoSet<'_>>) -> Vec<f32>,

Performs complete-linkage hierarchical clustering of HpoSets

HpoSets are compared to each other based on the provided distance function. Clusters are formed by using the maximum distance of each encompassing set to the comparison set. This is also known as the Farthest Point Algorithm or Voorhees Algorithm.

source

pub fn average<T, F>(sets: T, distance: F) -> Self
where T: IntoIterator<Item = HpoSet<'a>>, F: Fn(Combinations<'_, HpoSet<'_>>) -> Vec<f32>,

Performs average-linkage hierarchical clustering of HpoSets

HpoSets are compared to each other based on the provided distance function. Clusters are formed by using the average distance of both encompassing sets to the comparison set. This is also called the UPGMA algorithm.

§Note

This method is not implemented entirely correctly. Instead of calculating the average of all distances, it only uses the mean distance of all direct cluster nodes.

source

pub fn cluster(&self) -> Iter<'_>

Returns an Iterator of Cluster references

source

pub fn into_cluster(self) -> IntoIter

Returns an Iterator of owned Cluster

source

pub fn indicies(&self) -> Vec<usize>

Returns the order of the input set items in the final cluster

§Examples

use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;

// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
    let sim = GroupSimilarity::default();
    combs.map(|comp| {
        1.0 - sim.calculate(comp.0, comp.1)
    }).collect()
}

let ontology = Ontology::from_binary("tests/example.hpo").unwrap();

let genes = vec!["GBA1", "BRCA2", "EZH2", "DMD"];
let sets = genes.iter().map(|gene| ontology.gene_by_name(gene).unwrap().to_hpo_set(&ontology));

let linkage = Linkage::union(sets, distance);
let indicies = linkage.indicies();
assert_eq!(indicies, vec![0usize, 3usize, 2usize, 1usize]);
for idx in indicies {
   print!("{} ", genes[idx]);
}
// "GBA1 DMD EZH2 BRCA2"
source

pub fn iter(&self) -> Iter<'_>

Returns an Iterator of Cluster references

Trait Implementations§

source§

impl<'a> IntoIterator for &'a Linkage<'a>

§

type Item = &'a Cluster

The type of the elements being iterated over.
§

type IntoIter = Iter<'a>

Which kind of iterator are we turning this into?
source§

fn into_iter(self) -> Self::IntoIter

Creates an iterator from a value. Read more
source§

impl IntoIterator for Linkage<'_>

§

type Item = Cluster

The type of the elements being iterated over.
§

type IntoIter = IntoIter

Which kind of iterator are we turning this into?
source§

fn into_iter(self) -> Self::IntoIter

Creates an iterator from a value. Read more

Auto Trait Implementations§

§

impl<'a> Freeze for Linkage<'a>

§

impl<'a> RefUnwindSafe for Linkage<'a>

§

impl<'a> Send for Linkage<'a>

§

impl<'a> Sync for Linkage<'a>

§

impl<'a> Unpin for Linkage<'a>

§

impl<'a> UnwindSafe for Linkage<'a>

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T> Instrument for T

source§

fn instrument(self, span: Span) -> Instrumented<Self>

Instruments this type with the provided Span, returning an Instrumented wrapper. Read more
source§

fn in_current_span(self) -> Instrumented<Self>

Instruments this type with the current Span, returning an Instrumented wrapper. Read more
source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> Same for T

§

type Output = T

Should always be Self
source§

impl<SS, SP> SupersetOf<SS> for SP
where SS: SubsetOf<SP>,

source§

fn to_subset(&self) -> Option<SS>

The inverse inclusion map: attempts to construct self from the equivalent element of its superset. Read more
source§

fn is_in_subset(&self) -> bool

Checks if self is actually part of its subset T (and can be converted to it).
source§

fn to_subset_unchecked(&self) -> SS

Use with care! Same as self.to_subset but without any property checks. Always succeeds.
source§

fn from_subset(element: &SS) -> SP

The inclusion map: converts self to the equivalent element of its superset.
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<V, T> VZip<V> for T
where V: MultiLane<T>,

source§

fn vzip(self) -> V

source§

impl<T> WithSubscriber for T

source§

fn with_subscriber<S>(self, subscriber: S) -> WithDispatch<Self>
where S: Into<Dispatch>,

Attaches the provided Subscriber to this type, returning a WithDispatch wrapper. Read more
source§

fn with_current_subscriber(self) -> WithDispatch<Self>

Attaches the current default Subscriber to this type, returning a WithDispatch wrapper. Read more