pub struct Linkage<'a> { /* private fields */ }
Expand description
Linkage matrices from HpoSet
s
Create a linkage matrix from a list of HpoSet
s to use in dendrograms
or other hierarchical cluster analyses
Provided algorithms for clustering
Linkage::union
: Create a new HpoSet
for each cluster based on the union of both combined clusters. This method becomes slow with growing input data.
Linkage::single
: The minimum distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also known as the Nearest Point Algorithm.
Linkage::complete
: The maximum distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also known as the Farthest Point Algorithm or Voor Hees Algorithm.
Linkage::average
: The mean distance of each cluster’s nodes to the other nodes is used as distance for newly formed clusters. This is also called the UPGMA algorithm.
§Examples
use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;
// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
let sim = GroupSimilarity::default();
combs.map(|comp| {
1.0 - sim.calculate(comp.0, comp.1)
}).collect()
}
let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];
let mut cluster = Linkage::union(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());
Implementations§
source§impl<'a> Linkage<'a>
impl<'a> Linkage<'a>
sourcepub fn union<T, F>(sets: T, distance: F) -> Self
pub fn union<T, F>(sets: T, distance: F) -> Self
Performs union-based hierarchical clustering of HpoSet
s
In each iteration, HpoSet
s are compared to each other based on the
provided distance
function. Cluster
s are formed by combining the
2 closest HpoSet
s into a single set (forming the union).
This method becomes exponentially slower with larger lists of sets, because it merges sets and calculates pairwise similarities for each term in each set.
§Examples
use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;
// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
let sim = GroupSimilarity::default();
combs.map(|comp| {
1.0 - sim.calculate(comp.0, comp.1)
}).collect()
}
let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];
let mut cluster = Linkage::union(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());
sourcepub fn single<T, F>(sets: T, distance: F) -> Self
pub fn single<T, F>(sets: T, distance: F) -> Self
Performs single-hierarchical clustering of HpoSet
s
HpoSet
s are compared to each other based on the
provided distance
function. Cluster
s are formed by using the minimum
distance of each encompassing set to the comparison set.
This is also known as the Nearest Point Algorithm.
§Examples
use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;
// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
let sim = GroupSimilarity::default();
combs.map(|comp| {
1.0 - sim.calculate(comp.0, comp.1)
}).collect()
}
let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let sets = vec![
ontology.gene_by_name("GBA1").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("BRCA2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("EZH2").unwrap().to_hpo_set(&ontology),
ontology.gene_by_name("DMD").unwrap().to_hpo_set(&ontology),
];
let mut cluster = Linkage::single(sets, distance).into_cluster();
let first = cluster.next().unwrap();
println!("{:?}", first);
// Cluster { idx1: 0, idx2: 3, distance: 0.008127391, size: 2 }
assert_eq!(cluster.next().unwrap().len(), 3);
assert_eq!(cluster.next().unwrap().len(), 4);
assert!(cluster.next().is_none());
sourcepub fn complete<T, F>(sets: T, distance: F) -> Self
pub fn complete<T, F>(sets: T, distance: F) -> Self
Performs complete-hierarchical clustering of HpoSet
s
HpoSet
s are compared to each other based on the
provided distance
function. Cluster
s are formed by using the maximum
distance of each encompassing set to the comparison set.
This is also known by the Farthest Point Algorithm or Voor Hees Algorithm.
sourcepub fn average<T, F>(sets: T, distance: F) -> Self
pub fn average<T, F>(sets: T, distance: F) -> Self
Performs average-hierarchical clustering of HpoSet
s
HpoSet
s are compared to each other based on the
provided distance
function. Cluster
s are formed by using the average
distance of both encompassing sets to the comparison set.
This is also called the UPGMA algorithm.
§Note
This method is not implemented completely correctly. Instead of calculating the average of all distances, it only uses the mean distance of all direct cluster nodes.
sourcepub fn into_cluster(self) -> IntoIter ⓘ
pub fn into_cluster(self) -> IntoIter ⓘ
Returns an Iterator of owned Cluster
sourcepub fn indicies(&self) -> Vec<usize>
pub fn indicies(&self) -> Vec<usize>
Returns the order of the input set items in the final cluster
§Examples
use hpo::Ontology;
use hpo::HpoSet;
use hpo::similarity::GroupSimilarity;
use hpo::utils::Combinations;
use hpo::stats::Linkage;
// This method can and should utilize parallel processing, e.g.
// using rayon iterators
fn distance(combs: Combinations<HpoSet<'_>>) -> Vec<f32> {
let sim = GroupSimilarity::default();
combs.map(|comp| {
1.0 - sim.calculate(comp.0, comp.1)
}).collect()
}
let ontology = Ontology::from_binary("tests/example.hpo").unwrap();
let genes = vec!["GBA1", "BRCA2", "EZH2", "DMD"];
let sets = genes.iter().map(|gene| ontology.gene_by_name(gene).unwrap().to_hpo_set(&ontology));
let linkage = Linkage::union(sets, distance);
let indicies = linkage.indicies();
assert_eq!(indicies, vec![0usize, 3usize, 2usize, 1usize]);
for idx in indicies {
print!("{} ", genes[idx]);
}
// "GBA1 DMD EZH2 BRCA2"
Trait Implementations§
source§impl<'a> IntoIterator for &'a Linkage<'a>
impl<'a> IntoIterator for &'a Linkage<'a>
Auto Trait Implementations§
impl<'a> Freeze for Linkage<'a>
impl<'a> RefUnwindSafe for Linkage<'a>
impl<'a> Send for Linkage<'a>
impl<'a> Sync for Linkage<'a>
impl<'a> Unpin for Linkage<'a>
impl<'a> UnwindSafe for Linkage<'a>
Blanket Implementations§
source§impl<T> BorrowMut<T> for T
where
T: ?Sized,
impl<T> BorrowMut<T> for T
where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> Instrument for T
impl<T> Instrument for T
source§fn instrument(self, span: Span) -> Instrumented<Self>
fn instrument(self, span: Span) -> Instrumented<Self>
source§fn in_current_span(self) -> Instrumented<Self>
fn in_current_span(self) -> Instrumented<Self>
source§impl<SS, SP> SupersetOf<SS> for SP
where
SS: SubsetOf<SP>,
impl<SS, SP> SupersetOf<SS> for SP
where
SS: SubsetOf<SP>,
source§fn to_subset(&self) -> Option<SS>
fn to_subset(&self) -> Option<SS>
self
from the equivalent element of its
superset. Read more
source§fn is_in_subset(&self) -> bool
fn is_in_subset(&self) -> bool
self
is actually part of its subset T
(and can be converted to it).
source§fn to_subset_unchecked(&self) -> SS
fn to_subset_unchecked(&self) -> SS
self.to_subset
but without any property checks. Always succeeds.
source§fn from_subset(element: &SS) -> SP
fn from_subset(element: &SS) -> SP
self
to the equivalent element of its superset.