use crate::plot::volcano::LabelStyle;
use crate::render::palette::Palette;
/// A single observation on a Manhattan plot.
///
/// Derives `Debug`, `Clone` and `PartialEq` so points can be logged, copied
/// and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ManhattanPoint {
    /// Chromosome name as stored by the loader that created the point.
    pub chromosome: String,
    /// Position of the point on the plot's x axis.
    pub x: f64,
    /// Raw p-value of the observation.
    pub pvalue: f64,
    /// Optional text label; `None` until one is attached.
    pub label: Option<String>,
}
/// The x-axis extent occupied by one chromosome.
///
/// Derives `Debug`, `Clone` and `PartialEq` so spans can be logged, copied
/// and compared in tests.
#[derive(Debug, Clone, PartialEq)]
pub struct ChromSpan {
    /// Chromosome name the span belongs to.
    pub name: String,
    /// Left edge of the span on the x axis.
    pub x_start: f64,
    /// Right edge of the span on the x axis.
    pub x_end: f64,
}
/// Configuration and data for a Manhattan plot.
///
/// Built with [`ManhattanPlot::new`] and the chained `with_*` methods; the
/// data loaders (`with_data`, `with_data_bp`, `with_data_x`) fill `points`
/// and `spans` together.
pub struct ManhattanPlot {
/// Data points, replaced wholesale by each data loader.
pub points: Vec<ManhattanPoint>,
/// Per-chromosome x-axis extents, replaced alongside `points`.
pub spans: Vec<ChromSpan>,
/// Genome-wide significance threshold; `new` sets it to `-log10(5e-8)`.
pub genome_wide: f64,
/// Suggestive threshold; `new` sets it to `5.0`
/// (presumably on the same -log10 scale as `genome_wide` — confirm in the renderer).
pub suggestive: f64,
/// First point colour; `new` sets it to `"steelblue"`.
pub color_a: String,
/// Second point colour; `new` sets it to `"#5aadcb"`
/// (presumably alternated with `color_a` per chromosome — confirm in the renderer).
pub color_b: String,
/// Optional palette; `None` by default. How it interacts with
/// `color_a`/`color_b` is decided outside this file.
pub palette: Option<Palette>,
/// Point size; `new` sets it to `2.5`.
pub point_size: f64,
/// `new` sets this to `0`
/// (presumably the number of top points to label automatically — confirm in the renderer).
pub label_top: usize,
/// Style applied to point labels.
pub label_style: LabelStyle,
/// Explicit p-value floor; when `Some`, `floor()` returns it unchanged.
pub pvalue_floor: Option<f64>,
/// Optional legend text; `None` by default.
pub legend_label: Option<String>,
/// Whether tooltips are enabled; `false` by default.
pub show_tooltips: bool,
/// Optional tooltip texts; `None` by default
/// (presumably one per point, matched by index — confirm in the renderer).
pub tooltip_labels: Option<Vec<String>>,
}
/// Chromosome-size table used to lay points out by base-pair position.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so a build can be logged,
/// reused across plots and compared.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GenomeBuild {
    /// Built-in hg19 size table.
    Hg19,
    /// Built-in hg38 size table.
    Hg38,
    /// Built-in T2T size table.
    T2T,
    /// Caller-supplied `(name, size)` pairs, laid out in the given order.
    /// A leading `chr` on a name is stripped before it is used.
    Custom(Vec<(String, u64)>),
}
/// Chromosome names and sizes for the hg19 build, in layout order
/// (1-22, X, Y, MT). Names carry no `chr` prefix. The sizes are summed into
/// cumulative x offsets when this table is selected via `GenomeBuild::Hg19`.
const HG19_SIZES: &[(&str, u64)] = &[
("1", 249_250_621),
("2", 243_199_373),
("3", 198_022_430),
("4", 191_154_276),
("5", 180_915_260),
("6", 171_115_067),
("7", 159_138_663),
("8", 146_364_022),
("9", 141_213_431),
("10", 135_534_747),
("11", 135_006_516),
("12", 133_851_895),
("13", 115_169_878),
("14", 107_349_540),
("15", 102_531_392),
("16", 90_354_753),
("17", 81_195_210),
("18", 78_077_248),
("19", 59_128_983),
("20", 63_025_520),
("21", 48_129_895),
("22", 51_304_566),
("X", 155_270_560),
("Y", 59_373_566),
("MT", 16_571),
];
/// Chromosome names and sizes for the hg38 build, in layout order
/// (1-22, X, Y, MT). Names carry no `chr` prefix. The sizes are summed into
/// cumulative x offsets when this table is selected via `GenomeBuild::Hg38`.
const HG38_SIZES: &[(&str, u64)] = &[
("1", 248_956_422),
("2", 242_193_529),
("3", 198_295_559),
("4", 190_214_555),
("5", 181_538_259),
("6", 170_805_979),
("7", 159_345_973),
("8", 145_138_636),
("9", 138_394_717),
("10", 133_797_422),
("11", 135_086_622),
("12", 133_275_309),
("13", 114_364_328),
("14", 107_043_718),
("15", 101_991_189),
("16", 90_338_345),
("17", 83_257_441),
("18", 80_373_285),
("19", 58_617_616),
("20", 64_444_167),
("21", 46_709_983),
("22", 50_818_468),
("X", 156_040_895),
("Y", 57_227_415),
("MT", 16_569),
];
/// Chromosome names and sizes for the T2T build, in layout order
/// (1-22, X, Y, MT). Names carry no `chr` prefix. The sizes are summed into
/// cumulative x offsets when this table is selected via `GenomeBuild::T2T`.
const T2T_SIZES: &[(&str, u64)] = &[
("1", 248_387_328),
("2", 242_696_752),
("3", 201_105_948),
("4", 193_574_945),
("5", 182_045_439),
("6", 172_126_628),
("7", 160_567_428),
("8", 146_259_331),
("9", 150_617_247),
("10", 134_758_134),
("11", 135_127_769),
("12", 133_324_548),
("13", 113_566_686),
("14", 101_161_492),
("15", 99_753_195),
("16", 96_330_374),
("17", 84_276_897),
("18", 80_542_538),
("19", 61_707_364),
("20", 66_210_255),
("21", 45_090_682),
("22", 51_324_926),
("X", 154_259_566),
("Y", 62_460_029),
("MT", 16_569),
];
/// Builds the ordering key for a chromosome name.
///
/// A leading `chr` is ignored. The key is `(group, number, name)`:
/// * names that parse as `u32` -> `(0, n, "")`, so they sort numerically;
/// * `X`/`x` -> group 1, `Y`/`y` -> group 2, `MT`/`M`/`mt`/`m` -> group 3;
/// * anything else -> `(4, 0, name)`, sorted by the stripped name.
fn chrom_sort_key(name: &str) -> (u8, u32, String) {
    let bare = strip_chr(name);
    if matches!(bare, "X" | "x") {
        return (1, 0, String::new());
    }
    if matches!(bare, "Y" | "y") {
        return (2, 0, String::new());
    }
    if matches!(bare, "MT" | "M" | "mt" | "m") {
        return (3, 0, String::new());
    }
    match bare.parse::<u32>() {
        Ok(number) => (0, number, String::new()),
        Err(_) => (4, 0, bare.to_owned()),
    }
}
/// Returns `name` without one leading, lowercase `chr`; names that do not
/// start with `chr` are returned unchanged.
fn strip_chr(name: &str) -> &str {
    const PREFIX: &str = "chr";
    let without_prefix = name.strip_prefix(PREFIX);
    without_prefix.unwrap_or(name)
}
/// Returns the `(name, size)` table for `build`, in layout order.
///
/// Built-in builds copy their constant table as-is. For
/// `GenomeBuild::Custom` each name has a leading `chr` stripped and borrows
/// from the caller's list.
fn build_sizes(build: &GenomeBuild) -> Vec<(&str, u64)> {
    let builtin: &[(&str, u64)] = match build {
        GenomeBuild::Custom(entries) => {
            return entries
                .iter()
                .map(|(name, size)| (strip_chr(name), *size))
                .collect();
        }
        GenomeBuild::Hg19 => HG19_SIZES,
        GenomeBuild::Hg38 => HG38_SIZES,
        GenomeBuild::T2T => T2T_SIZES,
    };
    builtin.to_vec()
}
impl Default for ManhattanPlot {
fn default() -> Self {
Self::new()
}
}
impl ManhattanPlot {
/// Creates an empty plot with the default configuration.
///
/// No points or spans are loaded; thresholds are `-log10(5e-8)` (genome-wide)
/// and `5.0` (suggestive); colours are `"steelblue"` and `"#5aadcb"`; point
/// size is `2.5`; every optional setting is unset and tooltips are off.
pub fn new() -> Self {
    Self {
        points: Vec::new(),
        spans: Vec::new(),
        // Negation applies to the result of `log10`, giving a positive
        // threshold of about 7.3; the parentheses make that explicit.
        genome_wide: -(5e-8_f64.log10()),
        suggestive: 5.0,
        color_a: String::from("steelblue"),
        color_b: String::from("#5aadcb"),
        palette: None,
        point_size: 2.5,
        label_top: 0,
        label_style: LabelStyle::default(),
        pvalue_floor: None,
        legend_label: None,
        show_tooltips: false,
        tooltip_labels: None,
    }
}
/// Returns the smallest p-value to plot.
///
/// * If `pvalue_floor` is set, that value is returned unchanged.
/// * Otherwise the result is the smallest strictly positive p-value among
///   `points`, but never less than `1e-300`.
/// * If no point has a positive, finite p-value (no data, or only zeros,
///   negatives or NaNs), `1e-300` is returned. Previously this case leaked
///   the fold's `f64::INFINITY` seed, which is not a usable floor.
pub fn floor(&self) -> f64 {
    const MIN_FLOOR: f64 = 1e-300;

    if let Some(explicit) = self.pvalue_floor {
        return explicit;
    }

    // `p > 0.0` also rejects NaN, so the fold only ever sees real candidates.
    let smallest_positive = self
        .points
        .iter()
        .map(|p| p.pvalue)
        .filter(|&p| p > 0.0)
        .fold(f64::INFINITY, f64::min);

    if smallest_positive.is_finite() {
        smallest_positive.max(MIN_FLOOR)
    } else {
        MIN_FLOOR
    }
}
/// Loads `(chromosome, p-value)` pairs and lays the points out by index.
///
/// Values are grouped by chromosome name (compared verbatim), and the groups
/// are ordered with `chrom_sort_key`; groups with equal keys keep
/// first-appearance order. Within a group, points keep input order and get
/// consecutive integer x positions continuing from the previous group. Each
/// span runs from its first point's x to its last point's x.
///
/// Replaces any previously loaded `points` and `spans`.
pub fn with_data<I, S, G>(mut self, iter: I) -> Self
where
    I: IntoIterator<Item = (S, G)>,
    S: Into<String>,
    G: Into<f64>,
{
    use std::collections::hash_map::Entry;

    let mut grouped: std::collections::HashMap<String, Vec<f64>> =
        std::collections::HashMap::new();
    let mut ordered_names: Vec<String> = Vec::new();
    for (name, value) in iter {
        match grouped.entry(name.into()) {
            Entry::Occupied(mut slot) => slot.get_mut().push(value.into()),
            Entry::Vacant(slot) => {
                // First sighting: remember the name so groups can be ordered later.
                ordered_names.push(slot.key().clone());
                slot.insert(vec![value.into()]);
            }
        }
    }
    ordered_names.sort_by_key(|name| chrom_sort_key(name));

    let mut points = Vec::new();
    let mut spans = Vec::new();
    let mut span_offset = 0.0_f64;
    for name in &ordered_names {
        let pvalues = grouped
            .get(name)
            .expect("chrom_order derived from by_chrom keys");
        let count = pvalues.len() as f64;
        points.extend(pvalues.iter().enumerate().map(|(i, &pvalue)| ManhattanPoint {
            chromosome: name.clone(),
            x: span_offset + i as f64,
            pvalue,
            label: None,
        }));
        spans.push(ChromSpan {
            name: name.clone(),
            x_start: span_offset,
            x_end: span_offset + count - 1.0,
        });
        span_offset += count;
    }

    self.points = points;
    self.spans = spans;
    self
}
/// Loads `(chromosome, base-pair position, p-value)` triples and places each
/// point at its cumulative position within `build`.
///
/// * A leading `chr` is stripped from every chromosome name, and the
///   stripped name is what each point stores.
/// * A chromosome found in the build gets `x = cumulative offset + bp`.
///   Lookup is by exact stripped name: the built-in tables use `MT`, so
///   `chrM` (stripped to `M`) is treated as unknown.
/// * A chromosome not in the build gets `x = total build length + bp`. All
///   unknown chromosomes share that origin, so their spans may overlap.
/// * `spans` holds one entry per build chromosome, in build order and
///   regardless of whether data was seen for it. Spans for unknown
///   chromosomes follow, each covering the min..max x of its points. They
///   are ordered by `x_start`, then by name, so the order is deterministic
///   even when two start at the same x.
///
/// Replaces any previously loaded `points` and `spans`.
pub fn with_data_bp<I, S, F, G>(mut self, iter: I, build: GenomeBuild) -> Self
where
    I: IntoIterator<Item = (S, F, G)>,
    S: Into<String>,
    F: Into<f64>,
    G: Into<f64>,
{
    let sizes = build_sizes(&build);

    // One pass over the build: cumulative start offset per chromosome plus
    // the matching span. A duplicated name keeps the offset of its last entry.
    let mut cum_offsets: std::collections::HashMap<&str, u64> =
        std::collections::HashMap::with_capacity(sizes.len());
    let mut spans = Vec::with_capacity(sizes.len());
    let mut running = 0u64;
    for &(name, size) in &sizes {
        cum_offsets.insert(name, running);
        let end = running + size;
        spans.push(ChromSpan {
            name: name.to_string(),
            x_start: running as f64,
            x_end: end as f64,
        });
        running = end;
    }
    let total_genome = running as f64;

    let mut points = Vec::new();
    let mut unknown_bounds: std::collections::HashMap<String, (f64, f64)> =
        std::collections::HashMap::new();
    for (s, f, g) in iter {
        let chrom_raw: String = s.into();
        let chromosome = strip_chr(&chrom_raw).to_string();
        let bp: f64 = f.into();
        let pvalue: f64 = g.into();

        let x = match cum_offsets.get(chromosome.as_str()) {
            Some(&offset) => offset as f64 + bp,
            None => {
                // Unknown chromosome: place it after the build and track its extent.
                let x = total_genome + bp;
                let bounds = unknown_bounds
                    .entry(chromosome.clone())
                    .or_insert((f64::INFINITY, f64::NEG_INFINITY));
                bounds.0 = bounds.0.min(x);
                bounds.1 = bounds.1.max(x);
                x
            }
        };

        points.push(ManhattanPoint {
            chromosome,
            x,
            pvalue,
            label: None,
        });
    }

    let mut extra: Vec<ChromSpan> = unknown_bounds
        .into_iter()
        .map(|(name, (x_start, x_end))| ChromSpan {
            name,
            x_start,
            x_end,
        })
        .collect();
    // HashMap iteration order is arbitrary, so ties on `x_start` are broken
    // by the (unique) name to keep the output stable between runs.
    extra.sort_by(|a, b| {
        a.x_start
            .partial_cmp(&b.x_start)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| a.name.cmp(&b.name))
    });
    spans.extend(extra);

    self.points = points;
    self.spans = spans;
    self
}
/// Loads `(chromosome, x, p-value)` triples whose x positions are already
/// final.
///
/// Points keep input order and store the chromosome name verbatim. One span
/// is produced per distinct chromosome name, covering the min..max x of its
/// points. Spans are ordered by `x_start`; equal starts keep
/// first-appearance order.
///
/// Replaces any previously loaded `points` and `spans`.
pub fn with_data_x<I, S, F, G>(mut self, iter: I) -> Self
where
    I: IntoIterator<Item = (S, F, G)>,
    S: Into<String>,
    F: Into<f64>,
    G: Into<f64>,
{
    let mut points = Vec::new();
    let mut chrom_bounds: std::collections::HashMap<String, (f64, f64)> =
        std::collections::HashMap::new();
    // First-appearance order of chromosome names; drives the pre-sort span order.
    let mut seen_chroms: Vec<String> = Vec::new();

    for (s, f, g) in iter {
        let chromosome: String = s.into();
        let x: f64 = f.into();
        let pvalue: f64 = g.into();

        // Single lookup per point; only a chromosome's first point allocates.
        if let Some(bounds) = chrom_bounds.get_mut(&chromosome) {
            bounds.0 = bounds.0.min(x);
            bounds.1 = bounds.1.max(x);
        } else {
            seen_chroms.push(chromosome.clone());
            chrom_bounds.insert(chromosome.clone(), (x, x));
        }

        points.push(ManhattanPoint {
            chromosome,
            x,
            pvalue,
            label: None,
        });
    }

    // Every name in `seen_chroms` was inserted into `chrom_bounds` above, so
    // `remove` always yields a value; `filter_map` just avoids a panic path.
    let mut spans: Vec<ChromSpan> = seen_chroms
        .into_iter()
        .filter_map(|name| {
            let (x_start, x_end) = chrom_bounds.remove(&name)?;
            Some(ChromSpan {
                name,
                x_start,
                x_end,
            })
        })
        .collect();
    spans.sort_by(|a, b| {
        a.x_start
            .partial_cmp(&b.x_start)
            .unwrap_or(std::cmp::Ordering::Equal)
    });

    self.points = points;
    self.spans = spans;
    self
}
pub fn with_genome_wide(mut self, threshold: f64) -> Self {
self.genome_wide = threshold;
self
}
pub fn with_suggestive(mut self, threshold: f64) -> Self {
self.suggestive = threshold;
self
}
pub fn with_color_a<S: Into<String>>(mut self, color: S) -> Self {
self.color_a = color.into();
self
}
pub fn with_color_b<S: Into<String>>(mut self, color: S) -> Self {
self.color_b = color.into();
self
}
pub fn with_palette(mut self, palette: Palette) -> Self {
self.palette = Some(palette);
self
}
pub fn with_point_size(mut self, size: f64) -> Self {
self.point_size = size;
self
}
pub fn with_label_top(mut self, n: usize) -> Self {
self.label_top = n;
self
}
pub fn with_label_style(mut self, style: LabelStyle) -> Self {
self.label_style = style;
self
}
pub fn with_pvalue_floor(mut self, floor: f64) -> Self {
self.pvalue_floor = Some(floor);
self
}
pub fn with_legend<S: Into<String>>(mut self, label: S) -> Self {
self.legend_label = Some(label.into());
self
}
pub fn with_point_labels<I, S, F, L>(mut self, iter: I) -> Self
where
I: IntoIterator<Item = (S, F, L)>,
S: Into<String>,
F: Into<f64>,
L: Into<String>,
{
for (s, f, l) in iter {
let chrom: String = s.into();
let x: f64 = f.into();
let label: String = l.into();
if let Some(pt) = self
.points
.iter_mut()
.find(|p| p.chromosome == chrom && (p.x - x).abs() < 0.5)
{
pt.label = Some(label);
}
}
self
}
pub fn with_tooltips(mut self) -> Self {
self.show_tooltips = true;
self
}
pub fn with_tooltip_labels(
mut self,
labels: impl IntoIterator<Item = impl Into<String>>,
) -> Self {
self.tooltip_labels = Some(labels.into_iter().map(|s| s.into()).collect());
self
}
}