use crate::render::palette::Palette;
use crate::plot::volcano::LabelStyle;
/// A single data point of a Manhattan plot.
///
/// Points are normally created by the `ManhattanPlot::with_data*` loaders,
/// which assign the `x` coordinate; labels are attached afterwards via
/// `ManhattanPlot::with_point_labels`.
#[derive(Debug, Clone, PartialEq)]
pub struct ManhattanPoint {
    /// Name of the chromosome this point belongs to, as stored by the loader.
    pub chromosome: String,
    /// Horizontal plot coordinate assigned by the loader.
    pub x: f64,
    /// P-value exactly as supplied by the caller (the loaders store it untransformed).
    pub pvalue: f64,
    /// Optional text label for this point; `None` until one is attached.
    pub label: Option<String>,
}
/// The horizontal extent occupied by one chromosome on the plot's x axis.
#[derive(Debug, Clone, PartialEq)]
pub struct ChromSpan {
    /// Chromosome name associated with this span.
    pub name: String,
    /// x coordinate at which the span begins.
    pub x_start: f64,
    /// x coordinate at which the span ends.
    pub x_end: f64,
}
/// Builder-style description of a Manhattan plot: its data points, the
/// chromosome spans laid out along the x axis, and styling options.
///
/// Create one with `ManhattanPlot::new()` and configure it through the
/// `with_*` methods, each of which consumes and returns the plot.
pub struct ManhattanPlot {
/// Data points; replaced wholesale by `with_data`, `with_data_bp` or `with_data_x`.
pub points: Vec<ManhattanPoint>,
/// Per-chromosome x ranges; replaced together with `points` by the loaders.
pub spans: Vec<ChromSpan>,
/// Genome-wide significance threshold. `new()` sets it to -log10(5e-8) (about 7.301),
/// so it is presumably expressed on the -log10(p) scale (confirm against the renderer).
pub genome_wide: f64,
/// Suggestive significance threshold. `new()` sets it to 5.0.
pub suggestive: f64,
/// First point colour; `new()` sets it to "steelblue".
/// NOTE(review): presumably alternates with `color_b` between chromosomes; confirm in the renderer.
pub color_a: String,
/// Second point colour; `new()` sets it to "#5aadcb".
pub color_b: String,
/// Optional palette; `None` by default.
/// NOTE(review): presumably takes precedence over `color_a`/`color_b` when set; confirm in the renderer.
pub palette: Option<Palette>,
/// Point size; `new()` sets it to 2.5.
pub point_size: f64,
/// Value set by `with_label_top`; `new()` sets it to 0.
/// NOTE(review): presumably the number of top hits to label; confirm in the renderer.
pub label_top: usize,
/// Label styling options; `new()` uses `LabelStyle::default()`.
pub label_style: LabelStyle,
/// Explicit p-value floor. When `Some`, `floor()` returns it unchanged.
pub pvalue_floor: Option<f64>,
/// Optional legend text set by `with_legend`; `None` by default.
pub legend_label: Option<String>,
}
/// Reference genome whose chromosome sizes define the x-axis layout used by
/// `ManhattanPlot::with_data_bp`.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum GenomeBuild {
    /// Built-in hg19 chromosome size table.
    Hg19,
    /// Built-in hg38 chromosome size table.
    Hg38,
    /// Built-in T2T chromosome size table.
    T2T,
    /// Caller-supplied `(chromosome name, size)` pairs, laid out in the given
    /// order. A leading `chr` on a name is ignored when the table is resolved.
    Custom(Vec<(String, u64)>),
}
/// Chromosome names (without a `chr` prefix) and their sizes for
/// `GenomeBuild::Hg19`, in layout order: autosomes 1-22, then X, Y and MT.
///
/// Sizes are added to base-pair positions when computing cumulative x
/// coordinates, so they are expressed in base pairs.
const HG19_SIZES: &[(&str, u64)] = &[
    ("1", 249_250_621),
    ("2", 243_199_373),
    ("3", 198_022_430),
    ("4", 191_154_276),
    ("5", 180_915_260),
    ("6", 171_115_067),
    ("7", 159_138_663),
    ("8", 146_364_022),
    ("9", 141_213_431),
    ("10", 135_534_747),
    ("11", 135_006_516),
    ("12", 133_851_895),
    ("13", 115_169_878),
    ("14", 107_349_540),
    ("15", 102_531_392),
    ("16", 90_354_753),
    ("17", 81_195_210),
    ("18", 78_077_248),
    ("19", 59_128_983),
    ("20", 63_025_520),
    ("21", 48_129_895),
    ("22", 51_304_566),
    ("X", 155_270_560),
    ("Y", 59_373_566),
    ("MT", 16_571),
];
/// Chromosome names (without a `chr` prefix) and their sizes for
/// `GenomeBuild::Hg38`, in layout order: autosomes 1-22, then X, Y and MT.
///
/// Sizes are added to base-pair positions when computing cumulative x
/// coordinates, so they are expressed in base pairs.
const HG38_SIZES: &[(&str, u64)] = &[
    ("1", 248_956_422),
    ("2", 242_193_529),
    ("3", 198_295_559),
    ("4", 190_214_555),
    ("5", 181_538_259),
    ("6", 170_805_979),
    ("7", 159_345_973),
    ("8", 145_138_636),
    ("9", 138_394_717),
    ("10", 133_797_422),
    ("11", 135_086_622),
    ("12", 133_275_309),
    ("13", 114_364_328),
    ("14", 107_043_718),
    ("15", 101_991_189),
    ("16", 90_338_345),
    ("17", 83_257_441),
    ("18", 80_373_285),
    ("19", 58_617_616),
    ("20", 64_444_167),
    ("21", 46_709_983),
    ("22", 50_818_468),
    ("X", 156_040_895),
    ("Y", 57_227_415),
    ("MT", 16_569),
];
/// Chromosome names (without a `chr` prefix) and their sizes for
/// `GenomeBuild::T2T`, in layout order: autosomes 1-22, then X, Y and MT.
///
/// Sizes are added to base-pair positions when computing cumulative x
/// coordinates, so they are expressed in base pairs.
const T2T_SIZES: &[(&str, u64)] = &[
    ("1", 248_387_328),
    ("2", 242_696_752),
    ("3", 201_105_948),
    ("4", 193_574_945),
    ("5", 182_045_439),
    ("6", 172_126_628),
    ("7", 160_567_428),
    ("8", 146_259_331),
    ("9", 150_617_247),
    ("10", 134_758_134),
    ("11", 135_127_769),
    ("12", 133_324_548),
    ("13", 113_566_686),
    ("14", 101_161_492),
    ("15", 99_753_195),
    ("16", 96_330_374),
    ("17", 84_276_897),
    ("18", 80_542_538),
    ("19", 61_707_364),
    ("20", 66_210_255),
    ("21", 45_090_682),
    ("22", 51_324_926),
    ("X", 154_259_566),
    ("Y", 62_460_029),
    ("MT", 16_569),
];
/// Builds the ordering key for a chromosome name.
///
/// After dropping an optional `chr` prefix the key is `(group, number, text)`:
/// * group 0: names that parse as `u32`, ordered by that number;
/// * group 1: `X` / `x`;
/// * group 2: `Y` / `y`;
/// * group 3: `MT` / `M` / `mt` / `m`;
/// * group 4: anything else, ordered by the remaining text.
fn chrom_sort_key(name: &str) -> (u8, u32, String) {
    let bare = strip_chr(name);

    // Numeric chromosomes come first; none of the special names below can
    // parse as an integer, so testing this first does not change the outcome.
    if let Ok(number) = bare.parse::<u32>() {
        return (0, number, String::new());
    }

    let group: u8 = match bare {
        "X" | "x" => 1,
        "Y" | "y" => 2,
        "MT" | "M" | "mt" | "m" => 3,
        _ => 4,
    };

    // Only unrecognised names carry their text, so they order alphabetically.
    let text = if group == 4 {
        bare.to_string()
    } else {
        String::new()
    };
    (group, 0, text)
}
/// Returns `name` without a leading lowercase `chr`, or `name` unchanged when
/// it does not start with that prefix. Only one occurrence is removed.
fn strip_chr(name: &str) -> &str {
    match name.strip_prefix("chr") {
        Some(rest) => rest,
        None => name,
    }
}
/// Resolves a [`GenomeBuild`] to its ordered list of `(chromosome, size)` pairs.
///
/// Built-in builds copy the matching static table. `Custom` builds borrow the
/// caller's names, each with a leading `chr` removed, and keep the given order.
fn build_sizes(build: &GenomeBuild) -> Vec<(&str, u64)> {
    let table: &[(&str, u64)] = match build {
        GenomeBuild::Hg19 => HG19_SIZES,
        GenomeBuild::Hg38 => HG38_SIZES,
        GenomeBuild::T2T => T2T_SIZES,
        GenomeBuild::Custom(entries) => {
            // Custom tables need per-entry name normalisation, so build and
            // return them directly.
            let mut resolved = Vec::with_capacity(entries.len());
            for (name, size) in entries {
                resolved.push((strip_chr(name.as_str()), *size));
            }
            return resolved;
        }
    };
    table.to_vec()
}
impl Default for ManhattanPlot {
fn default() -> Self { Self::new() }
}
impl ManhattanPlot {
    /// Creates an empty plot with default styling.
    ///
    /// Defaults: genome-wide threshold `-log10(5e-8)` (about 7.301), suggestive
    /// threshold `5.0`, colours `"steelblue"` and `"#5aadcb"`, point size `2.5`,
    /// no palette, `label_top` of 0, default label style, no explicit p-value
    /// floor and no legend label.
    pub fn new() -> Self {
        Self {
            points: Vec::new(),
            spans: Vec::new(),
            // Parenthesised on purpose: a method call binds tighter than unary
            // minus, so this is -(log10(5e-8)), a positive number.
            genome_wide: -(5e-8_f64.log10()),
            suggestive: 5.0,
            color_a: "steelblue".into(),
            color_b: "#5aadcb".into(),
            palette: None,
            point_size: 2.5,
            label_top: 0,
            label_style: LabelStyle::default(),
            pvalue_floor: None,
            legend_label: None,
        }
    }

    /// Returns the p-value floor for this plot.
    ///
    /// * If an explicit floor was set with [`with_pvalue_floor`](Self::with_pvalue_floor),
    ///   it is returned unchanged.
    /// * Otherwise the smallest strictly positive p-value among the points is
    ///   returned, but never less than `1e-300`.
    /// * If there is no positive finite p-value at all (no points, or every
    ///   p-value is zero, negative or NaN), `1e-300` is returned. Previously
    ///   this case yielded `+inf`, which is not a usable floor.
    ///
    /// NOTE(review): presumably the renderer clamps p-values to this floor
    /// before taking -log10; confirm at the call site.
    pub fn floor(&self) -> f64 {
        const MIN_FLOOR: f64 = 1e-300;

        if let Some(explicit) = self.pvalue_floor {
            return explicit;
        }

        let smallest = self
            .points
            .iter()
            .map(|p| p.pvalue)
            .filter(|&p| p > 0.0) // also discards NaN
            .fold(f64::INFINITY, f64::min);

        if smallest.is_finite() {
            smallest.max(MIN_FLOOR)
        } else {
            MIN_FLOOR
        }
    }

    /// Loads `(chromosome, p-value)` pairs, spacing points evenly.
    ///
    /// Chromosomes are ordered by `chrom_sort_key` (numeric, then X, Y, MT,
    /// then everything else alphabetically); names with equal keys keep their
    /// first-seen order. Within a chromosome the points keep input order and
    /// receive consecutive integer x coordinates, continuing from where the
    /// previous chromosome ended. Each chromosome's span runs from its first
    /// point's x to its last point's x.
    ///
    /// Chromosome names are stored exactly as given. Replaces any previously
    /// loaded points and spans.
    pub fn with_data<I, S, G>(mut self, iter: I) -> Self
    where
        I: IntoIterator<Item = (S, G)>,
        S: Into<String>,
        G: Into<f64>,
    {
        let mut chrom_order: Vec<String> = Vec::new();
        let mut by_chrom: std::collections::HashMap<String, Vec<f64>> =
            std::collections::HashMap::new();

        for (s, g) in iter {
            let chrom: String = s.into();
            let pvalue: f64 = g.into();
            // One hash lookup per row; remember first-seen order of new names.
            match by_chrom.entry(chrom) {
                std::collections::hash_map::Entry::Occupied(mut slot) => {
                    slot.get_mut().push(pvalue);
                }
                std::collections::hash_map::Entry::Vacant(slot) => {
                    chrom_order.push(slot.key().clone());
                    slot.insert(vec![pvalue]);
                }
            }
        }

        // Stable sort; the key may allocate, so compute it once per name.
        chrom_order.sort_by_cached_key(|c| chrom_sort_key(c));

        let mut span_offset = 0.0_f64;
        let mut spans = Vec::with_capacity(chrom_order.len());
        let mut points = Vec::new();
        for chrom in chrom_order {
            let pvalues = by_chrom
                .remove(&chrom)
                .expect("chrom_order derived from by_chrom keys");
            let count = pvalues.len() as f64;

            points.extend(
                pvalues
                    .into_iter()
                    .enumerate()
                    .map(|(i, pvalue)| ManhattanPoint {
                        chromosome: chrom.clone(),
                        x: span_offset + i as f64,
                        pvalue,
                        label: None,
                    }),
            );

            // Every chromosome has at least one point, so x_end >= x_start.
            spans.push(ChromSpan {
                name: chrom,
                x_start: span_offset,
                x_end: span_offset + count - 1.0,
            });
            span_offset += count;
        }

        self.points = points;
        self.spans = spans;
        self
    }

    /// Loads `(chromosome, base-pair position, p-value)` triples and places
    /// them on a cumulative genome axis defined by `build`.
    ///
    /// * A leading `chr` is stripped from every chromosome name before it is
    ///   stored and looked up.
    /// * A point on a chromosome known to `build` gets
    ///   `x = (sum of sizes of preceding chromosomes) + bp`.
    /// * A point on a chromosome not in `build` gets `x = (total genome size) + bp`.
    ///   Note that several unknown chromosomes therefore share the same x
    ///   range; this layout is kept as-is so existing coordinates do not move.
    /// * Spans are emitted for every chromosome of `build` in build order
    ///   (whether or not it has data), followed by one span per unknown
    ///   chromosome covering the min/max x of its points. Unknown spans are
    ///   ordered by `x_start`, ties broken by name so the order is deterministic.
    ///
    /// Positions are not checked against chromosome sizes. Replaces any
    /// previously loaded points and spans.
    pub fn with_data_bp<I, S, F, G>(mut self, iter: I, build: GenomeBuild) -> Self
    where
        I: IntoIterator<Item = (S, F, G)>,
        S: Into<String>,
        F: Into<f64>,
        G: Into<f64>,
    {
        let sizes = build_sizes(&build);

        // Single pass over the build: cumulative start offset per chromosome
        // and the matching span.
        let mut cum_offsets: std::collections::HashMap<&str, u64> =
            std::collections::HashMap::with_capacity(sizes.len());
        let mut spans: Vec<ChromSpan> = Vec::with_capacity(sizes.len());
        let mut running = 0u64;
        for &(name, size) in &sizes {
            let end = running + size;
            cum_offsets.insert(name, running);
            spans.push(ChromSpan {
                name: name.to_string(),
                x_start: running as f64,
                x_end: end as f64,
            });
            running = end;
        }
        let total_genome = running as f64;

        let mut points = Vec::new();
        let mut unknown_bounds: std::collections::HashMap<String, (f64, f64)> =
            std::collections::HashMap::new();
        for (s, f, g) in iter {
            let chrom_raw: String = s.into();
            let chrom = strip_chr(&chrom_raw).to_string();
            let bp: f64 = f.into();
            let pvalue: f64 = g.into();

            let x = match cum_offsets.get(chrom.as_str()) {
                Some(&offset) => offset as f64 + bp,
                None => {
                    // Unknown chromosome: park it after the last known one and
                    // track the extent of its points for the extra span.
                    let x = total_genome + bp;
                    let bounds = unknown_bounds
                        .entry(chrom.clone())
                        .or_insert((f64::INFINITY, f64::NEG_INFINITY));
                    bounds.0 = bounds.0.min(x);
                    bounds.1 = bounds.1.max(x);
                    x
                }
            };

            points.push(ManhattanPoint {
                chromosome: chrom,
                x,
                pvalue,
                label: None,
            });
        }

        let mut extra: Vec<ChromSpan> = unknown_bounds
            .into_iter()
            .map(|(name, (x_start, x_end))| ChromSpan { name, x_start, x_end })
            .collect();
        // HashMap iteration order is arbitrary; the name tie-break keeps the
        // result reproducible when several unknown chromosomes start at the same x.
        extra.sort_by(|a, b| {
            a.x_start
                .partial_cmp(&b.x_start)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| a.name.cmp(&b.name))
        });
        spans.extend(extra);

        self.points = points;
        self.spans = spans;
        self
    }

    /// Loads `(chromosome, x, p-value)` triples with caller-computed x
    /// coordinates.
    ///
    /// Points are stored verbatim in input order (names are not normalised).
    /// One span per chromosome is derived from the min/max x of its points;
    /// spans are ordered by `x_start`, chromosomes with equal starts keeping
    /// first-seen order. Replaces any previously loaded points and spans.
    pub fn with_data_x<I, S, F, G>(mut self, iter: I) -> Self
    where
        I: IntoIterator<Item = (S, F, G)>,
        S: Into<String>,
        F: Into<f64>,
        G: Into<f64>,
    {
        let mut points = Vec::new();
        let mut chrom_bounds: std::collections::HashMap<String, (f64, f64)> =
            std::collections::HashMap::new();
        let mut seen_chroms: Vec<String> = Vec::new();

        for (s, f, g) in iter {
            let chrom: String = s.into();
            let x: f64 = f.into();
            let pvalue: f64 = g.into();

            if let Some(bounds) = chrom_bounds.get_mut(&chrom) {
                bounds.0 = bounds.0.min(x);
                bounds.1 = bounds.1.max(x);
            } else {
                seen_chroms.push(chrom.clone());
                chrom_bounds.insert(chrom.clone(), (x, x));
            }

            points.push(ManhattanPoint {
                chromosome: chrom,
                x,
                pvalue,
                label: None,
            });
        }

        let mut spans: Vec<ChromSpan> = seen_chroms
            .into_iter()
            .map(|name| {
                let (x_start, x_end) = chrom_bounds[&name];
                ChromSpan { name, x_start, x_end }
            })
            .collect();
        spans.sort_by(|a, b| {
            a.x_start
                .partial_cmp(&b.x_start)
                .unwrap_or(std::cmp::Ordering::Equal)
        });

        self.points = points;
        self.spans = spans;
        self
    }

    /// Sets the genome-wide significance threshold.
    pub fn with_genome_wide(mut self, threshold: f64) -> Self {
        self.genome_wide = threshold;
        self
    }

    /// Sets the suggestive significance threshold.
    pub fn with_suggestive(mut self, threshold: f64) -> Self {
        self.suggestive = threshold;
        self
    }

    /// Sets the first point colour (`color_a`).
    pub fn with_color_a<S: Into<String>>(mut self, color: S) -> Self {
        self.color_a = color.into();
        self
    }

    /// Sets the second point colour (`color_b`).
    pub fn with_color_b<S: Into<String>>(mut self, color: S) -> Self {
        self.color_b = color.into();
        self
    }

    /// Sets the palette.
    pub fn with_palette(mut self, palette: Palette) -> Self {
        self.palette = Some(palette);
        self
    }

    /// Sets the point size.
    pub fn with_point_size(mut self, size: f64) -> Self {
        self.point_size = size;
        self
    }

    /// Sets `label_top`.
    pub fn with_label_top(mut self, n: usize) -> Self {
        self.label_top = n;
        self
    }

    /// Sets the label style.
    pub fn with_label_style(mut self, style: LabelStyle) -> Self {
        self.label_style = style;
        self
    }

    /// Sets an explicit p-value floor, which [`floor`](Self::floor) then
    /// returns unchanged.
    pub fn with_pvalue_floor(mut self, floor: f64) -> Self {
        self.pvalue_floor = Some(floor);
        self
    }

    /// Sets the legend label.
    pub fn with_legend<S: Into<String>>(mut self, label: S) -> Self {
        self.legend_label = Some(label.into());
        self
    }

    /// Attaches text labels to already-loaded points.
    ///
    /// Each item is `(chromosome, x, label)`. The label goes to the point on
    /// that chromosome whose stored `x` is nearest to the given `x`, provided
    /// the distance is below `0.5`; if several points are equally near, the
    /// earliest one wins. Items with no matching point are ignored.
    ///
    /// Chromosome names are compared with a leading `chr` ignored on both
    /// sides, so `"chr7"` matches points stored as `"7"` (which is how
    /// `with_data_bp` stores them) and vice versa.
    ///
    /// `x` is the plot coordinate stored on the point; for data loaded with
    /// `with_data_bp` that is the cumulative genome coordinate, not the raw
    /// base-pair position. Call this after loading data, since the loaders
    /// replace all points.
    pub fn with_point_labels<I, S, F, L>(mut self, iter: I) -> Self
    where
        I: IntoIterator<Item = (S, F, L)>,
        S: Into<String>,
        F: Into<f64>,
        L: Into<String>,
    {
        const TOLERANCE: f64 = 0.5;

        for (s, f, l) in iter {
            let chrom: String = s.into();
            let x: f64 = f.into();
            let label: String = l.into();
            let wanted = strip_chr(&chrom);

            let nearest = self
                .points
                .iter_mut()
                .filter(|p| strip_chr(&p.chromosome) == wanted)
                .map(|p| ((p.x - x).abs(), p))
                // A NaN distance fails this test, so survivors are comparable.
                .filter(|(distance, _)| *distance < TOLERANCE)
                .min_by(|a, b| {
                    a.0.partial_cmp(&b.0)
                        .unwrap_or(std::cmp::Ordering::Equal)
                });

            if let Some((_, pt)) = nearest {
                pt.label = Some(label);
            }
        }
        self
    }
}