use crate::rna::dupradar::dupmatrix::DupMatrix;
use crate::rna::dupradar::fitting::FitResult;
use anyhow::Result;
use plotters::prelude::*;
use plotters_svg::SVGBackend;
use std::collections::HashMap;
const SCALE: u32 = 4;
const fn s(v: u32) -> u32 {
v * SCALE
}
const DENSITY_COLORS: [(u8, u8, u8); 5] = [
(0, 255, 255), (0, 0, 255), (0, 255, 0), (255, 255, 0), (255, 0, 0), ];
fn density_color(t: f64) -> RGBColor {
let t = t.clamp(0.0, 1.0);
let n = DENSITY_COLORS.len() - 1;
let idx = t * n as f64;
let i = (idx.floor() as usize).min(n - 1);
let frac = if i == n - 1 && idx >= n as f64 {
1.0
} else {
idx - i as f64
};
let r = DENSITY_COLORS[i].0 as f64 * (1.0 - frac) + DENSITY_COLORS[i + 1].0 as f64 * frac;
let g = DENSITY_COLORS[i].1 as f64 * (1.0 - frac) + DENSITY_COLORS[i + 1].1 as f64 * frac;
let b = DENSITY_COLORS[i].2 as f64 * (1.0 - frac) + DENSITY_COLORS[i + 1].2 as f64 * frac;
RGBColor(r as u8, g as u8, b as u8)
}
fn estimate_density(x: &[f64], y: &[f64], nbins: usize) -> Vec<f64> {
if x.is_empty() {
return vec![];
}
let x_min = x.iter().cloned().fold(f64::INFINITY, f64::min);
let x_max = x.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let y_min = y.iter().cloned().fold(f64::INFINITY, f64::min);
let y_max = y.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let x_range = x_max - x_min;
let y_range = y_max - y_min;
if x_range == 0.0 || y_range == 0.0 {
return vec![1.0; x.len()];
}
let mut x_sorted = x.to_vec();
x_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let mut y_sorted = y.to_vec();
y_sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let bw_x = (quantile(&x_sorted, 0.95) - quantile(&x_sorted, 0.05)) / 25.0;
let bw_y = (quantile(&y_sorted, 0.95) - quantile(&y_sorted, 0.05)) / 25.0;
let bw_x = if bw_x > 0.0 { bw_x } else { x_range / 25.0 };
let bw_y = if bw_y > 0.0 { bw_y } else { y_range / 25.0 };
let grid_x_min = x_min - 1.5 * bw_x;
let grid_x_max = x_max + 1.5 * bw_x;
let grid_y_min = y_min - 1.5 * bw_y;
let grid_y_max = y_max + 1.5 * bw_y;
let grid_x_range = grid_x_max - grid_x_min;
let grid_y_range = grid_y_max - grid_y_min;
let x_step = grid_x_range / (nbins - 1) as f64;
let y_step = grid_y_range / (nbins - 1) as f64;
let bw_x_bins = (bw_x / x_step).max(1.0);
let bw_y_bins = (bw_y / y_step).max(1.0);
let tau = 3.4;
let radius_x = (bw_x_bins * tau).ceil() as i32;
let radius_y = (bw_y_bins * tau).ceil() as i32;
let sigma2_x = 2.0 * bw_x_bins * bw_x_bins;
let sigma2_y = 2.0 * bw_y_bins * bw_y_bins;
let grid_size = nbins;
let mut grid = vec![vec![0.0f64; grid_size]; grid_size];
for i in 0..x.len() {
let fx = (x[i] - grid_x_min) / x_step;
let fy = (y[i] - grid_y_min) / y_step;
let ix = fx.floor() as i32;
let iy = fy.floor() as i32;
let sx = fx - ix as f64; let sy = fy - iy as f64; for (dx, wx) in [(0i32, 1.0 - sx), (1, sx)] {
for (dy, wy) in [(0i32, 1.0 - sy), (1, sy)] {
let gx = ix + dx;
let gy = iy + dy;
if gx >= 0 && (gx as usize) < grid_size && gy >= 0 && (gy as usize) < grid_size {
grid[gx as usize][gy as usize] += wx * wy;
}
}
}
}
let kernel_x: Vec<f64> = (-radius_x..=radius_x)
.map(|dx| (-(dx * dx) as f64 / sigma2_x).exp())
.collect();
let kernel_y: Vec<f64> = (-radius_y..=radius_y)
.map(|dy| (-(dy * dy) as f64 / sigma2_y).exp())
.collect();
let mut temp = vec![vec![0.0f64; grid_size]; grid_size];
#[allow(clippy::needless_range_loop)] for by in 0..grid_size {
for bx in 0..grid_size {
let mut sum = 0.0;
for (ki, dx) in (-radius_x..=radius_x).enumerate() {
let nx = bx as i32 + dx;
if nx >= 0 && (nx as usize) < grid_size {
sum += grid[nx as usize][by] * kernel_x[ki];
}
}
temp[bx][by] = sum;
}
}
let mut smoothed = vec![vec![0.0f64; grid_size]; grid_size];
#[allow(clippy::needless_range_loop)] for bx in 0..grid_size {
for by in 0..grid_size {
let mut sum = 0.0;
for (ki, dy) in (-radius_y..=radius_y).enumerate() {
let ny = by as i32 + dy;
if ny >= 0 && (ny as usize) < grid_size {
sum += temp[bx][ny as usize] * kernel_y[ki];
}
}
smoothed[bx][by] = sum;
}
}
let mut dens: Vec<f64> = (0..x.len())
.map(|i| {
let bx = ((x[i] - grid_x_min) / x_step).round() as usize;
let by = ((y[i] - grid_y_min) / y_step).round() as usize;
let bx = bx.min(grid_size - 1);
let by = by.min(grid_size - 1);
smoothed[bx][by]
})
.collect();
let mn = dens.iter().cloned().fold(f64::INFINITY, f64::min);
let mx = dens.iter().cloned().fold(0.0f64, f64::max);
let range = mx - mn;
if range > 0.0 {
for d in dens.iter_mut() {
*d = (*d - mn) / range; }
}
dens
}
fn format_rpk_tick(log_val: f64) -> String {
let val = 10.0_f64.powf(log_val);
if val >= 10000.0 {
let exp = log_val.round() as i32;
format!("1e+{:02}", exp)
} else if val >= 1.0 {
format!("{}", val as u64)
} else if val >= 0.01 {
let s = format!("{:.2}", val);
s.trim_end_matches('0').trim_end_matches('.').to_string()
} else {
format!("{:.0e}", val)
}
}
fn quantile(sorted: &[f64], p: f64) -> f64 {
if sorted.is_empty() {
return 0.0;
}
if p <= 0.0 {
return sorted[0];
}
if p >= 1.0 {
return *sorted.last().unwrap();
}
let n = sorted.len();
let idx = p * (n - 1) as f64;
let lo = idx.floor() as usize;
let hi = idx.ceil() as usize;
let frac = idx.fract();
if lo == hi {
sorted[lo]
} else {
sorted[lo] * (1.0 - frac) + sorted[hi] * frac
}
}
fn r_pretty_breaks(lo: f64, hi: f64, n: usize) -> Vec<f64> {
let dx = hi - lo;
if dx == 0.0 {
let cell = if lo.abs() < 1e-15 {
1.0
} else {
lo.abs() * 0.4
};
return vec![lo - cell, lo + cell];
}
let cell = dx / n as f64; let base = 10f64.powf(cell.log10().floor());
let candidates = [base, 2.0 * base, 5.0 * base, 10.0 * base];
let nice_cell = candidates
.iter()
.copied()
.find(|&c| c >= cell * (1.0 - 1e-10))
.unwrap_or(10.0 * base);
let lo_break = (lo / nice_cell).floor() * nice_cell;
let hi_break = (hi / nice_cell).ceil() * nice_cell;
let n_steps = ((hi_break - lo_break) / nice_cell).round() as usize;
(0..=n_steps)
.map(|i| lo_break + i as f64 * nice_cell)
.collect()
}
#[allow(clippy::too_many_arguments)]
fn draw_dashed_hline<DB: DrawingBackend>(
area: &DrawingArea<DB, plotters::coord::Shift>,
x_start: i32,
x_end: i32,
y: i32,
color: &RGBColor,
sw: u32,
dash: i32,
gap: i32,
) {
let mut x = x_start;
while x < x_end {
let xe = (x + dash).min(x_end);
area.draw(&PathElement::new(
vec![(x, y), (xe, y)],
color.stroke_width(sw),
))
.ok();
x = xe + gap;
}
}
#[allow(clippy::too_many_arguments)]
fn draw_dashed_curve_via_chart<DB: DrawingBackend>(
chart: &mut ChartContext<
DB,
Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
>,
data_points: &[(f64, f64)],
color: &RGBColor,
radius: u32,
dash_px: f64,
gap_px: f64,
) where
DB::ErrorType: 'static,
{
if data_points.len() < 2 {
return;
}
let pa = chart.plotting_area();
let pixel_pts: Vec<(f64, f64)> = data_points
.iter()
.map(|&(dx, dy)| {
let coord = pa.map_coordinate(&(dx, dy));
(coord.0 as f64, coord.1 as f64)
})
.collect();
let mut circles_to_draw: Vec<(f64, f64)> = Vec::new();
let mut on = true;
let mut remaining = dash_px;
let mut seg = 0;
let mut cx = pixel_pts[0].0;
let mut cy = pixel_pts[0].1;
if on {
circles_to_draw.push(data_points[0]);
}
while seg < pixel_pts.len() - 1 {
let (nx, ny) = pixel_pts[seg + 1];
let dx = nx - cx;
let dy = ny - cy;
let seg_len = (dx * dx + dy * dy).sqrt();
if seg_len < 0.001 {
seg += 1;
if seg < pixel_pts.len() {
cx = pixel_pts[seg].0;
cy = pixel_pts[seg].1;
}
continue;
}
if seg_len <= remaining {
remaining -= seg_len;
cx = nx;
cy = ny;
seg += 1;
if on && seg < data_points.len() {
circles_to_draw.push(data_points[seg]);
}
if remaining < 0.5 {
on = !on;
remaining = if on { dash_px } else { gap_px };
}
} else {
let frac = remaining / seg_len;
cx += dx * frac;
cy += dy * frac;
if on {
let data_frac_x =
data_points[seg].0 + frac * (data_points[seg + 1].0 - data_points[seg].0);
let data_frac_y =
data_points[seg].1 + frac * (data_points[seg + 1].1 - data_points[seg].1);
circles_to_draw.push((data_frac_x, data_frac_y));
}
on = !on;
remaining = if on { dash_px } else { gap_px };
}
}
chart
.draw_series(
circles_to_draw
.into_iter()
.map(|pt| Circle::new(pt, radius, color.filled())),
)
.ok();
}
#[allow(clippy::too_many_arguments)]
fn draw_dashed_vline_via_chart<DB: DrawingBackend>(
chart: &mut ChartContext<
DB,
Cartesian2d<plotters::coord::types::RangedCoordf64, plotters::coord::types::RangedCoordf64>,
>,
x_data: f64,
y_min: f64,
y_max: f64,
color: &RGBColor,
sw: u32,
dash_data_len: f64,
gap_data_len: f64,
) where
DB::ErrorType: 'static,
{
let mut y = y_min;
while y < y_max {
let ye = (y + dash_data_len).min(y_max);
chart
.draw_series(std::iter::once(PathElement::new(
vec![(x_data, y), (x_data, ye)],
color.stroke_width(sw),
)))
.ok();
y = ye + gap_data_len;
}
}
#[allow(clippy::too_many_arguments)]
fn render_density_scatter<DB: DrawingBackend>(
root: DrawingArea<DB, plotters::coord::Shift>,
dm: &DupMatrix,
fit: &FitResult,
rpkm_threshold: Option<f64>,
rpkm_value: f64,
title: &str,
subtitle: &str,
pxs: f64, ) -> Result<()>
where
DB::ErrorType: 'static,
{
let ps = |v: f64| (v * pxs) as u32;
root.fill(&WHITE)?;
let title_height = ps(50.0);
let (top_area, plot_area) = root.split_vertically(title_height);
let cx = (top_area.dim_in_pixel().0 as i32) / 2;
top_area.draw(&Text::new(
title.to_string(),
(cx, (pxs * 4.0) as i32),
("sans-serif", ps(18.0))
.into_font()
.style(FontStyle::Bold)
.color(&BLACK)
.pos(plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
)),
))?;
top_area.draw(&Text::new(
subtitle.to_string(),
(cx, (pxs * 25.0) as i32),
("sans-serif", ps(14.0)).into_font().color(&BLACK).pos(
plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
),
),
))?;
let mut xd: Vec<f64> = Vec::new();
let mut yd: Vec<f64> = Vec::new();
for r in &dm.rows {
if r.rpk > 0.0 && r.dup_rate.is_finite() {
xd.push(r.rpk.log10());
yd.push(r.dup_rate * 100.0);
}
}
if xd.is_empty() {
anyhow::bail!("No valid data points for density scatter plot");
}
let densities = estimate_density(&xd, &yd, 500);
let raw_min = xd.iter().cloned().fold(f64::INFINITY, f64::min);
let raw_max = xd.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let tick_min = raw_min.floor() as i32; let tick_max = raw_max.ceil() as i32;
let pad = (raw_max - raw_min) * 0.04;
let x_min = (raw_min - pad).min(tick_min as f64); let x_max = (raw_max + pad).max(tick_max as f64 + 0.3);
let mut chart = ChartBuilder::on(&plot_area)
.margin_top(ps(5.0))
.margin_right(ps(25.0))
.margin_bottom(ps(10.0))
.margin_left(ps(10.0))
.x_label_area_size(ps(40.0))
.y_label_area_size(ps(55.0))
.build_cartesian_2d(x_min..x_max, 0.0f64..100.0)?;
chart
.configure_mesh()
.bold_line_style(TRANSPARENT)
.light_line_style(TRANSPARENT)
.x_desc("expression (reads/kbp)")
.y_desc("% duplicate reads")
.x_label_formatter(&|v| {
let r = (*v * 10.0).round() / 10.0;
if (r - r.round()).abs() < 0.01 {
format_rpk_tick(r)
} else {
String::new()
}
})
.y_labels(21) .y_label_formatter(&|v| {
let iv = v.round() as i32;
if (0..=100).contains(&iv) && iv % 25 == 0 && (*v - iv as f64).abs() < 0.1 {
format!("{}", iv)
} else {
String::new()
}
})
.axis_desc_style(("sans-serif", ps(14.0)))
.label_style(("sans-serif", ps(12.0)))
.draw()?;
let point_radius = (pxs * 1.2).round().max(1.0) as u32;
let mut pixel_map: HashMap<(i32, i32), (f64, f64, f64)> = HashMap::new();
for (i, (&x, &y)) in xd.iter().zip(yd.iter()).enumerate() {
let px = chart.backend_coord(&(x, y));
let d = densities[i];
let key = (px.0, px.1);
pixel_map
.entry(key)
.and_modify(|existing| {
if d > existing.2 {
*existing = (x, y, d);
}
})
.or_insert((x, y, d));
}
let mut deduped: Vec<(f64, f64, f64)> = pixel_map.into_values().collect();
deduped.sort_by(|a, b| a.2.partial_cmp(&b.2).unwrap_or(std::cmp::Ordering::Equal));
let max_dens = deduped.iter().map(|d| d.2).fold(0.0f64, f64::max);
for (x, y, d) in deduped {
let t = if max_dens > 0.0 { d / max_dens } else { 0.0 };
let c = density_color(t);
chart.draw_series(std::iter::once(Circle::new(
(x, y),
point_radius,
c.filled(),
)))?;
}
let n_curve = (2000.0 * pxs) as usize;
let curve_pts: Vec<(f64, f64)> = (0..=n_curve)
.map(|i| {
let x = x_min + (x_max - x_min) * i as f64 / n_curve as f64;
let y = (fit.predict(x) * 100.0).clamp(0.0, 100.0);
(x, y)
})
.collect();
let curve_radius = (pxs * 0.8).round().max(1.0) as u32;
let dash_px = 5.0 * pxs;
let gap_px = 10.0 * pxs;
draw_dashed_curve_via_chart(
&mut chart,
&curve_pts,
&BLACK,
curve_radius,
dash_px,
gap_px,
);
let sw = (pxs.round() as u32).max(1);
let vline_dash = 1.0;
let vline_gap = 1.0;
let rpk_1k = 3.0f64;
if rpk_1k >= x_min && rpk_1k <= x_max {
draw_dashed_vline_via_chart(
&mut chart, rpk_1k, 0.0, 100.0, &RED, sw, vline_dash, vline_gap,
);
}
if let Some(rpk_pos) = rpkm_threshold {
if rpk_pos > 0.0 {
let lr = rpk_pos.log10();
if lr.is_finite() && lr >= x_min && lr <= x_max {
draw_dashed_vline_via_chart(
&mut chart, lr, 0.0, 100.0, &GREEN, sw, vline_dash, vline_gap,
);
}
}
}
let pa = chart.plotting_area();
let pa_dim = pa.dim_in_pixel();
let pa_pos = pa.get_base_pixel();
let pa_x = (pa_pos.0, pa_pos.0 + pa_dim.0 as i32);
let pa_y = (pa_pos.1, pa_pos.1 + pa_dim.1 as i32);
let fsz = ps(11.0);
let line_h = (pxs * 14.0) as i32;
let legend_sw = (pxs * 1.5).round().max(1.0) as u32;
let sample_len = (pxs * 24.0) as i32;
let text_gap = (pxs * 4.0) as i32;
let lpad = (pxs * 6.0) as i32;
let samp_dash = (pxs * 3.0) as i32;
let samp_gap = (pxs * 3.0) as i32;
{
let bw = (pxs * 70.0) as i32;
let bh = (pxs * 32.0) as i32;
let bx = pa_x.0 + (pxs * 8.0) as i32;
let by = pa_y.0 + (pxs * 8.0) as i32;
plot_area.draw(&Rectangle::new(
[(bx, by), (bx + bw, by + bh)],
ShapeStyle {
color: WHITE.to_rgba(),
filled: true,
stroke_width: 0,
},
))?;
plot_area.draw(&Rectangle::new(
[(bx, by), (bx + bw, by + bh)],
BLACK.stroke_width(sw),
))?;
let tx = bx + (pxs * 6.0) as i32;
let mut cy = by + (pxs * 5.0) as i32;
plot_area.draw(&Text::new(
format!("Int: {:.2}", fit.intercept),
(tx, cy),
("sans-serif", fsz).into_font().color(&BLACK),
))?;
cy += line_h;
plot_area.draw(&Text::new(
format!("Sl: {:.2}", fit.slope),
(tx, cy),
("sans-serif", fsz).into_font().color(&BLACK),
))?;
}
{
let has_rpkm = rpkm_threshold.is_some_and(|v| v > 0.0);
let n_rows = if has_rpkm { 2 } else { 1 };
let lw = (pxs * 110.0) as i32;
let lh = (pxs * (8.0 + n_rows as f64 * 14.0 + 4.0)) as i32;
let lx = pa_x.1 - lw - (pxs * 8.0) as i32;
let ly = pa_y.1 - lh - (pxs * 30.0) as i32;
plot_area.draw(&Rectangle::new(
[(lx, ly), (lx + lw, ly + lh)],
ShapeStyle {
color: WHITE.to_rgba(),
filled: true,
stroke_width: 0,
},
))?;
plot_area.draw(&Rectangle::new(
[(lx, ly), (lx + lw, ly + lh)],
BLACK.stroke_width(sw),
))?;
let row_y = ly + (pxs * 6.0) as i32;
let line_y = row_y + (pxs * 5.0) as i32;
draw_dashed_hline(
&plot_area,
lx + lpad,
lx + lpad + sample_len,
line_y,
&RED,
legend_sw,
samp_dash,
samp_gap,
);
plot_area.draw(&Text::new(
"1 read/bp",
(lx + lpad + sample_len + text_gap, row_y),
("sans-serif", fsz).into_font().color(&BLACK),
))?;
if has_rpkm {
let row_y2 = row_y + line_h;
let line_y2 = row_y2 + (pxs * 5.0) as i32;
draw_dashed_hline(
&plot_area,
lx + lpad,
lx + lpad + sample_len,
line_y2,
&GREEN,
legend_sw,
samp_dash,
samp_gap,
);
let rpkm_label = if rpkm_value == rpkm_value.round() {
format!("{} RPKM", rpkm_value as i64)
} else {
format!("{} RPKM", rpkm_value)
};
plot_area.draw(&Text::new(
rpkm_label,
(lx + lpad + sample_len + text_gap, row_y2),
("sans-serif", fsz).into_font().color(&BLACK),
))?;
}
}
plot_area.present()?;
Ok(())
}
pub fn density_scatter_plot(
dm: &DupMatrix,
fit: &FitResult,
rpkm_threshold: Option<f64>,
rpkm_value: f64,
sample_name: &str,
output_path: &std::path::Path,
) -> Result<()> {
let title = "Density scatter plot";
let png_path = output_path.with_extension("png");
let root = BitMapBackend::new(&png_path, (s(480), s(480))).into_drawing_area();
render_density_scatter(
root,
dm,
fit,
rpkm_threshold,
rpkm_value,
title,
sample_name,
SCALE as f64,
)?;
let svg_path = output_path.with_extension("svg");
let root = SVGBackend::new(&svg_path, (480, 480)).into_drawing_area();
render_density_scatter(
root,
dm,
fit,
rpkm_threshold,
rpkm_value,
title,
sample_name,
1.0,
)?;
Ok(())
}
fn render_boxplot<DB: DrawingBackend>(
root: DrawingArea<DB, plotters::coord::Shift>,
dm: &DupMatrix,
title: &str,
subtitle: &str,
pxs: f64,
) -> Result<()>
where
DB::ErrorType: 'static,
{
let ps = |v: f64| (v * pxs) as u32;
root.fill(&WHITE)?;
let title_height = ps(50.0);
let (top_area, plot_area) = root.split_vertically(title_height);
let cx = (top_area.dim_in_pixel().0 as i32) / 2;
top_area.draw(&Text::new(
title.to_string(),
(cx, (pxs * 4.0) as i32),
("sans-serif", ps(18.0))
.into_font()
.style(FontStyle::Bold)
.color(&BLACK)
.pos(plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
)),
))?;
top_area.draw(&Text::new(
subtitle.to_string(),
(cx, (pxs * 25.0) as i32),
("sans-serif", ps(14.0)).into_font().color(&BLACK).pos(
plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
),
),
))?;
let step = 0.05;
let n_bins = (1.0 / step) as usize;
let mut all_rpk: Vec<f64> = dm.rows.iter().map(|r| r.rpk).collect();
all_rpk.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
if all_rpk.is_empty() {
anyhow::bail!("No valid data for boxplot");
}
let all_gd: Vec<(f64, f64)> = dm.rows.iter().map(|r| (r.rpk, r.dup_rate)).collect();
let mut labels: Vec<String> = Vec::new();
let mut bins: Vec<Vec<f64>> = Vec::new();
for bi in 0..n_bins {
let pl = bi as f64 * step;
let ph = (bi + 1) as f64 * step;
let ql = quantile(&all_rpk, pl);
let qh = quantile(&all_rpk, ph);
let bin_genes: Vec<(f64, f64)> = all_gd
.iter()
.filter(|(r, _)| {
if bi == n_bins - 1 {
*r >= ql && *r <= qh } else {
*r >= ql && *r < qh
}
})
.copied()
.collect();
let vals: Vec<f64> = bin_genes
.iter()
.filter(|(_, d)| d.is_finite())
.map(|(_, d)| *d)
.collect();
let mean_rpk: f64 = if bin_genes.is_empty() {
f64::NAN
} else {
let rpk_vals: Vec<f64> = bin_genes.iter().map(|(r, _)| *r).collect();
rpk_vals.iter().sum::<f64>() / rpk_vals.len() as f64
};
let rpk_s = if mean_rpk.is_nan() || mean_rpk == 0.0 {
"NaN".into()
} else {
format!("{:.1}", mean_rpk)
};
labels.push(format!(
"{} - {} % / {}",
(pl * 100.0) as u32,
(ph * 100.0) as u32,
rpk_s
));
bins.push(vals);
}
let mut chart = ChartBuilder::on(&plot_area)
.margin_top(ps(5.0))
.margin_right(ps(15.0))
.margin_bottom(ps(5.0))
.margin_left(ps(10.0))
.x_label_area_size(ps(70.0))
.y_label_area_size(ps(50.0))
.build_cartesian_2d(0.0f64..n_bins as f64, 0.0f64..1.0)?;
chart
.configure_mesh()
.bold_line_style(TRANSPARENT)
.light_line_style(TRANSPARENT)
.y_desc("duplication (%)")
.x_desc("mean expression (reads/kbp)")
.x_label_formatter(&|_v| String::new()) .x_labels(n_bins)
.y_labels(6)
.y_label_formatter(&|v| format!("{:.1}", v))
.axis_desc_style(("sans-serif", ps(13.0)))
.label_style(("sans-serif", ps(11.0)))
.draw()?;
{
let chart_plot_area = chart.plotting_area();
let font_size = ps(5.0) as f64;
for (i, label) in labels.iter().enumerate() {
let x_coord = i as f64 + 0.5;
let (px, py) = chart_plot_area.map_coordinate(&(x_coord, 0.0f64));
let text_style = ("sans-serif", font_size)
.into_font()
.color(&BLACK)
.transform(FontTransform::Rotate270);
plot_area.draw_text(label, &text_style, (px - 3, py + 5))?;
}
}
let gray_fill = RGBAColor(190, 190, 190, 1.0);
for (idx, vals) in bins.iter().enumerate() {
if vals.is_empty() {
continue;
}
let mut sv = vals.clone();
sv.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
let q1 = quantile(&sv, 0.25);
let med = quantile(&sv, 0.5);
let q3 = quantile(&sv, 0.75);
let iqr = q3 - q1;
let wl = sv
.iter()
.find(|&&v| v >= q1 - 1.5 * iqr)
.copied()
.unwrap_or(q1);
let wh = sv
.iter()
.rev()
.find(|&&v| v <= q3 + 1.5 * iqr)
.copied()
.unwrap_or(q3);
let bl = idx as f64 + 0.2;
let br = idx as f64 + 0.8;
let cx = idx as f64 + 0.5;
let cap = 0.1;
chart.draw_series(std::iter::once(Rectangle::new(
[(bl, q1), (br, q3)],
ShapeStyle {
color: gray_fill,
filled: true,
stroke_width: ps(1.0),
},
)))?;
chart.draw_series(std::iter::once(Rectangle::new(
[(bl, q1), (br, q3)],
BLACK.stroke_width(ps(1.0)),
)))?;
chart.draw_series(LineSeries::new(
vec![(bl, med), (br, med)],
BLACK.stroke_width(ps(2.0)),
))?;
for &(from, to, cap_y) in &[(q1, wl, wl), (q3, wh, wh)] {
chart.draw_series(LineSeries::new(
vec![(cx, from), (cx, to)],
BLACK.stroke_width(ps(1.0)),
))?;
chart.draw_series(LineSeries::new(
vec![(bl + cap, cap_y), (br - cap, cap_y)],
BLACK.stroke_width(ps(1.0)),
))?;
}
for &v in &sv {
if v < wl || v > wh {
chart.draw_series(std::iter::once(Circle::new(
(cx, v),
ps(3.0),
BLACK.stroke_width(ps(1.0)),
)))?;
}
}
}
plot_area.present()?;
Ok(())
}
pub fn duprate_boxplot(
dm: &DupMatrix,
sample_name: &str,
output_path: &std::path::Path,
) -> Result<()> {
let title = "Percent Duplication by Expression";
let root = BitMapBackend::new(output_path, (s(480), s(480))).into_drawing_area();
render_boxplot(root, dm, title, sample_name, SCALE as f64)?;
let svg = output_path.with_extension("svg");
let root = SVGBackend::new(&svg, (480, 480)).into_drawing_area();
render_boxplot(root, dm, title, sample_name, 1.0)?;
Ok(())
}
fn render_histogram<DB: DrawingBackend>(
root: DrawingArea<DB, plotters::coord::Shift>,
dm: &DupMatrix,
title: &str,
subtitle: &str,
pxs: f64,
) -> Result<()>
where
DB::ErrorType: 'static,
{
let ps = |v: f64| (v * pxs) as u32;
root.fill(&WHITE)?;
let title_height = ps(50.0);
let (top_area, plot_area) = root.split_vertically(title_height);
let cx = (top_area.dim_in_pixel().0 as i32) / 2;
top_area.draw(&Text::new(
title.to_string(),
(cx, (pxs * 4.0) as i32),
("sans-serif", ps(18.0))
.into_font()
.style(FontStyle::Bold)
.color(&BLACK)
.pos(plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
)),
))?;
top_area.draw(&Text::new(
subtitle.to_string(),
(cx, (pxs * 25.0) as i32),
("sans-serif", ps(14.0)).into_font().color(&BLACK).pos(
plotters::style::text_anchor::Pos::new(
plotters::style::text_anchor::HPos::Center,
plotters::style::text_anchor::VPos::Top,
),
),
))?;
let log_rpk: Vec<f64> = dm
.rows
.iter()
.filter(|r| r.rpk > 0.0)
.map(|r| r.rpk.log10())
.collect();
if log_rpk.is_empty() {
anyhow::bail!("No valid data for expression histogram");
}
let raw_min = log_rpk.iter().cloned().fold(f64::INFINITY, f64::min);
let raw_max = log_rpk.iter().cloned().fold(f64::NEG_INFINITY, f64::max);
let breaks = r_pretty_breaks(raw_min, raw_max, 100);
let n_bins = breaks.len() - 1;
let x_min = breaks[0];
let bw = breaks[1] - breaks[0]; let x_max = *breaks.last().unwrap() + 0.3;
let mut hist = vec![0u32; n_bins];
for &v in &log_rpk {
let b = ((v - x_min) / bw).floor() as usize;
hist[b.min(n_bins - 1)] += 1;
}
let y_max = *hist.iter().max().unwrap_or(&1) as f64;
let mut chart = ChartBuilder::on(&plot_area)
.margin_top(ps(5.0))
.margin_right(ps(25.0))
.margin_bottom(ps(10.0))
.margin_left(ps(10.0))
.x_label_area_size(ps(40.0))
.y_label_area_size(ps(45.0))
.build_cartesian_2d(x_min..x_max, 0.0..y_max * 1.08)?;
chart
.configure_mesh()
.bold_line_style(TRANSPARENT)
.light_line_style(TRANSPARENT)
.x_desc("reads per kilobase (RPK)")
.y_desc("Frequency")
.x_label_formatter(&|v| {
let r = (*v * 10.0).round() / 10.0;
if (r - r.round()).abs() < 0.01 {
format_rpk_tick(r)
} else {
String::new()
}
})
.y_labels(6)
.y_label_formatter(&|v| {
if *v == v.floor() && *v >= 0.0 {
format!("{}", *v as i32)
} else {
String::new()
}
})
.axis_desc_style(("sans-serif", ps(14.0)))
.label_style(("sans-serif", ps(12.0)))
.draw()?;
let gray = RGBAColor(190, 190, 190, 1.0);
for (i, &c) in hist.iter().enumerate() {
if c == 0 {
continue;
}
let x0 = x_min + i as f64 * bw;
let x1 = x0 + bw;
chart.draw_series(std::iter::once(Rectangle::new(
[(x0, 0.0), (x1, c as f64)],
ShapeStyle {
color: gray,
filled: true,
stroke_width: 0,
},
)))?;
chart.draw_series(std::iter::once(Rectangle::new(
[(x0, 0.0), (x1, c as f64)],
BLACK.stroke_width(1),
)))?;
}
let thr = 3.0f64;
if thr >= x_min && thr <= x_max {
chart.draw_series(LineSeries::new(
vec![(thr, 0.0), (thr, y_max * 1.08)],
RED.stroke_width(ps(1.0)),
))?;
}
plot_area.present()?;
Ok(())
}
pub fn expression_histogram(
dm: &DupMatrix,
sample_name: &str,
output_path: &std::path::Path,
) -> Result<()> {
let title = "Distribution of RPK values per gene";
let root = BitMapBackend::new(output_path, (s(480), s(480))).into_drawing_area();
render_histogram(root, dm, title, sample_name, SCALE as f64)?;
let svg = output_path.with_extension("svg");
let root = SVGBackend::new(&svg, (480, 480)).into_drawing_area();
render_histogram(root, dm, title, sample_name, 1.0)?;
Ok(())
}
pub fn write_intercept_slope(
fit: &FitResult,
sample_name: &str,
path: &std::path::Path,
) -> Result<()> {
use std::io::Write;
let mut f = std::fs::File::create(path)?;
write!(
f,
"{sample_name} - dupRadar Int (duprate at low read counts): {}\n\
{sample_name} - dupRadar Sl (progression of the duplication rate): {}\n",
fit.intercept, fit.slope
)?;
Ok(())
}
pub fn write_mqc_intercept(
fit: &FitResult,
sample_name: &str,
path: &std::path::Path,
) -> Result<()> {
use std::io::Write;
let mut f = std::fs::File::create(path)?;
writeln!(f, "# id: dupRadar")?;
writeln!(f, "# plot_type: 'generalstats'")?;
writeln!(f, "# pconfig:")?;
writeln!(f, "# dupRadar_intercept:")?;
writeln!(f, "# title: 'dupRadar int'")?;
writeln!(f, "# namespace: 'dupRadar'")?;
writeln!(
f,
"# description: 'dupRadar duplication rate at low read counts'"
)?;
writeln!(f, "# max: 100")?;
writeln!(f, "# min: 0")?;
writeln!(f, "# format: '{{:.2f}}'")?;
writeln!(f, "Sample\tdupRadar_intercept")?;
writeln!(f, "{}\t{}", sample_name, fit.intercept)?;
Ok(())
}
pub fn write_mqc_curve(fit: &FitResult, dm: &DupMatrix, path: &std::path::Path) -> Result<()> {
use std::io::Write;
let mut f = std::fs::File::create(path)?;
writeln!(f, "# id: 'dupradar'")?;
writeln!(f, "# section_name: 'dupRadar'")?;
writeln!(f, "# description: 'Duplication rate vs expression'")?;
writeln!(f, "# plot_type: 'linegraph'")?;
writeln!(f, "# pconfig:")?;
writeln!(f, "# title: 'dupRadar General Linear Model'")?;
writeln!(f, "# xlab: 'Expression (reads/kbp)'")?;
writeln!(f, "# ylab: 'Duplication rate (%)'")?;
writeln!(f, "# ymin: 0")?;
writeln!(f, "# ymax: 100")?;
writeln!(f, "# xlog: True")?;
let rpks: Vec<f64> = dm
.rows
.iter()
.filter(|r| r.rpk > 0.0)
.map(|r| r.rpk)
.collect();
if rpks.is_empty() {
return Ok(());
}
let mn = rpks.iter().copied().fold(f64::INFINITY, f64::min);
let mx = rpks.iter().copied().fold(f64::NEG_INFINITY, f64::max);
let lmin = mn.log10();
let lmax = mx.log10();
let n = 100;
for i in 0..=n {
let lr = lmin + (lmax - lmin) * (i as f64) / (n as f64);
let rpk = 10.0_f64.powf(lr);
let pct = fit.predict_rpk(rpk) * 100.0;
writeln!(f, "{}\t{}", rpk, pct)?;
}
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_density_color_endpoints() {
let c0 = density_color(0.0);
assert_eq!((c0.0, c0.1, c0.2), (0, 255, 255)); let c1 = density_color(1.0);
assert_eq!((c1.0, c1.1, c1.2), (255, 0, 0)); let c_mid = density_color(0.5);
assert_eq!((c_mid.0, c_mid.1, c_mid.2), (0, 255, 0)); }
#[test]
fn test_quantile() {
let d = vec![1.0, 2.0, 3.0, 4.0, 5.0];
assert!((quantile(&d, 0.0) - 1.0).abs() < 1e-10);
assert!((quantile(&d, 0.5) - 3.0).abs() < 1e-10);
assert!((quantile(&d, 1.0) - 5.0).abs() < 1e-10);
assert!((quantile(&d, 0.25) - 2.0).abs() < 1e-10);
}
#[test]
fn test_estimate_density() {
let x = vec![0.0, 0.1, 0.0, 0.1, 5.0];
let y = vec![0.0, 0.1, 0.1, 0.0, 5.0];
let d = estimate_density(&x, &y, 100);
assert_eq!(d.len(), 5);
assert!(d[0] > d[4]);
}
#[test]
fn test_format_rpk_tick() {
assert_eq!(format_rpk_tick(-2.0), "0.01");
assert_eq!(format_rpk_tick(-1.0), "0.1");
assert_eq!(format_rpk_tick(0.0), "1");
assert_eq!(format_rpk_tick(1.0), "10");
assert_eq!(format_rpk_tick(2.0), "100");
assert_eq!(format_rpk_tick(3.0), "1000");
}
}