use plotters::chart::ChartBuilder;
use plotters::prelude::*;
use polars::prelude::*;
use std::error::Error;
use std::io::Cursor;
use std::path::PathBuf;
/// Every failure this program can report, from reading the input to writing
/// the finished plot.
#[derive(Debug)]
enum PlotRangeError {
/// An I/O failure, e.g. while opening or reading the input.
IoError(std::io::Error),
/// A failure reported by polars (CSV parsing, column casts).
PolarsError(PolarsError),
/// The colour option could not be decoded as hexadecimal.
HexDecodeError(hex::FromHexError),
/// A requested column is missing or could not be used as a series.
InvalidColumn(String),
/// The data or a derived setting (paths, delimiter, column types) is unusable.
InvalidData(String),
/// A failure reported while drawing with plotters.
PlotterError(String),
}
impl std::fmt::Display for PlotRangeError {
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
match self {
PlotRangeError::IoError(e) => write!(f, "IO error: {}", e),
PlotRangeError::PolarsError(e) => write!(f, "Data processing error: {}", e),
PlotRangeError::HexDecodeError(e) => write!(f, "Invalid color format: {}", e),
PlotRangeError::InvalidColumn(msg) => write!(f, "Invalid column: {}", msg),
PlotRangeError::InvalidData(msg) => write!(f, "Invalid data: {}", msg),
PlotRangeError::PlotterError(msg) => write!(f, "Plotting error: {}", msg),
}
}
}
// Relies on the default `Error` methods: `source()` is not overridden, so the
// wrapped errors are only surfaced through the `Display` text.
impl Error for PlotRangeError {}
impl From<std::io::Error> for PlotRangeError {
fn from(error: std::io::Error) -> Self {
PlotRangeError::IoError(error)
}
}
impl From<PolarsError> for PlotRangeError {
fn from(error: PolarsError) -> Self {
PlotRangeError::PolarsError(error)
}
}
/// Lets `?` turn a hex decoding failure into [`PlotRangeError::HexDecodeError`].
impl From<hex::FromHexError> for PlotRangeError {
    fn from(source: hex::FromHexError) -> Self {
        Self::HexDecodeError(source)
    }
}
use clap::Parser;
// Command-line options for the plotrange tool.
//
// NOTE: plain `//` comments are used here on purpose. clap's derive macro turns
// `///` doc comments into help text, so doc comments would change the program's
// `--help` output.
#[allow(non_snake_case)]
#[derive(Debug, Parser)]
#[command(name = "plotrange", about = "Plots genomic ranges and intervals", version)]
struct Opt {
// Input file; when absent, `main` reads from standard input.
#[arg(value_name = "FILE")]
input: Option<PathBuf>,
// 1-based position of the chromosome column (1 is subtracted before lookup).
#[arg(long, short, default_value = "1")]
chr: usize,
// 1-based position of the range start column.
#[arg(long, short, default_value = "2")]
start: usize,
// 1-based position of the range end column.
#[arg(long, short, default_value = "3")]
end: usize,
// Value passed to the colour's `mix` call when filling the bars.
#[arg(long, short, default_value = "0.7")]
alpha: f64,
// Bar colour as a hex string; the first three decoded bytes are used as R, G, B.
#[arg(long, default_value = "1E88E5")]
color: String,
// NOTE(review): not read anywhere in the visible code.
#[arg(long)]
color_by: Option<usize>,
// NOTE(review): not read anywhere in the visible code.
#[arg(long)]
gradient: Option<usize>,
// Field separator; the literal two characters `\t` select a tab, otherwise
// the first byte of the value is used.
#[arg(short, long, default_value = r"\t")]
delimiter: String,
// Forwarded to the CSV reader's `with_has_header`.
#[arg(long)]
header: bool,
// Forwarded to the CSV reader's `with_skip_rows`.
#[arg(long, default_value = "0")]
skip: usize,
// Output path; when absent, `main` derives one from the input file name plus
// `.plotrange.svg` or `.plotrange.png`.
#[arg(long, short, value_name = "FILE")]
outfile: Option<PathBuf>,
// Draw with the SVG backend instead of the bitmap backend.
#[arg(long)]
svg: bool,
// Plot title; the output file name is used when absent.
#[arg(short, long)]
title: Option<String>,
// Canvas width handed to the drawing backend.
#[arg(long, default_value = "2560")]
width: u32,
// Canvas height handed to the drawing backend.
#[arg(long, default_value = "1200")]
height: u32,
// X axis description.
#[arg(long, default_value = "Position")]
xdesc: String,
// Y axis description.
#[arg(long, default_value = "Ranges")]
ydesc: String,
// Size of the x label area.
#[arg(long, default_value = "70")]
xdesc_area: u32,
// Size of the y label area.
#[arg(long, default_value = "100")]
ydesc_area: u32,
// Font family for the axis tick labels.
#[arg(long, default_value = "sans-serif")]
label_font: String,
// Font size for the axis tick labels.
#[arg(long, default_value = "24")]
label_font_size: u32,
// Font family for the axis descriptions.
#[arg(long, default_value = "sans-serif")]
axis_desc_font: String,
// Font size for the axis descriptions.
#[arg(long, default_value = "22")]
axis_desc_font_size: u32,
// Font family for the title.
#[arg(long, default_value = "sans-serif")]
title_font: String,
// Font size for the title.
#[arg(long, default_value = "24")]
title_font_size: u32,
// Bar height setting; it is halved and divided by the y axis maximum to get
// the half-height of each rectangle in y axis units.
#[arg(long, default_value = "20")]
bar_height: u32,
// Format x axis labels with `format_si_number`.
#[arg(long)]
si_format_x: bool,
// Format y axis labels with `format_si_number`.
#[arg(long)]
si_format_y: bool,
// Draw one row per distinct chromosome value instead of one row per range.
#[arg(long)]
group_by_chr: bool,
}
/// Program entry point: parses the command line, loads the delimited input
/// into a data frame and hands it to `plot_ranges`.
///
/// # Errors
///
/// Fails when the input cannot be opened or read, when no output name can be
/// derived, when the delimiter is empty, or when parsing or plotting fails.
fn main() -> Result<(), PlotRangeError> {
    let mut opt = Opt::parse();

    // Read from the named file if there is one, otherwise from standard input.
    let mut input: Box<dyn std::io::Read + 'static> = match opt.input.as_deref() {
        Some(path) => Box::new(std::fs::File::open(path)?),
        None => Box::new(std::io::stdin()),
    };
    // Standard input gets the placeholder name "STDIN" so that a default
    // output file name can still be derived below.
    if opt.input.is_none() {
        opt.input = Some(PathBuf::from("STDIN"));
    }

    // Without an explicit output path, use "<input file name>.plotrange.<ext>".
    if opt.outfile.is_none() {
        let source_path = opt
            .input
            .as_ref()
            .ok_or_else(|| PlotRangeError::InvalidData("Input path missing".to_string()))?;
        let base_name = source_path
            .file_name()
            .ok_or_else(|| PlotRangeError::InvalidData("Invalid input filename".to_string()))?
            .to_string_lossy();
        let suffix = if opt.svg {
            ".plotrange.svg"
        } else {
            ".plotrange.png"
        };
        let mut default_out = PathBuf::new();
        default_out.set_file_name(format!("{}{}", base_name, suffix));
        opt.outfile = Some(default_out);
    }

    // The two-character sequence `\t` means a tab; anything else contributes
    // its first byte.
    let delimiter = if opt.delimiter == r"\t" {
        b'\t'
    } else {
        opt.delimiter
            .bytes()
            .next()
            .ok_or_else(|| PlotRangeError::InvalidData("Not a valid delimiter".to_string()))?
    };

    // The whole input is buffered so the CSV reader can work on a cursor.
    let mut raw = Vec::new();
    input.read_to_end(&mut raw)?;

    let parse_options = CsvParseOptions::default()
        .with_separator(delimiter)
        .with_try_parse_dates(false)
        .with_missing_is_null(true)
        .with_truncate_ragged_lines(true);
    let df = CsvReadOptions::default()
        .with_parse_options(parse_options)
        .with_ignore_errors(true)
        .with_skip_rows(opt.skip)
        .with_has_header(opt.header)
        .into_reader_with_file_handle(Cursor::new(raw))
        .finish()?;

    plot_ranges(&opt, df)
}
/// Renders `value` with two decimals and an SI-style suffix chosen from its
/// magnitude (`T`, `G`, `M`, `K`, none, `m`, `μ`, `n`, `p`).
///
/// Zero is rendered as `"0"`. Any other value that matches none of the units
/// (magnitudes below `1e-12`, or NaN) falls back to scientific notation with
/// two decimals.
fn format_si_number(value: f64) -> String {
    // (lower bound and divisor, suffix) for magnitudes of one thousand and up.
    const LARGE_UNITS: [(f64, &str); 4] = [(1e12, "T"), (1e9, "G"), (1e6, "M"), (1e3, "K")];
    // (lower bound, multiplier, suffix) for magnitudes below one.
    const SMALL_UNITS: [(f64, f64, &str); 4] = [
        (1e-3, 1e3, "m"),
        (1e-6, 1e6, "μ"),
        (1e-9, 1e9, "n"),
        (1e-12, 1e12, "p"),
    ];

    let magnitude = value.abs();

    for &(unit, suffix) in LARGE_UNITS.iter() {
        if magnitude >= unit {
            return format!("{:.2}{}", value / unit, suffix);
        }
    }
    if magnitude >= 1.0 {
        return format!("{:.2}", value);
    }
    for &(floor, multiplier, suffix) in SMALL_UNITS.iter() {
        if magnitude >= floor {
            return format!("{:.2}{}", value * multiplier, suffix);
        }
    }

    if magnitude == 0.0 {
        "0".to_string()
    } else {
        format!("{:.2e}", value)
    }
}
/// Prints the output file name, then draws `df` with the SVG backend when
/// `opt.svg` is set and with the bitmap backend otherwise.
///
/// # Errors
///
/// Returns `InvalidData` when the output path is missing or is not valid
/// UTF-8, and forwards any error from `plot_on_backend`.
fn plot_ranges(opt: &Opt, df: DataFrame) -> Result<(), PlotRangeError> {
    let out_path = match opt.outfile.as_ref() {
        Some(path) => path,
        None => {
            return Err(PlotRangeError::InvalidData(
                "Output file path missing".to_string(),
            ))
        }
    };
    let plot_filename = match out_path.to_str() {
        Some(text) => text.to_string(),
        None => {
            return Err(PlotRangeError::InvalidData(
                "Invalid output file path".to_string(),
            ))
        }
    };

    // Report where the plot is going before drawing it.
    println!("{}", plot_filename);

    let canvas_size = (opt.width, opt.height);
    if opt.svg {
        let backend = SVGBackend::new(&plot_filename, canvas_size);
        plot_on_backend(opt, df, backend)?;
    } else {
        let backend = BitMapBackend::new(&plot_filename, canvas_size);
        plot_on_backend(opt, df, backend)?;
    }
    Ok(())
}
fn plot_on_backend<B>(opt: &Opt, df: DataFrame, backend: B) -> Result<(), PlotRangeError>
where
B: DrawingBackend,
{
let plot_filename = opt
.outfile
.as_ref()
.ok_or_else(|| PlotRangeError::InvalidData("Output file path missing".to_string()))?
.to_str()
.ok_or_else(|| PlotRangeError::InvalidData("Invalid output file path".to_string()))?
.to_string();
let root = backend.into_drawing_area();
root.fill(&WHITE).map_err(|e| PlotRangeError::PlotterError(format!("Fill error: {:?}", e)))?;
root.titled(
opt.title.as_ref().unwrap_or(&plot_filename),
(opt.title_font.as_str(), opt.title_font_size),
).map_err(|e| PlotRangeError::PlotterError(format!("Title error: {:?}", e)))?;
let mut chart = ChartBuilder::on(&root);
chart
.x_label_area_size(opt.xdesc_area)
.y_label_area_size(opt.ydesc_area)
.margin(26u32);
let chr_col = df
.get_columns()
.get(opt.chr - 1)
.ok_or_else(|| PlotRangeError::InvalidColumn(format!("Chromosome column {} not found", opt.chr)))?
.as_series()
.ok_or_else(|| PlotRangeError::InvalidColumn("Chromosome column conversion failed".to_string()))?;
let start_col = df
.get_columns()
.get(opt.start - 1)
.ok_or_else(|| PlotRangeError::InvalidColumn(format!("Start column {} not found", opt.start)))?
.as_series()
.ok_or_else(|| PlotRangeError::InvalidColumn("Start column conversion failed".to_string()))?;
let end_col = df
.get_columns()
.get(opt.end - 1)
.ok_or_else(|| PlotRangeError::InvalidColumn(format!("End column {} not found", opt.end)))?
.as_series()
.ok_or_else(|| PlotRangeError::InvalidColumn("End column conversion failed".to_string()))?;
let start_series = start_col.cast(&DataType::Float64)?;
let starts = start_series
.f64()
.map_err(|_| PlotRangeError::InvalidData("Start column is not numeric".to_string()))?;
let end_series = end_col.cast(&DataType::Float64)?;
let ends = end_series
.f64()
.map_err(|_| PlotRangeError::InvalidData("End column is not numeric".to_string()))?;
let min_start = starts
.min()
.unwrap_or_default();
let max_end = ends
.max()
.unwrap_or_default();
let plot_color = hex::decode(&opt.color)?;
let plot_plotters_color = RGBColor(plot_color[0], plot_color[1], plot_color[2]);
let num_ranges = df.height();
let y_max = if opt.group_by_chr {
let unique_chrs = chr_col
.cast(&DataType::String)?
.str()
.map_err(|_| PlotRangeError::InvalidData("Chromosome column is not string".to_string()))?
.into_iter()
.collect::<std::collections::HashSet<_>>()
.len() as f64;
unique_chrs
} else {
num_ranges as f64
};
let mut grid = chart
.build_cartesian_2d(min_start..max_end, 0.0..y_max)
.map_err(|e| PlotRangeError::InvalidData(format!("Grid creation error: {}", e)))?;
let mesh_result = match (opt.si_format_x, opt.si_format_y) {
(true, true) => {
grid.configure_mesh()
.disable_x_mesh()
.bold_line_style(WHITE.mix(0.3))
.y_desc(&opt.ydesc)
.x_desc(&opt.xdesc)
.label_style((opt.label_font.as_str(), opt.label_font_size))
.axis_desc_style((opt.axis_desc_font.as_str(), opt.axis_desc_font_size))
.x_label_formatter(&|x| format_si_number(*x))
.y_label_formatter(&|y| format_si_number(*y))
.draw()
}
(true, false) => {
grid.configure_mesh()
.disable_x_mesh()
.bold_line_style(WHITE.mix(0.3))
.y_desc(&opt.ydesc)
.x_desc(&opt.xdesc)
.label_style((opt.label_font.as_str(), opt.label_font_size))
.axis_desc_style((opt.axis_desc_font.as_str(), opt.axis_desc_font_size))
.x_label_formatter(&|x| format_si_number(*x))
.draw()
}
(false, true) => {
grid.configure_mesh()
.disable_x_mesh()
.bold_line_style(WHITE.mix(0.3))
.y_desc(&opt.ydesc)
.x_desc(&opt.xdesc)
.label_style((opt.label_font.as_str(), opt.label_font_size))
.axis_desc_style((opt.axis_desc_font.as_str(), opt.axis_desc_font_size))
.y_label_formatter(&|y| format_si_number(*y))
.draw()
}
(false, false) => {
grid.configure_mesh()
.disable_x_mesh()
.bold_line_style(WHITE.mix(0.3))
.y_desc(&opt.ydesc)
.x_desc(&opt.xdesc)
.label_style((opt.label_font.as_str(), opt.label_font_size))
.axis_desc_style((opt.axis_desc_font.as_str(), opt.axis_desc_font_size))
.draw()
}
};
mesh_result.map_err(|e| PlotRangeError::InvalidData(format!("Draw error: {}", e)))?;
let mut shapes = Vec::new();
let bar_half_height = opt.bar_height as f64 / 2.0;
if opt.group_by_chr {
let mut chr_to_y = std::collections::HashMap::new();
let mut current_y = 0.5f64;
let chr_series = chr_col.cast(&DataType::String)?;
let chr_strings = chr_series
.str()
.map_err(|_| PlotRangeError::InvalidData("Chromosome column is not string".to_string()))?;
for i in 0..df.height() {
let chr = chr_strings.get(i).unwrap_or("unknown");
let start = starts.get(i).unwrap_or(0.0);
let end = ends.get(i).unwrap_or(0.0);
let y_pos = *chr_to_y.entry(chr.to_string()).or_insert_with(|| {
let y = current_y;
current_y += 1.0;
y
});
shapes.push(Rectangle::new(
[(start, y_pos - bar_half_height / y_max), (end, y_pos + bar_half_height / y_max)],
plot_plotters_color.mix(opt.alpha).filled(),
));
}
} else {
for i in 0..df.height() {
let start = starts.get(i).unwrap_or(0.0);
let end = ends.get(i).unwrap_or(0.0);
let y_pos = i as f64 + 0.5;
shapes.push(Rectangle::new(
[(start, y_pos - bar_half_height / y_max), (end, y_pos + bar_half_height / y_max)],
plot_plotters_color.mix(opt.alpha).filled(),
));
}
}
grid.draw_series(shapes)
.map_err(|e| PlotRangeError::InvalidData(format!("Backend Error: {}", e)))?;
root.present().map_err(|e| PlotRangeError::PlotterError(format!("Present error: {:?}", e)))?;
Ok(())
}