use anyhow::Result;
use log::*;
use perbase_lib::utils;
use serde::{Deserialize, Serialize};
use smartstring::alias::*;
use std::path::PathBuf;
use structopt::{StructOpt, clap::AppSettings};
#[derive(StructOpt)]
#[structopt(author, setting = AppSettings::ArgRequiredElseHelp)]
pub struct MergeAdjacent {
in_file: Option<PathBuf>,
#[structopt(long, short = "T", default_value = utils::NUM_CPU.as_str())]
compression_threads: usize,
#[structopt(long, short = "L", default_value = "2")]
compression_level: u32,
#[structopt(long, short = "n")]
no_header: bool,
#[structopt(long, short = "Z")]
bgzip: bool,
#[structopt(long, short = "o")]
output: Option<PathBuf>,
}
impl MergeAdjacent {
pub fn run(&self) -> Result<()> {
let mut reader = utils::get_reader(
&self.in_file,
!self.no_header,
self.in_file
.as_ref()
.map(utils::is_bgzipped)
.unwrap_or(false),
)?;
let mut writer = utils::get_writer(
&self.output,
self.bgzip,
true,
self.compression_threads,
self.compression_level,
)?;
let mut iter = reader.deserialize().map(|r| {
let rec: BedLike = r.expect("Deserialzied record");
rec
});
if let Some(first) = iter.next() {
let mut curr_seq = first.ref_seq.clone();
let mut curr_start = first.pos;
let mut curr_end = if let Some(end) = first.end {
end
} else {
first.pos + 1
};
let mut curr_depth = first.depth;
for bedlike in iter {
if bedlike.depth == curr_depth
&& curr_end >= bedlike.pos
&& bedlike.ref_seq == curr_seq
{
curr_end = if let Some(end) = bedlike.end {
end
} else {
bedlike.pos + 1
};
} else {
writer.serialize(BedLike {
ref_seq: curr_seq.clone(),
pos: curr_start,
end: Some(curr_end),
depth: curr_depth,
})?;
curr_seq = bedlike.ref_seq;
curr_start = bedlike.pos;
curr_end = if let Some(end) = bedlike.end {
end
} else {
bedlike.pos + 1
};
curr_depth = bedlike.depth;
}
}
writer.serialize(BedLike {
ref_seq: curr_seq.clone(),
pos: curr_start,
end: Some(curr_end),
depth: curr_depth,
})?;
} else {
info!("No input records");
}
Ok(())
}
}
#[derive(Serialize, Deserialize, Debug)]
#[serde(rename_all = "SCREAMING_SNAKE_CASE")]
struct BedLike {
#[serde(alias = "REF", alias = "chrom", rename = "REF")]
ref_seq: String, #[serde(alias = "chromStart")]
pos: usize,
#[serde(alias = "chromEnd")]
end: Option<usize>,
#[serde(alias = "COV")]
depth: usize,
}