use crate::bed::BedRecord;
use chrono::Datelike;
use colored::Colorize;
use indoc::indoc;
use rayon::prelude::*;
use std::collections::HashMap;
use std::fs::File;
use std::io::{self, Read, Write};
use std::path::PathBuf;
/// Tool name; `comments` writes it on the `#provider:` header line.
const SOURCE: &str = "bed2gff";
/// Crate version, read from the `CARGO_PKG_VERSION` environment variable at compile time.
const VERSION: &str = env!("CARGO_PKG_VERSION");
/// GFF version pragma; `comments` writes it as the first header line.
const GFF3: &str = "##gff-version 3";
/// Project repository location; `comments` writes it on the `#contact:` header line.
const REPOSITORY: &str = "github.com/alejandrogzi/bed2gff";
/// Loads the file at `file` and parses each of its lines into a [`BedRecord`].
///
/// # Panics
///
/// Panics if [`reader`] fails to read the file or if [`parallel_parse`]
/// returns an error.
pub fn bed_reader(file: &PathBuf) -> Vec<BedRecord> {
    let contents = reader(file).unwrap();
    parallel_parse(&contents).unwrap()
}
/// Builds the lookup table produced by [`parallel_hash_rev`] from `file`.
///
/// Despite its name, `file` is handed straight to [`parallel_hash_rev`] and is
/// therefore parsed as text (one whitespace-separated pair per line); it is not
/// opened as a path here. The resulting map is keyed by the second column of
/// each line, with the first column as the value.
///
/// If no pair could be extracted, a failure message is printed to standard
/// output and the process exits with status 1.
pub fn get_isoforms(file: &String) -> HashMap<String, String> {
    let pairs = parallel_hash_rev(file);
    if !pairs.is_empty() {
        return pairs;
    }

    println!(
        "{} {}",
        "Fail:".bright_red().bold(),
        "BED file could not be converted. Please check your isoforms file."
    );
    std::process::exit(1)
}
/// Reads the whole file at `file` into a `String`.
///
/// # Errors
///
/// Returns the underlying I/O error if the file cannot be opened or read
/// into a `String`.
pub fn reader(file: &PathBuf) -> io::Result<String> {
    let mut buffer = String::new();
    File::open(file)?.read_to_string(&mut buffer)?;
    Ok(buffer)
}
/// Builds a map from the first to the second whitespace-separated field of
/// every line in `s`, processing the lines in parallel.
///
/// Lines with fewer than two fields are skipped; any fields after the second
/// are ignored.
pub fn parallel_hash<'a>(s: &'a str) -> HashMap<String, String> {
    s.par_lines()
        .filter_map(|line| {
            let mut fields = line.split_whitespace();
            let first = fields.next()?;
            let second = fields.next()?;
            Some((first.to_owned(), second.to_owned()))
        })
        .collect()
}
/// Builds a map from the second to the first whitespace-separated field of
/// every line in `s`, processing the lines in parallel.
///
/// This is the mirror image of [`parallel_hash`]: the second field becomes the
/// key and the first field the value. Lines with fewer than two fields are
/// skipped; any fields after the second are ignored.
pub fn parallel_hash_rev<'a>(s: &'a str) -> HashMap<String, String> {
    s.par_lines()
        .filter_map(|line| {
            let mut fields = line.split_whitespace();
            let first = fields.next()?;
            let second = fields.next()?;
            Some((second.to_owned(), first.to_owned()))
        })
        .collect()
}
/// Parses every line of `s` with `BedRecord::parse`, in parallel.
///
/// # Errors
///
/// If any line fails to parse, an `Err` is returned instead of the records.
pub fn parallel_parse<'a>(s: &'a str) -> Result<Vec<BedRecord>, &'static str> {
    s.par_lines().map(|line| BedRecord::parse(line)).collect()
}
/// Indexes `records` by name, mapping each name to
/// `(chrom, tx_start, tx_end, strand)`.
///
/// The records are folded into per-worker maps that are then merged. An entry
/// that already exists for a name is never overwritten, neither while folding
/// nor while merging.
///
/// The function currently always returns `Ok`.
pub fn custom_par_parse(
    records: &Vec<BedRecord>,
) -> Result<HashMap<String, (String, u32, u32, String)>, &'static str> {
    type Track = HashMap<String, (String, u32, u32, String)>;

    let track = records
        .par_iter()
        .fold(Track::new, |mut partial: Track, rec| {
            partial.entry(rec.name.clone()).or_insert_with(|| {
                (
                    rec.chrom.clone(),
                    rec.tx_start,
                    rec.tx_end,
                    rec.strand.clone(),
                )
            });
            partial
        })
        .reduce(Track::new, |mut merged: Track, other| {
            for (name, coords) in other {
                merged.entry(name).or_insert(coords);
            }
            merged
        });

    Ok(track)
}
/// Collapses per-transcript coordinates into one `gene` line per gene.
///
/// `isoforms` is iterated as `(transcript, gene)` pairs and `gene_track` is
/// looked up by transcript. For every gene the smallest start and the largest
/// end over its transcripts found in `gene_track` are kept; chromosome and
/// strand come from whichever transcript created the gene's entry first.
/// Transcripts missing from `gene_track` are ignored.
///
/// Each returned tuple is
/// `(chrom, "gene", start + 1, end, strand, ".", "ID=<gene>;gene_id=<gene>")`.
/// The tuples are produced by iterating a `HashMap`, so their order is not
/// defined.
pub fn combine_maps_par(
    isoforms: &HashMap<String, String>,
    gene_track: &HashMap<String, (String, u32, u32, String)>,
) -> Vec<(String, String, u32, u32, String, String, String)> {
    type Spans = HashMap<String, (String, u32, u32, String)>;

    // Per-worker pass: widen each gene's span with every transcript it owns.
    let spans = isoforms
        .par_iter()
        .fold(Spans::new, |mut partial: Spans, (transcript, gene)| {
            if let Some((chrom, tx_start, tx_end, strand)) = gene_track.get(transcript) {
                partial
                    .entry(gene.clone())
                    .and_modify(|span| {
                        span.1 = span.1.min(*tx_start);
                        span.2 = span.2.max(*tx_end);
                    })
                    .or_insert_with(|| (chrom.clone(), *tx_start, *tx_end, strand.clone()));
            }
            partial
        })
        // Merge pass: combine the partial maps, widening spans of shared genes.
        .reduce(Spans::new, |mut merged: Spans, other| {
            for (gene, (chrom, lo, hi, strand)) in other {
                merged
                    .entry(gene)
                    .and_modify(|span| {
                        span.1 = span.1.min(lo);
                        span.2 = span.2.max(hi);
                    })
                    .or_insert((chrom, lo, hi, strand));
            }
            merged
        });

    spans
        .par_iter()
        .map(|(gene, (chrom, start, end, strand))| {
            (
                chrom.clone(),
                String::from("gene"),
                // The stored start is shifted by one for the output line.
                start + 1,
                *end,
                strand.clone(),
                String::from("."),
                format!("ID={};gene_id={}", gene, gene),
            )
        })
        .collect()
}
/// Returns the peak resident set size of the current process in megabytes.
///
/// The value comes from `getrusage(RUSAGE_SELF)`. macOS reports `ru_maxrss`
/// in bytes while other supported targets report kilobytes, hence the
/// platform-specific divisor.
///
/// Returns `0.0` if `getrusage` reports a failure.
pub fn max_mem_usage_mb() -> f64 {
    // Zero-initialise the buffer: `rusage` only contains integer fields, so an
    // all-zero bit pattern is a valid value even if the call writes nothing.
    let mut usage = std::mem::MaybeUninit::<libc::rusage>::zeroed();

    // SAFETY: `usage.as_mut_ptr()` points to writable, properly aligned storage
    // for exactly one `rusage`, which is all `getrusage` requires.
    let status = unsafe { libc::getrusage(libc::RUSAGE_SELF, usage.as_mut_ptr()) };
    if status != 0 {
        return 0.0;
    }

    // SAFETY: the buffer was zero-initialised (a valid `rusage`) and has been
    // filled in by the successful `getrusage` call above.
    let usage = unsafe { usage.assume_init() };
    let maxrss = usage.ru_maxrss as f64;

    if cfg!(target_os = "macos") {
        maxrss / 1024.0 / 1024.0
    } else {
        maxrss / 1024.0
    }
}
/// Prints the start-up banner to standard output: the tool name, a short
/// description with the repository URL, and the crate version.
pub fn msg() {
    println!(
        "{}\n{}\nVersion: {}\n",
        "\n##### BED2GFF #####".bright_blue().bold(),
        // `indoc!` strips the common indentation of the continuation lines.
        indoc!(
            "A fast BED-to-GFF converter written in Rust.
            Repository: https://github.com/alejandrogzi/bed2gff
            Feel free to contact the developer if any issue/bug is found."
        ),
        VERSION
    );
}
/// Returns the current UTC date formatted as `YYYY-MM-DD`.
///
/// Month and day are zero-padded to two digits so the result is a fixed-width,
/// ISO 8601 style calendar date (e.g. `2024-03-05` rather than `2024-3-5`).
pub fn get_date() -> String {
    let today = chrono::Utc::now();
    format!(
        "{:04}-{:02}-{:02}",
        today.year(),
        today.month(),
        today.day()
    )
}
/// Writes the header block to `file`: the GFF version pragma followed by the
/// `#provider:`, `#version:`, `#contact:` and `#date:` comment lines.
///
/// Writing is best-effort: each line is written with its own `write_all` call
/// and any I/O error is deliberately ignored.
pub fn comments(file: &mut Box<dyn Write>) {
    let header = [
        format!("{}\n", GFF3),
        format!("#provider: {}\n", SOURCE),
        format!("#version: {}\n", VERSION),
        format!("#contact: {}\n", REPOSITORY),
        format!("#date: {}\n", get_date()),
    ];

    for line in &header {
        let _ = file.write_all(line.as_bytes());
    }
}