use colored::Colorize;
use env_logger::{Builder, Target};
use itertools::Itertools;
use log::LevelFilter;
use rayon::prelude::*;
use rust_htslib::bam;
use rust_htslib::bam::Read;
use rust_htslib::faidx;
use rustybam::cli::Commands;
use rustybam::fastx;
use rustybam::paf::paf_swap_query_and_target;
use rustybam::*;
use std::collections::HashMap;
use std::time::Instant;
fn main() {
parse_cli();
}
pub fn parse_cli() {
let pg_start = Instant::now();
let args = cli::make_cli_parse();
let matches = cli::make_cli_app().get_matches();
let subcommand = matches.subcommand_name().unwrap();
let min_log_level = match matches.occurrences_of("verbose") {
0 => LevelFilter::Warn,
1 => LevelFilter::Info,
2 => LevelFilter::Debug,
_ => LevelFilter::Trace,
};
Builder::new()
.target(Target::Stderr)
.filter(None, min_log_level)
.init();
log::debug!("DEBUG logging enabled");
log::trace!("TRACE logging enabled");
rayon::ThreadPoolBuilder::new()
.num_threads(args.threads)
.build_global()
.unwrap();
match &args.command {
Some(Commands::Stats { bam, qbed, paf }) => {
bamstats::print_cigar_stats_header(*qbed);
if *paf {
for paf in paf::Paf::from_file(bam).records {
let stats = bamstats::stats_from_paf(paf);
bamstats::print_cigar_stats(stats, *qbed);
}
return;
}
let mut bam_reader = if bam == "-" {
bam::Reader::from_stdin().unwrap()
} else {
bam::Reader::from_path(bam).unwrap_or_else(|_| panic!("Failed to open {}", bam))
};
bam_reader.set_threads(args.threads).unwrap();
let bam_header = bam::Header::from_template(bam_reader.header());
for (_idx, rec) in bam_reader.records().enumerate() {
let rec = rec.unwrap();
if !rec.is_unmapped() {
let stats = bamstats::cigar_stats(rec, &bam_header);
bamstats::print_cigar_stats(stats, *qbed);
}
}
}
Some(Commands::Nucfreq {
bam,
region,
bed,
small,
}) => {
let mut rgns = Vec::new();
if let Some(region_f) = region {
rgns.push(bed::parse_region(region_f));
}
if let Some(bed_f) = bed {
rgns.append(&mut bed::parse_bed(bed_f));
}
for rgn in rgns {
let med_rgns = bed::split_region(&rgn, 1_000_000);
for med_rgn in med_rgns {
let small_rgns = bed::split_region(&med_rgn, 10_000);
let vec: Vec<nucfreq::Nucfreq> = small_rgns
.into_par_iter()
.map(|r| nucfreq::region_nucfreq(bam, &r, 4))
.flatten()
.collect();
if *small {
nucfreq::small_nucfreq(&vec)
} else {
nucfreq::print_nucfreq_header();
nucfreq::print_nucfreq(&vec);
}
}
}
}
Some(Commands::Repeat { fasta, min }) => {
let genome = suns::Genome::from_file(fasta);
let unique_intervals = genome.get_longest_perfect_repeats(*min);
println!("#chr\tstart\tend\trepeat_length");
for (chr, start, length) in &unique_intervals {
println!("{}\t{}\t{}\t{}", chr, start, start + length, length - 1,);
}
}
Some(Commands::Suns {
fasta,
kmer_size,
max_size,
validate,
}) => {
let genome = suns::Genome::from_file(fasta);
let sun_intervals = genome.find_sun_intervals(*kmer_size);
println!("#chr\tstart\tend\tsun_seq");
for (chr, start, end, seq) in &sun_intervals {
if end - start < *max_size {
println!(
"{}\t{}\t{}\t{}",
chr,
start,
end,
std::str::from_utf8(seq).unwrap()
);
}
}
if *validate {
suns::validate_suns(&genome, &sun_intervals, *kmer_size);
}
}
Some(Commands::BedLength {
bed,
readable,
column,
}) => {
let rgns = bed::parse_bed(bed);
match *column {
Some(c) => {
let mut dict = HashMap::new();
for rgn in rgns {
let bp = dict.entry(rgn.get_column(c).clone()).or_insert(0_u64);
*bp += rgn.en - rgn.st;
}
log::trace!("{:?}", dict);
for (key, count) in dict {
if *readable {
println!("{}\t{}", key, (count as f64) / 1e6);
} else {
println!("{}\t{}", key, count);
}
}
}
None => {
let count: u64 = rgns.into_iter().map(|rgn| rgn.en - rgn.st).sum();
if *readable {
println!("{}", (count as f64) / 1e6);
} else {
println!("{}", count);
}
}
}
}
Some(Commands::Invert { paf }) => {
let paf = paf::Paf::from_file(paf);
for rec in &paf.records {
let inv_rec = paf_swap_query_and_target(rec);
println!("{}", inv_rec);
}
}
Some(Commands::Liftover {
paf,
bed,
qbed,
largest,
}) => {
let rgns = bed::parse_bed(bed);
let paf = paf::Paf::from_file(paf);
let new_recs = liftover::trim_paf_by_rgns(&rgns, &paf.records, *qbed);
if *largest {
for (_key, group) in &new_recs
.into_iter()
.sorted_by_key(|pac_rec| pac_rec.id.clone())
.group_by(|paf_rec| paf_rec.id.clone())
{
let largest_rec = group.max_by_key(|p| (p.t_en - p.t_st)).unwrap();
println!("{}", largest_rec);
}
} else {
for rec in new_recs {
println!("{}", rec);
}
}
}
Some(Commands::TrimPaf {
paf,
match_score,
diff_score,
indel_score,
remove_contained,
}) => {
let mut paf = paf::Paf::from_file(paf);
paf.overlapping_paf_recs(*match_score, *diff_score, *indel_score, *remove_contained);
for rec in &paf.records {
println!("{}", rec);
}
}
Some(Commands::Filter {
paf,
paired_len,
aln,
query,
}) => {
let mut paf = paf::Paf::from_file(paf);
log::info!("{} PAF records BEFORE filtering.", paf.records.len());
paf.filter_query_len(*query);
paf.filter_aln_len(*aln);
paf.filter_aln_pairs(*paired_len);
log::info!("{} PAF records AFTER filtering.", paf.records.len());
for rec in paf.records {
println!("{}", rec);
}
}
Some(Commands::Orient {
paf,
scaffold,
insert,
}) => {
let mut paf = paf::Paf::from_file(paf);
paf.orient();
if *scaffold {
paf.scaffold(*insert);
}
for rec in &paf.records {
println!("{}", rec);
}
}
Some(Commands::BreakPaf { paf, max_size }) => {
let paf = paf::Paf::from_file(paf);
for mut paf in paf.records {
paf.aligned_pairs();
let pafs = liftover::break_paf_on_indels(&paf, *max_size);
for trimed_paf in pafs {
println!("{}", trimed_paf);
}
}
}
Some(Commands::PafToSam { paf, fasta }) => {
let fasta_reader = fasta
.as_ref()
.map(|fasta| faidx::Reader::from_path(fasta).unwrap());
let paf = paf::Paf::from_file(paf);
println!("{}", paf.sam_header());
for rec in paf.records {
println!("{}", rec.to_sam_string(fasta_reader.as_ref()));
}
}
Some(Commands::FastxSplit { fastx }) => {
fastx::run_split_fastx(fastx, "-");
}
Some(Commands::GetFasta {
fasta,
bed,
strand,
name,
}) => {
getfasta::get_fasta(fasta, bed, *name, *strand);
}
None => {}
};
let duration = pg_start.elapsed();
log::info!(
"{} done! Time elapsed: {}",
subcommand.bright_green().bold(),
format!("{:.2?}", duration).bright_yellow().bold()
);
}