use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
#[derive(Args, Debug)]
pub struct GcArgs {
#[arg(value_name = "in.fa")]
pub input: String,
#[arg(short = 'w')]
pub at_mode: bool,
#[arg(short = 'f', value_name = "FLOAT", default_value = "0.6")]
pub min_frac: f64,
#[arg(short = 'l', value_name = "INT", default_value = "20")]
pub min_length: usize,
#[arg(short = 'x', value_name = "FLOAT", default_value = "10.0")]
pub xdropoff: f64,
}
pub fn run(args: &GcArgs) -> Result<()> {
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let q = (1.0 - args.min_frac) / args.min_frac;
let mut record = SeqRecord::new(Vec::new(), Vec::new());
while reader.read_next(&mut record)? {
let mut start = 0usize;
let mut max_i = 0usize;
let mut n_hits = 0i64;
let mut start_hits = 0i64;
let mut max_hits = 0i64;
let mut sc = 0.0f64;
let mut max = 0.0f64;
for (i, &base) in record.seq.iter().enumerate() {
let c = LOOKUP_TABLES.nt16[base as usize];
let hit = if args.at_mode {
c == 1 || c == 8 || c == 9 } else {
c == 2 || c == 4 || c == 6 };
if hit {
n_hits += 1;
}
if hit {
if sc == 0.0 {
start = i;
start_hits = n_hits;
}
sc += q;
if sc > max {
max = sc;
max_i = i;
max_hits = n_hits;
}
} else if sc > 0.0 {
sc -= 1.0;
if sc < 0.0 || max - sc > args.xdropoff {
if max_i + 1 - start >= args.min_length {
println!(
"{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
start,
max_i + 1,
max_hits - start_hits + 1
);
}
sc = 0.0;
max = 0.0;
}
}
}
if max > 0.0 && max_i + 1 - start >= args.min_length {
println!(
"{}\t{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
start,
max_i + 1,
max_hits - start_hits + 1
);
}
}
Ok(())
}