use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;
use std::io::{self, BufWriter, Write};
// Command-line arguments for the gap-reporting subcommand (consumed by `run`).
//
// NOTE: plain `//` comments are used on purpose; clap's derive turns `///`
// doc comments into help text, which would change the CLI's output.
#[derive(Args, Debug)]
pub struct GapArgs {
// Input to scan. `run` treats the literal value "-" as "read from stdin";
// anything else is passed to `SeqReader::from_path`.
// (The "in.fa" placeholder suggests FASTA input -- presumably whatever
// formats `SeqReader` accepts; confirm against its implementation.)
#[arg(value_name = "in.fa")]
pub input: String,
// Minimum gap length to report (`-l INT`, default 50). Gaps shorter than
// this are skipped; zero-length gaps are never reported, even with `-l 0`.
#[arg(short = 'l', value_name = "INT", default_value = "50")]
pub min_size: usize,
}
pub fn run(args: &GapArgs) -> Result<()> {
let mut reader = if args.input == "-" {
SeqReader::from_stdin()
} else {
SeqReader::from_path(&args.input)?
};
let mut record = SeqRecord::new(Vec::new(), Vec::new());
while reader.read_next(&mut record)? {
let mut gap_len = 0usize;
for i in 0..=record.seq.len() {
let c = if i < record.seq.len() {
LOOKUP_TABLES.nt6[record.seq[i] as usize]
} else {
5
};
if i == record.seq.len() || (1..=4).contains(&c) {
if gap_len > 0 && gap_len >= args.min_size {
println!(
"{}\t{}\t{}",
String::from_utf8_lossy(&record.name),
i - gap_len,
i
);
}
gap_len = 0;
} else {
gap_len += 1;
}
}
}
Ok(())
}