seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in FASTA/FASTQ format
Documentation
use std::io::{self, BufWriter, Write};

use anyhow::Result;
use clap::Args;

use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};

// Command-line arguments consumed by `run`, which reports gap regions found in
// the sequences of the input file.
// NOTE(review): clap's derive uses the `///` field comments below as `--help`
// text, so their wording is user-visible.
#[derive(Args, Debug)]
pub struct GapArgs {
    /// Input FASTA file
    // `run` treats the literal value `-` as "read from standard input".
    #[arg(value_name = "in.fa")]
    pub input: String,

    /// Minimum gap size
    // `run` only reports runs whose length is at least this value.
    #[arg(short = 'l', value_name = "INT", default_value = "50")]
    pub min_size: usize,
}

pub fn run(args: &GapArgs) -> Result<()> {
    let mut reader = if args.input == "-" {
        SeqReader::from_stdin()
    } else {
        SeqReader::from_path(&args.input)?
    };

    let mut record = SeqRecord::new(Vec::new(), Vec::new());

    while reader.read_next(&mut record)? {
        let mut gap_len = 0usize;
        for i in 0..=record.seq.len() {
            let c = if i < record.seq.len() {
                LOOKUP_TABLES.nt6[record.seq[i] as usize]
            } else {
                5
            };

            if i == record.seq.len() || (1..=4).contains(&c) {
                if gap_len > 0 && gap_len >= args.min_size {
                    println!(
                        "{}\t{}\t{}",
                        String::from_utf8_lossy(&record.name),
                        i - gap_len,
                        i
                    );
                }
                gap_len = 0;
            } else {
                gap_len += 1;
            }
        }
    }

    Ok(())
}