seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in FASTA/FASTQ format
Documentation
use crate::core::{SeqReader, SeqRecord, LOOKUP_TABLES};
use anyhow::Result;
use clap::Args;

#[derive(Args, Debug)]
pub struct HetyArgs {
    /// 输入FASTA文件
    #[arg(value_name = "in.fa")]
    pub input: String,

    /// 窗口大小
    #[arg(short = 'w', value_name = "INT", default_value = "50000")]
    pub win_size: usize,

    /// 窗口内起始位置数量
    #[arg(short = 't', value_name = "INT", default_value = "5")]
    pub n_start: usize,

    /// 将小写字母视为掩码
    #[arg(short = 'm')]
    pub lower_mask: bool,
}

pub fn run(args: &HetyArgs) -> Result<()> {
    let mut reader = if args.input == "-" {
        SeqReader::from_stdin()
    } else {
        SeqReader::from_path(&args.input)?
    };

    let win_step = args.win_size / args.n_start;
    let mut buf = vec![0u8; args.win_size];
    let mut record = SeqRecord::new(Vec::new(), Vec::new());

    while reader.read_next(&mut record)? {
        let mut cnt = [0u64; 3];
        let mut next = 0usize;

        for i in 0..=record.seq.len() {
            if (i >= args.win_size && i % win_step == 0) || i == record.seq.len() {
                if i == record.seq.len() && record.seq.len() >= args.win_size {
                    for y in (record.seq.len() - args.win_size)..next {
                        cnt[buf[y % args.win_size] as usize] -= 1;
                    }
                }
                if cnt[1] + cnt[2] > 0 {
                    println!(
                        "{}\t{}\t{}\t{:.2}\t{}\t{}",
                        String::from_utf8_lossy(&record.name),
                        next,
                        i,
                        (cnt[2] as f64) / ((cnt[1] + cnt[2]) as f64) * (args.win_size as f64),
                        cnt[1] + cnt[2],
                        cnt[2]
                    );
                }
                next = i;
            }

            if i < record.seq.len() {
                let y = i % args.win_size;
                let mut c = record.seq[i];

                if args.lower_mask && c.is_ascii_lowercase() {
                    c = b'N';
                }

                let c = LOOKUP_TABLES.nt16[c as usize];
                let x = LOOKUP_TABLES.bitcnt[c as usize];

                if i >= args.win_size {
                    cnt[buf[y] as usize] -= 1;
                }

                buf[y] = if x == 2 {
                    2
                } else if x == 1 {
                    1
                } else {
                    0
                };
                cnt[buf[y] as usize] += 1;
            }
        }
    }

    Ok(())
}