seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in FASTA/FASTQ format.
Documentation
use std::io::{self, BufWriter, Write};

use anyhow::Result;
use clap::Args;

use crate::core::{SeqReader, SeqRecord};

/// Command-line arguments for the homopolymer-run report.
#[derive(Args, Debug)]
pub struct HrunArgs {
    /// Input FASTA file ("-" reads from standard input)
    #[arg(value_name = "in.fa")]
    pub input: String,

    /// Minimum homopolymer length to report
    // Typed default: checked against `usize` at compile time instead of being
    // parsed from a string when the command is built.
    #[arg(value_name = "minLen", default_value_t = 7)]
    pub min_len: usize,
}

pub fn run(args: &HrunArgs) -> Result<()> {
    let mut reader = if args.input == "-" {
        SeqReader::from_stdin()
    } else {
        SeqReader::from_path(&args.input)?
    };

    let mut record = SeqRecord::new(Vec::new(), Vec::new());

    while reader.read_next(&mut record)? {
        if record.seq.is_empty() {
            continue;
        }

        let mut c = record.seq[0];
        let mut run_len = 1usize;
        let mut beg = 0usize;

        for i in 1..record.seq.len() {
            if record.seq[i] != c {
                if run_len >= args.min_len {
                    println!(
                        "{}\t{}\t{}\t{}",
                        String::from_utf8_lossy(&record.name),
                        beg,
                        beg + run_len,
                        c as char
                    );
                }
                c = record.seq[i];
                run_len = 1;
                beg = i;
            } else {
                run_len += 1;
            }
        }

        if run_len >= args.min_len {
            println!(
                "{}\t{}\t{}\t{}",
                String::from_utf8_lossy(&record.name),
                beg,
                beg + run_len,
                c as char
            );
        }
    }

    Ok(())
}