seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in FASTA/FASTQ format
Documentation
use crate::core::{SeqReader, SeqRecord};
use anyhow::Result;
use clap::Args;
use std::io::{self, BufWriter, Write};

// Command-line arguments for the `famask` subcommand: two positional FASTA
// paths, the source sequences and the mask that is applied to them.
//
// NOTE: the `///` comments on the fields are intentionally left untranslated.
// clap's derive macros use field doc comments as the generated help text, so
// they are user-facing strings rather than plain documentation.
#[derive(Args, Debug)]
pub struct FamaskArgs {
    // Source FASTA file; `run` reads standard input instead when this is "-".
    /// 源FASTA文件
    #[arg(value_name = "src.fa")]
    pub source: String,

    // Mask FASTA file; `run` always opens this as a path and pairs its records
    // with the source records in reading order.
    /// 掩码FASTA文件
    #[arg(value_name = "mask.fa")]
    pub mask: String,
}

pub fn run(args: &FamaskArgs) -> Result<()> {
    let mut reader_src = if args.source == "-" {
        SeqReader::from_stdin()
    } else {
        SeqReader::from_path(&args.source)?
    };

    let mut reader_mask = SeqReader::from_path(&args.mask)?;

    let mut record_src = SeqRecord::new(Vec::new(), Vec::new());
    let mut record_mask = SeqRecord::new(Vec::new(), Vec::new());

    while reader_src.read_next(&mut record_src)? {
        if !reader_mask.read_next(&mut record_mask)? {
            break;
        }

        if record_src.name != record_mask.name {
            eprintln!(
                "不同的序列名称: {} != {}",
                String::from_utf8_lossy(&record_src.name),
                String::from_utf8_lossy(&record_mask.name)
            );
        }

        if record_src.seq.len() != record_mask.seq.len() {
            eprintln!(
                "不等的序列长度: {} != {}",
                record_src.seq.len(),
                record_mask.seq.len()
            );
        }

        let min_l = record_src.seq.len().min(record_mask.seq.len());
        print!(">{}", String::from_utf8_lossy(&record_src.name));

        for i in 0..min_l {
            let mut c0 = record_src.seq[i];
            let c1 = record_mask.seq[i];

            if c1 == b'x' {
                c0 = c0.to_ascii_lowercase();
            } else if c1 != b'X' {
                c0 = c1;
            }

            if i % 60 == 0 {
                println!();
            }
            print!("{}", c0 as char);
        }
        println!();
    }

    Ok(())
}