seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in FASTA/FASTQ format.
Documentation
use crate::core::{SeqReader, SeqRecord, SeqWriter};
use anyhow::Result;
use clap::Args;
use rustc_hash::FxHashMap;
use std::fs::File;
use std::io::{BufRead, BufReader};

// Command-line arguments consumed by `run` in this file (presumably the
// `mutfa` subcommand, judging by the type name — confirm against the CLI
// definition).
//
// NOTE: the `///` doc comments on the fields are intentionally left verbatim:
// clap's derive macro uses field doc comments as the generated help text, so
// editing them would change what the program prints at runtime. English
// translations are given in the regular `//` comments next to them.
#[derive(Args, Debug)]
pub struct MutfaArgs {
    // English: "Input FASTA file". `run` treats the value "-" as standard input.
    /// 输入FASTA文件
    #[arg(value_name = "in.fa")]
    pub input: String,

    // English: "SNP file (chr pos any base)". `run` reads it as
    // whitespace-separated columns: sequence name, 1-based position, an
    // ignored third column, and the replacement base.
    /// SNP文件(chr pos any base)
    #[arg(value_name = "in.snp")]
    pub snp_file: String,
}

pub fn run(args: &MutfaArgs) -> Result<()> {
    // 读取SNP文件
    let mut mutations: FxHashMap<Vec<u8>, Vec<(usize, u8)>> = FxHashMap::default();

    let file = File::open(&args.snp_file)?;
    let reader = BufReader::new(file);

    for line in reader.lines() {
        let line = line?;
        if line.is_empty() || line.starts_with('#') {
            continue;
        }

        let fields: Vec<&str> = line.split_whitespace().collect();
        if fields.len() < 4 {
            continue;
        }

        let chr = fields[0].as_bytes().to_vec();
        let pos: usize = fields[1].parse::<usize>()? - 1; // 转为0-based
        let new_base = fields[3].as_bytes()[0];

        if new_base.is_ascii_alphabetic() {
            mutations
                .entry(chr)
                .or_default()
                .push((pos, new_base));
        }
    }

    // 对每个染色体的突变位置排序
    for muts in mutations.values_mut() {
        muts.sort_by_key(|&(pos, _)| pos);
    }

    // 读取FASTA并应用突变
    let mut reader = if args.input == "-" {
        SeqReader::from_stdin()
    } else {
        SeqReader::from_path(&args.input)?
    };

    let mut writer = SeqWriter::to_stdout().with_line_width(60);
    let mut record = SeqRecord::new(Vec::new(), Vec::new());

    while reader.read_next(&mut record)? {
        if let Some(muts) = mutations.get(&record.name) {
            for &(pos, new_base) in muts {
                if pos < record.seq.len() {
                    record.seq[pos] = new_base;
                }
            }
        }
        writer.write_record(&record)?;
    }

    writer.flush()?;
    Ok(())
}