seqtkrs 0.1.1

A Rust reimplementation of seqtk, a fast and lightweight tool for processing biological sequences in the FASTA/FASTQ formats.
Documentation
/// A sequence record representing a single FASTA or FASTQ entry.
///
/// # Fields
/// - `name`: sequence name (without the leading `>` or `@` prefix)
/// - `comment`: optional comment (the content that follows the sequence name)
/// - `seq`: the sequence bases
/// - `qual`: optional quality values (FASTQ only)
///
/// # Trait implementations
/// Besides `Debug` and `Clone`, the type derives `PartialEq`, `Eq` and `Hash`
/// (field-wise comparison and hashing) so records can be compared directly or
/// stored in hashed collections, and `Default`, which yields a record with an
/// empty `name` and `seq` and with `comment` and `qual` set to `None`.
#[derive(Debug, Clone, Default, PartialEq, Eq, Hash)]
pub struct SeqRecord {
    /// Sequence name.
    pub name: Vec<u8>,

    /// Comment text, if any.
    pub comment: Option<Vec<u8>>,

    /// Sequence content.
    pub seq: Vec<u8>,

    /// Quality values (FASTQ only).
    pub qual: Option<Vec<u8>>,
}

impl SeqRecord {
    /// 创建新的序列记录
    ///
    /// # 参数
    /// - `name`: 序列名称
    /// - `seq`: 序列内容
    ///
    /// # 示例
    /// ```
    /// use seqtkrs::core::seq_record::SeqRecord;
    /// let record = SeqRecord::new(b"seq1".to_vec(), b"ACGT".to_vec());
    /// ```
    #[inline]
    pub fn new(name: Vec<u8>, seq: Vec<u8>) -> Self {
        Self {
            name,
            comment: None,
            seq,
            qual: None,
        }
    }

    /// 创建带质量值的FASTQ记录
    ///
    /// # 参数
    /// - `name`: 序列名称
    /// - `seq`: 序列内容
    /// - `qual`: 质量值
    #[inline]
    pub fn with_qual(name: Vec<u8>, seq: Vec<u8>, qual: Vec<u8>) -> Self {
        Self {
            name,
            comment: None,
            seq,
            qual: Some(qual),
        }
    }

    /// 判断是否为FASTQ格式(通过质量值字段判断)
    #[inline]
    pub fn is_fastq(&self) -> bool {
        self.qual.is_some()
    }

    /// 获取序列长度
    #[inline]
    pub fn len(&self) -> usize {
        self.seq.len()
    }

    /// 判断序列是否为空
    #[inline]
    pub fn is_empty(&self) -> bool {
        self.seq.is_empty()
    }

    /// 就地反向互补序列
    ///
    /// # 参数
    /// - `comp_table`: 碱基互补查找表(256元素数组)
    ///
    /// # 说明
    /// 此方法会同时反转序列和质量值,并使用查找表进行碱基互补转换
    pub fn reverse_complement(&mut self, comp_table: &[u8; 256]) {
        // 反转序列
        self.seq.reverse();

        // 对每个碱基进行互补转换
        for base in &mut self.seq {
            *base = comp_table[*base as usize];
        }

        // 如果有质量值,也需要反转
        if let Some(qual) = &mut self.qual {
            qual.reverse();
        }
    }

    /// 清空记录内容以便复用,避免内存重新分配
    ///
    /// # 说明
    /// 这是性能优化的关键:在循环中复用SeqRecord对象而不是创建新对象
    pub fn clear(&mut self) {
        self.name.clear();
        self.comment = None;
        self.seq.clear();
        self.qual = None;
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Builds the FASTQ record shared by the quality-aware tests.
    fn fastq_fixture() -> SeqRecord {
        SeqRecord::with_qual(b"seq1".to_vec(), b"ACGT".to_vec(), b"IIII".to_vec())
    }

    /// A record built with `new` exposes its name and sequence, reports the
    /// sequence length, and is neither FASTQ nor empty.
    #[test]
    fn test_seq_record_basic() {
        let record = SeqRecord::new(b"seq1".to_vec(), b"ACGT".to_vec());

        assert_eq!(record.name.as_slice(), &b"seq1"[..]);
        assert_eq!(record.seq.as_slice(), &b"ACGT"[..]);
        assert_eq!(record.len(), 4);
        assert!(!record.is_fastq());
        assert!(!record.is_empty());
    }

    /// A record built with `with_qual` is reported as FASTQ and keeps the
    /// quality values it was given.
    #[test]
    fn test_seq_record_with_qual() {
        let record = fastq_fixture();

        assert!(record.is_fastq());
        assert_eq!(record.qual.as_deref(), Some(&b"IIII"[..]));
    }

    /// `clear` empties the name and sequence and drops the optional fields.
    #[test]
    fn test_clear() {
        let mut record = fastq_fixture();

        record.clear();

        assert!(record.name.is_empty());
        assert!(record.seq.is_empty());
        assert_eq!(record.comment, None);
        assert_eq!(record.qual, None);
    }
}