use bio::{bio_types::sequence::Sequence, io::fastq};
use std::fmt;
#[derive(Debug, Hash, PartialEq, Eq, Clone)]
pub struct SeqPair {
pub forward: Sequence,
pub reverse: Option<Sequence>,
}
impl SeqPair {
pub fn new(forward: Sequence, reverse: Option<Sequence>) -> Self {
Self { forward, reverse }
}
pub fn from_readpair(rp: &ReadPair) -> Self {
Self::new(
rp.forward.seq().to_vec(),
rp.reverse.as_ref().map(|x| x.seq().to_vec()),
)
}
}
#[derive(Debug)]
pub struct ReadPair {
pub forward: fastq::Record,
pub reverse: Option<fastq::Record>,
pub group: ReadGroup,
}
impl ReadPair {
pub fn key(&self) -> SeqPair {
SeqPair::from_readpair(self)
}
pub fn into_seqpair(self) -> SeqPair {
SeqPair::new(
self.forward.seq().to_vec(),
self.reverse.map(|x| x.seq().to_vec()),
)
}
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
pub enum ReadGroup {
Ungrouped,
Unmatched,
Match(String),
}
impl fmt::Display for ReadGroup {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
ReadGroup::Ungrouped => write!(f, ""),
ReadGroup::Unmatched => write!(f, "_unmatched_"),
ReadGroup::Match(x) => write!(f, "{}", x),
}
}
}
#[cfg(test)]
mod tests {
use super::*;
use bio::io::fastq;
use std::collections::HashSet;
fn make_record(id: &str, seq: &[u8]) -> fastq::Record {
let qual = vec![b'I'; seq.len()]; fastq::Record::with_attrs(id, None, seq, &qual)
}
fn make_readpair(f_seq: &[u8], r_seq: Option<&[u8]>, group: ReadGroup) -> ReadPair {
ReadPair {
forward: make_record("F", f_seq),
reverse: r_seq.map(|s| make_record("R", s)),
group,
}
}
#[test]
fn single_end_key_matches_constructor() {
let rp = make_readpair(b"ACGTACGT", None, ReadGroup::Ungrouped);
let key = rp.key();
let constructed = SeqPair::new(b"ACGTACGT".to_vec(), None);
assert_eq!(
key, constructed,
"single-end key must equal constructor-derived SeqPair"
);
assert_eq!(key.forward.as_slice(), b"ACGTACGT");
assert!(key.reverse.is_none());
}
#[test]
fn paired_end_key_includes_reverse() {
let rp = make_readpair(b"AAAA", Some(b"TTTT"), ReadGroup::Match("g1".into()));
let key = rp.key();
assert_eq!(key.forward.as_slice(), b"AAAA");
assert_eq!(key.reverse.as_ref().unwrap().as_slice(), b"TTTT");
let constructed = SeqPair::new(b"AAAA".to_vec(), Some(b"TTTT".to_vec()));
assert_eq!(
key, constructed,
"paired-end key should equal constructed SeqPair"
);
}
#[test]
fn hash_equality_semantics() {
let mut set: HashSet<SeqPair> = HashSet::new();
let a = SeqPair::new(b"AAAA".to_vec(), Some(b"TTTT".to_vec()));
let b = SeqPair::new(b"AAAA".to_vec(), Some(b"TTTT".to_vec())); let c = SeqPair::new(b"AAAA".to_vec(), Some(b"TTTA".to_vec())); let d = SeqPair::new(b"AAAA".to_vec(), None);
assert!(set.insert(a.clone()));
assert!(!set.insert(b), "identical key should not insert twice");
assert!(
set.insert(c),
"different reverse must produce a distinct key"
);
assert!(
set.insert(d),
"absence of reverse must produce a distinct key"
);
assert_eq!(set.len(), 3);
}
#[test]
fn readgroup_display_variants() {
assert_eq!(ReadGroup::Ungrouped.to_string(), "");
assert_eq!(ReadGroup::Unmatched.to_string(), "_unmatched_");
assert_eq!(ReadGroup::Match("poolA".into()).to_string(), "poolA");
}
#[test]
fn reverse_presence_changes_key() {
let rp_single = make_readpair(b"GGGG", None, ReadGroup::Ungrouped);
let rp_paired = make_readpair(b"GGGG", Some(b"A"), ReadGroup::Ungrouped);
let k_single = rp_single.key();
let k_paired = rp_paired.key();
assert_ne!(
k_single, k_paired,
"adding a reverse read must change the key"
);
assert_eq!(k_single.forward.as_slice(), b"GGGG");
assert!(k_single.reverse.is_none());
assert_eq!(k_paired.forward.as_slice(), b"GGGG");
assert_eq!(k_paired.reverse.unwrap().as_slice(), b"A");
}
#[test]
fn readpair_key_is_deterministic() {
let rp = make_readpair(b"TACT", Some(b"AGGA"), ReadGroup::Match("g2".into()));
let k1 = rp.key();
let k2 = rp.key();
assert_eq!(k1, k2, "key generation must be deterministic");
}
#[test]
fn tiny_reads_smoke() {
let rp1 = make_readpair(b"A", None, ReadGroup::Ungrouped);
let rp2 = make_readpair(b"A", Some(b""), ReadGroup::Ungrouped);
let k1 = rp1.key();
let k2 = rp2.key();
assert_eq!(k1.forward.as_slice(), b"A");
assert!(k1.reverse.is_none());
assert_eq!(k2.forward.as_slice(), b"A");
assert_eq!(k2.reverse.as_ref().unwrap().as_slice(), b"");
assert_ne!(
k1, k2,
"empty reverse is still a distinct key from no reverse"
);
}
}