use crate::phred::NO_CALL_BASE;
use serde::Serialize;
#[derive(Debug, Clone, Serialize)]
pub struct ConsensusVariantReviewInfo {
pub chrom: String,
pub pos: i32,
pub ref_allele: String,
pub genotype: String,
pub filters: String,
#[serde(rename = "A")]
pub consensus_a: usize,
#[serde(rename = "C")]
pub consensus_c: usize,
#[serde(rename = "G")]
pub consensus_g: usize,
#[serde(rename = "T")]
pub consensus_t: usize,
#[serde(rename = "N")]
pub consensus_n: usize,
pub consensus_read: String,
pub consensus_insert: String,
pub consensus_call: char,
pub consensus_qual: u8,
pub a: usize,
pub c: usize,
pub g: usize,
pub t: usize,
pub n: usize,
}
#[derive(Debug, Clone)]
pub struct Variant {
pub chrom: String,
pub pos: i32,
pub ref_base: char,
pub genotype: Option<String>,
pub filters: Option<String>,
}
impl Variant {
#[must_use]
pub fn new(chrom: String, pos: i32, ref_base: char) -> Self {
Self { chrom, pos, ref_base, genotype: None, filters: None }
}
#[must_use]
pub fn with_genotype(mut self, genotype: String) -> Self {
self.genotype = Some(genotype);
self
}
#[must_use]
pub fn with_filters(mut self, filters: String) -> Self {
self.filters = Some(filters);
self
}
}
#[derive(Debug, Default, Clone)]
pub struct BaseCounts {
pub a: usize,
pub c: usize,
pub g: usize,
pub t: usize,
pub n: usize,
}
impl BaseCounts {
pub fn add_base(&mut self, base: u8) {
match base.to_ascii_uppercase() {
b'A' => self.a += 1,
b'C' => self.c += 1,
b'G' => self.g += 1,
b'T' => self.t += 1,
NO_CALL_BASE => self.n += 1,
_ => {}
}
}
#[must_use]
pub fn total(&self) -> usize {
self.a + self.c + self.g + self.t + self.n
}
}
#[must_use]
pub fn read_number_suffix(is_first_of_pair: bool) -> &'static str {
if is_first_of_pair { "/1" } else { "/2" }
}
#[must_use]
#[allow(clippy::cast_possible_truncation, clippy::cast_possible_wrap)]
pub fn format_insert_string(
record: &fgumi_raw_bam::RawRecord,
header: &noodles::sam::Header,
) -> String {
if !record.is_paired() {
return "NA".to_string();
}
if record.is_unmapped() || record.is_mate_unmapped() {
return "NA".to_string();
}
let ref_id = record.ref_id();
let mate_ref_id = record.mate_ref_id();
if ref_id < 0 || mate_ref_id < 0 {
return "NA".to_string();
}
if ref_id != mate_ref_id {
return "NA".to_string();
}
let is_reverse = record.is_reverse();
let mate_is_reverse = record.is_mate_reverse();
if is_reverse == mate_is_reverse {
return "NA".to_string(); }
let tlen = record.template_length();
if tlen == 0 || (!is_reverse && tlen < 0) || (is_reverse && tlen > 0) {
return "NA".to_string();
}
#[allow(clippy::cast_sign_loss)]
let ref_name = match header.reference_sequences().get_index(ref_id as usize) {
Some((name, _)) => String::from_utf8_lossy(name).to_string(),
None => return "NA".to_string(),
};
let outer = if is_reverse {
match record.alignment_end_1based() {
Some(pos) => pos as i32,
None => return "NA".to_string(),
}
} else {
match record.alignment_start_1based() {
Some(pos) => pos as i32,
None => return "NA".to_string(),
}
};
let other_coord = outer + tlen + if tlen < 0 { 1 } else { -1 };
let (start, end) =
if outer < other_coord { (outer, other_coord) } else { (other_coord, outer) };
let is_first = record.is_first_segment();
let start_equals_outer = start == outer;
let pairing = match (is_first, start_equals_outer) {
(true, true) | (false, false) => "F1R2",
(true, false) | (false, true) => "F2R1",
};
format!("{ref_name}:{start}-{end} | {pairing}")
}
#[cfg(test)]
mod tests {
use super::*;
use fgumi_raw_bam::{RawRecord, SamBuilder as RawSamBuilder, flags as raw_flags};
use noodles::sam::Header;
#[test]
fn test_read_number_suffix() {
assert_eq!(read_number_suffix(true), "/1");
assert_eq!(read_number_suffix(false), "/2");
}
#[test]
fn test_format_insert_string_unpaired() {
let header = Header::builder().build();
let mut b = RawSamBuilder::new();
b.sequence(b"ACGT").qualities(&[30; 4]).flags(0);
let record: RawRecord = b.build();
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
}
#[test]
fn test_base_counts_new() {
let counts = BaseCounts::default();
assert_eq!(counts.a, 0);
assert_eq!(counts.c, 0);
assert_eq!(counts.g, 0);
assert_eq!(counts.t, 0);
assert_eq!(counts.n, 0);
assert_eq!(counts.total(), 0);
}
#[test]
fn test_base_counts_add_base() {
let mut counts = BaseCounts::default();
counts.add_base(b'A');
assert_eq!(counts.a, 1);
assert_eq!(counts.total(), 1);
counts.add_base(b'a'); assert_eq!(counts.a, 2);
counts.add_base(b'C');
counts.add_base(b'G');
counts.add_base(b'T');
counts.add_base(b'N');
assert_eq!(counts.c, 1);
assert_eq!(counts.g, 1);
assert_eq!(counts.t, 1);
assert_eq!(counts.n, 1);
assert_eq!(counts.total(), 6);
}
#[test]
fn test_base_counts_ignore_non_bases() {
let mut counts = BaseCounts::default();
counts.add_base(b'X');
counts.add_base(b'-');
counts.add_base(b'.');
assert_eq!(counts.total(), 0);
}
#[test]
fn test_variant_new() {
let variant = Variant::new("chr1".to_string(), 100, 'A');
assert_eq!(variant.chrom, "chr1");
assert_eq!(variant.pos, 100);
assert_eq!(variant.ref_base, 'A');
assert!(variant.genotype.is_none());
assert!(variant.filters.is_none());
}
#[test]
fn test_variant_with_genotype_and_filters() {
let variant = Variant::new("chr1".to_string(), 100, 'A')
.with_genotype("0/1".to_string())
.with_filters("PASS".to_string());
assert_eq!(variant.genotype, Some("0/1".to_string()));
assert_eq!(variant.filters, Some("PASS".to_string()));
}
fn create_test_header() -> Header {
use bstr::BString;
use noodles::sam::header::record::value::{Map, map::ReferenceSequence};
use std::num::NonZeroUsize;
Header::builder()
.add_reference_sequence(
BString::from("chr1"),
Map::<ReferenceSequence>::new(
NonZeroUsize::try_from(1000).expect("non-zero value 1000"),
),
)
.build()
}
fn create_paired_read(
start: usize,
mate_start: usize,
is_reverse: bool,
mate_is_reverse: bool,
is_first: bool,
template_len: i32,
) -> RawRecord {
use fgumi_raw_bam::testutil::encode_op;
let mut flags: u16 = raw_flags::PAIRED;
if is_first {
flags |= raw_flags::FIRST_SEGMENT;
} else {
flags |= raw_flags::LAST_SEGMENT;
}
if is_reverse {
flags |= raw_flags::REVERSE;
}
if mate_is_reverse {
flags |= raw_flags::MATE_REVERSE;
}
let mut b = RawSamBuilder::new();
b.sequence(b"AAAAAAAAAA")
.qualities(&[30; 10])
.flags(flags)
.ref_id(0)
.pos(i32::try_from(start).expect("start fits i32") - 1)
.cigar_ops(&[encode_op(0, 10)])
.mate_ref_id(0)
.mate_pos(i32::try_from(mate_start).expect("mate_start fits i32") - 1)
.template_length(template_len);
b.build()
}
#[test]
fn test_format_insert_string_unmapped_returns_na() {
let header = create_test_header();
let mut b = RawSamBuilder::new();
b.sequence(b"ACGT")
.qualities(&[30; 4])
.flags(raw_flags::PAIRED | raw_flags::UNMAPPED | raw_flags::FIRST_SEGMENT);
let record: RawRecord = b.build();
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
}
#[test]
fn test_format_insert_string_mate_unmapped_returns_na() {
let header = create_test_header();
let mut record = create_paired_read(100, 200, false, true, true, 101);
let flags = record.flags() | raw_flags::MATE_UNMAPPED;
record.set_flags(flags);
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
}
#[test]
fn test_format_insert_string_same_orientation_returns_na() {
let header = create_test_header();
let record = create_paired_read(100, 200, false, false, true, 101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
let record = create_paired_read(100, 200, true, true, true, -101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
}
#[test]
fn test_format_insert_string_cross_contig_returns_na() {
let header = create_test_header();
let mut record = create_paired_read(100, 200, false, true, true, 101);
record.set_mate_ref_id(1);
let result = format_insert_string(&record, &header);
assert_eq!(result, "NA");
}
#[test]
fn test_format_insert_string_fr_pair_f1r2() {
let header = create_test_header();
let record = create_paired_read(100, 191, false, true, true, 101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "chr1:100-200 | F1R2");
}
#[test]
fn test_format_insert_string_fr_pair_f2r1() {
let header = create_test_header();
let record = create_paired_read(191, 100, true, false, true, -101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "chr1:100-200 | F2R1");
}
#[test]
fn test_format_insert_string_second_of_pair_forward() {
let header = create_test_header();
let record = create_paired_read(100, 191, false, true, false, 101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "chr1:100-200 | F2R1");
}
#[test]
fn test_format_insert_string_second_of_pair_reverse() {
let header = create_test_header();
let record = create_paired_read(191, 100, true, false, false, -101);
let result = format_insert_string(&record, &header);
assert_eq!(result, "chr1:100-200 | F1R2");
}
#[test]
fn test_format_insert_string_rf_pair_returns_na() {
let header = create_test_header();
let record_fwd = create_paired_read(200, 100, false, true, true, -101);
assert_eq!(format_insert_string(&record_fwd, &header), "NA");
let record_rev = create_paired_read(100, 200, true, false, true, 101);
assert_eq!(format_insert_string(&record_rev, &header), "NA");
}
#[test]
fn test_format_insert_string_opposite_strand_tlen_zero_returns_na() {
let header = create_test_header();
let record = create_paired_read(100, 100, false, true, true, 0);
assert_eq!(format_insert_string(&record, &header), "NA");
}
}