use fgumi_raw_bam::fields::{aux_data_offset_from_record, tag_value_size};
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct RawCompareResult {
pub core_match: bool,
pub tags_match: bool,
pub tag_order_match: bool,
}
#[must_use]
pub fn raw_records_byte_equal(r1: &[u8], r2: &[u8]) -> bool {
r1 == r2
}
const BIN_OFFSET: usize = 10;
const BIN_LEN: usize = 2;
#[must_use]
pub fn raw_core_fields_equal(r1: &[u8], r2: &[u8]) -> bool {
let (Some(off1), Some(off2)) =
(aux_data_offset_from_record(r1), aux_data_offset_from_record(r2))
else {
return false;
};
let end1 = off1.min(r1.len());
let end2 = off2.min(r2.len());
if end1 != end2 {
return false;
}
r1[..BIN_OFFSET] == r2[..BIN_OFFSET]
&& r1[BIN_OFFSET + BIN_LEN..end1] == r2[BIN_OFFSET + BIN_LEN..end2]
}
#[must_use]
pub fn raw_tags_byte_equal(r1: &[u8], r2: &[u8]) -> bool {
let aux1 = fgumi_raw_bam::fields::aux_data_slice(r1);
let aux2 = fgumi_raw_bam::fields::aux_data_slice(r2);
aux1 == aux2
}
fn collect_tag_entries(aux_data: &[u8]) -> Option<Vec<([u8; 2], usize, usize)>> {
let mut entries = Vec::new();
let mut i = 0;
while i < aux_data.len() {
if i + 3 > aux_data.len() {
return None;
}
let tag_name = [aux_data[i], aux_data[i + 1]];
let val_type = aux_data[i + 2];
let val_start = i + 2; let data_start = i + 3;
let size = tag_value_size(val_type, aux_data.get(data_start..)?)?;
let val_end = data_start + size;
entries.push((tag_name, val_start, val_end));
i = val_end;
}
Some(entries)
}
fn is_int_type(t: u8) -> bool {
matches!(t, b'c' | b'C' | b's' | b'S' | b'i' | b'I')
}
fn decode_int_tag(type_byte: u8, data: &[u8]) -> Option<i64> {
match type_byte {
b'c' => data.first().map(|&b| i64::from(b as i8)),
b'C' => data.first().map(|&b| i64::from(b)),
b's' => data.get(..2).map(|b| i64::from(i16::from_le_bytes([b[0], b[1]]))),
b'S' => data.get(..2).map(|b| i64::from(u16::from_le_bytes([b[0], b[1]]))),
b'i' => data.get(..4).map(|b| i64::from(i32::from_le_bytes([b[0], b[1], b[2], b[3]]))),
b'I' => data.get(..4).map(|b| i64::from(u32::from_le_bytes([b[0], b[1], b[2], b[3]]))),
_ => None,
}
}
#[must_use]
pub fn raw_tags_equal_order_independent(r1: &[u8], r2: &[u8]) -> bool {
let aux1 = fgumi_raw_bam::fields::aux_data_slice(r1);
let aux2 = fgumi_raw_bam::fields::aux_data_slice(r2);
let (Some(entries1), Some(entries2)) = (collect_tag_entries(aux1), collect_tag_entries(aux2))
else {
return false;
};
if entries1.len() != entries2.len() {
return false;
}
for &(tag1, start1, end1) in &entries1 {
let Some(&(_, start2, end2)) = entries2.iter().find(|(t, _, _)| *t == tag1) else {
return false;
};
if aux1[start1..end1] == aux2[start2..end2] {
continue;
}
let type1 = aux1[start1];
let type2 = aux2[start2];
if is_int_type(type1) && is_int_type(type2) {
let data1 = &aux1[start1 + 1..end1];
let data2 = &aux2[start2 + 1..end2];
match (decode_int_tag(type1, data1), decode_int_tag(type2, data2)) {
(Some(v1), Some(v2)) if v1 == v2 => continue,
_ => return false,
}
} else {
return false;
}
}
true
}
#[must_use]
pub fn raw_compare_structured(r1: &[u8], r2: &[u8]) -> RawCompareResult {
let core_match = raw_core_fields_equal(r1, r2);
let tags_match = raw_tags_byte_equal(r1, r2);
let tag_order_match = if tags_match { true } else { raw_tags_equal_order_independent(r1, r2) };
RawCompareResult { core_match, tags_match, tag_order_match }
}
#[cfg(test)]
mod tests {
use super::*;
use crate::sam::SamTag;
use fgumi_raw_bam::testutil::make_bam_bytes;
fn make_z_tag(tag: [u8; 2], value: &[u8]) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b'Z'];
aux.extend_from_slice(value);
aux.push(0); aux
}
fn make_i_tag(tag: [u8; 2], value: i32) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b'i'];
aux.extend_from_slice(&value.to_le_bytes());
aux
}
fn make_c_tag(tag: [u8; 2], value: u8) -> Vec<u8> {
vec![tag[0], tag[1], b'C', value]
}
fn make_s_tag(tag: [u8; 2], value: i16) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b's'];
aux.extend_from_slice(&value.to_le_bytes());
aux
}
fn make_upper_s_tag(tag: [u8; 2], value: u16) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b'S'];
aux.extend_from_slice(&value.to_le_bytes());
aux
}
fn make_upper_i_tag(tag: [u8; 2], value: u32) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b'I'];
aux.extend_from_slice(&value.to_le_bytes());
aux
}
fn make_f_tag(tag: [u8; 2], value: f32) -> Vec<u8> {
let mut aux = vec![tag[0], tag[1], b'f'];
aux.extend_from_slice(&value.to_le_bytes());
aux
}
fn base_record(aux: &[u8]) -> Vec<u8> {
make_bam_bytes(1, 100, 0x41, b"rea", &[], 4, 1, 200, aux)
}
#[test]
fn test_byte_equal_identical() {
let bytes = base_record(&[]);
assert!(raw_records_byte_equal(&bytes, &bytes));
}
#[test]
fn test_byte_equal_different() {
let r1 = base_record(&[]);
let r2 = make_bam_bytes(2, 100, 0x41, b"rea", &[], 4, 1, 200, &[]);
assert!(!raw_records_byte_equal(&r1, &r2));
}
#[test]
fn test_byte_equal_empty() {
let r1: &[u8] = &[];
let r2: &[u8] = &[];
assert!(raw_records_byte_equal(r1, r2));
}
#[test]
fn test_core_equal_identical_records() {
let rec = base_record(&[]);
assert!(raw_core_fields_equal(&rec, &rec));
}
#[test]
fn test_core_equal_same_core_different_tags() {
let aux1 = make_z_tag(*SamTag::RG, b"sample1");
let aux2 = make_z_tag(*SamTag::RG, b"sample2");
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(raw_core_fields_equal(&r1, &r2));
}
#[test]
fn test_core_not_equal_different_pos() {
let r1 = make_bam_bytes(1, 100, 0, b"rea", &[], 4, 1, 200, &[]);
let r2 = make_bam_bytes(1, 200, 0, b"rea", &[], 4, 1, 200, &[]);
assert!(!raw_core_fields_equal(&r1, &r2));
}
#[test]
fn test_core_not_equal_different_flags() {
let r1 = make_bam_bytes(1, 100, 0, b"rea", &[], 4, 1, 200, &[]);
let r2 = make_bam_bytes(1, 100, 0x10, b"rea", &[], 4, 1, 200, &[]);
assert!(!raw_core_fields_equal(&r1, &r2));
}
#[test]
fn test_core_equal_short_record_returns_false() {
let short = vec![0u8; 10];
let rec = base_record(&[]);
assert!(!raw_core_fields_equal(&short, &rec));
}
#[test]
fn test_core_equal_different_bin_values() {
let mut r1 = base_record(&[]);
let mut r2 = base_record(&[]);
r1[10..12].copy_from_slice(&100u16.to_le_bytes());
r2[10..12].copy_from_slice(&200u16.to_le_bytes());
assert!(raw_core_fields_equal(&r1, &r2));
}
#[test]
fn test_tags_byte_equal_no_tags() {
let r1 = base_record(&[]);
let r2 = base_record(&[]);
assert!(raw_tags_byte_equal(&r1, &r2));
}
#[test]
fn test_tags_byte_equal_same_tags() {
let aux = make_z_tag(*SamTag::RG, b"sample1");
let r1 = base_record(&aux);
let r2 = base_record(&aux);
assert!(raw_tags_byte_equal(&r1, &r2));
}
#[test]
fn test_tags_byte_not_equal_different_values() {
let r1 = base_record(&make_z_tag(*SamTag::RG, b"sample1"));
let r2 = base_record(&make_z_tag(*SamTag::RG, b"sample2"));
assert!(!raw_tags_byte_equal(&r1, &r2));
}
#[test]
fn test_tags_byte_not_equal_different_order() {
let mut aux1 = make_z_tag(*SamTag::RG, b"s1");
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"s1"));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(!raw_tags_byte_equal(&r1, &r2));
}
#[test]
fn test_order_independent_same_order() {
let mut aux = make_z_tag(*SamTag::RG, b"s1");
aux.extend_from_slice(&make_i_tag(*b"NM", 5));
let r1 = base_record(&aux);
let r2 = base_record(&aux);
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_different_order() {
let mut aux1 = make_z_tag(*SamTag::RG, b"s1");
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"s1"));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_different_values() {
let r1 = base_record(&make_i_tag(*b"NM", 5));
let r2 = base_record(&make_i_tag(*b"NM", 10));
assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_missing_tag() {
let mut aux1 = make_z_tag(*SamTag::RG, b"s1");
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let aux2 = make_z_tag(*SamTag::RG, b"s1");
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_extra_tag() {
let aux1 = make_i_tag(*b"NM", 5);
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"s1"));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_no_tags() {
let r1 = base_record(&[]);
let r2 = base_record(&[]);
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_multiple_types() {
let mut aux1 = make_z_tag(*SamTag::RG, b"grp");
aux1.extend_from_slice(&make_i_tag(*b"NM", 3));
aux1.extend_from_slice(&make_c_tag(*b"MQ", 42));
aux1.extend_from_slice(&make_f_tag(*b"GC", 0.75));
let mut aux2 = make_f_tag(*b"GC", 0.75);
aux2.extend_from_slice(&make_c_tag(*b"MQ", 42));
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"grp"));
aux2.extend_from_slice(&make_i_tag(*b"NM", 3));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_short_record() {
let short = vec![0u8; 10];
let rec = base_record(&make_i_tag(*b"NM", 5));
assert!(!raw_tags_equal_order_independent(&short, &rec));
}
#[test]
fn test_structured_identical() {
let aux = make_i_tag(*b"NM", 5);
let rec = base_record(&aux);
let result = raw_compare_structured(&rec, &rec);
assert_eq!(
result,
RawCompareResult { core_match: true, tags_match: true, tag_order_match: true }
);
}
#[test]
fn test_structured_different_core() {
let r1 = make_bam_bytes(1, 100, 0, b"rea", &[], 4, 1, 200, &[]);
let r2 = make_bam_bytes(1, 999, 0, b"rea", &[], 4, 1, 200, &[]);
let result = raw_compare_structured(&r1, &r2);
assert!(!result.core_match);
assert!(result.tags_match); }
#[test]
fn test_structured_same_core_different_tag_order() {
let mut aux1 = make_z_tag(*SamTag::RG, b"s1");
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"s1"));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
let result = raw_compare_structured(&r1, &r2);
assert!(result.core_match);
assert!(!result.tags_match);
assert!(result.tag_order_match);
}
#[test]
fn test_structured_different_tags() {
let r1 = base_record(&make_i_tag(*b"NM", 5));
let r2 = base_record(&make_i_tag(*b"NM", 10));
let result = raw_compare_structured(&r1, &r2);
assert!(result.core_match);
assert!(!result.tags_match);
assert!(!result.tag_order_match);
}
#[test]
fn test_structured_short_records() {
let short = vec![0u8; 10];
let rec = base_record(&[]);
let result = raw_compare_structured(&short, &rec);
assert!(!result.core_match);
}
#[test]
fn test_structured_different_bin_different_tag_order() {
let mut aux1 = make_z_tag(*SamTag::RG, b"s1");
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"s1"));
let mut r1 = base_record(&aux1);
let mut r2 = base_record(&aux2);
r1[10..12].copy_from_slice(&100u16.to_le_bytes());
r2[10..12].copy_from_slice(&200u16.to_le_bytes());
let result = raw_compare_structured(&r1, &r2);
assert!(result.core_match);
assert!(!result.tags_match);
assert!(result.tag_order_match);
}
#[test]
fn test_collect_entries_empty() {
let entries =
collect_tag_entries(&[]).expect("collect_tag_entries should succeed for empty input");
assert!(entries.is_empty());
}
#[test]
fn test_collect_entries_single_int() {
let aux = make_i_tag(*b"NM", 42);
let entries = collect_tag_entries(&aux)
.expect("collect_tag_entries should succeed for single int tag");
assert_eq!(entries.len(), 1);
assert_eq!(entries[0].0, *b"NM");
}
#[test]
fn test_collect_entries_multiple() {
let mut aux = make_z_tag(*SamTag::RG, b"grp");
aux.extend_from_slice(&make_i_tag(*b"NM", 5));
let entries = collect_tag_entries(&aux)
.expect("collect_tag_entries should succeed for multiple tags");
assert_eq!(entries.len(), 2);
assert_eq!(entries[0].0, *SamTag::RG);
assert_eq!(entries[1].0, *b"NM");
}
#[test]
fn test_collect_entries_truncated_returns_none() {
let aux = [b'N', b'M'];
assert!(collect_tag_entries(&aux).is_none());
}
#[test]
fn test_collect_entries_b_array_tag() {
let aux = fgumi_raw_bam::testutil::make_b_int_array_tag(*b"XA", &[1, 2, 3]);
let entries =
collect_tag_entries(&aux).expect("collect_tag_entries should succeed for B-array tag");
assert_eq!(entries.len(), 1);
assert_eq!(entries[0].0, *b"XA");
assert_eq!(entries[0].2, aux.len());
}
#[test]
fn test_order_independent_same_value_different_int_types_u8_vs_i16() {
let r1 = base_record(&make_c_tag(*b"cD", 158));
let r2 = base_record(&make_s_tag(*b"cD", 158));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_same_value_different_int_types_u8_vs_i32() {
let r1 = base_record(&make_c_tag(*b"NM", 42));
let r2 = base_record(&make_i_tag(*b"NM", 42));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_same_value_different_int_types_i16_vs_i32() {
let r1 = base_record(&make_s_tag(*b"MQ", 300));
let r2 = base_record(&make_i_tag(*b"MQ", 300));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_same_value_different_int_types_u32_vs_i32() {
let r1 = base_record(&make_upper_i_tag(*b"NM", 1000));
let r2 = base_record(&make_i_tag(*b"NM", 1000));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_same_value_different_int_types_u32_vs_u8() {
let r1 = base_record(&make_upper_i_tag(*b"NM", 42));
let r2 = base_record(&make_c_tag(*b"NM", 42));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_different_values_different_int_types() {
let r1 = base_record(&make_c_tag(*b"NM", 5));
let r2 = base_record(&make_i_tag(*b"NM", 10));
assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_int_vs_non_int_same_tag_not_equal() {
let r1 = base_record(&make_c_tag(*b"XY", 65)); let r2 = base_record(&make_z_tag(*b"XY", b"A"));
assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_structured_semantic_int_tags_match() {
let r1 = base_record(&make_c_tag(*b"cD", 158));
let r2 = base_record(&make_s_tag(*b"cD", 158));
let result = raw_compare_structured(&r1, &r2);
assert!(result.core_match);
assert!(!result.tags_match); assert!(result.tag_order_match); }
#[test]
fn test_order_independent_semantic_int_with_reordered_tags() {
let mut aux1 = make_z_tag(*SamTag::RG, b"grp");
aux1.extend_from_slice(&make_c_tag(*b"cD", 200));
aux1.extend_from_slice(&make_i_tag(*b"NM", 5));
let mut aux2 = make_i_tag(*b"NM", 5);
aux2.extend_from_slice(&make_upper_s_tag(*b"cD", 200));
aux2.extend_from_slice(&make_z_tag(*SamTag::RG, b"grp"));
let r1 = base_record(&aux1);
let r2 = base_record(&aux2);
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_negative_value_across_types() {
let r1 = base_record(&[b'X', b'N', b'c', (-100i8) as u8]);
let r2 = base_record(&make_s_tag(*b"XN", -100));
assert!(raw_tags_equal_order_independent(&r1, &r2));
}
#[test]
fn test_order_independent_same_bytes_different_sign_interpretation() {
let r1 = base_record(&[b'X', b'V', b'c', 0xFF]); let r2 = base_record(&make_c_tag(*b"XV", 255)); assert!(!raw_tags_equal_order_independent(&r1, &r2));
}
}