use std::fmt;
use serde::{Deserialize, Serialize};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum CigarOp {
Match,
Insertion,
Deletion,
Skip,
SoftClip,
HardClip,
SeqMatch,
SeqMismatch,
Pad,
}
impl fmt::Display for CigarOp {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(
f,
"{}",
match self {
CigarOp::Match => 'M',
CigarOp::Insertion => 'I',
CigarOp::Deletion => 'D',
CigarOp::Skip => 'N',
CigarOp::SoftClip => 'S',
CigarOp::HardClip => 'H',
CigarOp::SeqMatch => '=',
CigarOp::SeqMismatch => 'X',
CigarOp::Pad => 'P',
}
)
}
}
#[derive(Debug, Clone, Copy)]
pub struct CigarElement {
pub op: CigarOp,
pub len: u32,
}
impl fmt::Display for CigarElement {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}{}", self.len, self.op)
}
}
#[derive(Debug, Clone)]
pub struct CigarString {
pub elements: Vec<CigarElement>,
}
impl CigarString {
pub fn from_ops(ops: &[(CigarOp, u32)]) -> Self {
let mut elements = Vec::new();
if ops.is_empty() {
return CigarString { elements };
}
let (mut current_op, mut current_len) = ops[0];
for &(op, len) in &ops[1..] {
if op == current_op {
current_len += len;
} else {
elements.push(CigarElement {
op: current_op,
len: current_len,
});
current_op = op;
current_len = len;
}
}
elements.push(CigarElement {
op: current_op,
len: current_len,
});
CigarString { elements }
}
pub fn parse(s: &str) -> Result<Self, String> {
let mut elements = Vec::new();
let mut current_len = String::new();
for ch in s.chars() {
match ch {
'0'..='9' => current_len.push(ch),
'M' | 'I' | 'D' | 'N' | 'S' | 'H' | '=' | 'X' | 'P' => {
let len = current_len
.parse::<u32>()
.map_err(|_| format!("Invalid CIGAR length: {}", current_len))?;
let op = match ch {
'M' => CigarOp::Match,
'I' => CigarOp::Insertion,
'D' => CigarOp::Deletion,
'N' => CigarOp::Skip,
'S' => CigarOp::SoftClip,
'H' => CigarOp::HardClip,
'=' => CigarOp::SeqMatch,
'X' => CigarOp::SeqMismatch,
'P' => CigarOp::Pad,
_ => unreachable!(),
};
elements.push(CigarElement { op, len });
current_len.clear();
}
_ => return Err(format!("Invalid CIGAR character: {}", ch)),
}
}
if !current_len.is_empty() {
return Err("CIGAR string ended with incomplete operation".to_string());
}
Ok(CigarString { elements })
}
pub fn to_string_sam(&self) -> String {
self.elements
.iter()
.map(|elem| elem.to_string())
.collect::<Vec<_>>()
.join("")
}
pub fn query_len(&self) -> u32 {
self.elements
.iter()
.map(|elem| match elem.op {
CigarOp::Match
| CigarOp::Insertion
| CigarOp::SoftClip
| CigarOp::SeqMatch
| CigarOp::SeqMismatch => elem.len,
_ => 0,
})
.sum()
}
pub fn reference_len(&self) -> u32 {
self.elements
.iter()
.map(|elem| match elem.op {
CigarOp::Match
| CigarOp::Deletion
| CigarOp::Skip
| CigarOp::SeqMatch
| CigarOp::SeqMismatch => elem.len,
_ => 0,
})
.sum()
}
}
impl fmt::Display for CigarString {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.to_string_sam())
}
}
#[derive(Debug, Clone, Copy)]
pub enum BacktrackOp {
Diagonal,
Up,
Left,
}
pub fn traceback_to_cigar(
backtrack_matrix: &[Vec<BacktrackOp>],
query: &[u8],
reference: &[u8],
mut query_pos: usize,
mut ref_pos: usize,
) -> CigarString {
let mut ops = Vec::new();
while query_pos > 0 || ref_pos > 0 {
let op = if query_pos > 0 && ref_pos > 0 {
backtrack_matrix[query_pos][ref_pos]
} else if query_pos > 0 {
BacktrackOp::Up
} else {
BacktrackOp::Left
};
match op {
BacktrackOp::Diagonal => {
let cigar_op = if query[query_pos - 1] == reference[ref_pos - 1] {
CigarOp::SeqMatch
} else {
CigarOp::SeqMismatch
};
ops.push((cigar_op, 1));
query_pos -= 1;
ref_pos -= 1;
}
BacktrackOp::Up => {
ops.push((CigarOp::Insertion, 1));
query_pos -= 1;
}
BacktrackOp::Left => {
ops.push((CigarOp::Deletion, 1));
ref_pos -= 1;
}
}
}
ops.reverse();
CigarString::from_ops(&ops)
}
pub fn cigar_from_hmm_path(path: &[u8], _length: usize) -> CigarString {
let mut ops = Vec::new();
let mut current_op = None;
let mut current_len = 0u32;
for &state in path {
let new_op = match state {
0 => CigarOp::SeqMatch, 1 => CigarOp::Insertion, 2 => CigarOp::Deletion, _ => CigarOp::Match,
};
if let Some(op) = current_op {
if op == new_op {
current_len += 1;
} else {
ops.push((op, current_len));
current_op = Some(new_op);
current_len = 1;
}
} else {
current_op = Some(new_op);
current_len = 1;
}
}
if let Some(op) = current_op {
ops.push((op, current_len));
}
CigarString::from_ops(&ops)
}
pub fn query_position_from_cigar(cigar: &CigarString, bp: u32) -> Option<u32> {
let mut pos = 0;
for elem in &cigar.elements {
let elem_len = elem.len;
match elem.op {
CigarOp::Match | CigarOp::Insertion | CigarOp::SoftClip | CigarOp::SeqMatch | CigarOp::SeqMismatch => {
if pos + elem_len >= bp {
return Some(pos + (bp - pos).min(elem_len));
}
pos += elem_len;
}
_ => {}
}
}
None
}
pub fn reference_position_from_cigar(cigar: &CigarString, bp: u32) -> Option<u32> {
let mut pos = 0;
for elem in &cigar.elements {
let elem_len = elem.len;
match elem.op {
CigarOp::Match | CigarOp::Deletion | CigarOp::Skip | CigarOp::SeqMatch | CigarOp::SeqMismatch => {
if pos + elem_len >= bp {
return Some(pos + (bp - pos).min(elem_len));
}
pos += elem_len;
}
_ => {}
}
}
None
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_cigar_parsing() {
let cigar_str = "10M2I5D";
let cigar = CigarString::parse(cigar_str).unwrap();
assert_eq!(cigar.elements.len(), 3);
assert_eq!(cigar.elements[0].len, 10);
assert_eq!(cigar.elements[1].op, CigarOp::Insertion);
}
#[test]
fn test_cigar_lengths() {
let ops = vec![
(CigarOp::Match, 10),
(CigarOp::Insertion, 2),
(CigarOp::Deletion, 5),
];
let cigar = CigarString::from_ops(&ops);
assert_eq!(cigar.query_len(), 12);
assert_eq!(cigar.reference_len(), 15);
}
#[test]
fn test_cigar_generation() {
let ops = vec![(CigarOp::Match, 5), (CigarOp::Insertion, 2)];
let cigar = CigarString::from_ops(&ops);
assert_eq!(cigar.to_string_sam(), "5M2I");
}
#[test]
fn test_hmm_cigar_generation() {
let path = vec![0, 0, 0, 1, 1, 0, 0, 2];
let cigar = cigar_from_hmm_path(&path, 8);
assert!(!cigar.elements.is_empty());
}
}