use crate::contig::ContigSequence;
use std::io::Write;
pub fn write_gfa<W: Write>(
out: &mut W,
contigs: &[ContigSequence],
min_contig: usize,
) -> std::io::Result<()> {
writeln!(out, "H\tVN:Z:1.0")?;
let mut seg_num = 0;
let mut segments: Vec<(usize, String)> = Vec::new();
for contig in contigs {
if contig.len_min() < min_contig {
continue;
}
seg_num += 1;
let seq = contig.primary_sequence();
let len = seq.len();
writeln!(out, "S\tContig_{}\t{}\tLN:i:{}", seg_num, seq, len)?;
segments.push((seg_num, seq));
}
let rc_segments: Vec<String> = segments
.iter()
.map(|(_, seq)| seq.chars().rev().map(crate::model::complement).collect())
.collect();
for i in 0..segments.len() {
for j in 0..segments.len() {
if i == j {
continue;
}
let (id_a, ref seq_a) = segments[i];
let (id_b, ref seq_b) = segments[j];
let max_overlap = seq_a.len().min(seq_b.len()).min(1000);
for overlap in (20..=max_overlap).rev() {
if seq_a.ends_with(&seq_b[..overlap]) {
writeln!(
out,
"L\tContig_{}\t+\tContig_{}\t+\t{}M",
id_a, id_b, overlap
)?;
break;
}
}
if i < j {
let rc_b = &rc_segments[j];
for overlap in (20..=max_overlap).rev() {
if seq_a.ends_with(&rc_b[..overlap]) {
writeln!(
out,
"L\tContig_{}\t+\tContig_{}\t-\t{}M",
id_a, id_b, overlap
)?;
break;
}
}
}
}
}
Ok(())
}
#[derive(Clone, Debug, PartialEq, Eq)]
pub struct VarNum {
data: Vec<u32>,
}
impl VarNum {
pub fn new(n: u32) -> Self {
VarNum { data: vec![n] }
}
pub fn zero() -> Self {
VarNum { data: vec![0] }
}
pub fn add_assign(&mut self, other: &VarNum) {
let mut overflow: u64 = 0;
for i in 0..other.data.len().max(self.data.len()) {
if i == self.data.len() {
self.data.push(0);
}
overflow += self.data[i] as u64;
if i < other.data.len() {
overflow += other.data[i] as u64;
}
self.data[i] = overflow as u32;
overflow >>= 32;
}
if overflow > 0 {
self.data.push(overflow as u32);
}
}
}
impl PartialOrd for VarNum {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl Ord for VarNum {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
let n = self.data.len();
let m = other.data.len();
let eff_n = self.data.iter().rposition(|&x| x != 0).map_or(1, |p| p + 1);
let eff_m = other
.data
.iter()
.rposition(|&x| x != 0)
.map_or(1, |p| p + 1);
if eff_n != eff_m {
return eff_n.cmp(&eff_m);
}
for i in (0..eff_n).rev() {
let a = if i < n { self.data[i] } else { 0 };
let b = if i < m { other.data[i] } else { 0 };
if a != b {
return a.cmp(&b);
}
}
std::cmp::Ordering::Equal
}
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_write_gfa_basic() {
let mut c1 = ContigSequence::new();
c1.insert_new_chunk_with("ACGTACGTACGTACGTACGTACGT".chars().collect());
let mut c2 = ContigSequence::new();
c2.insert_new_chunk_with("TTTTTTTTTTTTTTTTTTTTTTTT".chars().collect());
let mut output = Vec::new();
write_gfa(&mut output, &[c1, c2], 10).unwrap();
let result = String::from_utf8(output).unwrap();
assert!(result.contains("H\tVN:Z:1.0"));
assert!(result.contains("S\tContig_1"));
assert!(result.contains("S\tContig_2"));
}
#[test]
fn test_write_gfa_forward_overlap_link() {
let mut c1 = ContigSequence::new();
c1.insert_new_chunk_with("GGGGAAAAAAAAAAAAAAAAAAAA".chars().collect());
let mut c2 = ContigSequence::new();
c2.insert_new_chunk_with("AAAAAAAAAAAAAAAAAAAATTTT".chars().collect());
let mut output = Vec::new();
write_gfa(&mut output, &[c1, c2], 1).unwrap();
let result = String::from_utf8(output).unwrap();
assert!(
result.contains(
"L Contig_1 + Contig_2 + 20M
"
),
"{result}"
);
}
#[test]
fn test_write_gfa_reverse_complement_overlap_link() {
let mut c1 = ContigSequence::new();
c1.insert_new_chunk_with("GGGGAAAAAAAAAAAAAAAAAAAA".chars().collect());
let mut c2 = ContigSequence::new();
c2.insert_new_chunk_with("CCCCTTTTTTTTTTTTTTTTTTTT".chars().collect());
let mut output = Vec::new();
write_gfa(&mut output, &[c1, c2], 1).unwrap();
let result = String::from_utf8(output).unwrap();
assert!(
result.contains(
"L Contig_1 + Contig_2 - 20M
"
),
"{result}"
);
}
#[test]
fn test_varnum_addition() {
let mut a = VarNum::new(u32::MAX);
let b = VarNum::new(1);
a.add_assign(&b);
assert_eq!(a.data, vec![0, 1]); }
#[test]
fn test_varnum_ordering() {
assert!(VarNum::new(5) > VarNum::new(3));
assert!(VarNum::new(0) < VarNum::new(1));
let mut big = VarNum::new(0);
big.data = vec![0, 1]; assert!(big > VarNum::new(u32::MAX));
}
}