use std::sync::{Arc, Mutex};
use std::io::{self, Write};
use crate::pos::{PosT, offset, is_rev, make_pos_t, incr_pos};
use crate::seqindex::SeqIndex;
use crate::links::RankSelectBitVector;
use crate::mmap::mmap_open;
use iitree_rs::IITree;
pub fn emit_gfa<W: Write>(
out: &mut W,
_graph_length: usize,
seq_v_file: &str,
_node_iitree: Arc<Mutex<IITree<u64, PosT>>>,
path_iitree: Arc<Mutex<IITree<u64, PosT>>>,
seq_id_cbv: &RankSelectBitVector,
seqidx: Arc<SeqIndex>,
links: &[(PosT, PosT)],
_num_threads: usize,
) -> io::Result<()> {
writeln!(out, "H\tVN:Z:1.0")?;
let mmap_handle = mmap_open(seq_v_file)?;
let seq_v_slice = unsafe {
std::slice::from_raw_parts(mmap_handle.ptr as *const u8, mmap_handle.size)
};
let n_nodes = seq_id_cbv.rank(seq_id_cbv.size() - 1);
let node_sequences: Vec<(usize, String)> = (1..=n_nodes)
.map(|id| {
let node_start = match seq_id_cbv.select(id) {
Some(pos) => pos,
None => return (id, String::new()),
};
let node_end = match seq_id_cbv.select(id + 1) {
Some(pos) => pos,
None => return (id, String::new()),
};
let node_length = node_end - node_start;
let seq = &seq_v_slice[node_start..node_start + node_length];
let seq_string = String::from_utf8_lossy(seq).to_string();
(id, seq_string)
})
.collect();
for (id, seq) in node_sequences {
if !seq.is_empty() {
writeln!(out, "S\t{}\t{}", id, seq)?;
}
}
for (from, to) in links {
if *from != 0 && *to != 0 {
writeln!(
out,
"L\t{}\t{}\t{}\t{}\t0M",
offset(*from),
if is_rev(*from) { "-" } else { "+" },
offset(*to),
if is_rev(*to) { "-" } else { "+" }
)?;
}
}
let num_seqs = seqidx.n_seqs();
for i in 1..=num_seqs {
let j_start = match seqidx.nth_seq_offset(i) {
Some(offset) => offset,
None => continue,
};
let seq_len = match seqidx.nth_seq_length(i) {
Some(len) => len,
None => continue,
};
let k = j_start + seq_len;
let mut path_v: Vec<PosT> = Vec::new();
let mut seen_bp = 0u64;
let mut j = j_start;
while j < k {
let mut overlap_count = 0u64;
let mut ovlp_start_in_q = 0u64;
let mut ovlp_end_in_q = 0u64;
let mut pos_start_in_s = 0u64;
if let Ok(path_guard) = path_iitree.lock() {
path_guard.overlap(j, j + 1, |_idx, start, end, pos| {
overlap_count += 1;
ovlp_start_in_q = start;
ovlp_end_in_q = end;
pos_start_in_s = pos;
}).ok();
}
if overlap_count != 1 {
let seq_name = seqidx.nth_name(i).unwrap_or_else(|| "<unknown>".to_string());
return Err(io::Error::new(
io::ErrorKind::Other,
format!("[gfa] error: found {} overlaps for seq {} idx {} at j={} of {}",
overlap_count, seq_name, i, j, k)
));
}
let match_is_rev = is_rev(pos_start_in_s);
let length = ovlp_end_in_q - ovlp_start_in_q;
let mut p = pos_start_in_s;
for _ in 0..length {
let p_offset = offset(p) as usize;
if seq_id_cbv.select(1).is_some() && p_offset < seq_id_cbv.size() {
let node_id = seq_id_cbv.rank(p_offset + 1);
if node_id > 0 {
let node_start = seq_id_cbv.select(node_id);
if let Some(start_pos) = node_start {
if start_pos == p_offset {
path_v.push(make_pos_t(node_id as u64, match_is_rev));
}
}
}
}
incr_pos(&mut p);
}
seen_bp += length;
j = ovlp_end_in_q;
}
if seen_bp != seq_len {
let seq_name = seqidx.nth_name(i).unwrap_or_else(|| "<unknown>".to_string());
return Err(io::Error::new(
io::ErrorKind::Other,
format!("[gfa] length mismatch for {}, expected {} but got {}",
seq_name, seq_len, seen_bp)
));
}
let seq_name = seqidx.nth_name(i).unwrap_or_else(|| format!("seq{}", i));
write!(out, "P\t{}\t", seq_name)?;
for (idx, p) in path_v.iter().enumerate() {
if idx > 0 {
write!(out, ",")?;
}
write!(out, "{}{}", offset(*p), if is_rev(*p) { "-" } else { "+" })?;
}
writeln!(out, "\t*")?;
}
drop(mmap_handle);
Ok(())
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_gfa_header() {
let mut output = Vec::new();
write!(&mut output, "H\tVN:Z:1.0\n").unwrap();
let result = String::from_utf8(output).unwrap();
assert_eq!(result, "H\tVN:Z:1.0\n");
}
}