use crate::implementation::{
initialise_logging, HeapType, MatchtigEdgeData, NodeWeightArrayType, PerformanceDataType,
TigAlgorithm,
};
use crate::{
EulertigAlgorithm, EulertigAlgorithmConfiguration, GreedytigAlgorithm,
GreedytigAlgorithmConfiguration, MatchtigAlgorithm, MatchtigAlgorithmConfiguration,
PathtigAlgorithm,
};
use disjoint_sets::UnionFind;
use genome_graph::bigraph::implementation::node_bigraph_wrapper::PetBigraph;
use genome_graph::bigraph::interface::dynamic_bigraph::DynamicBigraph;
use genome_graph::bigraph::interface::static_bigraph::{StaticBigraph, StaticEdgeCentricBigraph};
use genome_graph::bigraph::interface::BidirectedData;
use genome_graph::bigraph::traitgraph::index::GraphIndex;
use genome_graph::bigraph::traitgraph::interface::{
GraphBase, ImmutableGraphContainer, MutableGraphContainer,
};
use genome_graph::bigraph::traitgraph::traitsequence::interface::Sequence;
use log::LevelFilter;
#[cfg(unix)]
use std::ffi::OsString;
use std::ffi::{CStr, CString};
use std::os::raw::c_char;
#[cfg(unix)]
use std::os::unix::ffi::OsStringExt;
use std::path::PathBuf;
use std::time::Instant;
use traitgraph_algo::dijkstra::DijkstraWeightedEdgeData;
type MatchtigGraph = PetBigraph<(), ExternalEdgeData>;
pub struct MatchtigsData {
graph: MatchtigGraph,
union_find: UnionFind,
}
#[derive(Clone, Eq, PartialEq, Debug)]
struct ExternalEdgeData {
weight: usize,
dummy_edge_id: usize,
unitig_id: usize,
forwards: bool,
}
impl MatchtigEdgeData<usize> for ExternalEdgeData {
fn is_dummy(&self) -> bool {
self.dummy_edge_id != 0
}
fn is_forwards(&self) -> bool {
self.forwards
}
fn new(sequence_handle: usize, forwards: bool, weight: usize, dummy_id: usize) -> Self {
Self {
weight,
dummy_edge_id: dummy_id,
unitig_id: sequence_handle,
forwards,
}
}
}
impl BidirectedData for ExternalEdgeData {
fn mirror(&self) -> Self {
let mut result = self.clone();
result.forwards = !result.forwards;
result
}
}
impl DijkstraWeightedEdgeData<usize> for ExternalEdgeData {
fn weight(&self) -> usize {
self.weight
}
}
#[no_mangle]
pub extern "C" fn matchtigs_initialise() {
initialise_logging(LevelFilter::Info);
}
#[no_mangle]
pub extern "C" fn matchtigs_initialise_graph(unitig_amount: usize) -> *mut MatchtigsData {
Box::leak(Box::new(MatchtigsData {
graph: MatchtigGraph::default(),
union_find: UnionFind::new(unitig_amount * 4),
}))
}
#[inline]
const fn unitig_forward_in_node(unitig: usize) -> usize {
unitig * 4
}
#[inline]
const fn unitig_forward_out_node(unitig: usize) -> usize {
unitig * 4 + 2
}
#[inline]
const fn unitig_backward_in_node(unitig: usize) -> usize {
unitig * 4 + 3
}
#[inline]
const fn unitig_backward_out_node(unitig: usize) -> usize {
unitig * 4 + 1
}
#[no_mangle]
pub unsafe extern "C" fn matchtigs_merge_nodes(
matchtigs_data: *mut MatchtigsData,
unitig_a: usize,
strand_a: bool,
unitig_b: usize,
strand_b: bool,
) {
let out_a = if strand_a {
unitig_forward_out_node(unitig_a)
} else {
unitig_backward_out_node(unitig_a)
};
let in_b = if strand_b {
unitig_forward_in_node(unitig_b)
} else {
unitig_backward_in_node(unitig_b)
};
let mirror_in_a = if strand_a {
unitig_backward_in_node(unitig_a)
} else {
unitig_forward_in_node(unitig_a)
};
let mirror_out_b = if strand_b {
unitig_backward_out_node(unitig_b)
} else {
unitig_forward_out_node(unitig_b)
};
let matchtigs_data = unsafe { &mut *matchtigs_data };
matchtigs_data.union_find.union(out_a, in_b);
matchtigs_data.union_find.union(mirror_in_a, mirror_out_b);
}
#[no_mangle]
pub unsafe extern "C" fn matchtigs_build_graph(
matchtigs_data: *mut MatchtigsData,
unitig_weights: *const usize,
) {
let start = Instant::now();
let matchtigs_data = unsafe { &mut *matchtigs_data };
let unitig_weights = {
assert!(!unitig_weights.is_null());
std::slice::from_raw_parts(unitig_weights, matchtigs_data.union_find.len() / 4)
};
matchtigs_data.union_find.force();
let mut representatives = matchtigs_data.union_find.to_vec();
representatives.sort_unstable();
representatives.dedup();
for _ in 0..representatives.len() {
matchtigs_data.graph.add_node(());
}
for (unitig_id, &unitig_weight) in unitig_weights.iter().enumerate() {
let n1: <MatchtigGraph as GraphBase>::NodeIndex = representatives
.binary_search(
&matchtigs_data
.union_find
.find(unitig_forward_in_node(unitig_id)),
)
.unwrap()
.into();
let n2: <MatchtigGraph as GraphBase>::NodeIndex = representatives
.binary_search(
&matchtigs_data
.union_find
.find(unitig_forward_out_node(unitig_id)),
)
.unwrap()
.into();
let mirror_n2: <MatchtigGraph as GraphBase>::NodeIndex = representatives
.binary_search(
&matchtigs_data
.union_find
.find(unitig_backward_in_node(unitig_id)),
)
.unwrap()
.into();
let mirror_n1: <MatchtigGraph as GraphBase>::NodeIndex = representatives
.binary_search(
&matchtigs_data
.union_find
.find(unitig_backward_out_node(unitig_id)),
)
.unwrap()
.into();
matchtigs_data.graph.set_mirror_nodes(n1, mirror_n1);
matchtigs_data.graph.set_mirror_nodes(n2, mirror_n2);
matchtigs_data.graph.add_edge(
n1,
n2,
ExternalEdgeData::new(unitig_id, true, unitig_weight, 0),
);
matchtigs_data.graph.add_edge(
mirror_n2,
mirror_n1,
ExternalEdgeData::new(unitig_id, false, unitig_weight, 0),
);
}
assert!(matchtigs_data.graph.verify_node_pairing());
assert!(matchtigs_data.graph.verify_edge_mirror_property());
let end = Instant::now();
info!(
"Took {:.6}s to build the tig graph",
(end - start).as_secs_f64()
);
}
#[no_mangle]
pub unsafe extern "C" fn matchtigs_compute_tigs(
matchtigs_data: *mut MatchtigsData,
tig_algorithm: usize,
threads: usize,
k: usize,
matching_file_prefix: *const c_char,
matcher_path: *const c_char,
tigs_edge_out: *mut isize,
tigs_insert_out: *mut usize,
tigs_out_limits: *mut usize,
) -> usize {
let mut matchtigs_data = unsafe { Box::from_raw(matchtigs_data) };
info!("Computing tigs for k = {} and {} threads", k, threads);
info!(
"Graph has {} nodes and {} edges",
matchtigs_data.graph.node_count(),
matchtigs_data.graph.edge_count()
);
let matching_file_prefix = CString::from({
assert!(!matching_file_prefix.is_null());
CStr::from_ptr(matching_file_prefix)
})
.into_bytes();
let matching_file_prefix = match () {
#[cfg(unix)]
() => PathBuf::from(OsString::from_vec(matching_file_prefix)),
#[cfg(not(unix))]
() => PathBuf::from(
&std::str::from_utf8(matching_file_prefix.as_slice())
.expect("Invalid encoding for matching file prefix"),
),
};
let matcher_path = CString::from({
assert!(!matcher_path.is_null());
CStr::from_ptr(matcher_path)
})
.into_bytes();
let matcher_path = match () {
#[cfg(unix)]
() => PathBuf::from(OsString::from_vec(matcher_path)),
#[cfg(not(unix))]
() => PathBuf::from(
&std::str::from_utf8(matcher_path.as_slice())
.expect("Invalid encoding for matcher path"),
),
};
let tigs_edge_out = {
assert!(!tigs_edge_out.is_null());
std::slice::from_raw_parts_mut(tigs_edge_out, matchtigs_data.graph.edge_count() * 2)
};
let tigs_insert_out = {
assert!(!tigs_insert_out.is_null());
std::slice::from_raw_parts_mut(tigs_insert_out, matchtigs_data.graph.edge_count() * 2)
};
let tigs_out_limits = {
assert!(!tigs_out_limits.is_null());
std::slice::from_raw_parts_mut(tigs_out_limits, matchtigs_data.graph.edge_count())
};
let tigs = match tig_algorithm {
1 => matchtigs_data
.graph
.edge_indices()
.filter_map(|e| {
if e.as_usize() % 2 == 0 {
Some(vec![e])
} else {
None
}
})
.collect(),
2 => PathtigAlgorithm::compute_tigs(&mut matchtigs_data.graph, &()),
3 => EulertigAlgorithm::compute_tigs(
&mut matchtigs_data.graph,
&EulertigAlgorithmConfiguration { k },
),
4 => MatchtigAlgorithm::compute_tigs(
&mut matchtigs_data.graph,
&MatchtigAlgorithmConfiguration {
threads,
k,
heap_type: HeapType::StdBinaryHeap,
node_weight_array_type: NodeWeightArrayType::EpochNodeWeightArray,
matching_file_prefix: &matching_file_prefix,
matcher_path: &matcher_path,
},
),
5 => GreedytigAlgorithm::compute_tigs(
&mut matchtigs_data.graph,
&GreedytigAlgorithmConfiguration {
threads,
k,
staged_parallelism_divisor: None,
resource_limit_factor: 1,
heap_type: HeapType::StdBinaryHeap,
node_weight_array_type: NodeWeightArrayType::EpochNodeWeightArray,
performance_data_type: PerformanceDataType::None,
},
),
tig_algorithm => panic!("Unknown tigs algorithm identifier {tig_algorithm}"),
};
let mut limit = 0;
for (i, tig) in tigs.iter().enumerate() {
for (edge_index, &edge) in tig.iter().enumerate() {
let edge_data = matchtigs_data.graph.edge_data(edge);
tigs_edge_out[limit + edge_index] =
edge_data.unitig_id as isize * if edge_data.is_forwards() { 1 } else { -1 };
tigs_insert_out[limit + edge_index] = if edge_data.is_original() {
0
} else {
edge_data.weight()
};
}
limit += tig.len();
tigs_out_limits[i] = limit;
}
tigs.len()
}