1use crate::implementation::{
8 initialise_logging, HeapType, MatchtigEdgeData, NodeWeightArrayType, PerformanceDataType,
9 TigAlgorithm,
10};
11use crate::{
12 EulertigAlgorithm, EulertigAlgorithmConfiguration, GreedytigAlgorithm,
13 GreedytigAlgorithmConfiguration, MatchtigAlgorithm, MatchtigAlgorithmConfiguration,
14 PathtigAlgorithm,
15};
16use disjoint_sets::UnionFind;
17use genome_graph::bigraph::implementation::node_bigraph_wrapper::PetBigraph;
18use genome_graph::bigraph::interface::dynamic_bigraph::DynamicBigraph;
19use genome_graph::bigraph::interface::static_bigraph::{StaticBigraph, StaticEdgeCentricBigraph};
20use genome_graph::bigraph::interface::BidirectedData;
21use genome_graph::bigraph::traitgraph::index::GraphIndex;
22use genome_graph::bigraph::traitgraph::interface::{
23 GraphBase, ImmutableGraphContainer, MutableGraphContainer,
24};
25use genome_graph::bigraph::traitgraph::traitsequence::interface::Sequence;
26use log::LevelFilter;
27#[cfg(unix)]
28use std::ffi::OsString;
29use std::ffi::{CStr, CString};
30use std::os::raw::c_char;
31#[cfg(unix)]
32use std::os::unix::ffi::OsStringExt;
33use std::path::PathBuf;
34use std::time::Instant;
35use traitgraph_algo::dijkstra::DijkstraWeightedEdgeData;
36
37type MatchtigGraph = PetBigraph<(), ExternalEdgeData>;
38
39pub struct MatchtigsData {
41 graph: MatchtigGraph,
42 union_find: UnionFind,
43}
44
45#[derive(Clone, Eq, PartialEq, Debug)]
46struct ExternalEdgeData {
47 weight: usize,
48 dummy_edge_id: usize,
50 unitig_id: usize,
51 forwards: bool,
52}
53
54impl MatchtigEdgeData<usize> for ExternalEdgeData {
55 fn is_dummy(&self) -> bool {
56 self.dummy_edge_id != 0
57 }
58
59 fn is_forwards(&self) -> bool {
60 self.forwards
61 }
62
63 fn new(sequence_handle: usize, forwards: bool, weight: usize, dummy_id: usize) -> Self {
64 Self {
65 weight,
66 dummy_edge_id: dummy_id,
67 unitig_id: sequence_handle,
68 forwards,
69 }
70 }
71}
72
73impl BidirectedData for ExternalEdgeData {
74 fn mirror(&self) -> Self {
75 let mut result = self.clone();
76 result.forwards = !result.forwards;
77 result
78 }
79}
80
81impl DijkstraWeightedEdgeData<usize> for ExternalEdgeData {
82 fn weight(&self) -> usize {
83 self.weight
84 }
85}
86
87#[no_mangle]
90pub extern "C" fn matchtigs_initialise() {
91 initialise_logging(LevelFilter::Info);
92}
93
94#[no_mangle]
97pub extern "C" fn matchtigs_initialise_graph(unitig_amount: usize) -> *mut MatchtigsData {
98 Box::leak(Box::new(MatchtigsData {
99 graph: MatchtigGraph::default(),
100 union_find: UnionFind::new(unitig_amount * 4),
101 }))
102}
103
104#[inline]
105const fn unitig_forward_in_node(unitig: usize) -> usize {
106 unitig * 4
107}
108
109#[inline]
110const fn unitig_forward_out_node(unitig: usize) -> usize {
111 unitig * 4 + 2
112}
113
114#[inline]
115const fn unitig_backward_in_node(unitig: usize) -> usize {
116 unitig * 4 + 3
117}
118
119#[inline]
120const fn unitig_backward_out_node(unitig: usize) -> usize {
121 unitig * 4 + 1
122}
123
124#[no_mangle]
135pub unsafe extern "C" fn matchtigs_merge_nodes(
136 matchtigs_data: *mut MatchtigsData,
137 unitig_a: usize,
138 strand_a: bool,
139 unitig_b: usize,
140 strand_b: bool,
141) {
142 let out_a = if strand_a {
145 unitig_forward_out_node(unitig_a)
146 } else {
147 unitig_backward_out_node(unitig_a)
148 };
149 let in_b = if strand_b {
150 unitig_forward_in_node(unitig_b)
151 } else {
152 unitig_backward_in_node(unitig_b)
153 };
154
155 let mirror_in_a = if strand_a {
156 unitig_backward_in_node(unitig_a)
157 } else {
158 unitig_forward_in_node(unitig_a)
159 };
160 let mirror_out_b = if strand_b {
161 unitig_backward_out_node(unitig_b)
162 } else {
163 unitig_forward_out_node(unitig_b)
164 };
165
166 let matchtigs_data = unsafe { &mut *matchtigs_data };
168 matchtigs_data.union_find.union(out_a, in_b);
169 matchtigs_data.union_find.union(mirror_in_a, mirror_out_b);
170}
171
172#[no_mangle]
180pub unsafe extern "C" fn matchtigs_build_graph(
181 matchtigs_data: *mut MatchtigsData,
182 unitig_weights: *const usize,
183) {
184 let start = Instant::now();
185 let matchtigs_data = unsafe { &mut *matchtigs_data };
186
187 let unitig_weights = {
188 assert!(!unitig_weights.is_null());
189
190 std::slice::from_raw_parts(unitig_weights, matchtigs_data.union_find.len() / 4)
191 };
192
193 matchtigs_data.union_find.force();
194 let mut representatives = matchtigs_data.union_find.to_vec();
195 representatives.sort_unstable();
196 representatives.dedup();
197
198 for _ in 0..representatives.len() {
199 matchtigs_data.graph.add_node(());
200 }
201
202 for (unitig_id, &unitig_weight) in unitig_weights.iter().enumerate() {
203 let n1: <MatchtigGraph as GraphBase>::NodeIndex = representatives
204 .binary_search(
205 &matchtigs_data
206 .union_find
207 .find(unitig_forward_in_node(unitig_id)),
208 )
209 .unwrap()
210 .into();
211 let n2: <MatchtigGraph as GraphBase>::NodeIndex = representatives
212 .binary_search(
213 &matchtigs_data
214 .union_find
215 .find(unitig_forward_out_node(unitig_id)),
216 )
217 .unwrap()
218 .into();
219 let mirror_n2: <MatchtigGraph as GraphBase>::NodeIndex = representatives
220 .binary_search(
221 &matchtigs_data
222 .union_find
223 .find(unitig_backward_in_node(unitig_id)),
224 )
225 .unwrap()
226 .into();
227 let mirror_n1: <MatchtigGraph as GraphBase>::NodeIndex = representatives
228 .binary_search(
229 &matchtigs_data
230 .union_find
231 .find(unitig_backward_out_node(unitig_id)),
232 )
233 .unwrap()
234 .into();
235
236 matchtigs_data.graph.set_mirror_nodes(n1, mirror_n1);
237 matchtigs_data.graph.set_mirror_nodes(n2, mirror_n2);
238
239 matchtigs_data.graph.add_edge(
240 n1,
241 n2,
242 ExternalEdgeData::new(unitig_id, true, unitig_weight, 0),
243 );
244 matchtigs_data.graph.add_edge(
245 mirror_n2,
246 mirror_n1,
247 ExternalEdgeData::new(unitig_id, false, unitig_weight, 0),
248 );
249 }
250
251 assert!(matchtigs_data.graph.verify_node_pairing());
252 assert!(matchtigs_data.graph.verify_edge_mirror_property());
253
254 let end = Instant::now();
255 info!(
256 "Took {:.6}s to build the tig graph",
257 (end - start).as_secs_f64()
258 );
259}
260
261#[no_mangle]
280pub unsafe extern "C" fn matchtigs_compute_tigs(
281 matchtigs_data: *mut MatchtigsData,
282 tig_algorithm: usize,
283 threads: usize,
284 k: usize,
285 matching_file_prefix: *const c_char,
286 matcher_path: *const c_char,
287 tigs_edge_out: *mut isize,
288 tigs_insert_out: *mut usize,
289 tigs_out_limits: *mut usize,
290) -> usize {
291 let mut matchtigs_data = unsafe { Box::from_raw(matchtigs_data) };
292 info!("Computing tigs for k = {} and {} threads", k, threads);
293 info!(
294 "Graph has {} nodes and {} edges",
295 matchtigs_data.graph.node_count(),
296 matchtigs_data.graph.edge_count()
297 );
298
299 let matching_file_prefix = CString::from({
300 assert!(!matching_file_prefix.is_null());
301
302 CStr::from_ptr(matching_file_prefix)
303 })
304 .into_bytes();
305 let matching_file_prefix = match () {
306 #[cfg(unix)]
307 () => PathBuf::from(OsString::from_vec(matching_file_prefix)),
308 #[cfg(not(unix))]
309 () => PathBuf::from(
310 &std::str::from_utf8(matching_file_prefix.as_slice())
311 .expect("Invalid encoding for matching file prefix"),
312 ),
313 };
314
315 let matcher_path = CString::from({
316 assert!(!matcher_path.is_null());
317
318 CStr::from_ptr(matcher_path)
319 })
320 .into_bytes();
321
322 let matcher_path = match () {
323 #[cfg(unix)]
324 () => PathBuf::from(OsString::from_vec(matcher_path)),
325 #[cfg(not(unix))]
326 () => PathBuf::from(
327 &std::str::from_utf8(matcher_path.as_slice())
328 .expect("Invalid encoding for matcher path"),
329 ),
330 };
331
332 let tigs_edge_out = {
333 assert!(!tigs_edge_out.is_null());
334
335 std::slice::from_raw_parts_mut(tigs_edge_out, matchtigs_data.graph.edge_count() * 2)
336 };
337
338 let tigs_insert_out = {
339 assert!(!tigs_insert_out.is_null());
340
341 std::slice::from_raw_parts_mut(tigs_insert_out, matchtigs_data.graph.edge_count() * 2)
342 };
343
344 let tigs_out_limits = {
345 assert!(!tigs_out_limits.is_null());
346
347 std::slice::from_raw_parts_mut(tigs_out_limits, matchtigs_data.graph.edge_count())
348 };
349
350 let tigs = match tig_algorithm {
351 1 => matchtigs_data
352 .graph
353 .edge_indices()
354 .filter_map(|e| {
355 if e.as_usize() % 2 == 0 {
356 Some(vec![e])
357 } else {
358 None
359 }
360 })
361 .collect(),
362 2 => PathtigAlgorithm::compute_tigs(&mut matchtigs_data.graph, &()),
363 3 => EulertigAlgorithm::compute_tigs(
364 &mut matchtigs_data.graph,
365 &EulertigAlgorithmConfiguration { k },
366 ),
367 4 => MatchtigAlgorithm::compute_tigs(
368 &mut matchtigs_data.graph,
369 &MatchtigAlgorithmConfiguration {
370 threads,
371 k,
372 heap_type: HeapType::StdBinaryHeap,
373 node_weight_array_type: NodeWeightArrayType::EpochNodeWeightArray,
374 matching_file_prefix: &matching_file_prefix,
375 matcher_path: &matcher_path,
376 },
377 ),
378 5 => GreedytigAlgorithm::compute_tigs(
379 &mut matchtigs_data.graph,
380 &GreedytigAlgorithmConfiguration {
381 threads,
382 k,
383 staged_parallelism_divisor: None,
384 resource_limit_factor: 1,
385 heap_type: HeapType::StdBinaryHeap,
386 node_weight_array_type: NodeWeightArrayType::EpochNodeWeightArray,
387 performance_data_type: PerformanceDataType::None,
388 },
389 ),
390 tig_algorithm => panic!("Unknown tigs algorithm identifier {tig_algorithm}"),
391 };
392
393 let mut limit = 0;
394 for (i, tig) in tigs.iter().enumerate() {
395 for (edge_index, &edge) in tig.iter().enumerate() {
396 let edge_data = matchtigs_data.graph.edge_data(edge);
397 tigs_edge_out[limit + edge_index] =
398 edge_data.unitig_id as isize * if edge_data.is_forwards() { 1 } else { -1 };
399 tigs_insert_out[limit + edge_index] = if edge_data.is_original() {
400 0
401 } else {
402 edge_data.weight()
403 };
404 }
405 limit += tig.len();
406 tigs_out_limits[i] = limit;
407 }
408
409 tigs.len()
410}