libmatchtigs/implementation/greedytigs/mod.rs

use crate::implementation::{
    debug_assert_graph_has_no_consecutive_dummy_edges, make_graph_eulerian_with_breaking_edges,
    MatchtigEdgeData, PerformanceDataType, RelaxedAtomicBoolSlice, RelaxedAtomicBoolVec,
    TARGET_DIJKSTRA_BLOCK_TIME,
};
use crate::{HeapType, NodeWeightArrayType, TigAlgorithm};
use atomic_counter::{AtomicCounter, RelaxedCounter};
use genome_graph::bigraph::algo::eulerian::{
    compute_eulerian_superfluous_out_biedges,
    compute_minimum_bidirected_eulerian_cycle_decomposition, decomposes_into_eulerian_bicycles,
    find_non_eulerian_binodes_with_differences,
};
use genome_graph::bigraph::interface::dynamic_bigraph::DynamicEdgeCentricBigraph;
use genome_graph::bigraph::interface::static_bigraph::StaticBigraph;
use genome_graph::bigraph::interface::BidirectedData;
use genome_graph::bigraph::traitgraph::index::{GraphIndex, OptionalGraphIndex};
use genome_graph::bigraph::traitgraph::interface::GraphBase;
use genome_graph::bigraph::traitgraph::walks::{EdgeWalk, VecEdgeWalk};
use log::{error, info, warn};
use std::collections::BinaryHeap;
use std::marker::PhantomData;
use std::ops::AddAssign;
use std::sync::Mutex;
use std::time::Instant;
use traitgraph_algo::dijkstra::epoch_array_dijkstra_node_weight_array::EpochNodeWeightArray;
use traitgraph_algo::dijkstra::performance_counters::{
    DijkstraPerformanceCounter, DijkstraPerformanceData, NoopDijkstraPerformanceCounter,
};
use traitgraph_algo::dijkstra::{
    Dijkstra, DijkstraExhaustiveness, DijkstraHeap, DijkstraWeightedEdgeData, NodeWeightArray,
};

/// The greedy matchtigs algorithm.
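///
/// A minimal usage sketch, not compiled here, since the concrete graph and
/// sequence-handle types are supplied by the caller; the import paths are assumed,
/// and `graph` stands for any value satisfying the trait bounds of the
/// [`TigAlgorithm`] implementation below:
///
/// ```ignore
/// use libmatchtigs::{GreedytigAlgorithm, GreedytigAlgorithmConfiguration, TigAlgorithm};
///
/// // `graph` must be a dynamic edge-centric bigraph whose edge data implements
/// // `MatchtigEdgeData`; `SequenceHandle` is the caller's sequence handle type.
/// let configuration = GreedytigAlgorithmConfiguration::new(/* threads */ 8, /* k */ 31);
/// let greedytigs = GreedytigAlgorithm::<SequenceHandle>::compute_tigs(&mut graph, &configuration);
/// // Each returned walk is a sequence of edges in the (possibly modified) graph.
/// ```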
#[derive(Default)]
pub struct GreedytigAlgorithm<SequenceHandle> {
    _phantom_data: PhantomData<SequenceHandle>,
}

/// The options for the greedy matchtigs algorithm.
pub struct GreedytigAlgorithmConfiguration {
    /// The number of threads to use.
    pub threads: usize,
    /// The k used to build the de Bruijn graph.
    pub k: usize,
    /// If given, enables staged parallelism mode.
    /// In this mode, shortest path queries are limited in the amount of memory they can use, and larger queries are postponed to be executed with fewer queries running in parallel.
    pub staged_parallelism_divisor: Option<f64>,
    /// If staged parallelism mode is on, then the resource limit is computed as the number of nodes in the graph times this factor, divided by the number of threads.
    pub resource_limit_factor: usize,
    /// The type of the node weight array used by Dijkstra's algorithm.
    pub node_weight_array_type: NodeWeightArrayType,
    /// The type of the heap used by Dijkstra's algorithm.
    pub heap_type: HeapType,
    /// The type of the performance data collector used by Dijkstra's algorithm.
    pub performance_data_type: PerformanceDataType,
}

impl GreedytigAlgorithmConfiguration {
    /// Construct the configuration for the greedytig algorithm, using default values wherever possible.
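    ///
    /// A sketch of how staged parallelism might be enabled afterwards; the field
    /// values below are illustrative only, not recommendations:
    ///
    /// ```ignore
    /// let mut configuration = GreedytigAlgorithmConfiguration::new(8, 31);
    /// // Halve the thread count in every retry stage and bound each query's
    /// // resources by `node_count * 32 / threads`.
    /// configuration.staged_parallelism_divisor = Some(2.0);
    /// configuration.resource_limit_factor = 32;
    /// ```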
    // part of public interface
    #[allow(dead_code)]
    pub fn new(threads: usize, k: usize) -> Self {
        Self {
            threads,
            k,
            staged_parallelism_divisor: None,
            resource_limit_factor: 0,
            node_weight_array_type: NodeWeightArrayType::HashbrownHashMap,
            heap_type: HeapType::StdBinaryHeap,
            performance_data_type: PerformanceDataType::None,
        }
    }
}

impl<Graph: GraphBase, SequenceHandle: Default + Clone> TigAlgorithm<Graph>
    for GreedytigAlgorithm<SequenceHandle>
where
    Graph: DynamicEdgeCentricBigraph + Send + Sync,
    Graph::NodeIndex: Send + Sync,
    Graph::EdgeData: BidirectedData + Eq + Clone + MatchtigEdgeData<SequenceHandle>,
{
    type Configuration = GreedytigAlgorithmConfiguration;

    fn compute_tigs(
        graph: &mut Graph,
        configuration: &Self::Configuration,
    ) -> Vec<VecEdgeWalk<Graph>> {
        compute_greedytigs_choose_heap_type(graph, configuration)
    }
}

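// The following three helper functions translate the run-time configuration enums
// (heap type, node weight array type, performance data type) into compile-time type
// parameters, so that the actual algorithm in `compute_greedytigs` is monomorphised
// for the chosen Dijkstra data structures.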
fn compute_greedytigs_choose_heap_type<
    NodeIndex: GraphIndex<OptionalNodeIndex> + Send + Sync,
    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
    SequenceHandle: Default + Clone,
    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
    Graph: DynamicEdgeCentricBigraph<
            NodeIndex = NodeIndex,
            OptionalNodeIndex = OptionalNodeIndex,
            EdgeData = EdgeData,
        > + Send
        + Sync,
>(
    graph: &mut Graph,
    configuration: &GreedytigAlgorithmConfiguration,
) -> Vec<VecEdgeWalk<Graph>> {
    match configuration.heap_type {
        HeapType::StdBinaryHeap => {
            compute_greedytigs_choose_node_weight_array_type::<_, _, _, _, _, BinaryHeap<_>>(
                graph,
                configuration,
            )
        }
    }
}

fn compute_greedytigs_choose_node_weight_array_type<
    NodeIndex: GraphIndex<OptionalNodeIndex> + Send + Sync,
    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
    SequenceHandle: Default + Clone,
    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
    Graph: DynamicEdgeCentricBigraph<
            NodeIndex = NodeIndex,
            OptionalNodeIndex = OptionalNodeIndex,
            EdgeData = EdgeData,
        > + Send
        + Sync,
    DijkstraHeapType: DijkstraHeap<usize, Graph::NodeIndex>,
>(
    graph: &mut Graph,
    configuration: &GreedytigAlgorithmConfiguration,
) -> Vec<VecEdgeWalk<Graph>> {
    match configuration.node_weight_array_type {
        NodeWeightArrayType::EpochNodeWeightArray => {
            compute_greedytigs_choose_dijkstra_performance_type::<
                _,
                _,
                _,
                _,
                _,
                DijkstraHeapType,
                EpochNodeWeightArray<_>,
            >(graph, configuration)
        }
        NodeWeightArrayType::HashbrownHashMap => {
            compute_greedytigs_choose_dijkstra_performance_type::<
                _,
                _,
                _,
                _,
                _,
                DijkstraHeapType,
                hashbrown::HashMap<_, _>,
            >(graph, configuration)
        }
    }
}

fn compute_greedytigs_choose_dijkstra_performance_type<
    NodeIndex: GraphIndex<OptionalNodeIndex> + Send + Sync,
    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
    SequenceHandle: Default + Clone,
    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
    Graph: DynamicEdgeCentricBigraph<
            NodeIndex = NodeIndex,
            OptionalNodeIndex = OptionalNodeIndex,
            EdgeData = EdgeData,
        > + Send
        + Sync,
    DijkstraHeapType: DijkstraHeap<usize, Graph::NodeIndex>,
    DijkstraNodeWeightArray: NodeWeightArray<usize>,
>(
    graph: &mut Graph,
    configuration: &GreedytigAlgorithmConfiguration,
) -> Vec<VecEdgeWalk<Graph>> {
    match configuration.performance_data_type {
        PerformanceDataType::None => compute_greedytigs::<
            _,
            _,
            _,
            _,
            _,
            DijkstraHeapType,
            DijkstraNodeWeightArray,
            NoopDijkstraPerformanceCounter,
        >(graph, configuration),
        PerformanceDataType::Complete => compute_greedytigs::<
            _,
            _,
            _,
            _,
            _,
            DijkstraHeapType,
            DijkstraNodeWeightArray,
            DijkstraPerformanceCounter,
        >(graph, configuration),
    }
}

/// Computes greedy matchtigs in the given graph.
fn compute_greedytigs<
    NodeIndex: GraphIndex<OptionalNodeIndex> + Send + Sync,
    OptionalNodeIndex: OptionalGraphIndex<NodeIndex>,
    SequenceHandle: Default + Clone,
    EdgeData: BidirectedData + Eq + MatchtigEdgeData<SequenceHandle> + Clone,
    Graph: DynamicEdgeCentricBigraph<
            NodeIndex = NodeIndex,
            OptionalNodeIndex = OptionalNodeIndex,
            EdgeData = EdgeData,
        > + Send
        + Sync,
    DijkstraHeapType: DijkstraHeap<usize, Graph::NodeIndex>,
    DijkstraNodeWeightArray: NodeWeightArray<usize>,
    DijkstraPerformance: DijkstraPerformanceData + Default + AddAssign + Send,
>(
    graph: &mut Graph,
    configuration: &GreedytigAlgorithmConfiguration,
) -> Vec<VecEdgeWalk<Graph>> {
    let threads = configuration.threads;
    let k = configuration.k;

    info!("Collecting nodes with missing incoming or outgoing edges");
    let mut out_nodes = Vec::new(); // Misses outgoing edges
    let mut in_node_count = 0;
    let in_node_map = RelaxedAtomicBoolVec::new(graph.node_count());
    let mut node_multiplicities = vec![0; graph.node_count()];
    let mut unbalanced_self_mirror_count = 0;

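    // Classify every node by its Eulerian imbalance: nodes with missing outgoing
    // biedges become Dijkstra source nodes (`out_nodes`), nodes with missing incoming
    // biedges become potential targets (`in_node_map`), and unbalanced self-mirror
    // nodes act as both.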
    for node_index in graph.node_indices() {
        let diff = compute_eulerian_superfluous_out_biedges(graph, node_index);
        if graph.is_self_mirror_node(node_index) && diff != 0 {
            in_node_count += 1;
            in_node_map.set(node_index.as_usize(), true);
            node_multiplicities[node_index.as_usize()] = diff;
            out_nodes.push(node_index);
            unbalanced_self_mirror_count += 1;
        } else if diff > 0 {
            in_node_count += 1;
            in_node_map.set(node_index.as_usize(), true);
            node_multiplicities[node_index.as_usize()] = diff;
        } else if diff < 0 {
            out_nodes.push(node_index);
            node_multiplicities[node_index.as_usize()] = diff;
        }
    }

    info!(
        "Found {} nodes with missing outgoing edges",
        out_nodes.len()
    );
    info!("Found {} nodes with missing incoming edges", in_node_count);
    info!(
        "Of those there are {} self-mirrors",
        unbalanced_self_mirror_count
    );

    info!("Computing shortest paths between nodes with missing outgoing and nodes with missing incoming edges");

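    // The following block runs all shortest-path queries in parallel, inserts the
    // resulting dummy edges into the graph, and evaluates to the number of dummy
    // edges added, which is then used as the id offset for the breaking edges added
    // during post-processing.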
    let dummy_sequence = SequenceHandle::default();
    let mut dummy_edge_id = {
        info!(
            "Using {} threads to run ~{} dijkstras",
            threads,
            out_nodes.len()
        );
        let start = Instant::now();

        let locked_node_multiplicities: Vec<_> = node_multiplicities
            .iter()
            .copied()
            .map(Mutex::new)
            .collect();
        let executed_dijkstras = RelaxedCounter::default();

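        // `compute_dijkstras` is the per-thread worker: it runs resource-limited
        // Dijkstra queries from a chunk of out-nodes towards reachable in-nodes and
        // greedily matches source and target multiplicities under per-node locks.
        // Nodes whose search space could not be fully explored are marked in
        // `missed_nodes` so they can be retried in a later stage.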
        #[allow(clippy::too_many_arguments)]
        fn compute_dijkstras<
            EdgeData: DijkstraWeightedEdgeData<usize>,
            Graph: StaticBigraph<EdgeData = EdgeData>,
            DijkstraNodeWeightArray: NodeWeightArray<usize>,
            DijkstraHeapType: DijkstraHeap<usize, Graph::NodeIndex>,
            DijkstraPerformance: DijkstraPerformanceData + Default,
        >(
            graph: &Graph,
            dijkstra: &mut Dijkstra<Graph, usize, DijkstraNodeWeightArray, DijkstraHeapType>,
            distances: &mut Vec<(Graph::NodeIndex, usize)>,
            shortest_paths: &mut Vec<(Graph::NodeIndex, Graph::NodeIndex, usize)>,
            out_nodes: &[Graph::NodeIndex],
            missed_nodes: RelaxedAtomicBoolSlice,
            dijkstra_performance_limit: usize,
            in_node_map: &RelaxedAtomicBoolVec,
            locked_node_multiplicities: &[Mutex<isize>],
            k: usize,
            executed_dijkstras: &RelaxedCounter,
            output: bool,
            output_step: usize,
            total_dijkstras: usize,
        ) -> DijkstraPerformance {
            let mut last_output = 0;
            let mut dijkstra_performance = DijkstraPerformance::default();

            for (i, &out_node) in out_nodes.iter().enumerate() {
                let out_node_is_self_mirror = graph.is_self_mirror_node(out_node);
                let out_node_mirror = graph.mirror_node(out_node).unwrap();
                // The linter suggestion to use atomic values for locked_node_multiplicities here is spurious,
                // as we explicitly want to update a set of up to four values atomically later on.
                let out_node_multiplicity_lock = locked_node_multiplicities
                    [out_node_mirror.as_usize()]
                .lock()
                .unwrap();
                let mut out_node_multiplicity: isize = *out_node_multiplicity_lock;
                drop(out_node_multiplicity_lock);

                debug_assert!(
                    (0..=4).contains(&out_node_multiplicity),
                    "out_node_multiplicity = {out_node_multiplicity}, out_node_is_self_mirror = {out_node_is_self_mirror}"
                );

                if out_node_multiplicity == 0 {
                    continue;
                }

                while out_node_multiplicity > 0 {
                    let target_amount = (out_node_multiplicity + 1).try_into().unwrap();
                    let dijkstra_status = dijkstra.shortest_path_lens(
                        graph,
                        out_node,
                        in_node_map,
                        target_amount,
                        k - 1,
                        true,
                        distances,
                        dijkstra_performance_limit,
                        dijkstra_performance_limit,
                        dijkstra_performance,
                    );
                    dijkstra_performance = dijkstra_status.performance_data;

                    if distances.is_empty() {
                        missed_nodes.set(
                            i,
                            dijkstra_status.exhaustiveness != DijkstraExhaustiveness::Complete,
                        );

                        // If no in_nodes are reachable, fix this with breaking dummy arcs in post-processing
                        break;
                    }

                    let abort_after_this = distances.len() < target_amount;

                    for &(in_node, distance) in distances.iter() {
                        let mut is_self_mirror_edge = false;
                        if in_node == out_node_mirror {
                            if out_node_multiplicity < 2 {
                                continue;
                            } else {
                                is_self_mirror_edge = true;
                            }
                        };
                        let is_self_mirror_edge = is_self_mirror_edge;
                        debug_assert!(!is_self_mirror_edge || !out_node_is_self_mirror);

                        let in_node_mirror = graph.mirror_node(in_node).unwrap();
                        let in_node_is_self_mirror = graph.is_self_mirror_node(in_node);
                        debug_assert!(!is_self_mirror_edge || !in_node_is_self_mirror);

                        let mut lock_indices = vec![out_node.as_usize()];
                        if !out_node_is_self_mirror {
                            lock_indices.push(out_node_mirror.as_usize());
                        }
                        if !is_self_mirror_edge {
                            lock_indices.push(in_node.as_usize());
                            if !in_node_is_self_mirror {
                                lock_indices.push(in_node_mirror.as_usize());
                            }
                        }
                        let lock_indices = lock_indices;

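                        // Acquire the multiplicity locks in ascending node index order.
                        // Every thread uses the same global order, which prevents
                        // deadlocks when two threads need overlapping node sets.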
                        let lock_permutation = permutation::sort(lock_indices.as_slice());
                        let mut locks = vec![None, None, None, None];

                        for lock_rank in 0..lock_indices.len() {
                            let lock_index = lock_permutation.apply_inv_idx(lock_rank);

                            // The linter suggestion to use atomic values for locked_node_multiplicities here is spurious,
                            // as we explicitly want to update a set of up to four values atomically.
                            locks[lock_index] = Some(
                                locked_node_multiplicities[lock_indices[lock_index]]
                                    .lock()
                                    .unwrap(),
                            );
                        }

                        let mut locks: Vec<_> = locks
                            .into_iter()
                            .take(lock_indices.len())
                            .map(|o| o.unwrap())
                            .collect();
                        let in_node_lock_offset = if out_node_is_self_mirror { 1 } else { 2 };
                        let multiplicity_reduction = if is_self_mirror_edge { 2 } else { 1 };

                        if out_node_is_self_mirror {
                            debug_assert!(*locks[0] >= 0);
                            debug_assert!(*locks[0] <= 1);
                            out_node_multiplicity = *locks[0];
                        } else {
                            debug_assert!(*locks[0] <= 0);
                            debug_assert!(*locks[0] >= -4);
                            debug_assert_eq!(*locks[0], -*locks[1]);
                            out_node_multiplicity = -*locks[0];
                        }

                        if out_node_multiplicity == 0 {
                            break;
                        }

                        if !is_self_mirror_edge {
                            if in_node_is_self_mirror {
                                debug_assert!(*locks[in_node_lock_offset] >= 0,
                                              "*locks[in_node_lock_offset] = {}, in_node_lock_offset = {}, out_node_is_self_mirror = {}, in_node_is_self_mirror = {}, is_self_mirror_edge = {}, lock_indices = {:?}, lock_permutation = {:?}, locks = {:?}",
                                              *locks[in_node_lock_offset],
                                              in_node_lock_offset,
                                              out_node_is_self_mirror,
                                              in_node_is_self_mirror,
                                              is_self_mirror_edge,
                                              lock_indices,
                                              lock_permutation,
                                              locks.iter().map(|l| **l).collect::<Vec<_>>());
                                debug_assert!(*locks[in_node_lock_offset] <= 1,
                                              "*locks[in_node_lock_offset] = {}, in_node_lock_offset = {}, out_node_is_self_mirror = {}, in_node_is_self_mirror = {}, is_self_mirror_edge = {}, lock_indices = {:?}, lock_permutation = {:?}, locks = {:?}",
                                              *locks[in_node_lock_offset],
                                              in_node_lock_offset,
                                              out_node_is_self_mirror,
                                              in_node_is_self_mirror,
                                              is_self_mirror_edge,
                                              lock_indices,
                                              lock_permutation,
                                              locks.iter().map(|l| **l).collect::<Vec<_>>());
                            } else {
                                debug_assert!(*locks[in_node_lock_offset] >= 0,
                                              "*locks[in_node_lock_offset] = {}, in_node_lock_offset = {}, in_node_is_self_mirror = {}, is_self_mirror_edge = {}, lock_permutation = {:?}, locks = {:?}",
                                              *locks[in_node_lock_offset],
                                              in_node_lock_offset,
                                              in_node_is_self_mirror,
                                              is_self_mirror_edge,
                                              lock_permutation,
                                              locks.iter().map(|l| **l).collect::<Vec<_>>());
                                debug_assert!(*locks[in_node_lock_offset] <= 4);
                                debug_assert_eq!(
                                    *locks[in_node_lock_offset],
                                    -*locks[in_node_lock_offset + 1]
                                );
                            }

                            let in_node_multiplicity = *locks[in_node_lock_offset];
                            if in_node_multiplicity == 0 {
                                in_node_map.set(in_node.as_usize(), false);
                                continue;
                            }
                        }

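                        // Record the matched shortest path, then update the
                        // multiplicities of all involved nodes while their locks are
                        // still held, so the whole update is atomic.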
                        shortest_paths.push((out_node, in_node, distance));

                        if out_node_is_self_mirror {
                            *locks[0] -= 1;
                            debug_assert!(*locks[0] >= 0);
                            debug_assert!(*locks[0] <= 1);
                        } else {
                            *locks[0] += multiplicity_reduction;
                            *locks[1] -= multiplicity_reduction;
                            debug_assert!(*locks[0] <= 0);
                            debug_assert!(*locks[0] >= -4);
                            debug_assert_eq!(*locks[0], -*locks[1]);
                        }
                        out_node_multiplicity = -(*locks[0]);

                        if !is_self_mirror_edge {
                            *locks[in_node_lock_offset] -= 1;

                            if in_node_is_self_mirror {
                                debug_assert!(*locks[in_node_lock_offset] >= 0);
                                debug_assert!(*locks[in_node_lock_offset] <= 1);
                            } else {
                                *locks[in_node_lock_offset + 1] += 1;
                                debug_assert!(*locks[in_node_lock_offset] >= 0);
                                debug_assert!(*locks[in_node_lock_offset] <= 4);
                                debug_assert_eq!(
                                    *locks[in_node_lock_offset],
                                    -*locks[in_node_lock_offset + 1]
                                );
                            }
                        }

                        if out_node_multiplicity == 0 {
                            in_node_map.set(out_node_mirror.as_usize(), false);
                        }

                        if !is_self_mirror_edge {
                            if *locks[in_node_lock_offset] == 0 {
                                in_node_map.set(in_node.as_usize(), false);
                            }
                        }
                    }

                    if abort_after_this {
                        missed_nodes.set(
                            i,
                            dijkstra_status.exhaustiveness != DijkstraExhaustiveness::Complete,
                        );

                        break;
                    }
                }

                executed_dijkstras.inc();
                if output && executed_dijkstras.get() - last_output > output_step {
                    last_output += output_step;
                    println!(
                        "{}%, ~{} total shortest paths",
                        last_output * 100 / total_dijkstras,
                        (shortest_paths.len() as f64 * total_dijkstras as f64 / i as f64) as u64,
                    );
                }
            }

            dijkstra_performance
        }

        let dijkstra_start = Instant::now();

        let results = Mutex::new(Vec::new());
        let shared_graph = &*graph;
        let mut dijkstra_performance = DijkstraPerformance::default();

        let mut unbalanced_out_nodes = out_nodes;
        let mut missed_nodes = RelaxedAtomicBoolVec::new(unbalanced_out_nodes.len());

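        // Staged parallelism: iteration 0 runs with all threads. If the
        // staged_parallelism_divisor is set, out-nodes whose queries exceeded the
        // per-query resource limit are collected as "missed" and retried in later
        // iterations with fewer threads and therefore a larger limit per query.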
        for iteration in 0.. {
            let threads = if iteration == 0 {
                threads
            } else {
                (((threads as f64)
                    / configuration
                        .staged_parallelism_divisor
                        .expect("Iteration > 0 will never be reached if this is None")
                        .powi(iteration)) as usize)
                    .max(1)
            };
            let resource_limit =
                if threads == 1 || configuration.staged_parallelism_divisor.is_none() {
                    usize::MAX
                } else {
                    graph
                        .node_count()
                        .saturating_mul(configuration.resource_limit_factor)
                        / threads
                };
            let offset = Mutex::new(0);

            crossbeam::scope(|scope| {
                info!("Starting {threads} dijkstra threads with a resource limit of {resource_limit} each");
                let mut thread_handles = Vec::new();

                for _ in 0..threads {
                    thread_handles.push(scope.spawn(|_| {
                        let graph = shared_graph;
                        let mut dijkstra =
                            Dijkstra::<_, _, DijkstraNodeWeightArray, DijkstraHeapType>::new(graph);
                        let mut distances = Vec::new();
                        let mut shortest_paths = Vec::new();
                        let mut chunk_size = 1024;
                        let mut dijkstra_performance = DijkstraPerformance::default();

                        loop {
                            let (current_offset, current_limit) = {
                                let mut offset = offset.lock().unwrap();
                                let current_offset = *offset;
                                let remaining_nodes = unbalanced_out_nodes.len() - *offset;

                                if remaining_nodes == 0 {
                                    break;
                                }

                                chunk_size = chunk_size
                                    .min(remaining_nodes / threads)
                                    .max(10)
                                    .max(chunk_size / 10)
                                    .min(remaining_nodes);
                                let current_limit = current_offset + chunk_size;
                                *offset = current_limit;
                                (current_offset, current_limit)
                            };

                            let dijkstra_start = Instant::now();
                            dijkstra_performance += compute_dijkstras(
                                graph,
                                &mut dijkstra,
                                &mut distances,
                                &mut shortest_paths,
                                &unbalanced_out_nodes[current_offset..current_limit],
                                missed_nodes.slice(current_offset..current_limit),
                                resource_limit,
                                &in_node_map,
                                &locked_node_multiplicities,
                                k,
                                &executed_dijkstras,
                                false,
                                unbalanced_out_nodes.len() / 100,
                                unbalanced_out_nodes.len(),
                            );
                            let dijkstra_end = Instant::now();

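                            // Adapt the chunk size so that a block of Dijkstra queries
                            // takes roughly TARGET_DIJKSTRA_BLOCK_TIME, which keeps
                            // contention on the shared offset low while still balancing
                            // the load between threads.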
                            let duration = (dijkstra_end - dijkstra_start).as_secs_f32();
                            chunk_size = ((chunk_size as f32) * (TARGET_DIJKSTRA_BLOCK_TIME / duration))
                                as usize;
                            chunk_size = chunk_size.max(10);
                        }

                        results.lock().unwrap().append(&mut shortest_paths);
                        dijkstra_performance
                    }));
                }

                info!("Waiting for {} dijkstra threads to finish", thread_handles.len());
                for thread_handle in thread_handles {
                    dijkstra_performance += thread_handle.join().unwrap();
                }
            }).unwrap();

            unbalanced_out_nodes = unbalanced_out_nodes
                .iter()
                .zip(missed_nodes.iter())
                .filter_map(|(&out_node, missed)| if missed { Some(out_node) } else { None })
                .collect();
            missed_nodes.reinitialise(unbalanced_out_nodes.len());
            info!(
                "{} remaining unbalanced out nodes",
                unbalanced_out_nodes.len()
            );

            if unbalanced_out_nodes.is_empty() {
                break;
            }
            assert!(configuration.staged_parallelism_divisor.is_some(), "Have nodes where Dijkstra missed part of the search space, but not in staged parallelism mode");
        }

        let results = results.into_inner().unwrap();
        if let (Some(unnecessary_heap_elements), Some(iterations)) = (
            dijkstra_performance.unnecessary_heap_elements(),
            dijkstra_performance.iterations(),
        ) {
            info!(
                "Dijkstras had a factor of {:.3} unnecessary heap elements",
                unnecessary_heap_elements as f64 / iterations as f64
            );
        }
        if let Some(max_max_heap_size) = dijkstra_performance.max_max_heap_size() {
            info!("Dijkstras had a maximum maximum heap size of {max_max_heap_size}");
        }
        if let Some(max_max_distance_array_size) =
            dijkstra_performance.max_max_distance_array_size()
        {
            info!("Dijkstras had a maximum maximum distance array size of {max_max_distance_array_size}");
        }
        if let Some(average_max_heap_size) = dijkstra_performance.average_max_heap_size() {
            info!("Dijkstras had an average maximum heap size of {average_max_heap_size:.0}");
        }
        if let Some(average_max_distance_array_size) =
            dijkstra_performance.average_max_distance_array_size()
        {
            info!(
                "Dijkstras had an average maximum distance array size of {average_max_distance_array_size:.0}"
            );
        }

        info!("Found {} shortest paths", results.len());
        let dijkstra_time = (Instant::now() - dijkstra_start).as_nanos();

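        // Materialise every matched shortest path as a weighted dummy edge, together
        // with its mirror edge, so that the graph can afterwards be made Eulerian.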
        let mut dummy_edge_id = 0;
        for (out_node, in_node, distance) in results {
            let sequence = dummy_sequence.clone();
            dummy_edge_id += 1;
            let edge_data = EdgeData::new(sequence, true, distance, dummy_edge_id);
            graph.add_edge(out_node, in_node, edge_data.clone());
            graph.add_edge(
                graph.mirror_node(in_node).unwrap(),
                graph.mirror_node(out_node).unwrap(),
                edge_data.mirror(),
            );
        }

        let end = Instant::now();
        info!(
            "Took {:.6}s for computing paths and getting edges, of this {:.6}s are from dijkstra",
            (end - start).as_secs_f64(),
            dijkstra_time as f64 / 1e9
        );
        dummy_edge_id
    };

    debug_assert!(graph.verify_node_pairing());
    debug_assert!(graph.verify_edge_mirror_property());
    debug_assert_graph_has_no_consecutive_dummy_edges(graph, k);

    info!("Making graph Eulerian by adding breaking dummy edges");
    make_graph_eulerian_with_breaking_edges(graph, dummy_sequence, &mut dummy_edge_id, k);

    // Check if the graph now really is Eulerian, and if not, output some debug information
    if !decomposes_into_eulerian_bicycles(graph) {
        let non_eulerian_nodes_and_differences = find_non_eulerian_binodes_with_differences(graph);
        error!(
            "Failed to make the graph Eulerian. Non-Eulerian nodes and differences:\n{:?}",
            non_eulerian_nodes_and_differences
        );
        panic!("Failed to make the graph Eulerian.");
    }
    debug_assert!(graph.verify_node_pairing());
    debug_assert!(graph.verify_edge_mirror_property());
    debug_assert_graph_has_no_consecutive_dummy_edges(graph, k);

    info!("Finding Eulerian bicycle");
    //debug!("{:?}", graph);
    let mut eulerian_cycles = compute_minimum_bidirected_eulerian_cycle_decomposition(graph);
    info!("Found {} Eulerian bicycles", eulerian_cycles.len());

    info!("Breaking Eulerian bicycles at expensive temporary edges");
    let mut greedytigs = Vec::new();

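    // Split each Eulerian bicycle at dummy edges of weight at least k (and at the
    // dummy edge the cycle was rotated to start with); the remaining segments of
    // original edges become the greedytigs.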
    let mut removed_edges = 0;
    for eulerian_cycle in &mut eulerian_cycles {
        info!(
            "Processing Eulerian bicycle with {} biedges",
            eulerian_cycle.len()
        );
        debug_assert!(eulerian_cycle.is_circular_walk(graph));

        // Rotate cycle such that longest dummy is first edge
        let mut longest_dummy_weight = 0;
        let mut longest_dummy_index = 0;
        for (index, &edge) in eulerian_cycle.iter().enumerate() {
            let edge_data = graph.edge_data(edge);
            if edge_data.is_dummy() && edge_data.weight() > longest_dummy_weight {
                longest_dummy_weight = edge_data.weight();
                longest_dummy_index = index;
            }
        }
        if longest_dummy_weight > 0 {
            eulerian_cycle.rotate_left(longest_dummy_index);
        }

        let mut offset = 0;
        let mut last_edge_is_dummy = false;
        for (current_cycle_index, &current_edge_index) in eulerian_cycle.iter().enumerate() {
            let edge_data = graph.edge_data(current_edge_index);

            if edge_data.is_original() {
                last_edge_is_dummy = false;
            } else {
                if last_edge_is_dummy {
                    warn!(
                        "Found consecutive dummy edges at {}",
                        current_edge_index.as_usize()
                    );
                }
                last_edge_is_dummy = true;
            }

            if (edge_data.weight() >= k && edge_data.is_dummy())
                || (edge_data.is_dummy() && current_cycle_index == 0)
            {
                if offset < current_cycle_index {
                    greedytigs.push(eulerian_cycle[offset..current_cycle_index].to_owned());
                } else if current_cycle_index > 0 {
                    warn!("Found consecutive breaking edges");
                }
                offset = current_cycle_index + 1;
                removed_edges += 1;
            }
        }
        if offset < eulerian_cycle.len() {
            if graph
                .edge_data(*eulerian_cycle.last().unwrap())
                .is_original()
            {
                greedytigs.push(eulerian_cycle[offset..eulerian_cycle.len()].to_owned());
            } else if offset < eulerian_cycle.len() - 1 {
                greedytigs.push(eulerian_cycle[offset..eulerian_cycle.len() - 1].to_owned());
            }
        }
    }

    info!("Found {} expensive temporary edges", removed_edges);
    info!("Found {} greedytigs", greedytigs.len());

    for greedytig in &greedytigs {
        debug_assert!(!greedytig.is_empty());
        debug_assert!(graph.edge_data(*greedytig.first().unwrap()).is_original());
        debug_assert!(graph.edge_data(*greedytig.last().unwrap()).is_original());
    }

    greedytigs
}